16 #ifndef __MATHEMATICS_H_
17 #define __MATHEMATICS_H_
31 #include <sys/types.h>
41 #define cygwin_log2 log2
49 #if _GLIBCXX_USE_C99_MATH
50 #if !_GLIBCXX_USE_C99_FP_MACROS_DYNAMIC
55 using std::fpclassify;
63 using std::isgreaterequal;
65 using std::islessequal;
66 using std::islessgreater;
67 using std::isunordered;
79 #define isfinite _isfinite
85 #define NAN (strtod("NAN",NULL))
89 #define RNG_SEED_SIZE 256
92 #define RADIX_STACK_SIZE 512
95 #define radix_push(a, n, i) sp->sa = a, sp->sn = n, (sp++)->si = i
96 #define radix_pop(a, n, i) a = (--sp)->sa, n = sp->sn, i = sp->si
98 #ifndef DOXYGEN_SHOULD_SKIP_THIS
100 template <
class T>
struct radix_stack_t
113 template <
class T1,
class T2>
struct thread_qsort
123 int32_t* qsort_threads;
129 #endif // DOXYGEN_SHOULD_SKIP_THIS
155 static inline T
min(T a, T b)
157 return (a<=b) ? a : b;
162 static inline T
max(T a, T b)
164 return (a>=b) ? a : b;
169 static inline T
clamp(T value, T lb, T ub)
218 else return (a<0) ? (-1) : (+1);
223 static inline void swap(T &a,T &b)
234 static inline void resize(T* &data, int64_t old_size, int64_t new_size)
236 if (old_size==new_size)
239 for (int64_t i=0; i<old_size && i<new_size; i++)
250 for (int32_t i=0; i<len; i++)
261 for (int32_t i=0; i<len; i++)
277 static inline T
sq(T x)
321 tmp.i = 0x5f3759d5 - (tmp.i >> 1);
323 x = x*(1.5f - xhalf*x*x);
339 static inline int32_t
pow(int32_t x, int32_t n)
397 #endif // HAVE_LGAMMAL
427 for (
int i=1; i<len; i++)
428 area += 0.5*(xy[2*i]-xy[2*(i-1)])*(xy[2*i+1]+xy[2*(i-1)+1]);
432 for (
int i=1; i<len; i++)
433 area += 0.5*(xy[2*i+1]-xy[2*(i-1)+1])*(xy[2*i]+xy[2*(i-1)]);
441 T*& matrix, int32_t& num_feat, int32_t& num_vec)
443 T* transposed=
SG_MALLOC(T, num_vec*num_feat);
444 for (int32_t i=0; i<num_vec; i++)
446 for (int32_t j=0; j<num_feat; j++)
447 transposed[i+j*num_vec]=matrix[i*num_feat+j];
460 float64_t* matrix, int32_t rows, int32_t cols,
467 double alpha,
const double* A,
int rows,
int cols,
468 CBLAS_TRANSPOSE transposeA,
double *B,
int cols_B,
469 CBLAS_TRANSPOSE transposeB,
double beta,
double *C)
471 cblas_dgemm(CblasColMajor, transposeA, transposeB, rows, cols, cols_B,
472 alpha, A, cols, B, cols_B, beta, C, cols);
477 double alpha,
const double *A,
int rows,
int cols,
478 const CBLAS_TRANSPOSE transposeA,
const double* X,
double beta,
481 cblas_dgemv(CblasColMajor, transposeA,
482 rows, cols, alpha, A, cols,
490 for (
int i=2; i<=n; i++)
500 gettimeofday(&tv, NULL);
501 seed=(uint32_t) (4223517*getpid()*tv.tv_sec*tv.tv_usec);
505 #if !defined(CYGWIN) && !defined(__INTERIX)
514 #if defined(CYGWIN) || defined(__INTERIX)
521 static inline int32_t
random(int32_t min_value, int32_t max_value)
523 int32_t ret = min_value + (int32_t) ((max_value-min_value+1) * (
random() / (RAND_MAX+1.0)));
524 ASSERT(ret>=min_value && ret<=max_value);
530 float32_t ret = min_value + ((max_value-min_value) * (
random() / (1.0*RAND_MAX)));
532 if (ret<min_value || ret>max_value)
533 SG_SPRINT(
"min_value:%10.10f value: %10.10f max_value:%10.10f", min_value, ret, max_value);
534 ASSERT(ret>=min_value && ret<=max_value);
540 float64_t ret = min_value + ((max_value-min_value) * (
random() / (1.0*RAND_MAX)));
542 if (ret<min_value || ret>max_value)
543 SG_SPRINT(
"min_value:%10.10f value: %10.10f max_value:%10.10f", min_value, ret, max_value);
544 ASSERT(ret>=min_value && ret<=max_value);
560 rand_u =
random(-1.0, 1.0);
561 rand_v =
random(-1.0, 1.0);
562 rand_s = rand_u*rand_u + rand_v*rand_v;
563 }
while ((rand_s == 0) || (rand_s >= 1));
566 ret = rand_u*
sqrt(-2.0*
log(rand_s)/rand_s);
567 ret = std_dev*ret +
mean;
582 rand_u =
random(-1.0, 1.0);
583 rand_v =
random(-1.0, 1.0);
584 rand_s = rand_u*rand_u + rand_v*rand_v;
585 }
while ((rand_s == 0) || (rand_s >= 1));
587 ret = rand_u*
sqrt(-2.0*
log(rand_s)/rand_s);
588 ret = std_dev*ret +
mean;
610 for (int32_t i=0; i<len; i++)
618 for (int32_t i=0; i<len; i++)
624 for (int32_t i=0; i<len; i++)
631 for (int32_t i=0; i<len; i++)
641 for (int32_t i = 0; i < n; i++)
643 for (int32_t i = 0; i < n; i++)
648 static inline int64_t
nchoosek(int32_t n, int32_t k)
652 for (int32_t i=n-k+1; i<=n; i++)
661 T scalar,
const T* vec2, int32_t n)
663 for (int32_t i=0; i<n; i++)
664 vec1[i]+=scalar*vec2[i];
671 for (int32_t i=0; i<n; i++)
672 r+=((v1[i]) ? 1 : 0) * ((v2[i]) ? 1 : 0);
680 for (int32_t i=0; i<n; i++)
692 r = cblas_ddot(n, v1, skip, v2, skip);
694 for (int32_t i=0; i<n; i++)
707 r = cblas_sdot(n, v1, skip, v2, skip);
709 for (int32_t i=0; i<n; i++)
717 const uint64_t* v1,
const uint64_t* v2, int32_t n)
720 for (int32_t i=0; i<n; i++)
727 const int64_t* v1,
const int64_t* v2, int32_t n)
730 for (int32_t i=0; i<n; i++)
738 const int32_t* v1,
const int32_t* v2, int32_t n)
741 for (int32_t i=0; i<n; i++)
749 const uint32_t* v1,
const uint32_t* v2, int32_t n)
752 for (int32_t i=0; i<n; i++)
760 const uint16_t* v1,
const uint16_t* v2, int32_t n)
763 for (int32_t i=0; i<n; i++)
771 const int16_t* v1,
const int16_t* v2, int32_t n)
774 for (int32_t i=0; i<n; i++)
782 const char* v1,
const char* v2, int32_t n)
785 for (int32_t i=0; i<n; i++)
793 const uint8_t* v1,
const uint8_t* v2, int32_t n)
796 for (int32_t i=0; i<n; i++)
804 const int8_t* v1,
const int8_t* v2, int32_t n)
807 for (int32_t i=0; i<n; i++)
815 const float64_t* v1,
const char* v2, int32_t n)
818 for (int32_t i=0; i<n; i++)
827 T* target,
const T* v1,
const T* v2,int32_t len)
829 for (int32_t i=0; i<len; i++)
830 target[i]=v1[i]*v2[i];
837 T* target, T alpha,
const T* v1, T beta,
const T* v2,
840 for (int32_t i=0; i<len; i++)
841 target[i]=alpha*v1[i]+beta*v2[i];
848 for (int32_t i=0; i<len; i++)
856 for (int32_t i=0; i<len; i++)
862 static inline T
sum(T* vec, int32_t len)
865 for (int32_t i=0; i<len; i++)
873 static inline T
max(T* vec, int32_t len)
878 for (int32_t i=1; i<len; i++)
889 for (int32_t i=0; i<len; i++)
910 for (int32_t i=0; i<len; i++)
916 float64_t* mat, int32_t cols, int32_t rows)
919 for (int32_t i=0; i<rows; i++)
920 trace+=mat[i*cols+i];
927 static void sort(int32_t *a, int32_t cols, int32_t sort_col=0);
944 static inline uint8_t
byte(T word, uint16_t p)
946 return (word >> (
sizeof(T)-p-1) * 8) & 0xff;
952 static size_t count[256], nc, cmin;
956 T *an, *aj, *pile[256];
971 for (ak = array; ak < an; ak++) {
999 for (cp = count + cmin; nc > 0; cp++) {
1011 if (i <
sizeof(T)-1)
1014 pile[cp - count] = ak += *cp;
1038 for (r = *aj; aj < (ak = --pile[c =
byte(r, i)]);)
1053 for (int32_t i=0; i<size-1; i++)
1057 while (j >= 0 && output[j] > value)
1059 output[j+1] = output[j];
1070 static void qsort(T* output, int32_t size)
1077 if (output[0] > output [1])
1078 swap(output[0],output[1]);
1082 T split=output[size/2];
1085 int32_t right=size-1;
1089 while (output[left] < split)
1091 while (output[right] > split)
1096 swap(output[left],output[right]);
1103 qsort(output,right+1);
1106 qsort(&output[left],size-left);
1132 int32_t right=array.
vlen-1;
1136 while (*array.
vector[left]<*split)
1138 while (*array.
vector[right]>*split)
1152 if (array.
vlen-left>1)
1157 template <
class T>
static void display_bits(T word, int32_t width=8*
sizeof(T))
1160 for (
int i=0; i<width; i++)
1162 T mask = ((T) 1)<<(
sizeof(T)*8-1);
1177 const T* vector, int32_t n,
const char* name=
"vector");
1181 const T* matrix, int32_t rows, int32_t cols,
const char* name=
"matrix");
1188 template <
class T1,
class T2>
1189 static void qsort_index(T1* output, T2* index, uint32_t size);
1196 template <
class T1,
class T2>
1198 T1* output, T2* index, int32_t size);
1207 template <
class T1,
class T2>
1208 inline static void parallel_qsort_index(T1* output, T2* index, uint32_t size, int32_t n_threads, int32_t limit=262144)
1211 thread_qsort<T1,T2> t;
1217 t.num_threads=n_threads;
1218 parallel_qsort_index<T1,T2>(&t);
1222 template <
class T1,
class T2>
1229 static void min(
float64_t* output, T* index, int32_t size);
1235 float64_t* output, T* index, int32_t size, int32_t n);
1240 static int32_t
unique(T* output, int32_t size)
1242 qsort(output, size);
1244 for (i=0; i<size; i++)
1245 if (i==0 || output[i]!=output[i-1])
1246 output[j++]=output[i];
1271 eigenvalues, &info);
1274 SG_SERROR(
"DSYEV failed with code %d\n", info);
1278 SG_SERROR(
"Function not available - Lapack/Atlas not enabled at compile time!\n");
1289 for (int32_t i=0; i<n; i++)
1291 for (int32_t j=0; j<m; j++)
1292 rowsums[i]+=matrix[j+int64_t(i)*m];
1303 for (int32_t i=0; i<n; i++)
1305 for (int32_t j=0; j<m; j++)
1306 colsums[j]+=matrix[j+int64_t(i)*m];
1320 for (int32_t i=0; i<m; i++)
1321 colsums[i]/=num_data;
1322 for (int32_t j=0; j<n; j++)
1323 rowsums[j]/=num_data;
1325 T s=
sum(rowsums, n)/num_data;
1327 for (int32_t i=0; i<n; i++)
1329 for (int32_t j=0; j<m; j++)
1330 matrix[int64_t(i)*m+j]+=s-colsums[j]-rowsums[i];
1350 int32_t middle=(start+end)/2;
1352 if (output[middle]>elem)
1354 else if (output[middle]<elem)
1369 if (ind >= 0 && output[ind] == elem)
1384 int32_t end=array.
vlen-1;
1391 int32_t middle=(start+end)/2;
1393 if (*array.
vector[middle]>*elem)
1395 else if (*array.
vector[middle]<*elem)
1404 if (start>=0&&*array.
vector[start]==*elem)
1412 T* output, int32_t size, T elem)
1416 if (output[ind]<=elem)
1418 if (ind>0 && output[ind-1] <= elem)
1426 char * seq1,
char* seq2, int32_t l1, int32_t l2,
float64_t gapCost);
1457 inline static uint32_t get_log_accuracy()
1459 return CMath::LOGACCURACY;
1466 #if defined(isfinite) && !defined(SUNOS)
1477 if (fpclass(f) == FP_NINF || fpclass(f) == FP_PINF)
1526 SG_SWARNING(
"INVALID second operand to logsum(%f,%f) expect undefined results\n", p, q);
1531 return diff >
LOGRANGE? p : p + logtable[(int)(diff * LOGACCURACY)];
1532 return -diff >
LOGRANGE? q : q + logtable[(int)(-diff * LOGACCURACY)];
1536 static void init_log_table();
1539 static int32_t determine_logrange();
1542 static int32_t determine_logaccuracy(int32_t range);
1559 #ifdef USE_LOGSUMARRAY
1565 static inline float64_t logarithmic_sum_array(
1579 if (len%2==1) pp++ ;
1580 for (
register int32_t j=0; j < len>>1; j++)
1583 return logarithmic_sum_array(p,len%2 + (len>>1)) ;
1585 #endif //USE_LOGSUMARRAY
1589 inline virtual const char*
get_name()
const {
return "Mathematics"; }
1621 static int32_t LOGACCURACY;
1629 template <
class T1,
class T2>
1632 struct thread_qsort<T1,T2>* ps=(thread_qsort<T1,T2>*) p;
1633 T1* output=ps->output;
1634 T2* index=ps->index;
1635 uint32_t size=ps->size;
1636 int32_t* qsort_threads=ps->qsort_threads;
1637 int32_t sort_limit=ps->sort_limit;
1638 int32_t num_threads=ps->num_threads;
1642 if (output[0] > output [1])
1644 swap(output[0], output[1]);
1645 swap(index[0], index[1]);
1650 T1 split=output[size/2];
1653 int32_t right=size-1;
1657 while (output[left] < split)
1659 while (output[right] > split)
1664 swap(output[left], output[right]);
1665 swap(index[left], index[right]);
1670 bool lthread_start=
false;
1671 bool rthread_start=
false;
1674 struct thread_qsort<T1,T2> t1;
1675 struct thread_qsort<T1,T2> t2;
1677 if (right+1> 1 && (right+1< sort_limit || *qsort_threads >= num_threads-1))
1679 else if (right+1> 1)
1686 t1.qsort_threads=qsort_threads;
1687 t1.sort_limit=sort_limit;
1688 t1.num_threads=num_threads;
1689 if (pthread_create(<hread, NULL, parallel_qsort_index<T1,T2>, &t1) != 0)
1691 lthread_start=
false;
1698 if (size-left> 1 && (size-left< sort_limit || *qsort_threads >= num_threads-1))
1699 qsort_index(&output[left],&index[left], size-left);
1700 else if (size-left> 1)
1704 t2.output=&output[left];
1705 t2.index=&index[left];
1707 t2.qsort_threads=qsort_threads;
1708 t2.sort_limit=sort_limit;
1709 t2.num_threads=num_threads;
1710 if (pthread_create(&rthread, NULL, parallel_qsort_index<T1,T2>, &t2) != 0)
1712 rthread_start=
false;
1714 qsort_index(&output[left],&index[left], size-left);
1720 pthread_join(lthread, NULL);
1726 pthread_join(rthread, NULL);
1733 template <
class T1,
class T2>
1741 if (output[0] > output [1])
1743 swap(output[0],output[1]);
1744 swap(index[0],index[1]);
1749 T1 split=output[size/2];
1752 int32_t right=size-1;
1756 while (output[left] < split)
1758 while (output[right] > split)
1763 swap(output[left],output[right]);
1764 swap(index[left],index[right]);
1774 qsort_index(&output[left],&index[left], size-left);
1777 template <
class T1,
class T2>
1782 if (output[0] < output [1])
1784 swap(output[0],output[1]);
1785 swap(index[0],index[1]);
1791 T1 split=output[size/2];
1794 int32_t right=size-1;
1798 while (output[left] > split)
1800 while (output[right] < split)
1805 swap(output[left],output[right]);
1806 swap(index[left],index[right]);
1823 for (int32_t i=0; i<n; i++)
1824 min(&output[i], &index[i], size-i) ;
1836 int32_t min_index=0;
1837 for (int32_t i=1; i<size; i++)
1839 if (output[i]<min_elem)
1845 swap(output[0], output[min_index]);
1846 swap(index[0], index[min_index]);