SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
SGInterface.cpp
Go to the documentation of this file.
4 
5 #include <shogun/lib/config.h>
6 #include <shogun/lib/memory.h>
9 #include <shogun/lib/Hash.h>
10 #include <shogun/lib/Set.h>
11 #include <shogun/lib/Signal.h>
12 
29 
30 #include <shogun/structure/Plif.h>
36 
37 #include <ctype.h>
38 
39 using namespace shogun;
40 
// File-scope pointer to a CSGInterface object. It starts out as NULL here;
// whatever assigns it is not visible in this part of the file.
41 CSGInterface* interface=NULL;
43 
// Usage-string helpers.
//
// Every USAGE* macro expands to TWO comma-separated string literals: the text
// that goes in front of a command name and the text that goes behind it.
// The `method` parameter is accepted by all of them but is not used in any
// expansion. `in` / `out` are string literals describing the input and output
// arguments; they are pasted into the suffix / prefix by literal concatenation.
//
// Three flavours are selected at compile time:
//   - HAVE_CMDLINE: "out = cmd in" style, arguments separated by a blank.
//   - HAVE_R:       "[out] <- sg('cmd', in)" style.
//   - otherwise:    "[out]=sg('cmd', in)" style.
// USAGE_COMMA is the argument separator and USAGE_STR the quote character used
// around literal string arguments (empty for the command-line flavour).
// NOTE(review): presumably the two literals initialise a prefix/suffix pair of
// fields in CSGInterfaceMethod — confirm against the struct declaration.
44 #if defined(HAVE_CMDLINE)
45 #define USAGE(method) "", ""
46 #define USAGE_I(method, in) "", " " in ""
47 #define USAGE_O(method, out) "" out " = ", ""
48 #define USAGE_IO(method, in, out) "" out " = ", " " in ""
49 #define USAGE_COMMA " "
50 #define USAGE_STR ""
51 #elif defined(HAVE_R)
52 #define USAGE(method) "sg('", "')"
53 #define USAGE_I(method, in) "sg('", "', " in ")"
54 #define USAGE_O(method, out) "[" out "] <- sg('", "')"
55 #define USAGE_IO(method, in, out) "[" out "] <- sg('", "', " in ")"
56 #define USAGE_COMMA ", "
57 #define USAGE_STR "'"
58 #else
59 #define USAGE(method) "sg('", "')"
60 #define USAGE_I(method, in) "sg('", "', " in ")"
61 #define USAGE_O(method, out) "[" out "]=sg('", "')"
62 #define USAGE_IO(method, in, out) "[" out "]=sg('", "', " in ")"
63 #define USAGE_COMMA ", "
64 #define USAGE_STR "'"
65 #endif
66 
// Table of all commands known to the interface.
//
// Each regular entry lists a command-name constant (N_*), a pointer to the
// CSGInterface member function handling it, and a USAGE* macro that expands to
// the usage prefix/suffix strings. Entries whose first field is a plain string
// and whose remaining fields are NULL ("Features", "Kernel", ...) separate the
// table into sections. The table ends with an all-NULL sentinel entry.
//
// NOTE(review): in this listing a number of lines are absent (several N_*
// name lines and the opening line of some USAGE_* calls), so some entries
// below look incomplete; compare against the real file before editing.
67 CSGInterfaceMethod sg_methods[]=
68 {
// NOTE(review): this section marker has four initialisers, the later markers
// only three.
69  { "Features", NULL, NULL, NULL },
70  {
71  N_PR_LOQO,
72  (&CSGInterface::cmd_pr_loqo),
74  "'Var1', Var1, 'Var2', Var2", "results")
75  },
76  {
78  (&CSGInterface::cmd_load_features),
80  "filename" USAGE_COMMA "feature_class" USAGE_COMMA "type" USAGE_COMMA "target[" USAGE_COMMA "size[" USAGE_COMMA "comp_features]]")
81  },
82  {
84  (&CSGInterface::cmd_save_features),
85  USAGE_I(N_SAVE_FEATURES, "filename" USAGE_COMMA "type" USAGE_COMMA "target")
86  },
87  {
89  (&CSGInterface::cmd_clean_features),
91  },
92  {
94  (&CSGInterface::cmd_get_features),
95  USAGE_IO(N_GET_FEATURES, USAGE_STR "TRAIN|TEST" USAGE_STR, "features")
96  },
97  {
99  (&CSGInterface::cmd_add_features),
101  USAGE_STR "TRAIN|TEST" USAGE_STR USAGE_COMMA "features[" USAGE_COMMA "DNABINFILE|<ALPHABET>]")
102  },
103  {
105  (&CSGInterface::cmd_add_multiple_features),
107  USAGE_STR "TRAIN|TEST" USAGE_STR USAGE_COMMA "repetitions" USAGE_COMMA "features[" USAGE_COMMA "DNABINFILE|<ALPHABET>]")
108  },
109  {
111  (&CSGInterface::cmd_add_dotfeatures),
113  USAGE_STR "TRAIN|TEST" USAGE_STR USAGE_COMMA "features[" USAGE_COMMA "DNABINFILE|<ALPHABET>]")
114  },
115  {
117  (&CSGInterface::cmd_set_features),
119  USAGE_STR "TRAIN|TEST" USAGE_STR
120  USAGE_COMMA "features["
121  USAGE_COMMA "DNABINFILE|<ALPHABET>]["
122  USAGE_COMMA "[from_position_list|slide_window]"
123  USAGE_COMMA "window size"
124  USAGE_COMMA "[position_list|shift]"
125  USAGE_COMMA "skip")
126  },
127  {
129  (&CSGInterface::cmd_set_reference_features),
131  },
132  {
134  (&CSGInterface::cmd_del_last_features),
136  },
137  {
138  N_CONVERT,
139  (&CSGInterface::cmd_convert),
140  USAGE_I(N_CONVERT, USAGE_STR "TRAIN|TEST" USAGE_STR
141  USAGE_COMMA "from_class"
142  USAGE_COMMA "from_type"
143  USAGE_COMMA "to_class"
144  USAGE_COMMA "to_type["
145  USAGE_COMMA "order"
146  USAGE_COMMA "start"
147  USAGE_COMMA "gap"
148  USAGE_COMMA "reversed]")
149  },
// NOTE(review): "TRAIN|TEST" below is opened with USAGE_STR but not closed
// with one, unlike the other entries — looks like an unbalanced quote in the
// usage text.
150  {
151  N_RESHAPE,
152  (&CSGInterface::cmd_reshape),
153  USAGE_I(N_RESHAPE, USAGE_STR "TRAIN|TEST"
154  USAGE_COMMA "num_feat"
155  USAGE_COMMA "num_vec")
156  },
157  {
159  (&CSGInterface::cmd_load_labels),
160  USAGE_I(N_LOAD_LABELS, "filename"
161  USAGE_COMMA USAGE_STR "TRAIN|TARGET" USAGE_STR)
162  },
163  {
164  N_SET_LABELS,
165  (&CSGInterface::cmd_set_labels),
167  USAGE_COMMA "labels")
168  },
169  {
170  N_GET_LABELS,
171  (&CSGInterface::cmd_get_labels),
172  USAGE_IO(N_GET_LABELS, USAGE_STR "TRAIN|TEST" USAGE_STR, "labels")
173  },
174 
175 
176  { "Kernel", NULL, NULL },
177  {
179  (&CSGInterface::cmd_set_kernel_normalization),
180  USAGE_I(N_SET_KERNEL_NORMALIZATION, "IDENTITY|AVGDIAG|SQRTDIAG|FIRSTELEMENT|VARIANCE|ZEROMEANCENTER"
181  USAGE_COMMA "size[" USAGE_COMMA "kernel-specific parameters]")
182  },
183  {
184  N_SET_KERNEL,
185  (&CSGInterface::cmd_set_kernel),
186  USAGE_I(N_SET_KERNEL, "type" USAGE_COMMA "size[" USAGE_COMMA "kernel-specific parameters]")
187  },
188  {
189  N_ADD_KERNEL,
190  (&CSGInterface::cmd_add_kernel),
191  USAGE_I(N_ADD_KERNEL, "weight" USAGE_COMMA "kernel-specific parameters")
192  },
193  {
195  (&CSGInterface::cmd_del_last_kernel),
197  },
198  {
200  (&CSGInterface::cmd_init_kernel),
202  },
203  {
205  (&CSGInterface::cmd_clean_kernel),
207  },
208  {
210  (&CSGInterface::cmd_save_kernel),
211  USAGE_I(N_SAVE_KERNEL, "filename" USAGE_COMMA USAGE_STR "TRAIN|TEST" USAGE_STR)
212  },
// NOTE(review): the optional-argument brackets look misplaced here: "[" opens
// in the input text while "]" closes in the output text ("K]").
213  {
215  (&CSGInterface::cmd_get_kernel_matrix),
216  USAGE_IO(N_GET_KERNEL_MATRIX, "[" USAGE_STR "TRAIN|TEST" USAGE_STR, "K]")
217  },
218  {
220  (&CSGInterface::cmd_set_WD_position_weights),
222  },
223  {
225  (&CSGInterface::cmd_get_subkernel_weights),
227  },
228  {
230  (&CSGInterface::cmd_set_subkernel_weights),
232  },
233  {
235  (&CSGInterface::cmd_set_subkernel_weights_combined),
237  },
238  {
240  (&CSGInterface::cmd_get_dotfeature_weights_combined),
242  },
243  {
245  (&CSGInterface::cmd_set_dotfeature_weights_combined),
247  },
248  {
250  (&CSGInterface::cmd_set_last_subkernel_weights),
252  },
253  {
255  (&CSGInterface::cmd_get_WD_position_weights),
257  },
258  {
260  (&CSGInterface::cmd_get_last_subkernel_weights),
262  },
263  {
265  (&CSGInterface::cmd_compute_by_subkernels),
267  },
268  {
270  (&CSGInterface::cmd_init_kernel_optimization),
272  },
273  {
275  (&CSGInterface::cmd_get_kernel_optimization),
277  },
278  {
280  (&CSGInterface::cmd_delete_kernel_optimization),
282  },
283  {
285  (&CSGInterface::cmd_use_diagonal_speedup),
287  },
288  {
290  (&CSGInterface::cmd_set_kernel_optimization_type),
291  USAGE_I(N_SET_KERNEL_OPTIMIZATION_TYPE, USAGE_STR "FASTBUTMEMHUNGRY|SLOWBUTMEMEFFICIENT" USAGE_STR)
292  },
293  {
294  N_SET_SOLVER,
295  (&CSGInterface::cmd_set_solver),
296  USAGE_I(N_SET_SOLVER, USAGE_STR "AUTO|CPLEX|GLPK|INTERNAL" USAGE_STR)
297  },
298  {
300  (&CSGInterface::cmd_set_constraint_generator),
301  USAGE_I(N_SET_CONSTRAINT_GENERATOR, USAGE_STR "LIBSVM_ONECLASS|LIBSVM_MULTICLASS|LIBSVM"
302  "|SVMLIGHT|LIGHT|SVMLIGHT_ONECLASS|GPBTSVM|MPDSVM|GNPPSVM|GMNPSVM"
303  USAGE_STR)
304  },
305  {
307  (&CSGInterface::cmd_set_prior_probs),
308  USAGE_I(N_SET_PRIOR_PROBS, USAGE_STR "pos probs, neg_probs" USAGE_STR)
309  },
310  {
312  (&CSGInterface::cmd_set_prior_probs_from_labels),
314  },
315 
316 
317 
318  { "Distance", NULL, NULL },
319  {
321  (&CSGInterface::cmd_set_distance),
322  USAGE_I(N_SET_DISTANCE, "type" USAGE_COMMA "data type[" USAGE_COMMA "distance-specific parameters]")
323  },
324  {
326  (&CSGInterface::cmd_init_distance),
328  },
329  {
331  (&CSGInterface::cmd_get_distance_matrix),
333  },
334 
335 
336  { "Classifier", NULL, NULL },
337  {
338  N_CLASSIFY,
339  (&CSGInterface::cmd_classify),
340  USAGE_O(N_CLASSIFY, "result")
341  },
342  {
344  (&CSGInterface::cmd_classify),
345  USAGE_O(N_SVM_CLASSIFY, "result")
346  },
347  {
349  (&CSGInterface::cmd_classify_example),
350  USAGE_IO(N_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
351  },
352  {
354  (&CSGInterface::cmd_classify_example),
355  USAGE_IO(N_SVM_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
356  },
357  {
359  (&CSGInterface::cmd_get_classifier),
360  USAGE_IO(N_GET_CLASSIFIER, "[index in case of MultiClassSVM]", "bias" USAGE_COMMA "weights")
361  },
362  {
364  (&CSGInterface::cmd_get_classifier),
365  USAGE_O(N_GET_CLUSTERING, "radi" USAGE_COMMA "centers|merge_distances" USAGE_COMMA "pairs")
366  },
367  {
368  N_NEW_SVM,
369  (&CSGInterface::cmd_new_classifier),
370  USAGE_I(N_NEW_SVM, USAGE_STR "LIBSVM_ONECLASS|LIBSVM_MULTICLASS|LIBSVM"
371  "|SVMLIGHT|LIGHT|LIGHT_ONECLASS|SVMLIN|GPBTSVM|MPDSVM|GNPPSVM|GMNPSVM"
372  "|SUBGRADIENTSVM|WDSVMOCAS|SVMOCAS|SVMSGD|SVMBMRM|SVMPERF"
373  "|KERNELPERCEPTRON|PERCEPTRON|LIBLINEAR_LR|LIBLINEAR_L2|LDA"
374  "|LPM|LPBOOST|SUBGRADIENTLPM|KNN" USAGE_STR)
375  },
376  {
378  (&CSGInterface::cmd_new_classifier),
379  USAGE_I(N_NEW_CLASSIFIER, USAGE_STR "LIBSVM_ONECLASS|LIBSVM_MULTICLASS"
380  "|LIBSVM|SVMLIGHT|LIGHT|LIGHT_ONECLASS|SVMLIN|GPBTSVM|MPDSVM|GNPPSVM|GMNPSVM"
381  "|SUBGRADIENTSVM|WDSVMOCAS|SVMOCAS|SVMSGD|SVMBMRM|SVMPERF"
382  "|KERNELPERCEPTRON|PERCEPTRON|LIBLINEAR_LR|LIBLINEAR_L2|LDA"
383  "|LPM|LPBOOST|SUBGRADIENTLPM|KNN" USAGE_STR)
384  },
385  {
387  (&CSGInterface::cmd_new_classifier),
388  USAGE_I(N_NEW_REGRESSION, USAGE_STR "SVRLIGHT|LIBSVR|KRR" USAGE_STR)
389  },
390  {
392  (&CSGInterface::cmd_new_classifier),
393  USAGE_I(N_NEW_CLUSTERING, USAGE_STR "KMEANS|HIERARCHICAL" USAGE_STR)
394  },
// NOTE(review): "filename" and "type" read like inputs, yet USAGE_O renders
// them as outputs — confirm whether USAGE_I was intended.
395  {
397  (&CSGInterface::cmd_load_classifier),
398  USAGE_O(N_LOAD_CLASSIFIER, "filename" USAGE_COMMA "type")
399  },
400  {
402  (&CSGInterface::cmd_save_classifier),
403  USAGE_I(N_SAVE_CLASSIFIER, "filename")
404  },
405  {
407  (&CSGInterface::cmd_get_num_svms),
408  USAGE_O(N_GET_NUM_SVMS, "number of SVMs in MultiClassSVM")
409  },
410  {
411  N_GET_SVM,
412  (&CSGInterface::cmd_get_svm),
413  USAGE_IO(N_GET_SVM, "[index in case of MultiClassSVM]", "bias" USAGE_COMMA "alphas")
414  },
415  {
416  N_SET_SVM,
417  (&CSGInterface::cmd_set_svm),
418  USAGE_I(N_SET_SVM, "bias" USAGE_COMMA "alphas")
419  },
420  {
422  (&CSGInterface::cmd_set_linear_classifier),
424  },
425  {
427  (&CSGInterface::cmd_get_svm_objective),
428  USAGE_O(N_GET_SVM_OBJECTIVE, "objective")
429  },
430  {
432  (&CSGInterface::cmd_compute_svm_primal_objective),
434  },
435  {
437  (&CSGInterface::cmd_compute_svm_dual_objective),
439  },
// NOTE(review): this entry uses cmd_compute_svm_primal_objective again (same
// handler as two entries above) while its neighbours are MKL handlers; the
// command name is not visible in this listing — confirm this is intended.
440  {
442  (&CSGInterface::cmd_compute_svm_primal_objective),
444  },
445  {
447  (&CSGInterface::cmd_compute_mkl_dual_objective),
449  },
450  {
452  (&CSGInterface::cmd_compute_relative_mkl_duality_gap),
454  },
455  {
457  (&CSGInterface::cmd_compute_absolute_mkl_duality_gap),
459  },
460  {
462  (&CSGInterface::cmd_do_auc_maximization),
464  },
465  {
467  (&CSGInterface::cmd_set_perceptron_parameters),
468  USAGE_I(N_SET_PERCEPTRON_PARAMETERS, "learnrate" USAGE_COMMA "maxiter")
469  },
470  {
472  (&CSGInterface::cmd_train_classifier),
473  USAGE_I(N_TRAIN_CLASSIFIER, "[classifier-specific parameters]")
474  },
475  {
477  (&CSGInterface::cmd_train_classifier),
479  },
480  {
482  (&CSGInterface::cmd_train_classifier),
484  },
485  {
486  N_SVM_TRAIN,
487  (&CSGInterface::cmd_train_classifier),
488  USAGE_I(N_SVM_TRAIN, "[classifier-specific parameters]")
489  },
490  {
491  N_SVMQPSIZE,
492  (&CSGInterface::cmd_set_svm_qpsize),
493  USAGE_I(N_SVMQPSIZE, "size")
494  },
495  {
497  (&CSGInterface::cmd_set_svm_max_qpsize),
498  USAGE_I(N_SVMMAXQPSIZE, "size")
499  },
500  {
501  N_SVMBUFSIZE,
502  (&CSGInterface::cmd_set_svm_bufsize),
503  USAGE_I(N_SVMBUFSIZE, "size")
504  },
505  {
506  N_C,
507  (&CSGInterface::cmd_set_svm_C),
508  USAGE_I(N_C, "C1[" USAGE_COMMA "C2]")
509  },
510  {
512  (&CSGInterface::cmd_set_svm_epsilon),
513  USAGE_I(N_SVM_EPSILON, "epsilon")
514  },
515  {
517  (&CSGInterface::cmd_set_svr_tube_epsilon),
518  USAGE_I(N_SVR_TUBE_EPSILON, "tube_epsilon")
519  },
520  {
521  N_SVM_NU,
522  (&CSGInterface::cmd_set_svm_nu),
523  USAGE_I(N_SVM_NU, "nu")
524  },
525  {
527  (&CSGInterface::cmd_set_svm_mkl_parameters),
528  USAGE_I(N_MKL_PARAMETERS, "weight_epsilon" USAGE_COMMA "C_MKL [" USAGE_COMMA "mkl_norm ]")
529  },
530  {
531  N_ENT_LAMBDA,
532  (&CSGInterface::cmd_set_elasticnet_lambda),
533  USAGE_I(N_ENT_LAMBDA, "ent_lambda")
534  },
535  {
537  (&CSGInterface::cmd_set_mkl_block_norm),
538  USAGE_I(N_MKL_BLOCK_NORM, "mkl_block_norm")
539  },
540  {
542  (&CSGInterface::cmd_set_max_train_time),
543  USAGE_I(N_SVM_MAX_TRAIN_TIME, "max_train_time")
544  },
545  {
547  (&CSGInterface::cmd_set_svm_shrinking_enabled),
548  USAGE_I(N_USE_SHRINKING, "enable_shrinking")
549  },
550  {
552  (&CSGInterface::cmd_set_svm_batch_computation_enabled),
553  USAGE_I(N_USE_BATCH_COMPUTATION, "enable_batch_computation")
554  },
555  {
556  N_USE_LINADD,
557  (&CSGInterface::cmd_set_svm_linadd_enabled),
558  USAGE_I(N_USE_LINADD, "enable_linadd")
559  },
560  {
562  (&CSGInterface::cmd_set_svm_bias_enabled),
563  USAGE_I(N_SVM_USE_BIAS, "enable_bias")
564  },
565  {
567  (&CSGInterface::cmd_set_mkl_interleaved_enabled),
568  USAGE_I(N_MKL_USE_INTERLEAVED_OPTIMIZATION, "enable_interleaved_optimization")
569  },
570  {
571  N_KRR_TAU,
572  (&CSGInterface::cmd_set_krr_tau),
573  USAGE_I(N_KRR_TAU, "tau")
574  },
575 
576 
577  { "Preprocessors", NULL, NULL },
578  {
580  (&CSGInterface::cmd_add_preproc),
581  USAGE_I(N_ADD_PREPROC, "preproc[, preproc-specific parameters]")
582  },
583  {
585  (&CSGInterface::cmd_del_preproc),
587  },
588  {
590  (&CSGInterface::cmd_attach_preproc),
592  },
593  {
595  (&CSGInterface::cmd_clean_preproc),
597  },
598 
599 
600  { "HMM", NULL, NULL },
601  {
602  N_NEW_HMM,
603  (&CSGInterface::cmd_new_hmm),
604  USAGE_I(N_NEW_HMM, "N" USAGE_COMMA "M")
605  },
606  {
607  N_LOAD_HMM,
608  (&CSGInterface::cmd_load_hmm),
609  USAGE_I(N_LOAD_HMM, "filename")
610  },
611  {
612  N_SAVE_HMM,
613  (&CSGInterface::cmd_save_hmm),
614  USAGE_I(N_SAVE_HMM, "filename[" USAGE_COMMA "save_binary]")
615  },
616  {
617  N_GET_HMM,
618  (&CSGInterface::cmd_get_hmm),
620  },
621  {
622  N_APPEND_HMM,
623  (&CSGInterface::cmd_append_hmm),
625  },
626  {
628  (&CSGInterface::cmd_append_model),
629  USAGE_I(N_APPEND_MODEL, USAGE_STR "filename" USAGE_STR "[" USAGE_COMMA "base1" USAGE_COMMA "base2]")
630  },
631  {
632  N_SET_HMM,
633  (&CSGInterface::cmd_set_hmm),
635  },
636  {
637  N_SET_HMM_AS,
638  (&CSGInterface::cmd_set_hmm_as),
639  USAGE_I(N_SET_HMM_AS, "POS|NEG|TEST")
640  },
641  {
642  N_CHOP,
643  (&CSGInterface::cmd_set_chop),
644  USAGE_I(N_CHOP, "chop")
645  },
646  {
647  N_PSEUDO,
648  (&CSGInterface::cmd_set_pseudo),
649  USAGE_I(N_PSEUDO, "pseudo")
650  },
651  {
653  (&CSGInterface::cmd_load_definitions),
654  USAGE_I(N_LOAD_DEFINITIONS, "filename" USAGE_COMMA "init")
655  },
656  {
658  (&CSGInterface::cmd_hmm_classify),
659  USAGE_O(N_HMM_CLASSIFY, "result")
660  },
661  {
663  (&CSGInterface::cmd_one_class_linear_hmm_classify),
665  },
666  {
668  (&CSGInterface::cmd_one_class_hmm_classify),
670  },
671  {
673  (&CSGInterface::cmd_one_class_hmm_classify_example),
674  USAGE_IO(N_ONE_CLASS_HMM_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
675  },
676  {
678  (&CSGInterface::cmd_hmm_classify_example),
679  USAGE_IO(N_HMM_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
680  },
681  {
682  N_OUTPUT_HMM,
683  (&CSGInterface::cmd_output_hmm),
685  },
686  {
688  (&CSGInterface::cmd_output_hmm_defined),
690  },
691  {
693  (&CSGInterface::cmd_hmm_likelihood),
694  USAGE_O(N_HMM_LIKELIHOOD, "likelihood")
695  },
696  {
697  N_LIKELIHOOD,
698  (&CSGInterface::cmd_likelihood),
700  },
701  {
703  (&CSGInterface::cmd_save_likelihood),
704  USAGE_I(N_SAVE_LIKELIHOOD, "filename[" USAGE_COMMA "save_binary]")
705  },
706  {
708  (&CSGInterface::cmd_get_viterbi_path),
709  USAGE_IO(N_GET_VITERBI_PATH, "dim", "path" USAGE_COMMA "likelihood")
710  },
711  {
713  (&CSGInterface::cmd_viterbi_train_defined),
715  },
716  {
718  (&CSGInterface::cmd_viterbi_train),
720  },
721  {
723  (&CSGInterface::cmd_baum_welch_train),
725  },
726  {
728  (&CSGInterface::cmd_baum_welch_train_defined),
730  },
731  {
733  (&CSGInterface::cmd_baum_welch_trans_train),
735  },
736  {
738  (&CSGInterface::cmd_linear_train),
740  },
741  {
742  N_SAVE_PATH,
743  (&CSGInterface::cmd_save_path),
744  USAGE_I(N_SAVE_PATH, "filename[" USAGE_COMMA "save_binary]")
745  },
746  {
748  (&CSGInterface::cmd_convergence_criteria),
749  USAGE_I(N_CONVERGENCE_CRITERIA, "num_iterations" USAGE_COMMA "epsilon")
750  },
751  {
752  N_NORMALIZE,
753  (&CSGInterface::cmd_normalize),
754  USAGE_I(N_NORMALIZE, "[keep_dead_states]")
755  },
756  {
757  N_ADD_STATES,
758  (&CSGInterface::cmd_add_states),
759  USAGE_I(N_ADD_STATES, "states" USAGE_COMMA "value")
760  },
761  {
763  (&CSGInterface::cmd_permutation_entropy),
764  USAGE_I(N_PERMUTATION_ENTROPY, "width" USAGE_COMMA "seqnum")
765  },
766  {
768  (&CSGInterface::cmd_relative_entropy),
769  USAGE_O(N_RELATIVE_ENTROPY, "result")
770  },
771  {
772  N_ENTROPY,
773  (&CSGInterface::cmd_entropy),
774  USAGE_O(N_ENTROPY, "result")
775  },
776  {
777  (char*) N_SET_FEATURE_MATRIX,
778  (&CSGInterface::cmd_set_feature_matrix),
779  (char*) USAGE_I(N_SET_FEATURE_MATRIX, "features")
780  },
781  {
783  (&CSGInterface::cmd_set_feature_matrix_sparse),
784  (char*) USAGE_I(N_SET_FEATURE_MATRIX_SPARSE, "sp1" USAGE_COMMA "sp2" )
785  },
786  {
788  (&CSGInterface::cmd_new_plugin_estimator),
789  USAGE_I(N_NEW_PLUGIN_ESTIMATOR, "pos_pseudo" USAGE_COMMA "neg_pseudo")
790  },
791  {
793  (&CSGInterface::cmd_train_estimator),
795  },
796  {
798  (&CSGInterface::cmd_plugin_estimate_classify_example),
799  USAGE_IO(N_PLUGIN_ESTIMATE_CLASSIFY_EXAMPLE, "feature_vector_index", "result")
800  },
801  {
803  (&CSGInterface::cmd_plugin_estimate_classify),
805  },
806  {
808  (&CSGInterface::cmd_set_plugin_estimate),
809  USAGE_I(N_SET_PLUGIN_ESTIMATE, "emission_probs" USAGE_COMMA "model_sizes")
810  },
811  {
813  (&CSGInterface::cmd_get_plugin_estimate),
814  USAGE_O(N_GET_PLUGIN_ESTIMATE, "emission_probs" USAGE_COMMA "model_sizes")
815  },
816  { "Signals", NULL, NULL },
817  {
819  (&CSGInterface::cmd_signals_set_model),
821  },
822  {
824  (&CSGInterface::cmd_signals_set_positions),
825  USAGE_I(N_SIGNALS_SET_POSITIONS, "positions")
826  },
827  {
829  (&CSGInterface::cmd_signals_set_labels),
830  USAGE_I(N_SIGNALS_SET_LABELS, "labels")
831  },
832  {
834  (&CSGInterface::cmd_signals_set_split),
835  USAGE_I(N_SIGNALS_SET_SPLIT, "split")
836  },
837  {
839  (&CSGInterface::cmd_signals_set_train_mask),
841  },
842  {
844  (&CSGInterface::cmd_signals_add_feature),
845  USAGE_I(N_SIGNALS_ADD_FEATURE, "feature")
846  },
847  {
849  (&CSGInterface::cmd_signals_add_kernel),
850  USAGE_I(N_SIGNALS_ADD_KERNEL, "kernelparam")
851  },
852  {
854  (&CSGInterface::cmd_signals_run),
855  USAGE_I(N_SIGNALS_RUN, "arg1")
856  },
857  { "Structure", NULL, NULL },
858  {
859  N_BEST_PATH,
860  (&CSGInterface::cmd_best_path),
861  USAGE_I(N_BEST_PATH, "from" USAGE_COMMA "to")
862  },
863  {
865  (&CSGInterface::cmd_best_path_2struct),
867  USAGE_COMMA "q"
868  USAGE_COMMA "cmd_trans"
869  USAGE_COMMA "seq"
870  USAGE_COMMA "pos"
871  USAGE_COMMA "genestr"
872  USAGE_COMMA "penalties"
873  USAGE_COMMA "penalty_info"
874  USAGE_COMMA "nbest"
875  USAGE_COMMA "content_weights"
876  USAGE_COMMA "segment_sum_weights",
877  "prob" USAGE_COMMA "path" USAGE_COMMA "pos")
878  },
879  {
880  (char*) N_SET_PLIF_STRUCT,
881  (&CSGInterface::cmd_set_plif_struct),
882  (char*) USAGE_I(N_SET_PLIF_STRUCT, "id"
883  USAGE_COMMA "name"
884  USAGE_COMMA "limits"
885  USAGE_COMMA "penalties"
886  USAGE_COMMA "transform"
887  USAGE_COMMA "min_value"
888  USAGE_COMMA "max_value"
889  USAGE_COMMA "use_cache"
890  USAGE_COMMA "use_svm")
891  },
892  {
893  (char*) N_GET_PLIF_STRUCT,
894  (&CSGInterface::cmd_get_plif_struct),
895  (char*) USAGE_O(N_GET_PLIF_STRUCT, "id"
896  USAGE_COMMA "name"
897  USAGE_COMMA "limits"
898  USAGE_COMMA "penalties"
899  USAGE_COMMA "transform"
900  USAGE_COMMA "min_value"
901  USAGE_COMMA "max_value"
902  USAGE_COMMA "use_cache"
903  USAGE_COMMA "use_svm")
904  },
905  {
906  (char*) N_PRECOMPUTE_SUBKERNELS,
907  (&CSGInterface::cmd_precompute_subkernels),
909  },
910  {
912  (&CSGInterface::cmd_precompute_content_svms),
913  (char*) USAGE_I(N_PRECOMPUTE_CONTENT_SVMS, "sequence"
914  USAGE_COMMA "position_list"
915  USAGE_COMMA "weights")
916  },
917  {
918  (char*) N_GET_LIN_FEAT,
919  (&CSGInterface::cmd_get_lin_feat),
920  (char*) USAGE_O(N_GET_LIN_FEAT, "lin_feat")
921  },
922  {
923  (char*) N_SET_LIN_FEAT,
924  (&CSGInterface::cmd_set_lin_feat),
925  (char*) USAGE_I(N_SET_LIN_FEAT, "lin_feat")
926  },
927  {
928  (char*) N_INIT_DYN_PROG,
929  (&CSGInterface::cmd_init_dyn_prog),
930  (char*) USAGE_I(N_INIT_DYN_PROG, "num_svms")
931  },
932  {
933  (char*) N_CLEAN_UP_DYN_PROG,
934  (&CSGInterface::cmd_clean_up_dyn_prog),
935  (char*) USAGE(N_CLEAN_UP_DYN_PROG)
936  },
937  {
938  (char*) N_INIT_INTRON_LIST,
939  (&CSGInterface::cmd_init_intron_list),
940  (char*) USAGE_I(N_INIT_INTRON_LIST, "start_positions"
941  USAGE_COMMA "end_positions"
942  USAGE_COMMA "quality")
943  },
944  {
946  (&CSGInterface::cmd_precompute_tiling_features),
947  (char*) USAGE_I(N_PRECOMPUTE_TILING_FEATURES, "intensities"
948  USAGE_COMMA "probe_pos"
949  USAGE_COMMA "tiling_plif_ids")
950  },
951  {
953  (&CSGInterface::cmd_long_transition_settings),
954  (char*) USAGE_I(N_LONG_TRANSITION_SETTINGS, "use_long_transitions"
955  USAGE_COMMA "threshold"
956  USAGE_COMMA "max_len")
957  },
958 
959  {
960  (char*) N_SET_MODEL,
961  (&CSGInterface::cmd_set_model),
962  (char*) USAGE_I(N_SET_MODEL, "content_weights"
963  USAGE_COMMA "transition_pointers"
964  USAGE_COMMA "use_orf"
965  USAGE_COMMA "mod_words")
966  },
967 
968  {
969  (char*) N_BEST_PATH_TRANS,
970  (&CSGInterface::cmd_best_path_trans),
972  USAGE_COMMA "q"
973  USAGE_COMMA "nbest"
974  USAGE_COMMA "seq_path"
975  USAGE_COMMA "a_trans"
976  USAGE_COMMA "segment_loss",
977  "prob" USAGE_COMMA "path" USAGE_COMMA "pos")
978  },
// NOTE(review): "segmend_ids_mask" in the usage text below looks like a typo
// for "segment_ids_mask".
979  {
981  (&CSGInterface::cmd_best_path_trans_deriv),
983  USAGE_COMMA "my_path"
984  USAGE_COMMA "my_pos"
985  USAGE_COMMA "p"
986  USAGE_COMMA "q"
987  USAGE_COMMA "cmd_trans"
988  USAGE_COMMA "seq"
989  USAGE_COMMA "pos"
990  USAGE_COMMA "genestr"
991  USAGE_COMMA "penalties"
992  USAGE_COMMA "state_signals"
993  USAGE_COMMA "penalty_info"
994  USAGE_COMMA "dict_weights"
995  USAGE_COMMA "mod_words ["
996  USAGE_COMMA "segment_loss"
997  USAGE_COMMA "segmend_ids_mask]", "p_deriv"
998  USAGE_COMMA "q_deriv"
999  USAGE_COMMA "cmd_deriv"
1000  USAGE_COMMA "penalties_deriv"
1001  USAGE_COMMA "my_scores"
1002  USAGE_COMMA "my_loss")
1003  },
1004 
1005  { "POIM", NULL, NULL },
1006  {
1008  (&CSGInterface::cmd_compute_POIM_WD),
1009  USAGE_IO(N_COMPUTE_POIM_WD, "max_order" USAGE_COMMA "distribution", "W")
1010  },
1011  {
1013  (&CSGInterface::cmd_get_SPEC_consensus),
1015  },
1016  {
1018  (&CSGInterface::cmd_get_SPEC_scoring),
1019  USAGE_IO(N_GET_SPEC_SCORING, "max_order", "W")
1020  },
1021  {
1023  (&CSGInterface::cmd_get_WD_consensus),
1025  },
1026  {
1028  (&CSGInterface::cmd_get_WD_scoring),
1029  USAGE_IO(N_GET_WD_SCORING, "max_order", "W")
1030  },
1031 
1032 
1033  { "Utility", NULL, NULL },
1034  {
1035  N_CRC,
1036  (&CSGInterface::cmd_crc),
1037  USAGE_IO(N_CRC, "string", "crc32")
1038  },
1039  {
1040  N_SYSTEM,
1041  (&CSGInterface::cmd_system),
1042  USAGE_I(N_SYSTEM, "system_command")
1043  },
1044  {
1045  N_EXIT,
1046  (&CSGInterface::cmd_exit),
1047  USAGE(N_EXIT)
1048  },
1049  {
1050  N_QUIT,
1051  (&CSGInterface::cmd_exit),
1052  USAGE(N_QUIT)
1053  },
1054  {
1055  N_EXEC,
1056  (&CSGInterface::cmd_exec),
1057  USAGE_I(N_EXEC, "filename")
1058  },
1059  {
1060  N_SET_OUTPUT,
1061  (&CSGInterface::cmd_set_output),
1062  USAGE_I(N_SET_OUTPUT, USAGE_STR "STDERR|STDOUT|filename" USAGE_STR)
1063  },
1064  {
1066  (&CSGInterface::cmd_set_threshold),
1067  USAGE_I(N_SET_THRESHOLD, "threshold")
1068  },
1069  {
1070  N_INIT_RANDOM,
1071  (&CSGInterface::cmd_init_random),
1072  USAGE_I(N_INIT_RANDOM, "value_to_initialize_RNG_with")
1073  },
1074  {
1075  N_THREADS,
1076  (&CSGInterface::cmd_set_num_threads),
1077  USAGE_I(N_THREADS, "num_threads")
1078  },
1079  {
1081  (&CSGInterface::cmd_translate_string),
1083  "string, order, start", "translation")
1084  },
1085  {
1086  N_CLEAR,
1087  (&CSGInterface::cmd_clear),
1088  USAGE(N_CLEAR)
1089  },
1090  {
1091  N_TIC,
1092  (&CSGInterface::cmd_tic),
1093  USAGE(N_TIC)
1094  },
1095  {
1096  N_TOC,
1097  (&CSGInterface::cmd_toc),
1098  USAGE(N_TOC)
1099  },
1100  {
1101  N_PRINT,
1102  (&CSGInterface::cmd_print),
1103  USAGE_I(N_PRINT, "msg")
1104  },
1105  {
1106  N_ECHO,
1107  (&CSGInterface::cmd_echo),
1108  USAGE_I(N_ECHO, "level")
1109  },
1110  {
1111  N_LOGLEVEL,
1112  (&CSGInterface::cmd_loglevel),
1113  USAGE_I(N_LOGLEVEL, USAGE_STR "ALL|DEBUG|INFO|NOTICE|WARN|ERROR|CRITICAL|ALERT|EMERGENCY" USAGE_STR)
1114  },
1115  {
1117  (&CSGInterface::cmd_syntax_highlight),
1119  },
1120  {
1121  N_PROGRESS,
1122  (&CSGInterface::cmd_progress),
1124  },
1125  {
1126  N_GET_VERSION,
1127  (&CSGInterface::cmd_get_version),
1128  USAGE_O(N_GET_VERSION, "version")
1129  },
1130  {
1131  N_HELP,
1132  (&CSGInterface::cmd_help),
1133  USAGE(N_HELP)
1134  },
1135  {
1136  N_WHOS,
1137  (&CSGInterface::cmd_whos),
1138  USAGE(N_WHOS)
1139  },
1140  {
1142  (&CSGInterface::cmd_send_command),
1143  NULL
1144  },
1145  {
1146  N_RUN_PYTHON,
1147  (&CSGInterface::cmd_run_python),
1149  "'Var1', Var1, 'Var2', Var2,..., python_function", "results")
1150  },
1151  {
1152  N_RUN_OCTAVE,
1153  (&CSGInterface::cmd_run_octave),
1155  "'Var1', Var1, 'Var2', Var2,..., octave_function", "results")
1156  },
1157  {
1158  N_RUN_R,
1159  (&CSGInterface::cmd_run_r),
1160  USAGE_IO(N_RUN_R,
1161  "'Var1', Var1, 'Var2', Var2,..., r_function", "results")
1162  },
1163  {NULL, NULL, NULL} /* Sentinel */
1164 };
1165 
1166 
// Constructor.
//
// Allocates one GUI helper object per ui_* member, each receiving `this`.
// The ui_signals helper is commented out and therefore not created.
// When print_copyright is true, version information and a few start-up
// messages are printed. Finally reset() is called to initialise the
// per-call state members.
//
// @param print_copyright  print the version banner and start-up messages
1167 CSGInterface::CSGInterface(bool print_copyright)
1168 : CSGObject(),
1169  ui_classifier(new CGUIClassifier(this)),
1170  ui_distance(new CGUIDistance(this)),
1171  ui_features(new CGUIFeatures(this)),
1172  ui_hmm(new CGUIHMM(this)),
1173  ui_kernel(new CGUIKernel(this)),
1174  ui_labels(new CGUILabels(this)),
1175  ui_math(new CGUIMath(this)),
1176  ui_pluginestimate(new CGUIPluginEstimate(this)),
1177  ui_preproc(new CGUIPreprocessor(this)),
1178  ui_time(new CGUITime(this)),
1179  ui_structure(new CGUIStructure(this))/*,
1180 / ui_signals(new CGUISignals(this))*/
1181 {
1182  if (print_copyright)
1183  {
1184  version->print_version();
1185  SG_PRINT("( seeding random number generator with %u (seed size %d))\n",
// NOTE(review): the argument line of the SG_PRINT call above is not present
// in this listing.
1187 #ifdef USE_LOGCACHE
// NOTE(review): sizeof(float64_t) is a size_t but is formatted with %i below;
// if SG_PRINT is printf-style this is a type mismatch on LP64 — confirm.
1188  SG_PRINT( "initializing log-table (size=%i*%i*%i=%2.1fMB) ... ) ",
1189  CMath::get_log_range(),CMath::get_log_accuracy(),sizeof(float64_t),
1190  CMath::get_log_range()*CMath::get_log_accuracy()*sizeof(float64_t)/(1024.0*1024.0));
1191 #else
1192  SG_PRINT("determined range for x in log(1+exp(-x)) is:%d )\n", CMath::get_log_range());
1193 #endif
1194  }
1195 
// file_out and the argument counters get their initial values in reset().
1196  reset();
1197 }
1198 
1199 CSGInterface::~CSGInterface()
1200 {
1201  delete ui_classifier;
1202  delete ui_hmm;
1203  delete ui_pluginestimate;
1204  delete ui_kernel;
1205  delete ui_preproc;
1206  delete ui_features;
1207  delete ui_labels;
1208  delete ui_math;
1209  delete ui_structure;
1210  //delete ui_signals;
1211  delete ui_time;
1212  delete ui_distance;
1213 
1214  if (file_out)
1215  fclose(file_out);
1216 }
1217 
1218 void CSGInterface::reset()
1219 {
1220  m_lhs_counter=0;
1221  m_rhs_counter=0;
1222  m_nlhs=0;
1223  m_nrhs=0;
1224  m_legacy_strptr=NULL;
1225  file_out=NULL;
1226  echo=true;
1227 }
1228 
1229 void CSGInterface::translate_arg(CSGInterface* source, CSGInterface* target)
1230 {
1231  switch (source->get_argument_type())
1232  {
1233  case SCALAR_INT:
1234  target->set_int(source->get_int());
1235  break;
1236  case SCALAR_REAL:
1237  target->set_real(source->get_real());
1238  break;
1239  case SCALAR_BOOL:
1240  target->set_bool(source->get_bool());
1241  break;
1242  case VECTOR_BOOL:
1243  {
1244  bool* v=NULL;
1245  int32_t len=0;
1246  source->get_vector(v, len);
1247  target->set_vector(v, len);
1248  SG_FREE(v);
1249  break;
1250  }
1251  case VECTOR_BYTE:
1252  {
1253  uint8_t* v=NULL;
1254  int32_t len=0;
1255  source->get_vector(v, len);
1256  target->set_vector(v, len);
1257  SG_FREE(v);
1258  break;
1259  }
1260  case VECTOR_CHAR:
1261  {
1262  char* v=NULL;
1263  int32_t len=0;
1264  source->get_vector(v, len);
1265  target->set_vector(v, len);
1266  SG_FREE(v);
1267  break;
1268  }
1269  case VECTOR_INT:
1270  {
1271  int32_t* v=NULL;
1272  int32_t len=0;
1273  source->get_vector(v, len);
1274  target->set_vector(v, len);
1275  SG_FREE(v);
1276  break;
1277  }
1278  case VECTOR_REAL:
1279  {
1280  float64_t* v=NULL;
1281  int32_t len=0;
1282  source->get_vector(v, len);
1283  target->set_vector(v, len);
1284  SG_FREE(v);
1285  break;
1286  }
1287  case VECTOR_SHORTREAL:
1288  {
1289  float32_t* v=NULL;
1290  int32_t len=0;
1291  source->get_vector(v, len);
1292  target->set_vector(v, len);
1293  SG_FREE(v);
1294  break;
1295  }
1296  case VECTOR_SHORT:
1297  {
1298  int16_t* v=NULL;
1299  int32_t len=0;
1300  source->get_vector(v, len);
1301  target->set_vector(v, len);
1302  SG_FREE(v);
1303  break;
1304  }
1305  case VECTOR_WORD:
1306  {
1307  uint16_t* v=NULL;
1308  int32_t len=0;
1309  source->get_vector(v, len);
1310  target->set_vector(v, len);
1311  SG_FREE(v);
1312  break;
1313  }
1314 
1315  case STRING_BYTE:
1316  {
1317  int32_t num_str=0;
1318  int32_t max_str_len=0;
1319  SGString<uint8_t>* strs=NULL;
1320  source->get_string_list(strs, num_str, max_str_len);
1321  target->set_string_list(strs, num_str);
1322  SG_FREE(strs);
1323  break;
1324  }
1325  case STRING_CHAR:
1326  {
1327  int32_t num_str=0;
1328  int32_t max_str_len=0;
1329  SGString<char>* strs;
1330  source->get_string_list(strs, num_str,max_str_len);
1331  target->set_string_list(strs, num_str);
1332  SG_FREE(strs);
1333  break;
1334  }
1335  case STRING_INT:
1336  {
1337  int32_t num_str=0;
1338  int32_t max_str_len=0;
1339  SGString<int32_t>* strs;
1340  source->get_string_list(strs, num_str,max_str_len);
1341  target->set_string_list(strs, num_str);
1342  SG_FREE(strs);
1343  break;
1344  }
1345  case STRING_SHORT:
1346  {
1347  int32_t num_str=0;
1348  int32_t max_str_len=0;
1349  SGString<int16_t>* strs=NULL;
1350  source->get_string_list(strs, num_str, max_str_len);
1351  target->set_string_list(strs, num_str);
1352  SG_FREE(strs);
1353  break;
1354  }
1355  case STRING_WORD:
1356  {
1357  int32_t num_str=0;
1358  int32_t max_str_len=0;
1359  SGString<uint16_t>* strs=NULL;
1360  source->get_string_list(strs, num_str, max_str_len);
1361  target->set_string_list(strs, num_str);
1362  SG_FREE(strs);
1363  break;
1364  }
1365  case DENSE_INT:
1366  {
1367  int32_t num_feat=0;
1368  int32_t num_vec=0;
1369  int32_t* fmatrix=NULL;
1370  source->get_matrix(fmatrix, num_feat, num_vec);
1371  target->set_matrix(fmatrix, num_feat, num_vec);
1372  SG_FREE(fmatrix);
1373  break;
1374  }
1375  case DENSE_REAL:
1376  {
1377  int32_t num_feat=0;
1378  int32_t num_vec=0;
1379  float64_t* fmatrix=NULL;
1380  source->get_matrix(fmatrix, num_feat, num_vec);
1381  target->set_matrix(fmatrix, num_feat, num_vec);
1382  SG_FREE(fmatrix);
1383  break;
1384  }
1385  case DENSE_SHORT:
1386  {
1387  int32_t num_feat=0;
1388  int32_t num_vec=0;
1389  int16_t* fmatrix=NULL;
1390  source->get_matrix(fmatrix, num_feat, num_vec);
1391  target->set_matrix(fmatrix, num_feat, num_vec);
1392  SG_FREE(fmatrix);
1393  break;
1394  }
1395  case DENSE_SHORTREAL:
1396  {
1397  int32_t num_feat=0;
1398  int32_t num_vec=0;
1399  float32_t* fmatrix=NULL;
1400  source->get_matrix(fmatrix, num_feat, num_vec);
1401  target->set_matrix(fmatrix, num_feat, num_vec);
1402  SG_FREE(fmatrix);
1403  break;
1404  }
1405  case DENSE_WORD:
1406  {
1407  int32_t num_feat=0;
1408  int32_t num_vec=0;
1409  uint16_t* fmatrix=NULL;
1410  source->get_matrix(fmatrix, num_feat, num_vec);
1411  target->set_matrix(fmatrix, num_feat, num_vec);
1412  SG_FREE(fmatrix);
1413  break;
1414  }
1415  /*
1416  case NDARRAY_BYTE:
1417  {
1418  uint8_t* a=NULL;
1419  int32_t* dims=NULL;
1420  int32_t num_dims=0;
1421  source->get_ndarray(a, dims, num_dims);
1422  target->set_ndarray(a, dims, num_dims);
1423  SG_FREE(a);
1424  SG_FREE(dims);
1425  break;
1426  }
1427  case NDARRAY_CHAR:
1428  {
1429  char* a=NULL;
1430  int32_t* dims=NULL;
1431  int32_t num_dims=0;
1432  source->get_ndarray(a, dims, num_dims);
1433  target->set_ndarray(a, dims, num_dims);
1434  SG_FREE(a);
1435  SG_FREE(dims);
1436  break;
1437  }
1438  case NDARRAY_INT:
1439  {
1440  int32_t* a=NULL;
1441  int32_t* dims=NULL;
1442  int32_t num_dims=0;
1443  source->get_ndarray(a, dims, num_dims);
1444  target->set_ndarray(a, dims, num_dims);
1445  SG_FREE(a);
1446  SG_FREE(dims);
1447  break;
1448  }
1449  case NDARRAY_REAL:
1450  {
1451  float64_t* a=NULL;
1452  int32_t* dims=NULL;
1453  int32_t num_dims=0;
1454  source->get_ndarray(a, dims, num_dims);
1455  target->set_ndarray(a, dims, num_dims);
1456  SG_FREE(a);
1457  SG_FREE(dims);
1458  break;
1459  }
1460  case NDARRAY_SHORTREAL:
1461  {
1462  float32_t* a=NULL;
1463  int32_t* dims=NULL;
1464  int32_t num_dims=0;
1465  source->get_ndarray(a, dims, num_dims);
1466  target->set_ndarray(a, dims, num_dims);
1467  SG_FREE(a);
1468  SG_FREE(dims);
1469  break;
1470  }
1471  case NDARRAY_SHORT:
1472  {
1473  int16_t* a=NULL;
1474  int32_t* dims=NULL;
1475  int32_t num_dims=0;
1476  source->get_ndarray(a, dims, num_dims);
1477  target->set_ndarray(a, dims, num_dims);
1478  SG_FREE(a);
1479  SG_FREE(dims);
1480  break;
1481  }
1482  case NDARRAY_WORD:
1483  {
1484  uint16_t* a=NULL;
1485  int32_t* dims=NULL;
1486  int32_t num_dims=0;
1487  source->get_ndarray(a, dims, num_dims);
1488  target->set_ndarray(a, dims, num_dims);
1489  SG_FREE(a);
1490  SG_FREE(dims);
1491  break;
1492  }*/
1493  case SPARSE_REAL:
1494  {
1495  int32_t num_feat=0;
1496  int32_t num_vec=0;
1497  SGSparseVector<float64_t>* fmatrix=NULL;
1498  source->get_sparse_matrix(fmatrix, num_feat, num_vec);
1499  int64_t nnz=0;
1500  for (int32_t i=0; i<num_vec; i++)
1501  nnz+=fmatrix[i].num_feat_entries;
1502  target->set_sparse_matrix(fmatrix, num_feat, num_vec, nnz);
1503  SG_FREE(fmatrix);
1504  break;
1505  }
1506 
1507  default:
1508  SG_ERROR("unknown return type");
1509  break;
1510  }
1511 }
1512 
1514 // commands
1516 
1517 /* Features */
1518 
1519 bool CSGInterface::cmd_load_features()
1520 {
1521  if (m_nrhs<8 || !create_return_values(0))
1522  return false;
1523 
1524  int32_t len=0;
1525  char* filename=get_str_from_str_or_direct(len);
1526  char* fclass=get_str_from_str_or_direct(len);
1527  char* type=get_str_from_str_or_direct(len);
1528  char* target=get_str_from_str_or_direct(len);
1529  int32_t size=get_int_from_int_or_str();
1530  int32_t comp_features=get_int_from_int_or_str();
1531 
1532  bool success=ui_features->load(
1533  filename, fclass, type, target, size, comp_features);
1534 
1535  SG_FREE(filename);
1536  SG_FREE(fclass);
1537  SG_FREE(type);
1538  SG_FREE(target);
1539  return success;
1540 }
1541 
1542 bool CSGInterface::cmd_save_features()
1543 {
1544  if (m_nrhs<5 || !create_return_values(0))
1545  return false;
1546 
1547  int32_t len=0;
1548  char* filename=get_str_from_str_or_direct(len);
1549  char* type=get_str_from_str_or_direct(len);
1550  char* target=get_str_from_str_or_direct(len);
1551 
1552  bool success=ui_features->save(filename, type, target);
1553 
1554  SG_FREE(filename);
1555  SG_FREE(type);
1556  SG_FREE(target);
1557  return success;
1558 }
1559 
1560 bool CSGInterface::cmd_clean_features()
1561 {
1562  if (m_nrhs<2 || !create_return_values(0))
1563  return false;
1564 
1565  int32_t len=0;
1566  char* target=get_str_from_str_or_direct(len);
1567 
1568  bool success=ui_features->clean(target);
1569 
1570  SG_FREE(target);
1571  return success;
1572 }
1573 
/**
 * Command handler: return the TRAIN or TEST feature object to the caller.
 *
 * Reads the target name, picks the matching feature object from ui_features
 * and, depending on its feature class and feature type, emits it through
 * set_matrix(), set_sparse_matrix() or set_string_list().
 *
 * Requires exactly m_nrhs==2 and one return value.
 *
 * NOTE(review): this listing skips embedded line numbers after several
 * `default:` labels (e.g. 1654 -> 1656, 1677 -> 1679, 1710 -> 1712,
 * 1729 -> 1731); the statements belonging to those labels are not visible
 * here and must be recovered from the real source file.
 *
 * @return false if the argument check fails, true once the features were
 *         handed over
 */
1574 bool CSGInterface::cmd_get_features()
1575 {
1576  if (m_nrhs!=2 || !create_return_values(1))
1577  return false;
1578 
1579  int32_t tlen=0;
1580  char* target=get_string(tlen);
1581  CFeatures* feat=NULL;
1582 
1583  if (strmatch(target, "TRAIN"))
1584  feat=ui_features->get_train_features();
1585  else if (strmatch(target, "TEST"))
1586  feat=ui_features->get_test_features();
1587  else
1588  {
// target is released before reporting the error; the unconditional
// SG_FREE(target) below is only safe if SG_ERROR does not return here.
1589  SG_FREE(target);
1590  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
1591  }
1592  SG_FREE(target);
1593 
1594  ASSERT(feat);
1595 
1596  switch (feat->get_feature_class())
1597  {
// Dense features: one set_matrix() call per supported element type.
1598  case C_SIMPLE:
1599  {
1600  int32_t num_feat=0;
1601  int32_t num_vec=0;
1602 
1603  switch (feat->get_feature_type())
1604  {
1605  case F_BYTE:
1606  {
1607  uint8_t* fmatrix=((CSimpleFeatures<uint8_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1608  set_matrix(fmatrix, num_feat, num_vec);
1609  break;
1610  }
1611 
1612  case F_CHAR:
1613  {
1614  char* fmatrix=((CSimpleFeatures<char> *) feat)->get_feature_matrix(num_feat, num_vec);
1615  set_matrix(fmatrix, num_feat, num_vec);
1616  break;
1617  }
1618 
1619  case F_DREAL:
1620  {
1621  float64_t* fmatrix=((CSimpleFeatures<float64_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1622  set_matrix(fmatrix, num_feat, num_vec);
1623  break;
1624  }
1625 
1626  case F_INT:
1627  {
1628  int32_t* fmatrix=((CSimpleFeatures<int32_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1629  set_matrix(fmatrix, num_feat, num_vec);
1630  break;
1631  }
1632 
1633  case F_SHORT:
1634  {
1635  int16_t* fmatrix=((CSimpleFeatures<int16_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1636  set_matrix(fmatrix, num_feat, num_vec);
1637  break;
1638  }
1639 
1640  case F_SHORTREAL:
1641  {
1642  float32_t* fmatrix=((CSimpleFeatures<float32_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1643  set_matrix(fmatrix, num_feat, num_vec);
1644  break;
1645  }
1646 
1647  case F_WORD:
1648  {
1649  uint16_t* fmatrix=((CSimpleFeatures<uint16_t> *) feat)->get_feature_matrix(num_feat, num_vec);
1650  set_matrix(fmatrix, num_feat, num_vec);
1651  break;
1652  }
1653 
1654  default:
1656  }
1657  break;
1658  }
1659 
// Sparse features: only the F_DREAL element type is handled.
1660  case C_SPARSE:
1661  {
1662  switch (feat->get_feature_type())
1663  {
1664  case F_DREAL:
1665  {
1666  int64_t nnz=((CSparseFeatures<float64_t>*) feat)->
1667  get_num_nonzero_entries();
1668  int32_t num_feat=0;
1669  int32_t num_vec=0;
1670  SGSparseVector<float64_t>* fmatrix=((CSparseFeatures<float64_t>*) feat)->get_sparse_feature_matrix(num_feat, num_vec);
// NOTE(review): nnz is declared int64_t but is formatted with %d — confirm
// the format specifier against the logging macro's expectations.
1671  SG_INFO("sparse matrix has %d feats, %d vecs and %d nnz elemements\n", num_feat, num_vec, nnz);
1672 
1673  set_sparse_matrix(fmatrix, num_feat, num_vec, nnz);
1674  break;
1675  }
1676 
1677  default:
1679  }
1680  break;
1681  }
1682 
// String features: byte, char and word strings are emitted as string lists.
1683  case C_STRING:
1684  {
1685  int32_t num_str=0;
1686  int32_t max_str_len=0;
1687  switch (feat->get_feature_type())
1688  {
1689  case F_BYTE:
1690  {
1691  SGString<uint8_t>* fmatrix=((CStringFeatures<uint8_t>*) feat)->get_features(num_str, max_str_len);
1692  set_string_list(fmatrix, num_str);
1693  break;
1694  }
1695 
1696  case F_CHAR:
1697  {
1698  SGString<char>* fmatrix=((CStringFeatures<char>*) feat)->get_features(num_str, max_str_len);
1699  set_string_list(fmatrix, num_str);
1700  break;
1701  }
1702 
1703  case F_WORD:
1704  {
1705  SGString<uint16_t>* fmatrix=((CStringFeatures<uint16_t>*) feat)->get_features(num_str, max_str_len);
1706  set_string_list(fmatrix, num_str);
1707  break;
1708  }
1709 
1710  default:
1712  }
1713  break;
1714  }
1715 
// These classes are all accessed through the CDotFeatures interface: the
// computed dot feature matrix is emitted and then released here.
1716  case C_WD:
1717  case C_WEIGHTEDSPEC:
1718  case C_SPEC:
1719  case C_COMBINED_DOT:
1720  case C_POLY:
1721  {
1722 
1723  SGMatrix<float64_t> fmatrix = ((CDotFeatures*) feat)->get_computed_dot_feature_matrix();
// NOTE(review): num_cols is passed where the dense cases above pass num_feat,
// and num_rows where they pass num_vec — confirm this ordering is intended.
1724  set_matrix(fmatrix.matrix, fmatrix.num_cols, fmatrix.num_rows);
1725  fmatrix.free_matrix();
1726  break;
1727  }
1728 
1729  default:
1731  }
1732 
1733  return true;
1734 }
1735 
1736 bool CSGInterface::cmd_add_features()
1737 {
1738  if (m_nrhs<3 || !create_return_values(0))
1739  return false;
1740 
1741  return do_set_features(true, false);
1742 }
1743 
1744 bool CSGInterface::cmd_add_multiple_features()
1745 {
1746  if ((m_nrhs!=4 && m_nrhs<5) || !create_return_values(0))
1747  return false;
1748 
1749  int32_t repetitions=get_int();
1750 
1751  ASSERT(repetitions>=1);
1752 
1753  return do_set_features(true, false, repetitions);
1754 }
1755 
1756 bool CSGInterface::cmd_add_dotfeatures()
1757 {
1758  if (m_nrhs<3 || !create_return_values(0))
1759  return false;
1760 
1761  return do_set_features(true, true);
1762 }
1763 
1764 bool CSGInterface::cmd_set_features()
1765 {
1766  if (m_nrhs<3 || !create_return_values(0))
1767  return false;
1768 
1769  return do_set_features(false, false);
1770 }
1771 
1772 bool CSGInterface::do_set_features(bool add, bool check_dot, int32_t repetitions)
1773 {
1774  int32_t tlen=0;
1775  char* target=get_string(tlen);
1776  if (!strmatch(target, "TRAIN") && !strmatch(target, "TEST"))
1777  {
1778  SG_FREE(target);
1779  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
1780  }
1781 
1782  CFeatures* feat=NULL;
1783  int32_t num_feat=0;
1784  int32_t num_vec=0;
1785 
1786  switch (get_argument_type())
1787  {
1788  case SPARSE_REAL:
1789  {
1790  SGSparseVector<float64_t>* fmatrix=NULL;
1791  get_sparse_matrix(fmatrix, num_feat, num_vec);
1792 
1793  feat=new CSparseFeatures<float64_t>();
1794  ((CSparseFeatures<float64_t>*) feat)->
1795  set_sparse_feature_matrix(SGSparseMatrix<float64_t>(fmatrix, num_feat, num_vec));
1796  break;
1797  }
1798 
1799  case DENSE_REAL:
1800  {
1801  float64_t* fmatrix=NULL;
1802  get_matrix(fmatrix, num_feat, num_vec);
1803 
1804  feat=new CSimpleFeatures<float64_t>(0);
1805  ((CSimpleFeatures<float64_t>*) feat)->
1806  set_feature_matrix(fmatrix, num_feat, num_vec);
1807 
1808  if (m_nrhs==6)
1809  feat = create_custom_real_features((CSimpleFeatures<float64_t>*) feat);
1810 
1811  break;
1812  }
1813 
1814  case DENSE_INT:
1815  {
1816  int32_t* fmatrix=NULL;
1817  get_matrix(fmatrix, num_feat, num_vec);
1818 
1819  feat=new CSimpleFeatures<int32_t>(0);
1820  ((CSimpleFeatures<int32_t>*) feat)->
1821  set_feature_matrix(fmatrix, num_feat, num_vec);
1822  break;
1823  }
1824 
1825  case DENSE_SHORT:
1826  {
1827  int16_t* fmatrix=NULL;
1828  get_matrix(fmatrix, num_feat, num_vec);
1829 
1830  feat=new CSimpleFeatures<int16_t>(0);
1831  ((CSimpleFeatures<int16_t>*) feat)->
1832  set_feature_matrix(fmatrix, num_feat, num_vec);
1833  break;
1834  }
1835 
1836  case DENSE_WORD:
1837  {
1838  uint16_t* fmatrix=NULL;
1839  get_matrix(fmatrix, num_feat, num_vec);
1840 
1841  feat=new CSimpleFeatures<uint16_t>(0);
1842  ((CSimpleFeatures<uint16_t>*) feat)->
1843  set_feature_matrix(fmatrix, num_feat, num_vec);
1844  break;
1845  }
1846 
1847  case DENSE_SHORTREAL:
1848  {
1849  float32_t* fmatrix=NULL;
1850  get_matrix(fmatrix, num_feat, num_vec);
1851 
1852  feat=new CSimpleFeatures<float32_t>(0);
1853  ((CSimpleFeatures<float32_t>*) feat)->
1854  set_feature_matrix(fmatrix, num_feat, num_vec);
1855  break;
1856  }
1857 
1858  case STRING_CHAR:
1859  {
1860  if (m_nrhs<4)
1861  SG_ERROR("Please specify alphabet!\n");
1862 
1863  int32_t num_str=0;
1864  int32_t max_str_len=0;
1865  SGString<char>* fmatrix=NULL;
1866  get_string_list(fmatrix, num_str, max_str_len);
1867 
1868  int32_t alphabet_len=0;
1869  char* alphabet_str=get_string(alphabet_len);
1870  ASSERT(alphabet_str);
1871 
1872  if (strmatch(alphabet_str, "DNABINFILE"))
1873  {
1874  SG_FREE(alphabet_str);
1875 
1876  ASSERT(fmatrix[0].string);
1877  feat=new CStringFeatures<uint8_t>(DNA);
1878 
1879  try
1880  {
1881  ((CStringFeatures<uint8_t>*) feat)->load_ascii_file(fmatrix[0].string);
1882  }
1883  catch (...)
1884  {
1885  SG_UNREF(feat);
1886  SG_ERROR("Couldn't load DNA features from file.\n");
1887  }
1888  feat=create_custom_string_features((CStringFeatures<uint8_t>*) feat);
1889  break;
1890  }
1891  else
1892  {
1893  bool convert_to_word=false;
1894  bool convert_to_ulong=false;
1895  CAlphabet* alphabet=NULL;
1896  if (strmatch(alphabet_str, "DNAWORD"))
1897  {
1898  alphabet=new CAlphabet(DNA);
1899  convert_to_word=true;
1900  }
1901  else if (strmatch(alphabet_str, "DNAULONG"))
1902  {
1903  alphabet=new CAlphabet(DNA);
1904  convert_to_ulong=true;
1905  }
1906  else
1907  alphabet=new CAlphabet(alphabet_str, alphabet_len);
1908 
1909  SG_REF(alphabet);
1910  SG_FREE(alphabet_str);
1911 
1912  feat=new CStringFeatures<char>(alphabet);
1913 
1914  if (!((CStringFeatures<char>*) feat)->set_features(fmatrix, num_str, max_str_len))
1915  {
1916  SG_UNREF(alphabet);
1917  SG_UNREF(feat);
1918  SG_ERROR("Couldnt set byte string features.\n");
1919  }
1920 
1921  SG_UNREF(alphabet);
1922 
1923  if (convert_to_word || convert_to_ulong)
1924  convert_to_bitembedding(feat, convert_to_word, convert_to_ulong);
1925  }
1926 
1927  obtain_from_single_string(feat);
1928  break;
1929  }
1930 
1931  case STRING_BYTE:
1932  {
1933  if (m_nrhs<4)
1934  SG_ERROR("Please specify alphabet!\n");
1935 
1936  int32_t num_str=0;
1937  int32_t max_str_len=0;
1938  SGString<uint8_t>* fmatrix=NULL;
1939  get_string_list(fmatrix, num_str, max_str_len);
1940 
1941  int32_t alphabet_len=0;
1942  char* alphabet_str=get_string(alphabet_len);
1943  ASSERT(alphabet_str);
1944  CAlphabet* alphabet=NULL;
1945  alphabet=new CAlphabet(alphabet_str, alphabet_len);
1946  SG_FREE(alphabet_str);
1947 
1948  feat=new CStringFeatures<uint8_t>(alphabet);
1949  if (!((CStringFeatures<uint8_t>*) feat)->set_features(fmatrix, num_str, max_str_len))
1950  {
1951  SG_UNREF(alphabet);
1952  SG_UNREF(feat);
1953  SG_ERROR("Couldnt set byte string features.\n");
1954  }
1955  feat=create_custom_string_features((CStringFeatures<uint8_t>*) feat);
1956  break;
1957  }
1958 
1959  default:
1960  SG_ERROR("Wrong argument type %d.\n", get_argument_type());
1961  }
1962 
1963  if (check_dot && !feat->has_property(FP_DOT))
1964  {
1965  SG_UNREF(feat);
1966  SG_ERROR("Feature type not supported by DOT Features\n");
1967  }
1968 
1969  if (strmatch(target, "TRAIN"))
1970  {
1971  if (!add)
1972  ui_features->set_train_features(feat);
1973  else if (check_dot)
1974  {
1975  for (int32_t i=0; i<repetitions; i++)
1976  ui_features->add_train_dotfeatures((CDotFeatures*) feat);
1977  }
1978  else
1979  {
1980  for (int32_t i=0; i<repetitions; i++)
1981  ui_features->add_train_features(feat);
1982  }
1983  }
1984  else
1985  {
1986  if (!add)
1987  ui_features->set_test_features(feat);
1988  else if (check_dot)
1989  {
1990  for (int32_t i=0; i<repetitions; i++)
1991  ui_features->add_test_dotfeatures((CDotFeatures*) feat);
1992  }
1993  else
1994  {
1995  for (int32_t i=0; i<repetitions; i++)
1996  ui_features->add_test_features(feat);
1997  }
1998  }
1999 
2000  SG_FREE(target);
2001 
2002  return true;
2003 }
2004 
2005 bool CSGInterface::cmd_set_reference_features()
2006 {
2007  if (m_nrhs<3 || !create_return_values(0))
2008  return false;
2009 
2010  int32_t len=0;
2011  char* target=get_str_from_str_or_direct(len);
2012 
2013  bool success=ui_features->set_reference_features(target);
2014 
2015  SG_FREE(target);
2016  return success;
2017 }
2018 
2019 bool CSGInterface::cmd_del_last_features()
2020 {
2021  if (m_nrhs<2 || !create_return_values(0))
2022  return false;
2023 
2024  int32_t len=0;
2025  char* target=get_str_from_str_or_direct(len);
2026  bool success=ui_features->del_last_feature_obj(target);
2027 
2028  SG_FREE(target);
2029  return success;
2030 }
2031 
/**
 * Command handler: convert the features of a target from one feature
 * class/type to another.
 *
 * Reads the target, then from_class, from_type, to_class and to_type, picks
 * the matching ui_features->convert_* routine and installs the result with
 * ui_features->set_convert_features(). Some conversions read additional
 * optional arguments (gap cost, order/start/gap/rev, use_norm) depending on
 * m_nrhs.
 *
 * NOTE(review): in this listing many `else` branches have no visible
 * statement and the embedded line numbers skip at those places (e.g.
 * 2062 -> 2064, 2082 -> 2084, 2257 -> 2259); the missing statements must be
 * recovered from the real source file.
 *
 * @return true if a converted feature object was produced, false if the
 *         argument check fails
 */
2032 bool CSGInterface::cmd_convert()
2033 {
2034  if (m_nrhs<5 || !create_return_values(0))
2035  return false;
2036 
2037  int32_t len=0;
2038  char* target=get_str_from_str_or_direct(len);
2039  CFeatures* features=ui_features->get_convert_features(target);
2040  if (!features)
2041  {
// NOTE(review): target is passed to SG_ERROR as the "%s" argument after it
// has already been released by SG_FREE — looks like a use-after-free; the
// message should be emitted (or the name copied) before freeing.
2042  SG_FREE(target);
2043  SG_ERROR("No \"%s\" features available.\n", target);
2044  }
2045 
// The four class/type names are consumed positionally in this order.
2046  char* from_class=get_str_from_str_or_direct(len);
2047  char* from_type=get_str_from_str_or_direct(len);
2048  char* to_class=get_str_from_str_or_direct(len);
2049  char* to_type=get_str_from_str_or_direct(len);
2050 
2051  CFeatures* result=NULL;
2052  if (strmatch(from_class, "SIMPLE"))
2053  {
2054  if (strmatch(from_type, "REAL"))
2055  {
2056  if (strmatch(to_class, "SPARSE") &&
2057  strmatch(to_type, "REAL"))
2058  {
2059  result=ui_features->convert_simple_real_to_sparse_real(
2060  ((CSimpleFeatures<float64_t>*) features));
2061  }
2062  else
2064  } // from_type REAL
2065 
2066  else if (strmatch(from_type, "CHAR"))
2067  {
2068  if (strmatch(to_class, "STRING") &&
2069  strmatch(to_type, "CHAR"))
2070  {
2071  result=ui_features->convert_simple_char_to_string_char(
2072  ((CSimpleFeatures<char>*) features));
2073  }
2074  else if (strmatch(to_class, "SIMPLE"))
2075  {
// The ALIGN conversion is only taken with exactly eight arguments; the extra
// argument is the gap cost.
2076  if (strmatch(to_type, "ALIGN") && m_nrhs==8)
2077  {
2078  float64_t gap_cost=get_real_from_real_or_str();
2079  result=ui_features->convert_simple_char_to_simple_align(
2080  (CSimpleFeatures<char>*) features, gap_cost);
2081  }
2082  else
2084  }
2085  else
2087  } // from_type CHAR
2088 
2089  else if (strmatch(from_type, "WORD"))
2090  {
2091  if (strmatch(to_class, "SIMPLE") &&
2092  strmatch(to_type, "SALZBERG"))
2093  {
2094  result=ui_features->convert_simple_word_to_simple_salzberg(
2095  (CSimpleFeatures<uint16_t>*) features);
2096  }
2097  else
2099  } // from_type WORD
2100 
2101  else
2103  } // from_class SIMPLE
2104 
2105  else if (strmatch(from_class, "SPARSE"))
2106  {
2107  if (strmatch(from_type, "REAL"))
2108  {
2109  if (strmatch(to_class, "SIMPLE") &&
2110  strmatch(to_type, "REAL"))
2111  {
2112  result=ui_features->convert_sparse_real_to_simple_real(
2113  (CSparseFeatures<float64_t>*) features);
2114  }
2115  else
2117  } // from_type REAL
2118  else
2120  } // from_class SPARSE
2121 
2122  else if (strmatch(from_class, "STRING"))
2123  {
2124  if (strmatch(from_type, "CHAR"))
2125  {
2126  if (strmatch(to_class, "STRING"))
2127  {
// Defaults used when the optional trailing arguments are absent.
2128  int32_t order=1;
2129  int32_t start=0;
2130  int32_t gap=0;
2131  char rev='f';
2132 
// Optional arguments are read in the fixed order order, start, gap, rev;
// each one is only read if m_nrhs says it is present.
2133  if (m_nrhs>6)
2134  {
2135  order=get_int_from_int_or_str();
2136 
2137  if (m_nrhs>7)
2138  {
2139  start=get_int_from_int_or_str();
2140 
2141  if (m_nrhs>8)
2142  {
2143  gap=get_int_from_int_or_str();
2144 
2145  if (m_nrhs>9)
2146  {
2147  char* rev_str=get_str_from_str_or_direct(len);
// Only the first character of the rev argument is used.
2148  if (rev_str)
2149  rev=rev_str[0];
2150 
2151  SG_FREE(rev_str);
2152  }
2153  }
2154  }
2155  }
2156 
2157  if (strmatch(to_type, "BYTE"))
2158  {
2159  result=ui_features->convert_string_char_to_string_generic<char,uint8_t>(
2160  (CStringFeatures<char>*) features, order, start,
2161  gap, rev);
2162  }
2163  else if (strmatch(to_type, "WORD"))
2164  {
2165  result=ui_features->convert_string_char_to_string_generic<char,uint16_t>(
2166  (CStringFeatures<char>*) features, order, start,
2167  gap, rev);
2168  }
2169  else if (strmatch(to_type, "ULONG"))
2170  {
2171  result=ui_features->convert_string_char_to_string_generic<char,uint64_t>(
2172  (CStringFeatures<char>*) features, order, start,
2173  gap, rev);
2174  }
2175  else
2177  }
2178  else
2180  } // from_type CHAR
2181 
2182  else if (strmatch(from_type, "BYTE"))
2183  {
2184  if (strmatch(to_class, "STRING"))
2185  {
// Same defaults and optional-argument parsing as the CHAR source case above.
2186  int32_t order=1;
2187  int32_t start=0;
2188  int32_t gap=0;
2189  char rev='f';
2190 
2191  if (m_nrhs>6)
2192  {
2193  order=get_int_from_int_or_str();
2194 
2195  if (m_nrhs>7)
2196  {
2197  start=get_int_from_int_or_str();
2198 
2199  if (m_nrhs>8)
2200  {
2201  gap=get_int_from_int_or_str();
2202 
2203  if (m_nrhs>9)
2204  {
2205  char* rev_str=get_str_from_str_or_direct(len);
2206  if (rev_str)
2207  rev=rev_str[0];
2208 
2209  SG_FREE(rev_str);
2210  }
2211  }
2212  }
2213  }
2214 
// Despite its name, the *_char_to_string_generic template is instantiated
// with uint8_t as the source element type here.
2215  if (strmatch(to_type, "WORD"))
2216  {
2217  result=ui_features->convert_string_char_to_string_generic<uint8_t,uint16_t>(
2218  (CStringFeatures<uint8_t>*) features, order, start,
2219  gap, rev);
2220  }
2221  else if (strmatch(to_type, "ULONG"))
2222  {
2223  result=ui_features->convert_string_char_to_string_generic<uint8_t,uint64_t>(
2224  (CStringFeatures<uint8_t>*) features, order, start,
2225  gap, rev);
2226  }
2227  else
2229  }
2230  else
2232  } // from_type uint8_t
2233 
2234  else if (strmatch(from_type, "WORD"))
2235  {
2236  if (strmatch(to_class, "SIMPLE") && strmatch(to_type, "TOP"))
2237  {
2238  result=ui_features->convert_string_word_to_simple_top(
2239  (CStringFeatures<uint16_t>*) features);
2240  }
// The SPEC/WORD conversion is only taken with exactly seven arguments; the
// extra argument is the use_norm flag.
2241  else if (strmatch(to_class, "SPEC") && strmatch(to_type, "WORD") && m_nrhs==7)
2242  {
2243  bool use_norm=get_bool();
// NOTE(review): a routine named *_string_byte_* is called with
// CStringFeatures<uint16_t> — confirm against its declaration.
2244  result=ui_features->convert_string_byte_to_spec_word((CStringFeatures<uint16_t>*) features, use_norm);
2245 
2246  }
2247  else
2249  } // from_type WORD
2250 
// Reached for any other from_type of a STRING source when SIMPLE/FK is
// requested; the features are treated as CStringFeatures<uint16_t>.
2251  else if (strmatch(to_class, "SIMPLE") && strmatch(to_type, "FK"))
2252  {
2253  result=ui_features->convert_string_word_to_simple_fk(
2254  (CStringFeatures<uint16_t>*) features);
2255  } // to_type FK
2256 
2257  else
2259 
2260  } // from_class STRING
2261 
// NOTE(review): if SG_ERROR does not return (as the free-then-error pattern
// above suggests), the SG_FREE calls below are skipped on failure — confirm.
2262  if (result && ui_features->set_convert_features(result, target))
2263  SG_INFO("Conversion was successful.\n");
2264  else
2265  SG_ERROR("Conversion failed.\n");
2266 
2267  SG_FREE(target);
2268  SG_FREE(from_class);
2269  SG_FREE(from_type);
2270  SG_FREE(to_class);
2271  SG_FREE(to_type);
2272  return (result!=NULL);
2273 }
2274 
2275 void CSGInterface::convert_to_bitembedding(CFeatures* &features, bool convert_to_word, bool convert_to_ulong)
2276 {
2277  int32_t order=1;
2278  int32_t start=0;
2279  int32_t gap=0;
2280  char rev='f';
2281 
2282  if (m_nrhs<5)
2283  return;
2284 
2285  order=get_int();
2286  // remove arg, for parameters to come
2287  m_nrhs--;
2288 
2289  if (convert_to_word)
2290  {
2291  SG_INFO("Converting into word-bitembedding\n");
2292  features=ui_features->convert_string_char_to_string_generic<char,uint16_t>(
2293  (CStringFeatures<char>*) features, order, start, gap, rev);
2294  }
2295 
2296  if (convert_to_ulong)
2297  {
2298  SG_INFO("Converting into ulong-bitembedding\n");
2299  features=ui_features->convert_string_char_to_string_generic<char,uint64_t>(
2300  (CStringFeatures<char>*) features, order, start, gap, rev);
2301  }
2302 }
2303 
2304 void CSGInterface::obtain_from_single_string(CFeatures* features)
2305 {
2306  if (m_nrhs<5)
2307  return;
2308 
2309  int32_t len=0;
2310  char* str=get_string(len);
2311  ASSERT(str);
2312 
2313  if (strmatch(str, "from_position_list"))
2314  {
2315  obtain_from_position_list(features);
2316  }
2317  else if (strmatch(str, "slide_window"))
2318  {
2319  obtain_by_sliding_window(features);
2320  }
2321  else
2322  SG_SERROR("Unknown conversion\n");
2323 }
2324 
2325 bool CSGInterface::obtain_from_position_list(CFeatures* features)
2326 {
2327  int32_t winsize=get_int();
2328 
2329  int32_t* shifts=NULL;
2330  int32_t num_shift=0;
2331  get_vector(shifts, num_shift);
2332 
2333  int32_t skip=0;
2334  if (m_nrhs==8)
2335  skip=get_int();
2336 
2337  SG_DEBUG("winsize: %d num_shifts: %d skip: %d\n", winsize, num_shift, skip);
2338 
2339  CDynamicArray<int32_t> positions(num_shift+1);
2340 
2341  for (int32_t i=0; i<num_shift; i++)
2342  positions.set_element(shifts[i], i);
2343 
2344  if (features->get_feature_class()!=C_STRING)
2345  SG_ERROR("No string features.\n");
2346 
2347  bool success=false;
2348  switch (features->get_feature_type())
2349  {
2350  case F_CHAR:
2351  {
2352  success=(((CStringFeatures<char>*) features)->
2353  obtain_by_position_list(winsize, &positions, skip)>0);
2354  break;
2355  }
2356  case F_BYTE:
2357  {
2358  success=(((CStringFeatures<uint8_t>*) features)->
2359  obtain_by_position_list(winsize, &positions, skip)>0);
2360  break;
2361  }
2362  case F_WORD:
2363  {
2364  success=(((CStringFeatures<uint16_t>*) features)->
2365  obtain_by_position_list(winsize, &positions, skip)>0);
2366  break;
2367  }
2368  case F_ULONG:
2369  {
2370  success=(((CStringFeatures<uint64_t>*) features)->
2371  obtain_by_position_list(winsize, &positions, skip)>0);
2372  break;
2373  }
2374  default:
2375  SG_ERROR("Unsupported string features type.\n");
2376  }
2377 
2378  return success;
2379 }
2380 
2381 bool CSGInterface::obtain_by_sliding_window(CFeatures* features)
2382 {
2383  int32_t winsize=get_int();
2384  int32_t shift=get_int();
2385  int32_t skip=0;
2386 
2387  if (m_nrhs==8)
2388  skip=get_int();
2389 
2390  bool success=false;
2391 
2392  ASSERT(features);
2393  ASSERT(((CFeatures*) features)->get_feature_class()==C_STRING);
2394 
2395  switch (features->get_feature_type())
2396  {
2397  case F_CHAR:
2398  return ( ((CStringFeatures<char>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2399  case F_BYTE:
2400  return ( ((CStringFeatures<uint8_t>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2401  case F_WORD:
2402  return ( ((CStringFeatures<uint16_t>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2403  case F_ULONG:
2404  return ( ((CStringFeatures<uint64_t>*) features)->obtain_by_sliding_window(winsize, shift, skip)>0);
2405  default:
2406  SG_SERROR("Unsupported string features type.\n");
2407  return false;
2408  }
2409 
2410  return success;
2411 }
2412 
2413 bool CSGInterface::cmd_reshape()
2414 {
2415  if (m_nrhs<4 || !create_return_values(0))
2416  return false;
2417 
2418  int32_t len=0;
2419  char* target=get_str_from_str_or_direct(len);
2420  int32_t num_feat=get_int_from_int_or_str();
2421  int32_t num_vec=get_int_from_int_or_str();
2422 
2423  bool success=ui_features->reshape(target, num_feat, num_vec);
2424 
2425  SG_FREE(target);
2426  return success;
2427 }
2428 
2429 bool CSGInterface::cmd_load_labels()
2430 {
2431  if (m_nrhs<4 || !create_return_values(0))
2432  return false;
2433 
2434  int32_t len=0;
2435  char* filename=get_str_from_str_or_direct(len);
2436  char* target=get_str_from_str_or_direct(len);
2437 
2438  bool success=ui_labels->load(filename, target);
2439 
2440  SG_FREE(filename);
2441  SG_FREE(target);
2442  return success;
2443 }
2444 
2445 bool CSGInterface::cmd_set_labels()
2446 {
2447  if (m_nrhs!=3 || !create_return_values(0))
2448  return false;
2449 
2450  int32_t tlen=0;
2451  char* target=get_string(tlen);
2452  if (!strmatch(target, "TRAIN") && !strmatch(target, "TEST"))
2453  {
2454  SG_FREE(target);
2455  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
2456  }
2457 
2458  float64_t* lab=NULL;
2459  int32_t len=0;
2460  get_vector(lab, len);
2461 
2462  CLabels* labels=new CLabels(len);
2463  SG_INFO("num labels: %d\n", labels->get_num_labels());
2464 
2465  for (int32_t i=0; i<len; i++)
2466  {
2467  if (!labels->set_label(i, lab[i]))
2468  SG_ERROR("Couldn't set label %d (of %d): %f.\n", i, len, lab[i]);
2469  }
2470  SG_FREE(lab);
2471 
2472  if (strmatch(target, "TRAIN"))
2473  ui_labels->set_train_labels(labels);
2474  else if (strmatch(target, "TEST"))
2475  ui_labels->set_test_labels(labels);
2476  else
2477  {
2478  SG_FREE(target);
2479  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
2480  }
2481  SG_FREE(target);
2482 
2483  return true;
2484 }
2485 
2486 bool CSGInterface::cmd_get_labels()
2487 {
2488  if (m_nrhs!=2 || !create_return_values(1))
2489  return false;
2490 
2491  int32_t tlen=0;
2492  char* target=get_string(tlen);
2493  CLabels* labels=NULL;
2494 
2495  if (strmatch(target, "TRAIN"))
2496  labels=ui_labels->get_train_labels();
2497  else if (strmatch(target, "TEST"))
2498  labels=ui_labels->get_test_labels();
2499  else
2500  {
2501  SG_FREE(target);
2502  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
2503  }
2504  SG_FREE(target);
2505 
2506  if (!labels)
2507  SG_ERROR("No labels.\n");
2508 
2509  int32_t num_labels=labels->get_num_labels();
2510  float64_t* lab=SG_MALLOC(float64_t, num_labels);
2511 
2512  for (int32_t i=0; i<num_labels ; i++)
2513  lab[i]=labels->get_label(i);
2514 
2515  set_vector(lab, num_labels);
2516  SG_FREE(lab);
2517 
2518  return true;
2519 }
2520 
2521 
2524 bool CSGInterface::cmd_set_kernel_normalization()
2525 {
2526  if (m_nrhs<2 || !create_return_values(0))
2527  return false;
2528 
2529  int32_t len=0;
2530  char* normalization=get_string(len);
2531 
2532  float64_t c=0;
2533  float64_t r=0;
2534 
2535  if (m_nrhs>=3)
2536  c=get_real();
2537  if (m_nrhs>=4)
2538  r=get_real();
2539 
2540  bool success=ui_kernel->set_normalization(normalization, c, r);
2541 
2542  SG_FREE(normalization);
2543  return success;
2544 }
2545 
2546 bool CSGInterface::cmd_set_kernel()
2547 {
2548  if (m_nrhs<2 || !create_return_values(0))
2549  return false;
2550 
2551  SG_DEBUG("SGInterface: set_kernel\n");
2552  CKernel* kernel=create_kernel();
2553  return ui_kernel->set_kernel(kernel);
2554 }
2555 
2556 bool CSGInterface::cmd_add_kernel()
2557 {
2558  if (m_nrhs<3 || !create_return_values(0))
2559  return false;
2560 
2561  float64_t weight=get_real_from_real_or_str();
2562  // adjust m_nrhs to play well with checks in create_kernel
2563  m_nrhs--;
2564  CKernel* kernel=create_kernel();
2565 
2566  SG_DEBUG("SGInterface: add_kernel\n");
2567  return ui_kernel->add_kernel(kernel, weight);
2568 }
2569 
2570 bool CSGInterface::cmd_del_last_kernel()
2571 {
2572  if (m_nrhs<1 || !create_return_values(0))
2573  return false;
2574 
2575  return ui_kernel->del_last_kernel();
2576 }
2577 
2578 CKernel* CSGInterface::create_kernel()
2579 {
2580  CKernel* kernel=NULL;
2581  int32_t len=0;
2582  char* type=get_str_from_str_or_direct(len);
2583 
2584  SG_DEBUG("set_kernel with type: %s\n", type);
2585 
2586  if (strmatch(type, "COMBINED"))
2587  {
2588  if (m_nrhs<3)
2589  return NULL;
2590 
2591  int32_t size=get_int_from_int_or_str();
2592  bool append_subkernel_weights=false;
2593  if (m_nrhs>3)
2594  append_subkernel_weights=get_bool_from_bool_or_str();
2595 
2596  kernel=ui_kernel->create_combined(size, append_subkernel_weights);
2597  }
2598  else if (strmatch(type, "DISTANCE"))
2599  {
2600  if (m_nrhs<3)
2601  return NULL;
2602 
2603  int32_t size=get_int_from_int_or_str();
2604  float64_t width=1;
2605  if (m_nrhs>3)
2606  width=get_real_from_real_or_str();
2607 
2608  kernel=ui_kernel->create_distance(size, width);
2609  }
2610  else if (strmatch(type, "WAVELET"))
2611  {
2612 
2613  if (m_nrhs<4)
2614  return NULL;
2615 
2616  char* dtype=get_str_from_str_or_direct(len);
2617  if (strmatch(dtype, "REAL"))
2618  {
2619  int32_t size=get_int_from_int_or_str();
2620  float64_t Wdilation=5.0;
2621  float64_t Wtranslation=2.0;
2622 
2623  if (m_nrhs>4)
2624  {
2625  Wdilation=get_real_from_real_or_str();
2626 
2627  if (m_nrhs>5)
2628  Wtranslation=get_real_from_real_or_str();
2629  }
2630 
2631  kernel=ui_kernel->create_sigmoid(size, Wdilation, Wtranslation);
2632  }
2633 
2634  SG_FREE(dtype);
2635  }
2636  else if (strmatch(type, "LINEAR"))
2637  {
2638  if (m_nrhs<4)
2639  return NULL;
2640  if (m_nrhs>5)
2641  return NULL;
2642 
2643  char* dtype=get_str_from_str_or_direct(len);
2644  int32_t size=get_int_from_int_or_str();
2645  float64_t scale=-1;
2646  if (m_nrhs==5)
2647  scale=get_real_from_real_or_str();
2648 
2649  if (strmatch(dtype, "BYTE"))
2650  kernel=ui_kernel->create_linearbyte(size, scale);
2651  else if (strmatch(dtype, "WORD"))
2652  kernel=ui_kernel->create_linearword(size, scale);
2653  else if (strmatch(dtype, "CHAR"))
2654  kernel=ui_kernel->create_linearstring(size, scale);
2655  else if (strmatch(dtype, "REAL"))
2656  kernel=ui_kernel->create_linear(size, scale);
2657  else if (strmatch(dtype, "SPARSEREAL"))
2658  kernel=ui_kernel->create_sparselinear(size, scale);
2659 
2660  SG_FREE(dtype);
2661  }
2662  else if (strmatch(type, "HISTOGRAM"))
2663  {
2664  if (m_nrhs<4)
2665  return NULL;
2666 
2667  char* dtype=get_str_from_str_or_direct(len);
2668  if (strmatch(dtype, "WORD"))
2669  {
2670  int32_t size=get_int_from_int_or_str();
2671  kernel=ui_kernel->create_histogramword(size);
2672  }
2673 
2674  SG_FREE(dtype);
2675  }
2676  else if (strmatch(type, "SALZBERG"))
2677  {
2678  if (m_nrhs<4)
2679  return NULL;
2680 
2681  char* dtype=get_str_from_str_or_direct(len);
2682  if (strmatch(dtype, "WORD"))
2683  {
2684  int32_t size=get_int_from_int_or_str();
2685  kernel=ui_kernel->create_salzbergword(size);
2686  }
2687 
2688  SG_FREE(dtype);
2689  }
2690  else if (strmatch(type, "POLYMATCH"))
2691  {
2692  if (m_nrhs<4)
2693  return NULL;
2694 
2695  char* dtype=get_str_from_str_or_direct(len);
2696  int32_t size=get_int_from_int_or_str();
2697  int32_t degree=3;
2698  bool inhomogene=false;
2699  bool normalize=true;
2700 
2701  if (m_nrhs>4)
2702  {
2703  degree=get_int_from_int_or_str();
2704  if (m_nrhs>5)
2705  {
2706  inhomogene=get_bool_from_bool_or_str();
2707  if (m_nrhs>6)
2708  normalize=get_bool_from_bool_or_str();
2709  }
2710  }
2711 
2712  if (strmatch(dtype, "CHAR"))
2713  {
2714  kernel=ui_kernel->create_polymatchstring(
2715  size, degree, inhomogene, normalize);
2716  }
2717  else if (strmatch(dtype, "WORD"))
2718  {
2719  kernel=ui_kernel->create_polymatchwordstring(
2720  size, degree, inhomogene, normalize);
2721  }
2722 
2723  SG_FREE(dtype);
2724  }
2725  else if (strmatch(type, "MATCH"))
2726  {
2727  if (m_nrhs<4)
2728  return NULL;
2729 
2730  char* dtype=get_str_from_str_or_direct(len);
2731  if (strmatch(dtype, "WORD"))
2732  {
2733  int32_t size=get_int_from_int_or_str();
2734  int32_t d=3;
2735  bool normalize=true;
2736 
2737  if (m_nrhs>4)
2738  d=get_int_from_int_or_str();
2739  if (m_nrhs>5)
2740  normalize=get_bool_from_bool_or_str();
2741 
2742  kernel=ui_kernel->create_matchwordstring(size, d, normalize);
2743  }
2744 
2745  SG_FREE(dtype);
2746  }
2747  else if (strmatch(type, "WEIGHTEDCOMMSTRING") || strmatch(type, "COMMSTRING"))
2748  {
2749  char* dtype=get_str_from_str_or_direct(len);
2750  int32_t size=get_int_from_int_or_str();
2751  bool use_sign=false;
2752  char* norm_str=NULL;
2753 
2754  if (m_nrhs>4)
2755  {
2756  use_sign=get_bool_from_bool_or_str();
2757 
2758  if (m_nrhs>5)
2759  norm_str=get_str_from_str_or_direct(len);
2760  }
2761 
2762  if (strmatch(dtype, "WORD"))
2763  {
2764  if (strmatch(type, "WEIGHTEDCOMMSTRING"))
2765  {
2766  kernel=ui_kernel->create_commstring(
2767  size, use_sign, norm_str, K_WEIGHTEDCOMMWORDSTRING);
2768  }
2769  else if (strmatch(type, "COMMSTRING"))
2770  {
2771  kernel=ui_kernel->create_commstring(
2772  size, use_sign, norm_str, K_COMMWORDSTRING);
2773  }
2774  }
2775  else if (strmatch(dtype, "ULONG"))
2776  {
2777  kernel=ui_kernel->create_commstring(
2778  size, use_sign, norm_str, K_COMMULONGSTRING);
2779  }
2780 
2781  SG_FREE(dtype);
2782  SG_FREE(norm_str);
2783  }
2784  else if (strmatch(type, "CHI2"))
2785  {
2786  if (m_nrhs<4)
2787  return NULL;
2788 
2789  char* dtype=get_str_from_str_or_direct(len);
2790  if (strmatch(dtype, "REAL"))
2791  {
2792  int32_t size=get_int_from_int_or_str();
2793  float64_t width=1;
2794 
2795  if (m_nrhs>4)
2796  width=get_real_from_real_or_str();
2797 
2798  kernel=ui_kernel->create_chi2(size, width);
2799  }
2800 
2801  SG_FREE(dtype);
2802  }
2803  else if (strmatch(type, "FIXEDDEGREE"))
2804  {
2805  if (m_nrhs<4)
2806  return NULL;
2807 
2808  char* dtype=get_str_from_str_or_direct(len);
2809  if (strmatch(dtype, "CHAR"))
2810  {
2811  int32_t size=get_int_from_int_or_str();
2812  int32_t d=3;
2813  if (m_nrhs>4)
2814  d=get_int_from_int_or_str();
2815 
2816  kernel=ui_kernel->create_fixeddegreestring(size, d);
2817  }
2818 
2819  SG_FREE(dtype);
2820  }
2821  else if (strmatch(type, "LOCALALIGNMENT"))
2822  {
2823  if (m_nrhs<4)
2824  return NULL;
2825 
2826  char* dtype=get_str_from_str_or_direct(len);
2827  if (strmatch(dtype, "CHAR"))
2828  {
2829  int32_t size=get_int_from_int_or_str();
2830 
2831  kernel=ui_kernel->create_localalignmentstring(size);
2832  }
2833 
2834  SG_FREE(dtype);
2835  }
2836  else if (strmatch(type, "OLIGO"))
2837  {
2838  if (m_nrhs<6)
2839  return NULL;
2840 
2841  char* dtype=get_str_from_str_or_direct(len);
2842  if (strmatch(dtype, "CHAR"))
2843  {
2844  int32_t size=get_int_from_int_or_str();
2845  int32_t k=get_int_from_int_or_str();
2846  float64_t w=get_real_from_real_or_str();
2847 
2848  kernel=ui_kernel->create_oligo(size, k, w);
2849  }
2850 
2851  SG_FREE(dtype);
2852  }
2853  else if (strmatch(type, "WEIGHTEDDEGREEPOS2") ||
2854  strmatch(type, "WEIGHTEDDEGREEPOS2_NONORM"))
2855  {
2856  if (m_nrhs<7)
2857  return NULL;
2858 
2859  char* dtype=get_str_from_str_or_direct(len);
2860  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2861  {
2862  int32_t size=get_int_from_int_or_str();
2863  int32_t order=get_int_from_int_or_str();
2864  int32_t max_mismatch=get_int_from_int_or_str();
2865  int32_t length=get_int_from_int_or_str();
2866  int32_t* shifts=NULL;
2867  int32_t l=0;
2868  get_vector_from_int_vector_or_str(shifts, l);
2869 
2870  ASSERT(l==length);
2871 
2872  bool use_normalization=true;
2873  if (strmatch(type, "WEIGHTEDDEGREEPOS2_NONORM"))
2874  use_normalization=false;
2875 
2876  kernel=ui_kernel->create_weighteddegreepositionstring2(
2877  size, order, max_mismatch, shifts, length,
2878  use_normalization);
2879 
2880  SG_FREE(shifts);
2881  }
2882 
2883  SG_FREE(dtype);
2884  }
2885  else if (strmatch(type, "WEIGHTEDDEGREEPOS3"))
2886  {
2887  if (m_nrhs<7)
2888  return NULL;
2889 
2890  char* dtype=get_str_from_str_or_direct(len);
2891  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2892  {
2893  int32_t size=get_int_from_int_or_str();
2894  int32_t order=get_int_from_int_or_str();
2895  int32_t max_mismatch=get_int_from_int_or_str();
2896  int32_t length=get_int_from_int_or_str();
2897  int32_t mkl_stepsize=get_int_from_int_or_str();
2898  int32_t* shifts=NULL;
2899  int32_t l=0;
2900  get_vector_from_int_vector_or_str(shifts, l);
2901  ASSERT(l==length);
2902 
2903  float64_t* position_weights=NULL;
2904  if (m_nrhs>9+length)
2905  {
2906  get_vector_from_real_vector_or_str(
2907  position_weights, length);
2908  }
2909 
2910  kernel=ui_kernel->create_weighteddegreepositionstring3(
2911  size, order, max_mismatch, shifts, length,
2912  mkl_stepsize, position_weights);
2913 
2914  SG_FREE(position_weights);
2915  SG_FREE(shifts);
2916  }
2917 
2918  SG_FREE(dtype);
2919  }
2920  else if (strmatch(type, "WEIGHTEDDEGREEPOS"))
2921  {
2922  if (m_nrhs<4)
2923  return NULL;
2924 
2925  char* dtype=get_str_from_str_or_direct(len);
2926  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2927  {
2928  int32_t size=get_int_from_int_or_str();
2929  int32_t order=3;
2930  int32_t max_mismatch=0;
2931  int32_t length=0;
2932  int32_t center=0;
2933  float64_t step=1;
2934 
2935  if (m_nrhs>4)
2936  {
2937  order=get_int_from_int_or_str();
2938 
2939  if (m_nrhs>5)
2940  {
2941  max_mismatch=get_int_from_int_or_str();
2942 
2943  if (m_nrhs>6)
2944  {
2945  length=get_int_from_int_or_str();
2946 
2947  if (m_nrhs>7)
2948  {
2949  center=get_int_from_int_or_str();
2950 
2951  if (m_nrhs>8)
2952  step=get_real_from_real_or_str();
2953  }
2954  }
2955  }
2956  }
2957 
2958  kernel=ui_kernel->create_weighteddegreepositionstring(
2959  size, order, max_mismatch, length, center, step);
2960  }
2961 
2962  SG_FREE(dtype);
2963  }
2964  else if (strmatch(type, "WEIGHTEDDEGREE"))
2965  {
2966  if (m_nrhs<4)
2967  return NULL;
2968 
2969  char* dtype=get_str_from_str_or_direct(len);
2970  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
2971  {
2972  int32_t size=get_int_from_int_or_str();
2973  int32_t order=3;
2974  int32_t max_mismatch=0;
2975  bool use_normalization=true;
2976  int32_t mkl_stepsize=1;
2977  bool block_computation=true;
2978  int32_t single_degree=-1;
2979 
2980  if (m_nrhs>4)
2981  {
2982  order=get_int_from_int_or_str();
2983 
2984  if (m_nrhs>5)
2985  {
2986  max_mismatch=get_int_from_int_or_str();
2987 
2988  if (m_nrhs>6)
2989  {
2990  use_normalization=get_bool_from_bool_or_str();
2991 
2992  if (m_nrhs>7)
2993  {
2994  mkl_stepsize=get_int_from_int_or_str();
2995 
2996  if (m_nrhs>8)
2997  {
2998  block_computation=get_int_from_int_or_str();
2999 
3000  if (m_nrhs>9)
3001  single_degree=get_int_from_int_or_str();
3002  }
3003  }
3004  }
3005  }
3006  }
3007 
3008  kernel=ui_kernel->create_weighteddegreestring(
3009  size, order, max_mismatch, use_normalization,
3010  mkl_stepsize, block_computation, single_degree);
3011  }
3012 
3013  SG_FREE(dtype);
3014  }
3015  else if (strmatch(type, "WEIGHTEDDEGREERBF"))
3016  {
3017  if (m_nrhs<5)
3018  return NULL;
3019 
3020  char* dtype=get_str_from_str_or_direct(len);
3021  int32_t size=get_int_from_int_or_str();
3022  int32_t nof_properties=get_int_from_int_or_str();
3023  int32_t degree=1;
3024  float64_t width=1;
3025  if (m_nrhs>5)
3026  {
3027  degree=get_int_from_int_or_str();
3028  if (m_nrhs>6)
3029  {
3030  width=get_real_from_real_or_str();
3031  }
3032 
3033  }
3034  //if (strmatch(dtype, "REAL"))
3035 
3036  kernel=ui_kernel->create_weighteddegreerbf(size, degree, nof_properties, width);
3037 
3038  SG_FREE(dtype);
3039 
3040  }
3041  else if (strmatch(type, "SPECTRUMMISMATCHRBF"))
3042  {
3043  if (m_nrhs<7)
3044  return NULL;
3045 
3046  char* dtype=get_str_from_str_or_direct(len);
3047  if (strmatch(dtype, "CHAR") || strmatch(dtype, "STRING"))
3048  {
3049  int32_t size=get_int_from_int_or_str();
3050  int32_t degree=get_int_from_int_or_str();
3051  int32_t max_mismatch=get_int_from_int_or_str();
3052  float64_t width=get_real_from_real_or_str();
3053  float64_t* AA_matrix = NULL;
3054 
3055  //int32_t length=128*128;
3056  //get_vector_from_real_vector_or_str(AA_matrix, length);
3057  float64_t* helper_matrix=NULL;
3058  int32_t N=0;
3059  int32_t M=0;
3060  get_matrix(helper_matrix, N, M);
3061 
3062  if (N == 128 && M == 128)
3063  {
3064  AA_matrix=SG_MALLOC(float64_t, N*M);
3065  memcpy(AA_matrix, helper_matrix, N*M*sizeof(float64_t)) ;
3066  kernel=ui_kernel->create_spectrummismatchrbf(size, AA_matrix, max_mismatch, degree, width);
3067  }
3068  else
3069  {
3070  SG_ERROR("Matrix size %d %d\n", N, M);
3071  }
3072  }
3073  SG_FREE(dtype);
3074 
3075  }
3076 
3077  else if (strmatch(type, "SLIK") || strmatch(type, "LIK"))
3078  {
3079  if (m_nrhs<4)
3080  return NULL;
3081 
3082  char* dtype=get_str_from_str_or_direct(len);
3083  if (strmatch(dtype, "CHAR"))
3084  {
3085  int32_t size=get_int_from_int_or_str();
3086  int32_t length=3;
3087  int32_t inner_degree=3;
3088  int32_t outer_degree=1;
3089 
3090  if (m_nrhs>4)
3091  {
3092  length=get_int_from_int_or_str();
3093 
3094  if (m_nrhs>5)
3095  {
3096  inner_degree=get_int_from_int_or_str();
3097 
3098  if (m_nrhs>6)
3099  outer_degree=get_int_from_int_or_str();
3100  }
3101  }
3102 
3103  if (strmatch(type, "SLIK"))
3104  {
3105  kernel=ui_kernel->create_localityimprovedstring(
3106  size, length, inner_degree, outer_degree,
3108  }
3109  else
3110  {
3111  kernel=ui_kernel->create_localityimprovedstring(
3112  size, length, inner_degree, outer_degree,
3114  }
3115  }
3116 
3117  SG_FREE(dtype);
3118  }
3119  else if (strmatch(type, "POLY"))
3120  {
3121  if (m_nrhs<4)
3122  return NULL;
3123 
3124  char* dtype=get_str_from_str_or_direct(len);
3125  int32_t size=get_int_from_int_or_str();
3126  int32_t degree=2;
3127  bool inhomogene=false;
3128  bool normalize=true;
3129 
3130  if (m_nrhs>4)
3131  {
3132  degree=get_int_from_int_or_str();
3133 
3134  if (m_nrhs>5)
3135  {
3136  inhomogene=get_bool_from_bool_or_str();
3137 
3138  if (m_nrhs>6)
3139  normalize=get_bool_from_bool_or_str();
3140  }
3141  }
3142 
3143  if (strmatch(dtype, "REAL"))
3144  {
3145  kernel=ui_kernel->create_poly(
3146  size, degree, inhomogene, normalize);
3147  }
3148  else if (strmatch(dtype, "SPARSEREAL"))
3149  {
3150  kernel=ui_kernel->create_sparsepoly(
3151  size, degree, inhomogene, normalize);
3152  }
3153 
3154  SG_FREE(dtype);
3155  }
3156  else if (strmatch(type, "SIGMOID"))
3157  {
3158  if (m_nrhs<4)
3159  return NULL;
3160 
3161  char* dtype=get_str_from_str_or_direct(len);
3162  if (strmatch(dtype, "REAL"))
3163  {
3164  int32_t size=get_int_from_int_or_str();
3165  float64_t gamma=0.01;
3166  float64_t coef0=0;
3167 
3168  if (m_nrhs>4)
3169  {
3170  gamma=get_real_from_real_or_str();
3171 
3172  if (m_nrhs>5)
3173  coef0=get_real_from_real_or_str();
3174  }
3175 
3176  kernel=ui_kernel->create_sigmoid(size, gamma, coef0);
3177  }
3178 
3179  SG_FREE(dtype);
3180  }
3181  else if (strmatch(type, "GAUSSIAN")) // RBF
3182  {
3183  if (m_nrhs<4)
3184  return NULL;
3185 
3186  char* dtype=get_str_from_str_or_direct(len);
3187  int32_t size=get_int_from_int_or_str();
3188  float64_t width=1;
3189  if (m_nrhs>4)
3190  width=get_real_from_real_or_str();
3191 
3192  if (strmatch(dtype, "REAL"))
3193  kernel=ui_kernel->create_gaussian(size, width);
3194  else if (strmatch(dtype, "SPARSEREAL"))
3195  kernel=ui_kernel->create_sparsegaussian(size, width);
3196 
3197  SG_FREE(dtype);
3198  }
3199  else if (strmatch(type, "GAUSSIANSHIFT")) // RBF
3200  {
3201  if (m_nrhs<7)
3202  return NULL;
3203 
3204  char* dtype=get_str_from_str_or_direct(len);
3205  if (strmatch(dtype, "REAL"))
3206  {
3207  int32_t size=get_int_from_int_or_str();
3208  float64_t width=get_real_from_real_or_str();
3209  int32_t max_shift=get_int_from_int_or_str();
3210  int32_t shift_step=get_int_from_int_or_str();
3211 
3212  kernel=ui_kernel->create_gaussianshift(
3213  size, width, max_shift, shift_step);
3214  }
3215 
3216  SG_FREE(dtype);
3217  }
3218  else if (strmatch(type, "CUSTOM"))
3219  {
3220  if (m_nrhs!=4 || !create_return_values(0))
3221  return false;
3222 
3223  float64_t* kmatrix=NULL;
3224  int32_t num_feat=0;
3225  int32_t num_vec=0;
3226  get_matrix(kmatrix, num_feat, num_vec);
3227 
3228  int32_t tlen=0;
3229  char* ktype=get_string(tlen);
3230 
3231  if (!strmatch(ktype, "DIAG") &&
3232  !strmatch(ktype, "FULL") &&
3233  !strmatch(ktype, "FULL2DIAG"))
3234  {
3235  SG_FREE(ktype);
3236  SG_ERROR("Undefined type, not DIAG, FULL or FULL2DIAG.\n");
3237  }
3238 
3239  bool source_is_diag=false;
3240  bool dest_is_diag=false;
3241 
3242  if (strmatch(ktype, "FULL2DIAG"))
3243  dest_is_diag=true;
3244  else if (strmatch(ktype, "DIAG"))
3245  {
3246  source_is_diag=true;
3247  dest_is_diag=true;
3248  }
3249 
3250  kernel=ui_kernel->create_custom(kmatrix, num_feat, num_vec,
3251  source_is_diag, dest_is_diag);
3252  }
3253  else if (strmatch(type, "CONST"))
3254  {
3255  if (m_nrhs<4)
3256  return NULL;
3257 
3258  char* dtype=get_str_from_str_or_direct(len);
3259  if (strmatch(dtype, "REAL"))
3260  {
3261  int32_t size=get_int_from_int_or_str();
3262  float64_t c=1;
3263  if (m_nrhs>4)
3264  c=get_real_from_real_or_str();
3265 
3266  kernel=ui_kernel->create_const(size, c);
3267  }
3268 
3269  SG_FREE(dtype);
3270  }
3271  else if (strmatch(type, "DIAG"))
3272  {
3273  if (m_nrhs<4)
3274  return NULL;
3275 
3276  char* dtype=get_str_from_str_or_direct(len);
3277  if (strmatch(dtype, "REAL"))
3278  {
3279  int32_t size=get_int_from_int_or_str();
3280  float64_t diag=1;
3281  if (m_nrhs>4)
3282  diag=get_real_from_real_or_str();
3283 
3284  kernel=ui_kernel->create_diag(size, diag);
3285  }
3286 
3287  SG_FREE(dtype);
3288  }
3289 
3290  else if (strmatch(type, "TPPK"))
3291  {
3292  if (m_nrhs!=5)
3293  return NULL;
3294 
3295  char* dtype=get_str_from_str_or_direct(len);
3296  if (strmatch(dtype, "INT"))
3297  {
3298  int32_t size=get_int_from_int_or_str();
3299  float64_t* km=NULL;
3300  int32_t rows=0;
3301  int32_t cols=0;
3302  get_matrix(km, rows, cols);
3303  kernel=ui_kernel->create_tppk(size, km, rows, cols);
3304  }
3305 
3306  SG_FREE(dtype);
3307  }
3308  else
3310 
3311  SG_FREE(type);
3312  SG_DEBUG("created kernel: %p\n", kernel);
3313  return kernel;
3314 }
3315 
3316 
/** Optionally wrap byte string features in a derived feature object.
 *
 * When more than four input arguments are present, the next string argument
 * selects the wrapper: "WD" builds a CWDFeatures, "WSPEC" builds a
 * CImplicitWeightedSpecFeatures. Any other string leaves feat untouched, so
 * orig_feat is returned as is (there is no else branch).
 *
 * @param orig_feat string features to wrap
 * @return the newly created wrapper, or orig_feat if nothing was wrapped
 *
 * NOTE(review): the declaration of `sf` is not visible in this listing; the
 * embedded numbering jumps from 3350 to 3352, so one source line is absent.
 */
3317 CFeatures* CSGInterface::create_custom_string_features(CStringFeatures<uint8_t>* orig_feat)
3318 {
3319  CFeatures* feat=orig_feat;
3320 
3321  if (m_nrhs>4)
3322  {
3323  int32_t start=-1;
3324  int32_t order=0;
3325  int32_t from_order=0;
3326  bool normalize=true;
3327 
3328  int32_t feature_class_len=0;
3329  char* feature_class_str=get_string(feature_class_len);
3330  ASSERT(feature_class_str);
      // alphabet is allocated in both branches below and released at the end;
      // it is not passed to any call visible in this block
3331  CAlphabet* alphabet=NULL;
3332  if (strmatch(feature_class_str, "WD"))
3333  {
3334  if (m_nrhs!=7)
3335  SG_ERROR("Please specify alphabet, WD, order, from_order\n");
3336 
3337  alphabet=new CAlphabet(RAWDNA);
      // arguments are consumed in order: order, then from_order
3338  order=get_int();
3339  from_order=get_int();
3340  feat = new CWDFeatures((CStringFeatures<uint8_t>*) feat, order, from_order);
3341  }
3342  else if (strmatch(feature_class_str, "WSPEC"))
3343  {
3344  if (m_nrhs!=8)
3345  SG_ERROR("Please specify alphabet, order, WSPEC, start, normalize\n");
3346 
3347  alphabet=new CAlphabet(RAWDNA);
      // arguments are consumed in order: order, start, normalize
3348  order=get_int();
3349  start=get_int();
3350  normalize=get_bool();
      // NOTE(review): `sf` is presumably declared on the absent listing line 3351 - confirm
3352  sf->obtain_from_char_features((CStringFeatures<uint8_t>*) feat, start, order, 0, normalize);
3353  sf->add_preprocessor(new CSortWordString());
3354  sf->apply_preprocessor();
      // the reference to the incoming char features is dropped once sf has been built from them
3355  SG_UNREF(feat);
3356  feat = new CImplicitWeightedSpecFeatures(sf, normalize);
3357  }
3358  SG_FREE(feature_class_str);
3359 
3360  SG_UNREF(alphabet);
3361  }
3362 
3363  return feat;
3364 }
3365 
3366 CFeatures* CSGInterface::create_custom_real_features(CSimpleFeatures<float64_t>* orig_feat)
3367 {
3368  CFeatures* feat=orig_feat;
3369 
3370  if (m_nrhs==6)
3371  {
3372  int32_t degree=0;
3373  int32_t feature_class_len=0;
3374  bool normalize;
3375  char* feature_class_str=get_string(feature_class_len);
3376  ASSERT(feature_class_str);
3377  if (strmatch(feature_class_str, "POLY"))
3378  {
3379  //if (m_nrhs!=7)
3380  // SG_ERROR("Please specify POLY, degree\n");
3381 
3382  degree=get_int();
3383  normalize = get_bool();
3384  feat = new CPolyFeatures((CSimpleFeatures<float64_t>*) feat, degree, normalize);
3385 
3386  }
3387  else
3388  SG_ERROR("Unknown feature class: %s\n", feature_class_str);
3389 
3390  SG_FREE(feature_class_str);
3391  }
3392 
3393  return feat;
3394 }
3395 
/** Command handler kept for the deprecated init_kernel command.
 *
 * Invokes the SG_DEPRECATED macro and does no kernel work of its own.
 *
 * @return always true
 */
3396 bool CSGInterface::cmd_init_kernel()
3397 {
3398  SG_DEPRECATED;
3399  return true;
3400 }
3401 
3402 bool CSGInterface::cmd_clean_kernel()
3403 {
3404  if (m_nrhs<1 || !create_return_values(0))
3405  return false;
3406 
3407  return ui_kernel->clean_kernel();
3408 }
3409 
3410 bool CSGInterface::cmd_save_kernel()
3411 {
3412  if (m_nrhs<2 || !create_return_values(0))
3413  return false;
3414 
3415  int32_t len=0;
3416  char* filename=get_str_from_str_or_direct(len);
3417 
3418  bool success=ui_kernel->save_kernel(filename);
3419 
3420  SG_FREE(filename);
3421  return success;
3422 }
3423 
/** Command handler that initializes the kernel and returns its matrix.
 *
 * Accepts at most two input arguments and requires one return value. With two
 * arguments the second is read as a target string and handed to
 * ui_kernel->init_kernel(); otherwise init_kernel() receives NULL. When
 * initialization succeeds the kernel matrix is passed to set_matrix().
 *
 * @return false if the argument check fails, otherwise the result of
 *         ui_kernel->init_kernel()
 *
 * NOTE(review): the declaration of `km` is not visible in this listing; the
 * embedded numbering jumps from 3440 to 3443.
 */
3424 bool CSGInterface::cmd_get_kernel_matrix()
3425 {
3426  if (m_nrhs>2 || !create_return_values(1))
3427  return false;
3428 
3429  int32_t len=0;
3430  char* target=NULL;
3431 
      // target stays NULL unless a second argument was supplied
3432  if (m_nrhs==2)
3433  target=get_string(len);
3434  bool success=ui_kernel->init_kernel(target);
3435 
3436  if (success)
3437  {
3438  CKernel* kernel=ui_kernel->get_kernel();
3439  if (!kernel || !kernel->has_features())
3440  SG_ERROR("No kernel defined or not initialized.\n");
3441 
      // NOTE(review): `km` is presumably obtained from the kernel on the absent listing line 3442 - confirm
3443  set_matrix(km.matrix, km.num_rows, km.num_cols);
3444  }
3445 
3446  SG_FREE(target);
3447 
3448  return success;
3449 }
3450 
/** Command handler that sets or deletes position weights on a WD-type kernel.
 *
 * Accepts two or three input arguments and no return values. If the current
 * kernel is a combined kernel, its last subkernel is used and must be of type
 * K_WEIGHTEDDEGREE or K_WEIGHTEDDEGREEPOS. The weights are read as a
 * dim x len matrix. A K_WEIGHTEDDEGREE kernel receives them through
 * set_position_weights(); every other kernel takes the second branch, which
 * also understands an optional TRAIN|TEST target and treats a 0x0 matrix as
 * a request to delete the weights.
 *
 * @return false if the argument check fails, otherwise the success flag of
 *         the set/delete call that was made
 *
 * NOTE(review): the declarations of `k` in both branches are not visible in
 * this listing (lines 3479 and 3490-3491 are absent).
 * NOTE(review): the kernel type is only validated inside the combined-kernel
 * branch; a non-combined kernel of another type appears to reach the second
 * branch unchecked - confirm.
 */
3451 bool CSGInterface::cmd_set_WD_position_weights()
3452 {
3453  if (m_nrhs<2 || m_nrhs>3 || !create_return_values(0))
3454  return false;
3455 
3456  CKernel* kernel=ui_kernel->get_kernel();
3457  if (!kernel)
3458  SG_ERROR("No kernel.\n");
3459 
3460  if (kernel->get_kernel_type()==K_COMBINED)
3461  {
      // operate on the last subkernel of the combination
3462  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3463  if (!kernel)
3464  SG_ERROR("No last kernel.\n");
3465 
3466  EKernelType ktype=kernel->get_kernel_type();
3467  if (ktype!=K_WEIGHTEDDEGREE && ktype!=K_WEIGHTEDDEGREEPOS)
3468  SG_ERROR("Unsupported kernel.\n");
3469  }
3470 
3471  bool success=false;
3472  float64_t* weights=NULL;
3473  int32_t dim=0;
3474  int32_t len=0;
3475  get_matrix(weights, dim, len);
3476 
3477  if (kernel->get_kernel_type()==K_WEIGHTEDDEGREE)
3478  {
      // NOTE(review): the left-hand side of this declaration (listing line 3479) is absent
3480  (CWeightedDegreeStringKernel*) kernel;
3481 
3482  if (dim!=1 && len>0)
3483  SG_ERROR("Dimension mismatch (should be 1 x seq_length or 0x0\n");
3484 
3485  ui_kernel->init_kernel("TRAIN");
3486  success=k->set_position_weights(weights, len);
3487  }
3488  else
3489  {
      // NOTE(review): the declaration of `k` for this branch (listing lines 3490-3491) is absent
3492  char* target=NULL;
3493  bool is_train=true;
3494 
      // an optional third argument selects TRAIN (default) or TEST
3495  if (m_nrhs==3)
3496  {
3497  int32_t tlen=0;
3498  target=get_string(tlen);
3499  if (!target)
3500  {
3501  SG_FREE(weights);
3502  SG_ERROR("Couldn't find second argument to method.\n");
3503  }
3504 
3505  if (!strmatch(target, "TRAIN") && !strmatch(target, "TEST"))
3506  {
3507  SG_FREE(target);
3508  SG_ERROR("Second argument none of TRAIN or TEST.\n");
3509  }
3510 
3511  if (strmatch(target, "TEST"))
3512  is_train=false;
3513  }
3514 
3515  if (dim!=1 && len>0)
3516  {
3517  SG_FREE(target);
3518  SG_ERROR("Dimension mismatch (should be 1 x seq_length or 0x0\n");
3519  }
3520 
      // an empty (0x0) matrix requests deletion of the position weights
3521  if (dim==0 && len==0)
3522  {
      // NOTE(review): create_return_values(3) is used as the lhs/rhs selector here although
      // entry required create_return_values(0); possibly m_nrhs==3 was intended - confirm
3523  if (create_return_values(3))
3524  {
3525  if (is_train)
3526  success=k->delete_position_weights_lhs();
3527  else
3528  success=k->delete_position_weights_rhs();
3529  }
3530  else
3531  success=k->delete_position_weights();
3532  }
3533  else
3534  {
3535  if (create_return_values(3))
3536  {
3537  if (is_train)
3538  success=k->set_position_weights_lhs(weights, dim, len);
3539  else
3540  success=k->set_position_weights_rhs(weights, dim, len);
3541  }
3542  else
3543  {
3544  ui_kernel->init_kernel("TRAIN");
3545  k->set_position_weights(SGVector<float64_t>(weights, len));
3546  success=true;
3547  }
3548  }
3549 
3550  SG_FREE(target);
3551  }
3552 
3553  return success;
3554 }
3555 
3556 bool CSGInterface::cmd_get_subkernel_weights()
3557 {
3558  if (m_nrhs!=1 || !create_return_values(1))
3559  return false;
3560 
3561  CKernel *kernel=ui_kernel->get_kernel();
3562  if (!kernel)
3563  SG_ERROR("Invalid kernel.\n");
3564 
3565  EKernelType ktype=kernel->get_kernel_type();
3566  const float64_t* weights=NULL;
3567 
3568  if (ktype==K_COMBINED)
3569  {
3570  int32_t num_weights=-1;
3571  weights=((CCombinedKernel *) kernel)->get_subkernel_weights(num_weights);
3572 
3573  // matrices of shape 1 x num_weight are returned
3574  set_matrix(weights, 1, num_weights);
3575  return true;
3576  }
3577 
3578  int32_t degree=-1;
3579  int32_t length=-1;
3580 
3581  if (ktype==K_WEIGHTEDDEGREE)
3582  {
3583  weights=((CWeightedDegreeStringKernel *) kernel)->
3584  get_degree_weights(degree, length);
3585  }
3586  else if (ktype==K_WEIGHTEDDEGREEPOS)
3587  {
3588  weights=((CWeightedDegreePositionStringKernel *) kernel)->
3589  get_degree_weights(degree, length);
3590  }
3591  else
3592  SG_ERROR("Setting subkernel weights not supported on this kernel.\n");
3593 
3594  if (length==0)
3595  length=1;
3596 
3597  set_matrix(weights, degree, length);
3598  return true;
3599 }
3600 
/** Command handler that sets the subkernel weights of the current kernel.
 *
 * Requires exactly two input arguments and no return values. The weights are
 * read as a dim x len matrix. For K_WEIGHTEDDEGREE and K_WEIGHTEDDEGREEPOS
 * kernels dim must equal the kernel degree and len must be at least 1; a len
 * of 1 is converted to 0 before set_weights() is called. For all other
 * kernels the matrix must be 1 x num_subkernels and is passed to
 * set_subkernel_weights().
 *
 * @return false if the argument check fails, otherwise the success flag of
 *         the set call that was made
 *
 * NOTE(review): the declarations of `k` in the two WD branches are not
 * visible in this listing (lines 3619 and 3632-3633 are absent).
 */
3601 bool CSGInterface::cmd_set_subkernel_weights()
3602 {
3603  if (m_nrhs!=2 || !create_return_values(0))
3604  return false;
3605 
3606  CKernel* kernel=ui_kernel->get_kernel();
3607  if (!kernel)
3608  SG_ERROR("No kernel.\n");
3609 
3610  bool success=false;
3611  float64_t* weights=NULL;
3612  int32_t dim=0;
3613  int32_t len=0;
3614  get_matrix(weights, dim, len);
3615 
3616  EKernelType ktype=kernel->get_kernel_type();
3617  if (ktype==K_WEIGHTEDDEGREE)
3618  {
      // NOTE(review): the left-hand side of this declaration (listing line 3619) is absent
3620  (CWeightedDegreeStringKernel*) kernel;
3621  int32_t degree=k->get_degree();
      // NOTE(review): the message prints len and degree, not the received dim x len - confirm intent
3622  if (dim!=degree || len<1)
3623  SG_ERROR("WD: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree);
3624 
      // a single column is passed on as len 0
3625  if (len==1)
3626  len=0;
3627 
3628  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3629  }
3630  else if (ktype==K_WEIGHTEDDEGREEPOS)
3631  {
      // NOTE(review): the declaration of `k` for this branch (listing lines 3632-3633) is absent
3634  int32_t degree=k->get_degree();
3635  if (dim!=degree || len<1)
3636  SG_ERROR("WDPos: Dimension mismatch (should be (seq_length | 1) x degree) got (%d x %d)\n", len, degree);
3637 
3638  if (len==1)
3639  len=0;
3640 
3641  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3642  }
3643  else // all other kernels
3644  {
3645  int32_t num_subkernels=kernel->get_num_subkernels();
3646  if (dim!=1 || len!=num_subkernels)
3647  SG_ERROR("All: Dimension mismatch (should be 1 x num_subkernels)\n");
3648 
3649  kernel->set_subkernel_weights(SGVector<float64_t>(weights, len));
3650  success=true;
3651  }
3652 
3653  return success;
3654 }
3655 
/** Command handler that sets the weights of one subkernel of a combined kernel.
 *
 * Requires exactly three input arguments and no return values. The current
 * kernel must be of type K_COMBINED. The weights are read as a dim x len
 * matrix, followed by the integer index of the subkernel to modify. The
 * selected subkernel is then handled by type: for K_WEIGHTEDDEGREE and
 * K_WEIGHTEDDEGREEPOS, dim must equal the kernel degree and len must be at
 * least 1 (a len of 1 is converted to 0 before set_weights()); for all other
 * types the matrix must be 1 x num_subkernels.
 *
 * @return false if the argument check fails, otherwise the success flag of
 *         the set call that was made
 *
 * NOTE(review): the declarations of `k` in the two WD branches are not
 * visible in this listing (lines 3683 and 3696-3697 are absent).
 */
3656 bool CSGInterface::cmd_set_subkernel_weights_combined()
3657 {
3658  if (m_nrhs!=3 || !create_return_values(0))
3659  return false;
3660 
3661  CKernel* kernel=ui_kernel->get_kernel();
3662  if (!kernel)
3663  SG_ERROR("No kernel.\n");
3664  if (kernel->get_kernel_type()!=K_COMBINED)
3665  SG_ERROR("Only works for combined kernels.\n");
3666 
3667  bool success=false;
3668  float64_t* weights=NULL;
3669  int32_t dim=0;
3670  int32_t len=0;
3671  get_matrix(weights, dim, len);
3672 
      // the subkernel index is read after the weight matrix
3673  int32_t idx=get_int();
3674  SG_DEBUG("using kernel_idx=%i\n", idx);
3675 
      // from here on `kernel` refers to the selected subkernel
3676  kernel=((CCombinedKernel*) kernel)->get_kernel(idx);
3677  if (!kernel)
3678  SG_ERROR("No subkernel at idx %d.\n", idx);
3679 
3680  EKernelType ktype=kernel->get_kernel_type();
3681  if (ktype==K_WEIGHTEDDEGREE)
3682  {
      // NOTE(review): the left-hand side of this declaration (listing line 3683) is absent
3684  (CWeightedDegreeStringKernel*) kernel;
3685  int32_t degree=k->get_degree();
3686  if (dim!=degree || len<1)
3687  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n");
3688 
      // a single column is passed on as len 0
3689  if (len==1)
3690  len=0;
3691 
3692  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3693  }
3694  else if (ktype==K_WEIGHTEDDEGREEPOS)
3695  {
      // NOTE(review): the declaration of `k` for this branch (listing lines 3696-3697) is absent
3698  int32_t degree=k->get_degree();
3699  if (dim!=degree || len<1)
3700  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n");
3701 
3702  if (len==1)
3703  len=0;
3704 
3705  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3706  }
3707  else // all other kernels
3708  {
3709  int32_t num_subkernels=kernel->get_num_subkernels();
3710  if (dim!=1 || len!=num_subkernels)
3711  SG_ERROR("Dimension mismatch (should be 1 x num_subkernels)\n");
3712 
3713  kernel->set_subkernel_weights(SGVector<float64_t>(weights, len));
3714  success=true;
3715  }
3716 
3717  return success;
3718 }
3719 
3720 bool CSGInterface::cmd_get_dotfeature_weights_combined()
3721 {
3722  if (m_nrhs!=2 || !create_return_values(1))
3723  return false;
3724 
3725  int32_t tlen=0;
3726  char* target=get_string(tlen);
3727  CFeatures* features=NULL;
3728 
3729  if (strmatch(target, "TRAIN"))
3730  features=ui_features->get_train_features();
3731  else if (strmatch(target, "TEST"))
3732  features=ui_features->get_test_features();
3733  else
3734  {
3735  SG_FREE(target);
3736  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
3737  }
3738  SG_FREE(target);
3739 
3740  if (!features)
3741  SG_ERROR("No features.\n");
3742  if (features->get_feature_class()!=C_COMBINED_DOT)
3743  SG_ERROR("Only works for combined dot features.\n");
3744 
3745  float64_t* weights=NULL;
3746  int32_t len=0;
3747  ((CCombinedDotFeatures*) features)->get_subfeature_weights(&weights, &len);
3748  set_vector(weights, len);
3749  SG_FREE(weights);
3750 
3751  return true;
3752 }
3753 
3754 bool CSGInterface::cmd_set_dotfeature_weights_combined()
3755 {
3756  if (m_nrhs!=3 || !create_return_values(0))
3757  return false;
3758 
3759  int32_t tlen=0;
3760  char* target=get_string(tlen);
3761  CFeatures* features=NULL;
3762 
3763  if (strmatch(target, "TRAIN"))
3764  features=ui_features->get_train_features();
3765  else if (strmatch(target, "TEST"))
3766  features=ui_features->get_test_features();
3767  else
3768  {
3769  SG_FREE(target);
3770  SG_ERROR("Unknown target, neither TRAIN nor TEST.\n");
3771  }
3772  SG_FREE(target);
3773 
3774  if (!features)
3775  SG_ERROR("No features.\n");
3776  if (features->get_feature_class()!=C_COMBINED_DOT)
3777  SG_ERROR("Only works for combined dot features.\n");
3778 
3779  float64_t* weights=NULL;
3780  int32_t dim=0;
3781  int32_t len=0;
3782  get_matrix(weights, dim, len);
3783 
3784  ((CCombinedDotFeatures*) features)->set_subfeature_weights(weights, len);
3785 
3786  return true;
3787 }
3788 
/** Command handler that sets the weights of the last subkernel of a combined kernel.
 *
 * Requires exactly two input arguments and no return values. The current
 * kernel must be of type K_COMBINED and must have a last subkernel. The
 * weights are read as a dim x len matrix and applied to that subkernel by
 * type: for K_WEIGHTEDDEGREE and K_WEIGHTEDDEGREEPOS, dim must equal the
 * kernel degree and len must be at least 1 (a len of 1 is converted to 0
 * before set_weights()); for all other types the matrix must be
 * 1 x num_subkernels.
 *
 * @return false if the argument check fails, otherwise the success flag of
 *         the set call that was made
 *
 * NOTE(review): the declarations of `k` in the two WD branches are not
 * visible in this listing (lines 3813 and 3824-3825 are absent).
 */
3789 bool CSGInterface::cmd_set_last_subkernel_weights()
3790 {
3791  if (m_nrhs!=2 || !create_return_values(0))
3792  return false;
3793 
3794  CKernel* kernel=ui_kernel->get_kernel();
3795  if (!kernel)
3796  SG_ERROR("No kernel.\n");
3797  if (kernel->get_kernel_type()!=K_COMBINED)
3798  SG_ERROR("Only works for Combined kernels.\n");
3799 
      // from here on `kernel` refers to the last subkernel
3800  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3801  if (!kernel)
3802  SG_ERROR("No last kernel.\n");
3803 
3804  bool success=false;
3805  float64_t* weights=NULL;
3806  int32_t dim=0;
3807  int32_t len=0;
3808  get_matrix(weights, dim, len);
3809 
3810  EKernelType ktype=kernel->get_kernel_type();
3811  if (ktype==K_WEIGHTEDDEGREE)
3812  {
      // NOTE(review): the declaration of `k` for this branch (listing line 3813) is absent
3814  if (dim!=k->get_degree() || len<1)
3815  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n");
3816 
      // a single column is passed on as len 0
3817  if (len==1)
3818  len=0;
3819 
3820  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3821  }
3822  else if (ktype==K_WEIGHTEDDEGREEPOS)
3823  {
      // NOTE(review): the declaration of `k` for this branch (listing lines 3824-3825) is absent
3826  if (dim!=k->get_degree() || len<1)
3827  SG_ERROR("Dimension mismatch (should be de(seq_length | 1) x degree)\n");
3828 
3829  if (len==1)
3830  len=0;
3831 
3832  success=k->set_weights(SGMatrix<float64_t>(weights, dim, len));
3833  }
3834  else // all other kernels
3835  {
3836  int32_t num_subkernels=kernel->get_num_subkernels();
3837  if (dim!=1 || len!=num_subkernels)
3838  SG_ERROR("Dimension mismatch (should be 1 x num_subkernels)\n");
3839 
3840  kernel->set_subkernel_weights(SGVector<float64_t>(weights, len));
3841  success=true;
3842  }
3843 
3844  return success;
3845 }
3846 
3847 bool CSGInterface::cmd_get_WD_position_weights()
3848 {
3849  if (m_nrhs!=1 || !create_return_values(1))
3850  return false;
3851 
3852  CKernel* kernel=ui_kernel->get_kernel();
3853  if (!kernel)
3854  SG_ERROR("No kernel.\n");
3855 
3856  if (kernel->get_kernel_type()==K_COMBINED)
3857  {
3858  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3859  if (!kernel)
3860  SG_ERROR("Couldn't find last kernel.\n");
3861 
3862  EKernelType ktype=kernel->get_kernel_type();
3863  if (ktype!=K_WEIGHTEDDEGREE && ktype!=K_WEIGHTEDDEGREEPOS)
3864  SG_ERROR("Wrong subkernel type.\n");
3865  }
3866 
3867  int32_t len=0;
3868  const float64_t* position_weights;
3869 
3870  if (kernel->get_kernel_type()==K_WEIGHTEDDEGREE)
3871  position_weights=((CWeightedDegreeStringKernel*) kernel)->get_position_weights(len);
3872  else
3873  position_weights=((CWeightedDegreePositionStringKernel*) kernel)->get_position_weights(len);
3874 
3875  if (position_weights==NULL)
3876  set_vector(position_weights, 0);
3877  else
3878  set_vector(position_weights, len);
3879 
3880  return true;
3881 }
3882 
3883 bool CSGInterface::cmd_get_last_subkernel_weights()
3884 {
3885  if (m_nrhs!=1 || !create_return_values(1))
3886  return false;
3887 
3888  CKernel* kernel=ui_kernel->get_kernel();
3889  EKernelType ktype=kernel->get_kernel_type();
3890  if (!kernel)
3891  SG_ERROR("No kernel.\n");
3892  if (ktype!=K_COMBINED)
3893  SG_ERROR("Only works for Combined kernels.\n");
3894 
3895  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
3896  if (!kernel)
3897  SG_ERROR("Couldn't find last kernel.\n");
3898 
3899  int32_t degree=0;
3900  int32_t len=0;
3901 
3902  if (ktype==K_COMBINED)
3903  {
3904  int32_t num_weights=0;
3905  const float64_t* weights=
3906  ((CCombinedKernel*) kernel)->get_subkernel_weights(num_weights);
3907 
3908  set_vector(weights, num_weights);
3909  return true;
3910  }
3911 
3912  float64_t* weights=NULL;
3913  if (ktype==K_WEIGHTEDDEGREE)
3914  weights=((CWeightedDegreeStringKernel*) kernel)->
3915  get_degree_weights(degree, len);
3916  else if (ktype==K_WEIGHTEDDEGREEPOS)
3917  weights=((CWeightedDegreePositionStringKernel*) kernel)->
3918  get_degree_weights(degree, len);
3919  else
3920  SG_ERROR("Only works for Weighted Degree (Position) kernels.\n");
3921 
3922  if (len==0)
3923  len=1;
3924 
3925  set_matrix(weights, degree, len);
3926 
3927  return true;
3928 }
3929 
/** Command handler that returns per-subkernel outputs of a WD-type kernel.
 *
 * Requires exactly one input argument and one return value. The current
 * kernel must exist, have a right-hand side, be of type K_WEIGHTEDDEGREE or
 * K_WEIGHTEDDEGREEPOS, and report is_tree_initialized(). For each of the
 * num_vec right-hand-side vectors, compute_by_tree() fills a slice of
 * degree*len values; the result is returned as a (degree*len) x num_vec
 * matrix. A reported len of 0 is treated as 1.
 *
 * @return false if the argument check fails, true once the matrix was set
 *
 * NOTE(review): the declarations of `k` in all four branches are not visible
 * in this listing (lines 3949, 3956-3957, 3977 and 3983-3984 are absent).
 */
3930 bool CSGInterface::cmd_compute_by_subkernels()
3931 {
3932  if (m_nrhs!=1 || !create_return_values(1))
3933  return false;
3934 
3935  CKernel* kernel=ui_kernel->get_kernel();
3936  if (!kernel)
3937  SG_ERROR("No kernel.\n");
3938  if (!kernel->get_rhs())
3939  SG_ERROR("No rhs.\n");
3940 
3941  int32_t num_vec=kernel->get_rhs()->get_num_vectors();
3942  int32_t degree=0;
3943  int32_t len=0;
3944  EKernelType ktype=kernel->get_kernel_type();
3945 
3946  // it would be nice to have a common base class for the WD kernels
3947  if (ktype==K_WEIGHTEDDEGREE)
3948  {
      // NOTE(review): the declaration of `k` (listing line 3949) is absent
      // the call below is used only for its degree/len out-parameters
3950  k->get_degree_weights(degree, len);
3951  if (!k->is_tree_initialized())
3952  SG_ERROR("Kernel optimization not initialized.\n");
3953  }
3954  else if (ktype==K_WEIGHTEDDEGREEPOS)
3955  {
      // NOTE(review): the declaration of `k` (listing lines 3956-3957) is absent
3958  k->get_degree_weights(degree, len);
3959  if (!k->is_tree_initialized())
3960  SG_ERROR("Kernel optimization not initialized.\n");
3961  }
3962  else
3963  SG_ERROR("Only works for Weighted Degree (Position) kernels.\n");
3964 
3965  if (len==0)
3966  len=1;
3967 
      // one column of num_feat values per right-hand-side vector
3968  int32_t num_feat=degree*len;
3969  int32_t num=num_feat*num_vec;
3970  float64_t* result=SG_MALLOC(float64_t, num);
3971 
3972  for (int32_t i=0; i<num; i++)
3973  result[i]=0;
3974 
3975  if (ktype==K_WEIGHTEDDEGREE)
3976  {
      // NOTE(review): the declaration of `k` (listing line 3977) is absent
3978  for (int32_t i=0; i<num_vec; i++)
3979  k->compute_by_tree(i, &result[i*num_feat]);
3980  }
3981  else
3982  {
      // NOTE(review): the declaration of `k` (listing lines 3983-3984) is absent
3985  for (int32_t i=0; i<num_vec; i++)
3986  k->compute_by_tree(i, &result[i*num_feat]);
3987  }
3988 
3989  set_matrix(result, num_feat, num_vec);
3990  SG_FREE(result);
3991 
3992  return true;
3993 }
3994 
3995 bool CSGInterface::cmd_init_kernel_optimization()
3996 {
3997  if (m_nrhs<1 || !create_return_values(0))
3998  return false;
3999 
4000  return ui_kernel->init_kernel_optimization();
4001 }
4002 
/**
 * Hands back the optimization data of the current kernel. What is returned
 * depends on the kernel type:
 *  - K_WEIGHTEDDEGREEPOS: a num_sym x num_feat matrix obtained from
 *    extract_w(), computed from the support vectors and alphas of the
 *    current classifier (needs a max_order argument, i.e. m_nrhs==2)
 *  - K_COMMWORDSTRING: the vector filled in by get_dictionary()
 *  - K_LINEAR: the vector returned by get_normal()
 * Any other kernel type is reported through SG_ERROR.
 *
 * Returns false if m_nrhs<1 or the return value cannot be created,
 * true otherwise.
 */
4003 bool CSGInterface::cmd_get_kernel_optimization()
4004 {
4005  if (m_nrhs<1 || !create_return_values(1))
4006  return false;
4007 
4008  CKernel* kernel=ui_kernel->get_kernel();
4009  if (!kernel)
4010  SG_ERROR("No kernel defined.\n");
4011 
4012  switch (kernel->get_kernel_type())
4013  {
4014  case K_WEIGHTEDDEGREEPOS:
4015  {
4016  if (m_nrhs!=2)
4017  SG_ERROR("parameter missing\n");
4018 
4019  int32_t max_order=get_int();
 // out-of-range orders are not rejected, only replaced by 1 with a warning
4020  if ((max_order<1) || (max_order>12))
4021  {
4022  SG_WARNING( "max_order out of range 1..12 (%d). setting to 1\n", max_order);
4023  max_order=1;
4024  }
4025 
 // NOTE(review): `k` is used further down in this case but its declaration
 // is not visible in this listing (line 4026 is absent) - confirm against
 // the original source.
4027  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4028  if (!svm)
4029  SG_ERROR("No SVM defined.\n");
4030 
4031  int32_t num_suppvec=svm->get_num_support_vectors();
4032  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4033  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4034  int32_t num_feat=0;
4035  int32_t num_sym=0;
4036 
 // copy support vector indices and alphas into plain arrays for extract_w()
4037  for (int32_t i=0; i<num_suppvec; i++)
4038  {
4039  sv_idx[i]=svm->get_support_vector(i);
4040  sv_weight[i]=svm->get_alpha(i);
4041  }
4042 
4043  float64_t* position_weights=k->extract_w(max_order, num_feat,
4044  num_sym, NULL, num_suppvec, sv_idx, sv_weight);
4045  SG_FREE(sv_idx);
4046  SG_FREE(sv_weight);
4047 
4048  set_matrix(position_weights, num_sym, num_feat);
4049  SG_FREE(position_weights);
4050 
4051  return true;
4052  }
4053 
4054  case K_COMMWORDSTRING:
 // NOTE(review): listing lines 4055 and 4057 are absent here; the
 // declaration of `k` (and possibly another case label) is not visible -
 // confirm against the original source.
4056  {
4058  int32_t len=0;
4059  float64_t* weights;
4060  k->get_dictionary(len, weights);
4061 
 // weights is not freed in this function
4062  set_vector(weights, len);
4063  return true;
4064  }
4065  case K_LINEAR:
4066  {
4067  CLinearKernel* k=(CLinearKernel*) kernel;
4068  int32_t len=0;
4069  const float64_t* weights=k->get_normal(len);
4070 
4071  set_vector(weights, len);
4072  return true;
4073  }
4074  default:
4075  SG_ERROR("Unsupported kernel %s.\n", kernel->get_name());
4076  }
4077 
4078  return true;
4079 }
4080 
4081 bool CSGInterface::cmd_delete_kernel_optimization()
4082 {
4083  if (m_nrhs<1 || !create_return_values(0))
4084  return false;
4085 
4086  return ui_kernel->delete_kernel_optimization();
4087 }
4088 
4089 bool CSGInterface::cmd_use_diagonal_speedup()
4090 {
4091  if (m_nrhs<2 || !create_return_values(0))
4092  return false;
4093 
4094  bool speedup=get_bool();
4095 
4096  CKernel* kernel=ui_kernel->get_kernel();
4097  if (!kernel)
4098  SG_ERROR("No kernel defined.\n");
4099 
4100  if (kernel->get_kernel_type()==K_COMBINED)
4101  {
4102  SG_DEBUG("Identified combined kernel.\n");
4103  kernel=((CCombinedKernel*) kernel)->get_last_kernel();
4104  if (!kernel)
4105  SG_ERROR("No last kernel defined.\n");
4106  }
4107 
4108  if (kernel->get_kernel_type()!=K_COMMWORDSTRING)
4109  SG_ERROR("Currently only commwordstring kernel supports diagonal speedup\n");
4110 
4111  ((CCommWordStringKernel*) kernel)->set_use_dict_diagonal_optimization(speedup);
4112 
4113  SG_INFO("Diagonal speedup %s.\n", speedup ? "enabled" : "disabled");
4114 
4115  return true;
4116 }
4117 
4118 bool CSGInterface::cmd_set_kernel_optimization_type()
4119 {
4120  if (m_nrhs<2 || !create_return_values(0))
4121  return false;
4122 
4123  int32_t len=0;
4124  char* opt_type=get_str_from_str_or_direct(len);
4125 
4126  bool success=ui_kernel->set_optimization_type(opt_type);
4127 
4128  SG_FREE(opt_type);
4129  return success;
4130 }
4131 
4132 bool CSGInterface::cmd_set_solver()
4133 {
4134  if (m_nrhs<2 || !create_return_values(0))
4135  return false;
4136 
4137  int32_t len=0;
4138  char* solver=get_str_from_str_or_direct(len);
4139 
4140  bool success=ui_classifier->set_solver(solver);
4141 
4142  SG_FREE(solver);
4143  return success;
4144 }
4145 
4146 bool CSGInterface::cmd_set_constraint_generator()
4147 {
4148  if (m_nrhs<2 || !create_return_values(0))
4149  return false;
4150 
4151  int32_t len=0;
4152  char* cg=get_str_from_str_or_direct(len);
4153 
4154  bool success=ui_classifier->set_constraint_generator(cg);
4155 
4156  SG_FREE(cg);
4157  return success;
4158 }
4159 
4160 bool CSGInterface::cmd_set_prior_probs()
4161 {
4162  if (m_nrhs<3 || !create_return_values(0))
4163  return false;
4164 
4165  CSalzbergWordStringKernel* kernel=
4166  (CSalzbergWordStringKernel*) ui_kernel->get_kernel();
4167  if (kernel->get_kernel_type()!=K_SALZBERG)
4168  SG_ERROR("SalzbergWordStringKernel required for setting prior probs!\n");
4169 
4170  float64_t pos_probs=get_real_from_real_or_str();
4171  float64_t neg_probs=get_real_from_real_or_str();
4172 
4173  kernel->set_prior_probs(pos_probs, neg_probs);
4174 
4175  return true;
4176 }
4177 
4178 bool CSGInterface::cmd_set_prior_probs_from_labels()
4179 {
4180  if (m_nrhs<2 || !create_return_values(0))
4181  return false;
4182 
4183  CSalzbergWordStringKernel* kernel=
4184  (CSalzbergWordStringKernel*) ui_kernel->get_kernel();
4185  if (kernel->get_kernel_type()!=K_SALZBERG)
4186  SG_ERROR("SalzbergWordStringKernel required for setting prior probs!\n");
4187 
4188  float64_t* lab=NULL;
4189  int32_t len=0;
4190  get_vector(lab, len);
4191 
4192  CLabels* labels=new CLabels(len);
4193  for (int32_t i=0; i<len; i++)
4194  {
4195  if (!labels->set_label(i, lab[i]))
4196  SG_ERROR("Couldn't set label %d (of %d): %f.\n", i, len, lab[i]);
4197  }
4198  SG_FREE(lab);
4199 
4200  kernel->set_prior_probs_from_labels(labels);
4201 
4202  SG_UNREF(labels);
4203  return true;
4204 }
4205 
4206 
4207 
4208 
/**
 * Creates a distance object from a distance name and a data type name and
 * installs it with ui_distance->set_distance().
 *
 * Two string arguments are read: the distance type (e.g. "MINKOWSKI",
 * "MANHATTAN", "EUCLIDIAN") and the data type ("REAL", "WORD" or
 * "SPARSEREAL", depending on the distance). "MINKOWSKI" is only accepted
 * together with m_nrhs==4 and reads an extra real argument; "HAMMING" reads
 * an optional boolean when m_nrhs==4. For combinations that match no branch,
 * `distance` stays NULL and is passed on as such.
 *
 * Returns false when m_nrhs<3 or the (empty) return value set cannot be
 * created; otherwise the result of ui_distance->set_distance().
 */
4211 bool CSGInterface::cmd_set_distance()
4212 {
4213  if (m_nrhs<3 || !create_return_values(0))
4214  return false;
4215 
4216  CDistance* distance=NULL;
4217  int32_t len=0;
 // len is reused for both strings; its value is not used afterwards
4218  char* type=get_str_from_str_or_direct(len);
4219  char* dtype=get_str_from_str_or_direct(len);
4220 
4221  if (strmatch(type, "MINKOWSKI") && m_nrhs==4)
4222  {
4223  float64_t k=get_real_from_real_or_str();
4224  distance=ui_distance->create_minkowski(k);
4225  }
4226  else if (strmatch(type, "MANHATTAN"))
4227  {
4228  if (strmatch(dtype, "REAL"))
4229  distance=ui_distance->create_generic(D_MANHATTAN);
4230  else if (strmatch(dtype, "WORD"))
4231  distance=ui_distance->create_generic(D_MANHATTANWORD);
4232  }
4233  else if (strmatch(type, "HAMMING") && strmatch(dtype, "WORD"))
4234  {
4235  bool use_sign=false;
4236  if (m_nrhs==4)
4237  use_sign=get_bool_from_bool_or_str(); // optional
4238 
4239  distance=ui_distance->create_hammingword(use_sign);
4240  }
4241  else if (strmatch(type, "CANBERRA"))
4242  {
4243  if (strmatch(dtype, "REAL"))
4244  distance=ui_distance->create_generic(D_CANBERRA);
4245  else if (strmatch(dtype, "WORD"))
4246  distance=ui_distance->create_generic(D_CANBERRAWORD);
4247  }
4248  else if (strmatch(type, "CHEBYSHEW") && strmatch(dtype, "REAL"))
4249  {
4250  distance=ui_distance->create_generic(D_CHEBYSHEW);
4251  }
4252  else if (strmatch(type, "GEODESIC") && strmatch(dtype, "REAL"))
4253  {
4254  distance=ui_distance->create_generic(D_GEODESIC);
4255  }
4256  else if (strmatch(type, "JENSEN") && strmatch(dtype, "REAL"))
4257  {
4258  distance=ui_distance->create_generic(D_JENSEN);
4259  }
4260  else if (strmatch(type, "CHISQUARE") && strmatch(dtype, "REAL"))
4261  {
4262  distance=ui_distance->create_generic(D_CHISQUARE);
4263  }
4264  else if (strmatch(type, "TANIMOTO") && strmatch(dtype, "REAL"))
4265  {
4266  distance=ui_distance->create_generic(D_TANIMOTO);
4267  }
4268  else if (strmatch(type, "COSINE") && strmatch(dtype, "REAL"))
4269  {
4270  distance=ui_distance->create_generic(D_COSINE);
4271  }
4272  else if (strmatch(type, "BRAYCURTIS") && strmatch(dtype, "REAL"))
4273  {
4274  distance=ui_distance->create_generic(D_BRAYCURTIS);
4275  }
4276  else if (strmatch(type, "EUCLIDIAN"))
4277  {
4278  if (strmatch(dtype, "REAL"))
4279  distance=ui_distance->create_generic(D_EUCLIDIAN);
4280  else if (strmatch(dtype, "SPARSEREAL"))
4281  distance=ui_distance->create_generic(D_SPARSEEUCLIDIAN);
4282  }
4283  else
 // NOTE(review): the statement belonging to this else is not visible in this
 // listing (line 4284 is absent). As shown, the else would bind to the
 // SG_FREE(type) below - confirm against the original source.
4285 
4286  SG_FREE(type);
4287  SG_FREE(dtype);
4288  return ui_distance->set_distance(distance);
4289 }
4290 
4291 bool CSGInterface::cmd_init_distance()
4292 {
4293  SG_DEPRECATED;
4294  return true;
4295 }
4296 
4297 bool CSGInterface::cmd_get_distance_matrix()
4298 {
4299  if (m_nrhs!=2 || !create_return_values(1))
4300  return false;
4301 
4302  int32_t len=0;
4303  char* target=get_string(len);
4304 
4305  bool success=ui_distance->init_distance(target);
4306 
4307  if (success)
4308  {
4309  CDistance* distance=ui_distance->get_distance();
4310  if (!distance || !distance->has_features())
4311  SG_ERROR("No distance defined or not initialized.\n");
4312 
4313  int32_t num_vec_lhs=0;
4314  int32_t num_vec_rhs=0;
4315  float64_t* dmatrix=NULL;
4316  dmatrix=distance->get_distance_matrix_real(num_vec_lhs, num_vec_rhs, dmatrix);
4317 
4318  set_matrix(dmatrix, num_vec_lhs, num_vec_rhs);
4319  SG_FREE(dmatrix);
4320  }
4321 
4322  return success;
4323 }
4324 
4325 
4326 /* POIM */
4327 
4328 bool CSGInterface::cmd_get_SPEC_consensus()
4329 {
4330  if (m_nrhs!=1 || !create_return_values(1))
4331  return false;
4332 
4333  CKernel* kernel=ui_kernel->get_kernel();
4334  if (!kernel)
4335  SG_ERROR("No kernel.\n");
4336  if (kernel->get_kernel_type()!=K_COMMWORDSTRING)
4337  SG_ERROR("Only works for CommWordString kernels.\n");
4338 
4339  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4340  ASSERT(svm);
4341  int32_t num_suppvec=svm->get_num_support_vectors();
4342  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4343  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4344  int32_t num_feat=0;
4345 
4346  for (int32_t i=0; i<num_suppvec; i++)
4347  {
4348  sv_idx[i]=svm->get_support_vector(i);
4349  sv_weight[i]=svm->get_alpha(i);
4350  }
4351 
4352  char* consensus=((CCommWordStringKernel*) kernel)->compute_consensus(
4353  num_feat, num_suppvec, sv_idx, sv_weight);
4354  SG_FREE(sv_idx);
4355  SG_FREE(sv_weight);
4356 
4357  set_vector(consensus, num_feat);
4358  SG_FREE(consensus);
4359 
4360  return true;
4361 }
4362 
4363 bool CSGInterface::cmd_get_SPEC_scoring()
4364 {
4365  if (m_nrhs!=2 || !create_return_values(1))
4366  return false;
4367 
4368  int32_t max_order=get_int();
4369  CKernel* kernel=ui_kernel->get_kernel();
4370  if (!kernel)
4371  SG_ERROR("No kernel.\n");
4372 
4373  EKernelType ktype=kernel->get_kernel_type();
4374  if (ktype!=K_COMMWORDSTRING && ktype!=K_WEIGHTEDCOMMWORDSTRING)
4375  SG_ERROR("Only works for (Weighted) CommWordString kernels.\n");
4376 
4377  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4378  ASSERT(svm);
4379  int32_t num_suppvec=svm->get_num_support_vectors();
4380  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4381  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4382  int32_t num_feat=0;
4383  int32_t num_sym=0;
4384 
4385  for (int32_t i=0; i<num_suppvec; i++)
4386  {
4387  sv_idx[i]=svm->get_support_vector(i);
4388  sv_weight[i]=svm->get_alpha(i);
4389  }
4390 
4391  if ((max_order<1) || (max_order>8))
4392  {
4393  SG_WARNING( "max_order out of range 1..8 (%d). setting to 1\n", max_order);
4394  max_order=1;
4395  }
4396 
4397  float64_t* position_weights=NULL;
4398  if (ktype==K_COMMWORDSTRING)
4399  position_weights=((CCommWordStringKernel*) kernel)->compute_scoring(
4400  max_order, num_feat, num_sym, NULL,
4401  num_suppvec, sv_idx, sv_weight);
4402  else
4403  position_weights=((CWeightedCommWordStringKernel*) kernel)->compute_scoring(
4404  max_order, num_feat, num_sym, NULL,
4405  num_suppvec, sv_idx, sv_weight);
4406  SG_FREE(sv_idx);
4407  SG_FREE(sv_weight);
4408 
4409  set_matrix(position_weights, num_sym, num_feat);
4410  SG_FREE(position_weights);
4411 
4412  return true;
4413 }
4414 
4415 bool CSGInterface::cmd_get_WD_consensus()
4416 {
4417  if (m_nrhs!=1 || !create_return_values(1))
4418  return false;
4419 
4420  CKernel* kernel=ui_kernel->get_kernel();
4421  if (!kernel)
4422  SG_ERROR("No kernel.\n");
4423  if (kernel->get_kernel_type()!=K_WEIGHTEDDEGREEPOS)
4424  SG_ERROR("Only works for Weighted Degree Position kernels.\n");
4425 
4426  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4427  ASSERT(svm);
4428  int32_t num_suppvec=svm->get_num_support_vectors();
4429  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4430  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4431  int32_t num_feat=0;
4432 
4433  for (int32_t i=0; i<num_suppvec; i++)
4434  {
4435  sv_idx[i]=svm->get_support_vector(i);
4436  sv_weight[i]=svm->get_alpha(i);
4437  }
4438 
4439  char* consensus=((CWeightedDegreePositionStringKernel*) kernel)->compute_consensus(
4440  num_feat, num_suppvec, sv_idx, sv_weight);
4441  SG_FREE(sv_idx);
4442  SG_FREE(sv_weight);
4443 
4444  set_vector(consensus, num_feat);
4445  SG_FREE(consensus);
4446 
4447  return true;
4448 }
4449 
/**
 * Computes a POIM with the current Weighted Degree Position kernel.
 *
 * Arguments read: max_order (integer) and a distribution matrix. The
 * distribution must have num_sym rows and seqlen columns, where seqlen and
 * num_sym are taken from the kernel's left-hand-side features. The support
 * vector indices and alphas of the current classifier are passed to
 * compute_POIM() and the result is handed back as a num_sym x seqlen matrix.
 *
 * Returns false when m_nrhs!=3 or the return value cannot be created,
 * true otherwise. Invalid input is reported through SG_ERROR / ASSERT.
 */
4450 bool CSGInterface::cmd_compute_POIM_WD()
4451 {
4452  if (m_nrhs!=3 || !create_return_values(1))
4453  return false;
4454 
4455  int32_t max_order=get_int();
4456  float64_t* distribution=NULL;
4457  int32_t num_dfeat=0;
4458  int32_t num_dvec=0;
4459  get_matrix(distribution, num_dfeat, num_dvec);
4460 
4461  if (!distribution)
4462  SG_ERROR("Wrong distribution.\n");
4463 
4464  CKernel* kernel=ui_kernel->get_kernel();
4465  if (!kernel)
4466  SG_ERROR("No Kernel.\n");
4467  if (kernel->get_kernel_type()!=K_WEIGHTEDDEGREEPOS)
4468  SG_ERROR("Only works for Weighted Degree Position kernels.\n");
4469 
4470  int32_t seqlen=0;
4471  int32_t num_sym=0;
 // NOTE(review): the start of the `sfeat` declaration is not visible in this
 // listing (line 4472 is absent); only the initializer expression below is
 // shown - confirm against the original source.
4473  (((CWeightedDegreePositionStringKernel*) kernel)->get_lhs());
4474  ASSERT(sfeat);
4475  seqlen=sfeat->get_max_vector_length();
4476  num_sym=(int32_t) sfeat->get_num_symbols();
4477 
 // the distribution must match the lhs features: one column per sequence
 // position, one row per symbol
4478  if (num_dvec!=seqlen || num_dfeat!=num_sym)
4479  {
4480  SG_ERROR("distribution should have (seqlen x num_sym) elements"
4481  "(seqlen: %d vs. %d symbols: %d vs. %d)\n", seqlen,
4482  num_dvec, num_sym, num_dfeat);
4483  }
4484 
4485  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4486  ASSERT(svm);
4487  int32_t num_suppvec=svm->get_num_support_vectors();
4488  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4489  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4490 
4491  for (int32_t i=0; i<num_suppvec; i++)
4492  {
4493  sv_idx[i]=svm->get_support_vector(i);
4494  sv_weight[i]=svm->get_alpha(i);
4495  }
4496 
 // the range check on max_order is disabled here, so max_order is passed on
 // unchanged
4497  /*
4498  if ((max_order < 1) || (max_order > 12))
4499  {
4500  SG_WARNING( "max_order out of range 1..12 (%d). setting to 1.\n", max_order);
4501  max_order=1;
4502  }
4503  */
4504 
4505  float64_t* position_weights;
4506  position_weights=((CWeightedDegreePositionStringKernel*) kernel)->compute_POIM(
4507  max_order, seqlen, num_sym, NULL,
4508  num_suppvec, sv_idx, sv_weight, distribution);
4509  SG_FREE(sv_idx);
4510  SG_FREE(sv_weight);
 // NOTE(review): `distribution` is not released anywhere in this function -
 // confirm whether compute_POIM() takes ownership or whether this leaks.
4511 
4512  set_matrix(position_weights, num_sym, seqlen);
4513  SG_FREE(position_weights);
4514 
4515  return true;
4516  }
4517 
4518  bool CSGInterface::cmd_get_WD_scoring()
4519  {
4520  if (m_nrhs!=2 || !create_return_values(1))
4521  return false;
4522 
4523  int32_t max_order=get_int();
4524 
4525  CKernel* kernel=ui_kernel->get_kernel();
4526  if (!kernel)
4527  SG_ERROR("No kernel.\n");
4528  if (kernel->get_kernel_type()!=K_WEIGHTEDDEGREEPOS)
4529  SG_ERROR("Only works for Weighted Degree Position kernels.\n");
4530 
4531  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4532  ASSERT(svm);
4533  int32_t num_suppvec=svm->get_num_support_vectors();
4534  int32_t* sv_idx=SG_MALLOC(int32_t, num_suppvec);
4535  float64_t* sv_weight=SG_MALLOC(float64_t, num_suppvec);
4536  int32_t num_feat=0;
4537  int32_t num_sym=0;
4538 
4539  for (int32_t i=0; i<num_suppvec; i++)
4540  {
4541  sv_idx[i]=svm->get_support_vector(i);
4542  sv_weight[i]=svm->get_alpha(i);
4543  }
4544 
4545  if ((max_order<1) || (max_order>12))
4546  {
4547  SG_WARNING("max_order out of range 1..12 (%d). setting to 1\n", max_order);
4548  max_order=1;
4549  }
4550 
4551  float64_t* position_weights=
4552  ((CWeightedDegreePositionStringKernel*) kernel)->compute_scoring(
4553  max_order, num_feat, num_sym, NULL, num_suppvec, sv_idx, sv_weight);
4554  SG_FREE(sv_idx);
4555  SG_FREE(sv_weight);
4556 
4557  set_matrix(position_weights, num_sym, num_feat);
4558  SG_FREE(position_weights);
4559 
4560  return true;
4561 }
4562 
4563 
4564 /* Classifier */
4565 
4566 bool CSGInterface::cmd_classify()
4567 {
4568  if (m_nrhs!=1 || !create_return_values(1))
4569  return false;
4570 
4571  if (!ui_kernel->get_kernel() ||
4572  !ui_kernel->get_kernel()->get_kernel_type()==K_CUSTOM)
4573  {
4574  CFeatures* feat=ui_features->get_test_features();
4575  if (!feat)
4576  SG_ERROR("No features found.\n");
4577  }
4578 
4579  CLabels* labels=ui_classifier->classify();
4580  if (!labels)
4581  SG_ERROR("Classify failed\n");
4582 
4583  int32_t num_vec=labels->get_num_labels();
4584  float64_t* result=SG_MALLOC(float64_t, num_vec);
4585  for (int32_t i=0; i<num_vec; i++)
4586  result[i]=labels->get_label(i);
4587  SG_UNREF(labels);
4588 
4589  set_vector(result, num_vec);
4590  SG_FREE(result);
4591 
4592  return true;
4593 }
4594 
4595 bool CSGInterface::cmd_classify_example()
4596 {
4597  if (m_nrhs!=2 || !create_return_values(1))
4598  return false;
4599 
4600  int32_t idx=get_int();
4601  float64_t result=0;
4602 
4603  if (!ui_classifier->classify_example(idx, result))
4604  SG_ERROR("Classify_example failed.\n");
4605 
4606  set_real(result);
4607 
4608  return true;
4609 }
4610 
4611 bool CSGInterface::cmd_get_classifier()
4612 {
4613  if (m_nrhs<1 || m_nrhs>2 || !create_return_values(2))
4614  return false;
4615 
4616  int32_t idx=-1;
4617  if (m_nrhs==2)
4618  idx=get_int();
4619 
4620  float64_t* bias=NULL;
4621  float64_t* weights=NULL;
4622  int32_t rows=0;
4623  int32_t cols=0;
4624  int32_t brows=0;
4625  int32_t bcols=0;
4626 
4627  if (!ui_classifier->get_trained_classifier(
4628  weights, rows, cols, bias, brows, bcols, idx))
4629  return false;
4630 
4631  //SG_PRINT("brows %d, bcols %d\n", brows, bcols);
4632  //CMath::display_matrix(bias, brows, bcols);
4633  set_matrix(bias, brows, bcols);
4634  SG_FREE(bias);
4635 
4636  //SG_PRINT("rows %d, cols %d\n", rows, cols);
4637  //CMath::display_matrix(weights, rows, cols);
4638  set_matrix(weights, rows, cols);
4639  SG_FREE(weights);
4640 
4641  return true;
4642 }
4643 
4644 bool CSGInterface::cmd_new_classifier()
4645 {
4646  if (m_nrhs<2 || !create_return_values(0))
4647  return false;
4648 
4649  int32_t len=0;
4650  char* name=get_str_from_str_or_direct(len);
4651  int32_t d=6;
4652  int32_t from_d=40;
4653 
4654  if (m_nrhs>2)
4655  {
4656  d=get_int_from_int_or_str();
4657 
4658  if (m_nrhs>3)
4659  from_d=get_int_from_int_or_str();
4660  }
4661 
4662  bool success=ui_classifier->new_classifier(name, d, from_d);
4663 
4664  SG_FREE(name);
4665  return success;
4666 }
4667 
4668 bool CSGInterface::cmd_save_classifier()
4669 {
4670  if (m_nrhs<2 || !create_return_values(0))
4671  return false;
4672 
4673  int32_t len=0;
4674  char* filename=get_str_from_str_or_direct(len);
4675 
4676  bool success=ui_classifier->save(filename);
4677 
4678  SG_FREE(filename);
4679  return success;
4680 }
4681 
4682 bool CSGInterface::cmd_load_classifier()
4683 {
4684  if (m_nrhs<3 || !create_return_values(0))
4685  return false;
4686 
4687  int32_t len=0;
4688  char* filename=get_str_from_str_or_direct(len);
4689  char* type=get_str_from_str_or_direct(len);
4690 
4691  bool success=ui_classifier->load(filename, type);
4692 
4693  SG_FREE(filename);
4694  SG_FREE(type);
4695  return success;
4696 }
4697 
4698 
4699 bool CSGInterface::cmd_get_num_svms()
4700 {
4701  if (m_nrhs!=1 || !create_return_values(1))
4702  return false;
4703 
4704  set_int(ui_classifier->get_num_svms());
4705 
4706  return true;
4707 }
4708 
4709 
4710 bool CSGInterface::cmd_get_svm()
4711 {
4712  return cmd_get_classifier();
4713 }
4714 
4715 bool CSGInterface::cmd_set_svm()
4716 {
4717  if (m_nrhs!=3 || !create_return_values(0))
4718  return false;
4719 
4720  float64_t bias=get_real();
4721 
4722  float64_t* alphas=NULL;
4723  int32_t num_feat_alphas=0;
4724  int32_t num_vec_alphas=0;
4725  get_matrix(alphas, num_feat_alphas, num_vec_alphas);
4726 
4727  if (!alphas)
4728  SG_ERROR("No proper alphas given.\n");
4729  if (num_vec_alphas!=2)
4730  SG_ERROR("Not 2 vectors in alphas.\n");
4731 
4732  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4733  if (!svm)
4734  SG_ERROR("No SVM object available.\n");
4735 
4736  svm->create_new_model(num_feat_alphas);
4737  svm->set_bias(bias);
4738 
4739  int32_t num_support_vectors=svm->get_num_support_vectors();
4740  for (int32_t i=0; i<num_support_vectors; i++)
4741  {
4742  svm->set_alpha(i, alphas[i]);
4743  svm->set_support_vector(i, (int32_t) alphas[i+num_support_vectors]);
4744  }
4745  SG_FREE(alphas);
4746 
4747  return true;
4748 }
4749 
4750 bool CSGInterface::cmd_set_linear_classifier()
4751 {
4752  if (m_nrhs!=3 || !create_return_values(0))
4753  return false;
4754 
4755  float64_t bias=get_real();
4756 
4757  float64_t* w=NULL;
4758  int32_t len=0;
4759  get_vector(w, len);
4760 
4761  if (!len)
4762  SG_ERROR("No proper weight vector given.\n");
4763 
4764  CLinearMachine* c=(CLinearMachine*) ui_classifier->get_classifier();
4765  if (!c)
4766  SG_ERROR("No Linear Classifier object available.\n");
4767 
4768  c->set_w(SGVector<float64_t>(w, len));
4769  c->set_bias(bias);
4770 
4771  SG_FREE(w);
4772 
4773  return true;
4774 }
4775 
4776 bool CSGInterface::cmd_get_svm_objective()
4777 {
4778  if (m_nrhs!=1 || !create_return_values(1))
4779  return false;
4780 
4781  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4782  if (!svm)
4783  SG_ERROR("No SVM set.\n");
4784 
4785  set_real(svm->get_objective());
4786 
4787  return true;
4788 }
4789 
4790 bool CSGInterface::cmd_compute_svm_primal_objective()
4791 {
4792  return do_compute_objective(SVM_PRIMAL);
4793 }
4794 
4795 bool CSGInterface::cmd_compute_svm_dual_objective()
4796 {
4797  return do_compute_objective(SVM_DUAL);
4798 }
4799 
4800 bool CSGInterface::cmd_compute_mkl_dual_objective()
4801 {
4802  return do_compute_objective(MKL_DUAL);
4803 }
4804 
4805 bool CSGInterface::cmd_compute_relative_mkl_duality_gap()
4806 {
4807  return do_compute_objective(MKL_RELATIVE_DUALITY_GAP);
4808 }
4809 
4810 bool CSGInterface::cmd_compute_absolute_mkl_duality_gap()
4811 {
4812  return do_compute_objective(MKL_ABSOLUTE_DUALITY_GAP);
4813 }
4814 
/**
 * Shared implementation of the objective commands: computes the objective
 * selected by `obj` on the current SVM and returns it as a real value.
 *
 * Before the computation the current train labels and kernel are attached to
 * the SVM. A missing SVM, missing train labels, a missing kernel or an
 * uninitialized kernel is reported through SG_ERROR.
 *
 * Returns false when m_nrhs!=1, when the return value cannot be created or
 * when `obj` is not one of the handled values; true otherwise.
 */
4815 bool CSGInterface::do_compute_objective(E_WHICH_OBJ obj)
4816 {
4817  if (m_nrhs!=1 || !create_return_values(1))
4818  return false;
4819 
 // placeholder value; every handled case below overwrites it
4820  float64_t result=23.5;
4821 
4822  CSVM* svm=(CSVM*) ui_classifier->get_classifier();
4823  if (!svm)
4824  SG_ERROR("No SVM set.\n");
4825 
4826  CLabels* trainlabels=NULL;
4827  trainlabels=ui_labels->get_train_labels();
4828 
4829  if (!trainlabels)
4830  SG_ERROR("No trainlabels available.\n");
4831 
4832  CKernel* kernel=ui_kernel->get_kernel();
4833  if (!kernel)
4834  SG_ERROR("No kernel available.\n");
4835 
4836  if (!ui_kernel->is_initialized() || !kernel->has_features())
4837  SG_ERROR("Kernel not initialized.\n");
4838 
4839  ((CKernelMachine*) svm)->set_labels(trainlabels);
4840  ((CKernelMachine*) svm)->set_kernel(kernel);
4841 
4842 
 // NOTE(review): in each MKL_* case one listing line is absent (4852, 4856,
 // 4861, 4869), so a statement preceding the CMKL cast may be missing from
 // view - confirm against the original source.
4843  switch (obj)
4844  {
4845  case SVM_PRIMAL:
4846  result=svm->compute_svm_primal_objective();
4847  break;
4848  case SVM_DUAL:
4849  result=svm->compute_svm_dual_objective();
4850  break;
4851  case MKL_PRIMAL:
4853  result=((CMKL*) svm)->compute_mkl_primal_objective();
4854  break;
4855  case MKL_DUAL:
4857  result=((CMKL*) svm)->compute_mkl_dual_objective();
4858  break;
4859  case MKL_RELATIVE_DUALITY_GAP:
4860  {
 // NOTE(review): `primal` is assigned the dual objective and `dual` the
 // primal objective, so the value below is (dual_obj-primal_obj)/primal_obj -
 // confirm that this naming/sign is intended.
4862  float64_t primal=((CMKL*) svm)->compute_mkl_dual_objective();
4863  float64_t dual=((CMKL*) svm)->compute_mkl_primal_objective();
4864  result=(primal-dual)/dual;
4865  }
4866  break;
4867  case MKL_ABSOLUTE_DUALITY_GAP:
4868  {
 // same swapped naming as above: the value below is primal_obj-dual_obj
4870  float64_t primal=((CMKL*) svm)->compute_mkl_dual_objective();
4871  float64_t dual=((CMKL*) svm)->compute_mkl_primal_objective();
4872  result=dual-primal;
4873  }
4874  break;
4875  default:
4876  SG_SERROR("Error calling do_compute_objective\n");
4877  return false;
4878  };
4879 
4880  set_real(result);
4881  return true;
4882 }
4883 
4884 bool CSGInterface::cmd_train_classifier()
4885 {
4886  if (m_nrhs<1 || !create_return_values(0))
4887  return false;
4888 
4889  CMachine* classifier=ui_classifier->get_classifier();
4890  if (!classifier)
4891  SG_ERROR("No classifier available.\n");
4892 
4893  EClassifierType type=classifier->get_classifier_type();
4894  switch (type)
4895  {
4896  case CT_LIGHT:
4897  case CT_LIGHTONECLASS:
4898  case CT_LIBSVM:
4899  case CT_SCATTERSVM:
4900  case CT_MPD:
4901  case CT_GPBT:
4902  case CT_CPLEXSVM:
4903  case CT_GMNPSVM:
4904  case CT_GNPPSVM:
4905  case CT_KERNELPERCEPTRON:
4906  case CT_LIBSVR:
4907  case CT_LIBSVMMULTICLASS:
4908  case CT_LIBSVMONECLASS:
4909  case CT_SVRLIGHT:
4910  case CT_LARANK:
4911  return ui_classifier->train_svm();
4912  case CT_MKLMULTICLASS:
4913  return ui_classifier->train_mkl_multiclass();
4914  case CT_MKLCLASSIFICATION:
4915  case CT_MKLREGRESSION:
4916  case CT_MKLONECLASS:
4917  return ui_classifier->train_mkl();
4918 
4919  case CT_KRR:
4920  return ui_classifier->train_krr();
4921 
4922  case CT_KNN:
4923  {
4924  if (m_nrhs<2)
4925  return false;
4926 
4927  int32_t k=get_int_from_int_or_str();
4928 
4929  return ui_classifier->train_knn(k);
4930  }
4931 
4932  case CT_KMEANS:
4933  {
4934  if (m_nrhs<3)
4935  return false;
4936 
4937  int32_t k=get_int_from_int_or_str();
4938  int32_t max_iter=get_int_from_int_or_str();
4939 
4940  return ui_classifier->train_clustering(k, max_iter);
4941  }
4942 
4943  case CT_HIERARCHICAL:
4944  {
4945  if (m_nrhs<2)
4946  return false;
4947 
4948  int32_t merges=get_int_from_int_or_str();
4949 
4950  return ui_classifier->train_clustering(merges);
4951  }
4952 
4953  case CT_LDA:
4954  {
4955  float64_t gamma=0;
4956  if (m_nrhs==2)
4957  gamma=get_real_from_real_or_str();
4958 
4959  return ui_classifier->train_linear(gamma);
4960  }
4961 
4962  case CT_PERCEPTRON:
4963  case CT_SVMLIN:
4964  case CT_SVMPERF:
4965  case CT_SUBGRADIENTSVM:
4966  case CT_SVMOCAS:
4967  case CT_SVMSGD:
4968  case CT_LPM:
4969  case CT_LPBOOST:
4970  case CT_SUBGRADIENTLPM:
4971  case CT_LIBLINEAR:
4972  return ui_classifier->train_linear();
4973 
4974  case CT_WDSVMOCAS:
4975  return ui_classifier->train_wdocas();
4976 
4977  default:
4978  SG_ERROR("Unknown classifier type %d.\n", type);
4979  }
4980 
4981  return false;
4982 }
4983 
4984 bool CSGInterface::cmd_do_auc_maximization()
4985 {
4986  if (m_nrhs!=2 || !create_return_values(0))
4987  return false;
4988 
4989  bool do_auc=get_bool_from_bool_or_str();
4990 
4991  return ui_classifier->set_do_auc_maximization(do_auc);
4992 }
4993 
4994 bool CSGInterface::cmd_set_perceptron_parameters()
4995 {
4996  if (m_nrhs!=3 || !create_return_values(0))
4997  return false;
4998 
4999  float64_t lernrate=get_real_from_real_or_str();
5000  int32_t maxiter=get_int_from_int_or_str();
5001 
5002  return ui_classifier->set_perceptron_parameters(lernrate, maxiter);
5003 }
5004 
5005 bool CSGInterface::cmd_set_svm_qpsize()
5006 {
5007  if (m_nrhs!=2 || !create_return_values(0))
5008  return false;
5009 
5010  int32_t qpsize=get_int_from_int_or_str();
5011 
5012  return ui_classifier->set_svm_qpsize(qpsize);
5013 }
5014 
5015 bool CSGInterface::cmd_set_svm_max_qpsize()
5016 {
5017  if (m_nrhs!=2 || !create_return_values(0))
5018  return false;
5019 
5020  int32_t max_qpsize=get_int_from_int_or_str();
5021 
5022  return ui_classifier->set_svm_max_qpsize(max_qpsize);
5023 }
5024 
5025 bool CSGInterface::cmd_set_svm_bufsize()
5026 {
5027  if (m_nrhs!=2 || !create_return_values(0))
5028  return false;
5029 
5030  int32_t bufsize=get_int_from_int_or_str();
5031 
5032  return ui_classifier->set_svm_bufsize(bufsize);
5033 }
5034 
5035 bool CSGInterface::cmd_set_svm_C()
5036 {
5037  if (m_nrhs<2 || !create_return_values(0))
5038  return false;
5039 
5040  float64_t C1=get_real_from_real_or_str();
5041  float64_t C2=C1;
5042 
5043  if (m_nrhs==3)
5044  C2=get_real_from_real_or_str();
5045 
5046  return ui_classifier->set_svm_C(C1, C2);
5047 }
5048 
5049 bool CSGInterface::cmd_set_svm_epsilon()
5050 {
5051  if (m_nrhs!=2 || !create_return_values(0))
5052  return false;
5053 
5054  float64_t epsilon=get_real_from_real_or_str();
5055 
5056  return ui_classifier->set_svm_epsilon(epsilon);
5057 }
5058 
5059 bool CSGInterface::cmd_set_svr_tube_epsilon()
5060 {
5061  if (m_nrhs!=2 || !create_return_values(0))
5062  return false;
5063 
5064  float64_t tube_epsilon=get_real_from_real_or_str();
5065 
5066  return ui_classifier->set_svr_tube_epsilon(tube_epsilon);
5067 }
5068 
5069 bool CSGInterface::cmd_set_svm_nu()
5070 {
5071  if (m_nrhs!=2 || !create_return_values(0))
5072  return false;
5073 
5074  float64_t nu=get_real_from_real_or_str();
5075 
5076  return ui_classifier->set_svm_nu(nu);
5077 }
5078 
5079 bool CSGInterface::cmd_set_svm_mkl_parameters()
5080 {
5081  if (m_nrhs<3 || m_nrhs>4 || !create_return_values(0))
5082  return false;
5083 
5084  float64_t weight_epsilon=get_real_from_real_or_str();
5085  float64_t C_mkl=get_real_from_real_or_str();
5086  float64_t mkl_norm=1.0;
5087 
5088  if (m_nrhs==4)
5089  mkl_norm=get_real_from_real_or_str();
5090 
5091  return ui_classifier->set_svm_mkl_parameters(weight_epsilon, C_mkl, mkl_norm);
5092 }
5093 
5094 bool CSGInterface::cmd_set_elasticnet_lambda()
5095 {
5096  if (m_nrhs!=2 || !create_return_values(0))
5097  return false;
5098  float64_t lambda=get_real_from_real_or_str();
5099  return ui_classifier->set_elasticnet_lambda(lambda);
5100 }
5101 
5102 bool CSGInterface::cmd_set_mkl_block_norm()
5103 {
5104  if (m_nrhs!=2 || !create_return_values(0))
5105  return false;
5106  float64_t bnorm=get_real_from_real_or_str();
5107  return ui_classifier->set_mkl_block_norm(bnorm);
5108 }
5109 
5110 
5111 bool CSGInterface::cmd_set_max_train_time()
5112 {
5113  if (m_nrhs!=2 || !create_return_values(0))
5114  return false;
5115 
5116  float64_t max_train_time=get_real_from_real_or_str();
5117 
5118  return ui_classifier->set_max_train_time(max_train_time);
5119 }
5120 
5121 bool CSGInterface::cmd_set_svm_shrinking_enabled()
5122 {
5123  if (m_nrhs!=2 || !create_return_values(0))
5124  return false;
5125 
5126  bool shrinking_enabled=get_bool_from_bool_or_str();
5127 
5128  return ui_classifier->set_svm_shrinking_enabled(shrinking_enabled);
5129 }
5130 
5131 bool CSGInterface::cmd_set_svm_batch_computation_enabled()
5132 {
5133  if (m_nrhs!=2 || !create_return_values(0))
5134  return false;
5135 
5136  bool batch_computation_enabled=get_bool_from_bool_or_str();
5137 
5138  return ui_classifier->set_svm_batch_computation_enabled(
5139  batch_computation_enabled);
5140 }
5141 
5142 bool CSGInterface::cmd_set_svm_linadd_enabled()
5143 {
5144  if (m_nrhs!=2 || !create_return_values(0))
5145  return false;
5146 
5147  bool linadd_enabled=get_bool_from_bool_or_str();
5148 
5149  return ui_classifier->set_svm_linadd_enabled(linadd_enabled);
5150 }
5151 
5152 bool CSGInterface::cmd_set_svm_bias_enabled()
5153 {
5154  if (m_nrhs!=2 || !create_return_values(0))
5155  return false;
5156 
5157  bool bias_enabled=get_bool_from_bool_or_str();
5158 
5159  return ui_classifier->set_svm_bias_enabled(bias_enabled);
5160 }
5161 
5162 bool CSGInterface::cmd_set_mkl_interleaved_enabled()
5163 {
5164  if (m_nrhs!=2 || !create_return_values(0))
5165  return false;
5166 
5167  bool interleaved_enabled=get_bool_from_bool_or_str();
5168 
5169  return ui_classifier->set_mkl_interleaved_enabled(interleaved_enabled);
5170 }
5171 
5172 bool CSGInterface::cmd_set_krr_tau()
5173 {
5174  if (m_nrhs!=2 || !create_return_values(0))
5175  return false;
5176 
5177  float64_t tau=get_real_from_real_or_str();
5178 
5179  return ui_classifier->set_krr_tau(tau);
5180 }
5181 
5182 
5183 /* Preproc */
5184 
5185 bool CSGInterface::cmd_add_preproc()
5186 {
5187  if (m_nrhs<2 || !create_return_values(0))
5188  return false;
5189 
5190  int32_t len=0;
5191  char* type=get_str_from_str_or_direct(len);
5192  CPreprocessor* preproc=NULL;
5193 
5194  if (strmatch(type, "NORMONE"))
5195  preproc=ui_preproc->create_generic(P_NORMONE);
5196  else if (strmatch(type, "LOGPLUSONE"))
5197  preproc=ui_preproc->create_generic(P_LOGPLUSONE);
5198  else if (strmatch(type, "SORTWORDSTRING"))
5199  preproc=ui_preproc->create_generic(P_SORTWORDSTRING);
5200  else if (strmatch(type, "SORTULONGSTRING"))
5201  preproc=ui_preproc->create_generic(P_SORTULONGSTRING);
5202  else if (strmatch(type, "DECOMPRESSCHARSTRING"))
5203  preproc=ui_preproc->create_generic(P_DECOMPRESSCHARSTRING);
5204  else if (strmatch(type, "SORTWORD"))
5205  preproc=ui_preproc->create_generic(P_SORTWORD);
5206 
5207  else if (strmatch(type, "PRUNEVARSUBMEAN"))
5208  {
5209  bool divide_by_std=false;
5210  if (m_nrhs==3)
5211  divide_by_std=get_bool_from_bool_or_str();
5212 
5213  preproc=ui_preproc->create_prunevarsubmean(divide_by_std);
5214  }
5215 
5216 #ifdef HAVE_LAPACK
5217  else if (strmatch(type, "PCA") && m_nrhs==4)
5218  {
5219  bool do_whitening=get_bool_from_bool_or_str();
5220  float64_t threshold=get_real_from_real_or_str();
5221 
5222  preproc=ui_preproc->create_pca(do_whitening, threshold);
5223  }
5224 #endif
5225 
5226  else
5228 
5229  SG_FREE(type);
5230  return ui_preproc->add_preproc(preproc);
5231 }
5232 
5233 bool CSGInterface::cmd_del_preproc()
5234 {
5235  if (m_nrhs!=1 || !create_return_values(0))
5236  return false;
5237 
5238  return ui_preproc->del_preproc();
5239 }
5240 
5241 bool CSGInterface::cmd_attach_preproc()
5242 {
5243  if (m_nrhs<2 || !create_return_values(0))
5244  return false;
5245 
5246  int32_t len=0;
5247  char* target=get_str_from_str_or_direct(len);
5248 
5249  bool do_force=false;
5250  if (m_nrhs==3)
5251  do_force=get_bool_from_bool_or_str();
5252 
5253  bool success=ui_preproc->attach_preproc(target, do_force);
5254 
5255  SG_FREE(target);
5256  return success;
5257 }
5258 
5259 bool CSGInterface::cmd_clean_preproc()
5260 {
5261  if (m_nrhs!=1 || !create_return_values(0))
5262  return false;
5263 
5264  return ui_preproc->clean_preproc();
5265 }
5266 
5267 
5268 /* HMM */
5269 
5270 bool CSGInterface::cmd_new_plugin_estimator()
5271 {
5272  if (m_nrhs<2 || !create_return_values(0))
5273  return false;
5274 
5275  float64_t pos_pseudo=get_real_from_real_or_str();
5276  float64_t neg_pseudo=get_real_from_real_or_str();
5277 
5278  return ui_pluginestimate->new_estimator(pos_pseudo, neg_pseudo);
5279 }
5280 
5281 bool CSGInterface::cmd_train_estimator()
5282 {
5283  if (m_nrhs!=1 || !create_return_values(0))
5284  return false;
5285 
5286  return ui_pluginestimate->train();
5287 }
5288 
5289 bool CSGInterface::cmd_plugin_estimate_classify_example()
5290 {
5291  if (m_nrhs!=2 || !create_return_values(1))
5292  return false;
5293 
5294  int32_t idx=get_int();
5295  float64_t result=ui_pluginestimate->apply(idx);
5296 
5297  set_vector(&result, 1);
5298  return true;
5299 }
5300 
5301 bool CSGInterface::cmd_plugin_estimate_classify()
5302 {
5303  if (m_nrhs!=1 || !create_return_values(1))
5304  return false;
5305 
5306  CFeatures* feat=ui_features->get_test_features();
5307  if (!feat)
5308  SG_ERROR("No features found.\n");
5309 
5310  int32_t num_vec=feat->get_num_vectors();
5311  float64_t* result=SG_MALLOC(float64_t, num_vec);
5312  CLabels* labels=ui_pluginestimate->apply();
5313  for (int32_t i=0; i<num_vec; i++)
5314  result[i]=labels->get_label(i);
5315  SG_UNREF(labels);
5316 
5317  set_vector(result, num_vec);
5318  SG_FREE(result);
5319 
5320  return true;
5321 }
5322 
5323 bool CSGInterface::cmd_set_plugin_estimate()
5324 {
5325  if (m_nrhs!=3 || !create_return_values(0))
5326  return false;
5327 
5328  float64_t* emission_probs=NULL;
5329  int32_t num_probs=0;
5330  int32_t num_vec=0;
5331  get_matrix(emission_probs, num_probs, num_vec);
5332 
5333  if (num_vec!=2)
5334  SG_ERROR("Need at least 1 set of positive and 1 set of negative params.\n");
5335 
5336  float64_t* pos_params=emission_probs;
5337  float64_t* neg_params=&(emission_probs[num_probs]);
5338 
5339  float64_t* model_sizes=NULL;
5340  int32_t len=0;
5341  get_vector(model_sizes, len);
5342 
5343  int32_t seq_length=(int32_t) model_sizes[0];
5344  int32_t num_symbols=(int32_t) model_sizes[1];
5345  if (num_probs!=seq_length*num_symbols)
5346  SG_ERROR("Mismatch in number of emission probs and sequence length * number of symbols.\n");
5347 
5348  ui_pluginestimate->get_estimator()->set_model_params(
5349  pos_params, neg_params, seq_length, num_symbols);
5350 
5351  return true;
5352 }
5353 
5354 bool CSGInterface::cmd_get_plugin_estimate()
5355 {
5356  if (m_nrhs!=1 || !create_return_values(2))
5357  return false;
5358 
5359  float64_t* pos_params=NULL;
5360  float64_t* neg_params=NULL;
5361  int32_t num_params=0;
5362  int32_t seq_length=0;
5363  int32_t num_symbols=0;
5364 
5365  if (!ui_pluginestimate->get_estimator()->get_model_params(
5366  pos_params, neg_params, seq_length, num_symbols))
5367  return false;
5368 
5369  num_params=seq_length*num_symbols;
5370 
5371  float64_t* result=SG_MALLOC(float64_t, num_params*2);
5372  for (int32_t i=0; i<num_params; i++)
5373  result[i]=pos_params[i];
5374  for (int32_t i=0; i<num_params; i++)
5375  result[i+num_params]=neg_params[i];
5376 
5377  set_matrix(result, num_params, 2);
5378  SG_FREE(result);
5379 
5380  float64_t model_sizes[2];
5381  model_sizes[0]=(float64_t) seq_length;
5382  model_sizes[1]=(float64_t) num_symbols;
5383  set_vector(model_sizes, 2);
5384 
5385  return true;
5386 }
5387 
5388 bool CSGInterface::cmd_convergence_criteria()
5389 {
5390  if (m_nrhs<3 || !create_return_values(0))
5391  return false;
5392 
5393  int32_t num_iterations=get_int_from_int_or_str();
5394  float64_t epsilon=get_real_from_real_or_str();
5395 
5396  return ui_hmm->convergence_criteria(num_iterations, epsilon);
5397 }
5398 
5399 bool CSGInterface::cmd_normalize()
5400 {
5401  if (m_nrhs<2 || !create_return_values(0))
5402  return false;
5403 
5404  bool keep_dead_states=get_bool_from_bool_or_str();
5405 
5406  return ui_hmm->normalize(keep_dead_states);
5407 }
5408 
5409 bool CSGInterface::cmd_add_states()
5410 {
5411  if (m_nrhs<3 || !create_return_values(0))
5412  return false;
5413 
5414  int32_t num_states=get_int_from_int_or_str();
5415  float64_t value=get_real_from_real_or_str();
5416 
5417  return ui_hmm->add_states(num_states, value);
5418 }
5419 
5420 bool CSGInterface::cmd_permutation_entropy()
5421 {
5422  if (m_nrhs<3 || !create_return_values(0))
5423  return false;
5424 
5425  int32_t width=get_int_from_int_or_str();
5426  int32_t seq_num=get_int_from_int_or_str();
5427 
5428  return ui_hmm->permutation_entropy(width, seq_num);
5429 }
5430 
5431 bool CSGInterface::cmd_relative_entropy()
5432 {
5433  if (m_nrhs!=1 || !create_return_values(1))
5434  return false;
5435 
5436  float64_t* entropy=NULL;
5437  int32_t len=0;
5438  bool success=ui_hmm->relative_entropy(entropy, len);
5439  if (!success)
5440  return false;
5441 
5442  set_vector(entropy, len);
5443 
5444  SG_FREE(entropy);
5445  return true;
5446 }
5447 
5448 bool CSGInterface::cmd_entropy()
5449 {
5450  if (m_nrhs!=1 || !create_return_values(1))
5451  return false;
5452 
5453  float64_t* entropy=NULL;
5454  int32_t len=0;
5455  bool success=ui_hmm->entropy(entropy, len);
5456  if (!success)
5457  return false;
5458 
5459  set_vector(entropy, len);
5460 
5461  SG_FREE(entropy);
5462  return true;
5463 }
5464 
5465 bool CSGInterface::cmd_hmm_classify()
5466 {
5467  return do_hmm_classify(false, false);
5468 }
5469 
5470 bool CSGInterface::cmd_one_class_hmm_classify()
5471 {
5472  return do_hmm_classify(false, true);
5473 }
5474 
5475 bool CSGInterface::cmd_one_class_linear_hmm_classify()
5476 {
5477  return do_hmm_classify(true, true);
5478 }
5479 
5480 bool CSGInterface::do_hmm_classify(bool linear, bool one_class)
5481 {
5482  if (m_nrhs>1 || !create_return_values(1))
5483  return false;
5484 
5485  CFeatures* feat=ui_features->get_test_features();
5486  if (!feat)
5487  return false;
5488 
5489  int32_t num_vec=feat->get_num_vectors();
5490  CLabels* labels=NULL;
5491 
5492  if (linear) // must be one_class as well
5493  {
5494  labels=ui_hmm->linear_one_class_classify();
5495  }
5496  else
5497  {
5498  if (one_class)
5499  labels=ui_hmm->one_class_classify();
5500  else
5501  labels=ui_hmm->classify();
5502  }
5503  if (!labels)
5504  return false;
5505 
5506  float64_t* result=SG_MALLOC(float64_t, num_vec);
5507  for (int32_t i=0; i<num_vec; i++)
5508  result[i]=labels->get_label(i);
5509  SG_UNREF(labels);
5510 
5511  set_vector(result, num_vec);
5512  SG_FREE(result);
5513 
5514  return true;
5515 }
5516 
5517 bool CSGInterface::cmd_one_class_hmm_classify_example()
5518 {
5519  return do_hmm_classify_example(true);
5520 }
5521 
5522 bool CSGInterface::cmd_hmm_classify_example()
5523 {
5524  return do_hmm_classify_example(false);
5525 }
5526 
5527 bool CSGInterface::do_hmm_classify_example(bool one_class)
5528 {
5529  if (m_nrhs!=2 || !create_return_values(1))
5530  return false;
5531 
5532  int32_t idx=get_int();
5533  float64_t result=0;
5534 
5535  if (one_class)
5536  result=ui_hmm->one_class_classify_example(idx);
5537  else
5538  result=ui_hmm->classify_example(idx);
5539 
5540  set_real(result);
5541 
5542  return true;
5543 }
5544 
5545 bool CSGInterface::cmd_output_hmm()
5546 {
5547  if (m_nrhs!=1 || !create_return_values(0))
5548  return false;
5549 
5550  return ui_hmm->output_hmm();
5551 }
5552 
5553 bool CSGInterface::cmd_output_hmm_defined()
5554 {
5555  if (m_nrhs!=1 || !create_return_values(0))
5556  return false;
5557 
5558  return ui_hmm->output_hmm_defined();
5559 }
5560 
5561 bool CSGInterface::cmd_hmm_likelihood()
5562 {
5563  if (m_nrhs!=1 || !create_return_values(1))
5564  return false;
5565 
5566  CHMM* h=ui_hmm->get_current();
5567  if (!h)
5568  SG_ERROR("No HMM.\n");
5569 
5570  float64_t likelihood=h->model_probability();
5571  set_real(likelihood);
5572 
5573  return true;
5574 }
5575 
5576 bool CSGInterface::cmd_likelihood()
5577 {
5578  if (m_nrhs!=1 || !create_return_values(0))
5579  return false;
5580 
5581  return ui_hmm->likelihood();
5582 }
5583 
5584 bool CSGInterface::cmd_save_likelihood()
5585 {
5586  if (m_nrhs<2 || !create_return_values(0))
5587  return false;
5588 
5589  int32_t len=0;
5590  char* filename=get_str_from_str_or_direct(len);
5591 
5592  bool is_binary=false;
5593  if (m_nrhs==3)
5594  is_binary=get_bool_from_bool_or_str();
5595 
5596  bool success=ui_hmm->save_likelihood(filename, is_binary);
5597 
5598  SG_FREE(filename);
5599  return success;
5600 }
5601 
5602 bool CSGInterface::cmd_get_viterbi_path()
5603 {
5604  if (m_nrhs!=2 || !create_return_values(2))
5605  return false;
5606 
5607  int32_t dim=get_int();
5608  SG_DEBUG("dim: %f\n", dim);
5609 
5610  CHMM* h=ui_hmm->get_current();
5611  if (!h)
5612  return false;
5613 
5614  CFeatures* feat=ui_features->get_test_features();
5615  if (!feat || (feat->get_feature_class()!=C_STRING) ||
5616  (feat->get_feature_type()!=F_WORD))
5617  return false;
5618 
5620 
5621  int32_t num_feat=0;
5622  bool free_vec;
5623  uint16_t* vec=((CStringFeatures<uint16_t>*) feat)->get_feature_vector(dim, num_feat, free_vec);
5624  if (!vec || num_feat<=0)
5625  {
5626  ((CStringFeatures<uint16_t>*) feat)->free_feature_vector(vec, dim, free_vec);
5627  return false;
5628  }
5629  ((CStringFeatures<uint16_t>*) feat)->free_feature_vector(vec, dim, free_vec);
5630 
5631  SG_DEBUG( "computing viterbi path for vector %d (length %d)\n", dim, num_feat);
5632  float64_t likelihood=0;
5633  T_STATES* path=h->get_path(dim, likelihood);
5634 
5635  set_vector(path, num_feat);
5636  SG_FREE(path);
5637  set_real(likelihood);
5638 
5639  return true;
5640 }
5641 
5642 bool CSGInterface::cmd_viterbi_train()
5643 {
5644  if (m_nrhs!=1 || !create_return_values(0))
5645  return false;
5646 
5647  return ui_hmm->viterbi_train();
5648 }
5649 
5650 bool CSGInterface::cmd_viterbi_train_defined()
5651 {
5652  if (m_nrhs!=1 || !create_return_values(0))
5653  return false;
5654 
5655  return ui_hmm->viterbi_train_defined();
5656 }
5657 
5658 bool CSGInterface::cmd_baum_welch_train()
5659 {
5660  if (m_nrhs!=1 || !create_return_values(0))
5661  return false;
5662 
5663  return ui_hmm->baum_welch_train();
5664 }
5665 
5666 bool CSGInterface::cmd_baum_welch_train_defined()
5667 {
5668  if (m_nrhs!=1 || !create_return_values(0))
5669  return false;
5670 
5671  return ui_hmm->baum_welch_train_defined();
5672 }
5673 
5674 
5675 bool CSGInterface::cmd_baum_welch_trans_train()
5676 {
5677  if (m_nrhs!=1 || !create_return_values(0))
5678  return false;
5679 
5680  return ui_hmm->baum_welch_trans_train();
5681 }
5682 
5683 bool CSGInterface::cmd_linear_train()
5684 {
5685  if (m_nrhs<1 || !create_return_values(0))
5686  return false;
5687 
5688  if (m_nrhs==2)
5689  {
5690  int32_t len=0;
5691  char* align=get_str_from_str_or_direct(len);
5692 
5693  bool success=ui_hmm->linear_train(align[0]);
5694 
5695  SG_FREE(align);
5696  return success;
5697  }
5698  else
5699  return ui_hmm->linear_train();
5700 }
5701 
5702 bool CSGInterface::cmd_save_path()
5703 {
5704  if (m_nrhs<2 || !create_return_values(0))
5705  return false;
5706 
5707  int32_t len=0;
5708  char* filename=get_str_from_str_or_direct(len);
5709 
5710  bool is_binary=false;
5711  if (m_nrhs==3)
5712  is_binary=get_bool_from_bool_or_str();
5713 
5714  bool success=ui_hmm->save_path(filename, is_binary);
5715 
5716  SG_FREE(filename);
5717  return success;
5718 }
5719 
5720 bool CSGInterface::cmd_append_hmm()
5721 {
5722  if (m_nrhs!=5 || !create_return_values(0))
5723  return false;
5724 
5725  CHMM* old_h=ui_hmm->get_current();
5726  if (!old_h)
5727  SG_ERROR("No current HMM set.\n");
5728 
5729  float64_t* p=NULL;
5730  int32_t N_p=0;
5731  get_vector(p, N_p);
5732 
5733  float64_t* q=NULL;
5734  int32_t N_q=0;
5735  get_vector(q, N_q);
5736 
5737  float64_t* a=NULL;
5738  int32_t M_a=0;
5739  int32_t N_a=0;
5740  get_matrix(a, M_a, N_a);
5741  int32_t N=N_a;
5742 
5743  float64_t* b=NULL;
5744  int32_t M_b=0;
5745  int32_t N_b=0;
5746  get_matrix(b, M_b, N_b);
5747  int32_t M=N_b;
5748 
5749  if (N_p!=N || N_q!=N || N_a!=N || M_a!=N || N_b!=M || M_b!=N)
5750  {
5751  SG_ERROR("Model matrices not matching in size.\n"
5752  "p:(%d) q:(%d) a:(%d,%d) b(%d,%d)\n",
5753  N_p, N_q, N_a, M_a, N_b, M_b);
5754  }
5755 
5756  CHMM* h=new CHMM(N, M, NULL, ui_hmm->get_pseudo());
5757  int32_t i,j;
5758 
5759  for (i=0; i<N; i++)
5760  {
5761  h->set_p(i, p[i]);
5762  h->set_q(i, q[i]);
5763  }
5764 
5765  for (i=0; i<N; i++)
5766  for (j=0; j<N; j++)
5767  h->set_a(i,j, a[i+j*N]);
5768 
5769  for (i=0; i<N; i++)
5770  for (j=0; j<M; j++)
5771  h->set_b(i,j, b[i+j*N]);
5772 
5773  old_h->append_model(h);
5774  SG_UNREF(h);
5775 
5776  return true;
5777 }
5778 
5779 bool CSGInterface::cmd_append_model()
5780 {
5781  if (m_nrhs<2 || !create_return_values(0))
5782  return false;
5783  if (m_nrhs>2 && m_nrhs!=4)
5784  return false;
5785 
5786  int32_t len=0;
5787  char* filename=get_str_from_str_or_direct(len);
5788  int32_t base1=-1;
5789  int32_t base2=-1;
5790  if (m_nrhs>2)
5791  {
5792  base1=get_int_from_int_or_str();
5793  base2=get_int_from_int_or_str();
5794  }
5795 
5796  bool success=ui_hmm->append_model(filename, base1, base2);
5797 
5798  SG_FREE(filename);
5799  return success;
5800 }
5801 
5802 bool CSGInterface::cmd_new_hmm()
5803 {
5804  if (m_nrhs!=3 || !create_return_values(0))
5805  return false;
5806 
5807  int32_t n=get_int_from_int_or_str();
5808  int32_t m=get_int_from_int_or_str();
5809 
5810  return ui_hmm->new_hmm(n, m);
5811 }
5812 
5813 bool CSGInterface::cmd_load_hmm()
5814 {
5815  if (m_nrhs!=2 || !create_return_values(0))
5816  return false;
5817 
5818  int32_t len=0;
5819  char* filename=get_str_from_str_or_direct(len);
5820 
5821  bool success=ui_hmm->load(filename);
5822 
5823  SG_FREE(filename);
5824  return success;
5825 }
5826 
5827 bool CSGInterface::cmd_save_hmm()
5828 {
5829  if (m_nrhs<2 || !create_return_values(0))
5830  return false;
5831 
5832  int32_t len=0;
5833  char* filename=get_str_from_str_or_direct(len);
5834 
5835  bool is_binary=false;
5836  if (m_nrhs==3)
5837  is_binary=get_bool_from_bool_or_str();
5838 
5839  bool success=ui_hmm->save(filename, is_binary);
5840 
5841  SG_FREE(filename);
5842  return success;
5843 }
5844 
5845 bool CSGInterface::cmd_set_hmm()
5846 {
5847  if (m_nrhs!=5 || !create_return_values(0))
5848  return false;
5849 
5850  float64_t* p=NULL;
5851  int32_t N_p=0;
5852  get_vector(p, N_p);
5853 
5854  float64_t* q=NULL;
5855  int32_t N_q=0;
5856  get_vector(q, N_q);
5857 
5858  float64_t* a=NULL;
5859  int32_t M_a=0;
5860  int32_t N_a=0;
5861  get_matrix(a, M_a, N_a);
5862  int32_t N=N_a;
5863 
5864  float64_t* b=NULL;
5865  int32_t M_b=0;
5866  int32_t N_b=0;
5867  get_matrix(b, M_b, N_b);
5868  int32_t M=N_b;
5869 
5870  if (N_p!=N || N_q!=N || N_a!=N || M_a!=N || N_b!=M || M_b!=N)
5871  {
5872  SG_ERROR("Model matrices not matching in size.\n"
5873  "p:(%d) q:(%d) a:(%d,%d) b(%d,%d)\n",
5874  N_p, N_q, N_a, M_a, N_b, M_b);
5875  }
5876 
5877  CHMM* current=ui_hmm->get_current();
5878  if (!current)
5879  SG_ERROR("Need a previously created HMM.\n");
5880 
5881  int32_t i,j;
5882 
5883  for (i=0; i<N; i++)
5884  {
5885  current->set_p(i, p[i]);
5886  current->set_q(i, q[i]);
5887  }
5888 
5889  for (i=0; i<N; i++)
5890  for (j=0; j<N; j++)
5891  current->set_a(i,j, a[i+j*N]);
5892 
5893  for (i=0; i<N; i++)
5894  for (j=0; j<M; j++)
5895  current->set_b(i,j, b[i+j*N]);
5896 
5897  CStringFeatures<uint16_t>* sf = ((CStringFeatures<uint16_t>*) (ui_features->get_train_features()));
5898  current->set_observations(sf);
5899 
5900  return true;
5901 }
5902 
5903 bool CSGInterface::cmd_set_hmm_as()
5904 {
5905  if (m_nrhs!=2 || !create_return_values(0))
5906  return false;
5907 
5908  int32_t len=0;
5909  char* target=get_str_from_str_or_direct(len);
5910 
5911  bool success=ui_hmm->set_hmm_as(target);
5912 
5913  SG_FREE(target);
5914  return success;
5915 }
5916 
5917 bool CSGInterface::cmd_set_chop()
5918 {
5919  if (m_nrhs!=2 || !create_return_values(0))
5920  return false;
5921 
5922  float64_t value=get_real_from_real_or_str();
5923  return ui_hmm->chop(value);
5924 }
5925 
5926 bool CSGInterface::cmd_set_pseudo()
5927 {
5928  if (m_nrhs!=2 || !create_return_values(0))
5929  return false;
5930 
5931  float64_t value=get_real_from_real_or_str();
5932  return ui_hmm->set_pseudo(value);
5933 }
5934 
5935 bool CSGInterface::cmd_load_definitions()
5936 {
5937  if (m_nrhs<2 || !create_return_values(0))
5938  return false;
5939 
5940  int32_t len=0;
5941  char* filename=get_str_from_str_or_direct(len);
5942 
5943  bool do_init=false;
5944  if (m_nrhs==3)
5945  do_init=get_bool_from_bool_or_str();
5946 
5947  bool success=ui_hmm->load_definitions(filename, do_init);
5948 
5949  SG_FREE(filename);
5950  return success;
5951 }
5952 
5953 bool CSGInterface::cmd_get_hmm()
5954 {
5955  if (m_nrhs!=1 || !create_return_values(4))
5956  return false;
5957 
5958  CHMM* h=ui_hmm->get_current();
5959  if (!h)
5960  return false;
5961 
5962  int32_t N=h->get_N();
5963  int32_t M=h->get_M();
5964  int32_t i=0;
5965  int32_t j=0;
5966  float64_t* p=SG_MALLOC(float64_t, N);
5967  float64_t* q=SG_MALLOC(float64_t, N);
5968 
5969  for (i=0; i<N; i++)
5970  {
5971  p[i]=h->get_p(i);
5972  q[i]=h->get_q(i);
5973  }
5974 
5975  set_vector(p, N);
5976  SG_FREE(p);
5977  set_vector(q, N);
5978  SG_FREE(q);
5979 
5980  float64_t* a=SG_MALLOC(float64_t, N*N);
5981  for (i=0; i<N; i++)
5982  for (j=0; j<N; j++)
5983  a[i+j*N]=h->get_a(i, j);
5984  set_matrix(a, N, N);
5985  SG_FREE(a);
5986 
5987  float64_t* b=SG_MALLOC(float64_t, N*M);
5988  for (i=0; i<N; i++)
5989  for (j=0; j<M; j++)
5990  b[i+j*N]=h->get_b(i, j);
5991  set_matrix(b, N, M);
5992  SG_FREE(b);
5993 
5994  return true;
5995 }
5996 
5997 bool CSGInterface::cmd_best_path()
5998 {
5999  if (m_nrhs!=3 || !create_return_values(0))
6000  return false;
6001 
6002  int32_t from=get_int_from_int_or_str();
6003  int32_t to=get_int_from_int_or_str();
6004 
6005  return ui_hmm->best_path(from, to);
6006 }
6007 
6008 bool CSGInterface::cmd_best_path_2struct()
6009 {
6010  if (m_nrhs!=12 || !create_return_values(3))
6011  return false;
6012 
6013  SG_ERROR("Sorry, this parameter list is awful!\n");
6014 
6015  return true;
6016 }
6017 
6018 void CSGInterface::get_vector(bool*& vector, int32_t& len)
6019 {
6020  int32_t* int_vector;
6021  get_vector(int_vector, len);
6022 
6023  ASSERT(len>0);
6024  vector= SG_MALLOC(bool, len);
6025 
6026  for (int32_t i=0; i<len; i++)
6027  vector[i]= (int_vector[i]!=0);
6028 
6029  SG_FREE(int_vector);
6030 }
6031 
6032 void CSGInterface::set_vector(const bool* vector, int32_t len)
6033 {
6034  int32_t* int_vector = SG_MALLOC(int32_t, len);
6035  for (int32_t i=0;i<len;i++)
6036  {
6037  if (vector[i])
6038  int_vector[i]=1;
6039  else
6040  int_vector[i]=0;
6041  }
6042  set_vector(int_vector,len);
6043  SG_FREE(int_vector);
6044 }
6045 
6046 bool CSGInterface::cmd_set_plif_struct()
6047 {
6048  // ARG 2
6049  int32_t Nid=0;
6050  int32_t* ids;
6051  get_vector(ids,Nid);
6052 
6053  // ARG 3
6054  int32_t Nname=0;
6055  int32_t Mname=0;
6056  SGString<char>* names;
6057  get_string_list(names, Nname,Mname);
6058 
6059  // ARG 4
6060  int32_t Nlimits=0;
6061  int32_t Mlimits=0;
6062  float64_t* all_limits;
6063  get_matrix(all_limits, Mlimits, Nlimits);
6064 
6065  // ARG 5
6066  int32_t Npenalties=0;
6067  int32_t Mpenalties=0;
6068  float64_t* all_penalties;
6069  get_matrix(all_penalties, Mpenalties, Npenalties);
6070 
6071  // ARG 6
6072  int32_t Ntransform=0;
6073  int32_t Mtransform=0;
6074  SGString<char>* all_transform;
6075  get_string_list(all_transform, Ntransform, Mtransform);
6076 
6077  // ARG 7
6078  int32_t Nmin=0;
6079  float64_t* min_values;
6080  get_vector(min_values,Nmin);
6081 
6082  // ARG 8
6083  int32_t Nmax=0;
6084  float64_t* max_values;
6085  get_vector(max_values,Nmax);
6086 
6087  // ARG 9
6088  int32_t Ncache=0;
6089  bool* all_use_cache;
6090  get_vector(all_use_cache,Ncache);
6091 
6092  // ARG 10
6093  int32_t Nsvm=0;
6094  int32_t* all_use_svm;
6095  get_vector(all_use_svm,Nsvm);
6096 
6097  // ARG 11
6098  int32_t Ncalc=0;
6099  bool* all_do_calc;
6100  get_vector(all_do_calc,Ncalc);
6101 
6102  if (Ncalc!=Nsvm)
6103  SG_ERROR("Ncalc!=Nsvm, Ncalc:%i, Nsvm:%i\n",Ncalc,Nsvm);
6104  if (Ncalc!=Ncache)
6105  SG_ERROR("Ncalc!=Ncache, Ncalc:%i, Ncache:%i\n",Ncalc,Ncache);
6106  if (Ncalc!=Ntransform)
6107  SG_ERROR("Ncalc!=Ntransform, Ncalc:%i, Ntransform:%i\n",Ncalc,Ntransform);
6108  if (Ncalc!=Nmin)
6109  SG_ERROR("Ncalc!=Nmin, Ncalc:%i, Nmin:%i\n",Ncalc,Nmin);
6110  if (Ncalc!=Nmax)
6111  SG_ERROR("Ncalc!=Nmax, Ncalc:%i, Nmax:%i\n",Ncalc,Nmax);
6112  if (Ncalc!=Npenalties)
6113  SG_ERROR("Ncalc!=Npenalties, Ncalc:%i, Npenalties:%i\n",Ncalc,Npenalties);
6114  if (Ncalc!=Nlimits)
6115  SG_ERROR("Ncalc!=Nlimits, Ncalc:%i, Nlimits:%i\n",Ncalc,Nlimits);
6116  if (Ncalc!=Nname)
6117  SG_ERROR("Ncalc!=Nname, Ncalc:%i, Nname:%i\n",Ncalc,Nname);
6118  if (Ncalc!=Nid)
6119  SG_ERROR("Ncalc!=Nid, Ncalc:%i, Nid:%i\n",Ncalc,Nid);
6120  if (Mlimits!=Mpenalties)
6121  SG_ERROR("Mlimits!=Mpenalties, Mlimits:%i, Mpenalties:%i\n",Mlimits,Mpenalties);
6122 
6123  int32_t N = Ncalc;
6124  int32_t M = Mlimits;
6125  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6126  pm->create_plifs(N, M);
6127  pm->set_plif_ids(SGVector<int32_t>(ids, N));
6128  pm->set_plif_min_values(SGVector<float64_t>(min_values, N));
6129  pm->set_plif_max_values(SGVector<float64_t>(max_values, N));
6130  pm->set_plif_use_cache(SGVector<bool>(all_use_cache, N));
6131  pm->set_plif_use_svm(SGVector<int32_t>(all_use_svm, N));
6132  pm->set_plif_limits(SGMatrix<float64_t>(all_limits, N, M));
6133  pm->set_plif_penalties(SGMatrix<float64_t>(all_penalties, N, M));
6134  pm->set_plif_names(names, N);
6135  pm->set_plif_transform_type(all_transform, N);
6136 
6137  SG_FREE(all_limits);
6138  SG_FREE(all_penalties);
6139  SG_FREE(names);
6140  SG_FREE(all_transform);
6141  SG_FREE(min_values);
6142  SG_FREE(max_values);
6143  SG_FREE(all_use_cache);
6144  SG_FREE(all_use_svm);
6145  SG_FREE(all_do_calc);
6146 
6147  return true;
6148 }
6149 
6150 bool CSGInterface::cmd_get_plif_struct()
6151 {
6152  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6153  CPlif** PEN = pm->get_PEN();
6154  int32_t N = pm->get_num_plifs();
6155  int32_t M = pm->get_num_limits();
6156 
6157 
6158  int32_t* ids = SG_MALLOC(int32_t, N);
6159  float64_t* max_values = SG_MALLOC(float64_t, N);
6160  float64_t* min_values = SG_MALLOC(float64_t, N);
6162  SGString<char>* all_transform = SG_MALLOC(SGString<char>, N);
6163  float64_t* all_limits = SG_MALLOC(float64_t, N*M);
6164  float64_t* all_penalties = SG_MALLOC(float64_t, N*M);
6165  bool* all_use_cache = SG_MALLOC(bool, N);
6166  int32_t* all_use_svm = SG_MALLOC(int32_t, N);
6167  bool* all_do_calc = SG_MALLOC(bool, N);
6168  for (int32_t i=0;i<N;i++)
6169  {
6170  ids[i]=PEN[i]->get_id();
6171  names[i].string = PEN[i]->get_plif_name();
6172  names[i].slen = strlen(PEN[i]->get_plif_name());
6173  float64_t* limits = PEN[i]->get_plif_limits();
6174  float64_t* penalties = PEN[i]->get_plif_penalties();
6175  for (int32_t j=0;j<M;j++)
6176  {
6177  all_limits[i*M+j]=limits[j];
6178  all_penalties[i*M+j]=penalties[j];
6179  }
6180  all_transform[i].string = (char*) PEN[i]->get_transform_type();
6181  all_transform[i].slen = strlen(PEN[i]->get_transform_type());
6182  min_values[i]=PEN[i]->get_min_value();
6183  max_values[i]=PEN[i]->get_max_value();
6184  all_use_cache[i]=PEN[i]->get_use_cache();
6185  all_use_svm[i]=PEN[i]->get_use_svm();
6186  all_do_calc[i]=PEN[i]->get_do_calc();
6187 
6188  }
6189  set_vector(ids,N);
6190  set_string_list(names, N);
6191  set_matrix(all_limits, M, N);
6192  set_matrix(all_penalties, M, N);
6193  set_string_list(all_transform, N);
6194  set_vector(min_values,N);
6195  set_vector(max_values,N);
6196  set_vector(all_use_cache,N);
6197  set_vector(all_use_svm,N);
6198  set_vector(all_do_calc,N);
6199 
6200  SG_FREE(ids);
6201  SG_FREE(max_values);
6202  SG_FREE(min_values);
6203  SG_FREE(names);
6204  SG_FREE(all_transform);
6205  SG_FREE(all_limits);
6206  SG_FREE(all_penalties);
6207  SG_FREE(all_use_cache);
6208  SG_FREE(all_use_svm);
6209  SG_FREE(all_do_calc);
6210 
6211  return true;
6212 }
6213 /*bool CSGInterface::cmd_signals_set_model()
6214 {
6215  // ARG 1
6216  int32_t len=0;
6217  char* filename;
6218  filename = get_string(len);
6219 
6220  CTrainPredMaster* tpm = new CTrainPredMaster(ui_kernel);
6221 
6222  tpm->read_models_from_file(filename);
6223 
6224  return true;
6225  }*/
6226 bool CSGInterface::cmd_signals_set_positions()
6227 {
6228  return true;
6229 }
6230 bool CSGInterface::cmd_signals_set_labels()
6231 {
6232  return true;
6233 }
6234 bool CSGInterface::cmd_signals_set_split()
6235 {
6236  return true;
6237 }
6238 bool CSGInterface::cmd_signals_set_train_mask()
6239 {
6240  return true;
6241 }
6242 bool CSGInterface::cmd_signals_add_feature()
6243 {
6244  return true;
6245 }
6246 bool CSGInterface::cmd_signals_add_kernel()
6247 {
6248  return true;
6249 }
6250 bool CSGInterface::cmd_signals_run()
6251 {
6252  return true;
6253 }
6254 
6255 bool CSGInterface::cmd_init_dyn_prog()
6256 {
6257  //ARG 1
6258  int32_t num_svms=get_int();
6259 
6260  CDynProg* h=new CDynProg(num_svms);
6261  ui_structure->set_dyn_prog(h);
6262  return true;
6263 }
6264 
6265 bool CSGInterface::cmd_clean_up_dyn_prog()
6266 {
6267  return ui_structure->cleanup();
6268 }
6269 
6270 bool CSGInterface::cmd_set_model()
6271 {
6272 
6273  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6274 
6275  CDynProg* h = ui_structure->get_dyn_prog();
6276  int32_t num_svms = h->get_num_svms();
6277  //CDynProg* h=new CDynProg(Nweights/* = num_svms */);
6278 
6279  //ARG 1
6280  // transition pointers
6281  // link transitions to length, content, frame (and tiling)
6282  // plifs (#states x #states x 3 or 4)
6283  int32_t numDim=0;
6284  int32_t* Dim=0;
6285  float64_t* penalties_array=NULL;
6286  get_ndarray(penalties_array,Dim,numDim);
6287  ASSERT(numDim==3);
6288  ASSERT(Dim[0]==Dim[1]);
6289 
6290  if (!pm->compute_plif_matrix(SGNDArray<float64_t>(penalties_array, Dim, numDim)))
6291  SG_ERROR("error computing plif matrix\n");
6292  ui_structure->set_num_states(Dim[0]);
6293  SG_FREE(penalties_array);
6294 
6295  // ARG 2
6296  // bool-> determines if orf information should be used
6297  bool use_orf = get_bool();
6298  ui_structure->set_use_orf(use_orf);
6299 
6300  // ARG 3
6301  // determines for which contents which orf should be used (#contents x 2)
6302  int32_t Nmod=0;
6303  int32_t Mmod=0;
6304  int32_t* mod_words;
6305  get_matrix(mod_words, Nmod,Mmod);
6306  if (Nmod != num_svms)
6307  SG_ERROR("should be equal: Nmod: %i, num_svms: %i\n",Nmod,num_svms);
6308  ASSERT(Mmod == 2)
6309  h->init_mod_words_array(SGMatrix<int32_t>(mod_words, Nmod, Mmod));
6310  SG_FREE(mod_words);
6311 
6312  // ARG 4
6313  // links: states -> signal plifs (#states x 2)
6314  int32_t num_states=0;
6315  int32_t feat_dim3=0;
6316  int32_t* state_signals;
6317  get_matrix(state_signals,num_states,feat_dim3);
6318  ASSERT(num_states==Dim[0]);
6319  pm->compute_signal_plifs(SGMatrix<int32_t>(state_signals, feat_dim3, num_states));
6320  SG_FREE(state_signals);
6321 
6322 
6323  // ARG 5
6324  // ORF info (#states x 2)
6325  int32_t Norf=0;
6326  int32_t Morf=0;
6327  int32_t* orf_info;
6328  get_matrix(orf_info,Norf,Morf);
6329  ASSERT(Norf==num_states)
6330  ASSERT(Morf==2)
6331 
6332  ui_structure->set_orf_info(orf_info, Norf, Morf);
6333  h->set_orf_info(SGMatrix<int32_t>(orf_info, Norf, Morf));
6334  SG_FREE(orf_info);
6335 
6336  h->set_num_states(num_states) ;
6337 
6338  return true;
6339 }
6340 
// Reads a sequence string, a vector of feature positions and a content-SVM
// weight matrix from the argument list and passes them to the DynProg object
// obtained from ui_structure. Returns true when the end is reached.
// NOTE(review): this numbered listing skips lines 6377 and 6381, so statements
// may be missing around create_word_string()/set_dict_weights() - confirm
// against the original file before editing the code.
6341 bool CSGInterface::cmd_precompute_content_svms()
6342 {
6343 
6344  // ARG 1
6345  int32_t seq_len=0;
6346  char* seq;
6347  seq = get_string(seq_len);
6348 
6349  // ARG 2
6350  // all feature positions
6351  int32_t Npos=0;
6352  int32_t* all_pos;
6353  get_vector(all_pos, Npos);
6354 
6355  //ARG 3
6356  // content svm weights
6357  int32_t Nweights=0;
6358  int32_t num_svms=0;
6359  float64_t* weights;
6360  get_matrix(weights, Nweights, num_svms);
      // A row count other than 5440 is only reported; processing continues.
6361  if (Nweights!=5440)
6362  SG_PRINT("Dimension mismatch: got %i, expect %i\n", Nweights, 5440) ;
6363  ui_structure->set_content_svm_weights(weights, Nweights, num_svms);
6364 
6365  CDynProg* h = ui_structure->get_dyn_prog();
6366  if (!h)
6367  SG_ERROR("no DynProg object found, use init_dyn_prog first\n");
6368 
6369 
6370  //float64_t* weights = ui_structure->get_content_svm_weights();
6371  //int32_t Mweights = h->get_num_svms();
6372  //int32_t Nweights = ui_structure->get_num_svm_weights();
      // NOTE(review): all_pos is not released anywhere in this function, unlike
      // seq and weights - confirm whether set_pos() takes ownership.
6373  h->set_pos(SGVector<int32_t>(all_pos, Npos));
6374  h->set_gene_string(SGVector<char>(seq, seq_len));
6375  SG_FREE(seq);
6376  h->create_word_string();
6378  h->init_content_svm_value_array(num_svms);
6379  h->set_dict_weights(SGMatrix<float64_t>(weights, Nweights, num_svms));
6380  SG_FREE(weights);
6382  SG_DEBUG("precompute_content_svms done\n");
6383  return true;
6384 }
6385 
6386 bool CSGInterface::cmd_get_lin_feat()
6387 {
6388  CDynProg* h = ui_structure->get_dyn_prog();
6389  if (!h)
6390  SG_ERROR("no DynProg object found, use set_model first\n");
6391 
6392 
6393  int32_t dim1, dim2 = 0;
6394  float64_t* lin_feat = h->get_lin_feat(dim1, dim2);
6395 
6396  set_matrix(lin_feat, dim1, dim2);
6397 
6398  return true;
6399 }
// Reads a sequence string, a vector of feature positions and a matrix of
// linear features, checks that the number of positions equals the second
// matrix extent, passes everything to the DynProg object and frees the three
// input buffers.
// NOTE(review): this numbered listing skips line 6436, so a statement may be
// missing between set_gene_string() and init_content_svm_value_array().
6400 bool CSGInterface::cmd_set_lin_feat()
6401 {
6402  // ARG 1
6403  int32_t Nseq=0;
6404  char* seq;
6405  seq = get_string(Nseq);
6406 
6407  // ARG 2
6408  // all feature positions
6409  int32_t Npos=0;
6410  int32_t* all_pos;
6411  get_vector(all_pos, Npos);
6412 
6413  //ARG 3
6414  //
6415  int32_t num_svms, seq_len;
6416  float64_t* lin_feat=NULL;
6417  get_matrix(lin_feat, num_svms, seq_len);
6418 
6419  if (Npos!=seq_len)
6420  {
6421  SG_ERROR("Dimension mismatch: got %i positions and (%ix%i) values\n", Npos, num_svms, seq_len) ;
6422 
      // NOTE(review): the cleanup below runs only if SG_ERROR returns; if it
      // raises instead, these buffers are never released - confirm.
6423  SG_FREE(lin_feat);
6424  SG_FREE(seq);
6425  SG_FREE(all_pos);
6426 
6427  return false ;
6428  }
6429 
6430  CDynProg* h = ui_structure->get_dyn_prog();
6431  if (!h)
6432  SG_ERROR("no DynProg object found, use set_model first\n");
6433 
6434  h->set_pos(SGVector<int32_t>(all_pos, Npos));
6435  h->set_gene_string(SGVector<char>(seq, Nseq));
6437  h->init_content_svm_value_array(num_svms);
6438  h->set_lin_feat(lin_feat, num_svms, seq_len);
6439 
      // The buffers are freed right after being passed on, so the setters above
      // presumably copy their input - confirm.
6440  SG_FREE(lin_feat);
6441  SG_FREE(seq);
6442  SG_FREE(all_pos);
6443 
6444  return true;
6445 }
6446 bool CSGInterface::cmd_long_transition_settings()
6447 {
6448  bool use_long_transitions = get_bool();
6449  int32_t threshold = get_int();
6450  int32_t max_len = get_int();
6451 
6452  CDynProg* h = ui_structure->get_dyn_prog();
6453  if (!h)
6454  SG_ERROR("no DynProg object found, use set_model first\n");
6455 
6456  h->long_transition_settings(use_long_transitions, threshold, max_len);
6457 
6458  return true;
6459 }
6460 bool CSGInterface::cmd_set_feature_matrix()
6461 {
6462  int32_t num_states = ui_structure->get_num_states();
6463 
6464  //ARG 1
6465  // feature matrix (#states x #feature_positions x max_num_signals)
6466  int32_t* Dims=0;
6467  int32_t numDims=0;
6468  float64_t* features = NULL;
6469  get_ndarray(features, Dims, numDims);
6470 
6471  if (numDims!=3)
6472  SG_ERROR("expected a 3 dimensional array, got %i dimensions\n", numDims);
6473  if (Dims[0]!=num_states)
6474  SG_ERROR("number of rows (%i) not equal number of states (%i)\n",Dims[0], num_states);
6475  ASSERT(ui_structure->set_feature_matrix(features, Dims));
6476 
6477  ASSERT(ui_structure->set_feature_dims(Dims));
6478 
6479  SG_FREE(features);
6480  SG_FREE(Dims);
6481 
6482  return true;
6483 }
6484 bool CSGInterface::cmd_set_feature_matrix_sparse()
6485 {
6486  int32_t num_pos = ui_structure->get_num_positions();
6487  int32_t num_states = ui_structure->get_num_states();
6488 
6489  //ARG 1
6490  // feature matrix (#states x #feature_positions x max_num_signals)
6491  int32_t dim11, dim12 ;
6492  SGSparseVector<float64_t> *features1=NULL ;
6493  get_sparse_matrix(features1, dim11, dim12);
6494 
6495  int32_t dim21, dim22 ;
6496  SGSparseVector<float64_t> *features2=NULL ;
6497  get_sparse_matrix(features2, dim21, dim22);
6498 
6499  ASSERT(dim11==dim21) ;
6500  ASSERT(dim12==dim22) ;
6501 
6502  int32_t *Dims = SG_MALLOC(int32_t, 3);
6503  Dims[0]=dim11 ;
6504  Dims[1]=dim12 ;
6505  Dims[2]=2 ;
6506 
6507  ASSERT(Dims[0]==num_states)
6508  ASSERT(Dims[1]==num_pos)
6509 
6510  ASSERT(ui_structure->set_feature_matrix_sparse(features1, features2, Dims));
6511  ASSERT(ui_structure->set_feature_dims(Dims));
6512 
6513  SG_FREE(features1);
6514  SG_FREE(features2);
6515  SG_FREE(Dims);
6516 
6517  return true;
6518 }
6519 bool CSGInterface::cmd_init_intron_list()
6520 {
6521  //ARG1 start_positions
6522  int32_t Nstart_positions;
6523  int32_t* start_positions;
6524  get_vector(start_positions, Nstart_positions);
6525  //SG_PRINT("Nstart_positions:%i\n",Nstart_positions);
6526 
6527  //ARG2 end_positions
6528  int32_t Nend_positions;
6529  int32_t* end_positions;
6530  get_vector(end_positions, Nend_positions);
6531  //SG_PRINT("Nend_positions:%i\n",Nend_positions);
6532 
6533  //ARG3 quality
6534  int32_t Nquality;
6535  int32_t* quality;
6536  get_vector(quality, Nquality);
6537  //SG_PRINT("Nquality:%i\n",Nquality);
6538 
6539  //ARG4 all candidate positions
6540  int32_t Nall_pos;
6541  int32_t* all_pos;
6542  get_vector(all_pos, Nall_pos);
6543  //SG_PRINT("Nall_pos:%i\n",Nall_pos);
6544 
6545  ASSERT(Nquality==Nend_positions);
6546  ASSERT(Nend_positions==Nstart_positions);
6547 
6548  CIntronList* intron_list = new CIntronList();
6549 
6550  intron_list->init_list(all_pos, Nall_pos);
6551 
6552  intron_list->read_introns(start_positions, end_positions, quality, Nstart_positions);
6553 
6554  SG_FREE(start_positions);
6555  SG_FREE(end_positions);
6556  SG_FREE(quality);
6557  SG_FREE(all_pos);
6558 
6559  //int32_t test;
6560  //int32_t testq;
6561  //intron_list->get_coverage(&test, &testq, 15 ,16);
6562 
6563  //SG_PRINT("coverage: %i, quality: %i\n",test, testq);
6564 
6565  CDynProg* h = ui_structure->get_dyn_prog();
6566  if (!h)
6567  SG_ERROR("no DynProg object found, use set_model first\n");
6568 
6569  h->set_intron_list(intron_list, 2);
6570 
6571  return true;
6572 }
6573 bool CSGInterface::cmd_precompute_tiling_features()
6574 {
6575  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6576  CPlif** PEN = pm->get_PEN();
6577  CDynProg* h = ui_structure->get_dyn_prog();
6578 
6579  int32_t Nintensities=0;
6580  float64_t* intensities;
6581  get_vector(intensities, Nintensities);
6582 
6583  int32_t Nprobe_pos=0;
6584  int32_t* probe_pos;
6585  get_vector(probe_pos, Nprobe_pos);
6586  ASSERT(Nprobe_pos==Nintensities);
6587 
6588  int32_t Ntiling_plif_ids=0;
6589  int32_t* tiling_plif_ids;
6590  get_vector(tiling_plif_ids, Ntiling_plif_ids);
6591 
6592  h->init_tiling_data(probe_pos,intensities, Nprobe_pos);
6593  h->precompute_tiling_plifs(PEN, tiling_plif_ids, Ntiling_plif_ids);
6594  return true;
6595 }
6596 
// Reads six arguments (p, q, nbest vector, segment path, transition matrix,
// loss matrix), configures the DynProg object with them and with state kept in
// ui_structure, runs compute_nbest_paths() and returns three vectors: scores,
// state paths and positions.
// NOTE(review): this numbered listing skips lines 6673, 6675 and 6716, so
// statements may be missing at those places - confirm against the original file.
6597 bool CSGInterface::cmd_best_path_trans()
6598 {
      // NOTE(review): h is dereferenced without a NULL check, unlike in other
      // commands of this file.
6599  CDynProg* h = ui_structure->get_dyn_prog();
6600 
6601  CSegmentLoss* seg_loss_obj = h->get_segment_loss_object();
6602 
6603  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6604 
6605  int32_t num_states = h->get_num_states();
6606  int32_t* feat_dims = ui_structure->get_feature_dims();
6607  float64_t* features = (ui_structure->get_feature_matrix(false));
6608  CSparseFeatures<float64_t>* features_sparse1 = (ui_structure->get_feature_matrix_sparse(0));
6609  CSparseFeatures<float64_t>* features_sparse2 = (ui_structure->get_feature_matrix_sparse(1));
6610  int32_t* orf_info = ui_structure->get_orf_info();
6611  bool use_orf = ui_structure->get_use_orf();
6612  int32_t Nplif = pm->get_num_plifs();
6613 
6614  // ARG 1
6615  // transitions from initial state (#states x 1)
6616  int32_t Np=0;
6617  float64_t* p;
6618  get_vector(p, Np);
6619  if (Np!=num_states)
6620  SG_ERROR("# transitions from initial state (%i) does not match # states (%i)\n", Np, num_states);
6621 
6622  // ARG 2
6623  // transitions to end state (#states x 1)
6624  int32_t Nq=0;
6625  float64_t* q;
6626  get_vector(q, Nq);
6627  if (Nq!=num_states)
6628  SG_ERROR("# transitions to end state (%i) does not match # states (%i)\n", Nq, num_states);
6629 
6630  // ARG 3
6631  // number of best paths
6632  int32_t Nnbest=0;
6633  int32_t* all_nbest;
6634  get_vector(all_nbest, Nnbest);
6635  int32_t nbest;
6636  int32_t nother = 0;
      // A two-element vector supplies nbest and nother; otherwise only the
      // first element is used.
      // NOTE(review): all_nbest[0] is read without checking Nnbest>0.
6637  if (Nnbest==2)
6638  {
6639  nbest =all_nbest[0];
6640  nother=all_nbest[1];
6641  }
6642  else
6643  nbest =all_nbest[0];
6644  SG_FREE(all_nbest);
6645 
6646  // ARG 4
6647  // segment path (2 x #feature_positions)
6648  // masking/weighting of loss for specific
6649  // regions of the true path
6650  int32_t Nseg_path=0;
6651  int32_t Mseg_path=0;
6652  float64_t* seg_path;
6653  get_matrix(seg_path, Nseg_path, Mseg_path);
6654 
6655  // ARG 5
6656  // links for transitions (#transitions x 4)
6657  int32_t Na_trans=0;
6658  int32_t num_a_trans=0;
6659  float64_t* a_trans;
6660  get_matrix(a_trans, num_a_trans, Na_trans);
6661 
6662  // ARG 6
6663  // loss matrix (#segment x 2*#segments)
6664  // one (#segment x #segments)-matrix for segment loss
6665  // and one for nucleotide loss
6666  int32_t Nloss=0;
6667  int32_t Mloss=0;
6668  float64_t* loss;
6669  get_matrix(loss, Nloss,Mloss);
6670 
6671  int32_t M = h->get_num_positions();
6672 
6674  // check input
6676  ASSERT(num_states==Nq);
6677 
6678  CPlif** PEN=pm->get_PEN();
6679  ASSERT(PEN);
6680 
6681  h->set_p_vector(SGVector<float64_t>(p, num_states));
6682  SG_FREE(p); p=NULL ;
6683  h->set_q_vector(SGVector<float64_t>(q, num_states));
6684  SG_FREE(q); q=NULL ;
6685 
      // Without a segment path only the first three columns of a_trans are used.
6686  if (seg_path!=NULL)
6687  {
6688  h->set_a_trans_matrix(SGMatrix<float64_t>(a_trans, num_a_trans, Na_trans)) ;
6689  }
6690  else
6691  {
6692  h->set_a_trans_matrix(SGMatrix<float64_t>(a_trans, num_a_trans, 3)) ; // segment_id = 0
6693  }
6694  SG_FREE(a_trans);
6695  a_trans=NULL ;
6696 
6697  if (!h->check_svm_arrays())
6698  {
6699  SG_ERROR( "svm arrays inconsistent\n") ;
      // NOTE(review): the two statements below run only if SG_ERROR returns.
6700  CPlif::delete_penalty_struct(PEN, Nplif) ;
6701  return false ;
6702  }
6703 
6704  SG_DEBUG("best_path_trans: M: %i, Mseg_path: %i\n", M, Mseg_path);
6705 
6706  h->set_observation_matrix(SGNDArray<float64_t>(features, feat_dims, 3));
6707 
6708  if (seg_path!=NULL)
6709  {
6710  h->best_path_set_segment_loss(SGMatrix<float64_t>(loss, Nloss, Mloss)) ;
6711  seg_loss_obj->set_segment_loss(loss, Nloss, Mloss);
6712  }
6713  else
6714  {
6715  float64_t zero2[2] = {0.0, 0.0} ;
6717  seg_loss_obj->set_segment_loss(zero2, 2, 1);
6718  }
6719  h->set_content_type_array(SGMatrix<float64_t>(seg_path,Nseg_path,Mseg_path));
6720  SG_FREE(seg_path);
6721 
      // Any loss entry above 1e-3 selects the variant that uses the segment
      // loss. The loss matrix is scanned here whether or not a segment path
      // was given.
6722  bool segment_loss_non_zero=false;
6723  for (int32_t i=0; i<Nloss*Mloss; i++)
6724  {
6725  if (loss[i]>1e-3)
6726  segment_loss_non_zero=true;
6727  }
6728 
6729  SG_FREE(loss);
6730  loss=NULL;
6731 
6732  h->set_orf_info(SGMatrix<int32_t>(orf_info, num_states, 2));
6733  h->set_sparse_features(features_sparse1, features_sparse2);
6734  h->set_plif_matrices(pm);
6735 
      // compute_nbest_paths() is called with 1 when nbest==1 and with 2 for
      // every other value of nbest.
6736  if (segment_loss_non_zero)
6737  {
6738  SG_DEBUG("Using version with segment_loss\n") ;
6739  if (nbest==1)
6740  h->compute_nbest_paths(feat_dims[2], use_orf, 1,true,false);
6741  else
6742  h->compute_nbest_paths(feat_dims[2], use_orf, 2,true,false);
6743  }
6744  else
6745  {
6746  SG_DEBUG("Using version without segment_loss\n") ;
6747  if (nbest==1)
6748  h->compute_nbest_paths(feat_dims[2], use_orf, 1,false,false);
6749  else
6750  h->compute_nbest_paths(feat_dims[2], use_orf, 2,false,false);
6751  }
6752 
6753  SGVector<float64_t> p_prob=h->get_scores();
6754 
6755  SGMatrix<int32_t> states=h->get_states();
6756 
6757  SGMatrix<int32_t> my_pos=h->get_positions();
6758 
6759  // transcribe result
6760  float64_t* d_my_path= SG_MALLOC(float64_t, (nbest+nother)*M);
6761  float64_t* d_my_pos= SG_MALLOC(float64_t, (nbest+nother)*M);
6762 
      // Element (i,k) is read at i+k*M and written at i*(nbest+nother)+k,
      // i.e. the storage order is swapped while converting to float64_t.
6763  for (int32_t k=0; k<(nbest+nother); k++)
6764  {
6765  for (int32_t i=0; i<M; i++)
6766  {
6767  d_my_path[i*(nbest+nother)+k] = states.matrix[i+k*M] ;
6768  d_my_pos[i*(nbest+nother)+k] = my_pos.matrix[i+k*M] ;
6769  }
6770  }
6771  SG_FREE(states.matrix);
6772  SG_FREE(my_pos.matrix);
6773 
6774  set_vector(p_prob.vector,nbest+nother);
6775  set_vector(d_my_path, (nbest+nother)*M);
6776  set_vector(d_my_pos, (nbest+nother)*M);
6777 
6778  SG_FREE(d_my_path);
6779  SG_FREE(d_my_pos);
6780 
6781  return true;
6782 
6783 }
6784 
// Reads seven arguments (p, q, segment path, transition matrix, loss matrix,
// a state sequence and its positions), runs best_path_trans_deriv() on the
// DynProg object and returns six results: the p, q, A and plif derivatives
// followed by the path scores and path losses.
// NOTE(review): this numbered listing skips line 6899, so a statement may be
// missing in the else branch that sets the zero segment loss.
6785 bool CSGInterface::cmd_best_path_trans_deriv()
6786 {
6787  int32_t num_states = ui_structure->get_num_states();
6788  int32_t* feat_dims = ui_structure->get_feature_dims();
6789  float64_t* features = (ui_structure->get_feature_matrix(false));
6790 
6791  CPlifMatrix* pm=ui_structure->get_plif_matrix();
6792  int32_t Nplif = pm->get_num_plifs();
6793  CPlif** PEN = pm->get_PEN();
6794 
6795  // ARG 1
6796  // transitions from initial state (#states x 1)
6797  int32_t Np=0;
6798  float64_t* p=NULL;
6799  get_vector(p, Np);
6800  if (Np!=num_states)
6801  SG_ERROR("Np!=num_states; Np:%i num_states:%i",Np,num_states);
6802 
6803  // ARG 2
6804  // transitions to end state (#states x 1)
6805  int32_t Nq=0;
6806  float64_t* q=NULL;
6807  get_vector(q, Nq);
6808  if (Nq!=num_states)
6809  SG_ERROR("Nq!=num_states; Nq:%i num_states:%i",Nq,num_states);
6810 
6811 
6812  // ARG 3
6813  // segment path (2 x #feature_positions)
6814  // masking/weighting of loss for specific
6815  // regions of the true path
6816  int32_t Nseg_path=0;
6817  int32_t Mseg_path=0;
6818  float64_t* seg_path;
6819  get_matrix(seg_path,Nseg_path,Mseg_path);
6820 
6821  // ARG 4
6822  // links for transitions (#transitions x 4)
6823  int32_t Na_trans=0;
6824  int32_t num_a_trans=0;
6825  float64_t* a_trans=NULL;
6826  get_matrix(a_trans, num_a_trans, Na_trans);
6827 
6828  // ARG 5
6829  // loss matrix (#segment x 2*#segments)
6830  // one (#segment x #segments)-matrix for segment loss
6831  // and one for nucleotide loss
6832  int32_t Nloss=0;
6833  int32_t Mloss=0;
6834  float64_t* loss=NULL;
6835  get_matrix(loss, Nloss,Mloss);
6836 
6837  // ARG 6
6838  // path to calc derivative for
6839  int32_t Nmystate_seq=0;
6840  int32_t* mystate_seq=NULL;
6841  get_vector(mystate_seq, Nmystate_seq);
6842 
6843  // ARG 7
6844  // positions of the path
6845  int32_t Nmypos_seq=0;
6846  int32_t* mypos_seq=NULL;
6847  get_vector(mypos_seq, Nmypos_seq);
6848 
6849 
6850  //a => a_trans
6851 
      // Checks that each plif (except index 0) reports its own index as id and
      // determines the largest plif length; max_plif_id ends up as Nplif-1
      // (or 0 when there are no plifs).
6852  int32_t max_plif_id = 0 ;
6853  int32_t max_plif_len = 1 ;
6854  for (int32_t i=0; i<Nplif; i++)
6855  {
6856  if (i>0 && PEN[i]->get_id()!=i)
6857  SG_ERROR("PEN[i]->get_id()!=i; PEN[%i]->get_id():%i ,\n",i, PEN[i]->get_id());
6858  if (i>max_plif_id)
6859  max_plif_id=i ;
6860  if (PEN[i]->get_plif_len()>max_plif_len)
6861  max_plif_len=PEN[i]->get_plif_len() ;
6862  } ;
6863 
6864 
      // NOTE(review): h is dereferenced without a NULL check, unlike in other
      // commands of this file.
6865  CDynProg* h = ui_structure->get_dyn_prog();
6866  CSegmentLoss* seg_loss_obj = h->get_segment_loss_object();
6867  h->set_num_states(num_states) ;
6868  h->set_p_vector(SGVector<float64_t>(p, num_states)) ;
6869  h->set_q_vector(SGVector<float64_t>(q, num_states)) ;
6870 
      // Without a segment path only the first three columns of a_trans are used.
6871  if (seg_path!=NULL)
6872  h->set_a_trans_matrix(SGMatrix<float64_t>(a_trans, num_a_trans, Na_trans)) ;
6873  else
6874  h->set_a_trans_matrix(SGMatrix<float64_t>(a_trans, num_a_trans, 3)) ;
6875 
6876  if (!h->check_svm_arrays())
6877  SG_ERROR( "svm arrays inconsistent\n") ;
6878 
      // Both buffers hold Nmypos_seq+1 elements, but memset and the copy loop
      // below touch only the first Nmypos_seq; the last element stays
      // uninitialised.
6879  int32_t *my_path = SG_MALLOC(int32_t, Nmypos_seq+1);
6880  memset(my_path, -1, Nmypos_seq*sizeof(int32_t)) ;
6881  int32_t *my_pos = SG_MALLOC(int32_t, Nmypos_seq+1);
6882  memset(my_pos, -1, Nmypos_seq*sizeof(int32_t)) ;
6883 
6884  h->set_observation_matrix(SGNDArray<float64_t>(features, feat_dims, 3));
      // NOTE(review): mystate_seq is indexed up to Nmypos_seq-1 although its
      // own length is Nmystate_seq; the two lengths are never compared.
6885  for (int32_t i=0; i<Nmypos_seq; i++)
6886  {
6887  my_path[i] = mystate_seq[i] ;
6888  my_pos[i] = mypos_seq[i] ;
6889  }
6890 
6891  if (seg_path!=NULL)
6892  {
6893  h->best_path_set_segment_loss(SGMatrix<float64_t>(loss, Nloss, Mloss)) ;
6894  seg_loss_obj->set_segment_loss(loss, Nloss, Mloss);
6895  }
6896  else
6897  {
6898  float64_t zero2[2] = {0.0, 0.0} ;
6900  seg_loss_obj->set_segment_loss(zero2, 2, 1);
6901  }
6902  h->set_content_type_array(SGMatrix<float64_t>(seg_path,Nseg_path,Mseg_path));
6903 
      // a_Plif_deriv is a 2-d view onto p_Plif_deriv, used below to write the
      // plif derivatives element by element.
6904  float64_t* p_Plif_deriv = SG_MALLOC(float64_t, (max_plif_id+1)*max_plif_len);
6905  CArray2<float64_t> a_Plif_deriv(p_Plif_deriv, max_plif_id+1, max_plif_len, false, false) ;
6906 
6907  float64_t* p_A_deriv = SG_MALLOC(float64_t, num_states*num_states);
6908  float64_t* p_p_deriv = SG_MALLOC(float64_t, num_states);
6909  float64_t* p_q_deriv = SG_MALLOC(float64_t, num_states);
6910 
6911  h->set_plif_matrices(pm);
6912  h->best_path_trans_deriv(my_path, my_pos, Nmypos_seq, features, feat_dims[2]);
6913 
6914  float64_t* p_my_scores;
6915  int32_t n_scores;
6916  h->get_path_scores(&p_my_scores, &n_scores);
6917 
6918  float64_t* p_my_losses;
6919  int32_t n_losses;
6920  h->get_path_losses(&p_my_losses, &n_losses);
6921 
6922  for (int32_t i=0; i<num_states; i++)
6923  {
6924  for (int32_t j=0; j<num_states; j++)
6925  p_A_deriv[i+j*num_states] = h->get_a_deriv(i, j) ;
6926 
6927  p_p_deriv[i]=h->get_p_deriv(i) ;
6928  p_q_deriv[i]=h->get_q_deriv(i) ;
6929  }
6930 
6931  for (int32_t id=0; id<=max_plif_id; id++)
6932  {
6933  int32_t len=0 ;
6934  const float64_t * deriv = PEN[id]->get_cum_derivative(len) ;
6935  ASSERT(len<=max_plif_len) ;
      // NOTE(review): deriv[j] is read for every j<max_plif_len even when len
      // is smaller - confirm the returned buffer is long enough.
6936  for (int32_t j=0; j<max_plif_len; j++)
6937  a_Plif_deriv.element(id, j)= deriv[j] ;
6938  }
6939 
6940  set_vector(p_p_deriv, num_states);
6941  set_vector(p_q_deriv, num_states);
6942  set_matrix(p_A_deriv, num_states, num_states);
6943  set_matrix(p_Plif_deriv, (max_plif_id+1), max_plif_len);
      // NOTE(review): Nmypos_seq is used as the length here; the n_scores and
      // n_losses values reported by the getters above are not used.
6944  set_vector(p_my_scores, Nmypos_seq);
6945  set_vector(p_my_losses, Nmypos_seq);
6946 
6947  SG_FREE(p_A_deriv);
6948  SG_FREE(p_p_deriv);
6949  SG_FREE(p_q_deriv);
6950  SG_FREE(p_Plif_deriv);
      // NOTE(review): released with free() rather than SG_FREE - presumably the
      // getters allocate with malloc; confirm.
6951  free(p_my_scores);
6952  free(p_my_losses);
6953 
6954  SG_FREE(my_path);
6955  SG_FREE(my_pos);
6956 
6957  SG_FREE(p);
6958  SG_FREE(q);
6959  SG_FREE(seg_path);
6960  SG_FREE(a_trans);
6961  SG_FREE(loss);
6962  SG_FREE(mystate_seq);
6963  SG_FREE(mypos_seq);
6964 
6965  return true ;
6966 }
6967 
6968 bool CSGInterface::cmd_precompute_subkernels()
6969 {
6970  if (m_nrhs!=1 || !create_return_values(0))
6971  return false;
6972 
6973  return ui_kernel->precompute_subkernels();
6974 }
6975 bool CSGInterface::cmd_crc()
6976 {
6977  if (m_nrhs!=2 || !create_return_values(1))
6978  return false;
6979 
6980  int32_t slen=0;
6981  char* string=get_string(slen);
6982  ASSERT(string);
6983  uint8_t* bstring=SG_MALLOC(uint8_t, slen);
6984 
6985  for (int32_t i=0; i<slen; i++)
6986  bstring[i]=string[i];
6987  SG_FREE(string);
6988 
6989  int32_t val=CHash::crc32(bstring, slen);
6990  SG_FREE(bstring);
6991  set_int(val);
6992 
6993  return true;
6994 }
6995 
6996 bool CSGInterface::cmd_system()
6997 {
6998  if (m_nrhs<2 || !create_return_values(0))
6999  return false;
7000 
7001  int32_t len=0;
7002  char* command=SG_MALLOC(char, 10000);
7003  memset(command, 0, sizeof(char)*10000);
7004  char* cmd=get_str_from_str_or_direct(len);
7005  strncat(command, cmd, 10000);
7006  SG_FREE(cmd);
7007 
7008  while (m_rhs_counter<m_nrhs)
7009  {
7010  strncat(command, " ", 10000);
7011  char* arg=get_str_from_str_or_direct(len);
7012  strncat(command, arg, 10000);
7013  SG_FREE(arg);
7014  }
7015 
7016  int32_t success=system(command);
7017 
7018  return (success==0);
7019 }
7020 
7021 bool CSGInterface::cmd_exit()
7022 {
7023  exit(0);
7024  return 0; //never reached but necessary to keep sun compiler happy
7025 }
7026 
7027 bool CSGInterface::cmd_exec()
7028 {
7029  if (m_nrhs<2 || !create_return_values(0))
7030  return false;
7031 
7032  int32_t len=0;
7033  char* filename=get_str_from_str_or_direct(len);
7034  FILE* file=fopen(filename, "r");
7035  if (!file)
7036  {
7037  SG_FREE(filename);
7038  SG_ERROR("Error opening file: %s.\n", filename);
7039  }
7040 
7041  while (!feof(file))
7042  {
7043  // FIXME: interpret lines as input
7044  break;
7045  }
7046 
7047  fclose(file);
7048  return true;
7049 }
7050 
7051 bool CSGInterface::cmd_set_output()
7052 {
7053  if (m_nrhs<2 || !create_return_values(0))
7054  return false;
7055 
7056  int32_t len=0;
7057  char* filename=get_str_from_str_or_direct(len);
7058 
7059  if (file_out)
7060  fclose(file_out);
7061  file_out=NULL;
7062 
7063  SG_INFO("Setting output file to: %s.\n", filename);
7064 
7065  if (strmatch(filename, "STDERR"))
7066  io->set_target(stderr);
7067  else if (strmatch(filename, "STDOUT"))
7068  io->set_target(stdout);
7069  else
7070  {
7071  file_out=fopen(filename, "w");
7072  if (!file_out)
7073  SG_ERROR("Error opening output file %s.\n", filename);
7074  io->set_target(file_out);
7075  }
7076 
7077  return true;
7078 }
7079 
7080 bool CSGInterface::cmd_set_threshold()
7081 {
7082  if (m_nrhs!=2 || !create_return_values(0))
7083  return false;
7084 
7085  float64_t value=get_real_from_real_or_str();
7086 
7087  ui_math->set_threshold(value);
7088  return true;
7089 }
7090 
7091 bool CSGInterface::cmd_init_random()
7092 {
7093  if (m_nrhs!=2 || !create_return_values(0))
7094  return false;
7095 
7096  uint32_t initseed=(uint32_t) get_int_from_int_or_str();
7097  ui_math->init_random(initseed);
7098 
7099  return true;
7100 }
7101 
7102 bool CSGInterface::cmd_set_num_threads()
7103 {
7104  if (m_nrhs!=2 || !create_return_values(0))
7105  return false;
7106 
7107  int32_t num_threads=get_int_from_int_or_str();
7108 
7109  parallel->set_num_threads(num_threads);
7110  SG_INFO("Set number of threads to %d.\n", num_threads);
7111 
7112  return true;
7113 }
7114 
7115 bool CSGInterface::cmd_translate_string()
7116 {
7117  if (m_nrhs!=4 || !create_return_values(1))
7118  return false;
7119 
7120  float64_t* string=NULL;
7121  int32_t len;
7122  get_vector(string, len);
7123 
7124  int32_t order=get_int();
7125  int32_t start=get_int();
7126 
7127  const int32_t max_val=2; /* DNA->2bits */
7128  int32_t i,j;
7129  uint16_t* obs=SG_MALLOC(uint16_t, len);
7130 
7131  for (i=0; i<len; i++)
7132  {
7133  switch ((char) string[i])
7134  {
7135  case 'A': obs[i]=0; break;
7136  case 'C': obs[i]=1; break;
7137  case 'G': obs[i]=2; break;
7138  case 'T': obs[i]=3; break;
7139  case 'a': obs[i]=0; break;
7140  case 'c': obs[i]=1; break;
7141  case 'g': obs[i]=2; break;
7142  case 't': obs[i]=3; break;
7143  default: SG_ERROR("Wrong letter in string.\n");
7144  }
7145  }
7146 
7147  //convert interval of size T
7148  for (i=len-1; i>=order-1; i--)
7149  {
7150  uint16_t value=0;
7151  for (j=i; j>=i-order+1; j--)
7152  value=(value>>max_val) | ((obs[j])<<(max_val*(order-1)));
7153 
7154  obs[i]=(uint16_t) value;
7155  }
7156 
7157  for (i=order-2;i>=0;i--)
7158  {
7159  uint16_t value=0;
7160  for (j=i; j>=i-order+1; j--)
7161  {
7162  value= (value >> max_val);
7163  if (j>=0)
7164  value|=(obs[j]) << (max_val * (order-1));
7165  }
7166  obs[i]=value;
7167  }
7168 
7169  float64_t* real_obs=SG_MALLOC(float64_t, len);
7170  for (i=start; i<len; i++)
7171  real_obs[i-start]=(float64_t) obs[i];
7172  SG_FREE(obs);
7173 
7174  set_vector(real_obs, len);
7175  SG_FREE(real_obs);
7176 
7177  return true;
7178 }
7179 
7180 bool CSGInterface::cmd_clear()
7181 {
7182  // reset guilib
7183  SG_UNREF(ui_classifier);
7184  ui_classifier=new CGUIClassifier(this);
7185  SG_UNREF(ui_distance);
7186  ui_distance=new CGUIDistance(this);
7187  SG_UNREF(ui_features);
7188  ui_features=new CGUIFeatures(this);
7189  SG_UNREF(ui_hmm);
7190  ui_hmm=new CGUIHMM(this);
7191  SG_UNREF(ui_kernel);
7192  ui_kernel=new CGUIKernel(this);
7193  SG_UNREF(ui_labels);
7194  ui_labels=new CGUILabels(this);
7195  SG_UNREF(ui_math);
7196  ui_math=new CGUIMath(this);
7197  SG_UNREF(ui_pluginestimate);
7198  ui_pluginestimate=new CGUIPluginEstimate(this);
7199  SG_UNREF(ui_preproc);
7200  ui_preproc=new CGUIPreprocessor(this);
7201  SG_UNREF(ui_time);
7202  ui_time=new CGUITime(this);
7203 
7204  return true;
7205 }
7206 
7207 bool CSGInterface::cmd_tic()
7208 {
7209  ui_time->start();
7210  return true;
7211 }
7212 
7213 bool CSGInterface::cmd_toc()
7214 {
7215  ui_time->stop();
7216  return true;
7217 }
7218 
7219 bool CSGInterface::cmd_print()
7220 {
7221  if (m_nrhs<2 || !create_return_values(0))
7222  return false;
7223 
7224  int32_t len=0;
7225  char* msg=get_str_from_str_or_direct(len);
7226 
7227  SG_PRINT("%s\n", msg);
7228 
7229  SG_FREE(msg);
7230  return true;
7231 }
7232 
7233 bool CSGInterface::cmd_echo()
7234 {
7235  if (m_nrhs<2 || !create_return_values(0))
7236  return false;
7237 
7238  int32_t len=0;
7239  char* level=get_str_from_str_or_direct(len);
7240 
7241  if (strmatch(level, "OFF"))
7242  {
7243  echo=false;
7244  SG_INFO("Echo is off.\n");
7245  }
7246  else
7247  {
7248  echo=true;
7249  SG_INFO("Echo is on.\n");
7250  }
7251 
7252  SG_FREE(level);
7253  return true;
7254 }
7255 
7256 bool CSGInterface::cmd_loglevel()
7257 {
7258  if (m_nrhs<2 || !create_return_values(0))
7259  return false;
7260 
7261  int32_t len=0;
7262  char* level=get_str_from_str_or_direct(len);
7263 
7264  if (strmatch(level, "ALL") || strmatch(level, "GCDEBUG"))
7265  io->set_loglevel(MSG_GCDEBUG);
7266  else if (strmatch(level, "DEBUG"))
7267  io->set_loglevel(MSG_DEBUG);
7268  else if (strmatch(level, "INFO"))
7269  io->set_loglevel(MSG_INFO);
7270  else if (strmatch(level, "NOTICE"))
7271  io->set_loglevel(MSG_NOTICE);
7272  else if (strmatch(level, "WARN"))
7273  io->set_loglevel(MSG_WARN);
7274  else if (strmatch(level, "ERROR"))
7275  io->set_loglevel(MSG_ERROR);
7276  else if (strmatch(level, "CRITICAL"))
7277  io->set_loglevel(MSG_CRITICAL);
7278  else if (strmatch(level, "ALERT"))
7279  io->set_loglevel(MSG_ALERT);
7280  else if (strmatch(level, "EMERGENCY"))
7281  io->set_loglevel(MSG_EMERGENCY);
7282  else
7283  SG_ERROR("Unknown loglevel '%s'.\n", level);
7284 
7285  SG_INFO("Loglevel set to %s.\n", level);
7286 
7287  SG_FREE(level);
7288  return true;
7289 }
7290 
// Enables or disables syntax highlighting on the io object depending on
// whether the string argument is "ON" or "OFF"; any other value is reported
// via SG_ERROR. Requires at least two right-hand-side values and no return
// values.
// NOTE(review): this numbered listing skips lines 7301 and 7306, so each
// branch may contain a statement that is not visible here.
7291 bool CSGInterface::cmd_syntax_highlight()
7292 {
7293  if (m_nrhs<2 || !create_return_values(0))
7294  return false;
7295 
7296  int32_t len=0;
7297  char* hili=get_str_from_str_or_direct(len);
7298 
7299  if (strmatch(hili, "ON"))
7300  {
7302  io->enable_syntax_highlighting();
7303  }
7304  else if (strmatch(hili, "OFF"))
7305  {
7307  io->disable_syntax_highlighting();
7308  }
7309  else
7310  SG_ERROR("arguments to " N_SYNTAX_HIGHLIGHT " are ON|OFF - found '%s'.\n", hili);
7311 
7312  SG_INFO("Syntax hilighting set to %s.\n", hili);
7313 
7314  SG_FREE(hili);
7315  return true;
7316 }
7317 
7318 bool CSGInterface::cmd_progress()
7319 {
7320  if (m_nrhs<2 || !create_return_values(0))
7321  return false;
7322 
7323  int32_t len=0;
7324  char* progress=get_str_from_str_or_direct(len);
7325 
7326  if (strmatch(progress, "ON"))
7327  io->enable_progress();
7328  else if (strmatch(progress, "OFF"))
7329  io->disable_progress();
7330  else
7331  SG_ERROR("arguments to progress are ON|OFF - found '%s'.\n", progress);
7332 
7333  SG_INFO("Progress set to %s.\n", progress);
7334 
7335  SG_FREE(progress);
7336  return true;
7337 }
7338 
7339 bool CSGInterface::cmd_get_version()
7340 {
7341  if (m_nrhs!=1 || !create_return_values(1))
7342  return false;
7343 
7344  set_int(version->get_version_revision());
7345 
7346  return true;
7347 }
7348 
// Prints help derived from the sg_methods table. Without an argument the
// group names are listed; with one argument the function prints a doxygen
// listing ("doxygen"), a brief listing of all commands ("all"), the commands
// of a group, or the usage of a single command.
// NOTE(review): this numbered listing skips many lines inside this function
// (e.g. 7370, 7372, 7385-7390, 7428, 7430, 7436, 7438, 7451, 7453, 7465,
// 7467, 7469, 7471, 7478, 7480, 7498-7501). Several SG_PRINT calls are
// therefore visibly incomplete here - confirm against the original file
// before changing any code.
7349 bool CSGInterface::cmd_help()
7350 {
7351  if ((m_nrhs!=1 && m_nrhs!=2) || !create_return_values(0))
7352  return false;
7353 
7354  int32_t i=0;
7355 
7356  SG_PRINT("\n");
7357  if (m_nrhs==1) // unspecified help
7358  {
7359  SG_PRINT("Help is available for the following topics.\n"
7360  "-------------------------------------------\n\n");
      // A table entry with neither a method nor a usage prefix is a group
      // heading; the table ends at the first entry without a command name.
7361  while (sg_methods[i].command)
7362  {
7363  bool is_group_item=false;
7364  if (!sg_methods[i].method && !sg_methods[i].usage_prefix)
7365  is_group_item=true;
7366 
7367  if (is_group_item)
7368  {
7369  SG_PRINT("%s%s%s\n",
7371  sg_methods[i].command,
7373  }
7374 
7375  i++;
7376  }
7377  SG_PRINT("\nUse sg('%shelp%s', '%s<topic>%s')"
7378  " to see the list of commands in this group, e.g.\n\n"
7379  "\tsg('%shelp%s', '%sFeatures%s')\n\n"
7380  "to see the list of commands for the 'Features' group.\n"
7381  "\nOr use sg('%shelp%s', '%sall%s')"
7382  " to see a brief listing of all commands.\n\nTo disable syntax"
7383  " highlighting (useful e.g. in the matlab GUI) use\n\n"
7384  "\tsg('syntax_highlight','OFF')\n",
7391  }
7392  else // m_nrhs == 2 -> all commands, single command or group help
7393  {
7394  bool found=false;
7395  bool in_group=false;
7396  int32_t clen=0;
7397  char* command=get_string(clen);
7398 
7399  if (strmatch("doxygen", command) || strmatch("DOXYGEN", command))
7400  {
7401  found=true;
7402  while (sg_methods[i].command)
7403  {
7404  if (sg_methods[i].usage_prefix) // display group item
7405  {
7406  SG_PRINT("\\arg \\b %s \\verbatim %s%s%s \\endverbatim\n",
7407  sg_methods[i].command,
7408  sg_methods[i].usage_prefix,
7409  sg_methods[i].command,
7410  sg_methods[i].usage_suffix);
7411  }
7412  else if (!sg_methods[i].method) // display group
7413  {
7414  SG_PRINT("\n\\section %s_sec %s\n",
7415  sg_methods[i].command, sg_methods[i].command);
7416  }
7417  i++;
7418  }
7419  }
      // This is a separate "if", not an "else if": after the doxygen branch i
      // already indexes the table terminator, so the else-loop below ends
      // immediately and found stays true.
7420  if (strmatch("all", command) || strmatch("ALL", command))
7421  {
7422  found=true;
7423  while (sg_methods[i].command)
7424  {
7425  if (sg_methods[i].usage_prefix) // display group item
7426  {
7427  SG_PRINT("\t%s%s%s%s%s\n", sg_methods[i].usage_prefix,
7429  sg_methods[i].command,
7431  sg_methods[i].usage_suffix);
7432  }
7433  else if (!sg_methods[i].method) // display group
7434  {
7435  SG_PRINT("\nCommands in group %s%s%s\n",
7437  sg_methods[i].command,
7439  }
7440  i++;
7441  }
7442  }
7443  else
7444  {
      // Searches for an entry whose name matches the argument. A matching
      // command prints its usage and stops; a matching group switches to
      // in_group mode, which lists entries until the next group heading.
7445  while (sg_methods[i].command)
7446  {
7447  if (in_group)
7448  {
7449  if (sg_methods[i].usage_prefix) // display group item
7450  SG_PRINT("\t%s%s%s\n",
7452  sg_methods[i].command,
7454  else // next group reached -> end
7455  break;
7456  }
7457  else
7458  {
7459  found=strmatch(sg_methods[i].command, command);
7460  if (found)
7461  {
7462  if (sg_methods[i].usage_prefix) // found item
7463  {
7464  SG_PRINT("Usage for %s%s%s\n\n\t%s%s%s%s%s\n",
7466  sg_methods[i].command,
7468  sg_methods[i].usage_prefix,
7470  sg_methods[i].command,
7472  sg_methods[i].usage_suffix);
7473  break;
7474  }
7475  else // found group item
7476  {
7477  SG_PRINT("Commands in group %s%s%s\n\n",
7479  sg_methods[i].command,
7481  in_group=true;
7482  }
7483  }
7484  }
7485 
7486  i++;
7487  }
7488  }
7489 
7490  if (!found)
7491  SG_PRINT("Could not find help for command %s.\n", command);
7492  else if (in_group)
7493  {
7494  SG_PRINT("\n\nUse sg('%shelp%s', '%s<command>%s')"
7495  " to see the usage pattern of a single command, e.g.\n\n"
7496  "\tsg('%shelp%s', '%sclassify%s')\n\n"
7497  " to see the usage pattern of the command 'classify'.\n",
7502  }
7503 
7504  SG_FREE(command);
7505  }
7506 
7507 
7508  SG_PRINT("\n");
7509 
7510  return true;
7511 }
// Declaration of a set of MemoryBlock entries defined in another translation
// unit; only visible when TRACE_MEMORY_ALLOCS is defined. Presumably the
// registry of tracked allocations - confirm.
7512 #ifdef TRACE_MEMORY_ALLOCS
7513  extern CSet<MemoryBlock>* sg_mallocs;
7514 #endif
7515 
7516 bool CSGInterface::cmd_whos()
7517 {
7518  if ((m_nrhs!=1) || !create_return_values(0))
7519  return false;
7520 
7521 #ifdef TRACE_MEMORY_ALLOCS
7522  SG_PRINT("Blocks allocated by shogun\n");
7523  list_memory_allocs();
7524  SG_PRINT("\n");
7525  return true;
7526 #else
7527  SG_PRINT("Requires shogun to be compiled with --enable-trace-mallocs\n");
7528  return false;
7529 #endif
7530 }
7531 
// Deprecated entry point for legacy commands given as a single string: the
// first token of the string is looked up in sg_methods and the matching
// method is invoked on the global interface object.
// NOTE(review): this numbered listing skips lines 7556, 7558, 7560 and 7562
// inside the SG_ERROR call below, so its argument list is incomplete here -
// confirm against the original file before changing any code.
7532 bool CSGInterface::cmd_send_command()
7533 {
7534  SG_DEPRECATED;
7535 
7536  int32_t len=0;
7537  char* arg=get_string(len);
7538  //SG_DEBUG("legacy: arg == %s\n", arg);
      // The raw argument string is stored in m_legacy_strptr before the command
      // name is extracted - presumably the get_*_from_str helpers read from it.
7539  m_legacy_strptr=arg;
7540 
7541  char* command=get_str_from_str(len);
7542  int32_t i=0;
7543  bool success=false;
7544 
7545  while (sg_methods[i].command)
7546  {
7547  if (strmatch(command, sg_methods[i].command))
7548  {
7549  SG_DEBUG("legacy: found command %s\n", sg_methods[i].command);
7550  // fix-up m_nrhs; +1 to include command
7551  m_nrhs=get_num_args_in_str()+1;
7552 
      // A method that returns false triggers the usage message; if SG_ERROR
      // returns, the search simply continues with the next table entry.
7553  if (!(interface->*(sg_methods[i].method))())
7554  {
7555  SG_ERROR("Usage: %s%s%s\n\n\t%s%s%s%s%s\n",
7557  sg_methods[i].command,
7559  sg_methods[i].usage_prefix,
7561  sg_methods[i].command,
7563  sg_methods[i].usage_suffix);
7564  }
7565  else
7566  {
7567  success=true;
7568  break;
7569  }
7570  }
7571 
7572  i++;
7573  }
7574 
7575  if (!success)
7576  SG_ERROR("Non-supported legacy command %s.\n", command);
7577 
7578  SG_FREE(command);
7579  SG_FREE(arg);
7580  return success;
7581 }
7582 
/** Placeholder for the run_python command in this interface.
 *
 * Always reports via SG_ERROR that the command is only available in the
 * elwms interface.
 *
 * @return false (reached only if SG_ERROR returns control)
 */
7583 bool CSGInterface::cmd_run_python()
7584 {
7585  SG_ERROR("Only available in the elwms interface\n");
7586  return false;
7587 }
7588 
/** Placeholder for the run_octave command in this interface.
 *
 * Always reports via SG_ERROR that the command is only available in the
 * elwms interface.
 *
 * @return false (reached only if SG_ERROR returns control)
 */
7589 bool CSGInterface::cmd_run_octave()
7590 {
7591  SG_ERROR("Only available in the elwms interface\n");
7592  return false;
7593 }
7594 
/** Placeholder for the run_r command in this interface.
 *
 * Always reports via SG_ERROR that the command is only available in the
 * elwms interface.
 *
 * @return false (reached only if SG_ERROR returns control)
 */
7595 bool CSGInterface::cmd_run_r()
7596 {
7597  SG_ERROR("Only available in the elwms interface\n");
7598  return false;
7599 }
7600 
7601 bool CSGInterface::cmd_pr_loqo()
7602 {
7603  if (m_nrhs!=7 || !create_return_values(2))
7604  return false;
7605 
7606  float64_t* c=NULL;
7607  int32_t lenc=0;
7608  get_vector(c, lenc);
7609 
7610  int32_t n = lenc;
7611 
7612  float64_t* H=NULL;
7613  int32_t nH=0;
7614  int32_t mH=0;
7615  get_matrix(H, nH, mH);
7616  ASSERT(nH==n && mH==n);
7617 
7618  float64_t* A=NULL;
7619  int32_t nA=0;
7620  int32_t mA=0;
7621  get_matrix(A, nA, mA);
7622  ASSERT(mA==n);
7623  int32_t m=nA;
7624 
7625  float64_t* b=NULL;
7626  int32_t lenb=0;
7627  get_vector(b, lenb);
7628  ASSERT(lenb==m);
7629 
7630  float64_t* l=NULL;
7631  int32_t lenl=0;
7632  get_vector(l, lenl);
7633  ASSERT(lenl==n);
7634 
7635  float64_t* u=NULL;
7636  int32_t lenu=0;
7637  get_vector(u, lenu);
7638  ASSERT(lenu==n);
7639 
7640  float64_t* x=SG_MALLOC(float64_t, 3*n);
7641  CMath::fill_vector(x, 3*n, 0.0);
7642 
7643  float64_t* y=SG_MALLOC(float64_t, m+2*n);
7644  CMath::fill_vector(y, m+2*n, 0.0);
7645 
7646  pr_loqo(n,m, c, H, A, b, l, u, x, y, 0, 5, 50, 0.05, 100, 0);
7647 
7648  set_vector(x, n);
7649  set_vector(y, m);
7650 
7651  SG_FREE(c);
7652  SG_FREE(H);
7653  SG_FREE(A);
7654  SG_FREE(b);
7655  SG_FREE(l);
7656  SG_FREE(u);
7657  SG_FREE(x);
7658  SG_FREE(y);
7659  return true;
7660 }
7661 
/** Print the interactive prompt ("shogun >> ") through SG_PRINT. */
7662 void CSGInterface::print_prompt()
7663 {
// NOTE(review): the format string expects two %s arguments (text placed
// before and after "shogun"), but the argument lines are not visible in this
// listing -- check against the real source file.
7664  SG_PRINT("%sshogun%s >> ",
7667 }
7668 
7670 // legacy-related methods
7672 
7673 char* CSGInterface::get_str_from_str_or_direct(int32_t& len)
7674 {
7675  if (m_legacy_strptr)
7676  return get_str_from_str(len);
7677  else
7678  return get_string(len);
7679 }
7680 
7681 int32_t CSGInterface::get_int_from_int_or_str()
7682 {
7683  if (m_legacy_strptr)
7684  {
7685  int32_t len=0;
7686  char* str=get_str_from_str(len);
7687  int32_t val=strtol(str, NULL, 10);
7688 
7689  SG_FREE(str);
7690  return val;
7691  }
7692  else
7693  return get_int();
7694 }
7695 
7696 float64_t CSGInterface::get_real_from_real_or_str()
7697 {
7698  if (m_legacy_strptr)
7699  {
7700  int32_t len=0;
7701  char* str=get_str_from_str(len);
7702  float64_t val=strtod(str, NULL);
7703 
7704  SG_FREE(str);
7705  return val;
7706  }
7707  else
7708  return get_real();
7709 }
7710 
7711 bool CSGInterface::get_bool_from_bool_or_str()
7712 {
7713  if (m_legacy_strptr)
7714  {
7715  int32_t len=0;
7716  char* str=get_str_from_str(len);
7717  bool val=strtol(str, NULL, 10)!=0;
7718 
7719  SG_FREE(str);
7720  return val;
7721  }
7722  else
7723  return get_bool();
7724 }
7725 
7726 void CSGInterface::get_vector_from_int_vector_or_str(int32_t*& vector, int32_t& len)
7727 {
7728  if (m_legacy_strptr)
7729  {
7730  len=get_vector_len_from_str(len);
7731  if (len==0)
7732  {
7733  vector=NULL;
7734  return;
7735  }
7736 
7737  vector=SG_MALLOC(int32_t, len);
7738  char* str=NULL;
7739  int32_t slen=0;
7740  for (int32_t i=0; i<len; i++)
7741  {
7742  str=get_str_from_str(slen);
7743  vector[i]=strtol(str, NULL, 10);
7744  //SG_DEBUG("vec[%d]: %d\n", i, vector[i]);
7745  SG_FREE(str);
7746  }
7747  }
7748  else
7749  get_vector(vector, len);
7750 }
7751 
7752 void CSGInterface::get_vector_from_real_vector_or_str(
7753  float64_t*& vector, int32_t& len)
7754 {
7755  if (m_legacy_strptr)
7756  {
7757  len=get_vector_len_from_str(len);
7758  if (len==0)
7759  {
7760  vector=NULL;
7761  return;
7762  }
7763 
7764  vector=SG_MALLOC(float64_t, len);
7765  char* str=NULL;
7766  int32_t slen=0;
7767  for (int32_t i=0; i<len; i++)
7768  {
7769  str=get_str_from_str(slen);
7770  vector[i]=strtod(str, NULL);
7771  //SG_DEBUG("vec[%d]: %f\n", i, vector[i]);
7772  SG_FREE(str);
7773  }
7774  }
7775  else
7776  get_vector(vector, len);
7777 }
7778 
7779 int32_t CSGInterface::get_vector_len_from_str(int32_t expected_len)
7780 {
7781  int32_t num_args=get_num_args_in_str();
7782 
7783  if (expected_len==0 || num_args==expected_len)
7784  return num_args;
7785  else if (num_args==2*expected_len)
7786  {
7787  // special case for position_weights; a bit shaky...
7788  return expected_len;
7789  }
7790  else
7791  SG_ERROR("Expected vector length %d does not match actual length %d.\n", expected_len, num_args);
7792 
7793  return 0;
7794 }
7795 
7796 char* CSGInterface::get_str_from_str(int32_t& len)
7797 {
7798  if (!m_legacy_strptr)
7799  return NULL;
7800 
7801  int32_t i=0;
7802  while (m_legacy_strptr[i]!='\0' && !isspace(m_legacy_strptr[i]))
7803  i++;
7804 
7805  len=i;
7806  char* str=SG_MALLOC(char, len+1);
7807  for (i=0; i<len; i++)
7808  str[i]=m_legacy_strptr[i];
7809  str[len]='\0';
7810 
7811  // move legacy strptr
7812  if (m_legacy_strptr[len]=='\0')
7813  m_legacy_strptr=NULL;
7814  else
7815  {
7816  m_legacy_strptr=m_legacy_strptr+len;
7817  m_legacy_strptr=SGIO::skip_spaces(m_legacy_strptr);
7818  }
7819 
7820  return str;
7821 }
7822 
7823 int32_t CSGInterface::get_num_args_in_str()
7824 {
7825  if (!m_legacy_strptr)
7826  return 0;
7827 
7828  int32_t count=0;
7829  int32_t i=0;
7830  bool in_arg=false;
7831  while (m_legacy_strptr[i]!='\0')
7832  {
7833  if (!isspace(m_legacy_strptr[i]) && !in_arg)
7834  {
7835  count++;
7836  in_arg=true;
7837  }
7838  else if (isspace(m_legacy_strptr[i]) && in_arg)
7839  in_arg=false;
7840 
7841  i++;
7842  }
7843 
7844  return count;
7845 }
7846 
7848 // handler
7850 
/** Dispatch the command currently pending on the interface.
 *
 * Obtains the command name via interface->get_command(), looks it up in
 * sg_methods and invokes the associated member function. A failing method
 * is reported through SG_ERROR, with a usage message when the table entry
 * provides one.
 *
 * NOTE(review): several lines of this function (content of both
 * '#ifndef WIN32' sections and some arguments of the SG_DEBUG/SG_ERROR
 * calls) are not visible in this listing -- check against the real source.
 *
 * @return true if a matching method was found and reported success
 */
7851 bool CSGInterface::handle()
7852 {
7853  int32_t len=0;
7854  bool success=false;
7855 
7856 #ifndef WIN32
7858 #endif
7859 
7860  char* command=NULL;
7861  command=interface->get_command(len);
7862 
7863  SG_DEBUG("command: %s, nrhs %d\n", command, m_nrhs);
7864  int32_t i=0;
// sg_methods is scanned until an entry with a NULL command is reached
7865  while (sg_methods[i].command)
7866  {
7867  if (strmatch(command, sg_methods[i].command))
7868  {
7869  SG_DEBUG("found command %s%s%s\n",
7871  sg_methods[i].command,
7873 
// NOTE(review): group header entries of sg_methods (e.g. { "Features",
// NULL, NULL, NULL }) carry a NULL method; if 'command' matches such an
// entry the member pointer is invoked without a NULL check -- confirm
// whether a guard is needed before this call.
7874  if (!(interface->*(sg_methods[i].method))())
7875  {
// a usage message can only be shown for entries that carry usage strings
7876  if (sg_methods[i].usage_prefix)
7877  {
7878  SG_ERROR("Usage: %s%s%s\n\n\t%s%s%s%s%s\n",
7880  sg_methods[i].command,
7882  sg_methods[i].usage_prefix,
7884  sg_methods[i].command,
7886  sg_methods[i].usage_suffix);
7887  }
7888  else
7889  SG_ERROR("Non-supported command %s%s%s.\n",
7891  sg_methods[i].command,
7893  }
7894  else
7895  {
7896  success=true;
7897  break;
7898  }
7899  }
7900  i++;
7901  }
7902 
7903 #ifndef WIN32
7905 #endif
7906 
7907  if (!success)
7908  SG_ERROR("Unknown command %s%s%s.\n",
7910  command,
7912 
7913  SG_FREE(command);
7914  return success;
7915 }

SHOGUN Machine Learning Toolbox - Documentation