This page lists ready-to-run Shogun examples for the static Python interface.
To run an example, issue the following command:
python name_of_example.py
def gmnpsvm():
    """Train a GMNPSVM classifier with a GAUSSIAN kernel, then classify.

    Reads the module-level globals ``fm_train_real``, ``fm_test_real`` and
    ``label_train_multiclass`` that the ``__main__`` section loads.
    The classification output is kept in a local and not returned.
    """
    print('GMNPSVM')

    from sg import sg

    cache_size = 10
    kernel_width = 2.1
    svm_c = 1.2
    svm_eps = 1e-5
    with_bias = False

    # Training features, kernel and labels.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', cache_size, kernel_width)
    sg('set_labels', 'TRAIN', label_train_multiclass)

    # Classifier configuration and training.
    sg('new_classifier', 'GMNPSVM')
    sg('svm_epsilon', svm_eps)
    sg('c', svm_c)
    sg('svm_use_bias', with_bias)
    sg('train_classifier')

    # Classify with the test features set.
    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_multiclass = loader.load_labels(
        '../data/label_train_multiclass.dat')
    gmnpsvm()
def gpbtsvm():
    """Train a GPBTSVM classifier with a GAUSSIAN kernel, then classify.

    Reads the module-level globals ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` that the ``__main__`` section loads.
    The classification output is kept in a local and not returned.
    """
    print('GPBTSVM')

    from sg import sg

    cache_size = 10
    kernel_width = 2.1
    svm_c = 1.2
    svm_eps = 1e-5
    with_bias = False

    # Training features, kernel and labels.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', cache_size, kernel_width)
    sg('set_labels', 'TRAIN', label_train_twoclass)

    # Classifier configuration and training.
    sg('new_classifier', 'GPBTSVM')
    sg('svm_epsilon', svm_eps)
    sg('c', svm_c)
    sg('svm_use_bias', with_bias)
    sg('train_classifier')

    # Classify with the test features set.
    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = loader.load_labels(
        '../data/label_train_twoclass.dat')
    gpbtsvm()
def knn():
    """Train a KNN classifier (k=3) on an EUCLIDIAN distance, then classify.

    Reads the module-level globals ``fm_train_real``, ``fm_test_real`` and
    ``label_train_multiclass`` that the ``__main__`` section loads.
    The classification output is kept in a local and not returned.
    """
    print('KNN')

    from sg import sg

    num_neighbours = 3

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_labels', 'TRAIN', label_train_multiclass)
    sg('set_distance', 'EUCLIDIAN', 'REAL')

    sg('new_classifier', 'KNN')
    sg('train_classifier', num_neighbours)

    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_multiclass = loader.load_labels(
        '../data/label_train_multiclass.dat')
    knn()
def lda():
    """Train an LDA classifier on the training data, then classify.

    Reads the module-level globals ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` that the ``__main__`` section loads.
    The classification output is kept in a local and not returned.
    """
    print('LDA')

    from sg import sg

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_labels', 'TRAIN', label_train_twoclass)

    sg('new_classifier', 'LDA')
    sg('train_classifier')

    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = loader.load_labels(
        '../data/label_train_twoclass.dat')
    lda()
def libsvm():
    """Train a LIBSVM classifier with a GAUSSIAN kernel, then classify.

    Reads the module-level globals ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` that the ``__main__`` section loads.
    The classification output is kept in a local and not returned.
    """
    print('LibSVM')

    from sg import sg

    cache_size = 10
    kernel_width = 2.1
    svm_c = 1.2
    svm_eps = 1e-5
    with_bias = False

    # Training features, kernel and labels.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', cache_size, kernel_width)
    sg('set_labels', 'TRAIN', label_train_twoclass)

    # Classifier configuration and training.
    sg('new_classifier', 'LIBSVM')
    sg('svm_epsilon', svm_eps)
    sg('c', svm_c)
    sg('svm_use_bias', with_bias)
    sg('train_classifier')

    # Classify with the test features set.
    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = loader.load_labels(
        '../data/label_train_twoclass.dat')
    libsvm()
def libsvm_multiclass():
    """Train a LIBSVM_MULTICLASS classifier with a GAUSSIAN kernel, then classify.

    Reads the module-level globals ``fm_train_real``, ``fm_test_real`` and
    ``label_train_multiclass`` that the ``__main__`` section loads.
    The classification output is kept in a local and not returned.
    """
    print('LibSVMMultiClass')

    from sg import sg

    cache_size = 10
    kernel_width = 2.1
    svm_c = 10.0
    svm_eps = 1e-5
    with_bias = False

    # Training features, kernel and labels.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', cache_size, kernel_width)
    sg('set_labels', 'TRAIN', label_train_multiclass)

    # Classifier configuration and training.
    sg('new_classifier', 'LIBSVM_MULTICLASS')
    sg('svm_epsilon', svm_eps)
    sg('c', svm_c)
    sg('svm_use_bias', with_bias)
    sg('train_classifier')

    # Classify with the test features set.
    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_multiclass = loader.load_labels(
        '../data/label_train_multiclass.dat')
    libsvm_multiclass()
def libsvm_oneclass():
    """Train a LIBSVM_ONECLASS classifier with a GAUSSIAN kernel, then classify.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``
    that the ``__main__`` section loads; no labels are set.
    The classification output is kept in a local and not returned.
    """
    print('LibSVMOneClass')

    from sg import sg

    cache_size = 10
    kernel_width = 2.1
    svm_c = 10.0
    svm_eps = 1e-5
    with_bias = False

    # Training features and kernel (this example sets no labels).
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', cache_size, kernel_width)

    # Classifier configuration and training.
    sg('new_classifier', 'LIBSVM_ONECLASS')
    sg('svm_epsilon', svm_eps)
    sg('c', svm_c)
    sg('svm_use_bias', with_bias)
    sg('train_classifier')

    # Classify with the test features set.
    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    libsvm_oneclass()
def mpdsvm():
    """Train an MPDSVM classifier with a GAUSSIAN kernel, then classify.

    Reads the module-level globals ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` that the ``__main__`` section loads.
    The classification output is kept in a local and not returned.
    """
    print('MPDSVM')

    from sg import sg

    cache_size = 10
    kernel_width = 2.1
    svm_c = 1.2
    svm_eps = 1e-5
    with_bias = False

    # Training features, kernel and labels.
    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', cache_size, kernel_width)
    sg('set_labels', 'TRAIN', label_train_twoclass)

    # Classifier configuration and training.
    sg('new_classifier', 'MPDSVM')
    sg('svm_epsilon', svm_eps)
    sg('c', svm_c)
    sg('svm_use_bias', with_bias)
    sg('train_classifier')

    # Classify with the test features set.
    sg('set_features', 'TEST', fm_test_real)
    predictions = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = loader.load_labels(
        '../data/label_train_twoclass.dat')
    mpdsvm()
def perceptron():
    """Create a PERCEPTRON classifier on the training data without training it.

    Reads the module-level globals ``fm_train_real`` and
    ``label_train_twoclass`` that the ``__main__`` section loads.
    Training and classification are commented out in this example.
    """
    print('Perceptron')

    from sg import sg

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_labels', 'TRAIN', label_train_twoclass)
    sg('new_classifier', 'PERCEPTRON')

    # The remaining steps stay disabled:
    # often does not converge, mind your data!
    #sg('train_classifier')
    #sg('set_features', 'TEST', fm_test_real)
    #result=sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = loader.load_labels(
        '../data/label_train_twoclass.dat')
    perceptron()
def svm_light():
    """Train an SVMLIGHT classifier with a WEIGHTEDDEGREE kernel, then classify.

    Reads the module-level globals ``fm_train_dna``, ``fm_test_dna`` and
    ``label_train_dna`` that the ``__main__`` section loads. Returns early,
    without training, if creating the classifier raises ``RuntimeError``.
    The classification output is kept in a local and not returned.
    """
    print('SVMLight')

    from sg import sg

    cache_size = 10
    kernel_degree = 20
    svm_c = 1.2
    svm_eps = 1e-5
    with_bias = False

    # Training features, kernel and labels.
    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', cache_size, kernel_degree)
    sg('set_labels', 'TRAIN', label_train_dna)

    try:
        sg('new_classifier', 'SVMLIGHT')
    except RuntimeError:
        # Give up quietly when the classifier cannot be created
        # (presumably SVMLight is not available in this build -- confirm).
        return

    sg('svm_epsilon', svm_eps)
    sg('c', svm_c)
    sg('svm_use_bias', with_bias)
    sg('train_classifier')

    # Classify with the test features set.
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    predictions = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    label_train_dna = loader.load_labels('../data/label_train_dna.dat')
    svm_light()
def hierarchical():
    """Run HIERARCHICAL clustering with an EUCLIDIAN distance.

    Reads the module-level global ``fm_train`` that the ``__main__``
    section loads. The two values returned by ``get_clustering`` are
    unpacked into locals and not returned.
    """
    print('Hierarchical')

    from sg import sg

    merges = 3

    sg('set_features', 'TRAIN', fm_train)
    sg('set_distance', 'EUCLIDIAN', 'REAL')

    sg('new_clustering', 'HIERARCHICAL')
    sg('train_clustering', merges)

    merge_distance, pairs = sg('get_clustering')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train = loader.load_numbers('../data/fm_train_real.dat')
    hierarchical()
def kmeans():
    """Run KMEANS clustering (k=3) with an EUCLIDIAN distance.

    Reads the module-level global ``fm_train`` that the ``__main__``
    section loads. The two values returned by ``get_clustering`` are
    unpacked into locals and not returned.
    """
    print('KMeans')

    from sg import sg

    k = 3
    # Named ``max_iter`` so the builtin ``iter`` is not shadowed.
    max_iter = 1000

    sg('set_features', 'TRAIN', fm_train)
    sg('set_distance', 'EUCLIDIAN', 'REAL')

    sg('new_clustering', 'KMEANS')
    sg('train_clustering', k, max_iter)

    radi, centers = sg('get_clustering')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train = loader.load_numbers('../data/fm_train_real.dat')
    kmeans()
def bray_curtis_distance():
    """Request the BRAYCURTIS distance matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('BrayCurtisDistance')

    from sg import sg

    sg('set_distance', 'BRAYCURTIS', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    bray_curtis_distance()
def canberra_metric():
    """Request the CANBERRA distance matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('CanberraMetric')

    from sg import sg

    sg('set_distance', 'CANBERRA', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    canberra_metric()
def canberra_word_distance():
    """Request CANBERRA distance matrices on DNA strings converted to WORD.

    Reads the module-level globals ``fm_train_dna`` and ``fm_test_dna``.
    Each feature set is converted and gets the SORTWORDSTRING preprocessor
    attached before its matrix is requested. Nothing is returned.
    """
    print('CanberraWordDistance')

    from sg import sg

    word_order = 3
    gap = 0
    # Character flag rather than a boolean; set to 'r' to yield true.
    reverse = 'n'

    sg('set_distance', 'CANBERRA', 'WORD')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TEST')
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    canberra_word_distance()
def chebyshew_metric():
    """Request the CHEBYSHEW distance matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('ChebyshewMetric')

    from sg import sg

    sg('set_distance', 'CHEBYSHEW', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    chebyshew_metric()
def chi_square_distance():
    """Request the CHISQUARE distance matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('ChiSquareDistance')

    from sg import sg

    sg('set_distance', 'CHISQUARE', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    chi_square_distance()
def cosine_distance():
    """Request the COSINE distance matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('CosineDistance')

    from sg import sg

    sg('set_distance', 'COSINE', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    cosine_distance()
def euclidian_distance():
    """Request the EUCLIDIAN distance matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('EuclidianDistance')

    from sg import sg

    sg('set_distance', 'EUCLIDIAN', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    euclidian_distance()
def geodesic_metric():
    """Request the GEODESIC distance matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('GeodesicMetric')

    from sg import sg

    sg('set_distance', 'GEODESIC', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    geodesic_metric()
def hamming_word_distance():
    """Request HAMMING distance matrices on DNA strings converted to WORD.

    Reads the module-level globals ``fm_train_dna`` and ``fm_test_dna``.
    Each feature set is converted and gets the SORTWORDSTRING preprocessor
    attached before its matrix is requested. Nothing is returned.
    """
    print('HammingWordDistance')

    from sg import sg

    word_order = 3
    gap = 0
    # Character flag rather than a boolean; set to 'r' to yield true.
    reverse = 'n'

    sg('set_distance', 'HAMMING', 'WORD')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TEST')
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    hamming_word_distance()
def jensen_metric():
    """Request the JENSEN distance matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('JensenMetric')

    from sg import sg

    sg('set_distance', 'JENSEN', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    jensen_metric()
def manhattan_metric():
    """Request the MANHATTAN distance matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('ManhattanMetric')

    from sg import sg

    sg('set_distance', 'MANHATTAN', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    manhattan_metric()
def manhattan_word_distance():
    """Request MANHATTAN distance matrices on DNA strings converted to WORD.

    Reads the module-level globals ``fm_train_dna`` and ``fm_test_dna``.
    Each feature set is converted and gets the SORTWORDSTRING preprocessor
    attached before its matrix is requested. Nothing is returned.
    """
    print('ManhattanWordDistance')

    from sg import sg

    word_order = 3
    gap = 0
    # Character flag rather than a boolean; set to 'r' to yield true.
    reverse = 'n'

    sg('set_distance', 'MANHATTAN', 'WORD')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TEST')
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    manhattan_word_distance()
def minkowski_metric():
    """Request the MINKOWSKI (k=3.0) distance matrix for 'TRAIN' and 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('MinkowskiMetric')

    from sg import sg

    minkowski_k = 3.0

    sg('set_distance', 'MINKOWSKI', 'REAL', minkowski_k)

    sg('set_features', 'TRAIN', fm_train_real)
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    minkowski_metric()
def tanimoto_distance():
    """Request the TANIMOTO distance matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('TanimotoDistance')

    from sg import sg

    sg('set_distance', 'TANIMOTO', 'REAL')

    sg('set_features', 'TRAIN', fm_train_real)
    train_distances = sg('get_distance_matrix', 'TRAIN')

    sg('set_features', 'TEST', fm_test_real)
    test_distances = sg('get_distance_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    tanimoto_distance()
def histogram():
    """Prepare sorted WORD features from DNA strings for a histogram.

    Reads the module-level global ``fm_train`` that the ``__main__``
    section loads. Only the feature conversion and preprocessing run;
    the distribution-related commands are commented out in this example.
    """
    print('Histogram')

    from sg import sg

    word_order = 3
    gap = 0
    # Character flag rather than a boolean; set to 'r' to yield true.
    reverse = 'n'

    # Disabled in this example:
    # sg('new_distribution', 'HISTOGRAM')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')

    # Disabled in this example:
    # sg('train_distribution')
    # histo=sg('get_histogram')

    # num_examples=11
    # num_param=sg('get_histogram_num_model_parameters')
    # for i in xrange(num_examples):
    #     for j in xrange(num_param):
    #         sg('get_log_derivative %d %d' % (j, i))

    # sg('get_log_likelihood')
    # sg('get_log_likelihood_sample')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train = loader.load_dna('../data/fm_train_dna.dat')
    # fm_cube is loaded here but histogram() does not read it.
    fm_cube = loader.load_cubes('../data/fm_train_cube.dat')
    histogram()
def hmm():
    """Train an HMM via the 'bw' command, reload it, and query its likelihood.

    Reads the module-level global ``fm_cube`` that the ``__main__`` section
    loads. After training, the model is fetched with ``get_hmm``, a fresh
    HMM is created and the first four fetched components are fed back in
    through ``set_hmm``. The likelihood is kept in a local and not returned.
    """
    print('HMM')

    from sg import sg

    N = 3
    M = 6
    order = 1

    sg('new_hmm', N, M)
    sg('set_features', 'TRAIN', fm_cube, 'CUBE')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD', order)
    sg('bw')

    # Named ``model`` so the function's own name is not shadowed.
    model = sg('get_hmm')

    sg('new_hmm', N, M)
    sg('set_hmm', model[0], model[1], model[2], model[3])
    likelihood = sg('hmm_likelihood')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    # fm_train is loaded here but hmm() does not read it.
    fm_train = loader.load_dna('../data/fm_train_dna.dat')
    fm_cube = loader.load_cubes('../data/fm_train_cube.dat')
    hmm()
def linear_hmm():
    """Prepare sorted WORD features from DNA strings for a linear HMM.

    Reads the module-level global ``fm_train`` that the ``__main__``
    section loads. Only the feature conversion and preprocessing run;
    the distribution-related commands are commented out in this example.
    """
    print('LinearHMM')

    from sg import sg

    word_order = 3
    gap = 0
    # Character flag rather than a boolean; set to 'r' to yield true.
    reverse = 'n'

    # Disabled in this example:
    # sg('new_distribution', 'LinearHMM')
    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')

    # Disabled in this example:
    # sg('train_distribution')
    # histo=sg('get_histogram')

    # num_examples=11
    # num_param=sg('get_histogram_num_model_parameters')
    # for i in xrange(num_examples):
    #     for j in xrange(num_param):
    #         sg('get_log_derivative %d %d' % (j, i))

    # sg('get_log_likelihood')
    # sg('get_log_likelihood_sample')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train = loader.load_dna('../data/fm_train_dna.dat')
    # fm_cube is loaded here but linear_hmm() does not read it.
    fm_cube = loader.load_cubes('../data/fm_train_cube.dat')
    linear_hmm()
def chi2():
    """Request the CHI2 kernel matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('Chi2')

    from sg import sg

    kernel_width = 1.4
    cache_size = 10

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'CHI2', 'REAL', cache_size, kernel_width)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    chi2()
def combined():
    """Build a COMBINED kernel from LINEAR, GAUSSIAN and POLY sub-kernels.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    The same train/test features are added once after each sub-kernel.
    The kernel matrices for 'TRAIN' and 'TEST' are kept in locals; nothing
    is returned.
    """
    print('Combined')

    from sg import sg

    cache_size = 10
    kernel_weight = 1.0

    # Start from a clean state.
    sg('clean_kernel')
    sg('clean_features', 'TRAIN')
    sg('clean_features', 'TEST')

    sg('set_kernel', 'COMBINED', cache_size)

    sg('add_kernel', kernel_weight, 'LINEAR', 'REAL', cache_size)
    sg('add_features', 'TRAIN', fm_train_real)
    sg('add_features', 'TEST', fm_test_real)

    sg('add_kernel', kernel_weight, 'GAUSSIAN', 'REAL', cache_size, 1.0)
    sg('add_features', 'TRAIN', fm_train_real)
    sg('add_features', 'TEST', fm_test_real)

    sg('add_kernel', kernel_weight, 'POLY', 'REAL', cache_size, 3, False)
    sg('add_features', 'TRAIN', fm_train_real)
    sg('add_features', 'TEST', fm_test_real)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    combined()
def comm_ulong_string():
    """Request COMMSTRING kernel matrices on DNA strings converted to ULONG.

    Reads the module-level globals ``fm_train_dna`` and ``fm_test_dna``.
    Each feature set is converted and gets the SORTULONGSTRING preprocessor
    attached before the kernel is set. Nothing is returned.
    """
    print('CommUlongString')

    from sg import sg

    cache_size = 10
    word_order = 3
    gap = 0
    # Character flag rather than a boolean; set to 'r' to yield true.
    reverse = 'n'
    use_sign = False
    normalization = 'FULL'

    sg('add_preproc', 'SORTULONGSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'ULONG',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'ULONG',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TEST')

    sg('set_kernel', 'COMMSTRING', 'ULONG', cache_size, use_sign,
       normalization)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    comm_ulong_string()
def comm_word_string():
    """Request COMMSTRING kernel matrices on DNA strings converted to WORD.

    Reads the module-level globals ``fm_train_dna`` and ``fm_test_dna``.
    Each feature set is converted and gets the SORTWORDSTRING preprocessor
    attached before the kernel is set. Nothing is returned.
    """
    print('CommWordString')

    from sg import sg

    cache_size = 10
    word_order = 3
    gap = 0
    # Character flag rather than a boolean; set to 'r' to yield true.
    reverse = 'n'
    use_sign = False
    normalization = 'FULL'

    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TEST')

    sg('set_kernel', 'COMMSTRING', 'WORD', cache_size, use_sign,
       normalization)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    comm_word_string()
def const():
    """Request the CONST kernel matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('Const')

    from sg import sg

    const_value = 23.0
    cache_size = 10

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'CONST', 'REAL', cache_size, const_value)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    const()
def diag():
    """Request the DIAG kernel matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('Diag')

    from sg import sg

    diag_value = 23.0
    cache_size = 10

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'DIAG', 'REAL', cache_size, diag_value)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    diag()
def distance():
    """Request the DISTANCE kernel matrix (EUCLIDIAN distance) for both sets.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    The distance is set before the kernel. The matrices for 'TRAIN' and
    'TEST' are kept in locals; nothing is returned.
    """
    print('Distance')

    from sg import sg

    kernel_width = 1.7
    cache_size = 10

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_distance', 'EUCLIDIAN', 'REAL')
    sg('set_kernel', 'DISTANCE', cache_size, kernel_width)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    distance()
def fixed_degree_string():
    """Request the FIXEDDEGREE kernel matrix on DNA strings for both sets.

    Reads the module-level globals ``fm_train_dna`` and ``fm_test_dna``.
    The matrices for 'TRAIN' and 'TEST' are kept in locals; nothing is
    returned.
    """
    print('FixedDegreeString')

    from sg import sg

    cache_size = 10
    kernel_degree = 3

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'FIXEDDEGREE', 'CHAR', cache_size, kernel_degree)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    fixed_degree_string()
def gaussian():
    """Request the GAUSSIAN kernel matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('Gaussian')

    from sg import sg

    kernel_width = 1.9
    cache_size = 10

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'GAUSSIAN', 'REAL', cache_size, kernel_width)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    gaussian()
def gaussian_shift():
    """Request the GAUSSIANSHIFT kernel matrix for 'TRAIN' and then 'TEST'.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    Both matrices are kept in locals; nothing is returned.
    """
    print('GaussianShift')

    from sg import sg

    kernel_width = 1.9
    shift_limit = 2
    shift_increment = 1
    cache_size = 10

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'GAUSSIANSHIFT', 'REAL', cache_size, kernel_width,
       shift_limit, shift_increment)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    gaussian_shift()
def linear():
    """Request the LINEAR kernel matrix on REAL features for both sets.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    The matrices for 'TRAIN' and 'TEST' are kept in locals; nothing is
    returned.
    """
    print('Linear')

    from sg import sg

    kernel_scale = 1.2
    cache_size = 10

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'LINEAR', 'REAL', cache_size, kernel_scale)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    linear()
def linear_string():
    """Request the LINEAR kernel matrix on DNA strings (CHAR) for both sets.

    Reads the module-level globals ``fm_train_dna`` and ``fm_test_dna``.
    The matrices for 'TRAIN' and 'TEST' are kept in locals; nothing is
    returned.
    """
    print('LinearString')

    from sg import sg

    cache_size = 10

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'LINEAR', 'CHAR', cache_size)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    linear_string()
def linear_byte():
    """Request the linear kernel matrix on RAWBYTE features for both sets.

    Reads the module-level globals ``fm_train_byte`` and ``fm_test_byte``.
    The matrices for 'TRAIN' and 'TEST' are kept in locals; nothing is
    returned.
    """
    print('LinearByte')

    from sg import sg

    sg('set_features', 'TRAIN', fm_train_byte, 'RAWBYTE')
    sg('set_features', 'TEST', fm_test_byte, 'RAWBYTE')

    # NOTE(review): kernel name and feature type are passed as one
    # space-separated string here, unlike the other set_kernel examples
    # which pass them as separate arguments -- confirm this is intended.
    sg('set_kernel', 'LINEAR BYTE', 10)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from numpy import ubyte
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_byte = ubyte(loader.load_numbers('../data/fm_train_byte.dat'))
    fm_test_byte = ubyte(loader.load_numbers('../data/fm_test_byte.dat'))
    linear_byte()
def linear_word():
    """Request the LINEAR kernel matrix on WORD features for both sets.

    Reads the module-level globals ``fm_train_word`` and ``fm_test_word``.
    The matrices for 'TRAIN' and 'TEST' are kept in locals; nothing is
    returned.
    """
    print('LinearWord')

    from sg import sg

    cache_size = 10
    kernel_scale = 1.4

    sg('set_features', 'TRAIN', fm_train_word)
    sg('set_features', 'TEST', fm_test_word)
    sg('set_kernel', 'LINEAR', 'WORD', cache_size, kernel_scale)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix
    from numpy import ushort

    loader = LoadMatrix()
    # Training features must come from the train file, not the test file.
    # NOTE(review): assumes ../data/fm_train_word.dat ships next to
    # fm_test_word.dat, like the other train/test pairs -- confirm.
    fm_train_word = ushort(loader.load_numbers('../data/fm_train_word.dat'))
    fm_test_word = ushort(loader.load_numbers('../data/fm_test_word.dat'))
    linear_word()
def local_alignment_string():
    """Request the LOCALALIGNMENT kernel matrix on DNA strings for both sets.

    Reads the module-level globals ``fm_train_dna`` and ``fm_test_dna``.
    The matrices for 'TRAIN' and 'TEST' are kept in locals; nothing is
    returned.
    """
    print('LocalAlignmentString')

    from sg import sg

    cache_size = 10

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'LOCALALIGNMENT', 'CHAR', cache_size)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    local_alignment_string()
def locality_improved_string():
    """Request the LIK kernel matrix on DNA strings for both sets.

    Reads the module-level globals ``fm_train_dna`` and ``fm_test_dna``.
    The outer degree is derived as the inner degree plus two. The matrices
    for 'TRAIN' and 'TEST' are kept in locals; nothing is returned.
    """
    print('LocalityImprovedString')

    from sg import sg

    cache_size = 10
    window_length = 5
    inner_degree = 5
    outer_degree = inner_degree + 2

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'LIK', 'CHAR', cache_size, window_length,
       inner_degree, outer_degree)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    # label_train_dna is loaded here but locality_improved_string()
    # does not read it.
    label_train_dna = loader.load_labels('../data/label_train_dna.dat')
    locality_improved_string()
def oligo_string():
    """Request the OLIGO kernel matrix on DNA strings for both sets.

    Reads the module-level globals ``fm_train_dna`` and ``fm_test_dna``.
    The matrices for 'TRAIN' and 'TEST' are kept in locals; nothing is
    returned.
    """
    print('OligoString')

    from sg import sg

    cache_size = 10
    oligo_k = 3
    kernel_width = 1.2

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'OLIGO', 'CHAR', cache_size, oligo_k, kernel_width)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    oligo_string()
def plugin_estimate_histogram():
    """Train a plugin estimator and request HISTOGRAM kernel matrices.

    Reads the module-level globals ``fm_train_dna``, ``fm_test_dna`` and
    ``label_train_dna`` that the ``__main__`` section loads. Both feature
    sets are converted from DNA strings to WORD features first. The kernel
    matrices for 'TRAIN' and 'TEST' are kept in locals; nothing is returned.
    """
    print('PluginEstimate w/ HistogramWord')

    from sg import sg

    size_cache = 10
    order = 3
    gap = 0
    # Character flag rather than a boolean; set to 'r' to yield true.
    reverse = 'n'

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)

    # Estimator setup and training.
    pseudo_pos = 1e-1
    pseudo_neg = 1e-1
    sg('new_plugin_estimator', pseudo_pos, pseudo_neg)
    sg('set_labels', 'TRAIN', label_train_dna)
    sg('train_estimator')

    sg('set_kernel', 'HISTOGRAM', 'WORD', size_cache)
    train_kernel = sg('get_kernel_matrix', 'TRAIN')

    # not supported yet
    # lab=sg('plugin_estimate_classify')

    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    label_train_dna = loader.load_labels('../data/label_train_dna.dat')
    plugin_estimate_histogram()
def poly():
    """Request the POLY kernel matrix on REAL features for both sets.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    The matrices for 'TRAIN' and 'TEST' are kept in locals; nothing is
    returned.
    """
    print('Poly')

    from sg import sg

    kernel_degree = 4
    inhomogene = False
    normalize = True
    cache_size = 10

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'POLY', 'REAL', cache_size, kernel_degree,
       inhomogene, normalize)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    poly()
def poly_match_string():
    """Request the POLYMATCH kernel matrix on DNA strings for both sets.

    Reads the module-level globals ``fm_train_dna`` and ``fm_test_dna``.
    The matrices for 'TRAIN' and 'TEST' are kept in locals; nothing is
    returned.
    """
    print('PolyMatchString')

    from sg import sg

    cache_size = 10
    kernel_degree = 3
    inhomogene = False

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('set_kernel', 'POLYMATCH', 'CHAR', cache_size, kernel_degree,
       inhomogene)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    poly_match_string()
def poly_match_word():
    """Request POLYMATCH kernel matrices on DNA strings converted to WORD.

    Reads the module-level globals ``fm_train_dna`` and ``fm_test_dna``.
    Each feature set is converted and gets the SORTWORDSTRING preprocessor
    attached before the kernel is set. Nothing is returned.
    """
    print('PolyMatchWord')

    from sg import sg

    cache_size = 10
    kernel_degree = 2
    inhomogene = True
    normalize = True
    word_order = 3
    gap = 0
    # Character flag rather than a boolean; set to 'r' to yield true.
    reverse = 'n'

    sg('add_preproc', 'SORTWORDSTRING')

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TRAIN')

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       word_order, word_order - 1, gap, reverse)
    sg('attach_preproc', 'TEST')

    sg('set_kernel', 'POLYMATCH', 'WORD', cache_size, kernel_degree,
       inhomogene, normalize)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    # label_train_dna is loaded here but poly_match_word() does not read it.
    label_train_dna = loader.load_labels('../data/label_train_dna.dat')
    poly_match_word()
def plugin_estimate_salzberg():
    """Train a plugin estimator and request SALZBERG kernel matrices.

    Reads the module-level globals ``fm_train_dna``, ``fm_test_dna`` and
    ``label_train_dna`` that the ``__main__`` section loads. Both feature
    sets are converted from DNA strings to WORD features first, and the
    prior probabilities are set from the training labels after the kernel
    is set. The kernel matrices for 'TRAIN' and 'TEST' are kept in locals;
    nothing is returned.
    """
    print('PluginEstimate w/ SalzbergWord')

    from sg import sg

    size_cache = 10
    order = 3
    gap = 0
    # Character flag rather than a boolean; set to 'r' to yield true.
    reverse = 'n'

    sg('set_features', 'TRAIN', fm_train_dna, 'DNA')
    sg('convert', 'TRAIN', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)

    sg('set_features', 'TEST', fm_test_dna, 'DNA')
    sg('convert', 'TEST', 'STRING', 'CHAR', 'STRING', 'WORD',
       order, order - 1, gap, reverse)

    # Estimator setup and training.
    pseudo_pos = 1e-1
    pseudo_neg = 1e-1
    sg('new_plugin_estimator', pseudo_pos, pseudo_neg)
    sg('set_labels', 'TRAIN', label_train_dna)
    sg('train_estimator')

    sg('set_kernel', 'SALZBERG', 'WORD', size_cache)
    # Alternative with explicit priors, left disabled:
    #sg('set_prior_probs', 0.4, 0.6)
    sg('set_prior_probs_from_labels', label_train_dna)
    train_kernel = sg('get_kernel_matrix', 'TRAIN')

    # not supported yet
    # lab=sg('plugin_estimate_classify')

    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    label_train_dna = loader.load_labels('../data/label_train_dna.dat')
    plugin_estimate_salzberg()
def sigmoid():
    """Request the SIGMOID kernel matrix on REAL features for both sets.

    Reads the module-level globals ``fm_train_real`` and ``fm_test_real``.
    The matrices for 'TRAIN' and 'TEST' are kept in locals; nothing is
    returned.
    """
    print('Sigmoid')

    from sg import sg

    gamma = 1.2
    coef0 = 1.3
    size_cache = 10

    sg('set_features', 'TRAIN', fm_train_real)
    sg('set_features', 'TEST', fm_test_real)
    sg('set_kernel', 'SIGMOID', 'REAL', size_cache, gamma, coef0)

    train_kernel = sg('get_kernel_matrix', 'TRAIN')
    test_kernel = sg('get_kernel_matrix', 'TEST')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    sigmoid()
def simple_locality_improved_string():
    """Request SLIK kernel matrices for the DNA train/test strings.

    Reads the module-level ``fm_train_dna`` and ``fm_test_dna`` that the
    ``__main__`` guard below loads.  Returns nothing.
    """
    print('SimpleLocalityImprovedString')

    from sg import sg

    cache_size = 10
    window_length = 5
    degree_inner = 5
    degree_outer = degree_inner + 2

    for target, dna in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, dna, 'DNA')

    sg('set_kernel', 'SLIK', 'CHAR', cache_size, window_length,
       degree_inner, degree_outer)
    for target in ('TRAIN', 'TEST'):
        km = sg('get_kernel_matrix', target)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    label_train_dna = loader.load_labels('../data/label_train_dna.dat')
    simple_locality_improved_string()
def weighted_comm_word_string():
    """Request WEIGHTEDCOMMSTRING kernel matrices on WORD features from DNA.

    Reads the module-level ``fm_train_dna`` and ``fm_test_dna`` that the
    ``__main__`` guard below loads.  Returns nothing.
    """
    print('WeightedCommWordString')

    from sg import sg

    cache_size = 10
    word_order = 3
    gap = 0
    # A character flag rather than a boolean: set it to 'r' to yield true.
    reverse = 'n'
    use_sign = False
    normalization = 'FULL'

    sg('add_preproc', 'SORTWORDSTRING')
    for target, dna in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, dna, 'DNA')
        sg('convert', target, 'STRING', 'CHAR', 'STRING', 'WORD',
           word_order, word_order - 1, gap, reverse)
        sg('attach_preproc', target)

    sg('set_kernel', 'WEIGHTEDCOMMSTRING', 'WORD', cache_size, use_sign,
       normalization)
    for target in ('TRAIN', 'TEST'):
        km = sg('get_kernel_matrix', target)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    label_train_dna = loader.load_labels('../data/label_train_dna.dat')
    weighted_comm_word_string()
def weighted_degree_position_string():
    """Request WEIGHTEDDEGREEPOS kernel matrices for the DNA strings.

    Reads the module-level ``fm_train_dna`` and ``fm_test_dna`` that the
    ``__main__`` guard below loads.  Returns nothing.
    """
    print('WeightedDegreePositionString')

    from sg import sg

    cache_size = 10
    kernel_degree = 20

    for target, dna in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, dna, 'DNA')

    sg('set_kernel', 'WEIGHTEDDEGREEPOS', 'CHAR', cache_size, kernel_degree)
    for target in ('TRAIN', 'TEST'):
        km = sg('get_kernel_matrix', target)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    weighted_degree_position_string()
def weighted_degree_string():
    """Request WEIGHTEDDEGREE kernel matrices for the DNA strings.

    Reads the module-level ``fm_train_dna`` and ``fm_test_dna`` that the
    ``__main__`` guard below loads.  Returns nothing.
    """
    print('WeightedDegreeString')

    from sg import sg

    cache_size = 10
    kernel_degree = 20

    for target, dna in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, dna, 'DNA')

    sg('set_kernel', 'WEIGHTEDDEGREE', 'CHAR', cache_size, kernel_degree)
    for target in ('TRAIN', 'TEST'):
        km = sg('get_kernel_matrix', target)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    weighted_degree_string()
def mkl_multiclass():
    """Train an MKL_MULTICLASS classifier on a combined kernel and print
    the classification result.

    The combined kernel gets three sub-kernels (LINEAR, GAUSSIAN, POLY),
    each paired with the same train and test feature matrices.  Reads the
    module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_multiclass`` that the ``__main__`` guard below loads.
    """
    print('mkl_multiclass')

    from sg import sg

    cache_size = 10
    width = 1.2
    C = 1.2
    epsilon = 1e-5
    mkl_eps = 0.001
    mkl_norm = 1.0
    weight = 1.0

    # Clear any kernel and features still held by the interface.
    sg('clean_kernel')
    sg('clean_features', 'TRAIN')
    sg('clean_features', 'TEST')

    sg('set_kernel', 'COMBINED', cache_size)
    sub_kernels = (
        ('LINEAR', 'REAL', cache_size),
        ('GAUSSIAN', 'REAL', cache_size, width),
        ('POLY', 'REAL', cache_size, 2),
    )
    # Each sub-kernel is followed by its own train and test feature sets.
    for spec in sub_kernels:
        sg('add_kernel', weight, *spec)
        sg('add_features', 'TRAIN', fm_train_real)
        sg('add_features', 'TEST', fm_test_real)

    sg('set_labels', 'TRAIN', label_train_multiclass)
    sg('new_classifier', 'MKL_MULTICLASS')
    sg('svm_epsilon', epsilon)
    sg('c', C)
    sg('mkl_parameters', mkl_eps, 0.0, mkl_norm)
    sg('train_classifier')

    # No 'set_features' call for TEST here: the test features were
    # already supplied through 'add_features' above.
    result = sg('classify')
    print(result)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_multiclass = loader.load_labels(
        '../data/label_train_multiclass.dat')
    mkl_multiclass()
# MKL regression example: trains MKL_REGRESSION on a combined kernel made
# of three GAUSSIAN sub-kernels and fetches the resulting SVM parameters.
from sg import sg
# Explicit names instead of a wildcard import, so numpy cannot silently
# shadow builtins or other names in this script.
from numpy import concatenate, ones, random

num = 100
weight = 1.
tube_epsilon = 1e-2

# 2*num labels: num entries of -1 followed by num entries of +1.
labels = concatenate((-ones([1, num]), ones([1, num])), 1)[0]
# 2 x 2*num feature matrix: normal samples shifted by -1 for the first
# num columns and by +1 for the remaining num columns.
features = concatenate((random.normal(size=(2, num)) - 1,
                        random.normal(size=(2, num)) + 1), 1)

sg('new_classifier', 'MKL_REGRESSION')
sg('c', 1.)
sg('svr_tube_epsilon', tube_epsilon)
sg('set_labels', 'TRAIN', labels)

# The same feature matrix is added three times, once per sub-kernel below.
sg('add_features', 'TRAIN', features)
sg('add_features', 'TRAIN', features)
sg('add_features', 'TRAIN', features)

sg('set_kernel', 'COMBINED', 100)
# Three GAUSSIAN sub-kernels that differ only in the last argument.
sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 100.)
sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 10.)
sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 1.)

sg('train_classifier')
bias, alphas = sg('get_svm')
# MKL classification example: trains MKL_CLASSIFICATION on a combined
# kernel made of three GAUSSIAN sub-kernels and fetches the resulting SVM
# parameters.
from sg import sg
# Explicit names instead of a wildcard import, so numpy cannot silently
# shadow builtins or other names in this script.
from numpy import concatenate, ones, random

num = 100
weight = 1.

# 2*num labels: num entries of -1 followed by num entries of +1.
labels = concatenate((-ones([1, num]), ones([1, num])), 1)[0]
# 2 x 2*num feature matrix: normal samples shifted by -1 for the first
# num columns and by +1 for the remaining num columns.
features = concatenate((random.normal(size=(2, num)) - 1,
                        random.normal(size=(2, num)) + 1), 1)

# The 'c' command is issued before the classifier is created, as in the
# original example.
sg('c', 10.)
sg('new_classifier', 'MKL_CLASSIFICATION')
sg('set_labels', 'TRAIN', labels)

# The same feature matrix is added three times, once per sub-kernel below.
sg('add_features', 'TRAIN', features)
sg('add_features', 'TRAIN', features)
sg('add_features', 'TRAIN', features)

sg('set_kernel', 'COMBINED', 100)
# Three GAUSSIAN sub-kernels that differ only in the last argument.
sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 100.)
sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 10.)
sg('add_kernel', weight, 'GAUSSIAN', 'REAL', 100, 1.)

sg('train_classifier')
bias, alphas = sg('get_svm')
def log_plus_one():
    """Request CHI2 kernel matrices with the LOGPLUSONE preprocessor attached.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` that the
    ``__main__`` guard below loads.  Returns nothing.
    """
    print('LogPlusOne')

    from sg import sg

    kernel_width = 1.4
    cache_size = 10

    sg('add_preproc', 'LOGPLUSONE')
    sg('set_kernel', 'CHI2', 'REAL', cache_size, kernel_width)

    # For each side: set the features, attach the preprocessor, then
    # fetch the kernel matrix before moving on to the next side.
    for target, data in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, data)
        sg('attach_preproc', target)
        km = sg('get_kernel_matrix', target)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    log_plus_one()
def norm_one():
    """Request CHI2 kernel matrices with the NORMONE preprocessor attached.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` that the
    ``__main__`` guard below loads.  Returns nothing.
    """
    print('NormOne')

    from sg import sg

    kernel_width = 1.4
    cache_size = 10

    sg('add_preproc', 'NORMONE')
    sg('set_kernel', 'CHI2', 'REAL', cache_size, kernel_width)

    # For each side: set the features, attach the preprocessor, then
    # fetch the kernel matrix before moving on to the next side.
    for target, data in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, data)
        sg('attach_preproc', target)
        km = sg('get_kernel_matrix', target)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    norm_one()
def prune_var_sub_mean():
    """Request CHI2 kernel matrices with PRUNEVARSUBMEAN preprocessing.

    The preprocessor is registered with ``divide_by_std`` set to True.
    Reads the module-level ``fm_train_real`` and ``fm_test_real`` that the
    ``__main__`` guard below loads.  Returns nothing.
    """
    print('PruneVarSubMean')

    from sg import sg

    kernel_width = 1.4
    cache_size = 10
    divide_by_std = True

    sg('add_preproc', 'PRUNEVARSUBMEAN', divide_by_std)
    sg('set_kernel', 'CHI2', 'REAL', cache_size, kernel_width)

    # For each side: set the features, attach the preprocessor, then
    # fetch the kernel matrix before moving on to the next side.
    for target, data in (('TRAIN', fm_train_real), ('TEST', fm_test_real)):
        sg('set_features', target, data)
        sg('attach_preproc', target)
        km = sg('get_kernel_matrix', target)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    prune_var_sub_mean()
def sort_ulong_string():
    """Request COMMSTRING kernel matrices on ULONG features sorted by the
    SORTULONGSTRING preprocessor.

    Reads the module-level ``fm_train_dna`` and ``fm_test_dna`` that the
    ``__main__`` guard below loads.  Returns nothing.
    """
    print('CommUlongString')

    from sg import sg

    cache_size = 10
    word_order = 3
    gap = 0
    # A character flag rather than a boolean: set it to 'r' to yield true.
    reverse = 'n'
    use_sign = False
    normalization = 'FULL'

    sg('add_preproc', 'SORTULONGSTRING')
    for target, dna in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, dna, 'DNA')
        sg('convert', target, 'STRING', 'CHAR', 'STRING', 'ULONG',
           word_order, word_order - 1, gap, reverse)
        sg('attach_preproc', target)

    sg('set_kernel', 'COMMSTRING', 'ULONG', cache_size, use_sign,
       normalization)
    for target in ('TRAIN', 'TEST'):
        km = sg('get_kernel_matrix', target)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    sort_ulong_string()
def sort_word_string():
    """Request COMMSTRING kernel matrices on WORD features sorted by the
    SORTWORDSTRING preprocessor.

    Reads the module-level ``fm_train_dna`` and ``fm_test_dna`` that the
    ``__main__`` guard below loads.  Returns nothing.
    """
    print('CommWordString')

    from sg import sg

    cache_size = 10
    word_order = 3
    gap = 0
    # A character flag rather than a boolean: set it to 'r' to yield true.
    reverse = 'n'
    use_sign = False
    normalization = 'FULL'

    sg('add_preproc', 'SORTWORDSTRING')
    for target, dna in (('TRAIN', fm_train_dna), ('TEST', fm_test_dna)):
        sg('set_features', target, dna, 'DNA')
        sg('convert', target, 'STRING', 'CHAR', 'STRING', 'WORD',
           word_order, word_order - 1, gap, reverse)
        sg('attach_preproc', target)

    sg('set_kernel', 'COMMSTRING', 'WORD', cache_size, use_sign,
       normalization)
    for target in ('TRAIN', 'TEST'):
        km = sg('get_kernel_matrix', target)


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    sort_word_string()
def krr():
    """Train a KRR regression on a GAUSSIAN kernel and apply it to the
    test features.

    Reads the module-level ``fm_train``, ``fm_test`` and ``label_train``
    that the ``__main__`` guard below loads.  Returns nothing; the output
    of the final 'classify' command is bound to a local and discarded.
    """
    print('KRR')

    from sg import sg

    cache_size = 10
    kernel_width = 2.1
    C = 1.2
    tau = 1e-6

    # Training data and kernel.
    sg('set_features', 'TRAIN', fm_train)
    sg('set_kernel', 'GAUSSIAN', 'REAL', cache_size, kernel_width)
    sg('set_labels', 'TRAIN', label_train)

    # Regression setup and training.
    sg('new_regression', 'KRR')
    sg('krr_tau', tau)
    sg('c', C)
    sg('train_regression')

    # Apply to the test data; the interface uses 'classify' here too.
    sg('set_features', 'TEST', fm_test)
    result = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train = loader.load_numbers('../data/fm_train_real.dat')
    fm_test = loader.load_numbers('../data/fm_test_real.dat')
    label_train = loader.load_labels('../data/label_train_twoclass.dat')
    krr()
def libsvr():
    """Train a LIBSVR regression on a GAUSSIAN kernel and apply it to the
    test features.

    Reads the module-level ``fm_train``, ``fm_test`` and ``label_train``
    that the ``__main__`` guard below loads.  Returns nothing; the output
    of the final 'classify' command is bound to a local and discarded.
    """
    print('LibSVR')

    from sg import sg

    size_cache = 10
    width = 2.1
    C = 1.2
    tube_epsilon = 1e-2

    sg('set_features', 'TRAIN', fm_train)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train)

    sg('new_regression', 'LIBSVR')
    sg('svr_tube_epsilon', tube_epsilon)
    sg('c', C)
    sg('train_regression')

    # Apply to the test data; the interface uses 'classify' here too.
    sg('set_features', 'TEST', fm_test)
    result = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train = lm.load_numbers('../data/fm_train_real.dat')
    fm_test = lm.load_numbers('../data/fm_test_real.dat')
    label_train = lm.load_labels('../data/label_train_twoclass.dat')
    libsvr()
def svr_light():
    """Train an SVRLIGHT regression on a GAUSSIAN kernel and apply it to
    the test features.

    Returns early, without training, when creating the regression raises
    ``RuntimeError``.  Reads the module-level ``fm_train``, ``fm_test`` and
    ``label_train`` that the ``__main__`` guard below loads.  Returns
    nothing; the output of the final 'classify' command is discarded.
    """
    print('SVRLight')

    from sg import sg

    size_cache = 10
    width = 2.1
    C = 1.2
    tube_epsilon = 1e-2

    sg('set_features', 'TRAIN', fm_train)
    sg('set_kernel', 'GAUSSIAN', 'REAL', size_cache, width)
    sg('set_labels', 'TRAIN', label_train)

    try:
        sg('new_regression', 'SVRLIGHT')
    except RuntimeError:
        # Deliberate best-effort: skip the example quietly when SVRLIGHT
        # cannot be created (presumably not available in this build of
        # the interface -- TODO confirm).
        return

    sg('svr_tube_epsilon', tube_epsilon)
    sg('c', C)
    sg('train_regression')

    # Apply to the test data; the interface uses 'classify' here too.
    sg('set_features', 'TEST', fm_test)
    result = sg('classify')


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train = lm.load_numbers('../data/fm_train_real.dat')
    fm_test = lm.load_numbers('../data/fm_test_real.dat')
    label_train = lm.load_labels('../data/label_train_twoclass.dat')
    svr_light()