SHOGUN v0.9.0
|
本页面包含了所有Python模块化接口的例子。
要运行这些例子只需要
python name_of_example.py
parameter_list = [[1,7],[2,8]] def classifier_custom_kernel_modular(C=1,dim=7): from shogun.Features import RealFeatures, Labels from shogun.Kernel import CustomKernel from shogun.Classifier import LibSVM from numpy import diag,ones,sign from numpy.random import rand,seed seed((C,dim)) lab=sign(2*rand(dim) - 1) data=rand(dim, dim) symdata=data*data.T + diag(ones(dim)) kernel=CustomKernel() kernel.set_full_kernel_matrix_from_full(data) labels=Labels(lab) svm=LibSVM(C, kernel, labels) svm.train() predictions =svm.classify() out=svm.classify().get_labels() return svm,out if __name__=='__main__': print 'custom_kernel' classifier_custom_kernel_modular(*parameter_list[0])
import numpy from shogun.Features import StringCharFeatures, Labels, DNA from shogun.Kernel import WeightedDegreeStringKernel from shogun.Classifier import SVMLight, DomainAdaptationSVM, MSG_DEBUG traindna = ['CGCACGTACGTAGCTCGAT', 'CGACGTAGTCGTAGTCGTA', 'CGACGGGGGGGGGGTCGTA', 'CGACCTAGTCGTAGTCGTA', 'CGACCACAGTTATATAGTA', 'CGACGTAGTCGTAGTCGTA', 'CGACGTAGTTTTTTTCGTA', 'CGACGTAGTCGTAGCCCCA', 'CAAAAAAAAAAAAAAAATA', 'CGACGGGGGGGGGGGCGTA'] label_traindna = numpy.array(5*[-1.0] + 5*[1.0]) testdna = ['AGCACGTACGTAGCTCGAT', 'AGACGTAGTCGTAGTCGTA', 'CAACGGGGGGGGGGTCGTA', 'CGACCTAGTCGTAGTCGTA', 'CGAACACAGTTATATAGTA', 'CGACCTAGTCGTAGTCGTA', 'CGACGTGGGGTTTTTCGTA', 'CGACGTAGTCCCAGCCCCA', 'CAAAAAAAAAAAACCAATA', 'CGACGGCCGGGGGGGCGTA'] label_testdna = numpy.array(5*[-1.0] + 5*[1.0]) traindna2 = ['AGACAGTCAGTCGATAGCT', 'AGCAGTCGTAGTCGTAGTC', 'AGCAGGGGGGGGGGTAGTC', 'AGCAATCGTAGTCGTAGTC', 'AGCAACACGTTCTCTCGTC', 'AGCAGTCGTAGTCGTAGTC', 'AGCAGTCGTTTTTTTAGTC', 'AGCAGTCGTAGTCGAAAAC', 'ACCCCCCCCCCCCCCCCTC', 'AGCAGGGGGGGGGGGAGTC'] label_traindna2 = numpy.array(5*[-1.0] + 5*[1.0]) testdna2 = ['CGACAGTCAGTCGATAGCT', 'CGCAGTCGTAGTCGTAGTC', 'ACCAGGGGGGGGGGTAGTC', 'AGCAATCGTAGTCGTAGTC', 'AGCCACACGTTCTCTCGTC', 'AGCAATCGTAGTCGTAGTC', 'AGCAGTGGGGTTTTTAGTC', 'AGCAGTCGTAAACGAAAAC', 'ACCCCCCCCCCCCAACCTC', 'AGCAGGAAGGGGGGGAGTC'] label_testdna2 = numpy.array(5*[-1.0] + 5*[1.0]) parameter_list = [[traindna,testdna,label_traindna,label_testdna,traindna2,label_traindna2, \ testdna2,label_testdna2,1,3],[traindna,testdna,label_traindna,label_testdna,traindna2,label_traindna2, \ testdna2,label_testdna2,2,5]] def classifier_domainadaptationsvm_modular(fm_train_dna=traindna,fm_test_dna=testdna, \ label_train_dna=label_traindna, \ label_test_dna=label_testdna,fm_train_dna2=traindna2,fm_test_dna2=testdna2, \ label_train_dna2=label_traindna2,label_test_dna2=label_testdna2,C=1,degree=3): feats_train = StringCharFeatures(fm_train_dna, DNA) feats_test = StringCharFeatures(fm_test_dna, DNA) kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree) labels = Labels(label_train_dna) svm = SVMLight(C, kernel, labels) svm.train() #svm.io.set_loglevel(MSG_DEBUG) ##################################### #print "obtaining DA SVM from previously trained SVM" feats_train2 = StringCharFeatures(fm_train_dna, DNA) feats_test2 = StringCharFeatures(fm_test_dna, DNA) kernel2 = WeightedDegreeStringKernel(feats_train, feats_train, degree) labels2 = Labels(label_train_dna) # we regularize against the previously obtained solution dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0) dasvm.train() out = dasvm.classify(feats_test2).get_labels() return out #,dasvm TODO if __name__=='__main__': print 'SVMLight' classifier_domainadaptationsvm_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_multiclass.dat') parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]] def classifier_gmnpsvm_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5): from shogun.Features import RealFeatures, Labels from shogun.Kernel import GaussianKernel from shogun.Classifier import GMNPSVM feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) kernel=GaussianKernel(feats_train, feats_train, width) labels=Labels(label_train_multiclass) svm=GMNPSVM(C, kernel, labels) svm.set_epsilon(epsilon) svm.train(feats_train) #kernel.init(feats_train, feats_test) out=svm.classify(feats_test).get_labels() return out,kernel if __name__=='__main__': print 'GMNPSVM' classifier_gmnpsvm_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]] def classifier_gpbtsvm_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,width=2.1,C=1,epsilon=1e-5): from shogun.Features import RealFeatures, Labels from shogun.Kernel import GaussianKernel from shogun.Classifier import GPBTSVM feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) kernel=GaussianKernel(feats_train, feats_train, width) labels=Labels(label_train_twoclass) svm=GPBTSVM(C, kernel, labels) svm.set_epsilon(epsilon) svm.train() kernel.init(feats_train, feats_test) svm.classify().get_labels() predictions = svm.classify() return predictions, svm, predictions.get_labels() if __name__=='__main__': print 'GPBTSVM' classifier_gpbtsvm_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_multiclass.dat') parameter_list = [[traindat,testdat,label_traindat,3],[traindat,testdat,label_traindat,3]] def classifier_knn_modular(fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat, k=3 ): from shogun.Features import RealFeatures, Labels from shogun.Classifier import KNN from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance(feats_train, feats_train) labels=Labels(label_train_multiclass) knn=KNN(k, distance, labels) knn_train = knn.train() output=knn.classify(feats_test).get_labels() return knn,knn_train,output if __name__=='__main__': print 'KNN' classifier_knn_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_multiclass.dat') parameter_list = [[traindat,testdat,label_traindat,0.9,1,6],[traindat,testdat,label_traindat,0.8,1,5]] def classifier_larank_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,C=0.9,num_threads=1,num_iter=5): from shogun.Features import RealFeatures, Labels from shogun.Kernel import GaussianKernel from shogun.Classifier import LaRank from shogun.Library import Math_init_random Math_init_random(17) feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) width=2.1 kernel=GaussianKernel(feats_train, feats_train, width) C=1 epsilon=1e-5 labels=Labels(label_train_multiclass) svm=LaRank(C, kernel, labels) #svm.set_tau(1e-3) #svm.set_batch_mode(False) #svm.io.enable_progress() svm.set_epsilon(epsilon) svm.train() out=svm.classify(feats_train).get_labels() predictions = svm.classify() return predictions, svm, predictions.get_labels() if __name__=='__main__': print 'LaRank' classifier_larank_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat,3,1],[traindat,testdat,label_traindat,4,1]] def classifier_lda_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,gamma=3,num_threads=1): from shogun.Features import RealFeatures, Labels from shogun.Classifier import LDA feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) labels=Labels(label_train_twoclass) lda=LDA(gamma, feats_train, labels) lda.train() lda.get_bias() lda.get_w() lda.set_features(feats_test) lda.classify().get_labels() return lda,lda.classify().get_labels() if __name__=='__main__': print 'LDA' classifier_lda_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat,0.9,1e-3],[traindat,testdat,label_traindat,0.8,1e-2]] def classifier_liblinear_modular(fm_train_real, fm_test_real, label_train_twoclass, C, epsilon): from shogun.Features import RealFeatures, SparseRealFeatures, Labels from shogun.Classifier import LibLinear, L2R_L2LOSS_SVC_DUAL from shogun.Library import Math_init_random Math_init_random(17) feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) labels=Labels(label_train_twoclass) svm=LibLinear(C, feats_train, labels) svm.set_liblinear_solver_type(L2R_L2LOSS_SVC_DUAL) svm.set_epsilon(epsilon) svm.set_bias_enabled(True) svm.train() svm.set_features(feats_test) svm.classify().get_labels() predictions = svm.classify() return predictions, svm, predictions.get_labels() if __name__=='__main__': print 'LibLinear' classifier_liblinear_modular(*parameter_list[0])
from numpy import * from numpy.random import randn from shogun.Features import * from shogun.Classifier import * from shogun.Kernel import * num=1000 dist=1 width=2.1 C=1 traindata_real=concatenate((randn(2,num)-dist, randn(2,num)+dist), axis=1) testdata_real=concatenate((randn(2,num)-dist, randn(2,num)+dist), axis=1); trainlab=concatenate((-ones(num), ones(num))); testlab=concatenate((-ones(num), ones(num))); feats_train=RealFeatures(traindata_real); feats_test=RealFeatures(testdata_real); kernel=GaussianKernel(feats_train, feats_train, width); labels=Labels(trainlab); svm=LibSVM(C, kernel, labels); svm.train(); kernel.init(feats_train, feats_test); out=svm.classify().get_labels(); testerr=mean(sign(out)!=testlab) print testerr
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]] def classifier_libsvm_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,width=2.1,C=1,epsilon=1e-5): from shogun.Features import RealFeatures, Labels from shogun.Kernel import GaussianKernel from shogun.Classifier import LibSVM feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) kernel=GaussianKernel(feats_train, feats_train, width) labels=Labels(label_train_twoclass) svm=LibSVM(C, kernel, labels) svm.set_epsilon(epsilon) svm.train() kernel.init(feats_train, feats_test) labels = svm.classify().get_labels() supportvectors = sv_idx=svm.get_support_vectors() alphas=svm.get_alphas() predictions = svm.classify() return predictions, svm, predictions.get_labels() if __name__=='__main__': print 'LibSVM' classifier_libsvm_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_multiclass.dat') parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5],[traindat,testdat,label_traindat,2.2,1,1e-5]] def classifier_libsvmmulticlass_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_multiclass=label_traindat,width=2.1,C=1,epsilon=1e-5): from shogun.Features import RealFeatures, Labels from shogun.Kernel import GaussianKernel from shogun.Classifier import LibSVMMultiClass feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) kernel=GaussianKernel(feats_train, feats_train, width) labels=Labels(label_train_multiclass) svm=LibSVMMultiClass(C, kernel, labels) svm.set_epsilon(epsilon) svm.train() kernel.init(feats_train, feats_test) out = svm.classify().get_labels() predictions = svm.classify() return predictions, svm, predictions.get_labels() if __name__=='__main__': print 'LibSVMMultiClass' classifier_libsvmmulticlass_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat,2.2,1,1e-7],[traindat,testdat,2.1,1,1e-5]] def classifier_libsvmoneclass_modular (fm_train_real=traindat,fm_test_real=testdat,width=2.1,C=1,epsilon=1e-5): from shogun.Features import RealFeatures, Labels from shogun.Kernel import GaussianKernel from shogun.Classifier import LibSVMOneClass feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) kernel=GaussianKernel(feats_train, feats_train, width) svm=LibSVMOneClass(C, kernel) svm.set_epsilon(epsilon) svm.train() kernel.init(feats_train, feats_test) svm.classify().get_labels() predictions = svm.classify() return predictions, svm, predictions.get_labels() if __name__=='__main__': print 'LibSVMOneClass' classifier_libsvmoneclass_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat,1,1e-5],[traindat,testdat,label_traindat,0.9,1e-5]] def classifier_mpdsvm_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,C=1,epsilon=1e-5): from shogun.Features import RealFeatures, Labels from shogun.Kernel import GaussianKernel from shogun.Classifier import MPDSVM feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) width=2.1 kernel=GaussianKernel(feats_train, feats_train, width) labels=Labels(label_train_twoclass) svm=MPDSVM(C, kernel, labels) svm.set_epsilon(epsilon) svm.train() kernel.init(feats_train, feats_test) svm.classify().get_labels() predictions = svm.classify() return predictions, svm, predictions.get_labels() if __name__=='__main__': print 'MPDSVM' classifier_mpdsvm_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat,1.,1000,1],[traindat,testdat,label_traindat,1.,1000,1]] def classifier_perceptron_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,learn_rate=1.,max_iter=1000,num_threads=1): from shogun.Features import RealFeatures, Labels from shogun.Classifier import Perceptron feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) labels=Labels(label_train_twoclass) perceptron=Perceptron(feats_train, labels) perceptron.set_learn_rate(learn_rate) perceptron.set_max_iter(max_iter) # only guaranteed to converge for separable data perceptron.train() perceptron.set_features(feats_test) out_labels = perceptron.classify().get_labels() return perceptron, out_labels if __name__=='__main__': print 'Perceptron' classifier_perceptron_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() train=lm.load_numbers('../data/fm_train_real.dat') test=lm.load_numbers('../data/fm_test_real.dat') labels=lm.load_labels('../data/label_train_twoclass.dat') parameter_list=[[train,test,labels,5,1e-3,3.0], [train,test,labels,0.9,1e-2,1.0]] def classifier_subgradientsvm_modular(fm_train_real, fm_test_real, label_train_twoclass, C, epsilon, max_train_time): from shogun.Features import RealFeatures, SparseRealFeatures, Labels from shogun.Classifier import SubGradientSVM realfeat=RealFeatures(fm_train_real) feats_train=SparseRealFeatures() feats_train.obtain_from_simple(realfeat) realfeat=RealFeatures(fm_test_real) feats_test=SparseRealFeatures() feats_test.obtain_from_simple(realfeat) labels=Labels(label_train_twoclass) svm=SubGradientSVM(C, feats_train, labels) svm.set_epsilon(epsilon) svm.set_max_train_time(max_train_time) svm.train() svm.set_features(feats_test) labels=svm.classify().get_labels() return labels, svm if __name__=='__main__': print 'SubGradientSVM' classifier_subgradientsvm_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() train_dna=lm.load_dna('../data/fm_train_dna.dat') test_dna=lm.load_dna('../data/fm_test_dna.dat') label=lm.load_labels('../data/label_train_dna.dat') parameter_list=[[train_dna, test_dna, label, 20, 0.9, 1e-3, 1], [train_dna, test_dna, label, 20, 2.3, 1e-5, 4]] def classifier_svmlight_batch_linadd_modular(fm_train_dna, fm_test_dna, label_train_dna, degree, C, epsilon, num_threads): from shogun.Features import StringCharFeatures, Labels, DNA from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG try: from shogun.Classifier import SVMLight except ImportError: print 'No support for SVMLight available.' return feats_train=StringCharFeatures(DNA) #feats_train.io.set_loglevel(MSG_DEBUG) feats_train.set_features(fm_train_dna) feats_test=StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) degree=20 kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree) labels=Labels(label_train_dna) svm=SVMLight(C, kernel, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) #print 'SVMLight Objective: %f num_sv: %d' % \ # (svm.get_objective(), svm.get_num_support_vectors()) svm.set_batch_computation_enabled(False) svm.set_linadd_enabled(False) svm.classify().get_labels() svm.set_batch_computation_enabled(True) labels = svm.classify().get_labels() return labels, svm if __name__=='__main__': print 'SVMlight batch' classifier_svmlight_batch_linadd_modular(*parameter_list[0])
import numpy traindna=['CGCACGTACGTAGCTCGAT', 'CGACGTAGTCGTAGTCGTA', 'CGACGGGGGGGGGGTCGTA', 'CGACCTAGTCGTAGTCGTA', 'CGACCACAGTTATATAGTA', 'CGACGTAGTCGTAGTCGTA', 'CGACGTAGTTTTTTTCGTA', 'CGACGTAGTCGTAGCCCCA', 'CAAAAAAAAAAAAAAAATA', 'CGACGGGGGGGGGGGCGTA'] label_traindna=numpy.array(5*[-1.0] + 5*[1.0]) testdna=['AGCACGTACGTAGCTCGAT', 'AGACGTAGTCGTAGTCGTA', 'CAACGGGGGGGGGGTCGTA', 'CGACCTAGTCGTAGTCGTA', 'CGAACACAGTTATATAGTA', 'CGACCTAGTCGTAGTCGTA', 'CGACGTGGGGTTTTTCGTA', 'CGACGTAGTCCCAGCCCCA', 'CAAAAAAAAAAAACCAATA', 'CGACGGCCGGGGGGGCGTA'] label_test_dna=numpy.array(5*[-1.0] + 5*[1.0]) parameter_list = [[traindna,testdna,label_traindna,3,10,1e-5,1],[traindna,testdna,label_traindna,3,10,1e-5,1]] def classifier_svmlight_linear_term_modular(fm_train_dna=traindna,fm_test_dna=testdna, \ label_train_dna=label_traindna,degree=3, \ C=10,epsilon=1e-5,num_threads=1): from shogun.Features import StringCharFeatures, Labels, DNA from shogun.Kernel import WeightedDegreeStringKernel from shogun.Classifier import SVMLight feats_train=StringCharFeatures(DNA) feats_train.set_features(fm_train_dna) feats_test=StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree) labels=Labels(label_train_dna) svm=SVMLight(C, kernel, labels) svm.set_qpsize(3) svm.set_linear_term(-numpy.array([1,2,3,4,5,6,7,8,7,6], dtype=numpy.double)); svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) out = svm.classify().get_labels() return out,kernel if __name__=='__main__': print 'SVMLight' classifier_svmlight_linear_term_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') label_traindat = lm.load_labels('../data/label_train_dna.dat') parameter_list = [[traindat,testdat,label_traindat,1.1,1e-5,1],[traindat,testdat,label_traindat,1.2,1e-5,1]] def classifier_svmlight_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,C=1.2,epsilon=1e-5,num_threads=1): from shogun.Features import StringCharFeatures, Labels, DNA from shogun.Kernel import WeightedDegreeStringKernel try: from shogun.Classifier import SVMLight except ImportError: print 'No support for SVMLight available.' return feats_train=StringCharFeatures(DNA) feats_train.set_features(fm_train_dna) feats_test=StringCharFeatures(DNA) feats_test.set_features(fm_test_dna) degree=20 kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree) labels=Labels(label_train_dna) svm=SVMLight(C, kernel, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.train() kernel.init(feats_train, feats_test) svm.classify().get_labels() return kernel if __name__=='__main__': print 'SVMLight' classifier_svmlight_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat,0.9,1e-5,1],[traindat,testdat,label_traindat,0.8,1e-5,1]] def classifier_svmlin_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,C=0.9,epsilon=1e-5,num_threads=1): from shogun.Features import RealFeatures, SparseRealFeatures, Labels from shogun.Classifier import SVMLin realfeat=RealFeatures(fm_train_real) feats_train=SparseRealFeatures() feats_train.obtain_from_simple(realfeat) realfeat=RealFeatures(fm_test_real) feats_test=SparseRealFeatures() feats_test.obtain_from_simple(realfeat) labels=Labels(label_train_twoclass) svm=SVMLin(C, feats_train, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.set_bias_enabled(True) svm.train() svm.set_features(feats_test) svm.get_bias() svm.get_w() svm.classify().get_labels() predictions = svm.classify() return predictions, svm, predictions.get_labels() if __name__=='__main__': print 'SVMLin' classifier_svmlin_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat,0.9,1e-5,1],[traindat,testdat,label_traindat,0.8,1e-5,1]] def classifier_svmocas_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,C=0.9,epsilon=1e-5,num_threads=1): from shogun.Features import RealFeatures, SparseRealFeatures, Labels from shogun.Classifier import SVMOcas realfeat=RealFeatures(fm_train_real) feats_train=SparseRealFeatures() feats_train.obtain_from_simple(realfeat) realfeat=RealFeatures(fm_test_real) feats_test=SparseRealFeatures() feats_test.obtain_from_simple(realfeat) labels=Labels(label_train_twoclass) svm=SVMOcas(C, feats_train, labels) svm.set_epsilon(epsilon) svm.parallel.set_num_threads(num_threads) svm.set_bias_enabled(False) svm.train() svm.set_features(feats_test) svm.classify().get_labels() predictions = svm.classify() return predictions, svm, predictions.get_labels() if __name__=='__main__': print 'SVMOcas' classifier_svmocas_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat,0.9,1,6],[traindat,testdat,label_traindat,0.8,1,5]] def classifier_svmsgd_modular (fm_train_real=traindat,fm_test_real=testdat,label_train_twoclass=label_traindat,C=0.9,num_threads=1,num_iter=5): from shogun.Features import RealFeatures, SparseRealFeatures, Labels from shogun.Classifier import SVMSGD realfeat=RealFeatures(fm_train_real) feats_train=SparseRealFeatures() feats_train.obtain_from_simple(realfeat) realfeat=RealFeatures(fm_test_real) feats_test=SparseRealFeatures() feats_test.obtain_from_simple(realfeat) labels=Labels(label_train_twoclass) svm=SVMSGD(C, feats_train, labels) svm.set_epochs(num_iter) #svm.io.set_loglevel(0) svm.train() svm.set_features(feats_test) svm.classify().get_labels() predictions = svm.classify() return predictions, svm, predictions.get_labels() if __name__=='__main__': print 'SVMSGD' classifier_svmsgd_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') parameter_list = [[traindat,3],[traindat,4]] def clustering_hierarchical_modular (fm_train=traindat,merges=3): from shogun.Distance import EuclidianDistance from shogun.Features import RealFeatures from shogun.Clustering import Hierarchical feats_train=RealFeatures(fm_train) distance=EuclidianDistance(feats_train, feats_train) hierarchical=Hierarchical(merges, distance) hierarchical.train() out_distance = hierarchical.get_merge_distances() out_cluster = hierarchical.get_cluster_pairs() return hierarchical,out_distance,out_cluster if __name__=='__main__': print 'Hierarchical' clustering_hierarchical_modular(*parameter_list[0])
##!/usr/bin/env python #""" #Explicit examples on how to use clustering #""" from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') parameter_list = [[traindat,3],[traindat,4]] def clustering_kmeans_modular (fm_train=traindat,k=3): from shogun.Distance import EuclidianDistance from shogun.Features import RealFeatures from shogun.Clustering import KMeans from shogun.Library import Math_init_random Math_init_random(17) feats_train=RealFeatures(fm_train) distance=EuclidianDistance(feats_train, feats_train) kmeans=KMeans(k, distance) kmeans.train() out_centers = kmeans.get_cluster_centers() kmeans.get_radiuses() return out_centers, kmeans if __name__=='__main__': print 'KMeans' clustering_kmeans_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat],[traindat,testdat]] def distance_braycurtis_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import BrayCurtisDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=BrayCurtisDistance(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'BrayCurtisDistance' distance_braycurtis_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat],[traindat,testdat]] def distance_canberra_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import CanberraMetric feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=CanberraMetric(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'CanberaMetric' distance_canberra_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindna = lm.load_dna('../data/fm_train_dna.dat') testdna = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindna,testdna,3,0,False],[traindna,testdna,3,0,False]] def distance_canberraword_modular (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False): from shogun.Features import StringCharFeatures, StringWordFeatures, DNA from shogun.PreProc import SortWordString from shogun.Distance import CanberraWordDistance charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortWordString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) feats_test.add_preproc(preproc) feats_test.apply_preproc() distance=CanberraWordDistance(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'CanberraWordDistance' distance_canberraword_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat],[traindat,testdat]] def distance_chebyshew_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import ChebyshewMetric feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=ChebyshewMetric(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'ChebyshewMetric' distance_chebyshew_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat,],[traindat,testdat]] def distance_chisquare_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import ChiSquareDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=ChiSquareDistance(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'ChiSquareDistance' distance_chisquare_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat],[traindat,testdat]] def distance_cosine_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import CosineDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=CosineDistance(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'CosineDistance' distance_cosine_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat],[traindat,testdat]] def distance_euclidian_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'EuclidianDistance' distance_euclidian_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat],[traindat,testdat]] def distance_geodesic_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import GeodesicMetric feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=GeodesicMetric(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'GeodesicMetric' distance_geodesic_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindna = lm.load_dna('../data/fm_train_dna.dat') testdna = lm.load_dna('../data/fm_test_dna.dat') testdat = lm.load_labels('../data/fm_test_real.dat') parameter_list = [[traindna,testdna,testdat,4,0,False,False], [traindna,testdna,testdat,3,0,False,False]] def distance_hammingword_modular (fm_train_dna=traindna,fm_test_dna=testdna, fm_test_real=testdat,order=3,gap=0,reverse=False,use_sign=False): from shogun.Features import StringCharFeatures, StringWordFeatures, DNA from shogun.PreProc import SortWordString from shogun.Distance import HammingWordDistance charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortWordString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) feats_test.add_preproc(preproc) feats_test.apply_preproc() distance=HammingWordDistance(feats_train, feats_train, use_sign) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'HammingWordDistance' distance_hammingword_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat],[traindat,testdat]] def distance_jensen_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import JensenMetric feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=JensenMetric(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'JensenMetric' distance_jensen_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat],[traindat,testdat]] def distance_manhatten_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import ManhattanMetric feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=ManhattanMetric(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'ManhattanMetric' distance_manhatten_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindna = lm.load_dna('../data/fm_train_dna.dat') testdna = lm.load_dna('../data/fm_test_dna.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindna,testdna,testdat,3,0,False],[traindna,testdna,testdat,4,0,False]] def distance_manhattenword_modular (fm_train_dna=traindna ,fm_test_dna=testdna,fm_test_real=testdat,order=3,gap=0,reverse=False): from shogun.Features import StringCharFeatures, StringWordFeatures, DNA from shogun.PreProc import SortWordString from shogun.Distance import ManhattanWordDistance charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortWordString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) feats_test.add_preproc(preproc) feats_test.apply_preproc() distance=ManhattanWordDistance(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return dm_train,dm_test if __name__=='__main__': print 'ManhattanWordDistance' distance_manhattenword_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat,3],[traindat,testdat,4]] def distance_minkowski_modular (fm_train_real=traindat,fm_test_real=testdat,k=3): from shogun.Features import RealFeatures from shogun.Distance import MinkowskiMetric feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=MinkowskiMetric(feats_train, feats_train, k) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'MinkowskiMetric' distance_minkowski_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat],[traindat,testdat]] def distance_normsquared_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance(feats_train, feats_train) distance.set_disable_sqrt(True) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'EuclidianDistance - NormSquared' distance_normsquared_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat],[traindat,testdat]] def distance_sparseeuclidean_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures, SparseRealFeatures from shogun.Distance import SparseEuclidianDistance realfeat=RealFeatures(fm_train_real) feats_train=SparseRealFeatures() feats_train.obtain_from_simple(realfeat) realfeat=RealFeatures(fm_test_real) feats_test=SparseRealFeatures() feats_test.obtain_from_simple(realfeat) distance=SparseEuclidianDistance(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'SparseEuclidianDistance' distance_sparseeuclidean_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat],[traindat,testdat]] def distance_tanimoto_modular (fm_train_real=traindat,fm_test_real=testdat): from shogun.Features import RealFeatures from shogun.Distance import TanimotoDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=TanimotoDistance(feats_train, feats_train) dm_train=distance.get_distance_matrix() distance.init(feats_train, feats_test) dm_test=distance.get_distance_matrix() return distance,dm_train,dm_test if __name__=='__main__': print 'TanimotoDistance' distance_tanimoto_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindna = lm.load_dna('../data/fm_train_dna.dat') parameter_list = [[traindna,3,0,False],[traindna,4,0,False]] def distribution_histogram_modular (fm_dna=traindna,order=3,gap=0,reverse=False): from shogun.Features import StringWordFeatures, StringCharFeatures, DNA from shogun.Distribution import Histogram charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_dna) feats=StringWordFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order-1, order, gap, reverse) histo=Histogram(feats) histo.train() histo.get_histogram() num_examples=feats.get_num_vectors() num_param=histo.get_num_model_parameters() #for i in xrange(num_examples): # for j in xrange(num_param): # histo.get_log_derivative(j, i) out_likelihood = histo.get_log_likelihood() out_sample = histo.get_log_likelihood_sample() return histo,out_sample,out_likelihood ########################################################################### # call functions ########################################################################### if __name__=='__main__': print 'Histogram' distribution_histogram_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() data=lm.load_cubes('../data/fm_train_cube.dat') parameter_list=[[data, 1, 64, 1e-5, 2, 0, False, 5], [data, 3, 6, 1e-1, 1, 0, False, 2]] def distribution_hmm_modular(fm_cube, N, M, pseudo, order, gap, reverse, num_examples): from shogun.Features import StringWordFeatures, StringCharFeatures, CUBE from shogun.Distribution import HMM, BW_NORMAL charfeat=StringCharFeatures(CUBE) charfeat.set_features(fm_cube) feats=StringWordFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order-1, order, gap, reverse) hmm=HMM(feats, N, M, pseudo) hmm.train() hmm.baum_welch_viterbi_train(BW_NORMAL) num_examples=feats.get_num_vectors() num_param=hmm.get_num_model_parameters() for i in xrange(num_examples): for j in xrange(num_param): hmm.get_log_derivative(j, i) best_path=0 best_path_state=0 for i in xrange(num_examples): best_path+=hmm.best_path(i) for j in xrange(N): best_path_state+=hmm.get_best_path_state(i, j) lik_example = hmm.get_log_likelihood() lik_sample = hmm.get_log_likelihood_sample() return lik_example, lik_sample, hmm ########################################################################### # call functions ########################################################################### if __name__=='__main__': print 'HMM' distribution_hmm_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindna = lm.load_dna('../data/fm_train_dna.dat') parameter_list = [[traindna,3,0,False],[traindna,4,0,False]] def distribution_linearhmm_modular (fm_dna=traindna,order=3,gap=0,reverse=False): from shogun.Features import StringWordFeatures, StringCharFeatures, DNA from shogun.Distribution import LinearHMM charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_dna) feats=StringWordFeatures(charfeat.get_alphabet()) feats.obtain_from_char(charfeat, order-1, order, gap, reverse) hmm=LinearHMM(feats) hmm.train() hmm.get_transition_probs() num_examples=feats.get_num_vectors() num_param=hmm.get_num_model_parameters() for i in xrange(num_examples): for j in xrange(num_param): hmm.get_log_derivative(j, i) out_likelihood = hmm.get_log_likelihood() out_sample = hmm.get_log_likelihood_sample() return hmm,out_likelihood ,out_sample ########################################################################### # call functions ########################################################################### if __name__=='__main__': distribution_linearhmm_modular(*parameter_list[0]) print 'LinearHMM'
from tools.load import LoadMatrix lm=LoadMatrix() data=lm.load_numbers('../data/fm_train_real.dat') label=lm.load_numbers('../data/label_train_twoclass.dat') parameter_list=[[data,label]] def features_io_modular(fm_train_real, label_train_twoclass): import numpy from shogun.Features import SparseRealFeatures, RealFeatures, Labels from shogun.Kernel import GaussianKernel from shogun.Library import AsciiFile, BinaryFile, HDF5File feats=SparseRealFeatures(fm_train_real) feats2=SparseRealFeatures() f=BinaryFile("fm_train_sparsereal.bin","w") feats.save(f) f=AsciiFile("fm_train_sparsereal.ascii","w") feats.save(f) f=BinaryFile("fm_train_sparsereal.bin") feats2.load(f) f=AsciiFile("fm_train_sparsereal.ascii") feats2.load(f) feats=RealFeatures(fm_train_real) feats2=RealFeatures() f=BinaryFile("fm_train_real.bin","w") feats.save(f) f=HDF5File("fm_train_real.h5","w", "/data/doubles") feats.save(f) f=AsciiFile("fm_train_real.ascii","w") feats.save(f) f=BinaryFile("fm_train_real.bin") feats2.load(f) #print "diff binary", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())) f=AsciiFile("fm_train_real.ascii") feats2.load(f) #print "diff ascii", numpy.max(numpy.abs(feats2.get_feature_matrix().flatten()-fm_train_real.flatten())) lab=Labels(numpy.array([1.0,2.0,3.0])) lab2=Labels() f=AsciiFile("label_train_twoclass.ascii","w") lab.save(f) f=BinaryFile("label_train_twoclass.bin","w") lab.save(f) f=HDF5File("label_train_real.h5","w", "/data/labels") lab.save(f) f=AsciiFile("label_train_twoclass.ascii") lab2.load(f) f=BinaryFile("label_train_twoclass.bin") lab2.load(f) f=HDF5File("fm_train_real.h5","r", "/data/doubles") feats2.load(f) #print feats2.get_feature_matrix() f=HDF5File("label_train_real.h5","r", "/data/labels") lab2.load(f) #print lab2.get_labels() #clean up import os for f in ['fm_train_sparsereal.bin','fm_train_sparsereal.ascii', 'fm_train_real.bin','fm_train_real.h5','fm_train_real.ascii', 'label_train_real.h5', 'label_train_twoclass.ascii','label_train_twoclass.bin']: os.unlink(f) return feats, feats2, lab, lab2 if __name__=='__main__': print 'Features IO' features_io_modular(*parameter_list[0])
parameter_list=[['../data/train_sparsereal.light']] def features_read_svmlight_format_modular(fname): import os from shogun.Features import SparseRealFeatures f=SparseRealFeatures() lab=f.load_svmlight_file(fname) f.write_svmlight_file('testwrite.light', lab) os.unlink('testwrite.light') if __name__=='__main__': print 'Reading SVMLIGHT format' features_read_svmlight_format_modular(*parameter_list[0])
import numpy # create dense matrix A A=numpy.array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=numpy.uint8) parameter_list=[[A]] def features_simple_byte_modular(A): from shogun.Features import ByteFeatures # create dense features a # ... of type Byte a=ByteFeatures(A) # print some statistics about a #print a.get_num_vectors() #print a.get_num_features() # get first feature vector and set it #print a.get_feature_vector(0) a.set_feature_vector(numpy.array([1,4,0,0,0,9], dtype=numpy.uint8), 0) # get matrix a_out = a.get_feature_matrix() #print type(a_out), a_out.dtype #print a_out assert(numpy.all(a_out==A)) return a_out,a if __name__=='__main__': print 'ByteFeatures' features_simple_byte_modular(*parameter_list[0])
from shogun.Features import LongIntFeatures from numpy import array, int64, all # create dense matrix A matrix=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=int64) parameter_list = [[matrix]] # ... of type LongInt def features_simple_longint_modular(A=matrix): a=LongIntFeatures(A) # get first feature vector and set it a.set_feature_vector(array([1,4,0,0,0,9], dtype=int64), 0) # get matrix a_out = a.get_feature_matrix() assert(all(a_out==A)) return a_out if __name__=='__main__': print 'simple_longint' features_simple_longint_modular(*parameter_list[0])
from shogun.Features import RealFeatures, LongIntFeatures, ByteFeatures from numpy import array, float64, int64, uint8, all # create dense matrices A,B,C matrixA=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=float64) matrixB=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=int64) matrixC=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=uint8) # ... of type Real, LongInt and Byte parameter_list = [[matrixA,matrixB,matrixC]] def features_simple_modular(A=matrixA,B=matrixB,C=matrixC): a=RealFeatures(A) b=LongIntFeatures(B) c=ByteFeatures(C) # or 16bit wide ... #feat1 = f.ShortFeatures(N.zeros((10,5),N.short)) #feat2 = f.WordFeatures(N.zeros((10,5),N.uint16)) # print some statistics about a # get first feature vector and set it a.set_feature_vector(array([1,4,0,0,0,9], dtype=float64), 0) # get matrices a_out = a.get_feature_matrix() b_out = b.get_feature_matrix() c_out = c.get_feature_matrix() assert(all(a_out==A)) assert(all(b_out==B)) assert(all(c_out==C)) return a_out,b_out,c_out,a,b,c if __name__=='__main__': print 'simple' features_simple_modular(*parameter_list[0])
from shogun.Features import RealFeatures from numpy import array, float64, all # create dense matrices A,B,C matrix=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=float64) parameter_list = [[matrix]] # ... of type LongInt def features_simple_real_modular(A=matrix): # ... of type Real, LongInt and Byte a=RealFeatures(A) # print some statistics about a #print a.get_num_vectors() #print a.get_num_features() # get first feature vector and set it #print a.get_feature_vector(0) a.set_feature_vector(array([1,4,0,0,0,9], dtype=float64), 0) # get matrix a_out = a.get_feature_matrix() assert(all(a_out==A)) return a_out if __name__=='__main__': print 'simple_real' features_simple_real_modular(*parameter_list[0])
parameter_list=[['../data/snps.dat']] def features_snp_modular(fname): from shogun.Features import StringByteFeatures, SNPFeatures, SNP sf=StringByteFeatures(SNP) sf.load_ascii_file(fname, False, SNP, SNP) #print sf.get_features() snps=SNPFeatures(sf) #print snps.get_feature_matrix() #print snps.get_minor_base_string() #print snps.get_major_base_string() if __name__=='__main__': print 'SNP Features' features_snp_modular(*parameter_list[0])
import numpy # create dense matrix A A=numpy.array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=numpy.float64) parameter_list=[[A]] def features_sparse_modular(A): from scipy.sparse import csc_matrix from shogun.Features import SparseRealFeatures from numpy import array, float64, all # sparse representation X of dense matrix A # note, will work with types other than float64 too, # but requires recent scipy.sparse X=csc_matrix(A) #print A # create sparse shogun features from dense matrix A a=SparseRealFeatures(A) a_out=a.get_full_feature_matrix() #print a_out assert(all(a_out==A)) #print a_out # create sparse shogun features from sparse matrix X a.set_sparse_feature_matrix(X) a_out=a.get_full_feature_matrix() #print a_out assert(all(a_out==A)) # create sparse shogun features from sparse matrix X a=SparseRealFeatures(X) a_out=a.get_full_feature_matrix() #print a_out assert(all(a_out==A)) # obtain (data,row,indptr) csc arrays of sparse shogun features z=csc_matrix(a.get_sparse_feature_matrix()) z_out=z.todense() #print z_out assert(all(z_out==A)) if __name__=='__main__': print 'Sparse Features' features_sparse_modular(*parameter_list[0])
parameter_list = [['features_string_char_compressed_modular.py']] def features_string_char_compressed_modular(fname): from shogun.Features import StringCharFeatures, StringFileCharFeatures, RAWBYTE from shogun.Library import UNCOMPRESSED,LZO,GZIP,BZIP2,LZMA, MSG_DEBUG from shogun.PreProc import DecompressCharString f=StringFileCharFeatures(fname, RAWBYTE) #print "original strings", f.get_features() #uncompressed f.save_compressed("foo_uncompressed.str", UNCOMPRESSED, 1) f2=StringCharFeatures(RAWBYTE); f2.load_compressed("foo_uncompressed.str", True) #print "uncompressed strings", f2.get_features() #print # load compressed data and uncompress on load #lzo f.save_compressed("foo_lzo.str", LZO, 9) f2=StringCharFeatures(RAWBYTE); f2.load_compressed("foo_lzo.str", True) #print "lzo strings", f2.get_features() #print ##gzip f.save_compressed("foo_gzip.str", GZIP, 9) f2=StringCharFeatures(RAWBYTE); f2.load_compressed("foo_gzip.str", True) #print "gzip strings", f2.get_features() #print #bzip2 f.save_compressed("foo_bzip2.str", BZIP2, 9) f2=StringCharFeatures(RAWBYTE); f2.load_compressed("foo_bzip2.str", True) #print "bzip2 strings", f2.get_features() #print #lzma f.save_compressed("foo_lzma.str", LZMA, 9) f2=StringCharFeatures(RAWBYTE); f2.load_compressed("foo_lzma.str", True) #print "lzma strings", f2.get_features() #print # load compressed data and uncompress via preprocessor f2=StringCharFeatures(RAWBYTE); f2.load_compressed("foo_lzo.str", False) f2.add_preproc(DecompressCharString(LZO)) f2.apply_preproc() #print "lzo strings", f2.get_features() #print # load compressed data and uncompress on-the-fly via preprocessor f2=StringCharFeatures(RAWBYTE); f2.load_compressed("foo_lzo.str", False) #f2.io.set_loglevel(MSG_DEBUG) f2.add_preproc(DecompressCharString(LZO)) f2.enable_on_the_fly_preprocessing() #print "lzo strings", f2.get_features() #print #clean up import os for f in ['foo_uncompressed.str', 'foo_lzo.str', 'foo_gzip.str', 'foo_bzip2.str', 'foo_lzma.str', 'foo_lzo.str', 'foo_lzo.str']: if os.path.exists(f): os.unlink(f) ########################################################################################## # some perfectly compressible stuff follows ########################################################################################## ########################################################################################## ########################################################################################## ########################################################################################## ########################################################################################## ########################################################################################## ########################################################################################## ########################################################################################## ########################################################################################## ########################################################################################## if __name__=='__main__': print 'Compressing StringCharFileFeatures' features_string_char_compressed_modular(*parameter_list[0])
strings=['hey','guys','i','am','a','string'] parameter_list=[[strings]] def features_string_char_modular(strings): from shogun.Features import StringCharFeatures, RAWBYTE from numpy import array #create string features f=StringCharFeatures(strings, RAWBYTE) #and output several stats #print "max string length", f.get_max_vector_length() #print "number of strings", f.get_num_vectors() #print "length of first string", f.get_vector_length(0) #print "string[5]", ''.join(f.get_feature_vector(5)) #print "strings", f.get_features() #replace string 0 f.set_feature_vector(array(['t','e','s','t']), 0) #print "strings", f.get_features() return f.get_features(), f if __name__=='__main__': print 'StringCharFeatures' features_string_char_modular(*parameter_list[0])
parameter_list = [['features_string_file_char_modular.py']] def features_string_file_char_modular(fname): from shogun.Features import StringFileCharFeatures, RAWBYTE f = StringFileCharFeatures(fname, RAWBYTE) #print "strings", f.get_features() return f if __name__=='__main__': print 'Compressing StringCharFileFeatures' features_string_file_char_modular(*parameter_list[0])
parameter_list=[[".", "features_string_char_modular.py"]] def features_string_file_modular(directory, fname): from shogun.Features import StringCharFeatures, RAWBYTE from shogun.Library import AsciiFile # load features from directory f=StringCharFeatures(RAWBYTE) f.load_from_directory(directory) #and output several stats #print "max string length", f.get_max_vector_length() #print "number of strings", f.get_num_vectors() #print "length of first string", f.get_vector_length(0) #print "str[0,0:3]", f.get_feature(0,0), f.get_feature(0,1), f.get_feature(0,2) #print "len(str[0])", f.get_vector_length(0) #print "str[0]", f.get_feature_vector(0) #or load features from file (one string per line) fil=AsciiFile(fname) f.load(fil) #print f.get_features() #or load fasta file #f.load_fasta('fasta.fa') #print f.get_features() return f.get_features(), f if __name__=='__main__': print 'StringWordFeatures' features_string_file_modular(*parameter_list[0])
from shogun.Features import LongIntFeatures from numpy import array, int64, all # create dense matrix A matrix=array([[1,2,3],[4,0,0],[0,0,0],[0,5,0],[0,0,6],[9,9,9]], dtype=int64) parameter_list = [[matrix,3,1,2],[matrix,3,1,2]] # ... of type LongInt def features_string_hashed_wd_modular(A=matrix,order=3,start_order=1,hash_bits=2): a=LongIntFeatures(A) from numpy import * from shogun.Features import * from shogun.Library import MSG_DEBUG x=[array([0,1,2,3,0,1,2,3,3,2,2,1,1],dtype=uint8)] from_order=order f=StringByteFeatures(RAWDNA) #f.io.set_loglevel(MSG_DEBUG) f.set_features(x) y=HashedWDFeatures(f,start_order,order,from_order,hash_bits) fm=y.get_feature_matrix() return fm if __name__=='__main__': print 'string_hashed_wd' features_string_hashed_wd_modular(*parameter_list[0])
# create string features with a single string s=10*'A' + 10*'C' + 10*'G' + 10*'T' parameter_list=[[s]] def features_string_sliding_window_modular(strings): from shogun.Features import StringCharFeatures, DNA from shogun.Library import DynamicIntArray f=StringCharFeatures([strings], DNA) # slide a window of length 5 over features # (memory efficient, does not copy strings) f.obtain_by_sliding_window(5,1) #print f.get_num_vectors() #print f.get_vector_length(0) #print f.get_vector_length(1) #print f.get_features() # slide a window of length 4 over features # (memory efficient, does not copy strings) f.obtain_by_sliding_window(4,1) #print f.get_num_vectors() #print f.get_vector_length(0) #print f.get_vector_length(1) #print f.get_features() # extract string-windows at position 0,6,16,25 of window size 4 # (memory efficient, does not copy strings) f.set_features([s]) positions=DynamicIntArray() positions.append_element(0) positions.append_element(6) positions.append_element(16) positions.append_element(25) f.obtain_by_position_list(4,positions) #print f.get_features() # now extract windows of size 8 from same positon list f.obtain_by_position_list(8,positions) #print f.get_features() return f if __name__=='__main__': print 'Sliding Window' features_string_sliding_window_modular(*parameter_list[0])
parameter_list = [[0,2,0,False],[0,3,0,False]] def features_string_ulong_modular(start=0,order=2,gap=0,rev=False): from shogun.Features import StringCharFeatures, StringUlongFeatures, RAWBYTE from numpy import array, uint64 #create string features cf=StringCharFeatures(['hey','guys','string'], RAWBYTE) uf=StringUlongFeatures(RAWBYTE) uf.obtain_from_char(cf, start,order,gap,rev) #replace string 0 uf.set_feature_vector(array([1,2,3,4,5], dtype=uint64), 0) return uf.get_features(),uf.get_feature_vector(2), uf.get_num_vectors() if __name__=='__main__': print 'simple_longint' features_string_ulong_modular(*parameter_list[0])
strings=['hey','guys','string'] parameter_list=[[strings,0,2,0,False]] def features_string_word_modular(strings, start, order, gap, rev): from shogun.Features import StringCharFeatures, StringWordFeatures, RAWBYTE from numpy import array, uint16 #create string features cf=StringCharFeatures(strings, RAWBYTE) wf=StringWordFeatures(RAWBYTE) wf.obtain_from_char(cf, start, order, gap, rev) #and output several stats #print "max string length", wf.get_max_vector_length() #print "number of strings", wf.get_num_vectors() #print "length of first string", wf.get_vector_length(0) #print "string[2]", wf.get_feature_vector(2) #print "strings", wf.get_features() #replace string 0 wf.set_feature_vector(array([1,2,3,4,5], dtype=uint16), 0) #print "strings", wf.get_features() return wf.get_features(), wf if __name__=='__main__': print 'StringWordFeatures' features_string_word_modular(*parameter_list[0])
########################################################################### # kernel can be used to maximize AUC instead of margin in SVMs ########################################################################### from tools.load import LoadMatrix from numpy import double lm=LoadMatrix() traindat = double(lm.load_numbers('../data/fm_train_real.dat')) testdat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,1.7], [traindat,testdat,1.6]] def kernel_auc_modular(fm_train_real=traindat,label_train_real=testdat,width=1.7): from shogun.Kernel import GaussianKernel, AUCKernel from shogun.Features import RealFeatures, Labels feats_train=RealFeatures(fm_train_real) subkernel=GaussianKernel(feats_train, feats_train, width) kernel=AUCKernel(0, subkernel) kernel.setup_auc_maximization( Labels(label_train_real) ) km_train=kernel.get_kernel_matrix() return kernel if __name__=='__main__': print 'AUC' kernel_auc_modular(*parameter_list[0])
########################################################################### # chi2 kernel ########################################################################### from tools.load import LoadMatrix from numpy import double lm=LoadMatrix() traindat = double(lm.load_numbers('../data/fm_train_real.dat')) testdat = double(lm.load_numbers('../data/fm_test_real.dat')) parameter_list = [[traindat,testdat,1.4,10], [traindat,testdat,1.5,10]] def kernel_chi2_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4, size_cache=10): from shogun.Kernel import Chi2Kernel from shogun.Features import RealFeatures feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) kernel=Chi2Kernel(feats_train, feats_train, width, size_cache) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'Chi2' kernel_chi2_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list= [[traindat,testdat,label_traindat],[traindat,testdat,label_traindat]] def kernel_combined_custom_poly_modular(fm_train_real = traindat,fm_test_real = testdat,fm_label_twoclass=label_traindat): from shogun.Features import CombinedFeatures, RealFeatures, Labels from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel from shogun.Classifier import LibSVM kernel = CombinedKernel() feats_train = CombinedFeatures() tfeats = RealFeatures(fm_train_real) tkernel = PolyKernel(10,3) tkernel.init(tfeats, tfeats) K = tkernel.get_kernel_matrix() kernel.append_kernel(CustomKernel(K)) subkfeats_train = RealFeatures(fm_train_real) feats_train.append_feature_obj(subkfeats_train) subkernel = PolyKernel(10,2) kernel.append_kernel(subkernel) kernel.init(feats_train, feats_train) labels = Labels(fm_label_twoclass) svm = LibSVM(1.0, kernel, labels) svm.train() kernel = CombinedKernel() feats_pred = CombinedFeatures() pfeats = RealFeatures(fm_test_real) tkernel = PolyKernel(10,3) tkernel.init(tfeats, pfeats) K = tkernel.get_kernel_matrix() kernel.append_kernel(CustomKernel(K)) subkfeats_test = RealFeatures(fm_test_real) feats_pred.append_feature_obj(subkfeats_test) subkernel = PolyKernel(10, 2) kernel.append_kernel(subkernel) kernel.init(feats_train, feats_pred) svm.set_kernel(kernel) svm.classify() km_train=kernel.get_kernel_matrix() return km_train,kernel if __name__=='__main__': kernel_combined_custom_poly_modular(*parameter_list[0])
from tools.load import LoadMatrix from numpy import double lm=LoadMatrix() traindat = double(lm.load_numbers('../data/fm_train_real.dat')) testdat = double(lm.load_numbers('../data/fm_test_real.dat')) traindna = lm.load_dna('../data/fm_train_dna.dat') testdna = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,traindna,testdna],[traindat,testdat,traindna,testdna]] def kernel_combined_modular(fm_train_real=traindat,fm_test_real=testdat,fm_train_dna=traindna,fm_test_dna=testdna ): from shogun.Kernel import CombinedKernel, GaussianKernel, FixedDegreeStringKernel, LocalAlignmentStringKernel from shogun.Features import RealFeatures, StringCharFeatures, CombinedFeatures, DNA kernel=CombinedKernel() feats_train=CombinedFeatures() feats_test=CombinedFeatures() subkfeats_train=RealFeatures(fm_train_real) subkfeats_test=RealFeatures(fm_test_real) subkernel=GaussianKernel(10, 1.1) feats_train.append_feature_obj(subkfeats_train) feats_test.append_feature_obj(subkfeats_test) kernel.append_kernel(subkernel) subkfeats_train=StringCharFeatures(fm_train_dna, DNA) subkfeats_test=StringCharFeatures(fm_test_dna, DNA) degree=3 subkernel=FixedDegreeStringKernel(10, degree) feats_train.append_feature_obj(subkfeats_train) feats_test.append_feature_obj(subkfeats_test) kernel.append_kernel(subkernel) subkfeats_train=StringCharFeatures(fm_train_dna, DNA) subkfeats_test=StringCharFeatures(fm_test_dna, DNA) subkernel=LocalAlignmentStringKernel(10) feats_train.append_feature_obj(subkfeats_train) feats_test.append_feature_obj(subkfeats_test) kernel.append_kernel(subkernel) kernel.init(feats_train, feats_train) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'Combined' kernel_combined_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat =lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,3,0,False ],[traindat,testdat,4,0,False]] def kernel_comm_ulong_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, order=3, gap=0, reverse = False): from shogun.Kernel import CommUlongStringKernel from shogun.Features import StringUlongFeatures, StringCharFeatures, DNA from shogun.PreProc import SortUlongString charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) feats_train=StringUlongFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortUlongString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringUlongFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) feats_test.add_preproc(preproc) feats_test.apply_preproc() use_sign=False kernel=CommUlongStringKernel(feats_train, feats_train, use_sign) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'CommUlongString' kernel_comm_ulong_string_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,4,0,False, False],[traindat,testdat,4,0,False,False]] def kernel_comm_word_string_modular (fm_train_dna=traindat, fm_test_dna=testdat, order=3, gap=0, reverse = False, use_sign = False): from shogun.Kernel import CommWordStringKernel from shogun.Features import StringWordFeatures, StringCharFeatures, DNA from shogun.PreProc import SortWordString charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortWordString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) feats_test.add_preproc(preproc) feats_test.apply_preproc() kernel=CommWordStringKernel(feats_train, feats_train, use_sign) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'CommWordString' kernel_comm_word_string_modular(*parameter_list[0])
parameter_list =[[23],[24]] def kernel_const_modular (c=23): from shogun.Features import DummyFeatures from shogun.Kernel import ConstKernel feats_train=DummyFeatures(10) feats_test=DummyFeatures(17) kernel=ConstKernel(feats_train, feats_train, c) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'Const' kernel_const_modular(*parameter_list[0])
from numpy.random import seed seed(42) parameter_list=[[7],[8]] def kernel_custom_modular (dim=7): from numpy.random import rand, seed from numpy import array, float32 from shogun.Features import RealFeatures from shogun.Kernel import CustomKernel seed(17) data=rand(dim, dim) feats=RealFeatures(data) symdata=data+data.T lowertriangle=array([symdata[(x,y)] for x in xrange(symdata.shape[1]) for y in xrange(symdata.shape[0]) if y<=x]) kernel=CustomKernel() # once with float64's kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle) km_triangletriangle=kernel.get_kernel_matrix() kernel.set_triangle_kernel_matrix_from_full(symdata) km_fulltriangle=kernel.get_kernel_matrix() kernel.set_full_kernel_matrix_from_full(data) km_fullfull=kernel.get_kernel_matrix() # now once with float32's data=array(data,dtype=float32) kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle) km_triangletriangle=kernel.get_kernel_matrix() kernel.set_triangle_kernel_matrix_from_full(symdata) km_fulltriangle=kernel.get_kernel_matrix() kernel.set_full_kernel_matrix_from_full(data) km_fullfull=kernel.get_kernel_matrix() return km_fullfull,kernel if __name__=='__main__': print 'Custom' kernel_custom_modular(*parameter_list[0])
parameter_list =[[23],[24]] def kernel_diag_modular (diag=23): from shogun.Features import DummyFeatures from shogun.Kernel import DiagKernel feats_train=DummyFeatures(10) feats_test=DummyFeatures(17) kernel=DiagKernel(feats_train, feats_train, diag) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'Diag' kernel_diag_modular(*parameter_list[0])
from tools.load import LoadMatrix from numpy import double lm=LoadMatrix() traindat = double(lm.load_numbers('../data/fm_test_real.dat')) testdat = double(lm.load_numbers('../data/fm_train_real.dat')) parameter_list=[[traindat,testdat,1.7],[traindat,testdat,1.8]] def kernel_distance_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.7): from shogun.Kernel import DistanceKernel from shogun.Features import RealFeatures from shogun.Distance import EuclidianDistance feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) distance=EuclidianDistance() kernel=DistanceKernel(feats_train, feats_test, width, distance) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'Distance' kernel_distance_modular(*parameter_list[0])
from tools.load import LoadMatrix from numpy import where lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') label_traindat = lm.load_labels('../data/label_train_dna.dat') parameter_list = [[traindat,testdat,label_traindat,1,4,1e-1,1,0,False,[1,False,True]],[traindat,testdat,label_traindat,3,4,1e-1,1,0,False,[1,False,True]]] fm_hmm_pos=[ traindat[i] for i in where([label_traindat==1])[1] ] fm_hmm_neg=[ traindat[i] for i in where([label_traindat==-1])[1] ] def kernel_fisher_modular(fm_train_dna=traindat, fm_test_dna=testdat, label_train_dna=label_traindat, N=1,M=4,pseudo=1e-1,order=1,gap=0,reverse=False, kargs=[1,False,True]): from shogun.Features import StringCharFeatures, StringWordFeatures, FKFeatures, DNA from shogun.Kernel import PolyKernel from shogun.Distribution import HMM, BW_NORMAL#, MSG_DEBUG # train HMM for positive class charfeat=StringCharFeatures(fm_hmm_pos, DNA) #charfeat.io.set_loglevel(MSG_DEBUG) hmm_pos_train=StringWordFeatures(charfeat.get_alphabet()) hmm_pos_train.obtain_from_char(charfeat, order-1, order, gap, reverse) pos=HMM(hmm_pos_train, N, M, pseudo) pos.baum_welch_viterbi_train(BW_NORMAL) # train HMM for negative class charfeat=StringCharFeatures(fm_hmm_neg, DNA) hmm_neg_train=StringWordFeatures(charfeat.get_alphabet()) hmm_neg_train.obtain_from_char(charfeat, order-1, order, gap, reverse) neg=HMM(hmm_neg_train, N, M, pseudo) neg.baum_welch_viterbi_train(BW_NORMAL) # Kernel training data charfeat=StringCharFeatures(fm_train_dna, DNA) wordfeats_train=StringWordFeatures(charfeat.get_alphabet()) wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) # Kernel testing data charfeat=StringCharFeatures(fm_test_dna, DNA) wordfeats_test=StringWordFeatures(charfeat.get_alphabet()) wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) # get kernel on training data pos.set_observations(wordfeats_train) neg.set_observations(wordfeats_train) feats_train=FKFeatures(10, pos, neg) feats_train.set_opt_a(-1) #estimate prior kernel=PolyKernel(feats_train, feats_train, *kargs) km_train=kernel.get_kernel_matrix() # get kernel on testing data pos_clone=HMM(pos) neg_clone=HMM(neg) pos_clone.set_observations(wordfeats_test) neg_clone.set_observations(wordfeats_test) feats_test=FKFeatures(10, pos_clone, neg_clone) feats_test.set_a(feats_train.get_a()) #use prior from training data kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print "Fisher Kernel" kernel_fisher_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list=[[traindat, testdat,3],[traindat,testdat,4]] def kernel_fixed_degree_string_modular (fm_train_dna=traindat, fm_test_dna=testdat,degree=3): from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import FixedDegreeStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) kernel=FixedDegreeStringKernel(feats_train, feats_train, degree) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'FixedDegreeString' kernel_fixed_degree_string_modular(*parameter_list[0])
from tools.load import LoadMatrix from numpy import where lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list=[[traindat,testdat, 1.3],[traindat,testdat, 1.4]] def kernel_gaussian_modular (fm_train_real=traindat,fm_test_real=testdat, width=1.3): from shogun.Features import RealFeatures from shogun.Kernel import GaussianKernel feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) kernel=GaussianKernel(feats_train, feats_train, width) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'Gaussian' kernel_gaussian_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list=[[traindat,testdat,1.8,2,1],[traindat,testdat,1.9,2,1]] def kernel_gaussian_shift_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.8,max_shift=2,shift_step=1): from shogun.Features import RealFeatures from shogun.Kernel import GaussianShiftKernel feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) kernel=GaussianShiftKernel(feats_train, feats_train, width, max_shift, shift_step) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'GaussianShift' kernel_gaussian_shift_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') label_traindat = lm.load_labels('../data/label_train_dna.dat') parameter_list=[[traindat,testdat,label_traindat,3,0,False],[traindat,testdat,label_traindat,3,0,False]] def kernel_histogram_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,order=3,gap=0,reverse=False): from shogun.Features import StringCharFeatures, StringWordFeatures, DNA, Labels from shogun.Kernel import HistogramWordStringKernel from shogun.Classifier import PluginEstimate#, MSG_DEBUG reverse = reverse charfeat=StringCharFeatures(DNA) #charfeat.io.set_loglevel(MSG_DEBUG) charfeat.set_features(fm_train_dna) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) pie=PluginEstimate() labels=Labels(label_train_dna) pie.set_labels(labels) pie.set_features(feats_train) pie.train() kernel=HistogramWordStringKernel(feats_train, feats_train, pie) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) pie.set_features(feats_test) pie.classify().get_labels() km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'PluginEstimate w/ HistogramWord' kernel_histogram_word_string_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list=[[traindat,testdat,1.9],[traindat,testdat,1.7]] def kernel_io_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.9): from shogun.Features import RealFeatures from shogun.Kernel import GaussianKernel from shogun.Library import AsciiFile, BinaryFile feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) kernel=GaussianKernel(feats_train, feats_train, width) km_train=kernel.get_kernel_matrix() f=AsciiFile("gaussian_train.ascii","w") kernel.save(f) del f kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() f=AsciiFile("gaussian_test.ascii","w") kernel.save(f) del f #clean up import os os.unlink("gaussian_test.ascii") os.unlink("gaussian_train.ascii") return km_train, km_test, kernel if __name__=='__main__': print 'Gaussian' kernel_io_modular(*parameter_list[0])
########################################################################### # linear kernel on byte features ########################################################################### from tools.load import LoadMatrix from numpy import ubyte lm=LoadMatrix() traindat = ubyte(lm.load_numbers('../data/fm_train_byte.dat')) testdat = ubyte(lm.load_numbers('../data/fm_test_byte.dat')) parameter_list=[[traindat,testdat],[traindat,testdat]] def kernel_linear_byte_modular(fm_train_byte=traindat,fm_test_byte=testdat): from shogun.Kernel import LinearKernel from shogun.Features import ByteFeatures feats_train=ByteFeatures(fm_train_byte) feats_test=ByteFeatures(fm_test_byte) kernel=LinearKernel(feats_train, feats_train) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return kernel if __name__=='__main__': print 'LinearByte' kernel_linear_byte_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.4]] def kernel_linear_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.2): from shogun.Features import RealFeatures from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) kernel=LinearKernel() kernel.set_normalizer(AvgDiagKernelNormalizer(scale)) kernel.init(feats_train, feats_train) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'Linear' kernel_linear_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list=[[traindat,testdat],[traindat,testdat]] def kernel_linear_string_modular (fm_train_dna=traindat,fm_test_dna=testdat): from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import LinearStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) kernel=LinearStringKernel(feats_train, feats_train) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': from tools.load import LoadMatrix print 'LinearString' kernel_linear_string_modular(*parameter_list[0])
from tools.load import LoadMatrix from numpy import ushort lm=LoadMatrix() traindat = ushort(lm.load_numbers('../data/fm_train_word.dat')) testdat = ushort(lm.load_numbers('../data/fm_test_word.dat')) parameter_list=[[traindat,testdat,1.2],[traindat,testdat,1.2]] def kernel_linear_word_modular (fm_train_word=traindat,fm_test_word=testdat,scale=1.2): from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer from shogun.Features import WordFeatures feats_train=WordFeatures(fm_train_word) feats_test=WordFeatures(fm_test_word) kernel=LinearKernel(feats_train, feats_train) kernel.set_normalizer(AvgDiagKernelNormalizer(scale)) kernel.init(feats_train, feats_train) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return kernel if __name__=='__main__': print 'LinearWord' kernel_linear_word_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list=[[traindat,testdat],[traindat,testdat]] def kernel_local_alignment_string_modular(fm_train_dna=traindat,fm_test_dna=testdat): from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import LocalAlignmentStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) kernel=LocalAlignmentStringKernel(feats_train, feats_train) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'LocalAlignmentString' kernel_local_alignment_string_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list=[[traindat,testdat,5,5,7],[traindat,testdat,5,5,7]] def kernel_locality_improved_string_modular(fm_train_dna=traindat,fm_test_dna=testdat,length=5,inner_degree=5,outer_degree=7): from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import LocalityImprovedStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) kernel=LocalityImprovedStringKernel( feats_train, feats_train, length, inner_degree, outer_degree) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'LocalityImprovedString' kernel_locality_improved_string_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat, 3,1.4,10,3,0,False],[ traindat,testdat, 3,1.4,10,3,0,False]] def kernel_match_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat, degree=3,scale=1.4,size_cache=10,order=3,gap=0,reverse=False): from shogun.Kernel import MatchWordStringKernel, AvgDiagKernelNormalizer from shogun.Features import StringWordFeatures, StringCharFeatures, DNA charfeat=StringCharFeatures(fm_train_dna, DNA) feats_train=StringWordFeatures(DNA) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) charfeat=StringCharFeatures(fm_test_dna, DNA) feats_test=StringWordFeatures(DNA) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) kernel=MatchWordStringKernel(size_cache, degree) kernel.set_normalizer(AvgDiagKernelNormalizer(scale)) kernel.init(feats_train, feats_train) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'MatchWordString' kernel_match_word_string_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,3,1.2,10],[traindat,testdat,4,1.3,10]] def kernel_oligo_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,k=3,width=1.2,size_cache=10): from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import OligoStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) kernel=OligoStringKernel(size_cache, k, width) kernel.init(feats_train, feats_train) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'OligoString' kernel_oligo_string_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,3,False],[traindat,testdat,4,False]] def kernel_poly_match_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,degree=3,inhomogene=False): from shogun.Kernel import PolyMatchStringKernel from shogun.Features import StringCharFeatures, DNA feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_train_dna, DNA) kernel=PolyMatchStringKernel(feats_train, feats_train, degree, inhomogene) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'PolyMatchString' kernel_poly_match_string_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,2,True,3,0,False],[traindat,testdat,2,True,3,0,False]] def kernel_poly_match_word_string_modular(fm_train_dna=traindat,fm_test_dna=testdat, degree=2,inhomogene=True,order=3,gap=0,reverse=False): from shogun.Kernel import PolyMatchWordStringKernel from shogun.Features import StringWordFeatures, StringCharFeatures, DNA charfeat=StringCharFeatures(fm_train_dna, DNA) feats_train=StringWordFeatures(DNA) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) charfeat=StringCharFeatures(fm_test_dna, DNA) feats_test=StringWordFeatures(DNA) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) kernel=PolyMatchWordStringKernel(feats_train, feats_train, degree, inhomogene) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'PolyMatchWordString' kernel_poly_match_word_string_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat,4,False,True],[traindat,testdat,5,False,True]] def kernel_poly_modular (fm_train_real=traindat,fm_test_real=testdat,degree=4,inhomogene=False, use_normalization=True): from shogun.Features import RealFeatures from shogun.Kernel import PolyKernel feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) kernel=PolyKernel( feats_train, feats_train, degree, inhomogene, use_normalization) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'Poly' kernel_poly_modular (*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') label_traindat = lm.load_labels('../data/label_train_dna.dat') parameter_list = [[traindat,testdat,label_traindat,3,0,False],[traindat,testdat,label_traindat,3,0,False]] def kernel_salzberg_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat, order=3,gap=0,reverse=False): from shogun.Features import StringCharFeatures, StringWordFeatures, DNA, Labels from shogun.Kernel import SalzbergWordStringKernel from shogun.Classifier import PluginEstimate charfeat=StringCharFeatures(fm_train_dna, DNA) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) charfeat=StringCharFeatures(fm_test_dna, DNA) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) pie=PluginEstimate() labels=Labels(label_train_dna) pie.set_labels(labels) pie.set_features(feats_train) pie.train() kernel=SalzbergWordStringKernel(feats_train, feats_test, pie, labels) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) pie.set_features(feats_test) pie.classify().get_labels() km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'PluginEstimate w/ SalzbergWord' kernel_salzberg_word_string_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat,10,1.2,1.3],[traindat,testdat,10,1.2,1.3]] def kernel_sigmoid_modular(fm_train_real=traindat,fm_test_real=testdat,size_cache=10,gamma=1.2,coef0=1.3): from shogun.Features import RealFeatures from shogun.Kernel import SigmoidKernel feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) kernel=SigmoidKernel(feats_train, feats_train, size_cache, gamma, coef0) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'Sigmoid' kernel_sigmoid_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,5,5,1],[traindat,testdat,5,3,2]] def kernel_simple_locality_improved_string_modular(fm_train_dna=traindat,fm_test_dna=testdat, length=5,inner_degree=5,outer_degree=1 ): from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import SimpleLocalityImprovedStringKernel, MSG_DEBUG feats_train=StringCharFeatures(fm_train_dna, DNA) #feats_train.io.set_loglevel(MSG_DEBUG) feats_test=StringCharFeatures(fm_test_dna, DNA) kernel=SimpleLocalityImprovedStringKernel( feats_train, feats_train, length, inner_degree, outer_degree) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'SimpleLocalityImprovedString' kernel_simple_locality_improved_string_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat,1.1],[traindat,testdat,1.2]] def kernel_sparse_gaussian_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.1 ): from shogun.Features import SparseRealFeatures from shogun.Kernel import GaussianKernel feats_train=SparseRealFeatures(fm_train_real) feats_test=SparseRealFeatures(fm_test_real) kernel=GaussianKernel(feats_train, feats_train, width) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'SparseGaussian' kernel_sparse_gaussian_modular (*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat,1.1],[traindat,testdat,1.2]] def kernel_sparse_linear_modular (fm_train_real=traindat,fm_test_real=testdat,scale=1.1): from shogun.Features import SparseRealFeatures from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer feats_train=SparseRealFeatures(fm_train_real) feats_test=SparseRealFeatures(fm_test_real) kernel=LinearKernel() kernel.set_normalizer(AvgDiagKernelNormalizer(scale)) kernel.init(feats_train, feats_train) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'SparseLinear' kernel_sparse_linear_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat,10,3,True],[traindat,testdat,10,4,True]] def kernel_sparse_poly_modular (fm_train_real=traindat,fm_test_real=testdat, size_cache=10,degree=3,inhomogene=True ): from shogun.Features import SparseRealFeatures from shogun.Kernel import PolyKernel feats_train=SparseRealFeatures(fm_train_real) feats_test=SparseRealFeatures(fm_test_real) kernel=PolyKernel(feats_train, feats_train, size_cache, degree, inhomogene) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'SparsePoly' kernel_sparse_poly_modular(*parameter_list[0])
from tools.load import LoadMatrix from numpy import where lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') label_traindat = lm.load_labels('../data/label_train_dna.dat') fm_hmm_pos=[traindat[i] for i in where([label_traindat==1])[1] ] fm_hmm_neg=[traindat[i] for i in where([label_traindat==-1])[1] ] parameter_list = [[traindat,testdat,label_traindat,1e-1,1,0,False,[1, False, True]], \ [traindat,testdat,label_traindat,1e-1,1,0,False,[1, False, True] ]] def kernel_top_modular(fm_train_dna=traindat,fm_test_dna=testdat,label_train_dna=label_traindat,pseudo=1e-1, order=1,gap=0,reverse=False,kargs=[1, False, True]): from shogun.Features import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA from shogun.Kernel import PolyKernel from shogun.Distribution import HMM, BW_NORMAL N=1 # toy HMM with 1 state M=4 # 4 observations -> DNA # train HMM for positive class charfeat=StringCharFeatures(fm_hmm_pos, DNA) hmm_pos_train=StringWordFeatures(charfeat.get_alphabet()) hmm_pos_train.obtain_from_char(charfeat, order-1, order, gap, reverse) pos=HMM(hmm_pos_train, N, M, pseudo) pos.baum_welch_viterbi_train(BW_NORMAL) # train HMM for negative class charfeat=StringCharFeatures(fm_hmm_neg, DNA) hmm_neg_train=StringWordFeatures(charfeat.get_alphabet()) hmm_neg_train.obtain_from_char(charfeat, order-1, order, gap, reverse) neg=HMM(hmm_neg_train, N, M, pseudo) neg.baum_welch_viterbi_train(BW_NORMAL) # Kernel training data charfeat=StringCharFeatures(fm_train_dna, DNA) wordfeats_train=StringWordFeatures(charfeat.get_alphabet()) wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) # Kernel testing data charfeat=StringCharFeatures(fm_test_dna, DNA) wordfeats_test=StringWordFeatures(charfeat.get_alphabet()) wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) # get kernel on training data pos.set_observations(wordfeats_train) neg.set_observations(wordfeats_train) feats_train=TOPFeatures(10, pos, neg, False, False) kernel=PolyKernel(feats_train, feats_train, *kargs) km_train=kernel.get_kernel_matrix() # get kernel on testing data pos_clone=HMM(pos) neg_clone=HMM(neg) pos_clone.set_observations(wordfeats_test) neg_clone.set_observations(wordfeats_test) feats_test=TOPFeatures(10, pos_clone, neg_clone, False, False) kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print "TOP Kernel" kernel_top_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat],[traindat,testdat]] def kernel_weighted_comm_word_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,order=3,gap=0,reverse=True ): from shogun.Kernel import WeightedCommWordStringKernel from shogun.Features import StringWordFeatures, StringCharFeatures, DNA from shogun.PreProc import SortWordString charfeat=StringCharFeatures(fm_train_dna, DNA) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortWordString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() charfeat=StringCharFeatures(fm_test_dna, DNA) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) feats_test.add_preproc(preproc) feats_test.apply_preproc() use_sign=False kernel=WeightedCommWordStringKernel(feats_train, feats_train, use_sign) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'WeightedCommWordString' kernel_weighted_comm_word_string_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,20],[traindat,testdat,22]] def kernel_weighted_degree_position_string_modular(fm_train_dna=traindat,fm_test_dna=testdat,degree=20): from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import WeightedDegreePositionStringKernel, MSG_DEBUG feats_train=StringCharFeatures(fm_train_dna, DNA) #feats_train.io.set_loglevel(MSG_DEBUG) feats_test=StringCharFeatures(fm_test_dna, DNA) kernel=WeightedDegreePositionStringKernel(feats_train, feats_train, degree) from numpy import zeros,ones,float64,int32 #kernel.set_shifts(zeros(len(fm_train_dna[0]), dtype=int32)) #kernel.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64)) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'WeightedDegreePositionString' kernel_weighted_degree_position_string_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_dna('../data/fm_train_dna.dat') testdat = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindat,testdat,3],[traindat,testdat,20]] def kernel_weighted_degree_string_modular (fm_train_dna=traindat,fm_test_dna=testdat,degree=20): from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import WeightedDegreeStringKernel, MSG_DEBUG feats_train=StringCharFeatures(fm_train_dna, DNA) #feats_train.io.set_loglevel(MSG_DEBUG) feats_test=StringCharFeatures(fm_test_dna, DNA) kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree) from numpy import arange,double weights=arange(1,degree+1,dtype=double)[::-1]/ \ sum(arange(1,degree+1,dtype=double)) kernel.set_wd_weights(weights) #from numpy import ones,float64,int32 #kernel.set_position_weights(ones(len(fm_train_dna[0]), dtype=float64)) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() #this is how to serializate the kernel #import pickle #pickle.dump(kernel, file('kernel_obj.dump','w'), protocol=2) #k=pickle.load(file('kernel_obj.dump','r')) return km_train, km_test, kernel if __name__=='__main__': print 'WeightedDegreeString' kernel_weighted_degree_string_modular(*parameter_list[0])
from shogun.Features import CombinedFeatures, RealFeatures, Labels from shogun.Kernel import CombinedKernel, PolyKernel, CustomKernel from shogun.Classifier import MKLClassification from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat],[traindat,testdat,label_traindat]] # fm_train_real.shape # fm_test_real.shape # combined_custom() def mkl_binclass_modular (fm_train_real=traindat,fm_test_real=testdat,fm_label_twoclass = label_traindat): ################################## # set up and train # create some poly train/test matrix tfeats = RealFeatures(fm_train_real) tkernel = PolyKernel(10,3) tkernel.init(tfeats, tfeats) K_train = tkernel.get_kernel_matrix() pfeats = RealFeatures(fm_test_real) tkernel.init(tfeats, pfeats) K_test = tkernel.get_kernel_matrix() # create combined train features feats_train = CombinedFeatures() feats_train.append_feature_obj(RealFeatures(fm_train_real)) # and corresponding combined kernel kernel = CombinedKernel() kernel.append_kernel(CustomKernel(K_train)) kernel.append_kernel(PolyKernel(10,2)) kernel.init(feats_train, feats_train) # train mkl labels = Labels(fm_label_twoclass) mkl = MKLClassification() # which norm to use for MKL mkl.set_mkl_norm(1) #2,3 # set cost (neg, pos) mkl.set_C(1, 1) # set kernel and labels mkl.set_kernel(kernel) mkl.set_labels(labels) # train mkl.train() #w=kernel.get_subkernel_weights() #kernel.set_subkernel_weights(w) ################################## # test # create combined test features feats_pred = CombinedFeatures() feats_pred.append_feature_obj(RealFeatures(fm_test_real)) # and corresponding combined kernel kernel = CombinedKernel() kernel.append_kernel(CustomKernel(K_test)) kernel.append_kernel(PolyKernel(10, 2)) kernel.init(feats_train, feats_pred) # and classify mkl.set_kernel(kernel) mkl.classify() return mkl.classify(),kernel if __name__=='__main__': mkl_binclass_modular (*parameter_list[0])
from tools.load import LoadMatrix lm = LoadMatrix() fm_train_real = lm.load_numbers('../data/fm_train_real.dat') fm_test_real = lm.load_numbers('../data/fm_test_real.dat') label_train_multiclass = lm.load_labels('../data/label_train_multiclass.dat') parameter_list=[ [ fm_train_real, fm_test_real, label_train_multiclass, 1.2, 1.2, 1e-5, 1, 0.001, 1.5], [ fm_train_real, fm_test_real, label_train_multiclass, 5, 1.2, 1e-2, 1, 0.001, 2]] def mkl_multiclass_modular(fm_train_real, fm_test_real, label_train_multiclass, width, C, epsilon, num_threads, mkl_epsilon, mkl_norm): from shogun.Features import CombinedFeatures, RealFeatures, Labels from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel,PolyKernel from shogun.Classifier import MKLMultiClass kernel = CombinedKernel() feats_train = CombinedFeatures() feats_test = CombinedFeatures() subkfeats_train = RealFeatures(fm_train_real) subkfeats_test = RealFeatures(fm_test_real) subkernel = GaussianKernel(10, width) feats_train.append_feature_obj(subkfeats_train) feats_test.append_feature_obj(subkfeats_test) kernel.append_kernel(subkernel) subkfeats_train = RealFeatures(fm_train_real) subkfeats_test = RealFeatures(fm_test_real) subkernel = LinearKernel() feats_train.append_feature_obj(subkfeats_train) feats_test.append_feature_obj(subkfeats_test) kernel.append_kernel(subkernel) subkfeats_train = RealFeatures(fm_train_real) subkfeats_test = RealFeatures(fm_test_real) subkernel = PolyKernel(10,2) feats_train.append_feature_obj(subkfeats_train) feats_test.append_feature_obj(subkfeats_test) kernel.append_kernel(subkernel) kernel.init(feats_train, feats_train) labels = Labels(label_train_multiclass) mkl = MKLMultiClass(C, kernel, labels) mkl.set_epsilon(epsilon); mkl.parallel.set_num_threads(num_threads) mkl.set_mkl_epsilon(mkl_epsilon) mkl.set_mkl_norm(mkl_norm) mkl.train() kernel.init(feats_train, feats_test) out = mkl.classify().get_labels() return out if __name__ == '__main__': print 'mkl_multiclass' mkl_multiclass_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat+10,testdat+10,1.4,10],[traindat+10,testdat+10,1.5,10]] def preproc_logplusone_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10): from shogun.Kernel import Chi2Kernel from shogun.Features import RealFeatures from shogun.PreProc import LogPlusOne feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) preproc=LogPlusOne() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() feats_test.add_preproc(preproc) feats_test.apply_preproc() kernel=Chi2Kernel(feats_train, feats_train, width, size_cache) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'LogPlusOne' preproc_logplusone_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat,1.4,10],[traindat,testdat,1.5,10]] def preproc_normone_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10): from shogun.Kernel import Chi2Kernel from shogun.Features import RealFeatures from shogun.PreProc import NormOne feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) preproc=NormOne() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() feats_test.add_preproc(preproc) feats_test.apply_preproc() kernel=Chi2Kernel(feats_train, feats_train, width, size_cache) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'NormOne' preproc_normone_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') parameter_list = [[traindat,testdat,1.5,10],[traindat,testdat,1.5,10]] def preproc_prunevarsubmean_modular (fm_train_real=traindat,fm_test_real=testdat,width=1.4,size_cache=10): from shogun.Kernel import Chi2Kernel from shogun.Features import RealFeatures from shogun.PreProc import PruneVarSubMean feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) preproc=PruneVarSubMean() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() feats_test.add_preproc(preproc) feats_test.apply_preproc() kernel=Chi2Kernel(feats_train, feats_train, width, size_cache) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'PruneVarSubMean' preproc_prunevarsubmean_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindna = lm.load_dna('../data/fm_train_dna.dat') testdna = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindna,testdna,4,0,False,False],[traindna,testdna,3,0,False,False]] def preproc_sortulongstring_modular (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False,use_sign=False): from shogun.Kernel import CommUlongStringKernel from shogun.Features import StringCharFeatures, StringUlongFeatures, DNA from shogun.PreProc import SortUlongString charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) feats_train=StringUlongFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringUlongFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortUlongString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() feats_test.add_preproc(preproc) feats_test.apply_preproc() kernel=CommUlongStringKernel(feats_train, feats_train, use_sign) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'CommUlongString' preproc_sortulongstring_modular(*parameter_list[0])
from tools.load import LoadMatrix lm=LoadMatrix() traindna = lm.load_dna('../data/fm_train_dna.dat') testdna = lm.load_dna('../data/fm_test_dna.dat') parameter_list = [[traindna,testdna,3,0,False,False],[traindna,testdna,3,0,False,False]] def preproc_sortwordstring_modular (fm_train_dna=traindna,fm_test_dna=testdna,order=3,gap=0,reverse=False,use_sign=False): from shogun.Kernel import CommWordStringKernel from shogun.Features import StringCharFeatures, StringWordFeatures, DNA from shogun.PreProc import SortWordString charfeat=StringCharFeatures(fm_train_dna, DNA) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortWordString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() charfeat=StringCharFeatures(fm_test_dna, DNA) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) feats_test.add_preproc(preproc) feats_test.apply_preproc() kernel=CommWordStringKernel(feats_train, feats_train, use_sign) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() return km_train,km_test,kernel if __name__=='__main__': print 'CommWordString' preproc_sortwordstring_modular(*parameter_list[0])
########################################################################### # kernel ridge regression ########################################################################### from numpy import array from numpy.random import seed, rand from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat,0.8,1e-6],[traindat,testdat,label_traindat,0.9,1e-7]] def regression_krr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,width=0.8,tau=1e-6): from shogun.Features import Labels, RealFeatures from shogun.Kernel import GaussianKernel from shogun.Regression import KRR feats_train=RealFeatures(fm_train) feats_test=RealFeatures(fm_test) kernel=GaussianKernel(feats_train, feats_train, width) labels=Labels(label_train) krr=KRR(tau, kernel, labels) krr.train(feats_train) kernel.init(feats_train, feats_test) out = krr.classify().get_labels() return out,kernel,krr # equivialent shorter version def krr_short (): print 'KRR_short' from shogun.Features import Labels, RealFeatures from shogun.Kernel import GaussianKernel from shogun.Regression import KRR width=0.8; tau=1e-6 krr=KRR(tau, GaussianKernel(0, width), Labels(label_train)) krr.train(RealFeatures(fm_train)) out = krr.classify(RealFeatures(fm_test)).get_labels() return krr,out if __name__=='__main__': print 'KRR' regression_krr_modular(*parameter_list[0])
from numpy import array from numpy.random import seed, rand from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat,2.1,1,1e-5,1e-2], \ [traindat,testdat,label_traindat,2.1,1,1e-5,1e-2]] def regression_libsvr_modular (fm_train=traindat,fm_test=testdat,label_train=label_traindat,\ width=2.1,C=1,epsilon=1e-5,tube_epsilon=1e-2): from shogun.Features import Labels, RealFeatures from shogun.Kernel import GaussianKernel from shogun.Regression import LibSVR feats_train=RealFeatures(fm_train) feats_test=RealFeatures(fm_test) kernel=GaussianKernel(feats_train, feats_train, width) labels=Labels(label_train) svr=LibSVR(C, epsilon, kernel, labels) svr.set_tube_epsilon(tube_epsilon) svr.train() kernel.init(feats_train, feats_test) out1=svr.classify().get_labels() out2=svr.classify(feats_test).get_labels() return out1,out2,kernel if __name__=='__main__': print 'LibSVR' regression_libsvr_modular(*parameter_list[0])
########################################################################### # svm light based support vector regression ########################################################################### from numpy import array from numpy.random import seed, rand from tools.load import LoadMatrix lm=LoadMatrix() traindat = lm.load_numbers('../data/fm_train_real.dat') testdat = lm.load_numbers('../data/fm_test_real.dat') label_traindat = lm.load_labels('../data/label_train_twoclass.dat') parameter_list = [[traindat,testdat,label_traindat,1.2,1,1e-5,1e-2,3],[traindat,testdat,label_traindat,2.3,0.5,1e-5,1e-6,1]] def regression_svrlight_modular(fm_train=traindat,fm_test=testdat,label_train=label_traindat, \ width=1.2,C=1,epsilon=1e-5,tube_epsilon=1e-2,num_threads=3): from shogun.Features import Labels, RealFeatures from shogun.Kernel import GaussianKernel try: from shogun.Regression import SVRLight except ImportError: print 'No support for SVRLight available.' return feats_train=RealFeatures(fm_train) feats_test=RealFeatures(fm_test) kernel=GaussianKernel(feats_train, feats_train, width) labels=Labels(label_train) svr=SVRLight(C, epsilon, kernel, labels) svr.set_tube_epsilon(tube_epsilon) svr.parallel.set_num_threads(num_threads) svr.train() kernel.init(feats_train, feats_test) out = svr.classify().get_labels() return out, kernel if __name__=='__main__': print 'SVRLight' regression_svrlight_modular(*parameter_list[0])
parameter_list=[[5,1,10, 2.0, 10], [10,0.3,2, 1.0, 0.1]] def check_status(status): # silent... assert(status) #if status: # print "OK reading/writing .h5\n" #else: # print "ERROR reading/writing .h5\n" def serialization_complex_example(num=5, dist=1, dim=10, C=2.0, width=10): import os from numpy import concatenate, zeros, ones from numpy.random import randn, seed from shogun.Features import RealFeatures, Labels from shogun.Classifier import GMNPSVM from shogun.Kernel import GaussianKernel from shogun.Library import SerializableHdf5File,SerializableAsciiFile, \ SerializableJsonFile,SerializableXmlFile,MSG_DEBUG from shogun.PreProc import NormOne, LogPlusOne seed(17) data=concatenate((randn(dim, num), randn(dim, num) + dist, randn(dim, num) + 2*dist, randn(dim, num) + 3*dist), axis=1) lab=concatenate((zeros(num), ones(num), 2*ones(num), 3*ones(num))) feats=RealFeatures(data) #feats.io.set_loglevel(MSG_DEBUG) kernel=GaussianKernel(feats, feats, width) labels=Labels(lab) svm = GMNPSVM(C, kernel, labels) feats.add_preproc(NormOne()) feats.add_preproc(LogPlusOne()) feats.set_preprocessed(1) svm.train(feats) #svm.print_serializable() fstream = SerializableHdf5File("blaah.h5", "w") status = svm.save_serializable(fstream) check_status(status) fstream = SerializableAsciiFile("blaah.asc", "w") status = svm.save_serializable(fstream) check_status(status) fstream = SerializableJsonFile("blaah.json", "w") status = svm.save_serializable(fstream) check_status(status) fstream = SerializableXmlFile("blaah.xml", "w") status = svm.save_serializable(fstream) check_status(status) fstream = SerializableHdf5File("blaah.h5", "r") new_svm=GMNPSVM() status = new_svm.load_serializable(fstream) check_status(status) new_svm.train() fstream = SerializableAsciiFile("blaah.asc", "r") new_svm=GMNPSVM() status = new_svm.load_serializable(fstream) check_status(status) new_svm.train() fstream = SerializableJsonFile("blaah.json", "r") new_svm=GMNPSVM() status = new_svm.load_serializable(fstream) check_status(status) new_svm.train() fstream = SerializableXmlFile("blaah.xml", "r") new_svm=GMNPSVM() status = new_svm.load_serializable(fstream) check_status(status) new_svm.train() os.unlink("blaah.h5") os.unlink("blaah.asc") os.unlink("blaah.json") os.unlink("blaah.xml") return svm,new_svm if __name__=='__main__': print 'Serialization SVMLight' serialization_complex_example(*parameter_list[0])
parameter_list=[[10, 1, 2.1, 2.0]] def serialization_svmlight_modular(num, dist, width, C): from shogun.Library import MSG_DEBUG from shogun.Features import RealFeatures, Labels, DNA, Alphabet from shogun.Kernel import WeightedDegreeStringKernel, GaussianKernel from shogun.Classifier import SVMLight from numpy import concatenate, ones from numpy.random import randn, seed import sys import types import random import bz2 import cPickle as pickle import inspect def save(filename, myobj): """ save object to file using pickle @param filename: name of destination file @type filename: str @param myobj: object to save (has to be pickleable) @type myobj: obj """ try: f = bz2.BZ2File(filename, 'wb') except IOError, details: sys.stderr.write('File ' + filename + ' cannot be written\n') sys.stderr.write(details) return pickle.dump(myobj, f, protocol=2) f.close() def load(filename): """ Load from filename using pickle @param filename: name of file to load from @type filename: str """ try: f = bz2.BZ2File(filename, 'rb') except IOError, details: sys.stderr.write('File ' + filename + ' cannot be read\n') sys.stderr.write(details) return myobj = pickle.load(f) f.close() return myobj ################################################## seed(17) traindata_real=concatenate((randn(2,num)-dist, randn(2,num)+dist), axis=1) testdata_real=concatenate((randn(2,num)-dist, randn(2,num)+dist), axis=1); trainlab=concatenate((-ones(num), ones(num))); testlab=concatenate((-ones(num), ones(num))); feats_train=RealFeatures(traindata_real); feats_test=RealFeatures(testdata_real); kernel=GaussianKernel(feats_train, feats_train, width); #kernel.io.set_loglevel(MSG_DEBUG) labels=Labels(trainlab); svm=SVMLight(C, kernel, labels) svm.train() #svm.io.set_loglevel(MSG_DEBUG) ################################################## #print "labels:" #print pickle.dumps(labels) # #print "features" #print pickle.dumps(feats_train) # #print "kernel" #print pickle.dumps(kernel) # #print "svm" #print pickle.dumps(svm) # #print "#################################" fn = "serialized_svm.bz2" #print "serializing SVM to file", fn save(fn, svm) #print "#################################" #print "unserializing SVM" svm2 = load(fn) #print "#################################" #print "comparing training" svm2.train() #print "objective before serialization:", svm.get_objective() #print "objective after serialization:", svm2.get_objective() return svm, svm.get_objective(), svm2, svm2.get_objective() if __name__=='__main__': print 'Serialization SVMLight' serialization_svmlight_modular(*parameter_list[0])
#!/usr/bin/env python # -*- coding: utf-8 -*- parameter_list=[['../data/DynProg_example_py.pickle.gz']] from shogun.Structure import * import numpy from numpy import array,Inf,float64,matrix,frompyfunc,zeros #from IPython.Shell import IPShellEmbed #ipshell = IPShellEmbed() import gzip import scipy from scipy.io import loadmat import pickle try: from cStringIO import StringIO except ImportError: from StringIO import StringIO if scipy.__version__ >= '0.7.0': renametable = { 'scipy.io.mio5': 'scipy.io.matlab.mio5', 'scipy.sparse.sparse' : 'scipy.sparse', } else: renametable = {} def mapname(name): if name in renametable: return renametable[name] return name def mapped_load_global(self): module = mapname(self.readline()[:-1]) name = mapname(self.readline()[:-1]) klass = self.find_class(module, name) self.append(klass) def loads(str): file = StringIO(str) unpickler = pickle.Unpickler(file) unpickler.dispatch[pickle.GLOBAL] = mapped_load_global return unpickler.load() def structure_dynprog_modular(fname): data_dict = loads(gzip.GzipFile(fname).read()) #data_dict = loadmat('../data/DynProg_example_py.dat.mat', appendmat=False, struct_as_record=False) #print data_dict #print len(data_dict['penalty_array'][0][0][0][0].limits[0]) num_plifs,num_limits = len(data_dict['penalty_array']),len(data_dict['penalty_array'][0].limits) pm = PlifMatrix() pm.create_plifs(num_plifs,num_limits) ids = numpy.array(range(num_plifs),dtype=numpy.int32) min_values = numpy.array(range(num_plifs),dtype=numpy.float64) max_values = numpy.array(range(num_plifs),dtype=numpy.float64) all_use_cache = numpy.array(range(num_plifs),dtype=numpy.bool) all_use_svm = numpy.array(range(num_plifs),dtype=numpy.int32) all_limits = zeros((num_plifs,num_limits)) all_penalties = zeros((num_plifs,num_limits)) all_names = ['']*num_plifs all_transforms = ['']*num_plifs for plif_idx in range(num_plifs): ids[plif_idx] = data_dict['penalty_array'][plif_idx].id-1 min_values[plif_idx] = data_dict['penalty_array'][plif_idx].min_value max_values[plif_idx] = data_dict['penalty_array'][plif_idx].max_value all_use_cache[plif_idx] = data_dict['penalty_array'][plif_idx].use_cache all_use_svm[plif_idx] = data_dict['penalty_array'][plif_idx].use_svm all_limits[plif_idx] = data_dict['penalty_array'][plif_idx].limits all_penalties[plif_idx] = data_dict['penalty_array'][plif_idx].penalties all_names[plif_idx] = str(data_dict['penalty_array'][plif_idx].name) all_transforms[plif_idx] = str(data_dict['penalty_array'][plif_idx].transform) if all_transforms[plif_idx] == '[]': all_transforms[plif_idx] = 'linear' pm.set_plif_ids(ids) pm.set_plif_min_values(min_values) pm.set_plif_max_values(max_values) pm.set_plif_use_cache(all_use_cache) pm.set_plif_use_svm(all_use_svm) pm.set_plif_limits(all_limits) pm.set_plif_penalties(all_penalties) #pm.set_plif_names(all_names) #pm.set_plif_transform_type(all_transforms) transition_ptrs = data_dict['model'].transition_pointers transition_ptrs = transition_ptrs[:,:,0:2] transition_ptrs = transition_ptrs.astype(numpy.float64) pm.compute_plif_matrix(transition_ptrs) # init_dyn_prog num_svms = 8 dyn = DynProg(num_svms) orf_info = data_dict['model'].orf_info orf_info = orf_info.astype(numpy.int32) num_states = orf_info.shape[0] dyn.set_num_states(num_states) block = data_dict['block'] seq_len = len(block.seq) seq = str(block.seq) gene_string = array([elem for elem in seq]) # precompute_content_svms pos = block.all_pos-1 pos = pos.astype(numpy.int32) snd_pos = pos dyn.set_pos(pos) dyn.set_gene_string(gene_string) dyn.create_word_string() dyn.precompute_stop_codons() dyn.init_content_svm_value_array(num_svms) dict_weights = data_dict['content_weights'] dict_weights = dict_weights.reshape(8,1).astype(numpy.float64) dict_weights = zeros((8,5440)) dyn.set_dict_weights(dict_weights.T) dyn.precompute_content_values() dyn.init_mod_words_array(data_dict['model'].mod_words.astype(numpy.int32)) pm.compute_signal_plifs(data_dict['state_signals'].astype(numpy.int32)) dyn.set_orf_info(orf_info) # p = data_dict['model'].p q = data_dict['model'].q dyn.set_p_vector(p) dyn.set_q_vector(q) a_trans = data_dict['a_trans'] a_trans = a_trans.astype(float64) dyn.set_a_trans_matrix(a_trans) dyn.check_svm_arrays() features = data_dict['block'].features dyn.set_observation_matrix(features) dyn.set_content_type_array(data_dict['seg_path'].astype(numpy.float64)) dyn.best_path_set_segment_loss(data_dict['loss'].astype(numpy.float64)) use_orf = True feat_dims = [25,201,2] dyn.set_plif_matrices(pm); dyn.compute_nbest_paths(features.shape[2], use_orf, 1,True,False) # fetch results states = dyn.get_states() #print states scores = dyn.get_scores() #print scores positions = dyn.get_positions() #print positions return states, scores, positions if __name__ == '__main__': print "Structure" structure_dynprog_modular(*parameter_list[0])
parameter_list=[[10,7,0,0]] def tests_check_commwordkernel_memleak_modular(num, order, gap, reverse): import gc from shogun.Features import Alphabet,StringCharFeatures,StringWordFeatures,DNA from shogun.PreProc import SortWordString, MSG_DEBUG from shogun.Kernel import CommWordStringKernel, IdentityKernelNormalizer from numpy import mat POS=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT'] NEG=[num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'TTGT', num*'TTGT', num*'TTGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT',num*'ACGT', num*'ACGT', num*'ACGT'] for i in xrange(10): alpha=Alphabet(DNA) traindat=StringCharFeatures(alpha) traindat.set_features(POS+NEG) trainudat=StringWordFeatures(traindat.get_alphabet()); trainudat.obtain_from_char(traindat, order-1, order, gap, reverse) #trainudat.io.set_loglevel(MSG_DEBUG) pre = SortWordString() #pre.io.set_loglevel(MSG_DEBUG) pre.init(trainudat) trainudat.add_preproc(pre) trainudat.apply_preproc() spec = CommWordStringKernel(10, False) spec.set_normalizer(IdentityKernelNormalizer()) spec.init(trainudat, trainudat) K=spec.get_kernel_matrix() del POS del NEG del order del gap del reverse return K if __name__=='__main__': print 'Leak Check Comm Word Kernel' tests_check_commwordkernel_memleak_modular(*parameter_list[0])