This page lists ready-to-run Shogun examples for the Python Modular interface.
To run an example, issue the following command:
python name_of_example.py
# Domain adaptation SVM example: train an SVMLight on a first set of DNA
# strings, then train a DomainAdaptationSVM on a second set that is
# regularized against the first solution.
import numpy
from shogun.Features import StringCharFeatures, Labels, DNA
from shogun.Kernel import WeightedDegreeStringKernel
from shogun.Classifier import SVMLight, DomainAdaptationSVM

degree = 3

# first data set (used for the pre-trained SVM)
fm_train_dna = ['CGCACGTACGTAGCTCGAT',
                'CGACGTAGTCGTAGTCGTA',
                'CGACGGGGGGGGGGTCGTA',
                'CGACCTAGTCGTAGTCGTA',
                'CGACCACAGTTATATAGTA',
                'CGACGTAGTCGTAGTCGTA',
                'CGACGTAGTTTTTTTCGTA',
                'CGACGTAGTCGTAGCCCCA',
                'CAAAAAAAAAAAAAAAATA',
                'CGACGGGGGGGGGGGCGTA']
label_train_dna = numpy.array(5*[-1.0] + 5*[1.0])
fm_test_dna = ['AGCACGTACGTAGCTCGAT',
               'AGACGTAGTCGTAGTCGTA',
               'CAACGGGGGGGGGGTCGTA',
               'CGACCTAGTCGTAGTCGTA',
               'CGAACACAGTTATATAGTA',
               'CGACCTAGTCGTAGTCGTA',
               'CGACGTGGGGTTTTTCGTA',
               'CGACGTAGTCCCAGCCCCA',
               'CAAAAAAAAAAAACCAATA',
               'CGACGGCCGGGGGGGCGTA']
label_test_dna = numpy.array(5*[-1.0] + 5*[1.0])

# second data set (used for the domain adaptation SVM)
fm_train_dna2 = ['AGACAGTCAGTCGATAGCT',
                 'AGCAGTCGTAGTCGTAGTC',
                 'AGCAGGGGGGGGGGTAGTC',
                 'AGCAATCGTAGTCGTAGTC',
                 'AGCAACACGTTCTCTCGTC',
                 'AGCAGTCGTAGTCGTAGTC',
                 'AGCAGTCGTTTTTTTAGTC',
                 'AGCAGTCGTAGTCGAAAAC',
                 'ACCCCCCCCCCCCCCCCTC',
                 'AGCAGGGGGGGGGGGAGTC']
label_train_dna2 = numpy.array(5*[-1.0] + 5*[1.0])
fm_test_dna2 = ['CGACAGTCAGTCGATAGCT',
                'CGCAGTCGTAGTCGTAGTC',
                'ACCAGGGGGGGGGGTAGTC',
                'AGCAATCGTAGTCGTAGTC',
                'AGCCACACGTTCTCTCGTC',
                'AGCAATCGTAGTCGTAGTC',
                'AGCAGTGGGGTTTTTAGTC',
                'AGCAGTCGTAAACGAAAAC',
                'ACCCCCCCCCCCCAACCTC',
                'AGCAGGAAGGGGGGGAGTC']
label_test_dna2 = numpy.array(5*[-1.0] + 5*[1.0])

C = 1.0

# train the plain SVM on the first data set
feats_train = StringCharFeatures(fm_train_dna, DNA)
feats_test = StringCharFeatures(fm_test_dna, DNA)
kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)
labels = Labels(label_train_dna)
svm = SVMLight(C, kernel, labels)
svm.train()

#####################################

print("obtaining DA SVM from previously trained SVM")

# The second SVM must see the second data set; the original example
# rebuilt these objects from the first data set, leaving the *2 data unused.
feats_train2 = StringCharFeatures(fm_train_dna2, DNA)
feats_test2 = StringCharFeatures(fm_test_dna2, DNA)
kernel2 = WeightedDegreeStringKernel(feats_train2, feats_train2, degree)
labels2 = Labels(label_train_dna2)

# we regularize versus the previously obtained solution
dasvm = DomainAdaptationSVM(C, kernel2, labels2, svm, 1.0)
dasvm.train()

out = dasvm.classify(feats_test2).get_labels()
print(out)
def gmnpsvm():
    """Train a GMNPSVM with a Gaussian kernel and classify the test features.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_multiclass`` set up in the ``__main__`` section.
    """
    print('GMNPSVM')

    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import GMNPSVM

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel_width = 2.1
    gauss = GaussianKernel(train_feats, train_feats, kernel_width)

    cost = 1
    tolerance = 1e-5
    train_labels = Labels(label_train_multiclass)

    machine = GMNPSVM(cost, gauss, train_labels)
    machine.set_epsilon(tolerance)
    machine.train(train_feats)
    #kernel.init(feats_train, feats_test)
    predictions = machine.classify(test_feats)
    out = predictions.get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_multiclass = loader.load_labels('../data/label_train_multiclass.dat')
    gmnpsvm()
def gpbtsvm():
    """Train a GPBTSVM with a Gaussian kernel and classify the test features.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` set up in the ``__main__`` section.
    """
    print('GPBTSVM')

    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import GPBTSVM

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel_width = 2.1
    gauss = GaussianKernel(train_feats, train_feats, kernel_width)

    cost = 1
    tolerance = 1e-5
    train_labels = Labels(label_train_twoclass)

    machine = GPBTSVM(cost, gauss, train_labels)
    machine.set_epsilon(tolerance)
    machine.train()

    # re-initialise the kernel on (train, test) before classifying
    gauss.init(train_feats, test_feats)
    predictions = machine.classify()
    predictions.get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = loader.load_labels('../data/label_train_twoclass.dat')
    gpbtsvm()
def knn():
    """Train a KNN classifier (k=3, Euclidean distance) and classify the test features.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_multiclass`` set up in the ``__main__`` section.
    """
    print('KNN')

    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import KNN
    from shogun.Distance import EuclidianDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)
    metric = EuclidianDistance(train_feats, train_feats)

    neighbours = 3
    train_labels = Labels(label_train_multiclass)

    classifier = KNN(neighbours, metric, train_labels)
    classifier.train()
    predictions = classifier.classify(test_feats)
    output = predictions.get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_multiclass = loader.load_labels('../data/label_train_multiclass.dat')
    knn()
def larank():
    """Train a LaRank machine with a Gaussian kernel.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_multiclass`` set up in the ``__main__`` section.
    Note that the final classification is run on the training features.
    """
    print('LaRank')

    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LaRank

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel_width = 2.1
    gauss = GaussianKernel(train_feats, train_feats, kernel_width)

    cost = 1
    tolerance = 1e-5
    train_labels = Labels(label_train_multiclass)

    machine = LaRank(cost, gauss, train_labels)
    #svm.set_tau(1e-3)
    #svm.set_batch_mode(False)
    #svm.io.enable_progress()
    machine.set_epsilon(tolerance)
    machine.train()
    predictions = machine.classify(train_feats)
    out = predictions.get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_multiclass = loader.load_labels('../data/label_train_multiclass.dat')
    larank()
def lda():
    """Train an LDA classifier and classify the test features.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` set up in the ``__main__`` section.
    After training, the bias and weight vector are queried, then the
    classifier's features are switched to the test set for classification.
    """
    print('LDA')

    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import LDA

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    gamma = 3
    labels = Labels(label_train_twoclass)

    classifier = LDA(gamma, feats_train, labels)
    classifier.train()

    classifier.get_bias()
    classifier.get_w()

    # classify() works on the currently set features, so point it at the test set
    classifier.set_features(feats_test)
    classifier.classify().get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = lm.load_labels('../data/label_train_twoclass.dat')
    lda()
def liblinear():
    """Train a LibLinear classifier on sparse features and classify the test set.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` set up in the ``__main__`` section.
    """
    print('LibLinear')

    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import LibLinear

    # dense -> sparse conversion for the training data
    dense = RealFeatures(fm_train_real)
    train_feats = SparseRealFeatures()
    train_feats.obtain_from_simple(dense)

    # dense -> sparse conversion for the test data
    dense = RealFeatures(fm_test_real)
    test_feats = SparseRealFeatures()
    test_feats.obtain_from_simple(dense)

    cost = 0.9
    tolerance = 1e-5
    threads = 1
    train_labels = Labels(label_train_twoclass)

    machine = LibLinear(cost, train_feats, train_labels)
    machine.set_epsilon(tolerance)
    machine.parallel.set_num_threads(threads)
    machine.set_bias_enabled(True)
    machine.train()

    machine.set_features(test_feats)
    predictions = machine.classify()
    predictions.get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = loader.load_labels('../data/label_train_twoclass.dat')
    liblinear()
# Minimal LibSVM example: two shifted random 2-D point clouds, Gaussian
# kernel, and the resulting test error.
from numpy import *
from numpy.random import randn
from shogun.Features import *
from shogun.Classifier import *
from shogun.Kernel import *

num = 1000
dist = 1
width = 2.1
C = 1

# each data set is one cloud shifted by -dist followed by one shifted by +dist
traindata_real = concatenate((randn(2, num) - dist, randn(2, num) + dist), axis=1)
testdata_real = concatenate((randn(2, num) - dist, randn(2, num) + dist), axis=1)

# labels follow the same ordering: first num are -1, last num are +1
trainlab = concatenate((-ones(num), ones(num)))
testlab = concatenate((-ones(num), ones(num)))

feats_train = RealFeatures(traindata_real)
feats_test = RealFeatures(testdata_real)
kernel = GaussianKernel(feats_train, feats_train, width)

labels = Labels(trainlab)
svm = LibSVM(C, kernel, labels)
svm.train()

# re-initialise the kernel on (train, test) before classifying
kernel.init(feats_train, feats_test)
out = svm.classify().get_labels()

# fraction of test points whose predicted sign differs from the label
mismatches = sign(out) != testlab
testerr = mean(mismatches)
print(testerr)
def libsvm():
    """Train a LibSVM with a Gaussian kernel, classify, and fetch the model.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` set up in the ``__main__`` section.
    """
    print('LibSVM')

    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVM

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel_width = 2.1
    gauss = GaussianKernel(train_feats, train_feats, kernel_width)

    cost = 1
    tolerance = 1e-5
    train_labels = Labels(label_train_twoclass)

    machine = LibSVM(cost, gauss, train_labels)
    machine.set_epsilon(tolerance)
    machine.train()

    # re-initialise the kernel on (train, test) before classifying
    gauss.init(train_feats, test_feats)
    predictions = machine.classify()
    predictions.get_labels()

    # support vector indices and their coefficients
    sv_idx = machine.get_support_vectors()
    alphas = machine.get_alphas()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = loader.load_labels('../data/label_train_twoclass.dat')
    libsvm()
def libsvm_multiclass():
    """Train a LibSVMMultiClass with a Gaussian kernel and classify the test set.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_multiclass`` set up in the ``__main__`` section.
    """
    print('LibSVMMultiClass')

    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVMMultiClass

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel_width = 2.1
    gauss = GaussianKernel(train_feats, train_feats, kernel_width)

    cost = 1
    tolerance = 1e-5
    train_labels = Labels(label_train_multiclass)

    machine = LibSVMMultiClass(cost, gauss, train_labels)
    machine.set_epsilon(tolerance)
    machine.train()

    # re-initialise the kernel on (train, test) before classifying
    gauss.init(train_feats, test_feats)
    predictions = machine.classify()
    predictions.get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_multiclass = loader.load_labels('../data/label_train_multiclass.dat')
    libsvm_multiclass()
def libsvm_oneclass():
    """Train a LibSVMOneClass with a Gaussian kernel and classify the test set.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section; no labels are passed to the machine.
    """
    print('LibSVMOneClass')

    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import LibSVMOneClass

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel_width = 2.1
    gauss = GaussianKernel(train_feats, train_feats, kernel_width)

    cost = 1
    tolerance = 1e-5

    machine = LibSVMOneClass(cost, gauss)
    machine.set_epsilon(tolerance)
    machine.train()

    # re-initialise the kernel on (train, test) before classifying
    gauss.init(train_feats, test_feats)
    predictions = machine.classify()
    predictions.get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    libsvm_oneclass()
def mpdsvm():
    """Train an MPDSVM with a Gaussian kernel and classify the test features.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` set up in the ``__main__`` section.
    """
    print('MPDSVM')

    from shogun.Features import RealFeatures, Labels
    from shogun.Kernel import GaussianKernel
    from shogun.Classifier import MPDSVM

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    kernel_width = 2.1
    gauss = GaussianKernel(train_feats, train_feats, kernel_width)

    cost = 1
    tolerance = 1e-5
    train_labels = Labels(label_train_twoclass)

    machine = MPDSVM(cost, gauss, train_labels)
    machine.set_epsilon(tolerance)
    machine.train()

    # re-initialise the kernel on (train, test) before classifying
    gauss.init(train_feats, test_feats)
    predictions = machine.classify()
    predictions.get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = loader.load_labels('../data/label_train_twoclass.dat')
    mpdsvm()
def perceptron():
    """Train a Perceptron and classify the test features.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` set up in the ``__main__`` section.
    """
    print('Perceptron')

    from shogun.Features import RealFeatures, Labels
    from shogun.Classifier import Perceptron

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    learn_rate = 1.
    max_iter = 1000
    labels = Labels(label_train_twoclass)

    classifier = Perceptron(feats_train, labels)
    classifier.set_learn_rate(learn_rate)
    classifier.set_max_iter(max_iter)
    # only guaranteed to converge for separable data
    classifier.train()

    # classify() works on the currently set features, so point it at the test set
    classifier.set_features(feats_test)
    classifier.classify().get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = lm.load_labels('../data/label_train_twoclass.dat')
    perceptron()
def subgradient_svm():
    """Train a SubGradientSVM on sparse features and classify the test set.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` set up in the ``__main__`` section.
    Training is capped via ``set_max_train_time``.
    """
    print('SubGradientSVM')

    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SubGradientSVM

    # dense -> sparse conversion for the training data
    dense = RealFeatures(fm_train_real)
    train_feats = SparseRealFeatures()
    train_feats.obtain_from_simple(dense)

    # dense -> sparse conversion for the test data
    dense = RealFeatures(fm_test_real)
    test_feats = SparseRealFeatures()
    test_feats.obtain_from_simple(dense)

    cost = 0.9
    tolerance = 1e-3
    threads = 1
    time_limit = 1.
    train_labels = Labels(label_train_twoclass)

    machine = SubGradientSVM(cost, train_feats, train_labels)
    machine.set_epsilon(tolerance)
    machine.parallel.set_num_threads(threads)
    machine.set_bias_enabled(False)
    machine.set_max_train_time(time_limit)
    machine.train()

    machine.set_features(test_feats)
    predictions = machine.classify()
    predictions.get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = loader.load_labels('../data/label_train_twoclass.dat')
    subgradient_svm()
def do_batch_linadd():
    """Train an SVMLight on DNA strings and classify with and without batch computation.

    Reads the module-level ``fm_train_dna``, ``fm_test_dna`` and
    ``label_train_dna`` set up in the ``__main__`` section. Returns early
    (after printing a notice) when SVMLight cannot be imported.
    """
    print('SVMlight batch')

    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    train_feats = StringCharFeatures(DNA)
    train_feats.set_features(fm_train_dna)
    test_feats = StringCharFeatures(DNA)
    test_feats.set_features(fm_test_dna)

    wd_degree = 20
    wd_kernel = WeightedDegreeStringKernel(train_feats, train_feats, wd_degree)

    cost = 1
    tolerance = 1e-5
    threads = 2
    train_labels = Labels(label_train_dna)

    machine = SVMLight(cost, wd_kernel, train_labels)
    machine.set_epsilon(tolerance)
    machine.parallel.set_num_threads(threads)
    machine.train()

    wd_kernel.init(train_feats, test_feats)

    #print 'SVMLight Objective: %f num_sv: %d' % \
    #    (svm.get_objective(), svm.get_num_support_vectors())

    # first pass: batch computation and linadd both switched off
    machine.set_batch_computation_enabled(False)
    machine.set_linadd_enabled(False)
    machine.classify().get_labels()

    # second pass: batch computation switched on
    machine.set_batch_computation_enabled(True)
    machine.classify().get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    label_train_dna = loader.load_labels('../data/label_train_dna.dat')
    do_batch_linadd()
# SVMLight example with a custom linear term, on a small inline DNA data set.
import numpy

degree = 3

fm_train_dna = [
    'CGCACGTACGTAGCTCGAT',
    'CGACGTAGTCGTAGTCGTA',
    'CGACGGGGGGGGGGTCGTA',
    'CGACCTAGTCGTAGTCGTA',
    'CGACCACAGTTATATAGTA',
    'CGACGTAGTCGTAGTCGTA',
    'CGACGTAGTTTTTTTCGTA',
    'CGACGTAGTCGTAGCCCCA',
    'CAAAAAAAAAAAAAAAATA',
    'CGACGGGGGGGGGGGCGTA',
]
label_train_dna = numpy.array(5*[-1.0] + 5*[1.0])

fm_test_dna = [
    'AGCACGTACGTAGCTCGAT',
    'AGACGTAGTCGTAGTCGTA',
    'CAACGGGGGGGGGGTCGTA',
    'CGACCTAGTCGTAGTCGTA',
    'CGAACACAGTTATATAGTA',
    'CGACCTAGTCGTAGTCGTA',
    'CGACGTGGGGTTTTTCGTA',
    'CGACGTAGTCCCAGCCCCA',
    'CAAAAAAAAAAAACCAATA',
    'CGACGGCCGGGGGGGCGTA',
]
label_test_dna = numpy.array(5*[-1.0] + 5*[1.0])

print('SVMLight')

from shogun.Features import StringCharFeatures, Labels, DNA
from shogun.Kernel import WeightedDegreeStringKernel
from shogun.Classifier import SVMLight

feats_train = StringCharFeatures(DNA)
feats_train.set_features(fm_train_dna)
feats_test = StringCharFeatures(DNA)
feats_test.set_features(fm_test_dna)

kernel = WeightedDegreeStringKernel(feats_train, feats_train, degree)

C = 10
epsilon = 1e-5
num_threads = 1
labels = Labels(label_train_dna)

svm = SVMLight(C, kernel, labels)
svm.set_qpsize(3)

# one (negated) linear-term entry per training example
linear_term = -numpy.array([1, 2, 3, 4, 5, 6, 7, 8, 7, 6], dtype=numpy.double)
svm.set_linear_term(linear_term)

svm.set_epsilon(epsilon)
svm.parallel.set_num_threads(num_threads)
svm.train()

# re-initialise the kernel on (train, test) before classifying
kernel.init(feats_train, feats_test)
svm.classify().get_labels()
def svm_light():
    """Train an SVMLight with a weighted-degree string kernel on DNA and classify.

    Reads the module-level ``fm_train_dna``, ``fm_test_dna`` and
    ``label_train_dna`` set up in the ``__main__`` section. Returns early
    (after printing a notice) when SVMLight cannot be imported.
    """
    print('SVMLight')

    from shogun.Features import StringCharFeatures, Labels, DNA
    from shogun.Kernel import WeightedDegreeStringKernel
    try:
        from shogun.Classifier import SVMLight
    except ImportError:
        print('No support for SVMLight available.')
        return

    train_feats = StringCharFeatures(DNA)
    train_feats.set_features(fm_train_dna)
    test_feats = StringCharFeatures(DNA)
    test_feats.set_features(fm_test_dna)

    wd_degree = 20
    wd_kernel = WeightedDegreeStringKernel(train_feats, train_feats, wd_degree)

    cost = 1.2
    tolerance = 1e-5
    threads = 1
    train_labels = Labels(label_train_dna)

    machine = SVMLight(cost, wd_kernel, train_labels)
    machine.set_epsilon(tolerance)
    machine.parallel.set_num_threads(threads)
    machine.train()

    # re-initialise the kernel on (train, test) before classifying
    wd_kernel.init(train_feats, test_feats)
    predictions = machine.classify()
    predictions.get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    label_train_dna = loader.load_labels('../data/label_train_dna.dat')
    svm_light()
def svmlin():
    """Train an SVMLin on sparse features, query its model, and classify the test set.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` set up in the ``__main__`` section.
    """
    print('SVMLin')

    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SVMLin

    # dense -> sparse conversion for the training data
    dense = RealFeatures(fm_train_real)
    train_feats = SparseRealFeatures()
    train_feats.obtain_from_simple(dense)

    # dense -> sparse conversion for the test data
    dense = RealFeatures(fm_test_real)
    test_feats = SparseRealFeatures()
    test_feats.obtain_from_simple(dense)

    cost = 0.9
    tolerance = 1e-5
    threads = 1
    train_labels = Labels(label_train_twoclass)

    machine = SVMLin(cost, train_feats, train_labels)
    machine.set_epsilon(tolerance)
    machine.parallel.set_num_threads(threads)
    machine.set_bias_enabled(True)
    machine.train()

    machine.set_features(test_feats)
    machine.get_bias()
    machine.get_w()
    predictions = machine.classify()
    predictions.get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = loader.load_labels('../data/label_train_twoclass.dat')
    svmlin()
def svmocas():
    """Train an SVMOcas on sparse features (bias disabled) and classify the test set.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` set up in the ``__main__`` section.
    """
    print('SVMOcas')

    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SVMOcas

    # dense -> sparse conversion for the training data
    dense = RealFeatures(fm_train_real)
    train_feats = SparseRealFeatures()
    train_feats.obtain_from_simple(dense)

    # dense -> sparse conversion for the test data
    dense = RealFeatures(fm_test_real)
    test_feats = SparseRealFeatures()
    test_feats.obtain_from_simple(dense)

    cost = 0.9
    tolerance = 1e-5
    threads = 1
    train_labels = Labels(label_train_twoclass)

    machine = SVMOcas(cost, train_feats, train_labels)
    machine.set_epsilon(tolerance)
    machine.parallel.set_num_threads(threads)
    machine.set_bias_enabled(False)
    machine.train()

    machine.set_features(test_feats)
    predictions = machine.classify()
    predictions.get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = loader.load_labels('../data/label_train_twoclass.dat')
    svmocas()
def svmsgd():
    """Train an SVMSGD on sparse features and classify the test set.

    Reads the module-level ``fm_train_real``, ``fm_test_real`` and
    ``label_train_twoclass`` set up in the ``__main__`` section.
    """
    print('SVMSGD')

    from shogun.Features import RealFeatures, SparseRealFeatures, Labels
    from shogun.Classifier import SVMSGD

    # dense -> sparse conversion for the training data
    realfeat = RealFeatures(fm_train_real)
    feats_train = SparseRealFeatures()
    feats_train.obtain_from_simple(realfeat)

    # dense -> sparse conversion for the test data
    realfeat = RealFeatures(fm_test_real)
    feats_test = SparseRealFeatures()
    feats_test.obtain_from_simple(realfeat)

    C = 0.9
    labels = Labels(label_train_twoclass)

    svm = SVMSGD(C, feats_train, labels)
    #svm.io.set_loglevel(0)
    svm.train()

    # classify() works on the currently set features, so point it at the test set
    svm.set_features(feats_test)
    svm.classify().get_labels()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    label_train_twoclass = lm.load_labels('../data/label_train_twoclass.dat')
    svmsgd()
def hierarchical():
    """Run Hierarchical clustering (3 merges, Euclidean distance) on the training data.

    Reads the module-level ``fm_train`` set up in the ``__main__`` section,
    then queries the merge distances and cluster pairs.
    """
    print('Hierarchical')

    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import Hierarchical

    num_merges = 3
    train_feats = RealFeatures(fm_train)
    metric = EuclidianDistance(train_feats, train_feats)

    clusterer = Hierarchical(num_merges, metric)
    clusterer.train()

    clusterer.get_merge_distances()
    clusterer.get_cluster_pairs()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train = loader.load_numbers('../data/fm_train_real.dat')
    hierarchical()
#!/usr/bin/env python
"""
Explicit examples on how to use clustering
"""

def kmeans():
    """Run KMeans clustering (k=3, Euclidean distance) on the training data.

    Reads the module-level ``fm_train`` set up in the ``__main__`` section,
    then queries the cluster centers and radiuses.
    """
    print('KMeans')

    from shogun.Distance import EuclidianDistance
    from shogun.Features import RealFeatures
    from shogun.Clustering import KMeans

    num_clusters = 3
    train_feats = RealFeatures(fm_train)
    metric = EuclidianDistance(train_feats, train_feats)

    clusterer = KMeans(num_clusters, metric)
    clusterer.train()

    clusterer.get_cluster_centers()
    clusterer.get_radiuses()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train = loader.load_numbers('../data/fm_train_real.dat')
    kmeans()
# Custom kernel example: train a LibSVM on a user-supplied kernel matrix.
from numpy import *
from numpy.random import rand
from shogun.Features import RealFeatures, Labels
from shogun.Kernel import CustomKernel
from shogun.Classifier import LibSVM

C = 1
dim = 7

# random +/-1 labels, one per example
lab = sign(2*rand(dim) - 1)
data = rand(dim, dim)

# Build a symmetric kernel matrix as the matrix product data * data^T.
# The original computed the element-wise product (data*data.T) and then
# ignored it, handing the raw, non-symmetric `data` to the kernel instead.
symdata = dot(data, data.T)

kernel = CustomKernel()
kernel.set_full_kernel_matrix_from_full(symdata)

labels = Labels(lab)
svm = LibSVM(C, kernel, labels)
svm.train()

out = svm.classify().get_labels()
def bray_curtis_distance():
    """Compute BrayCurtisDistance matrices for train/train and train/test.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    print('BrayCurtisDistance')

    from shogun.Features import RealFeatures
    from shogun.Distance import BrayCurtisDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    metric = BrayCurtisDistance(train_feats, train_feats)
    dm_train = metric.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    metric.init(train_feats, test_feats)
    dm_test = metric.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    bray_curtis_distance()
def canberra_metric():
    """Compute CanberraMetric distance matrices for train/train and train/test.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    # label fixed: the original printed the misspelled 'CanberaMetric'
    print('CanberraMetric')

    from shogun.Features import RealFeatures
    from shogun.Distance import CanberraMetric

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)

    distance = CanberraMetric(feats_train, feats_train)
    dm_train = distance.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    distance.init(feats_train, feats_test)
    dm_test = distance.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_real = lm.load_numbers('../data/fm_train_real.dat')
    fm_test_real = lm.load_numbers('../data/fm_test_real.dat')
    canberra_metric()
def canberra_word_distance():
    """Compute CanberraWordDistance matrices on sorted word features built from DNA.

    Reads the module-level ``fm_train_dna`` and ``fm_test_dna`` set up in
    the ``__main__`` section.
    """
    print('CanberraWordDistance')

    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
    from shogun.PreProc import SortWordString
    from shogun.Distance import CanberraWordDistance

    order = 3
    gap = 0
    reverse = False

    # training strings -> word features, sorted by the preprocessor
    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_train_dna)
    feats_train = StringWordFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preproc(preproc)
    feats_train.apply_preproc()

    # test strings go through the same preprocessor instance
    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_test_dna)
    feats_test = StringWordFeatures(charfeat.get_alphabet())
    feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
    feats_test.add_preproc(preproc)
    feats_test.apply_preproc()

    distance = CanberraWordDistance(feats_train, feats_train)
    dm_train = distance.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    distance.init(feats_train, feats_test)
    dm_test = distance.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    # only the DNA data is needed; the unused real-valued test load was dropped
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')
    canberra_word_distance()
def chebyshew_metric():
    """Compute ChebyshewMetric distance matrices for train/train and train/test.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    print('ChebyshewMetric')

    from shogun.Features import RealFeatures
    from shogun.Distance import ChebyshewMetric

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    metric = ChebyshewMetric(train_feats, train_feats)
    dm_train = metric.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    metric.init(train_feats, test_feats)
    dm_test = metric.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    chebyshew_metric()
def chi_square_distance():
    """Compute ChiSquareDistance matrices for train/train and train/test.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    print('ChiSquareDistance')

    from shogun.Features import RealFeatures
    from shogun.Distance import ChiSquareDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    metric = ChiSquareDistance(train_feats, train_feats)
    dm_train = metric.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    metric.init(train_feats, test_feats)
    dm_test = metric.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    chi_square_distance()
def cosine_distance():
    """Compute CosineDistance matrices for train/train and train/test.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    print('CosineDistance')

    from shogun.Features import RealFeatures
    from shogun.Distance import CosineDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    metric = CosineDistance(train_feats, train_feats)
    dm_train = metric.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    metric.init(train_feats, test_feats)
    dm_test = metric.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    cosine_distance()
def euclidian_distance():
    """Compute EuclidianDistance matrices for train/train and train/test.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    print('EuclidianDistance')

    from shogun.Features import RealFeatures
    from shogun.Distance import EuclidianDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    metric = EuclidianDistance(train_feats, train_feats)
    dm_train = metric.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    metric.init(train_feats, test_feats)
    dm_test = metric.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    euclidian_distance()
def geodesic_metric():
    """Compute GeodesicMetric distance matrices for train/train and train/test.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    print('GeodesicMetric')

    from shogun.Features import RealFeatures
    from shogun.Distance import GeodesicMetric

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    metric = GeodesicMetric(train_feats, train_feats)
    dm_train = metric.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    metric.init(train_feats, test_feats)
    dm_test = metric.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    geodesic_metric()
def hamming_word_distance():
    """Compute HammingWordDistance matrices on sorted word features built from DNA.

    Reads the module-level ``fm_train_dna`` and ``fm_test_dna`` set up in
    the ``__main__`` section.
    """
    print('HammingWordDistance')

    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
    from shogun.PreProc import SortWordString
    from shogun.Distance import HammingWordDistance

    order = 3
    gap = 0
    reverse = False

    # training strings -> word features, sorted by the preprocessor
    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_train_dna)
    feats_train = StringWordFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preproc(preproc)
    feats_train.apply_preproc()

    # test strings go through the same preprocessor instance
    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_test_dna)
    feats_test = StringWordFeatures(charfeat.get_alphabet())
    feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
    feats_test.add_preproc(preproc)
    feats_test.apply_preproc()

    use_sign = False

    distance = HammingWordDistance(feats_train, feats_train, use_sign)
    dm_train = distance.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    distance.init(feats_train, feats_test)
    dm_test = distance.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    # only the DNA data is needed; the unused real-valued test load was dropped
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')
    hamming_word_distance()
def jensen_metric():
    """Compute JensenMetric distance matrices for train/train and train/test.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    print('JensenMetric')

    from shogun.Features import RealFeatures
    from shogun.Distance import JensenMetric

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    metric = JensenMetric(train_feats, train_feats)
    dm_train = metric.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    metric.init(train_feats, test_feats)
    dm_test = metric.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    jensen_metric()
def manhattan_metric():
    """Compute ManhattanMetric distance matrices for train/train and train/test.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    print('ManhattanMetric')

    from shogun.Features import RealFeatures
    from shogun.Distance import ManhattanMetric

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    metric = ManhattanMetric(train_feats, train_feats)
    dm_train = metric.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    metric.init(train_feats, test_feats)
    dm_test = metric.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    manhattan_metric()
def manhattan_word_distance():
    """Compute ManhattanWordDistance matrices on sorted word features built from DNA.

    Reads the module-level ``fm_train_dna`` and ``fm_test_dna`` set up in
    the ``__main__`` section.
    """
    print('ManhattanWordDistance')

    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA
    from shogun.PreProc import SortWordString
    from shogun.Distance import ManhattanWordDistance

    order = 3
    gap = 0
    reverse = False

    # training strings -> word features, sorted by the preprocessor
    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_train_dna)
    feats_train = StringWordFeatures(charfeat.get_alphabet())
    feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse)
    preproc = SortWordString()
    preproc.init(feats_train)
    feats_train.add_preproc(preproc)
    feats_train.apply_preproc()

    # test strings go through the same preprocessor instance
    charfeat = StringCharFeatures(DNA)
    charfeat.set_features(fm_test_dna)
    feats_test = StringWordFeatures(charfeat.get_alphabet())
    feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse)
    feats_test.add_preproc(preproc)
    feats_test.apply_preproc()

    distance = ManhattanWordDistance(feats_train, feats_train)
    dm_train = distance.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    distance.init(feats_train, feats_test)
    dm_test = distance.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    # only the DNA data is needed; the unused real-valued test load was dropped
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')
    manhattan_word_distance()
def minkowski_metric():
    """Compute MinkowskiMetric (k=3) distance matrices for train/train and train/test.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    print('MinkowskiMetric')

    from shogun.Features import RealFeatures
    from shogun.Distance import MinkowskiMetric

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    exponent = 3

    metric = MinkowskiMetric(train_feats, train_feats, exponent)
    dm_train = metric.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    metric.init(train_feats, test_feats)
    dm_test = metric.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    minkowski_metric()
def norm_squared_distance():
    """Compute EuclidianDistance matrices with the square root disabled.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    from shogun.Features import RealFeatures
    from shogun.Distance import EuclidianDistance

    print('EuclidianDistance - NormSquared')

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    metric = EuclidianDistance(train_feats, train_feats)
    metric.set_disable_sqrt(True)
    dm_train = metric.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    metric.init(train_feats, test_feats)
    dm_test = metric.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    norm_squared_distance()
def sparse_euclidian_distance():
    """Compute SparseEuclidianDistance matrices on sparse train/test features.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    print('SparseEuclidianDistance')

    from shogun.Features import RealFeatures, SparseRealFeatures
    from shogun.Distance import SparseEuclidianDistance

    # dense -> sparse conversion for the training data
    dense = RealFeatures(fm_train_real)
    train_feats = SparseRealFeatures()
    train_feats.obtain_from_simple(dense)

    # dense -> sparse conversion for the test data
    dense = RealFeatures(fm_test_real)
    test_feats = SparseRealFeatures()
    test_feats.obtain_from_simple(dense)

    metric = SparseEuclidianDistance(train_feats, train_feats)
    dm_train = metric.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    metric.init(train_feats, test_feats)
    dm_test = metric.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    sparse_euclidian_distance()
def tanimoto_distance():
    """Compute TanimotoDistance matrices for train/train and train/test.

    Reads the module-level ``fm_train_real`` and ``fm_test_real`` set up in
    the ``__main__`` section.
    """
    print('TanimotoDistance')

    from shogun.Features import RealFeatures
    from shogun.Distance import TanimotoDistance

    train_feats = RealFeatures(fm_train_real)
    test_feats = RealFeatures(fm_test_real)

    metric = TanimotoDistance(train_feats, train_feats)
    dm_train = metric.get_distance_matrix()

    # re-initialise on (train, test) to get the cross distance matrix
    metric.init(train_feats, test_feats)
    dm_test = metric.get_distance_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    tanimoto_distance()
def histogram():
    """Train a Histogram distribution on word features built from DNA strings.

    Reads the module-level ``fm_dna`` set up in the ``__main__`` section,
    then queries the histogram and the log-likelihoods.
    """
    print('Histogram')

    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
    from shogun.Distribution import Histogram

    word_order = 3
    word_gap = 0
    reversed_words = False

    chars = StringCharFeatures(DNA)
    chars.set_features(fm_dna)
    words = StringWordFeatures(chars.get_alphabet())
    words.obtain_from_char(chars, word_order-1, word_order, word_gap, reversed_words)

    model = Histogram(words)
    model.train()

    model.get_histogram()

    num_examples = words.get_num_vectors()
    num_param = model.get_num_model_parameters()
    #for i in xrange(num_examples):
    #    for j in xrange(num_param):
    #        histo.get_log_derivative(j, i)

    model.get_log_likelihood()
    model.get_log_likelihood_sample()

###########################################################################
# call functions
###########################################################################

if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_dna = loader.load_dna('../data/fm_train_dna.dat')
    histogram()
def hmm():
    """Train an HMM on CUBE word features and query it.

    Reads the module-level ``fm_cube`` (set in the ``__main__`` section).
    After training, the log-derivatives, best paths, best-path states and
    likelihoods are requested; the accumulated values are not used further
    and nothing is returned.
    """
    print('HMM')

    from shogun.Features import StringWordFeatures, StringCharFeatures, CUBE
    from shogun.Distribution import HMM, BW_NORMAL

    N = 3
    M = 6
    pseudo = 1e-1
    order = 1
    gap = 0
    reverse = False

    chars = StringCharFeatures(CUBE)
    chars.set_features(fm_cube)
    words = StringWordFeatures(chars.get_alphabet())
    words.obtain_from_char(chars, order - 1, order, gap, reverse)

    model = HMM(words, N, M, pseudo)
    model.train()
    model.baum_welch_viterbi_train(BW_NORMAL)

    num_examples = words.get_num_vectors()
    num_param = model.get_num_model_parameters()

    for example in xrange(num_examples):
        for param in xrange(num_param):
            model.get_log_derivative(param, example)

    best_path = 0
    best_path_state = 0
    for example in xrange(num_examples):
        # best_path(example) is requested before the per-state queries.
        best_path += model.best_path(example)
        for state in xrange(N):
            best_path_state += model.get_best_path_state(example, state)

    model.get_log_likelihood()
    model.get_log_likelihood_sample()


###########################################################################
# call functions
###########################################################################

if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_cube = loader.load_cubes('../data/fm_train_cube.dat')
    hmm()
def linear_hmm():
    """Train a LinearHMM on DNA word features and query it.

    Reads the module-level ``fm_dna`` (set in the ``__main__`` section).
    Transition probabilities, log-derivatives and likelihoods are requested
    but their values are discarded; nothing is returned.
    """
    print('LinearHMM')

    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
    from shogun.Distribution import LinearHMM

    order = 3
    gap = 0
    reverse = False

    chars = StringCharFeatures(DNA)
    chars.set_features(fm_dna)
    words = StringWordFeatures(chars.get_alphabet())
    words.obtain_from_char(chars, order - 1, order, gap, reverse)

    model = LinearHMM(words)
    model.train()
    model.get_transition_probs()

    num_examples = words.get_num_vectors()
    num_param = model.get_num_model_parameters()
    for example in xrange(num_examples):
        for param in xrange(num_param):
            model.get_log_derivative(param, example)

    model.get_log_likelihood()
    model.get_log_likelihood_sample()


###########################################################################
# call functions
###########################################################################

if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_dna = loader.load_dna('../data/fm_train_dna.dat')
    linear_hmm()
import os

from shogun.Features import SparseRealFeatures

# Round trip through the SVMlight file format: load an existing file,
# write the features back out together with whatever the loader returned,
# then delete the freshly written copy again.
sparse_feats = SparseRealFeatures()
loaded_labels = sparse_feats.load_svmlight_file('../data/train_sparsereal.light')

out_path = 'testwrite.light'
sparse_feats.write_svmlight_file(out_path, loaded_labels)
os.unlink(out_path)
from numpy import array, uint8, all
from shogun.Features import ByteFeatures

# Dense uint8 matrix wrapped in ByteFeatures.
A = array([[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
          dtype=uint8)
a = ByteFeatures(A)

# Basic statistics of the feature object.
print(a.get_num_vectors())
print(a.get_num_features())

# Read feature vector 0, then write one back.  The values written are
# A's first column, which is why the final assert against A is expected
# to hold.
print(a.get_feature_vector(0))
a.set_feature_vector(array([1, 4, 0, 0, 0, 9], dtype=uint8), 0)

# Fetch the whole matrix and compare it with the input.
a_out = a.get_feature_matrix()
print("%s %s" % (type(a_out), a_out.dtype))
print(a_out)
assert all(a_out == A)
from numpy import array, int64, all
from shogun.Features import LongIntFeatures

# Dense int64 matrix wrapped in LongIntFeatures.
A = array([[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
          dtype=int64)
a = LongIntFeatures(A)

# Basic statistics of the feature object.
print(a.get_num_vectors())
print(a.get_num_features())

# Read feature vector 0, then write one back.  The values written are
# A's first column, which is why the final assert against A is expected
# to hold.
print(a.get_feature_vector(0))
a.set_feature_vector(array([1, 4, 0, 0, 0, 9], dtype=int64), 0)

# Fetch the whole matrix and compare it with the input.
a_out = a.get_feature_matrix()
print("%s %s" % (type(a_out), a_out.dtype))
print(a_out)
assert all(a_out == A)
from numpy import array, float64, int64, uint8, all
from shogun.Features import RealFeatures, LongIntFeatures, ByteFeatures

# The same values as three dense matrices of different element types.
A = array([[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
          dtype=float64)
B = array([[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
          dtype=int64)
C = array([[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
          dtype=uint8)

# One feature object per element type: Real, LongInt and Byte.
a = RealFeatures(A)
b = LongIntFeatures(B)
c = ByteFeatures(C)

# or 16bit wide ...
#feat1 = f.ShortFeatures(N.zeros((10,5),N.short))
#feat2 = f.WordFeatures(N.zeros((10,5),N.uint16))

# Basic statistics of the real-valued feature object.
print(a.get_num_vectors())
print(a.get_num_features())

# Read feature vector 0 of a, then write one back (the values are A's
# first column).
print(a.get_feature_vector(0))
a.set_feature_vector(array([1, 4, 0, 0, 0, 9], dtype=float64), 0)

# Fetch all three matrices first, then report and check each in turn.
a_out = a.get_feature_matrix()
b_out = b.get_feature_matrix()
c_out = c.get_feature_matrix()

print("%s %s" % (type(a_out), a_out.dtype))
print(a_out)
assert all(a_out == A)

print("%s %s" % (type(b_out), b_out.dtype))
print(b_out)
assert all(b_out == B)

print("%s %s" % (type(c_out), c_out.dtype))
print(c_out)
assert all(c_out == C)
from numpy import array, float64, all
from shogun.Features import RealFeatures

# Dense float64 matrix wrapped in RealFeatures.
A = array([[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
          dtype=float64)
a = RealFeatures(A)

# Basic statistics of the feature object.
print(a.get_num_vectors())
print(a.get_num_features())

# Read feature vector 0, then write one back.  The values written are
# A's first column, which is why the final assert against A is expected
# to hold.
print(a.get_feature_vector(0))
a.set_feature_vector(array([1, 4, 0, 0, 0, 9], dtype=float64), 0)

# Fetch the whole matrix and compare it with the input.
a_out = a.get_feature_matrix()
print("%s %s" % (type(a_out), a_out.dtype))
print(a_out)
assert all(a_out == A)
from numpy import array, float64, all
from scipy.sparse import csc_matrix
from shogun.Features import SparseRealFeatures

# Dense matrix A and its scipy CSC counterpart X.
# note, will work with types other than float64 too,
# but requires recent scipy.sparse
A = array([[1, 2, 3], [4, 0, 0], [0, 0, 0], [0, 5, 0], [0, 0, 6], [9, 9, 9]],
          dtype=float64)
X = csc_matrix(A)
print(A)

# Sparse shogun features built directly from the dense matrix.
a = SparseRealFeatures(A)
a_out = a.get_full_feature_matrix()
print(a_out)
assert all(a_out == A)
print(a_out)

# Replace the contents of the existing object with the sparse matrix X.
a.set_sparse_feature_matrix(X)
a_out = a.get_full_feature_matrix()
print(a_out)
assert all(a_out == A)

# Build a fresh feature object straight from the sparse matrix X.
a = SparseRealFeatures(X)
a_out = a.get_full_feature_matrix()
print(a_out)
assert all(a_out == A)

# Go back from shogun to scipy: whatever get_sparse_feature_matrix()
# returns is handed to csc_matrix and densified for comparison.
z = csc_matrix(a.get_sparse_feature_matrix())
z_out = z.todense()
print(z_out)
assert all(z_out == A)
from shogun.Features import StringCharFeatures, StringFileCharFeatures, RAWBYTE
from shogun.Library import UNCOMPRESSED, LZO, GZIP, BZIP2, LZMA, MSG_DEBUG
from shogun.PreProc import DecompressCharString

# NOTE(review): the input file name looks like this example's own source
# file, so the printed strings depend on the script text -- confirm.
f = StringFileCharFeatures('features_string_char_compressed_modular.py', RAWBYTE)
print("original strings %s" % (f.get_features(),))

# Save with each compression type, then load again with decompression
# performed at load time (second argument True).
# Entries: (file name, compression type, level, label for the printout).
save_and_reload = (
    ("foo_uncompressed.str", UNCOMPRESSED, 1, "uncompressed strings"),
    ("foo_lzo.str", LZO, 9, "lzo strings"),
    ("foo_gzip.str", GZIP, 9, "gzip strings"),
    ("foo_bzip2.str", BZIP2, 9, "bzip2 strings"),
    ("foo_lzma.str", LZMA, 9, "lzma strings"),
)
for file_name, compression, level, label in save_and_reload:
    f.save_compressed(file_name, compression, level)
    f2 = StringCharFeatures(RAWBYTE)
    f2.load_compressed(file_name, True)
    print("%s %s" % (label, f2.get_features()))
    print("")

# Load the still-compressed data (second argument False) and decompress
# by applying a preprocessor explicitly.
f2 = StringCharFeatures(RAWBYTE)
f2.load_compressed("foo_lzo.str", False)
f2.add_preproc(DecompressCharString(LZO))
f2.apply_preproc()
print("lzo strings %s" % (f2.get_features(),))
print("")

# Same again, but with on-the-fly preprocessing enabled instead of an
# explicit apply_preproc(); debug logging is switched on for this object.
f2 = StringCharFeatures(RAWBYTE)
f2.load_compressed("foo_lzo.str", False)
f2.io.set_loglevel(MSG_DEBUG)
f2.add_preproc(DecompressCharString(LZO))
f2.enable_on_the_fly_preprocessing()
print("lzo strings %s" % (f2.get_features(),))
print("")

##########################################################################################
# some perfectly compressible stuff follows
from numpy import array
from shogun.Features import StringCharFeatures, RAWBYTE

# String features over raw bytes, built from a list of Python strings.
f = StringCharFeatures(['hey', 'guys', 'i', 'am', 'a', 'string'], RAWBYTE)

# A few statistics and accessors.
print("max string length %s" % (f.get_max_vector_length(),))
print("number of strings %s" % (f.get_num_vectors(),))
print("length of first string %s" % (f.get_vector_length(0),))
print("string[5] %s" % (''.join(f.get_feature_vector(5)),))
print("strings %s" % (f.get_features(),))

# Overwrite string 0 and show the result.
f.set_feature_vector(array(['t', 'e', 's', 't']), 0)
print("strings %s" % (f.get_features(),))
from shogun.Features import StringFileCharFeatures, RAWBYTE

# String features read from a file on disk.
# NOTE(review): the file name looks like this example's own source file --
# confirm.
file_feats = StringFileCharFeatures('features_string_file_char_modular.py', RAWBYTE)
print("strings %s" % (file_feats.get_features(),))
from shogun.Features import StringCharFeatures, RAWBYTE

# Fill an empty raw-byte string feature object from the current directory.
f = StringCharFeatures(RAWBYTE)
f.load_from_directory(".")

# A few statistics and accessors.
print("max string length %s" % (f.get_max_vector_length(),))
print("number of strings %s" % (f.get_num_vectors(),))
print("length of first string %s" % (f.get_vector_length(0),))
print("str[0,0:3] %s %s %s" % (f.get_feature(0, 0), f.get_feature(0, 1), f.get_feature(0, 2)))
print("len(str[0]) %s" % (f.get_vector_length(0),))
print("str[0] %s" % (f.get_feature_vector(0),))

# Alternatively load from a single file (one string per line).
f.load('features_string_char_modular.py')
print(f.get_features())

# or load fasta file
#f.load_fasta('fasta.fa')
#print f.get_features()
from shogun.Features import StringCharFeatures, DNA
from shogun.Library import DynamicIntArray

# A single DNA string: ten each of A, C, G and T.
s = 10 * 'A' + 10 * 'C' + 10 * 'G' + 10 * 'T'
f = StringCharFeatures([s], DNA)

# Slide a window of length 5, then of length 4, over the features with
# step 1 (memory efficient, does not copy strings) and report each time.
for window_size in (5, 4):
    f.obtain_by_sliding_window(window_size, 1)
    print(f.get_num_vectors())
    print(f.get_vector_length(0))
    print(f.get_vector_length(1))
    print(f.get_features())

# Extract string windows at positions 0, 6, 16 and 25
# (memory efficient, does not copy strings).
f.set_features([s])
positions = DynamicIntArray()
for start in (0, 6, 16, 25):
    positions.append_element(start)

# First windows of size 4, then of size 8, from the same position list.
for window_size in (4, 8):
    f.obtain_by_position_list(window_size, positions)
    print(f.get_features())
from numpy import array, uint64
from shogun.Features import StringCharFeatures, StringUlongFeatures, RAWBYTE

# Character strings first, then ulong string features derived from them.
cf = StringCharFeatures(['hey', 'guys', 'string'], RAWBYTE)
uf = StringUlongFeatures(RAWBYTE)

# obtain_from_char arguments: start=0, order=2, gap=0, rev=False
uf.obtain_from_char(cf, 0, 2, 0, False)

# A few statistics and accessors.
print("max string length %s" % (uf.get_max_vector_length(),))
print("number of strings %s" % (uf.get_num_vectors(),))
print("length of first string %s" % (uf.get_vector_length(0),))
print("string[2] %s" % (uf.get_feature_vector(2),))
print("strings %s" % (uf.get_features(),))

# Overwrite string 0 and show the result.
uf.set_feature_vector(array([1, 2, 3, 4, 5], dtype=uint64), 0)
print("strings %s" % (uf.get_features(),))
from numpy import array, uint16
from shogun.Features import StringCharFeatures, StringWordFeatures, RAWBYTE

# Character strings first, then word string features derived from them.
cf = StringCharFeatures(['hey', 'guys', 'string'], RAWBYTE)
wf = StringWordFeatures(RAWBYTE)

# obtain_from_char arguments: start=0, order=2, gap=0, rev=False
wf.obtain_from_char(cf, 0, 2, 0, False)

# A few statistics and accessors.
print("max string length %s" % (wf.get_max_vector_length(),))
print("number of strings %s" % (wf.get_num_vectors(),))
print("length of first string %s" % (wf.get_vector_length(0),))
print("string[2] %s" % (wf.get_feature_vector(2),))
print("strings %s" % (wf.get_features(),))

# Overwrite string 0 and show the result.
wf.set_feature_vector(array([1, 2, 3, 4, 5], dtype=uint16), 0)
print("strings %s" % (wf.get_features(),))
###########################################################################
# kernel can be used to maximize AUC instead of margin in SVMs
###########################################################################

def auc():
    """Set up an AUCKernel on top of a Gaussian kernel.

    Reads the module-level ``fm_train_real`` and ``label_train_real`` (set
    in the ``__main__`` section).  The kernel matrix is computed after
    ``setup_auc_maximization`` and then discarded; nothing is returned.
    """
    print('AUC')

    from shogun.Kernel import GaussianKernel, AUCKernel
    from shogun.Features import RealFeatures, Labels

    train = RealFeatures(fm_train_real)
    width = 1.7
    base_kernel = GaussianKernel(train, train, width)

    auc_kernel = AUCKernel(0, base_kernel)
    auc_kernel.setup_auc_maximization(Labels(label_train_real))
    km_train = auc_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix
    from numpy import double

    loader = LoadMatrix()
    fm_train_real = double(loader.load_numbers('../data/fm_train_real.dat'))
    label_train_real = loader.load_labels('../data/label_train_twoclass.dat')
    auc()
###########################################################################
# chi2 kernel
###########################################################################

def chi2():
    """Evaluate the Chi2Kernel on dense real-valued features.

    Reads the module-level ``fm_train_real`` / ``fm_test_real`` (set in the
    ``__main__`` section).  The train-vs-train and train-vs-test kernel
    matrices are computed and discarded; nothing is returned.
    """
    print('Chi2')

    from shogun.Kernel import Chi2Kernel
    from shogun.Features import RealFeatures

    width = 1.4
    size_cache = 10

    train = RealFeatures(fm_train_real)
    test = RealFeatures(fm_test_real)

    chi2_kernel = Chi2Kernel(train, train, width, size_cache)
    km_train = chi2_kernel.get_kernel_matrix()

    # Re-initialise the same kernel for train vs. test.
    chi2_kernel.init(train, test)
    km_test = chi2_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix
    from numpy import double

    loader = LoadMatrix()
    fm_train_real = double(loader.load_numbers('../data/fm_train_real.dat'))
    fm_test_real = double(loader.load_numbers('../data/fm_test_real.dat'))
    chi2()
def combined():
    """Build a CombinedKernel from three sub-kernels and evaluate it.

    Reads the module-level ``fm_train_real``, ``fm_test_real``,
    ``fm_train_dna`` and ``fm_test_dna`` (set in the ``__main__`` section).
    For each sub-kernel a matching feature object is appended to the
    combined train and test features.  The resulting kernel matrices are
    computed and discarded; nothing is returned.
    """
    print('Combined')

    from shogun.Kernel import CombinedKernel, GaussianKernel, FixedDegreeStringKernel, LocalAlignmentStringKernel
    from shogun.Features import RealFeatures, StringCharFeatures, CombinedFeatures, DNA

    combo_kernel = CombinedKernel()
    combo_train = CombinedFeatures()
    combo_test = CombinedFeatures()

    # Sub-kernel 1: Gaussian kernel on the real-valued data.
    part_train = RealFeatures(fm_train_real)
    part_test = RealFeatures(fm_test_real)
    part_kernel = GaussianKernel(10, 1.1)
    combo_train.append_feature_obj(part_train)
    combo_test.append_feature_obj(part_test)
    combo_kernel.append_kernel(part_kernel)

    # Sub-kernel 2: fixed-degree string kernel on the DNA data.
    part_train = StringCharFeatures(fm_train_dna, DNA)
    part_test = StringCharFeatures(fm_test_dna, DNA)
    degree = 3
    part_kernel = FixedDegreeStringKernel(10, degree)
    combo_train.append_feature_obj(part_train)
    combo_test.append_feature_obj(part_test)
    combo_kernel.append_kernel(part_kernel)

    # Sub-kernel 3: local alignment string kernel on the DNA data.
    part_train = StringCharFeatures(fm_train_dna, DNA)
    part_test = StringCharFeatures(fm_test_dna, DNA)
    part_kernel = LocalAlignmentStringKernel(10)
    combo_train.append_feature_obj(part_train)
    combo_test.append_feature_obj(part_test)
    combo_kernel.append_kernel(part_kernel)

    combo_kernel.init(combo_train, combo_train)
    km_train = combo_kernel.get_kernel_matrix()

    combo_kernel.init(combo_train, combo_test)
    km_test = combo_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix
    from numpy import double

    loader = LoadMatrix()
    fm_train_real = double(loader.load_numbers('../data/fm_train_real.dat'))
    fm_test_real = double(loader.load_numbers('../data/fm_test_real.dat'))
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    combined()
def comm_ulong_string():
    """Evaluate the CommUlongStringKernel on sorted ulong string features.

    Reads the module-level ``fm_train_dna`` / ``fm_test_dna`` (set in the
    ``__main__`` section).  One ``SortUlongString`` preprocessor is
    initialised on the training features and then applied to both the
    training and the test features.  The kernel matrices are computed and
    discarded; nothing is returned.
    """
    print('CommUlongString')

    from shogun.Kernel import CommUlongStringKernel
    from shogun.Features import StringUlongFeatures, StringCharFeatures, DNA
    from shogun.PreProc import SortUlongString

    order = 3
    gap = 0
    reverse = False

    def _ulong_features(strings):
        # Character features first, then the derived ulong features.
        chars = StringCharFeatures(DNA)
        chars.set_features(strings)
        ulongs = StringUlongFeatures(chars.get_alphabet())
        ulongs.obtain_from_char(chars, order - 1, order, gap, reverse)
        return ulongs

    train = _ulong_features(fm_train_dna)
    sorter = SortUlongString()
    sorter.init(train)
    train.add_preproc(sorter)
    train.apply_preproc()

    # The test features re-use the preprocessor initialised on train.
    test = _ulong_features(fm_test_dna)
    test.add_preproc(sorter)
    test.apply_preproc()

    use_sign = False
    comm_kernel = CommUlongStringKernel(train, train, use_sign)
    km_train = comm_kernel.get_kernel_matrix()

    comm_kernel.init(train, test)
    km_test = comm_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    comm_ulong_string()
def comm_word_string():
    """Evaluate the CommWordStringKernel on sorted word string features.

    Reads the module-level ``fm_train_dna`` / ``fm_test_dna`` (set in the
    ``__main__`` section).  One ``SortWordString`` preprocessor is
    initialised on the training features and then applied to both the
    training and the test features.  The kernel matrices are computed and
    discarded; nothing is returned.
    """
    print('CommWordString')

    from shogun.Kernel import CommWordStringKernel
    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA
    from shogun.PreProc import SortWordString

    order = 3
    gap = 0
    reverse = False

    def _word_features(strings):
        # Character features first, then the derived word features.
        chars = StringCharFeatures(DNA)
        chars.set_features(strings)
        words = StringWordFeatures(chars.get_alphabet())
        words.obtain_from_char(chars, order - 1, order, gap, reverse)
        return words

    train = _word_features(fm_train_dna)
    sorter = SortWordString()
    sorter.init(train)
    train.add_preproc(sorter)
    train.apply_preproc()

    # The test features re-use the preprocessor initialised on train.
    test = _word_features(fm_test_dna)
    test.add_preproc(sorter)
    test.apply_preproc()

    use_sign = False
    comm_kernel = CommWordStringKernel(train, train, use_sign)
    km_train = comm_kernel.get_kernel_matrix()

    comm_kernel.init(train, test)
    km_test = comm_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    comm_word_string()
def const():
    """Evaluate the ConstKernel on dummy features.

    Uses ``DummyFeatures(10)`` for training and ``DummyFeatures(17)`` for
    testing with the constant 23.  The kernel matrices are computed and
    discarded; nothing is returned.
    """
    print('Const')

    from shogun.Features import DummyFeatures
    from shogun.Kernel import ConstKernel

    train = DummyFeatures(10)
    test = DummyFeatures(17)
    constant = 23.

    const_kernel = ConstKernel(train, train, constant)
    km_train = const_kernel.get_kernel_matrix()

    const_kernel.init(train, test)
    km_test = const_kernel.get_kernel_matrix()


if __name__ == '__main__':
    const()
def custom():
    """Feed precomputed matrices into a CustomKernel in three ways.

    A random 7x7 matrix is drawn with ``numpy.random.rand`` (the
    ``__main__`` section seeds the generator with 42).  The kernel is then
    set from a flattened lower triangle, from the full symmetric matrix
    (triangle storage) and from the full random matrix, fetching the kernel
    matrix after each step.  The results are discarded; nothing is
    returned.
    """
    print('Custom')

    from numpy.random import rand
    from numpy import array
    from shogun.Features import RealFeatures
    from shogun.Kernel import CustomKernel

    dim = 7
    data = rand(dim, dim)
    feats = RealFeatures(data)  # constructed here but not used afterwards
    symdata = data + data.T

    # Collect the entries on and below the diagonal, one row after the
    # other.
    triangle_values = []
    for row in xrange(symdata.shape[1]):
        for col in xrange(symdata.shape[0]):
            if col <= row:
                triangle_values.append(symdata[(row, col)])
    lowertriangle = array(triangle_values)

    custom_kernel = CustomKernel()

    custom_kernel.set_triangle_kernel_matrix_from_triangle(lowertriangle)
    km_triangletriangle = custom_kernel.get_kernel_matrix()

    custom_kernel.set_triangle_kernel_matrix_from_full(symdata)
    km_fulltriangle = custom_kernel.get_kernel_matrix()

    custom_kernel.set_full_kernel_matrix_from_full(data)
    km_fullfull = custom_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from numpy.random import seed

    seed(42)
    custom()
def diag():
    """Evaluate the DiagKernel on dummy features.

    Uses ``DummyFeatures(10)`` for training and ``DummyFeatures(17)`` for
    testing with the diagonal value 23.  The kernel matrices are computed
    and discarded; nothing is returned.
    """
    print('Diag')

    from shogun.Features import DummyFeatures
    from shogun.Kernel import DiagKernel

    train = DummyFeatures(10)
    test = DummyFeatures(17)
    diag_value = 23.

    diag_kernel = DiagKernel(train, train, diag_value)
    km_train = diag_kernel.get_kernel_matrix()

    diag_kernel.init(train, test)
    km_test = diag_kernel.get_kernel_matrix()


if __name__ == '__main__':
    diag()
def distance():
    """Evaluate a DistanceKernel backed by the Euclidian distance.

    Reads the module-level ``fm_train_real`` / ``fm_test_real`` (set in the
    ``__main__`` section).  The train-vs-train and train-vs-test kernel
    matrices are computed and discarded; nothing is returned.
    """
    print('Distance')

    from shogun.Kernel import DistanceKernel
    from shogun.Features import RealFeatures
    from shogun.Distance import EuclidianDistance

    feats_train = RealFeatures(fm_train_real)
    feats_test = RealFeatures(fm_test_real)
    width = 1.7
    euclidian = EuclidianDistance()

    # Fix: the kernel used to be constructed on (train, test), so the
    # matrix stored as km_train was really the train-vs-test matrix.
    # Construct it on (train, train) so km_train is what its name says.
    kernel = DistanceKernel(feats_train, feats_train, width, euclidian)
    km_train = kernel.get_kernel_matrix()

    # Re-initialise the same kernel for train vs. test.
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix
    from numpy import double

    lm = LoadMatrix()
    fm_train_real = double(lm.load_numbers('../data/fm_train_real.dat'))
    fm_test_real = double(lm.load_numbers('../data/fm_test_real.dat'))
    distance()
def fisher():
    """Evaluate a PolyKernel on Fisher-kernel features from two HMMs.

    Reads the module-level ``fm_hmm_pos``, ``fm_hmm_neg``, ``fm_train_dna``
    and ``fm_test_dna`` (set in the ``__main__`` section).  One HMM is
    trained per class, ``FKFeatures`` are built from the pair of HMMs for
    the training and the test observations, and the kernel matrices are
    computed and discarded; nothing is returned.
    """
    print("Fisher Kernel")

    from shogun.Features import StringCharFeatures, StringWordFeatures, FKFeatures, DNA
    from shogun.Kernel import PolyKernel
    from shogun.Distribution import HMM, BW_NORMAL

    N = 1  # toy HMM with 1 state
    M = 4  # 4 observations -> DNA
    pseudo = 1e-1
    order = 1
    gap = 0
    reverse = False
    kargs = [1, False, True]

    def _word_features(strings):
        # DNA character features first, then the derived word features.
        chars = StringCharFeatures(strings, DNA)
        words = StringWordFeatures(chars.get_alphabet())
        words.obtain_from_char(chars, order - 1, order, gap, reverse)
        return words

    # HMM for the positive class.
    hmm_pos_train = _word_features(fm_hmm_pos)
    pos = HMM(hmm_pos_train, N, M, pseudo)
    pos.baum_welch_viterbi_train(BW_NORMAL)

    # HMM for the negative class.
    hmm_neg_train = _word_features(fm_hmm_neg)
    neg = HMM(hmm_neg_train, N, M, pseudo)
    neg.baum_welch_viterbi_train(BW_NORMAL)

    # Word features for the kernel's training and testing data.
    wordfeats_train = _word_features(fm_train_dna)
    wordfeats_test = _word_features(fm_test_dna)

    # Kernel on the training data.
    pos.set_observations(wordfeats_train)
    neg.set_observations(wordfeats_train)
    feats_train = FKFeatures(10, pos, neg)
    feats_train.set_opt_a(-1)  # estimate prior
    kernel = PolyKernel(feats_train, feats_train, *kargs)
    km_train = kernel.get_kernel_matrix()

    # Kernel on the testing data: HMM copies get the test observations.
    pos_clone = HMM(pos)
    neg_clone = HMM(neg)
    pos_clone.set_observations(wordfeats_test)
    neg_clone.set_observations(wordfeats_test)
    feats_test = FKFeatures(10, pos_clone, neg_clone)
    feats_test.set_a(feats_train.get_a())  # use prior from training data
    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix
    from numpy import where

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    label_train_dna = loader.load_labels('../data/label_train_dna.dat')

    # Split the training strings by label for the two per-class HMMs.
    fm_hmm_pos = [fm_train_dna[idx] for idx in where([label_train_dna == 1])[1]]
    fm_hmm_neg = [fm_train_dna[idx] for idx in where([label_train_dna == -1])[1]]
    fisher()
def fixed_degree_string():
    """Evaluate the FixedDegreeStringKernel (degree 3) on DNA strings.

    Reads the module-level ``fm_train_dna`` / ``fm_test_dna`` (set in the
    ``__main__`` section).  The kernel matrices are computed and discarded;
    nothing is returned.
    """
    print('FixedDegreeString')

    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import FixedDegreeStringKernel

    degree = 3
    train = StringCharFeatures(fm_train_dna, DNA)
    test = StringCharFeatures(fm_test_dna, DNA)

    string_kernel = FixedDegreeStringKernel(train, train, degree)
    km_train = string_kernel.get_kernel_matrix()

    string_kernel.init(train, test)
    km_test = string_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    fixed_degree_string()
def gaussian():
    """Evaluate the GaussianKernel (width 1.9) on real-valued features.

    Reads the module-level ``fm_train_real`` / ``fm_test_real`` (set in the
    ``__main__`` section).  The kernel matrices are computed and discarded;
    nothing is returned.
    """
    print('Gaussian')

    from shogun.Features import RealFeatures
    from shogun.Kernel import GaussianKernel

    width = 1.9
    train = RealFeatures(fm_train_real)
    test = RealFeatures(fm_test_real)

    rbf = GaussianKernel(train, train, width)
    km_train = rbf.get_kernel_matrix()

    rbf.init(train, test)
    km_test = rbf.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    gaussian()
def gaussian_shift():
    """Evaluate the GaussianShiftKernel on real-valued features.

    Reads the module-level ``fm_train_real`` / ``fm_test_real`` (set in the
    ``__main__`` section).  The kernel is built with width 1.8, max_shift 2
    and shift_step 1; its matrices are computed and discarded and nothing
    is returned.
    """
    print('GaussianShift')

    from shogun.Features import RealFeatures
    from shogun.Kernel import GaussianShiftKernel

    width = 1.8
    max_shift = 2
    shift_step = 1

    train = RealFeatures(fm_train_real)
    test = RealFeatures(fm_test_real)

    shift_kernel = GaussianShiftKernel(train, train, width, max_shift, shift_step)
    km_train = shift_kernel.get_kernel_matrix()

    shift_kernel.init(train, test)
    km_test = shift_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    gaussian_shift()
def plugin_estimate_histogram():
    """Train a PluginEstimate and use it in a HistogramWordStringKernel.

    Reads the module-level ``fm_train_dna``, ``fm_test_dna`` and
    ``label_train_dna`` (set in the ``__main__`` section).  The estimator
    is trained on the training word features, the kernel is evaluated on
    train-vs-train and train-vs-test, and the estimator is asked to
    classify the test features.  All results are discarded; nothing is
    returned.
    """
    print('PluginEstimate w/ HistogramWord')

    from shogun.Features import StringCharFeatures, StringWordFeatures, DNA, Labels
    from shogun.Kernel import HistogramWordStringKernel
    from shogun.Classifier import PluginEstimate

    order = 3
    gap = 0
    reverse = False

    def _word_features(strings):
        # Character features first, then the derived word features.
        chars = StringCharFeatures(DNA)
        chars.set_features(strings)
        words = StringWordFeatures(chars.get_alphabet())
        words.obtain_from_char(chars, order - 1, order, gap, reverse)
        return words

    train = _word_features(fm_train_dna)
    test = _word_features(fm_test_dna)

    estimator = PluginEstimate()
    train_labels = Labels(label_train_dna)
    estimator.set_labels(train_labels)
    estimator.set_features(train)
    estimator.train()

    histo_kernel = HistogramWordStringKernel(train, train, estimator)
    km_train = histo_kernel.get_kernel_matrix()

    # Statement order below is kept exactly as in the original example.
    histo_kernel.init(train, test)
    estimator.set_features(test)
    estimator.classify().get_labels()
    km_test = histo_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    label_train_dna = loader.load_labels('../data/label_train_dna.dat')
    plugin_estimate_histogram()
###########################################################################
# linear kernel on byte features
###########################################################################

def linear_byte():
    """Evaluate the LinearByteKernel on byte features.

    Reads the module-level ``fm_train_byte`` / ``fm_test_byte`` (set in the
    ``__main__`` section, where the loaded numbers are converted with
    ``numpy.ubyte``).  The kernel matrices are computed and discarded;
    nothing is returned.
    """
    print('LinearByte')

    from shogun.Kernel import LinearByteKernel
    from shogun.Features import ByteFeatures

    train = ByteFeatures(fm_train_byte)
    test = ByteFeatures(fm_test_byte)

    byte_kernel = LinearByteKernel(train, train)
    km_train = byte_kernel.get_kernel_matrix()

    byte_kernel.init(train, test)
    km_test = byte_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix
    from numpy import ubyte

    loader = LoadMatrix()
    fm_train_byte = ubyte(loader.load_numbers('../data/fm_train_byte.dat'))
    fm_test_byte = ubyte(loader.load_numbers('../data/fm_test_byte.dat'))
    linear_byte()
def linear():
    """Evaluate the LinearKernel with an AvgDiagKernelNormalizer.

    Reads the module-level ``fm_train_real`` / ``fm_test_real`` (set in the
    ``__main__`` section).  The normalizer (scale 1.2) is installed before
    the kernel is initialised.  The kernel matrices are computed and
    discarded; nothing is returned.
    """
    print('Linear')

    from shogun.Features import RealFeatures
    from shogun.Kernel import LinearKernel, AvgDiagKernelNormalizer

    scale = 1.2
    train = RealFeatures(fm_train_real)
    test = RealFeatures(fm_test_real)

    lin_kernel = LinearKernel()
    lin_kernel.set_normalizer(AvgDiagKernelNormalizer(scale))

    lin_kernel.init(train, train)
    km_train = lin_kernel.get_kernel_matrix()

    lin_kernel.init(train, test)
    km_test = lin_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_real = loader.load_numbers('../data/fm_train_real.dat')
    fm_test_real = loader.load_numbers('../data/fm_test_real.dat')
    linear()
def linear_string():
    """Evaluate the LinearStringKernel on DNA strings.

    Reads the module-level ``fm_train_dna`` / ``fm_test_dna`` (set in the
    ``__main__`` section).  The kernel matrices are computed and discarded;
    nothing is returned.
    """
    print('LinearString')

    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import LinearStringKernel

    train = StringCharFeatures(fm_train_dna, DNA)
    test = StringCharFeatures(fm_test_dna, DNA)

    string_kernel = LinearStringKernel(train, train)
    km_train = string_kernel.get_kernel_matrix()

    string_kernel.init(train, test)
    km_test = string_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    linear_string()
def linear_word():
    """Evaluate the LinearWordKernel with an AvgDiagKernelNormalizer.

    Reads the module-level ``fm_train_word`` / ``fm_test_word`` (set in the
    ``__main__`` section, where the loaded numbers are converted with
    ``numpy.ushort``).  The normalizer (scale 1.4) is installed before the
    kernel is initialised.  The kernel matrices are computed and discarded;
    nothing is returned.
    """
    print('LinearWord')

    from shogun.Kernel import LinearWordKernel, AvgDiagKernelNormalizer
    from shogun.Features import WordFeatures

    feats_train = WordFeatures(fm_train_word)
    feats_test = WordFeatures(fm_test_word)
    scale = 1.4

    kernel = LinearWordKernel()
    kernel.set_normalizer(AvgDiagKernelNormalizer(scale))

    kernel.init(feats_train, feats_train)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix
    from numpy import ushort

    lm = LoadMatrix()
    # Fix: the training matrix used to be read from fm_test_word.dat, which
    # made the train and test data identical.  Load the training file, as
    # the sibling examples do for their own data types.
    # NOTE(review): assumes ../data/fm_train_word.dat is shipped alongside
    # fm_test_word.dat -- confirm against the data directory.
    fm_train_word = ushort(lm.load_numbers('../data/fm_train_word.dat'))
    fm_test_word = ushort(lm.load_numbers('../data/fm_test_word.dat'))
    linear_word()
def local_alignment_string():
    """Evaluate the LocalAlignmentStringKernel on DNA strings.

    Reads the module-level ``fm_train_dna`` / ``fm_test_dna`` (set in the
    ``__main__`` section).  The kernel matrices are computed and discarded;
    nothing is returned.
    """
    print('LocalAlignmentString')

    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import LocalAlignmentStringKernel

    train = StringCharFeatures(fm_train_dna, DNA)
    test = StringCharFeatures(fm_test_dna, DNA)

    align_kernel = LocalAlignmentStringKernel(train, train)
    km_train = align_kernel.get_kernel_matrix()

    align_kernel.init(train, test)
    km_test = align_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    local_alignment_string()
def locality_improved_string():
    """Evaluate the LocalityImprovedStringKernel on DNA strings.

    Reads the module-level ``fm_train_dna`` / ``fm_test_dna`` (set in the
    ``__main__`` section).  The kernel is built with length 5, inner degree
    5 and outer degree 7; its matrices are computed and discarded and
    nothing is returned.
    """
    print('LocalityImprovedString')

    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import LocalityImprovedStringKernel

    length = 5
    inner_degree = 5
    outer_degree = 7

    train = StringCharFeatures(fm_train_dna, DNA)
    test = StringCharFeatures(fm_test_dna, DNA)

    li_kernel = LocalityImprovedStringKernel(train, train, length, inner_degree, outer_degree)
    km_train = li_kernel.get_kernel_matrix()

    li_kernel.init(train, test)
    km_test = li_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    locality_improved_string()
def match_word_string():
    """Evaluate the MatchWordStringKernel on DNA word features.

    Reads the module-level ``fm_train_dna`` / ``fm_test_dna`` (set in the
    ``__main__`` section).  An ``AvgDiagKernelNormalizer`` (scale 1.4) is
    installed before the kernel is initialised.  The kernel matrices are
    computed and discarded; nothing is returned.
    """
    print('MatchWordString')

    from shogun.Kernel import MatchWordStringKernel, AvgDiagKernelNormalizer
    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA

    degree = 3
    scale = 1.4
    size_cache = 10
    order = 3
    gap = 0
    reverse = False

    def _word_features(strings):
        # DNA character features first, then the derived word features.
        chars = StringCharFeatures(strings, DNA)
        words = StringWordFeatures(DNA)
        words.obtain_from_char(chars, order - 1, order, gap, reverse)
        return words

    train = _word_features(fm_train_dna)
    test = _word_features(fm_test_dna)

    match_kernel = MatchWordStringKernel(size_cache, degree)
    match_kernel.set_normalizer(AvgDiagKernelNormalizer(scale))

    match_kernel.init(train, train)
    km_train = match_kernel.get_kernel_matrix()

    match_kernel.init(train, test)
    km_test = match_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    match_word_string()
def oligo_string():
    """Evaluate the OligoStringKernel on DNA strings.

    Reads the module-level ``fm_train_dna`` / ``fm_test_dna`` (set in the
    ``__main__`` section).  The kernel is built with cache size 10, k = 3
    and width 1.2; its matrices are computed and discarded and nothing is
    returned.
    """
    print('OligoString')

    from shogun.Features import StringCharFeatures, DNA
    from shogun.Kernel import OligoStringKernel

    k = 3
    width = 1.2
    size_cache = 10

    train = StringCharFeatures(fm_train_dna, DNA)
    test = StringCharFeatures(fm_test_dna, DNA)

    oligo_kernel = OligoStringKernel(size_cache, k, width)

    oligo_kernel.init(train, train)
    km_train = oligo_kernel.get_kernel_matrix()

    oligo_kernel.init(train, test)
    km_test = oligo_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    oligo_string()
def poly_match_string():
    """Evaluate the PolyMatchStringKernel on DNA strings.

    Reads the module-level ``fm_train_dna`` / ``fm_test_dna`` (set in the
    ``__main__`` section).  The kernel is built with degree 3 and
    ``inhomogene=False``; the train-vs-train and train-vs-test matrices are
    computed and discarded and nothing is returned.
    """
    print('PolyMatchString')

    from shogun.Kernel import PolyMatchStringKernel
    from shogun.Features import StringCharFeatures, DNA

    feats_train = StringCharFeatures(fm_train_dna, DNA)
    # Fix: the test features used to be built from fm_train_dna, so the
    # "test" kernel matrix was just train-vs-train again and the loaded
    # fm_test_dna was never used.
    feats_test = StringCharFeatures(fm_test_dna, DNA)
    degree = 3
    inhomogene = False

    kernel = PolyMatchStringKernel(feats_train, feats_train, degree, inhomogene)
    km_train = kernel.get_kernel_matrix()

    kernel.init(feats_train, feats_test)
    km_test = kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    lm = LoadMatrix()
    fm_train_dna = lm.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = lm.load_dna('../data/fm_test_dna.dat')
    poly_match_string()
def poly_match_word_string():
    """Evaluate the PolyMatchWordStringKernel on DNA word features.

    Reads the module-level ``fm_train_dna`` / ``fm_test_dna`` (set in the
    ``__main__`` section).  The kernel is built with degree 2 and
    ``inhomogene=True``; its matrices are computed and discarded and
    nothing is returned.
    """
    print('PolyMatchWordString')

    from shogun.Kernel import PolyMatchWordStringKernel
    from shogun.Features import StringWordFeatures, StringCharFeatures, DNA

    degree = 2
    inhomogene = True
    order = 3
    gap = 0
    reverse = False

    def _word_features(strings):
        # DNA character features first, then the derived word features.
        chars = StringCharFeatures(strings, DNA)
        words = StringWordFeatures(DNA)
        words.obtain_from_char(chars, order - 1, order, gap, reverse)
        return words

    train = _word_features(fm_train_dna)
    test = _word_features(fm_test_dna)

    match_kernel = PolyMatchWordStringKernel(train, train, degree, inhomogene)
    km_train = match_kernel.get_kernel_matrix()

    match_kernel.init(train, test)
    km_test = match_kernel.get_kernel_matrix()


if __name__ == '__main__':
    from tools.load import LoadMatrix

    loader = LoadMatrix()
    fm_train_dna = loader.load_dna('../data/fm_train_dna.dat')
    fm_test_dna = loader.load_dna('../data/fm_test_dna.dat')
    poly_match_word_string()
def poly (): print 'Poly' from shogun.Features import RealFeatures from shogun.Kernel import PolyKernel feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) degree=4 inhomogene=False use_normalization=True kernel=PolyKernel( feats_train, feats_train, degree, inhomogene, use_normalization) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_real=lm.load_numbers('../data/fm_train_real.dat') fm_test_real=lm.load_numbers('../data/fm_test_real.dat') poly()
def plugin_estimate_salzberg (): print 'PluginEstimate w/ SalzbergWord' from shogun.Features import StringCharFeatures, StringWordFeatures, DNA, Labels from shogun.Kernel import SalzbergWordStringKernel from shogun.Classifier import PluginEstimate order=3 gap=0 reverse=False charfeat=StringCharFeatures(fm_train_dna, DNA) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) charfeat=StringCharFeatures(fm_test_dna, DNA) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) pie=PluginEstimate() labels=Labels(label_train_dna) pie.set_labels(labels) pie.set_features(feats_train) pie.train() kernel=SalzbergWordStringKernel(feats_train, feats_test, pie, labels) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) pie.set_features(feats_test) pie.classify().get_labels() km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_dna=lm.load_dna('../data/fm_train_dna.dat') fm_test_dna=lm.load_dna('../data/fm_test_dna.dat') label_train_dna=lm.load_labels('../data/label_train_dna.dat') plugin_estimate_salzberg()
def sigmoid (): print 'Sigmoid' from shogun.Features import RealFeatures from shogun.Kernel import SigmoidKernel feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) size_cache=10 gamma=1.2 coef0=1.3 kernel=SigmoidKernel(feats_train, feats_train, size_cache, gamma, coef0) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_real=lm.load_numbers('../data/fm_train_real.dat') fm_test_real=lm.load_numbers('../data/fm_test_real.dat') sigmoid()
def simple_locality_improved_string (): print 'SimpleLocalityImprovedString' from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import SimpleLocalityImprovedStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) length=5 inner_degree=5 outer_degree=7 kernel=SimpleLocalityImprovedStringKernel( feats_train, feats_train, length, inner_degree, outer_degree) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_dna=lm.load_dna('../data/fm_train_dna.dat') fm_test_dna=lm.load_dna('../data/fm_test_dna.dat') simple_locality_improved_string()
def sparse_gaussian (): print 'SparseGaussian' from shogun.Features import SparseRealFeatures from shogun.Kernel import SparseGaussianKernel feats_train=SparseRealFeatures(fm_train_real) feats_test=SparseRealFeatures(fm_test_real) width=1.1 kernel=SparseGaussianKernel(feats_train, feats_train, width) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_real=lm.load_numbers('../data/fm_train_real.dat') fm_test_real=lm.load_numbers('../data/fm_test_real.dat') sparse_gaussian()
def sparse_linear (): print 'SparseLinear' from shogun.Features import SparseRealFeatures from shogun.Kernel import SparseLinearKernel, AvgDiagKernelNormalizer feats_train=SparseRealFeatures(fm_train_real) feats_test=SparseRealFeatures(fm_test_real) scale=1.1 kernel=SparseLinearKernel() kernel.set_normalizer(AvgDiagKernelNormalizer(scale)) kernel.init(feats_train, feats_train) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_real=lm.load_numbers('../data/fm_train_real.dat') fm_test_real=lm.load_numbers('../data/fm_test_real.dat') sparse_linear()
def sparse_poly (): print 'SparsePoly' from shogun.Features import SparseRealFeatures from shogun.Kernel import SparsePolyKernel feats_train=SparseRealFeatures(fm_train_real) feats_test=SparseRealFeatures(fm_test_real) size_cache=10 degree=3 inhomogene=True kernel=SparsePolyKernel(feats_train, feats_train, size_cache, degree, inhomogene) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_real=lm.load_numbers('../data/fm_train_real.dat') fm_test_real=lm.load_numbers('../data/fm_test_real.dat') sparse_poly()
def top(): print "TOP Kernel" from shogun.Features import StringCharFeatures, StringWordFeatures, TOPFeatures, DNA from shogun.Kernel import PolyKernel from shogun.Distribution import HMM, BW_NORMAL N=1 # toy HMM with 1 state M=4 # 4 observations -> DNA pseudo=1e-1 order=1 gap=0 reverse=False kargs=[1, False, True] # train HMM for positive class charfeat=StringCharFeatures(fm_hmm_pos, DNA) hmm_pos_train=StringWordFeatures(charfeat.get_alphabet()) hmm_pos_train.obtain_from_char(charfeat, order-1, order, gap, reverse) pos=HMM(hmm_pos_train, N, M, pseudo) pos.baum_welch_viterbi_train(BW_NORMAL) # train HMM for negative class charfeat=StringCharFeatures(fm_hmm_neg, DNA) hmm_neg_train=StringWordFeatures(charfeat.get_alphabet()) hmm_neg_train.obtain_from_char(charfeat, order-1, order, gap, reverse) neg=HMM(hmm_neg_train, N, M, pseudo) neg.baum_welch_viterbi_train(BW_NORMAL) # Kernel training data charfeat=StringCharFeatures(fm_train_dna, DNA) wordfeats_train=StringWordFeatures(charfeat.get_alphabet()) wordfeats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) # Kernel testing data charfeat=StringCharFeatures(fm_test_dna, DNA) wordfeats_test=StringWordFeatures(charfeat.get_alphabet()) wordfeats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) # get kernel on training data pos.set_observations(wordfeats_train) neg.set_observations(wordfeats_train) feats_train=TOPFeatures(10, pos, neg, False, False) kernel=PolyKernel(feats_train, feats_train, *kargs) km_train=kernel.get_kernel_matrix() # get kernel on testing data pos_clone=HMM(pos) neg_clone=HMM(neg) pos_clone.set_observations(wordfeats_test) neg_clone.set_observations(wordfeats_test) feats_test=TOPFeatures(10, pos_clone, neg_clone, False, False) kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix from numpy import where lm=LoadMatrix() fm_train_dna=lm.load_dna('../data/fm_train_dna.dat') 
fm_test_dna=lm.load_dna('../data/fm_test_dna.dat') label_train_dna=lm.load_labels('../data/label_train_dna.dat') fm_hmm_pos=[ fm_train_dna[i] for i in where([label_train_dna==1])[1] ] fm_hmm_neg=[ fm_train_dna[i] for i in where([label_train_dna==-1])[1] ] top()
def weighted_comm_word_string (): print 'WeightedCommWordString' from shogun.Kernel import WeightedCommWordStringKernel from shogun.Features import StringWordFeatures, StringCharFeatures, DNA from shogun.PreProc import SortWordString order=3 gap=0 reverse=True charfeat=StringCharFeatures(fm_train_dna, DNA) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortWordString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() charfeat=StringCharFeatures(fm_test_dna, DNA) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) feats_test.add_preproc(preproc) feats_test.apply_preproc() use_sign=False kernel=WeightedCommWordStringKernel(feats_train, feats_train, use_sign) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_dna=lm.load_dna('../data/fm_train_dna.dat') fm_test_dna=lm.load_dna('../data/fm_test_dna.dat') weighted_comm_word_string()
def weighted_degree_position_string (): print 'WeightedDegreePositionString' from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import WeightedDegreePositionStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) degree=20 kernel=WeightedDegreePositionStringKernel(feats_train, feats_train, degree) #kernel.set_shifts(zeros(len(data['train'][0]), dtype=int)) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_dna=lm.load_dna('../data/fm_train_dna.dat') fm_test_dna=lm.load_dna('../data/fm_test_dna.dat') weighted_degree_position_string()
def weighted_degree_string (): print 'WeightedDegreeString' from shogun.Features import StringCharFeatures, DNA from shogun.Kernel import WeightedDegreeStringKernel feats_train=StringCharFeatures(fm_train_dna, DNA) feats_test=StringCharFeatures(fm_test_dna, DNA) degree=20 kernel=WeightedDegreeStringKernel(feats_train, feats_train, degree) #weights=arange(1,degree+1,dtype=double)[::-1]/ \ # sum(arange(1,degree+1,dtype=double)) #kernel.set_wd_weights(weights) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_dna=lm.load_dna('../data/fm_train_dna.dat') fm_test_dna=lm.load_dna('../data/fm_test_dna.dat') weighted_degree_string()
def mkl_multiclass (): print 'mkl_multiclass' from shogun.Features import CombinedFeatures, RealFeatures, Labels from shogun.Kernel import CombinedKernel, GaussianKernel, LinearKernel,PolyKernel from shogun.Classifier import MKLMultiClass width=1.2 C=1.2 epsilon=1e-5 num_threads=1 kernel=CombinedKernel() feats_train=CombinedFeatures() feats_test=CombinedFeatures() subkfeats_train=RealFeatures(fm_train_real) subkfeats_test=RealFeatures(fm_test_real) subkernel=GaussianKernel(10, width) feats_train.append_feature_obj(subkfeats_train) feats_test.append_feature_obj(subkfeats_test) kernel.append_kernel(subkernel) subkfeats_train=RealFeatures(fm_train_real) subkfeats_test=RealFeatures(fm_test_real) subkernel=LinearKernel() feats_train.append_feature_obj(subkfeats_train) feats_test.append_feature_obj(subkfeats_test) kernel.append_kernel(subkernel) subkfeats_train=RealFeatures(fm_train_real) subkfeats_test=RealFeatures(fm_test_real) subkernel=PolyKernel(10,2) feats_train.append_feature_obj(subkfeats_train) feats_test.append_feature_obj(subkfeats_test) kernel.append_kernel(subkernel) kernel.init(feats_train, feats_train) labels=Labels(label_train_multiclass) mkl=MKLMultiClass(C, kernel, labels) mkl.set_epsilon(epsilon); mkl.parallel.set_num_threads(num_threads) mkl.set_mkl_epsilon(0.001) mkl.train() kernel.init(feats_train, feats_test) out= mkl.classify().get_labels() print out if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_real=lm.load_numbers('../data/fm_train_real.dat') fm_test_real=lm.load_numbers('../data/fm_test_real.dat') label_train_multiclass=lm.load_labels('../data/label_train_multiclass.dat') mkl_multiclass()
def log_plus_one (): print 'LogPlusOne' from shogun.Kernel import Chi2Kernel from shogun.Features import RealFeatures from shogun.PreProc import LogPlusOne feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) preproc=LogPlusOne() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() feats_test.add_preproc(preproc) feats_test.apply_preproc() width=1.4 size_cache=10 kernel=Chi2Kernel(feats_train, feats_train, width, size_cache) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_real=lm.load_numbers('../data/fm_train_real.dat') fm_test_real=lm.load_numbers('../data/fm_test_real.dat') log_plus_one()
def norm_one (): print 'NormOne' from shogun.Kernel import Chi2Kernel from shogun.Features import RealFeatures from shogun.PreProc import NormOne feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) preproc=NormOne() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() feats_test.add_preproc(preproc) feats_test.apply_preproc() width=1.4 size_cache=10 kernel=Chi2Kernel(feats_train, feats_train, width, size_cache) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_real=lm.load_numbers('../data/fm_train_real.dat') fm_test_real=lm.load_numbers('../data/fm_test_real.dat') norm_one()
def prune_var_sub_mean (): print 'PruneVarSubMean' from shogun.Kernel import Chi2Kernel from shogun.Features import RealFeatures from shogun.PreProc import PruneVarSubMean feats_train=RealFeatures(fm_train_real) feats_test=RealFeatures(fm_test_real) preproc=PruneVarSubMean() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() feats_test.add_preproc(preproc) feats_test.apply_preproc() width=1.4 size_cache=10 kernel=Chi2Kernel(feats_train, feats_train, width, size_cache) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_real=lm.load_numbers('../data/fm_train_real.dat') fm_test_real=lm.load_numbers('../data/fm_test_real.dat') prune_var_sub_mean()
def sort_ulong_string (): print 'CommUlongString' from shogun.Kernel import CommUlongStringKernel from shogun.Features import StringCharFeatures, StringUlongFeatures, DNA from shogun.PreProc import SortUlongString order=3 gap=0 reverse=False charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_train_dna) feats_train=StringUlongFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) charfeat=StringCharFeatures(DNA) charfeat.set_features(fm_test_dna) feats_test=StringUlongFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortUlongString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() feats_test.add_preproc(preproc) feats_test.apply_preproc() use_sign=False kernel=CommUlongStringKernel(feats_train, feats_train, use_sign) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_dna=lm.load_dna('../data/fm_train_dna.dat') fm_test_dna=lm.load_dna('../data/fm_test_dna.dat') sort_ulong_string()
def sort_word_string (): print 'CommWordString' from shogun.Kernel import CommWordStringKernel from shogun.Features import StringCharFeatures, StringWordFeatures, DNA from shogun.PreProc import SortWordString order=3 gap=0 reverse=False charfeat=StringCharFeatures(fm_train_dna, DNA) feats_train=StringWordFeatures(charfeat.get_alphabet()) feats_train.obtain_from_char(charfeat, order-1, order, gap, reverse) preproc=SortWordString() preproc.init(feats_train) feats_train.add_preproc(preproc) feats_train.apply_preproc() charfeat=StringCharFeatures(fm_test_dna, DNA) feats_test=StringWordFeatures(charfeat.get_alphabet()) feats_test.obtain_from_char(charfeat, order-1, order, gap, reverse) feats_test.add_preproc(preproc) feats_test.apply_preproc() use_sign=False kernel=CommWordStringKernel(feats_train, feats_train, use_sign) km_train=kernel.get_kernel_matrix() kernel.init(feats_train, feats_test) km_test=kernel.get_kernel_matrix() if __name__=='__main__': from tools.load import LoadMatrix lm=LoadMatrix() fm_train_dna=lm.load_dna('../data/fm_train_dna.dat') fm_test_dna=lm.load_dna('../data/fm_test_dna.dat') sort_word_string()
########################################################################### # kernel ridge regression ########################################################################### def krr (): print 'KRR' from shogun.Features import Labels, RealFeatures from shogun.Kernel import GaussianKernel from shogun.Regression import KRR feats_train=RealFeatures(fm_train) feats_test=RealFeatures(fm_test) width=0.8 kernel=GaussianKernel(feats_train, feats_train, width) tau=1e-6 labels=Labels(label_train) krr=KRR(tau, kernel, labels) krr.train(feats_train) kernel.init(feats_train, feats_test) out = krr.classify().get_labels() return out # equivialent shorter version def krr_short (): print 'KRR_short' from shogun.Features import Labels, RealFeatures from shogun.Kernel import GaussianKernel from shogun.Regression import KRR width=0.8; tau=1e-6 krr=KRR(tau, GaussianKernel(0, width), Labels(label_train)) krr.train(RealFeatures(fm_train)) out = krr.classify(RealFeatures(fm_test)).get_labels() return out if __name__=='__main__': from numpy import array from numpy.random import seed, rand from tools.load import LoadMatrix lm=LoadMatrix() fm_train=lm.load_numbers('../data/fm_train_real.dat') fm_test=lm.load_numbers('../data/fm_test_real.dat') label_train=lm.load_labels('../data/label_train_twoclass.dat') out1=krr() out2=krr_short()
def libsvr (): print 'LibSVR' from shogun.Features import Labels, RealFeatures from shogun.Kernel import GaussianKernel from shogun.Regression import LibSVR feats_train=RealFeatures(fm_train) feats_test=RealFeatures(fm_test) width=2.1 kernel=GaussianKernel(feats_train, feats_train, width) C=1 epsilon=1e-5 tube_epsilon=1e-2 labels=Labels(label_train) svr=LibSVR(C, epsilon, kernel, labels) svr.set_tube_epsilon(tube_epsilon) svr.train() kernel.init(feats_train, feats_test) out1=svr.classify().get_labels() out2=svr.classify(feats_test).get_labels() if __name__=='__main__': from numpy import array from numpy.random import seed, rand from tools.load import LoadMatrix lm=LoadMatrix() fm_train=lm.load_numbers('../data/fm_train_real.dat') fm_test=lm.load_numbers('../data/fm_test_real.dat') label_train=lm.load_labels('../data/label_train_twoclass.dat') libsvr()
########################################################################### # svm light based support vector regression ########################################################################### def svr_light (): print 'SVRLight' from shogun.Features import Labels, RealFeatures from shogun.Kernel import GaussianKernel try: from shogun.Regression import SVRLight except ImportError: print 'No support for SVRLight available.' return feats_train=RealFeatures(fm_train) feats_test=RealFeatures(fm_test) width=2.1 kernel=GaussianKernel(feats_train, feats_train, width) C=1 epsilon=1e-5 tube_epsilon=1e-2 num_threads=3 labels=Labels(label_train) svr=SVRLight(C, epsilon, kernel, labels) svr.set_tube_epsilon(tube_epsilon) svr.parallel.set_num_threads(num_threads) svr.train() kernel.init(feats_train, feats_test) svr.classify().get_labels() if __name__=='__main__': from numpy import array from numpy.random import seed, rand from tools.load import LoadMatrix lm=LoadMatrix() fm_train=lm.load_numbers('../data/fm_train_real.dat') fm_test=lm.load_numbers('../data/fm_test_real.dat') label_train=lm.load_labels('../data/label_train_twoclass.dat') svr_light()
from shogun.Features import * from shogun.Library import MSG_DEBUG from shogun.Features import StringCharFeatures, Labels, DNA, Alphabet from shogun.Kernel import WeightedDegreeStringKernel, GaussianKernel from shogun.Classifier import SVMLight from numpy import * from numpy.random import randn import sys import types import random import bz2 import cPickle import inspect def save(filename, myobj): """ save object to file using pickle @param filename: name of destination file @type filename: str @param myobj: object to save (has to be pickleable) @type myobj: obj """ try: f = bz2.BZ2File(filename, 'wb') except IOError, details: sys.stderr.write('File ' + filename + ' cannot be written\n') sys.stderr.write(details) return cPickle.dump(myobj, f, protocol=2) f.close() def load(filename): """ Load from filename using pickle @param filename: name of file to load from @type filename: str """ try: f = bz2.BZ2File(filename, 'rb') except IOError, details: sys.stderr.write('File ' + filename + ' cannot be read\n') sys.stderr.write(details) return myobj = cPickle.load(f) f.close() return myobj ################################################## num=10 dist=1 width=2.1 traindata_real=concatenate((randn(2,num)-dist, randn(2,num)+dist), axis=1) testdata_real=concatenate((randn(2,num)-dist, randn(2,num)+dist), axis=1); trainlab=concatenate((-ones(num), ones(num))); testlab=concatenate((-ones(num), ones(num))); feats_train=RealFeatures(traindata_real); feats_test=RealFeatures(testdata_real); kernel=GaussianKernel(feats_train, feats_train, width); kernel.io.set_loglevel(MSG_DEBUG) labels=Labels(trainlab); svm=SVMLight(2, kernel, labels) svm.train() svm.io.set_loglevel(MSG_DEBUG) ################################################## print "labels:" print labels.to_string() print "features" print feats_train.to_string() print "kernel" print kernel.to_string() print "svm" print svm.to_string() print "#################################" fn = "serialized_svm.bz2" print "serializing SVM to 
file", fn save(fn, svm) print "#################################" print "unserializing SVM" svm2 = load(fn) print "#################################" print "comparing training" svm2.train() print "objective before serialization:", svm.get_objective() print "objective after serialization:", svm2.get_objective()
#!/usr/bin/env python # -*- coding: utf-8 -*- from numpy.random import randn from shogun.Structure import * import numpy from numpy import array,Inf,float64,matrix,frompyfunc,zeros from IPython.Shell import IPShellEmbed ipshell = IPShellEmbed() from scipy.io import loadmat def run_test(): data_dict = loadmat('../data/DynProg_example_py.dat') num_plifs,num_limits = len(data_dict['penalty_array']),len(data_dict['penalty_array'][0].limits) pm = PlifMatrix() pm.create_plifs(num_plifs,num_limits) ids = numpy.array(range(num_plifs),dtype=numpy.int32) min_values = numpy.array(range(num_plifs),dtype=numpy.float64) max_values = numpy.array(range(num_plifs),dtype=numpy.float64) all_use_cache = numpy.array(range(num_plifs),dtype=numpy.bool) all_use_svm = numpy.array(range(num_plifs),dtype=numpy.int32) all_limits = zeros((num_plifs,num_limits)) all_penalties = zeros((num_plifs,num_limits)) all_names = ['']*num_plifs all_transforms = ['']*num_plifs for plif_idx in range(num_plifs): ids[plif_idx] = data_dict['penalty_array'][plif_idx].id-1 min_values[plif_idx] = data_dict['penalty_array'][plif_idx].min_value max_values[plif_idx] = data_dict['penalty_array'][plif_idx].max_value all_use_cache[plif_idx] = data_dict['penalty_array'][plif_idx].use_cache all_use_svm[plif_idx] = data_dict['penalty_array'][plif_idx].use_svm all_limits[plif_idx] = data_dict['penalty_array'][plif_idx].limits all_penalties[plif_idx] = data_dict['penalty_array'][plif_idx].penalties all_names[plif_idx] = str(data_dict['penalty_array'][plif_idx].name) all_transforms[plif_idx] = str(data_dict['penalty_array'][plif_idx].transform) if all_transforms[plif_idx] == '[]': all_transforms[plif_idx] = 'linear' pm.set_plif_ids(ids) pm.set_plif_min_values(min_values) pm.set_plif_max_values(max_values) pm.set_plif_use_cache(all_use_cache) pm.set_plif_use_svm(all_use_svm) pm.set_plif_limits(all_limits) pm.set_plif_penalties(all_penalties) #pm.set_plif_names(all_names) #pm.set_plif_transform_type(all_transforms) 
transition_ptrs = data_dict['model'].transition_pointers transition_ptrs = transition_ptrs[:,:,0:2] transition_ptrs = transition_ptrs.astype(numpy.float64) pm.compute_plif_matrix(transition_ptrs) # init_dyn_prog num_svms = 8 dyn = DynProg(num_svms) orf_info = data_dict['model'].orf_info orf_info = orf_info.astype(numpy.int32) num_states = orf_info.shape[0] dyn.set_num_states(num_states) block = data_dict['block'] seq_len = len(block.seq) seq = str(block.seq) gene_string = array([elem for elem in seq]) # precompute_content_svms pos = block.all_pos-1 pos = pos.astype(numpy.int32) snd_pos = pos dyn.set_pos(pos) dyn.set_gene_string(gene_string) dyn.create_word_string() dyn.precompute_stop_codons() dyn.init_content_svm_value_array(num_svms) dict_weights = data_dict['content_weights'] dict_weights = dict_weights.reshape(8,1).astype(numpy.float64) dict_weights = zeros((8,5440)) dyn.set_dict_weights(dict_weights.T) dyn.precompute_content_values() dyn.init_mod_words_array(data_dict['model'].mod_words.astype(numpy.int32)) pm.compute_signal_plifs(data_dict['state_signals'].astype(numpy.int32)) dyn.set_orf_info(orf_info) # p = data_dict['model'].p q = data_dict['model'].q dyn.set_p_vector(p) dyn.set_q_vector(q) a_trans = data_dict['a_trans'] a_trans = a_trans.astype(float64) dyn.set_a_trans_matrix(a_trans) dyn.set_content_type_array(data_dict['seg_path'].astype(numpy.float64)) dyn.best_path_set_segment_loss(data_dict['loss'].astype(numpy.float64)) dyn.check_svm_arrays() features = data_dict['block'].features dyn.set_observation_matrix(features) use_orf = True feat_dims = [25,201,2] dyn.set_plif_matrices(pm); dyn.compute_nbest_paths(features.shape[2], use_orf, 1,True,False) # fetch results states = dyn.get_states() print states scores = dyn.get_scores() print scores positions = dyn.get_positions() print positions if __name__ == '__main__': run_test()
import gc from shogun.Features import Alphabet,StringCharFeatures,StringWordFeatures,DNA from shogun.PreProc import SortWordString, MSG_DEBUG from shogun.Kernel import CommWordStringKernel, IdentityKernelNormalizer from numpy import mat POS=[100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT'] NEG=[100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 
100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'TTGT', 100*'TTGT', 100*'TTGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT',100*'ACGT', 100*'ACGT', 100*'ACGT'] order=7 gap=0 reverse=False for i in xrange(10): alpha=Alphabet(DNA) traindat=StringCharFeatures(alpha) traindat.set_features(POS+NEG) trainudat=StringWordFeatures(traindat.get_alphabet()); trainudat.obtain_from_char(traindat, order-1, order, gap, reverse) #trainudat.io.set_loglevel(MSG_DEBUG) pre = SortWordString() #pre.io.set_loglevel(MSG_DEBUG) pre.init(trainudat) trainudat.add_preproc(pre) trainudat.apply_preproc() spec = 
CommWordStringKernel(10, False) spec.set_normalizer(IdentityKernelNormalizer()) spec.init(trainudat, trainudat) K=mat(spec.get_kernel_matrix()) del POS del NEG del order del gap del reverse