SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 3 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2007-2009 Christian Gehl 00008 * Written (W) 1999-2009 Soeren Sonnenburg 00009 * Copyright (C) 1999-2009 Fraunhofer Institute FIRST and Max-Planck-Society 00010 */ 00011 00012 #include "lib/common.h" 00013 #include "distance/ManhattanWordDistance.h" 00014 #include "features/Features.h" 00015 #include "features/StringFeatures.h" 00016 #include "lib/io.h" 00017 00018 using namespace shogun; 00019 00020 CManhattanWordDistance::CManhattanWordDistance() 00021 : CStringDistance<uint16_t>() 00022 { 00023 SG_DEBUG("CManhattanWordDistance created"); 00024 } 00025 00026 CManhattanWordDistance::CManhattanWordDistance( 00027 CStringFeatures<uint16_t>* l, CStringFeatures<uint16_t>* r) 00028 : CStringDistance<uint16_t>() 00029 { 00030 SG_DEBUG("CManhattanWordDistance created"); 00031 00032 init(l, r); 00033 } 00034 00035 CManhattanWordDistance::~CManhattanWordDistance() 00036 { 00037 cleanup(); 00038 } 00039 00040 bool CManhattanWordDistance::init(CFeatures* l, CFeatures* r) 00041 { 00042 bool result=CStringDistance<uint16_t>::init(l,r); 00043 return result; 00044 } 00045 00046 void CManhattanWordDistance::cleanup() 00047 { 00048 } 00049 00050 float64_t CManhattanWordDistance::compute(int32_t idx_a, int32_t idx_b) 00051 { 00052 int32_t alen, blen; 00053 bool free_avec, free_bvec; 00054 00055 uint16_t* avec=((CStringFeatures<uint16_t>*) lhs)-> 00056 get_feature_vector(idx_a, alen, free_avec); 00057 uint16_t* bvec=((CStringFeatures<uint16_t>*) rhs)-> 00058 get_feature_vector(idx_b, blen, free_bvec); 00059 00060 int32_t result=0; 00061 00062 int32_t left_idx=0; 00063 int32_t right_idx=0; 00064 00065 while (left_idx < alen && right_idx < blen) 00066 { 00067 uint16_t sym=avec[left_idx]; 00068 if (avec[left_idx]==bvec[right_idx]) 00069 { 00070 int32_t old_left_idx=left_idx; 00071 int32_t old_right_idx=right_idx; 00072 00073 while (left_idx< alen && avec[left_idx]==sym) 00074 left_idx++; 00075 00076 while (right_idx< blen && bvec[right_idx]==sym) 00077 right_idx++; 00078 00079 result += CMath::abs( (left_idx-old_left_idx) - (right_idx-old_right_idx) ); 00080 } 00081 else if (avec[left_idx]<bvec[right_idx]) 00082 { 00083 00084 while (left_idx< alen && avec[left_idx]==sym) 00085 { 00086 result++; 00087 left_idx++; 00088 } 00089 } 00090 else 00091 { 00092 sym=bvec[right_idx]; 00093 00094 while (right_idx< blen && bvec[right_idx]==sym) 00095 { 00096 result++; 00097 right_idx++; 00098 } 00099 } 00100 } 00101 00102 result+=blen-right_idx + alen-left_idx; 00103 00104 ((CStringFeatures<uint16_t>*) lhs)-> 00105 free_feature_vector(avec, idx_a, free_avec); 00106 ((CStringFeatures<uint16_t>*) rhs)-> 00107 free_feature_vector(bvec, idx_b, free_bvec); 00108 00109 return result; 00110 } 00111