SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 2 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Christian Widmer 00008 * Copyright (C) 2010 Max-Planck-Society 00009 */ 00010 00011 #ifndef _MULTITASKKERNELPLIFNORMALIZER_H___ 00012 #define _MULTITASKKERNELPLIFNORMALIZER_H___ 00013 00014 #include "kernel/KernelNormalizer.h" 00015 #include "kernel/MultitaskKernelMklNormalizer.h" 00016 #include "kernel/Kernel.h" 00017 #include <algorithm> 00018 00019 00020 00021 namespace shogun 00022 { 00026 class CMultitaskKernelPlifNormalizer: public CMultitaskKernelMklNormalizer 00027 { 00028 00029 public: 00031 CMultitaskKernelPlifNormalizer() : CMultitaskKernelMklNormalizer() 00032 { 00033 SG_UNSTABLE("CMultitaskKernelPlifNormalizer::" 00034 "CMultitaskKernelPlifNormalizer()", "\n"); 00035 00036 num_tasks = 0; 00037 num_betas = 0; 00038 } 00039 00042 CMultitaskKernelPlifNormalizer(std::vector<float64_t> support_, std::vector<int32_t> task_vector) 00043 : CMultitaskKernelMklNormalizer() 00044 { 00045 00046 num_betas = static_cast<int>(support_.size()); 00047 00048 support = support_; 00049 00050 // init support points values with constant function 00051 betas = std::vector<float64_t>(num_betas); 00052 for (int i=0; i!=num_betas; i++) 00053 { 00054 betas[i] = 1; 00055 } 00056 00057 num_tasks = get_num_unique_tasks(task_vector); 00058 00059 // set both sides equally 00060 set_task_vector(task_vector); 00061 00062 // init distance matrix 00063 distance_matrix = std::vector<float64_t>(num_tasks * num_tasks); 00064 00065 // init similarity matrix 00066 similarity_matrix = std::vector<float64_t>(num_tasks * num_tasks); 00067 00068 } 00069 00070 00076 inline virtual float64_t normalize(float64_t value, int32_t idx_lhs, 00077 int32_t idx_rhs) 00078 { 00079 00080 //lookup tasks 00081 int32_t task_idx_lhs = task_vector_lhs[idx_lhs]; 00082 int32_t task_idx_rhs = task_vector_rhs[idx_rhs]; 00083 00084 //lookup similarity 00085 float64_t task_similarity = get_task_similarity(task_idx_lhs, 00086 task_idx_rhs); 00087 00088 //take task similarity into account 00089 float64_t similarity = (value/scale) * task_similarity; 00090 00091 00092 return similarity; 00093 00094 } 00095 00101 int32_t get_num_unique_tasks(std::vector<int32_t> vec) { 00102 00103 //sort 00104 std::sort(vec.begin(), vec.end()); 00105 00106 //reorder tasks with unique prefix 00107 std::vector<int32_t>::iterator endLocation = std::unique(vec.begin(), vec.end()); 00108 00109 //count unique tasks 00110 int32_t num_vec = std::distance(vec.begin(), endLocation); 00111 00112 return num_vec; 00113 00114 } 00115 00116 00118 virtual ~CMultitaskKernelPlifNormalizer() 00119 { 00120 } 00121 00122 00124 void update_cache() 00125 { 00126 00127 00128 for (int32_t i=0; i!=num_tasks; i++) 00129 { 00130 for (int32_t j=0; j!=num_tasks; j++) 00131 { 00132 00133 float64_t similarity = compute_task_similarity(i, j); 00134 set_task_similarity(i,j,similarity); 00135 00136 } 00137 00138 } 00139 } 00140 00141 00143 float64_t compute_task_similarity(int32_t task_a, int32_t task_b) 00144 { 00145 00146 float64_t distance = get_task_distance(task_a, task_b); 00147 float64_t similarity = -1; 00148 00149 int32_t upper_bound_idx = -1; 00150 00151 00152 // determine interval 00153 for (int i=1; i!=num_betas; i++) 00154 { 00155 if (distance <= support[i]) 00156 { 00157 upper_bound_idx = i; 00158 break; 00159 } 00160 } 00161 00162 // perform interpolation (constant for beyond upper bound) 00163 if (upper_bound_idx == -1) 00164 { 00165 00166 similarity = betas[num_betas-1]; 00167 00168 } else { 00169 00170 int32_t lower_bound_idx = upper_bound_idx - 1; 00171 float64_t interval_size = support[upper_bound_idx] - support[lower_bound_idx]; 00172 00173 float64_t factor_lower = 1 - (distance - support[lower_bound_idx]) / interval_size; 00174 float64_t factor_upper = 1 - factor_lower; 00175 00176 similarity = factor_lower*betas[lower_bound_idx] + factor_upper*betas[upper_bound_idx]; 00177 00178 } 00179 00180 return similarity; 00181 00182 } 00183 00184 00185 public: 00186 00188 virtual std::vector<int32_t> get_task_vector_lhs() const 00189 { 00190 return task_vector_lhs; 00191 } 00192 00194 virtual void set_task_vector_lhs(std::vector<int32_t> vec) 00195 { 00196 task_vector_lhs = vec; 00197 } 00198 00200 virtual std::vector<int32_t> get_task_vector_rhs() const 00201 { 00202 return task_vector_rhs; 00203 } 00204 00206 virtual void set_task_vector_rhs(std::vector<int32_t> vec) 00207 { 00208 task_vector_rhs = vec; 00209 } 00210 00212 virtual void set_task_vector(std::vector<int32_t> vec) 00213 { 00214 task_vector_lhs = vec; 00215 task_vector_rhs = vec; 00216 } 00217 00223 float64_t get_task_distance(int32_t task_lhs, int32_t task_rhs) 00224 { 00225 00226 ASSERT(task_lhs < num_tasks && task_lhs >= 0); 00227 ASSERT(task_rhs < num_tasks && task_rhs >= 0); 00228 00229 return distance_matrix[task_lhs * num_tasks + task_rhs]; 00230 00231 } 00232 00238 void set_task_distance(int32_t task_lhs, int32_t task_rhs, 00239 float64_t distance) 00240 { 00241 00242 ASSERT(task_lhs < num_tasks && task_lhs >= 0); 00243 ASSERT(task_rhs < num_tasks && task_rhs >= 0); 00244 00245 distance_matrix[task_lhs * num_tasks + task_rhs] = distance; 00246 00247 } 00248 00254 float64_t get_task_similarity(int32_t task_lhs, int32_t task_rhs) 00255 { 00256 00257 ASSERT(task_lhs < num_tasks && task_lhs >= 0); 00258 ASSERT(task_rhs < num_tasks && task_rhs >= 0); 00259 00260 return similarity_matrix[task_lhs * num_tasks + task_rhs]; 00261 00262 } 00263 00269 void set_task_similarity(int32_t task_lhs, int32_t task_rhs, 00270 float64_t similarity) 00271 { 00272 00273 ASSERT(task_lhs < num_tasks && task_lhs >= 0); 00274 ASSERT(task_rhs < num_tasks && task_rhs >= 0); 00275 00276 similarity_matrix[task_lhs * num_tasks + task_rhs] = similarity; 00277 00278 } 00279 00283 float64_t get_beta(int32_t idx) 00284 { 00285 00286 return betas[idx]; 00287 00288 } 00289 00294 void set_beta(int32_t idx, float64_t weight) 00295 { 00296 00297 betas[idx] = weight; 00298 00299 update_cache(); 00300 00301 } 00302 00306 int32_t get_num_betas() 00307 { 00308 00309 return num_betas; 00310 00311 } 00312 00313 00315 inline virtual const char* get_name() const 00316 { 00317 return "MultitaskKernelNormalizer"; 00318 } 00319 00320 protected: 00321 00323 int32_t num_tasks; 00324 00326 std::vector<int32_t> task_vector_lhs; 00327 00329 std::vector<int32_t> task_vector_rhs; 00330 00332 std::vector<float64_t> distance_matrix; 00333 00335 std::vector<float64_t> similarity_matrix; 00336 00338 int32_t num_betas; 00339 00341 std::vector<float64_t> betas; 00342 00344 std::vector<float64_t> support; 00345 00346 }; 00347 } 00348 #endif