SHOGUN v0.9.0
|
00001 /* 00002 * This program is free software; you can redistribute it and/or modify 00003 * it under the terms of the GNU General Public License as published by 00004 * the Free Software Foundation; either version 2 of the License, or 00005 * (at your option) any later version. 00006 * 00007 * Written (W) 2010 Christian Widmer 00008 * Copyright (C) 2010 Max-Planck-Society 00009 */ 00010 00011 #ifndef _MULTITASKKERNELTREENORMALIZER_H___ 00012 #define _MULTITASKKERNELTREENORMALIZER_H___ 00013 00014 #include "kernel/KernelNormalizer.h" 00015 #include "kernel/MultitaskKernelMklNormalizer.h" 00016 #include "kernel/Kernel.h" 00017 #include <algorithm> 00018 #include <map> 00019 #include <set> 00020 #include <deque> 00021 00022 namespace shogun 00023 { 00024 00029 class CNode: public CSGObject 00030 { 00031 00032 public: 00033 00034 00037 CNode() 00038 { 00039 parent = NULL; 00040 beta = 1.0; 00041 node_id = 0; 00042 } 00043 00047 std::set<CNode*> get_path_root() 00048 { 00049 std::set<CNode*> nodes_on_path = std::set<CNode*>(); 00050 CNode *node = this; 00051 while (node != NULL) { 00052 nodes_on_path.insert(node); 00053 node = node->parent; 00054 } 00055 return nodes_on_path; 00056 } 00057 00061 std::vector<int32_t> get_task_ids_below() 00062 { 00063 00064 std::vector<int32_t> task_ids; 00065 std::deque<CNode*> grey_nodes; 00066 grey_nodes.push_back(this); 00067 00068 while(grey_nodes.size() > 0) 00069 { 00070 00071 CNode *current_node = grey_nodes.front(); 00072 grey_nodes.pop_front(); 00073 00074 for(int32_t i = 0; i!=int32_t(current_node->children.size()); i++){ 00075 grey_nodes.push_back(current_node->children[i]); 00076 } 00077 00078 if(current_node->is_leaf()){ 00079 task_ids.push_back(current_node->getNode_id()); 00080 } 00081 } 00082 00083 return task_ids; 00084 } 00085 00089 void add_child(CNode *node) 00090 { 00091 node->parent = this; 00092 this->children.push_back(node); 00093 } 00094 00096 inline virtual const char *get_name() const 00097 { 00098 return "CNode"; 00099 } 00100 00102 bool is_leaf() 00103 { 00104 return children.empty(); 00105 00106 } 00107 00109 int32_t getNode_id() const 00110 { 00111 return node_id; 00112 } 00113 00115 void setNode_id(int32_t node_idx) 00116 { 00117 this->node_id = node_idx; 00118 } 00119 00121 float64_t beta; 00122 00123 protected: 00124 00126 CNode* parent; 00127 00129 std::vector<CNode*> children; 00130 00132 int32_t node_id; 00133 00134 }; 00135 00136 00141 class CTaxonomy : public CSGObject 00142 { 00143 00144 public: 00145 00148 CTaxonomy() : CSGObject() 00149 { 00150 root = new CNode(); 00151 nodes.push_back(root); 00152 00153 name2id = std::map<std::string, int32_t>(); 00154 name2id["root"] = 0; 00155 } 00156 00161 CNode* get_node(int32_t task_id) { 00162 return nodes[task_id]; 00163 } 00164 00168 void set_root_beta(float64_t beta) 00169 { 00170 nodes[0]->beta = beta; 00171 } 00172 00178 CNode* add_node(std::string parent_name, std::string child_name, float64_t beta) { 00179 00180 00181 if (child_name=="") SG_ERROR("child_name empty"); 00182 if (parent_name=="") SG_ERROR("parent_name empty"); 00183 00184 00185 CNode* child_node = new CNode(); 00186 00187 child_node->beta = beta; 00188 00189 nodes.push_back(child_node); 00190 int32_t id = nodes.size()-1; 00191 00192 name2id[child_name] = id; 00193 00194 child_node->setNode_id(id); 00195 00196 00197 //create edge 00198 CNode* parent = nodes[name2id[parent_name]]; 00199 00200 parent->add_child(child_node); 00201 00202 return child_node; 00203 00204 } 00205 00210 int32_t get_id(std::string name) { 00211 return name2id[name]; 00212 } 00213 00219 std::set<CNode*> intersect_root_path(CNode* node_lhs, CNode* node_rhs) { 00220 00221 std::set<CNode*> root_path_lhs = node_lhs->get_path_root(); 00222 std::set<CNode*> root_path_rhs = node_rhs->get_path_root(); 00223 00224 std::set<CNode*> intersection; 00225 00226 std::set_intersection(root_path_lhs.begin(), root_path_lhs.end(), 00227 root_path_rhs.begin(), root_path_rhs.end(), 00228 std::inserter(intersection, intersection.end())); 00229 00230 return intersection; 00231 00232 } 00233 00239 float64_t compute_node_similarity(int32_t task_lhs, int32_t task_rhs) 00240 { 00241 00242 CNode* node_lhs = get_node(task_lhs); 00243 CNode* node_rhs = get_node(task_rhs); 00244 00245 // compute intersection of paths to root 00246 std::set<CNode*> intersection = intersect_root_path(node_lhs, node_rhs); 00247 00248 // sum up weights 00249 float64_t gamma = 0; 00250 for (std::set<CNode*>::const_iterator p = intersection.begin(); p != intersection.end(); ++p) { 00251 00252 gamma += (*p)->beta; 00253 } 00254 00255 return gamma; 00256 00257 } 00258 00262 void update_task_histogram(std::vector<int32_t> task_vector_lhs) { 00263 00264 //empty map 00265 task_histogram.clear(); 00266 00267 00268 //fill map with zeros 00269 for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++) 00270 { 00271 task_histogram[*it] = 0.0; 00272 } 00273 00274 //fill map 00275 for (std::vector<int32_t>::const_iterator it=task_vector_lhs.begin(); it!=task_vector_lhs.end(); it++) 00276 { 00277 task_histogram[*it] += 1.0; 00278 } 00279 00280 //compute fractions 00281 for (std::map<int32_t, float64_t>::const_iterator it=task_histogram.begin(); it!=task_histogram.end(); it++) 00282 { 00283 task_histogram[it->first] = task_histogram[it->first] / float64_t(task_vector_lhs.size()); 00284 } 00285 00286 } 00287 00289 int32_t get_num_nodes() 00290 { 00291 return (int32_t)(nodes.size()); 00292 } 00293 00295 int32_t get_num_leaves() 00296 { 00297 int32_t num_leaves = 0; 00298 00299 for (int32_t i=0; i!=get_num_nodes(); i++) 00300 { 00301 if (get_node(i)->is_leaf()==true) 00302 { 00303 num_leaves++; 00304 } 00305 } 00306 00307 return num_leaves; 00308 } 00309 00311 float64_t get_node_weight(int32_t idx) 00312 { 00313 CNode* node = get_node(idx); 00314 return node->beta; 00315 } 00316 00321 void set_node_weight(int32_t idx, float64_t weight) 00322 { 00323 CNode* node = get_node(idx); 00324 node->beta = weight; 00325 } 00326 00328 inline virtual const char* get_name() const 00329 { 00330 return "CTaxonomy"; 00331 } 00332 00334 std::map<std::string, int32_t> get_name2id() { 00335 return name2id; 00336 } 00337 00343 int32_t get_id_by_name(std::string name) 00344 { 00345 return name2id[name]; 00346 } 00347 00348 00349 protected: 00350 00351 CNode* root; 00352 std::map<std::string, int32_t> name2id; 00353 std::vector<CNode*> nodes; 00354 std::map<int32_t, float64_t> task_histogram; 00355 00356 }; 00357 00358 00359 00360 00361 class CMultitaskKernelMklNormalizer; 00362 00366 class CMultitaskKernelTreeNormalizer: public CMultitaskKernelMklNormalizer 00367 { 00368 00369 00370 00371 public: 00372 00375 CMultitaskKernelTreeNormalizer() : CMultitaskKernelMklNormalizer() 00376 { 00377 } 00378 00385 CMultitaskKernelTreeNormalizer(std::vector<std::string> task_lhs, 00386 std::vector<std::string> task_rhs, 00387 CTaxonomy tax) : CMultitaskKernelMklNormalizer() 00388 { 00389 00390 taxonomy = tax; 00391 set_task_vector_lhs(task_lhs); 00392 set_task_vector_rhs(task_rhs); 00393 00394 num_nodes = taxonomy.get_num_nodes(); 00395 00396 dependency_matrix = std::vector<float64_t>(num_nodes * num_nodes); 00397 00398 update_cache(); 00399 } 00400 00401 00403 virtual ~CMultitaskKernelTreeNormalizer() 00404 { 00405 } 00406 00407 00409 void update_cache() 00410 { 00411 00412 00413 for (int32_t i=0; i!=num_nodes; i++) 00414 { 00415 for (int32_t j=0; j!=num_nodes; j++) 00416 { 00417 00418 float64_t similarity = taxonomy.compute_node_similarity(i, j); 00419 set_node_similarity(i,j,similarity); 00420 00421 } 00422 00423 } 00424 } 00425 00426 00427 00433 inline virtual float64_t normalize(float64_t value, int32_t idx_lhs, int32_t idx_rhs) 00434 { 00435 00436 //lookup tasks 00437 int32_t task_idx_lhs = task_vector_lhs[idx_lhs]; 00438 int32_t task_idx_rhs = task_vector_rhs[idx_rhs]; 00439 00440 //lookup similarity 00441 float64_t task_similarity = get_node_similarity(task_idx_lhs, task_idx_rhs); 00442 //float64_t task_similarity = taxonomy.compute_node_similarity(task_idx_lhs, task_idx_rhs); 00443 00444 //take task similarity into account 00445 float64_t similarity = (value/scale) * task_similarity; 00446 00447 00448 return similarity; 00449 00450 } 00451 00456 inline virtual float64_t normalize_lhs(float64_t value, int32_t idx_lhs) 00457 { 00458 SG_ERROR("normalize_lhs not implemented"); 00459 return 0; 00460 } 00461 00466 inline virtual float64_t normalize_rhs(float64_t value, int32_t idx_rhs) 00467 { 00468 SG_ERROR("normalize_rhs not implemented"); 00469 return 0; 00470 } 00471 00472 00474 void set_task_vector_lhs(std::vector<std::string> vec) 00475 { 00476 00477 task_vector_lhs.clear(); 00478 00479 for (int32_t i = 0; i != (int32_t)(vec.size()); ++i) 00480 { 00481 task_vector_lhs.push_back(taxonomy.get_id(vec[i])); 00482 } 00483 00484 //update task histogram 00485 taxonomy.update_task_histogram(task_vector_lhs); 00486 00487 } 00488 00490 void set_task_vector_rhs(std::vector<std::string> vec) 00491 { 00492 00493 task_vector_rhs.clear(); 00494 00495 for (int32_t i = 0; i != (int32_t)(vec.size()); ++i) 00496 { 00497 task_vector_rhs.push_back(taxonomy.get_id(vec[i])); 00498 } 00499 00500 } 00501 00503 void set_task_vector(std::vector<std::string> vec) 00504 { 00505 set_task_vector_lhs(vec); 00506 set_task_vector_rhs(vec); 00507 } 00508 00510 int32_t get_num_betas() 00511 { 00512 00513 return taxonomy.get_num_nodes(); 00514 00515 } 00516 00520 float64_t get_beta(int32_t idx) 00521 { 00522 00523 return taxonomy.get_node_weight(idx); 00524 00525 } 00526 00530 void set_beta(int32_t idx, float64_t weight) 00531 { 00532 00533 taxonomy.set_node_weight(idx, weight); 00534 00535 update_cache(); 00536 00537 } 00538 00539 00545 float64_t get_node_similarity(int32_t node_lhs, int32_t node_rhs) 00546 { 00547 00548 ASSERT(node_lhs < num_nodes && node_lhs >= 0); 00549 ASSERT(node_rhs < num_nodes && node_rhs >= 0); 00550 00551 return dependency_matrix[node_lhs * num_nodes + node_rhs]; 00552 00553 } 00554 00560 void set_node_similarity(int32_t node_lhs, int32_t node_rhs, 00561 float64_t similarity) 00562 { 00563 00564 ASSERT(node_lhs < num_nodes && node_lhs >= 0); 00565 ASSERT(node_rhs < num_nodes && node_rhs >= 0); 00566 00567 dependency_matrix[node_lhs * num_nodes + node_rhs] = similarity; 00568 00569 } 00570 00571 00573 inline virtual const char* get_name() const 00574 { 00575 return "MultitaskKernelTreeNormalizer"; 00576 } 00577 00578 00579 00580 protected: 00581 00582 00584 CTaxonomy taxonomy; 00585 00587 int32_t num_nodes; 00588 00590 std::vector<int32_t> task_vector_lhs; 00591 00593 std::vector<int32_t> task_vector_rhs; 00594 00596 std::vector<float64_t> dependency_matrix; 00597 00598 }; 00599 } 00600 #endif