SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
StratifiedCrossValidationSplitting.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Heiko Strathmann
8  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
9  */
10 
12 #include <shogun/features/Labels.h>
13 #include <shogun/lib/Set.h>
14 
15 using namespace shogun;
16 
18  CSplittingStrategy(0, 0)
19 {
20 }
21 
23  CLabels* labels, index_t num_subsets) :
24  CSplittingStrategy(labels, num_subsets)
25 {
26  build_subsets();
27 }
28 
30 {
31  /* extract all labels */
32  CSet<float64_t> unique_labels;
33  for (index_t i=0; i<m_labels->get_num_labels(); ++i)
34  unique_labels.add(m_labels->get_label(i));
35 
36  /* for every label, build set for indices */
38  for (index_t i=0; i<unique_labels.get_num_elements(); ++i)
39  label_indices.append_element(new CDynamicArray<index_t> ());
40 
41  /* fill set with indices, for each label type ... */
42  for (index_t i=0; i<unique_labels.get_num_elements(); ++i)
43  {
44  /* ... iterate over all labels and add indices with same label to set */
45  for (index_t j=0; j<m_labels->get_num_labels(); ++j)
46  {
47  if (m_labels->get_label(j)==unique_labels[i])
48  {
49  CDynamicArray<index_t>* current=label_indices.get_element(i);
50  current->append_element(j);
51  SG_UNREF(current);
52  }
53  }
54  }
55 
56  /* shuffle created label sets */
57  for (index_t i=0; i<label_indices.get_num_elements(); ++i)
58  {
59  CDynamicArray<index_t>* current=label_indices.get_element(i);
60  current->shuffle();
61  SG_UNREF(current);
62  }
63 
64  /* distribute labels to subsets for all label types */
65  index_t target_set=0;
66  for (index_t i=0; i<unique_labels.get_num_elements(); ++i)
67  {
68  /* current index set for current label */
69  CDynamicArray<index_t>* current=label_indices.get_element(i);
70 
71  for (index_t j=0; j<current->get_num_elements(); ++j)
72  {
74  target_set++);
75  next->append_element(current->get_element(j));
76  target_set%=m_subset_indices->get_num_elements();
77  SG_UNREF(next);
78  }
79 
80  SG_UNREF(current);
81  }
82 
83  /* finally shuffle to avoid that subsets with low indices have more
84  * elements, which happens if the number of class labels is not equal to
85  * the number of subsets */
87 }

SHOGUN Machine Learning Toolbox - Documentation