SHOGUN
v1.1.0
|
Agglomerative hierarchical single linkage clustering.
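Starting with each object being assigned to its own cluster, clusters are iteratively merged. Here the clusters are merged whose elements have minimum distance, i.e. the clusters $\mathcal{A}$ and $\mathcal{B}$ that obtain
\[ \min\left\{ d(\mathbf{x}, \mathbf{x}') : \mathbf{x} \in \mathcal{A},\ \mathbf{x}' \in \mathcal{B} \right\} \]
are merged.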
cf e.g. http://en.wikipedia.org/wiki/Data_clustering
Definition at line 37 of file Hierarchical.h.
Public Member Functions | |
CHierarchical () | |
CHierarchical (int32_t merges, CDistance *d) | |
virtual | ~CHierarchical () |
virtual EClassifierType | get_classifier_type () |
virtual bool | load (FILE *srcfile) |
virtual bool | save (FILE *dstfile) |
void | set_merges (int32_t m) |
int32_t | get_merges () |
SGVector< int32_t > | get_assignment () |
SGVector< float64_t > | get_merge_distances () |
SGMatrix< int32_t > | get_cluster_pairs () |
virtual const char * | get_name () const |
![]() | |
CDistanceMachine () | |
virtual | ~CDistanceMachine () |
void | set_distance (CDistance *d) |
CDistance * | get_distance () |
void | distances_lhs (float64_t *result, int32_t idx_a1, int32_t idx_a2, int32_t idx_b) |
void | distances_rhs (float64_t *result, int32_t idx_b1, int32_t idx_b2, int32_t idx_a) |
![]() | |
CMachine () | |
virtual | ~CMachine () |
virtual bool | train (CFeatures *data=NULL) |
virtual void | set_labels (CLabels *lab) |
virtual CLabels * | get_labels () |
virtual float64_t | get_label (int32_t i) |
void | set_max_train_time (float64_t t) |
float64_t | get_max_train_time () |
void | set_solver_type (ESolverType st) |
ESolverType | get_solver_type () |
virtual void | set_store_model_features (bool store_model) |
![]() | |
CSGObject () | |
CSGObject (const CSGObject &orig) | |
virtual | ~CSGObject () |
virtual bool | is_generic (EPrimitiveType *generic) const |
template<class T > | |
void | set_generic () |
void | unset_generic () |
virtual void | print_serializable (const char *prefix="") |
virtual bool | save_serializable (CSerializableFile *file, const char *prefix="") |
virtual bool | load_serializable (CSerializableFile *file, const char *prefix="") |
void | set_global_io (SGIO *io) |
SGIO * | get_global_io () |
void | set_global_parallel (Parallel *parallel) |
Parallel * | get_global_parallel () |
void | set_global_version (Version *version) |
Version * | get_global_version () |
SGVector< char * > | get_modelsel_names () |
char * | get_modsel_param_descr (const char *param_name) |
index_t | get_modsel_param_index (const char *param_name) |
Protected Member Functions | |
virtual bool | train_machine (CFeatures *data=NULL) |
virtual void | store_model_features () |
virtual CLabels * | apply () |
virtual CLabels * | apply (CFeatures *data) |
virtual float64_t | apply (int32_t num) |
Protected Attributes | |
int32_t | merges |
the number of merges in hierarchical clustering | |
int32_t | dimensions |
number of dimensions | |
int32_t | assignment_size |
size of assignment table | |
int32_t * | assignment |
cluster assignment for the num_points | |
int32_t | table_size |
size of the below tables | |
int32_t * | pairs |
tuples of i/j | |
float64_t * | merge_distance |
distance at which pair i/j was added | |
![]() | |
CDistance * | distance |
![]() | |
float64_t | max_train_time |
CLabels * | labels |
ESolverType | solver_type |
bool | m_store_model_features |
Additional Inherited Members | |
![]() | |
SGIO * | io |
Parallel * | parallel |
Version * | version |
Parameter * | m_parameters |
Parameter * | m_model_selection_parameters |
![]() | |
static void * | run_distance_thread_lhs (void *p) |
static void * | run_distance_thread_rhs (void *p) |
CHierarchical | ( | ) |
default constructor
Definition at line 34 of file Hierarchical.cpp.
CHierarchical | ( | int32_t | merges, |
CDistance * | d | ||
) |
|
virtual |
Definition at line 47 of file Hierarchical.cpp.
|
protected virtual |
NOT IMPLEMENTED
Reimplemented from CDistanceMachine.
Definition at line 204 of file Hierarchical.cpp.
NOT IMPLEMENTED
Reimplemented from CDistanceMachine.
Definition at line 199 of file Hierarchical.cpp.
|
protected virtual |
NOT IMPLEMENTED
Reimplemented from CDistanceMachine.
Definition at line 210 of file Hierarchical.cpp.
SGVector< int32_t > get_assignment | ( | ) |
get assignment
Definition at line 178 of file Hierarchical.cpp.
|
virtual |
get classifier type
Reimplemented from CMachine.
Definition at line 54 of file Hierarchical.cpp.
SGMatrix< int32_t > get_cluster_pairs | ( | ) |
get cluster pairs
Definition at line 188 of file Hierarchical.cpp.
SGVector< float64_t > get_merge_distances | ( | ) |
get merge distances
Definition at line 183 of file Hierarchical.cpp.
int32_t get_merges | ( | ) |
|
virtual |
Reimplemented from CDistanceMachine.
Definition at line 103 of file Hierarchical.h.
|
virtual |
load distance machine from file
srcfile | file to load from |
Reimplemented from CMachine.
Definition at line 158 of file Hierarchical.cpp.
|
virtual |
save distance machine to file
dstfile | file to save to |
Reimplemented from CMachine.
Definition at line 165 of file Hierarchical.cpp.
void set_merges | ( | int32_t | m | ) |
|
protected virtual |
TODO: Ensures cluster centers are in lhs of underlying distance. Currently: does nothing.
Reimplemented from CDistanceMachine.
Definition at line 194 of file Hierarchical.cpp.
|
protected virtual |
estimate hierarchical clustering
data | training data (parameter can be avoided if distance or kernel-based classifiers are used and distance/kernels are initialized with train data) |
Reimplemented from CMachine.
Definition at line 59 of file Hierarchical.cpp.
|
protected |
cluster assignment for the num_points
Definition at line 141 of file Hierarchical.h.
|
protected |
size of assignment table
Definition at line 138 of file Hierarchical.h.
|
protected |
number of dimensions
Definition at line 135 of file Hierarchical.h.
|
protected |
distance at which pair i/j was added
Definition at line 150 of file Hierarchical.h.
|
protected |
the number of merges in hierarchical clustering
Definition at line 132 of file Hierarchical.h.
|
protected |
tuples of i/j
Definition at line 147 of file Hierarchical.h.
|
protected |
size of the below tables
Definition at line 144 of file Hierarchical.h.