20 using namespace shogun;
22 #ifndef DOXYGEN_SHOULD_SKIP_THIS
23 struct HASHEDWD_THREAD_PARAM
36 #endif // DOXYGEN_SHOULD_SKIP_THIS
42 "CHashedWDFeaturesTransposed::CHashedWDFeaturesTransposed()",
64 int32_t start_order, int32_t order, int32_t from_order,
76 int32_t transposed_num_feat=0;
77 int32_t transposed_num_vec=0;
100 :
CDotFeatures(orig), strings(orig.strings), transposed_strings(orig.transposed_strings),
101 degree(orig.degree), start_degree(orig.start_degree),
102 from_degree(orig.from_degree), m_hash_bits(orig.m_hash_bits),
103 normalization_const(orig.normalization_const)
133 bool free_vec1, free_vec2;
136 uint8_t* vec2=wdf->strings->get_feature_vector(vec_idx2, len2, free_vec2);
142 for (int32_t i=0; i<len1; i++)
144 for (int32_t j=0; (i+j<len1) && (j<
degree); j++)
146 if (vec1[i+j]!=vec2[i+j])
153 wdf->strings->free_feature_vector(vec2, vec_idx2, free_vec2);
159 if (vec2_len !=
w_dim)
160 SG_ERROR(
"Dimensions don't match, vec2_dim=%d, w_dim=%d\n", vec2_len,
w_dim);
172 for (int32_t i=0; i < len; i++)
175 for (int32_t k=0; k<
degree && i+k<len; k++)
180 #ifdef DEBUG_HASHEDWD
181 SG_PRINT(
"vec[i]=%d, k=%d, offs=%d o=%d h=%d \n", vec[i], k,offs, o, h);
183 sum+=vec2[o+(h &
mask)]*wd;
203 uint32_t* index=
SG_MALLOC(uint32_t, stop);
205 int32_t num_vectors=stop-start;
214 SG_ERROR(
"Dimensions don't match, vec_len=%d, w_dim=%d\n", dim,
w_dim);
220 HASHEDWD_THREAD_PARAM params;
222 params.sub_index=NULL;
223 params.output=output;
226 params.alphas=alphas;
229 params.progress=
false;
236 pthread_t* threads =
SG_MALLOC(pthread_t, num_threads-1);
237 HASHEDWD_THREAD_PARAM* params =
SG_MALLOC(HASHEDWD_THREAD_PARAM, num_threads);
238 int32_t step= num_vectors/num_threads;
242 for (t=0; t<num_threads-1; t++)
245 params[t].sub_index=NULL;
246 params[t].output = output;
247 params[t].start = start+t*step;
248 params[t].stop = start+(t+1)*step;
249 params[t].alphas=alphas;
252 params[t].progress =
false;
253 params[t].index=index;
254 pthread_create(&threads[t], NULL,
259 params[t].sub_index=NULL;
260 params[t].output = output;
261 params[t].start = start+t*step;
262 params[t].stop = stop;
263 params[t].alphas=alphas;
266 params[t].progress =
false;
267 params[t].index=index;
270 for (t=0; t<num_threads-1; t++)
271 pthread_join(threads[t], NULL);
281 SG_INFO(
"prematurely stopped. \n");
290 uint32_t* index=
SG_MALLOC(uint32_t, num);
298 SG_ERROR(
"Dimensions don't match, vec_len=%d, w_dim=%d\n", dim,
w_dim);
304 HASHEDWD_THREAD_PARAM params;
306 params.sub_index=sub_index;
307 params.output=output;
310 params.alphas=alphas;
313 params.progress=
false;
320 pthread_t* threads =
SG_MALLOC(pthread_t, num_threads-1);
321 HASHEDWD_THREAD_PARAM* params =
SG_MALLOC(HASHEDWD_THREAD_PARAM, num_threads);
322 int32_t step= num/num_threads;
326 for (t=0; t<num_threads-1; t++)
329 params[t].sub_index=sub_index;
330 params[t].output = output;
331 params[t].start = t*step;
332 params[t].stop = (t+1)*step;
333 params[t].alphas=alphas;
336 params[t].progress =
false;
337 params[t].index=index;
338 pthread_create(&threads[t], NULL,
343 params[t].sub_index=sub_index;
344 params[t].output = output;
345 params[t].start = t*step;
346 params[t].stop = num;
347 params[t].alphas=alphas;
350 params[t].progress =
false;
351 params[t].index=index;
354 for (t=0; t<num_threads-1; t++)
355 pthread_join(threads[t], NULL);
365 SG_INFO(
"prematurely stopped. \n");
371 HASHEDWD_THREAD_PARAM* par=(HASHEDWD_THREAD_PARAM*) p;
373 int32_t* sub_index=par->sub_index;
375 int32_t start=par->start;
376 int32_t stop=par->stop;
380 bool progress=par->progress;
381 uint32_t* index=par->index;
392 for (int32_t j=start; j<stop; j++)
402 uint8_t* dim=transposed_strings[i+k].
string;
405 for (int32_t j=start; j<stop; j++)
407 uint8_t bval=dim[sub_index[j]];
413 output[j]+=vec[o + (h &
mask)]*wd;
417 offs+=partial_w_dim*
degree;
423 for (int32_t j=start; j<stop; j++)
426 output[j]=output[j]*alphas[sub_index[j]]/normalization_const+bias;
428 output[j]=output[j]/normalization_const+bias;
442 uint8_t* dim=transposed_strings[i+k].
string;
445 for (int32_t j=start; j<stop; j++)
452 output[j]+=vec[o + (h &
mask)]*wd;
456 offs+=partial_w_dim*
degree;
462 for (int32_t j=start; j<stop; j++)
465 output[j]=output[j]*alphas[j]/normalization_const+bias;
467 output[j]=output[j]/normalization_const+bias;
476 if (vec2_len !=
w_dim)
477 SG_ERROR(
"Dimensions don't match, vec2_dim=%d, w_dim=%d\n", vec2_len,
w_dim);
491 for (int32_t i=0; i<len; i++)
494 for (int32_t k=0; k<
degree && i+k<len; k++)
501 #ifdef DEBUG_HASHEDWD
502 SG_PRINT(
"offs=%d o=%d h=%d \n", offs, o, h);
503 SG_PRINT(
"vec[i]=%d, k=%d, offs=%d o=%d h=%d \n", vec[i], k,offs, o, h);
505 vec2[o+(h &
mask)]+=wd;
525 for (int32_t i=0; i<
degree; i++)
528 SG_DEBUG(
"created HashedWDFeaturesTransposed with d=%d (%d), alphabetsize=%d, "
529 "dim=%d partial_dim=%d num=%d, len=%d\n",
540 for (int32_t i=0; i<
degree; i++)