ViennaCL - The Vienna Computing Library  1.2.0
kernel_parameters.hpp
Go to the documentation of this file.
1 #ifndef VIENNACL_IO_KERNEL_PARAMETERS_HPP
2 #define VIENNACL_IO_KERNEL_PARAMETERS_HPP
3 
4 /* =========================================================================
5  Copyright (c) 2010-2011, Institute for Microelectronics,
6  Institute for Analysis and Scientific Computing,
7  TU Wien.
8 
9  -----------------
10  ViennaCL - The Vienna Computing Library
11  -----------------
12 
13  Project Head: Karl Rupp rupp@iue.tuwien.ac.at
14 
15  (A list of authors and contributors can be found in the PDF manual)
16 
17  License: MIT (X11), see file LICENSE in the base directory
18 ============================================================================= */
19 
20 
25 #include "viennacl/ocl/backend.hpp"
26 #include "pugixml/src/pugixml.hpp"
27 
28 namespace viennacl
29 {
30  namespace io
31  {
/** @brief Element names written to (and queried from) the XML parameter document. */
namespace tag
{
  // document skeleton and device description
  static std::string root("parameters");
  static std::string devices("devices");
  static std::string device("device");
  static std::string name("name");
  static std::string driver("driver");
  static std::string compun("computeunits");
  static std::string workgrp("workgroupsize");

  // per-device test sections
  static std::string tests("tests");
  static std::string test("test");
  static std::string numeric("numeric");

  // kernels and their tuning parameters
  static std::string kernels("kernels");
  static std::string kernel("kernel");
  static std::string params("params");
  static std::string param("param");
  static std::string value("value");
  static std::string alignment("alignment");
} // end namespace tag
51 
/** @brief String values that appear as element contents in the parameter document.
 *
 * globsize / locsize are the parameter names looked up when kernel work sizes
 * are read back; the remaining entries are not referenced by the code in this file.
 */
namespace val
{
  // names of the tunable kernel parameters
  static std::string globsize("globalsize");
  static std::string locsize("localsize");

  // container kinds
  static std::string vec("vector");
  static std::string matrix("matrix");
  static std::string compmat("compressed_matrix");

  // numeric types
  static std::string fl("float");
  static std::string dbl("double");
} // end namespace val
61 
64  {
66  {
67  root = doc.append_child();
68  root.set_name(tag::root.c_str());
69  last = root;
70 
71  devices_open = false;
72  tests_open = false;
73  kernels_open = false;
74  parameters_open = false;
75  }
76 
77  void add_device()
78  {
79  pugi::xml_node dev;
80  if(devices_open)
81  {
82  dev = devices.append_child();
83  dev.set_name(tag::device.c_str());
84  }
85  else
86  {
87  devices = last.append_child();
88  devices.set_name(tag::devices.c_str());
89 
90  dev = devices.append_child();
91  dev.set_name(tag::device.c_str());
92 
93  devices_open = true;
94  }
95  last = dev;
96  }
97 
98  void add_test()
99  {
100  pugi::xml_node test;
101  if(tests_open)
102  {
103  test = tests.append_child();
104  test.set_name(tag::test.c_str());
105  }
106  else
107  {
108  tests = last.append_child();
109  tests.set_name(tag::tests.c_str());
110 
111  test = tests.append_child();
112  test.set_name(tag::test.c_str());
113 
114  tests_open = true;
115  }
116  last = test;
117  // close the current kernels section
118  // so a new one is created for this new test
119  kernels_open = false;
120  }
121 
122  void add_kernel()
123  {
124  pugi::xml_node kern;
125  if(kernels_open)
126  {
127  kern = kernels.append_child();
128  kern.set_name(tag::kernel.c_str());
129  }
130  else
131  {
132  kernels = last.append_child();
133  kernels.set_name(tag::kernels.c_str());
134 
135  kern = kernels.append_child();
136  kern.set_name(tag::kernel.c_str());
137 
138  kernels_open = true;
139  }
140  last = kern;
141 
142  // close the current parameters section
143  // so a new one is created for this new kernel
144  parameters_open = false;
145  }
146 
148  {
149  pugi::xml_node para;
150 
151  if(parameters_open)
152  {
153  para = parameters.append_child();
154  para.set_name(tag::param.c_str());
155  }
156  else
157  {
158  parameters = last.append_child();
159  parameters.set_name(tag::params.c_str());
160 
161  para = parameters.append_child();
162  para.set_name(tag::param.c_str());
163 
164  parameters_open = true;
165  }
166  last = para;
167  }
168 
169  template<typename ValueT>
170  void add_data_node(std::string tagstr, ValueT data)
171  {
172  std::stringstream ss;
173  ss << data;
174  add_data_node(tagstr, ss.str());
175  }
176 
177  void add_data_node(std::string tagstr, std::string data)
178  {
179  pugi::xml_node node = last.append_child();
180 
181  if(tagstr == tag::name)
182  node.set_name(tag::name.c_str());
183  else if(tagstr == tag::driver)
184  node.set_name(tag::driver.c_str());
185  else if(tagstr == tag::numeric)
186  node.set_name(tag::numeric.c_str());
187  else if(tagstr == tag::alignment)
188  node.set_name(tag::alignment.c_str());
189  else if(tagstr == tag::value)
190  node.set_name(tag::value.c_str());
191  else if(tagstr == tag::compun)
192  node.set_name(tag::compun.c_str());
193  else if(tagstr == tag::workgrp)
194  node.set_name(tag::workgrp.c_str());
195  else
196  std::cout << "# Error adding data node: node tag not recognized .." << std::endl;
197  node.append_child(pugi::node_pcdata).set_value(data.c_str());
198  }
199 
200  void load(std::string filename)
201  {
202  doc.load_file(filename.c_str());
203  }
204 
205  void dump(std::string filename)
206  {
207  std::ofstream outstream(filename.c_str());
208  this->dump(outstream);
209  outstream.close();
210  }
211 
212  void dump(std::ostream& stream = std::cout)
213  {
214  doc.save(stream, " ");
215  }
216 
217  pugi::xml_document doc;
218  pugi::xml_node root;
219  pugi::xml_node devices;
220  pugi::xml_node tests;
221  pugi::xml_node kernels;
222  pugi::xml_node parameters;
223  pugi::xml_node last;
224 
226  bool tests_open;
229 
230  };
231 
/** @brief Helper meta class returning a one-character prefix for a scalar type.
 *
 * Specialized for float ('f') and double ('d'). Using it with any other type
 * fails at link time because the primary template's get() has no definition.
 */
template <typename T>
struct first_letter_of_type
{
  static char get(); //intentionally not implemented, class must be specialized
};

/** @brief Specialization for single precision: yields 'f'. */
template <>
struct first_letter_of_type <float>
{
  static char get() { return 'f'; }
};

/** @brief Specialization for double precision: yields 'd'. */
template <>
struct first_letter_of_type <double>
{
  static char get() { return 'd'; }
};
250 
251  template <typename T>
253  {
254  static std::string get(); //intentionally not implemented, class must be specialized
255  };
256 
257  template <typename T, unsigned int ALIGNMENT>
258  struct program_for_vcltype < viennacl::vector<T, ALIGNMENT> >
259  {
260  static std::string get()
261  {
262  std::stringstream ss;
263  ss << first_letter_of_type<T>::get() << "_vector_" << ALIGNMENT;
264  return ss.str();
265  }
266  };
267 
268  template <typename T, unsigned int ALIGNMENT>
269  struct program_for_vcltype < viennacl::matrix<T, row_major, ALIGNMENT> >
270  {
271  static std::string get()
272  {
273  std::stringstream ss;
274  ss << first_letter_of_type<T>::get() << "_matrix_row_" << ALIGNMENT;
275  return ss.str();
276  }
277  };
278 
279  template <typename T, unsigned int ALIGNMENT>
280  struct program_for_vcltype < viennacl::matrix<T, column_major, ALIGNMENT> >
281  {
282  static std::string get()
283  {
284  std::stringstream ss;
285  ss << first_letter_of_type<T>::get() << "_matrix_col_" << ALIGNMENT;
286  return ss.str();
287  }
288  };
289 
290  template <typename T, unsigned int ALIGNMENT>
291  struct program_for_vcltype < viennacl::compressed_matrix<T, ALIGNMENT> >
292  {
293  static std::string get()
294  {
295  std::stringstream ss;
296  ss << first_letter_of_type<T>::get() << "_compressed_matrix_" << ALIGNMENT;
297  return ss.str();
298  }
299  };
300 
301  template<typename SCALARTYPE, unsigned int ALIGNMENT>
302  void set_kernel_params(std::string program_name,
303  std::string kernel_name,
304  unsigned int glob, //total no. of threads
305  unsigned int loc) //threads per work group
306  {
307  //get kernel from pool and set work sizes:
308  viennacl::ocl::kernel & k = viennacl::ocl::get_kernel(program_name, kernel_name);
309  k.global_work_size(0, glob);
310  k.local_work_size(0, loc);
311 
312  //std::cout << "Setting [" << glob << ", " << loc << "] for kernel " << kernel_name << std::endl;
313  }
314 
315  template<typename VclBasicType>
316  void tune_impl(parameter_database& paras, std::string parent)
317  {
318  typedef typename VclBasicType::value_type::value_type SCALARTYPE;
319 
320  // create dummy vectors; the kernels have to be created ..
321  VclBasicType dummy;
322 
323  // extract the kernels for which parameters are present
324  std::string kernel_str = parent+"/kernels/kernel/name/text()";
325  pugi::xpath_node_set kernel_res = paras.doc.select_nodes(kernel_str.c_str());
326 
327  typedef std::vector<std::string> kernels_type;
328  kernels_type kernels;
329  std::cout << "Retrieving kernels..." << std::endl;
330  for (pugi::xpath_node_set::const_iterator it = kernel_res.begin(); it != kernel_res.end(); ++it)
331  {
332  std::stringstream ss;
333  it->node().print(ss, " ");
334  std::string kern(ss.str());
335  kern.erase(std::remove(kern.begin(), kern.end(), '\n'), kern.end()); //trim trailing linebreak
336  kernels.push_back(kern);
337  }
338 
339  // retrieve the actual parameters
340  std::cout << "Retrieving actual parameters..." << std::endl;
341  for(typename kernels_type::iterator iter = kernels.begin();
342  iter != kernels.end(); iter++)
343  {
344  // retrieving the work group ..
345  std::string wg_str = parent+"/kernels/kernel[name='"+*iter+"']/params/param[name='"+val::globsize+"']/value/text()";
346  pugi::xpath_node_set wg_res = paras.doc.select_nodes(wg_str.c_str());
347 
348  unsigned int global_size(0);
349 
350  std::stringstream ss;
351  ss << wg_res[0].node().value();
352  ss >> global_size;
353 
354  // retrieving the local_workers ..
355  std::string lw_str = parent+"/kernels/kernel[name='"+*iter+"']/params/param[name='"+val::locsize+"']/value/text()";
356  pugi::xpath_node_set lw_res = paras.doc.select_nodes(lw_str.c_str());
357 
358  unsigned int local_workers(0);
359 
360  ss.clear();
361  ss << lw_res[0].node().value();
362  ss >> local_workers;
363 
364  //std::cout << "kernel: " << *iter << " wg: " << work_group << " lw: " << local_workers << std::endl;
365 
366  // set the parameters
367  set_kernel_params<SCALARTYPE,1> (program_for_vcltype<VclBasicType>::get(), *iter, global_size, local_workers);
368  //set_kernel_params<SCALARTYPE,4> (*iter, work_group * local_workers, local_workers);
369  //set_kernel_params<SCALARTYPE,16>(*iter, work_group * local_workers, local_workers);
370  }
371  }
372 
/** @brief Helper meta class converting a scalar type to the string used to
 *         select its test section in the parameter document.
 *
 * The primary template is deliberately empty; only float and double provide get().
 */
template <typename T>
struct to_string {};

/** @brief float is spelled "float". */
template <>
struct to_string<float>
{
  static std::string get()
  {
    return std::string("float");
  }
};

/** @brief double is spelled "double". */
template <>
struct to_string<double>
{
  static std::string get()
  {
    return std::string("double");
  }
};
388 
394  template<typename VclBasicType>
395  void read_kernel_parameters(std::string filename)
396  {
397  typedef typename VclBasicType::value_type::value_type SCALARTYPE;
398 
399  parameter_database paras;
400  paras.load(filename);
401 
402  std::string devname = viennacl::ocl::current_device().name();
403 
404  // check if tune parameters for the current device are present
405  std::string device_str = "/parameters/devices/device[name='"+devname+"']";
406  pugi::xpath_node_set device_res = paras.doc.select_nodes(device_str.c_str());
407 
408  if(device_res.size() == 0)
409  {
410  std::cout << "Tuner: There are no parameters for this device present!" << std::endl;
411  // evaluate the parameters for this device?
412  }
413 
414  // check if tune parameters for float exist
415  std::string numeric_str = device_str+"/tests/test[numeric='"+to_string<SCALARTYPE>::get()+"']";
416  pugi::xpath_node_set numeric_res = paras.doc.select_nodes(numeric_str.c_str());
417 
418  if(numeric_res.size() > 0)
419  {
420  tune_impl<VclBasicType>(paras, numeric_str);
421  }
422  else
423  {
424  std::cout << "Tuner: There are no parameters for numeric type float present!" << std::endl;
425  }
426 
427  // // check if tune parameters for double exist
428  // std::string double_str = device_str+"/tests/test[numeric='"+val::dbl+"']";
429  // pugi::xpath_node_set double_res = paras.doc.select_nodes(double_str.c_str());
430  //
431  // if(double_res.size() > 0)
432  // {
433  // tune_impl<double>(paras, double_str);
434  // }
435  // else
436  // {
437  // std::cout << "Tuner: There are no parameters for numeric type double present!" << std::endl;
438  // }
439 
440  }
441 
442  } // end namespace io
443 
444 } // end namespace viennacl
445 
446 #endif