Bolt  1.1
C++ template library with support for OpenCL
bolt.h
Go to the documentation of this file.
1 /***************************************************************************
2 * Copyright 2012 - 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 
16 ***************************************************************************/
17 
18 
23 #pragma once
24 #if !defined( BOLT_CL_BOLT_H )
25 #define BOLT_CL_BOLT_H
26 #define __CL_ENABLE_EXCEPTIONS
27 #define CL_USE_DEPRECATED_OPENCL_1_1_APIS
28 
29 #include <CL/cl.h>
30 /*For enabling only the OpenCL 1.1 specification uncomment the following line*/
31 //#undef CL_VERSION_1_2
32 #include <CL/cl.hpp>
33 
34 
35 #include <string>
36 #include <map>
37 #include <boost/thread/mutex.hpp>
38 #include "bolt/BoltVersion.h"
39 #include "bolt/cl/control.h"
40 #include "bolt/cl/clcode.h"
41 
/*
 * PUSH_BACK_UNIQUE( CONTAINER, ELEMENT )
 *
 * Appends ELEMENT to CONTAINER unless an equal element (as found by std::find)
 * is already present.
 *
 * - The expansion is a single do/while statement, so the macro can be used as
 *   the unbraced body of an if/else without capturing the following `else`.
 * - The trailing ';' is kept on purpose: call sites written without a
 *   semicolon keep compiling, and call sites that do add one only produce an
 *   extra empty statement.
 * - Both arguments are parenthesised where they are used as expressions.
 * - ELEMENT is evaluated twice when it is not yet present (once for the search,
 *   once for the push_back), so it must not have side effects.
 * - Requires <algorithm> for std::find.
 */
#define PUSH_BACK_UNIQUE(CONTAINER, ELEMENT) \
    do { \
        if (std::find((CONTAINER).begin(), (CONTAINER).end(), (ELEMENT)) == (CONTAINER).end()) \
            (CONTAINER).push_back(ELEMENT); \
    } while (0);
45 
68 namespace bolt {
69  namespace cl {
70 
// Kernel strings, one per algorithm family. They are only declared here
// (extern); the definitions live in another translation unit.
// NOTE(review): presumably each string holds the OpenCL kernel source that is
// passed to getKernels() as baseKernelString -- confirm against the definitions.
71  extern const std::string binary_search_kernels;
72  extern const std::string copy_kernels;
73  extern const std::string count_kernels;
74  extern const std::string fill_kernels;
75  extern const std::string gather_kernels;
76  extern const std::string generate_kernels;
77  extern const std::string merge_kernels;
78  extern const std::string min_element_kernels;
79  extern const std::string reduce_kernels;
80  extern const std::string reduce_by_key_kernels;
81  extern const std::string scan_kernels;
82  extern const std::string scan_by_key_kernels;
83  extern const std::string scatter_kernels;
84  extern const std::string sort_kernels;
85  extern const std::string stablesort_kernels;
86  extern const std::string stablesort_by_key_kernels;
87  extern const std::string sort_uint_kernels;
88  extern const std::string sort_int_kernels;
89  extern const std::string sort_common_kernels;
90  extern const std::string sort_by_key_kernels;
91  extern const std::string sort_by_key_int_kernels;
92  extern const std::string sort_by_key_uint_kernels;
93  extern const std::string transform_kernels;
94  extern const std::string transform_reduce_kernels;
95  extern const std::string transform_scan_kernels;
96 
97  // transform_scan kernel names
98  //static std::string transform_scan_kernel_names_array[] = { "perBlockTransformScan", "intraBlockInclusiveScan", "perBlockAddition" };
99  //const std::vector<std::string> transformScanKernelNames(transform_scan_kernel_names_array, transform_scan_kernel_names_array+3);
100 
101  /******************************************************************
102  * Kernel Template Specialization
103  *****************************************************************/
/**
 * Base class for kernel template specializers.
 *
 * A specializer carries a list of kernel names and a functor that turns a list
 * of type names into a string. The base implementation of the functor only
 * returns an error text; derived classes are expected to override it.
 */
class KernelTemplateSpecializer
{
public:
    // The class is used polymorphically (it has a virtual functor and is passed
    // around by pointer), so destruction through a base pointer must be well
    // defined.
    virtual ~KernelTemplateSpecializer( ) { }

    /**
     * Kernel template specializer functor.
     * @param typeNames type names to specialize for (unused by this base version).
     * @return the fixed error text below; overriding classes return their own string.
     */
    virtual const ::std::string operator() (const ::std::vector< ::std::string >& typeNames) const
    { return "Error; virtual function not overloaded"; }

    /** Appends a kernel name to the list. */
    void addKernelName( const std::string& kernelName) { kernelNames.push_back(kernelName); }

    /**
     * Returns the name stored at position kernelIndex.
     * The index is not range-checked; it must satisfy 0 <= kernelIndex < numKernels().
     */
    const ::std::string name( int kernelIndex ) const { return kernelNames[ kernelIndex ]; }

    /** Returns the number of kernel names stored. */
    size_t numKernels() const { return kernelNames.size(); }

    /** Returns a copy of the kernel name list. */
    const ::std::vector< ::std::string > getKernelNames() const { return kernelNames; }

public:
    // Kernel names in insertion order; public, so it can also be accessed directly.
    ::std::vector< ::std::string > kernelNames;
};
126 
// Forward declaration of bolt::cl::control, which the declarations in this
// header take by const reference (see getKernels and wait).
// NOTE(review): bolt/cl/control.h is already included above, so this is
// presumably redundant -- confirm before removing.
127  class control;
128  //class KernelTemplateSpecializer;
129 
// Declared here, defined elsewhere. Takes a file name and returns a std::string.
// NOTE(review): presumably returns the contents of the named file; the
// behaviour on a missing or unreadable file is not visible here -- confirm.
130  extern std::string fileToString(const std::string &fileName);
131 
132  /**********************************************************************
133  * getKernels
134  * returns vector of cl::Kernel objects either by constructing
135  * and compiling the kernels, or by returning the kernels if
136  * previously compiled.
137  * see bolt/cl/detail/scan.inl for example usage
138  **********************************************************************/
// Parameters (roles marked "presumably" are inferred from names -- confirm
// against the implementation):
//   ctl              - control object; presumably selects the context/device to build for.
//   typeNames        - type name strings; the same kind of list that
//                      KernelTemplateSpecializer::operator() accepts.
//   kts              - specializer supplying the kernel names and the
//                      type-specific string; neither pointer nor pointee is modified here.
//   typeDefinitions  - presumably type definitions to make available to the kernel code.
//   baseKernelString - presumably the common kernel source (see the *_kernels strings).
//   compileOptions   - optional compile options, empty by default.
// Returns a vector of ::cl::Kernel objects.
139  ::std::vector< ::cl::Kernel > getKernels(
140  const control& ctl,
141  const ::std::vector< ::std::string >& typeNames,
142  const KernelTemplateSpecializer * const kts,
143  const ::std::vector< ::std::string >& typeDefinitions,
144  const std::string& baseKernelString,
145  const std::string& compileOptions = ""
146  );
147 
// Reports a three-part version through the output references major, minor and patch.
// NOTE(review): presumably the Bolt library version (bolt/BoltVersion.h is
// included by this header) -- confirm against the definition.
154  void getVersion( cl_uint& major, cl_uint& minor, cl_uint& patch );
155 
// Maps an OpenCL status code to a narrow (std::string) text. V_OpenCL embeds
// the returned text in the message of the exception it throws.
// NOTE(review): presumably the text is the symbolic name or a description of
// the status code -- confirm against the definition.
160  std::string clErrorStringA( const cl_int& status );
161 
// Wide-character (std::wstring) counterpart of clErrorStringA: maps an OpenCL
// status code to a text.
166  std::wstring clErrorStringW( const cl_int& status );
167 
174  inline cl_int V_OpenCL( cl_int res, const std::string& msg, size_t lineno )
175  {
176  switch( res )
177  {
178  case CL_SUCCESS:
179  break;
180  default:
181  {
182  std::string tmp;
183  tmp.append( "V_OpenCL< " );
184  tmp.append( clErrorStringA( res ) );
185  tmp.append( " >: " );
186  tmp.append( msg );
187  //std::cout << tmp << std::endl;
188  throw ::cl::Error( res, tmp.c_str( ) );
189  }
190  }
191 
192  return res;
193  }
194  #define V_OPENCL( status, message ) V_OpenCL( status, message, __LINE__ )
195 
// Takes the control object and an OpenCL event; declared here, defined elsewhere.
// NOTE(review): presumably blocks until the event has completed, with the
// waiting strategy taken from ctl -- confirm against the definition.
196  void wait( const bolt::cl::control &ctl, ::cl::Event &e );
197 
198  /******************************************************************
199  * Program Map - so each kernel is only compiled once
200  *****************************************************************/
204  {
205  ::cl::Context context;
206  ::std::string device;
207  ::std::string compileOptions;
208  ::std::string kernelSource;
209  };
210 
212  {
213  ::cl::Program program;
214  };
215 
217  {
218  bool operator( )( const ProgramMapKey& lhs, const ProgramMapKey& rhs ) const
219  {
220  int comparison;
221  // context
222  // Do I really need to compare the context? Yes, required by OpenCL. -DT
223  if( lhs.context() < rhs.context() )
224  return true;
225  else if( lhs.context() > rhs.context() )
226  return false;
227  // else equal; compare using next element of key
228 
229  // device
230  comparison = lhs.device.compare(rhs.device);
231  //std::cout << "Compare Device: " << comparison << std::endl;
232  if( comparison < 0 )
233  {
234  return true;
235  }
236  else if( comparison > 0 )
237  {
238  return false;
239  }
240  // else equal; compare using next element of key
241 
242  // compileOptions
243  comparison = lhs.compileOptions.compare(rhs.compileOptions);
244  //std::cout << "Compare Options: " << comparison << std::endl;
245  if( comparison < 0 )
246  {
247  return true;
248  }
249  else if( comparison > 0 )
250  {
251  return false;
252  }
253  //else
254  // std::cout << "<" << lhs.compileOptions << "> == <" << rhs.compileOptions << ">" << std::endl;
255  // else equal; compare using next element of key
256 
257  // kernelSource
258  comparison = lhs.kernelSource.compare(rhs.kernelSource);
259  //std::cout << "Compare Source: " << comparison << std::endl;
260  if( comparison < 0 )
261  return true;
262  else if( comparison > 0 )
263  return false;
264  //else
265  // std::cout << "<lhs.kernelSource> == <rhs.kernelSource>" << std::endl;
266  // else equal; compare using next element of key
267 
268  // all elements equal
269  return false;
270  }
271  };
272 
// Ordered map from ProgramMapKey to ProgramMapValue, sorted with ProgramMapKeyComp.
273  typedef ::std::map< ProgramMapKey, ProgramMapValue, ProgramMapKeyComp > ProgramMap;
274  //typedef ::std::map< ::std::string, ProgramMapValue> ProgramMap;
275 
276  // defined in bolt.cpp; only declared here
// Program map shared across translation units, and a mutex declared alongside it.
// NOTE(review): presumably programMapMutex must be held while reading or
// modifying programMap -- confirm against the code that uses them.
277  extern boost::mutex programMapMutex;
278  extern ProgramMap programMap;
279 
280  };
281 };
282 
// ALIGNED( bound ): alignment specifier spelled for the current platform --
// __declspec( align ) when _WIN32 is defined, the GNU aligned attribute otherwise.
#if !defined( _WIN32 )
    #define ALIGNED( bound ) __attribute__ ( (aligned( bound ) ) )
#else
    #define ALIGNED( bound ) __declspec( align( bound ) )
#endif
288 
289 //Visual Studio 2012 is not able to map char to cl_char. Hence this typename is added.
// Plain char gets its own entry in addition to cl_char below.
// NOTE(review): BOLT_CREATE_TYPENAME is not defined in this file; presumably it
// comes from bolt/cl/clcode.h and registers the type's name as a string -- confirm.
290 BOLT_CREATE_TYPENAME( char );
291 
// One BOLT_CREATE_TYPENAME entry per OpenCL scalar host type.
// NOTE(review): the macro is not defined in this file; presumably it comes from
// bolt/cl/clcode.h -- confirm what it generates before adding further types.
292 BOLT_CREATE_TYPENAME( cl_char );
293 BOLT_CREATE_TYPENAME( cl_uchar );
294 BOLT_CREATE_TYPENAME( cl_short );
295 BOLT_CREATE_TYPENAME( cl_ushort );
296 BOLT_CREATE_TYPENAME( cl_int );
297 BOLT_CREATE_TYPENAME( cl_uint );
298 BOLT_CREATE_TYPENAME( cl_long );
299 BOLT_CREATE_TYPENAME( cl_ulong );
300 BOLT_CREATE_TYPENAME( cl_float );
301 BOLT_CREATE_TYPENAME( cl_double );
302 
304 //BOLT_CREATE_TYPENAME( cl_int );
305 //BOLT_CREATE_CLCODE( cl_int, "int" );
306 //
307 //BOLT_CREATE_TYPENAME( cl_int2 );
308 //BOLT_CREATE_CLCODE( cl_int2, "int2" );
309 //
310 //BOLT_CREATE_TYPENAME( cl_int4 );
311 //BOLT_CREATE_CLCODE( cl_int4, "int4" );
312 //
313 //BOLT_CREATE_TYPENAME( cl_uint );
314 //BOLT_CREATE_CLCODE( cl_uint, "uint" );
315 //
316 //BOLT_CREATE_TYPENAME( cl_uint2 );
317 //BOLT_CREATE_CLCODE( cl_uint2, "uint2" );
318 //
319 //BOLT_CREATE_TYPENAME( cl_uint4 );
320 //BOLT_CREATE_CLCODE( cl_uint4, "uint4" );
321 //
322 //BOLT_CREATE_TYPENAME( cl_float );
323 //BOLT_CREATE_CLCODE( cl_float, "float" );
324 //
325 //BOLT_CREATE_TYPENAME( cl_float2 );
326 //BOLT_CREATE_CLCODE( cl_float2, "float2" );
327 //
328 //BOLT_CREATE_TYPENAME( cl_float4 );
329 //BOLT_CREATE_CLCODE( cl_float4, "float4" );
330 //
331 //BOLT_CREATE_TYPENAME( cl_double );
332 //BOLT_CREATE_CLCODE( cl_double, "double" );
333 //
334 //BOLT_CREATE_TYPENAME( cl_double2 );
335 //BOLT_CREATE_CLCODE( cl_double2, "double2" );
336 //
337 //BOLT_CREATE_TYPENAME( cl_double4 );
338 //BOLT_CREATE_CLCODE( cl_double4, "double4" );
339 
340 #endif