Bolt  1.1
C++ template library with support for OpenCL
synchronized_view.h
Go to the documentation of this file.
1 /***************************************************************************
2 * Copyright 2012 - 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 
16 ***************************************************************************/
17 
22 #if !defined( BOLT_AMP_SYNCVIEW_H )
23 #define BOLT_AMP_SYNCVIEW_H
24 
25 
26 #include <amp.h>
27 #pragma once
28 
29 namespace bolt {
30 
31  // Experimental type to see if we can override the CPU implementation of array indexes to eliminate
32  // the cpu-side "if synchronized" checks since these are costing a significant amount of performance.
33  // This prototype only supports array dimensions of 2.
34  template <typename _Value_type, int _Rank = 1>
35  class synchronized_view : public concurrency::array_view<_Value_type, _Rank>
36  {
37  public:
38  typedef typename _Value_type value_type;
39 
40 
41  template <typename _Container>
42  explicit synchronized_view(int _E0, int _E1, _Container& _Src) :
43  concurrency::array_view(_E0 , _E1, _Src)
44  {};
45 
46 
47  explicit synchronized_view(int _E0, int _E1, _Value_type * _Src) :
48  concurrency::array_view<_Value_type, _Rank> (_E0 , _E1, _Src)
49  {};
50 
51 
52 
53  //amprt.h: 1817
54  _Ret_ void * _Access(_Access_mode _Requested_mode, const concurrency::index<_Rank>& _Index) const __CPU_ONLY
55  {
56  static_assert(_Rank == 2, "value_type& array_view::operator()(int,int) is only permissible on array_view<T, 2>");
57  int * _Ptr = reinterpret_cast<int *>(_M_buffer_descriptor._M_data_ptr);
58  // This only works for array_dim = 2, we couldn't call flatten_helper without private access to Index._M_base.
59  return &_Ptr[_M_total_linear_offset + ((sizeof(_Value_type)/sizeof(int)) * (_M_array_multiplier[0] * _Index[0] + _Index[1]))];
60  //return &_Ptr[ 0 + ((sizeof(_Value_type)/sizeof(int)) * (_M_array_multiplier[0] * _Index[0] + _Index[1]))];
61  //return &_Ptr[_M_total_linear_offset + (_Element_size * _Flatten_helper::func(_M_array_multiplier._M_base, _Index._M_base))];
62  }
63 
64  _Ret_ void * _Access(_Access_mode _Requested_mode, const concurrency::index<_Rank>& _Index) const __GPU_ONLY
65  {
66 
67  UNREFERENCED_PARAMETER(_Requested_mode);
68 
69  int * _Ptr = reinterpret_cast<int *>(_M_buffer_descriptor._M_data_ptr);
70  return &_Ptr[_M_total_linear_offset + ((sizeof(_Value_type)/sizeof(int)) * (_M_array_multiplier[0] * _Index[0] + _Index[1]))];
71  //return &_Ptr[_M_total_linear_offset + (_Element_size * _Flatten_helper::func(_M_array_multiplier._M_base, _Index._M_base))];
72  }
73 
74  // amp.h: 2309, amprt.h:1756
75  value_type& operator() (const concurrency::index<_Rank>& _Index) const __GPU
76  {
77  void * _Ptr = _Access(_Read_write_access, _Index);
78  return *reinterpret_cast<_Value_type*>(_Ptr);
79  }
80 
81  _Value_type& operator() (int _I0, int _I1) const __GPU
82  {
83  static_assert(_Rank == 2, "value_type& array_view::operator()(int,int) is only permissible on array_view<T, 2>");
84  return this->operator()(concurrency::index<2>(_I0,_I1));
85  }
86 
87  };
88 };
89 
90 #endif