22 #if !defined( BOLT_CL_DEVICE_VECTOR_H )
23 #define BOLT_CL_DEVICE_VECTOR_H
26 #include <type_traits>
31 #include <boost/iterator/iterator_facade.hpp>
32 #include <boost/iterator/reverse_iterator.hpp>
33 #include <boost/shared_array.hpp>
59 :
public std::random_access_iterator_tag
71 template<
typename T >
77 template<
typename Container >
78 class UnMapBufferFunctor
80 Container& m_Container;
84 UnMapBufferFunctor( Container& rhs ): m_Container( rhs )
87 void operator( )(
const void* pBuff )
89 ::cl::Event unmapEvent;
91 V_OPENCL( m_Container.m_commQueue.enqueueUnmapMemObject( m_Container.m_devMemory, const_cast< void* >( pBuff ), NULL, &unmapEvent ),
92 "shared_ptr failed to unmap host memory back to device memory" );
93 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
97 typedef T* naked_pointer;
98 typedef const T* const_naked_pointer;
103 typedef T value_type;
104 typedef ptrdiff_t difference_type;
105 typedef difference_type distance_type;
106 typedef size_t size_type;
108 typedef boost::shared_array< value_type > pointer;
109 typedef boost::shared_array< const value_type > const_pointer;
118 template<
typename Container >
122 reference_base(Container &rhs, size_type index ): m_Container( rhs ), m_Index( index )
128 operator value_type( )
const
130 cl_int l_Error = CL_SUCCESS;
131 naked_pointer result =
reinterpret_cast< naked_pointer
>( m_Container.m_commQueue.enqueueMapBuffer(
132 m_Container.m_devMemory,
true, CL_MAP_READ, m_Index *
sizeof( value_type ),
sizeof( value_type ), NULL, NULL, &l_Error ) );
133 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
135 value_type valTmp = *result;
137 ::cl::Event unmapEvent;
138 V_OPENCL( m_Container.m_commQueue.enqueueUnmapMemObject( m_Container.m_devMemory, result, NULL, &unmapEvent ),
"device_vector failed to unmap host memory back to device memory" );
139 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
146 cl_int l_Error = CL_SUCCESS;
147 naked_pointer result =
reinterpret_cast< naked_pointer
>( m_Container.m_commQueue.enqueueMapBuffer(
148 m_Container.m_devMemory,
true, CL_MAP_WRITE_INVALIDATE_REGION, m_Index *
sizeof( value_type ),
sizeof( value_type ), NULL, NULL, &l_Error ) );
149 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
153 ::cl::Event unmapEvent;
154 V_OPENCL( m_Container.m_commQueue.enqueueUnmapMemObject( m_Container.m_devMemory, result, NULL, &unmapEvent ),
"device_vector failed to unmap host memory back to device memory" );
155 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
167 size_type getIndex()
const
173 Container& m_Container;
201 template<
typename Container >
202 class iterator_base:
public boost::iterator_facade< iterator_base< Container >, value_type, device_vector_tag,
203 typename device_vector::reference, int >
206 typedef typename boost::iterator_facade< iterator_base< Container >, value_type,
device_vector_tag,
221 difference_type m_Index;
222 difference_type m_Ptr1[ 3 ];
227 iterator_base( ): m_Container( getContainer() ), m_Index( 0 )
231 iterator_base( Container& rhs, difference_type index ): m_Container( rhs ), m_Index( index )
235 template<
typename OtherContainer >
236 iterator_base(
const iterator_base< OtherContainer >& rhs ): m_Container( rhs.m_Container ), m_Index( rhs.m_Index )
// Copy-assignment from another iterator over the same Container type:
// takes over rhs's container member and rhs's index.
// NOTE(review): m_Container looks to be declared as a reference member
// (Container&). If so, the first assignment below copy-assigns the referenced
// container object instead of rebinding this iterator to rhs's container --
// confirm whether that is intended.
241 iterator_base< Container >& operator = (
const iterator_base< Container >& rhs )
243 m_Container = rhs.m_Container;
244 m_Index = rhs.m_Index;
248 iterator_base< Container > & operator+= (
const difference_type & n )
254 iterator_base< Container >& operator = (
const difference_type & n )
262 const iterator_base< Container > operator + (
const difference_type & n )
const
264 iterator_base< Container > result(*
this);
269 Container& getContainer( )
const
276 const Payload gpuPayload( )
const
278 Payload payload = { m_Index, { 0, 0, 0 } };
285 const difference_type gpuPayloadSize( )
const
287 cl_int l_Error = CL_SUCCESS;
288 ::cl::Device which_device;
289 l_Error = m_Container.m_commQueue.getInfo(CL_QUEUE_DEVICE,&which_device );
291 cl_uint deviceBits = which_device.getInfo< CL_DEVICE_ADDRESS_BITS >( );
294 difference_type payloadSize =
sizeof( difference_type ) + ( deviceBits >> 3 );
297 if( deviceBits == 64 )
304 difference_type m_Index;
305 difference_type distance_to(
const iterator_base< Container >& rhs )
const
307 return static_cast< difference_type
>( rhs.m_Index - m_Index );
314 friend class boost::iterator_core_access;
320 template <
typename >
friend class iterator_base;
322 void advance( difference_type n )
337 template<
typename OtherContainer >
338 bool equal(
const iterator_base< OtherContainer >& rhs )
const
340 bool sameIndex = rhs.m_Index == m_Index;
341 bool sameContainer = (&m_Container == &rhs.m_Container );
343 return ( sameIndex && sameContainer );
348 return m_Container[ m_Index ];
351 Container& m_Container;
363 template<
typename Container >
364 class reverse_iterator_base:
public boost::iterator_facade< reverse_iterator_base< Container >, value_type, std::random_access_iterator_tag, typename device_vector::reference, int >
373 template<
typename OtherContainer >
381 m_Container = lhs.m_Container;
382 m_Index = lhs.m_Index;
398 #if !defined(_WIN32) && defined(__x86_64__)
421 return static_cast< difference_type
>( m_Index - lhs.m_Index );
426 friend class boost::iterator_core_access;
434 void advance( difference_type n )
450 template<
typename OtherContainer >
453 bool sameIndex = lhs.m_Index == m_Index;
454 bool sameContainer = (&m_Container == &lhs.m_Container );
456 return ( sameIndex && sameContainer );
461 return m_Container[ m_Index ];
464 Container& m_Container;
492 static_assert( !std::is_polymorphic< value_type >::value,
"AMD C++ template extensions do not support the virtual keyword yet" );
505 device_vector( size_type newSize,
const value_type& value = value_type( ), cl_mem_flags flags = CL_MEM_READ_WRITE,
506 bool init =
true,
const control& ctl =
control::getDefault( ) ): m_Size( newSize ), m_commQueue( ctl.getCommandQueue( ) ), m_Flags( flags )
508 static_assert( !std::is_polymorphic< value_type >::value,
"AMD C++ template extensions do not support the virtual keyword yet" );
511 cl_int l_Error = CL_SUCCESS;
512 ::cl::Context l_Context = m_commQueue.getInfo< CL_QUEUE_CONTEXT >( &l_Error );
513 V_OPENCL( l_Error,
"device_vector failed to query for the context of the ::cl::CommandQueue object" );
517 m_devMemory = ::cl::Buffer( l_Context, m_Flags, m_Size *
sizeof( value_type ) );
521 std::vector< ::cl::Event > fillEvent( 1 );
529 size_t sizeDS =
sizeof(value_type);
531 if( !( sizeDS & (sizeDS - 1 ) ) )
533 V_OPENCL( m_commQueue.enqueueFillBuffer< value_type >( m_devMemory, value, 0,
534 newSize *
sizeof( value_type ), NULL, &fillEvent.front( ) ),
535 "device_vector failed to fill the internal buffer with the requested pattern");
540 ::cl::Event fill_mapEvent;
541 value_type *host_buffer = ( value_type* )ctl.getCommandQueue( ).enqueueMapBuffer (
544 CL_MAP_READ | CL_MAP_WRITE,
546 sizeof( value_type )*newSize,
551 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
552 bolt::cl::wait( ctl, fill_mapEvent );
556 std::fill_n( stdext::make_checked_array_iterator( host_buffer, newSize ),
567 l_Error = ctl.getCommandQueue( ).enqueueUnmapMemObject( m_devMemory,
570 &fillEvent.front( ) );
571 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
575 catch( std::exception& e )
577 std::cout <<
"device_vector enqueueFillBuffer error condition reported:" << std::endl << e.what() << std::endl;
584 V_OPENCL( m_commQueue.enqueueWaitForEvents( fillEvent ),
"device_vector failed to wait for an event" );
586 catch( std::exception& e )
588 std::cout <<
"device_vector enqueueFillBuffer enqueueWaitForEvents error condition reported:" << std::endl << e.what() << std::endl;
607 template<
typename InputIterator >
610 typename std::enable_if< !std::is_integral< InputIterator >::value >::type* = 0 ): m_Size( newSize ),
611 m_commQueue( ctl.getCommandQueue( ) ), m_Flags( flags )
// Compile-time guard: the elements read through InputIterator must be
// convertible TO this vector's value_type (is_convertible< From, To >),
// which is the direction the diagnostic text describes.
613 static_assert( std::is_convertible<
typename std::iterator_traits< InputIterator >::value_type, value_type >::value,
614 "iterator value_type does not convert to device_vector value_type" );
615 static_assert( !std::is_polymorphic< value_type >::value,
"AMD C++ template extensions do not support the virtual keyword yet" );
623 cl_int l_Error = CL_SUCCESS;
624 ::cl::Context l_Context = m_commQueue.getInfo< CL_QUEUE_CONTEXT >( &l_Error );
625 V_OPENCL( l_Error,
"device_vector failed to query for the context of the ::cl::CommandQueue object" );
627 if( m_Flags & CL_MEM_USE_HOST_PTR )
629 m_devMemory = ::cl::Buffer( l_Context, m_Flags, m_Size *
sizeof( value_type ),
630 reinterpret_cast< value_type* >( const_cast< value_type* >( &*begin ) ) );
634 m_devMemory = ::cl::Buffer( l_Context, m_Flags, m_Size *
sizeof( value_type ) );
638 size_t byteSize = m_Size *
sizeof( value_type );
// Blocking map (CL_TRUE) of the first byteSize bytes of m_devMemory so the
// host can write into it. The third argument is a cl_map_flags value, hence
// CL_MAP_WRITE rather than a cl_mem_flags constant.
642 naked_pointer pointer =
static_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer(
643 m_devMemory, CL_TRUE, CL_MAP_WRITE, 0, byteSize, 0, 0, &l_Error) );
// l_Error is the out-parameter filled in by the map call above.
644 V_OPENCL( l_Error,
"enqueueMapBuffer failed in device_vector constructor" );
646 std::copy( begin, begin + m_Size, stdext::checked_array_iterator< naked_pointer >( pointer, m_Size ) );
648 std::copy( begin, begin + m_Size, pointer );
650 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, pointer, 0, 0 );
651 V_OPENCL( l_Error,
"enqueueUnmapMemObject failed in device_vector constructor" );
663 template<
typename InputIterator >
665 typename std::enable_if< !std::is_integral< InputIterator >::value >::type* = 0 ): m_commQueue( ctl.getCommandQueue( ) ), m_Flags( flags )
// Compile-time guard: the elements read through InputIterator must be
// convertible TO this vector's value_type (is_convertible< From, To >),
// which is the direction the diagnostic text describes.
667 static_assert( std::is_convertible<
typename std::iterator_traits< InputIterator >::value_type, value_type >::value,
668 "iterator value_type does not convert to device_vector value_type" );
669 static_assert( !std::is_polymorphic< value_type >::value,
"AMD C++ template extensions do not support the virtual keyword yet" );
672 cl_int l_Error = CL_SUCCESS;
673 ::cl::Context l_Context = m_commQueue.getInfo< CL_QUEUE_CONTEXT >( &l_Error );
674 V_OPENCL( l_Error,
"device_vector failed to query for the context of the ::cl::CommandQueue object" );
676 m_Size = std::distance( begin, end );
682 size_t byteSize = m_Size *
sizeof( value_type );
684 if( m_Flags & CL_MEM_USE_HOST_PTR )
686 m_devMemory = ::cl::Buffer( l_Context, m_Flags, byteSize,
687 reinterpret_cast< value_type* >( const_cast< value_type* >( &*begin ) ) );
691 m_devMemory = ::cl::Buffer( l_Context, m_Flags, byteSize );
// Blocking map (CL_TRUE) of the first byteSize bytes of m_devMemory so the
// host can write into it. The third argument is a cl_map_flags value, hence
// CL_MAP_WRITE rather than a cl_mem_flags constant.
695 naked_pointer pointer =
static_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer(
696 m_devMemory, CL_TRUE, CL_MAP_WRITE, 0, byteSize, 0, 0, &l_Error) );
// l_Error is the out-parameter filled in by the map call above.
697 V_OPENCL( l_Error,
"enqueueMapBuffer failed in device_vector constructor" );
699 std::copy( begin, end, stdext::checked_array_iterator< naked_pointer >( pointer, m_Size ) );
703 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, pointer, 0, 0 );
704 V_OPENCL( l_Error,
"enqueueUnmapMemObject failed in device_vector constructor" );
714 static_assert( !std::is_polymorphic< value_type >::value,
"AMD C++ template extensions do not support the virtual keyword yet" );
718 cl_int l_Error = CL_SUCCESS;
719 m_Flags = m_devMemory.getInfo< CL_MEM_FLAGS >( &l_Error );
720 V_OPENCL( l_Error,
"device_vector failed to query for the memory flags of the ::cl::Buffer object" );
// Copy m_Size elements' worth of bytes from rhs's device buffer into this
// vector's device buffer (both offsets 0), then wait on the copy's event.
732 size_type l_srcSize = m_Size *
sizeof( value_type );
733 ::cl::Event copyEvent;
735 cl_int l_Error = CL_SUCCESS;
// The last argument is the address of copyEvent: the queue stores the copy's
// completion event there so it can be waited on below.
736 l_Error = m_commQueue.enqueueCopyBuffer( rhs.m_devMemory, m_devMemory, 0, 0, l_srcSize, NULL, &copyEvent );
737 V_OPENCL( l_Error,
"device_vector failed to copy data inside of operator=()" );
738 V_OPENCL( copyEvent.wait( ),
"device_vector failed to wait for copy event" );
746 m_Flags = rhs.m_Flags;
747 m_commQueue = rhs.m_commQueue;
// Copy m_Size elements' worth of bytes from rhs's device buffer into this
// vector's device buffer (both offsets 0), then wait on the copy's event.
756 size_type l_srcSize = m_Size *
sizeof( value_type );
757 ::cl::Event copyEvent;
759 cl_int l_Error = CL_SUCCESS;
// The last argument is the address of copyEvent: the queue stores the copy's
// completion event there so it can be waited on below.
760 l_Error = m_commQueue.enqueueCopyBuffer( rhs.m_devMemory, m_devMemory, 0, 0, l_srcSize, NULL, &copyEvent );
761 V_OPENCL( l_Error,
"device_vector failed to copy data inside of operator=()" );
762 V_OPENCL( copyEvent.wait( ),
"device_vector failed to wait for copy event" );
781 void resize( size_type reqSize,
const value_type& val = value_type( ) )
783 if( (m_Flags & CL_MEM_USE_HOST_PTR) != 0 )
785 throw ::cl::Error( CL_MEM_OBJECT_ALLOCATION_FAILURE ,
786 "A device_vector can not resize() memory not under its direct control" );
795 throw ::cl::Error( CL_MEM_OBJECT_ALLOCATION_FAILURE ,
796 "The amount of memory requested exceeds what is available" );
798 cl_int l_Error = CL_SUCCESS;
800 ::cl::Context l_Context = m_commQueue.getInfo< CL_QUEUE_CONTEXT >( &l_Error );
801 V_OPENCL( l_Error,
"device_vector failed to query for the context of the ::cl::Buffer object" );
803 size_type l_reqSize = reqSize *
sizeof( value_type );
804 ::cl::Buffer l_tmpBuffer( l_Context, m_Flags, l_reqSize, NULL, &l_Error );
806 size_type l_srcSize = m_Size *
sizeof( value_type );
812 if( l_reqSize > l_srcSize )
// One-element event list; its single slot receives the event of the copy
// enqueued below.
814 std::vector< ::cl::Event > copyEvent( 1 );
815 l_Error = m_commQueue.enqueueCopyBuffer( m_devMemory,
// Final argument of the call: address of the event slot in copyEvent.
821 &copyEvent.front( ) );
822 V_OPENCL( l_Error,
"device_vector failed to copy data to the new ::cl::Buffer object" );
823 ::cl::Event fillEvent;
825 size_t sizeDS =
sizeof(value_type);
826 if( !( sizeDS & (sizeDS - 1 ) ) )
828 l_Error = m_commQueue.enqueueFillBuffer< value_type >( l_tmpBuffer,
831 (l_reqSize - l_srcSize),
834 V_OPENCL( l_Error,
"device_vector failed to fill the new data with the provided pattern" );
840 ::cl::Event fill_mapEvent;
841 value_type *host_buffer = ( value_type* )m_commQueue.enqueueMapBuffer (
844 CL_MAP_READ | CL_MAP_WRITE,
846 (l_reqSize - l_srcSize),
851 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
852 fill_mapEvent.wait( );
856 std::fill_n( stdext::make_checked_array_iterator( host_buffer , reqSize ),
867 l_Error = m_commQueue.enqueueUnmapMemObject( l_tmpBuffer,
871 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
874 l_Error = fillEvent.wait( );
875 V_OPENCL( l_Error,
"device_vector failed to wait for fill event" );
// Copy the first l_reqSize bytes of the current buffer into l_tmpBuffer
// (both offsets 0) and enqueue a wait on that copy's event.
879 std::vector< ::cl::Event > copyEvent( 1 );
// The last argument is the address of the single event held in copyEvent.
880 l_Error = m_commQueue.enqueueCopyBuffer( m_devMemory, l_tmpBuffer, 0, 0, l_reqSize, NULL, &copyEvent.front( ) );
881 V_OPENCL( l_Error,
"device_vector failed to copy data to the new ::cl::Buffer object" );
883 l_Error = m_commQueue.enqueueWaitForEvents( copyEvent );
884 V_OPENCL( l_Error,
"device_vector failed to wait for copy event" );
889 ::cl::Event fillEvent;
890 size_t sizeDS =
sizeof(value_type);
891 if( !( sizeDS & (sizeDS - 1 ) ) )
893 l_Error = m_commQueue.enqueueFillBuffer< value_type >( l_tmpBuffer, val, 0, l_reqSize, NULL, &fillEvent );
894 V_OPENCL( l_Error,
"device_vector failed to fill the new data with the provided pattern" );
900 ::cl::Event fill_mapEvent;
901 value_type *host_buffer = ( value_type* )m_commQueue.enqueueMapBuffer (
904 CL_MAP_READ | CL_MAP_WRITE,
911 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
912 fill_mapEvent.wait( );
916 std::fill_n( stdext::make_checked_array_iterator( host_buffer , reqSize ),
926 l_Error = m_commQueue.enqueueUnmapMemObject( l_tmpBuffer,
930 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
934 l_Error = fillEvent.wait( );
935 V_OPENCL( l_Error,
"device_vector failed to wait for fill event" );
942 m_devMemory = l_tmpBuffer;
959 cl_int l_Error = CL_SUCCESS;
961 ::cl::Device l_Device = m_commQueue.getInfo< CL_QUEUE_DEVICE >( &l_Error );
962 V_OPENCL( l_Error,
"device_vector failed to query for the device of the command queue" );
964 cl_ulong l_MaxSize = l_Device.getInfo< CL_DEVICE_MAX_MEM_ALLOC_SIZE >( &l_Error );
965 V_OPENCL( l_Error,
"device_vector failed to query device for the maximum memory size" );
967 return static_cast< size_type
>( l_MaxSize /
sizeof( value_type ) );
985 throw ::cl::Error( CL_MEM_OBJECT_ALLOCATION_FAILURE ,
"The amount of memory requested exceeds what is available" );
988 cl_int l_Error = CL_SUCCESS;
989 ::cl::Context l_Context = m_commQueue.getInfo< CL_QUEUE_CONTEXT >( &l_Error );
990 V_OPENCL( l_Error,
"device_vector failed to query for the context of the ::cl::CommandQueue object" );
994 ::cl::Buffer l_tmpBuffer( l_Context, m_Flags, reqSize *
sizeof( value_type ) );
995 m_devMemory = l_tmpBuffer;
999 size_type l_size = reqSize *
sizeof( value_type );
1001 ::cl::Buffer l_tmpBuffer( l_Context, m_Flags, l_size, NULL, &l_Error );
1002 V_OPENCL( l_Error,
"device_vector can not create an temporary internal OpenCL buffer" );
1004 size_type l_srcSize = m_devMemory.getInfo< CL_MEM_SIZE >( &l_Error );
1005 V_OPENCL( l_Error,
"device_vector failed to request the size of the ::cl::Buffer object" );
// Copy l_srcSize bytes from the current buffer into l_tmpBuffer (both
// offsets 0) and wait on the copy's event.
1007 ::cl::Event copyEvent;
// The last argument is the address of copyEvent, which receives the copy's
// completion event.
1008 V_OPENCL( m_commQueue.enqueueCopyBuffer( m_devMemory, l_tmpBuffer, 0, 0, l_srcSize, NULL, &copyEvent ),
1009 "device_vector failed to copy from buffer to buffer " );
1012 V_OPENCL( copyEvent.wait( ),
"device_vector failed to wait on an event object" );
1015 m_devMemory = l_tmpBuffer;
1025 size_t l_memSize = 0;
1026 cl_int l_Error = CL_SUCCESS;
1031 if(m_devMemory() == NULL)
1034 l_memSize = m_devMemory.getInfo< CL_MEM_SIZE >( &l_Error );
1035 V_OPENCL( l_Error,
"device_vector failed to request the size of the ::cl::Buffer object" );
1036 return static_cast< size_type
>( l_memSize /
sizeof( value_type ) );
1049 throw ::cl::Error( CL_MEM_OBJECT_ALLOCATION_FAILURE ,
"device_vector size can not be greater than capacity( )" );
1055 cl_int l_Error = CL_SUCCESS;
1056 ::cl::Context l_Context = m_commQueue.getInfo< CL_QUEUE_CONTEXT >( &l_Error );
1057 V_OPENCL( l_Error,
"device_vector failed to query for the context of the ::cl::CommandQueue object" );
1059 size_type l_newSize = m_Size *
sizeof( value_type );
1060 ::cl::Buffer l_tmpBuffer( l_Context, m_Flags, l_newSize, NULL, &l_Error );
1061 V_OPENCL( l_Error,
"device_vector can not create an temporary internal OpenCL buffer" );
1064 size_type l_srcSize = m_devMemory.getInfo< CL_MEM_SIZE >( &l_Error );
1065 V_OPENCL( l_Error,
"device_vector failed to request the size of the ::cl::Buffer object" );
// Copy l_newSize bytes from the current buffer into l_tmpBuffer (both
// offsets 0) and enqueue a wait on that copy's event.
1067 std::vector< ::cl::Event > copyEvent( 1 );
// The last argument is the address of the single event held in copyEvent.
1068 l_Error = m_commQueue.enqueueCopyBuffer( m_devMemory, l_tmpBuffer, 0, 0, l_newSize, NULL, &copyEvent.front( ) );
1069 V_OPENCL( l_Error,
"device_vector failed to copy data to the new ::cl::Buffer object" );
1072 l_Error = m_commQueue.enqueueWaitForEvents( copyEvent );
1073 V_OPENCL( l_Error,
"device_vector failed to wait for copy event" );
1076 m_devMemory = l_tmpBuffer;
1093 cl_int l_Error = CL_SUCCESS;
1095 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ, n *
sizeof( value_type),
sizeof( value_type), NULL, NULL, &l_Error ) );
1096 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
1100 ::cl::Event unmapEvent;
1101 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuff, NULL, &unmapEvent );
1102 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1103 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
1172 return iterator( *
this, static_cast< typename iterator::difference_type >( m_Size ) );
1181 return const_iterator( *
this, static_cast< typename iterator::difference_type >( m_Size ) );
1191 return const_iterator( *
this, static_cast< typename iterator::difference_type >( m_Size ) );
1246 return ( *(
end() - 1) );
1254 return ( *(
end() - 1) );
1257 pointer data(
void )
1264 cl_int l_Error = CL_SUCCESS;
1266 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ | CL_MAP_WRITE,
1267 0,
capacity() *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1269 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
1271 pointer sp( ptrBuff, UnMapBufferFunctor< device_vector< value_type > >( *
this ) );
1276 const_pointer data(
void )
const
1278 cl_int l_Error = CL_SUCCESS;
1280 const_naked_pointer ptrBuff =
reinterpret_cast< const_naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ,
1281 0,
capacity() *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1282 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
1284 const_pointer sp( ptrBuff, UnMapBufferFunctor<
const device_vector< value_type > >( *
this ) );
// Yields true exactly when m_Size is zero.
1309 return m_Size == 0;
1318 throw ::cl::Error( CL_MEM_OBJECT_ALLOCATION_FAILURE ,
"device_vector size can not be greater than capacity( )" );
1327 cl_int l_Error = CL_SUCCESS;
1329 naked_pointer result =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_WRITE_INVALIDATE_REGION,
1330 m_Size *
sizeof( value_type),
sizeof( value_type ), NULL, NULL, &l_Error ) );
1331 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for push_back" );
1334 ::cl::Event unmapEvent;
1335 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, result, NULL, &unmapEvent );
1336 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1337 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
1360 ::cl::Buffer swapBuffer( m_devMemory );
1361 m_devMemory = vec.m_devMemory;
1362 vec.m_devMemory = swapBuffer;
1364 ::cl::CommandQueue swapQueue( m_commQueue );
1365 m_commQueue = vec.m_commQueue;
1366 vec.m_commQueue = swapQueue;
1368 size_type sizeTmp = m_Size;
1369 m_Size = vec.m_Size;
1370 vec.m_Size = sizeTmp;
1372 cl_mem_flags flagsTmp = m_Flags;
1373 m_Flags = vec.m_Flags;
1374 vec.m_Flags = flagsTmp;
1383 if( &index.m_Container !=
this )
1384 throw ::cl::Error( CL_INVALID_ARG_VALUE ,
"Iterator is not from this container" );
1387 if( index.m_Index >= l_End.m_Index )
1388 throw ::cl::Error( CL_INVALID_ARG_INDEX ,
"Iterator is pointing past the end of this container" );
1390 size_type sizeRegion = l_End.m_Index - index.m_Index;
1392 cl_int l_Error = CL_SUCCESS;
1393 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ | CL_MAP_WRITE,
1394 index.m_Index *
sizeof( value_type ), sizeRegion *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1395 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
1397 ::memmove( ptrBuff, ptrBuff + 1, (sizeRegion - 1)*
sizeof( value_type ) );
1399 ::cl::Event unmapEvent;
1400 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuff, NULL, &unmapEvent );
1401 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1402 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
1406 size_type newIndex = (m_Size < index.m_Index) ? m_Size : index.m_Index;
1407 return iterator( *
this, static_cast< difference_type >( newIndex ) );
// Both range iterators must refer to this container; reject the call if
// EITHER one refers to a different container.
1417 if(( &first.m_Container !=
this ) || ( &last.m_Container !=
this ) )
1418 throw ::cl::Error( CL_INVALID_ARG_VALUE ,
"Iterator is not from this container" );
1420 if( last.m_Index > m_Size )
1421 throw ::cl::Error( CL_INVALID_ARG_INDEX ,
"Iterator is pointing past the end of this container" );
1423 if( (first ==
begin( )) && (last ==
end( )) )
1426 return iterator( *
this, static_cast< typename iterator::difference_type >( m_Size ) );
1430 size_type sizeMap = l_End.m_Index - first.m_Index;
1432 cl_int l_Error = CL_SUCCESS;
1433 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ | CL_MAP_WRITE,
1434 first.m_Index *
sizeof( value_type ), sizeMap *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1435 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
1437 size_type sizeErase = last.m_Index - first.m_Index;
1438 ::memmove( ptrBuff, ptrBuff + sizeErase, (sizeMap - sizeErase)*
sizeof( value_type ) );
1440 ::cl::Event unmapEvent;
1441 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuff, NULL, &unmapEvent );
1442 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1443 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
1445 m_Size -= sizeErase;
1447 size_type newIndex = (m_Size < last.m_Index) ? m_Size : last.m_Index;
1448 return iterator( *
this, static_cast< typename iterator::difference_type >( newIndex ) );
1460 if( &index.m_Container !=
this )
1461 throw ::cl::Error( CL_INVALID_ARG_VALUE ,
"Iterator is not from this container" );
1463 if( index.m_Index > m_Size )
1464 throw ::cl::Error( CL_INVALID_ARG_INDEX ,
"Iterator is pointing past the end of this container" );
1466 if( index.m_Index == m_Size )
1469 return iterator( *
this, index.m_Index );
1480 size_type sizeMap = (m_Size - index.m_Index) + 1;
1482 cl_int l_Error = CL_SUCCESS;
1483 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ | CL_MAP_WRITE,
1484 index.m_Index *
sizeof( value_type ), sizeMap *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1485 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
1488 ::memmove( ptrBuff + 1, ptrBuff, (sizeMap - 1)*
sizeof( value_type ) );
1493 ::cl::Event unmapEvent;
1494 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuff, NULL, &unmapEvent );
1495 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1496 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
1500 return iterator( *
this, index.m_Index );
1512 if( &index.m_Container !=
this )
1513 throw ::cl::Error( CL_INVALID_ARG_VALUE ,
"Iterator is not from this container" );
1515 if( index.m_Index > m_Size )
1516 throw ::cl::Error( CL_INVALID_ARG_INDEX ,
"Iterator is pointing past the end of this container" );
1526 size_type sizeMap = (m_Size - index.m_Index) + n;
1528 cl_int l_Error = CL_SUCCESS;
1529 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ | CL_MAP_WRITE,
1530 index.m_Index *
sizeof( value_type ), sizeMap *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1531 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for operator[]" );
1534 ::memmove( ptrBuff + n, ptrBuff, (sizeMap - n)*
sizeof( value_type ) );
1537 for( size_type i = 0; i < n; ++i )
1539 ptrBuff[ i ] = value;
1542 ::cl::Event unmapEvent;
1543 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuff, NULL, &unmapEvent );
1544 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1545 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
1550 template<
typename InputIterator >
1553 if( &index.m_Container !=
this )
1554 throw ::cl::Error( CL_INVALID_ARG_VALUE ,
"Iterator is not from this container" );
1556 if( index.m_Index > m_Size )
1557 throw ::cl::Error( CL_INVALID_ARG_INDEX ,
"Iterator is pointing past the end of this container" );
1562 size_type n = std::distance( begin, end );
1567 size_type sizeMap = (m_Size - index.m_Index) + n;
1569 cl_int l_Error = CL_SUCCESS;
1570 naked_pointer ptrBuff =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory,
true, CL_MAP_READ | CL_MAP_WRITE,
1571 index.m_Index *
sizeof( value_type ), sizeMap *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1572 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for iterator insert" );
1575 ::memmove( ptrBuff + n, ptrBuff, (sizeMap - n)*
sizeof( value_type ) );
1578 std::copy( begin, end, stdext::checked_array_iterator< naked_pointer >( ptrBuff, n ) );
1583 ::cl::Event unmapEvent;
1584 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuff, NULL, &unmapEvent );
1585 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1586 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
1598 void assign( size_type newSize,
const value_type& value )
1600 if( newSize > m_Size )
1606 cl_int l_Error = CL_SUCCESS;
1608 ::cl::Event fillEvent;
1609 size_t sizeDS =
sizeof(value_type);
1611 if( !( sizeDS & (sizeDS - 1 ) ) )
1613 l_Error = m_commQueue.enqueueFillBuffer< value_type >( m_devMemory,
1616 m_Size *
sizeof( value_type ),
1619 V_OPENCL( l_Error,
"device_vector failed to fill the new data with the provided pattern" );
1624 ::cl::Event fill_mapEvent;
1625 value_type *host_buffer = ( value_type* )m_commQueue.enqueueMapBuffer ( m_devMemory,
1627 CL_MAP_READ | CL_MAP_WRITE,
1629 sizeof( value_type )*newSize,
1634 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
1635 fill_mapEvent.wait( );
1643 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory,
1647 V_OPENCL( l_Error,
"Error calling map on device_vector buffer. Fill device_vector" );
1651 l_Error = fillEvent.wait( );
1652 V_OPENCL( l_Error,
"device_vector failed to wait for fill event" );
1660 #if _MSC_VER == 1700
1661 template<
typename InputIterator>
1662 typename std::enable_if< std::_Is_iterator<InputIterator>::value,
void>::type
1663 assign( InputIterator begin, InputIterator end )
1665 template<
typename InputIterator>
1666 typename std::enable_if< !std::is_same< typename std::iterator_traits<InputIterator >::value_type,
1667 size_type >::value,
void>::type
1668 assign( InputIterator begin, InputIterator end )
1671 size_type l_Count = std::distance( begin, end );
1673 if( l_Count > m_Size )
1679 cl_int l_Error = CL_SUCCESS;
1681 naked_pointer ptrBuffer =
reinterpret_cast< naked_pointer
>( m_commQueue.enqueueMapBuffer( m_devMemory, CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION, 0 , m_Size *
sizeof( value_type ), NULL, NULL, &l_Error ) );
1682 V_OPENCL( l_Error,
"device_vector failed map device memory to host memory for push_back" );
1685 std::copy( begin, end, stdext::checked_array_iterator< naked_pointer >( ptrBuffer, m_Size ) );
1689 ::cl::Event unmapEvent;
1690 l_Error = m_commQueue.enqueueUnmapMemObject( m_devMemory, ptrBuffer, NULL, &unmapEvent );
1691 V_OPENCL( l_Error,
"device_vector failed to unmap host memory back to device memory" );
1692 V_OPENCL( unmapEvent.wait( ),
"failed to wait for unmap event" );
1719 ::cl::Buffer m_devMemory;
1720 ::cl::CommandQueue m_commQueue;
1722 cl_mem_flags m_Flags;
1726 static std::string deviceVectorIteratorTemplate = STRINGIFY_CODE(
1727 namespace bolt {
namespace cl { \n
1728 template<
typename T > \n
1729 class device_vector \n
1735 typedef int iterator_category;
1736 typedef T value_type; \n
1737 typedef int difference_type; \n
1738 typedef int size_type; \n
1739 typedef T* pointer; \n
1740 typedef T& reference; \n
1742 iterator( value_type init ): m_StartIndex( init ), m_Ptr( 0 ) \n
1745 void init( global value_type* ptr )\n
1750 global value_type& operator[]( size_type threadID ) const \n
1752 return m_Ptr[ m_StartIndex + threadID ]; \n
// Dereference: reads the element at the start index. The operator takes no
// arguments, so there is no thread offset to add in this scope.
1755 value_type operator*( ) const \n
1757 return m_Ptr[ m_StartIndex ]; \n
1760 size_type m_StartIndex; \n
1761 global value_type* m_Ptr; \n