Bolt  1.1
C++ template library with support for OpenCL
parallel_iteration.h
Go to the documentation of this file.
1 /***************************************************************************
2 * Copyright 2012 - 2013 Advanced Micro Devices, Inc.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 
16 ***************************************************************************/
17 
22 #if !defined( BOLT_AMP_PARALLEL_ITER_H )
23 #define BOLT_AMP_PARALLEL_ITER_H
24 
25 #pragma once
26 
27 #include <amp.h>
28 
29 
30 //#include <bolt/combineable_queue.h>
31 
32 //#define RESTRICT_AMP restrict(direct3d)
33 
34 
35 namespace bolt {
36 
50  template<typename IterType, typename Function>
51  void parallel_iteration(concurrency::extent<1> ext, IterType Init, Function f) {
52 
53  using namespace concurrency;
54 
55  parallel_for_each(extent<1>(ext), [=] (index<1> idx) mutable restrict(amp) {
56  IterType iter(Init);
57 
58  bool keepGoing = true;
59 
60  while (keepGoing) {
61  keepGoing = f(idx, iter);
62  };
63  });
64 
65  };
66 
69  template<typename IterType, typename Function>
70  void parallel_iteration_1(concurrency::extent<1> ext, IterType Init, Function f) {
71 
72  using namespace concurrency;
73  int splitPoint = 3; // Iterations to run on the GPU:
74 
75  static const int maxQSz=1024;
76  array<pair<index<1>, IterType> outQueue(maxQSz);
77  array<int,1> qPtr(1);
78 
79  parallel_for_each(extent<1>(ext), [=] (index<1> idx) mutable restrict(amp) {
80  IterType iter(Init);
81  int iterations = splitPoint;
82 
83  bool keepGoing = true;
84  while (keepGoing && --iterations) {
85  keepGoing = f(idx, iter);
86  };
87  if (keepGoing) {
88  // Enqueue for processing below - save idx and state.
89  //; Horrible performance due to contended atomic operations, need to combine enqueue operations.
90  int lPtr = atomic_fetch_add(&qPtr[0], 1);
91  // FIXME - bounds check
92  if (lPtr < maxQSz) {
93  outQueue[lPtr].first = idx;
94  outQueue[lPtr].second = iter;
95  }
96  };
97  });
98  };
99 };
100 
101 // TODO: Need versions to handle more extent dimensions??
102 
103 // FIXME-TODO
104 // Add Doxygen-style comments:
105 
106 #endif