Qrack  9.13
General classical-emulating-quantum development framework
oclengine.hpp
Go to the documentation of this file.
1 //
3 // (C) Daniel Strano and the Qrack contributors 2017-2023. All rights reserved.
4 //
5 // This is a multithreaded, universal quantum register simulation, allowing
6 // (nonphysical) register cloning and direct measurement of probability and
7 // phase, to leverage what advantages classical emulation of qubits can have.
8 //
9 // Licensed under the GNU Lesser General Public License V3.
10 // See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html
11 // for details.
12 
13 #pragma once
14 
15 #include "oclapi.hpp"
16 
17 #if !ENABLE_OPENCL
18 #error OpenCL has not been enabled
19 #endif
20 
21 #if defined(_WIN32) && !defined(__CYGWIN__)
22 #include <direct.h>
23 #endif
24 
25 #include <cstdint>
26 #include <map>
27 #include <memory>
28 #include <mutex>
29 #include <string>
30 #include <sys/stat.h>
31 
32 #if defined(OPENCL_V3)
33 #include <CL/opencl.hpp>
34 #elif defined(__APPLE__)
35 #define CL_SILENCE_DEPRECATION
36 #include <CL/opencl.hpp>
37 #elif defined(_WIN32) || ENABLE_SNUCL
38 #include <CL/cl.hpp>
39 #else
40 #include <CL/cl2.hpp>
41 #endif
42 
43 namespace Qrack {
44 
45 class OCLDeviceCall;
46 
47 class OCLDeviceContext;
48 
49 typedef std::shared_ptr<OCLDeviceContext> DeviceContextPtr;
50 typedef std::vector<cl::Event> EventVec;
51 typedef std::shared_ptr<EventVec> EventVecPtr;
52 
55  std::string kernelname;
56 
57  OCLKernelHandle(OCLAPI o, std::string kn)
58  : oclapi(o)
59  , kernelname(kn)
60  {
61  }
62 };
63 
65 protected:
66  std::lock_guard<std::mutex> guard;
67 
68 public:
69  // A cl::Kernel is unique object which should always be taken by reference, or the OCLDeviceContext will lose
70  // ownership.
71  cl::Kernel& call;
73 
74 protected:
75  OCLDeviceCall(std::mutex& m, cl::Kernel& c)
76  : guard(m)
77  , call(c)
78  {
79  }
80 
81  friend class OCLDeviceContext;
82 
83 private:
85 };
86 
88 public:
89  const cl::Platform platform;
90  const cl::Device device;
91  const cl::Context context;
92  const int64_t context_id;
93  const int64_t device_id;
94  const bool is_gpu;
95  const bool is_cpu;
96  const bool use_host_mem;
97  cl::CommandQueue queue;
99 
100 protected:
101  std::mutex waitEventsMutex;
102  std::map<OCLAPI, cl::Kernel> calls;
103  std::map<OCLAPI, std::unique_ptr<std::mutex>> mutexes;
104 
105 private:
106  const size_t procElemCount = device.getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
107  const size_t maxWorkItems = device.getInfo<CL_DEVICE_MAX_WORK_ITEM_SIZES>()[0];
108  const size_t maxWorkGroupSize = device.getInfo<CL_DEVICE_MAX_WORK_GROUP_SIZE>();
109  const size_t maxAlloc = device.getInfo<CL_DEVICE_MAX_MEM_ALLOC_SIZE>();
110  const size_t globalSize = device.getInfo<CL_DEVICE_GLOBAL_MEM_SIZE>();
111  const size_t localSize = device.getInfo<CL_DEVICE_LOCAL_MEM_SIZE>();
112  size_t globalLimit;
115 
116 public:
117  OCLDeviceContext(cl::Platform& p, cl::Device& d, cl::Context& c, int64_t dev_id, int64_t cntxt_id, int64_t maxAlloc,
118  bool isGpu, bool isCpu, bool useHostMem)
119  : platform(p)
120  , device(d)
121  , context(c)
122  , context_id(cntxt_id)
123  , device_id(dev_id)
124  , is_gpu(isGpu)
125  , is_cpu(isCpu)
126  , use_host_mem(useHostMem)
127  , wait_events(new EventVec())
128 #if ENABLE_OCL_MEM_GUARDS
130 #else
131  , globalLimit((maxAlloc >= 0) ? maxAlloc : -1)
132 #endif
135  {
136  cl_int error;
137 #if ENABLE_OOO_OCL
138  queue = cl::CommandQueue(c, d, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &error);
139  if (error != CL_SUCCESS) {
140  queue = cl::CommandQueue(c, d, 0, &error);
141  if (error != CL_SUCCESS) {
142  throw std::runtime_error("Failed to create OpenCL command queue!");
143  }
144  }
145 #else
146  queue = cl::CommandQueue(c, d, 0, &error);
147  if (error != CL_SUCCESS) {
148  throw std::runtime_error("Failed to create OpenCL command queue!");
149  }
150 #endif
151  }
152 
153  OCLDeviceCall Reserve(OCLAPI call) { return OCLDeviceCall(*(mutexes[call]), calls[call]); }
154 
156  {
157  std::lock_guard<std::mutex> guard(waitEventsMutex);
158  EventVecPtr waitVec = std::move(wait_events);
160  return waitVec;
161  }
162 
163  template <typename Fn> void EmplaceEvent(Fn fn)
164  {
165  std::lock_guard<std::mutex> guard(waitEventsMutex);
166  wait_events->emplace_back();
167  fn(wait_events->back());
168  }
169 
171  {
172  std::lock_guard<std::mutex> guard(waitEventsMutex);
173  if ((wait_events.get())->size()) {
174  cl::Event::waitForEvents((const EventVec&)*(wait_events.get()));
175  wait_events->clear();
176  }
177  }
178 
180  {
181  return preferredSizeMultiple
184  calls[OCL_API_APPLY2X2_NORM_SINGLE].getWorkGroupInfo<CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE>(
185  device);
186  }
187 
189  {
190  if (preferredConcurrency) {
191  return preferredConcurrency;
192  }
193 
194  int hybridOffset = 3U;
195 #if ENABLE_ENV_VARS
196  if (getenv("QRACK_GPU_OFFSET_QB")) {
197  hybridOffset = std::stoi(std::string(getenv("QRACK_GPU_OFFSET_QB")));
198  }
199 #endif
200 
201  const size_t pc = procElemCount * GetPreferredSizeMultiple();
203  while (preferredConcurrency < pc) {
204  preferredConcurrency <<= 1U;
205  }
207  hybridOffset > 0 ? (preferredConcurrency << hybridOffset) : (preferredConcurrency >> -hybridOffset);
208  if (preferredConcurrency < 1U) {
210  }
211 
212  return preferredConcurrency;
213  }
214 
215  size_t GetProcElementCount() { return procElemCount; }
216  size_t GetMaxWorkItems() { return maxWorkItems; }
218  size_t GetMaxAlloc() { return maxAlloc; }
219  size_t GetGlobalSize() { return globalSize; }
220  size_t GetLocalSize() { return localSize; }
221  size_t GetGlobalAllocLimit() { return globalLimit; }
222 
223  friend class OCLEngine;
224 };
225 
227  std::vector<DeviceContextPtr> all_dev_contexts;
229 
231  : all_dev_contexts()
232  , default_dev_context(NULL)
233  {
234  // Intentionally left blank
235  }
236 
237  InitOClResult(std::vector<DeviceContextPtr> adc, DeviceContextPtr ddc)
238  : all_dev_contexts(adc)
239  , default_dev_context(ddc)
240  {
241  // Intentionally left blank
242  }
243 };
244 
246 class OCLEngine {
247 public:
248  // See https://stackoverflow.com/questions/1008019/c-singleton-design-pattern
250  static OCLEngine& Instance()
251  {
252  static OCLEngine instance;
253  return instance;
254  }
256  static std::string GetDefaultBinaryPath()
257  {
258 #if ENABLE_ENV_VARS
259  if (getenv("QRACK_OCL_PATH")) {
260  std::string toRet = std::string(getenv("QRACK_OCL_PATH"));
261  if ((toRet.back() != '/') && (toRet.back() != '\\')) {
262 #if defined(_WIN32) && !defined(__CYGWIN__)
263  toRet += "\\";
264 #else
265  toRet += "/";
266 #endif
267  }
268  return toRet;
269  }
270 #endif
271 #if defined(_WIN32) && !defined(__CYGWIN__)
272  return std::string(getenv("HOMEDRIVE") ? getenv("HOMEDRIVE") : "") +
273  std::string(getenv("HOMEPATH") ? getenv("HOMEPATH") : "") + "\\.qrack\\";
274 #else
275  return std::string(getenv("HOME") ? getenv("HOME") : "") + "/.qrack/";
276 #endif
277  }
281  static InitOClResult InitOCL(bool buildFromSource = false, bool saveBinaries = false, std::string home = "*",
282  std::vector<int64_t> maxAllocVec = { -1 });
283 
285  DeviceContextPtr GetDeviceContextPtr(const int64_t& dev = -1);
287  std::vector<DeviceContextPtr> GetDeviceContextPtrVector();
292  void SetDeviceContextPtrVector(std::vector<DeviceContextPtr> vec, DeviceContextPtr dcp = nullptr);
294  int GetDeviceCount() { return all_device_contexts.size(); }
296  size_t GetDefaultDeviceID() { return default_device_context->device_id; }
299 
300  size_t GetActiveAllocSize(const int64_t& dev)
301  {
302  if (dev > ((int64_t)activeAllocSizes.size())) {
303  throw std::invalid_argument("OCLEngine::GetActiveAllocSize device ID is too high!");
304  }
305  return (dev < 0) ? activeAllocSizes[GetDefaultDeviceID()] : activeAllocSizes[(size_t)dev];
306  }
307  size_t AddToActiveAllocSize(const int64_t& dev, size_t size)
308  {
309  if (dev > ((int64_t)activeAllocSizes.size())) {
310  throw std::invalid_argument("OCLEngine::GetActiveAllocSize device ID is too high!");
311  }
312 
313  const size_t lDev = (dev < 0) ? GetDefaultDeviceID() : dev;
314 
315  if (size == 0) {
316  return activeAllocSizes[lDev];
317  }
318 
319  std::lock_guard<std::mutex> lock(allocMutex);
320  activeAllocSizes[lDev] += size;
321 
322  return activeAllocSizes[lDev];
323  }
324  size_t SubtractFromActiveAllocSize(const int64_t& dev, size_t size)
325  {
326  if (dev > ((int64_t)activeAllocSizes.size())) {
327  throw std::invalid_argument("OCLEngine::GetActiveAllocSize device ID is too high!");
328  }
329 
330  const size_t lDev = (dev < 0) ? GetDefaultDeviceID() : dev;
331 
332  if (size == 0) {
333  return activeAllocSizes[lDev];
334  }
335 
336  std::lock_guard<std::mutex> lock(allocMutex);
337  if (size < activeAllocSizes[lDev]) {
338  activeAllocSizes[lDev] -= size;
339  } else {
340  activeAllocSizes[lDev] = 0;
341  }
342  return activeAllocSizes[lDev];
343  }
344  void ResetActiveAllocSize(const int64_t& dev)
345  {
346  if (dev > ((int64_t)activeAllocSizes.size())) {
347  throw std::invalid_argument("OCLEngine::GetActiveAllocSize device ID is too high!");
348  }
349  const size_t lDev = (dev < 0) ? GetDefaultDeviceID() : dev;
350  std::lock_guard<std::mutex> lock(allocMutex);
351  // User code should catch std::bad_alloc and reset:
352  activeAllocSizes[lDev] = 0;
353  }
354 
355  OCLEngine(OCLEngine const&) = delete;
356  void operator=(OCLEngine const&) = delete;
357 
358 private:
359  static const std::vector<OCLKernelHandle> kernelHandles;
360  static const std::string binary_file_prefix;
361  static const std::string binary_file_ext;
362 
363  std::vector<size_t> activeAllocSizes;
364  std::vector<int64_t> maxActiveAllocSizes;
365  std::mutex allocMutex;
366  std::vector<DeviceContextPtr> all_device_contexts;
368 
369  OCLEngine(); // Private so that it can not be called
370 
372  static cl::Program MakeProgram(bool buildFromSource, std::string path, std::shared_ptr<OCLDeviceContext> devCntxt);
374  static void SaveBinary(cl::Program program, std::string path, std::string fileName);
375 };
376 
377 } // namespace Qrack
Definition: oclengine.hpp:64
std::lock_guard< std::mutex > guard
Definition: oclengine.hpp:66
OCLDeviceCall & operator=(const OCLDeviceCall &)=delete
cl::Kernel & call
Definition: oclengine.hpp:71
OCLDeviceCall(std::mutex &m, cl::Kernel &c)
Definition: oclengine.hpp:75
OCLDeviceCall(const OCLDeviceCall &)
Definition: oclengine.hpp:87
OCLDeviceContext(cl::Platform &p, cl::Device &d, cl::Context &c, int64_t dev_id, int64_t cntxt_id, int64_t maxAlloc, bool isGpu, bool isCpu, bool useHostMem)
Definition: oclengine.hpp:117
const cl::Device device
Definition: oclengine.hpp:90
const size_t globalSize
Definition: oclengine.hpp:110
size_t globalLimit
Definition: oclengine.hpp:112
size_t GetPreferredSizeMultiple()
Definition: oclengine.hpp:179
const size_t localSize
Definition: oclengine.hpp:111
std::mutex waitEventsMutex
Definition: oclengine.hpp:101
const bool use_host_mem
Definition: oclengine.hpp:96
const int64_t context_id
Definition: oclengine.hpp:92
std::map< OCLAPI, cl::Kernel > calls
Definition: oclengine.hpp:102
void EmplaceEvent(Fn fn)
Definition: oclengine.hpp:163
size_t GetGlobalSize()
Definition: oclengine.hpp:219
size_t GetMaxAlloc()
Definition: oclengine.hpp:218
std::map< OCLAPI, std::unique_ptr< std::mutex > > mutexes
Definition: oclengine.hpp:103
const bool is_cpu
Definition: oclengine.hpp:95
OCLDeviceCall Reserve(OCLAPI call)
Definition: oclengine.hpp:153
size_t GetMaxWorkGroupSize()
Definition: oclengine.hpp:217
void WaitOnAllEvents()
Definition: oclengine.hpp:170
size_t GetLocalSize()
Definition: oclengine.hpp:220
const int64_t device_id
Definition: oclengine.hpp:93
const size_t maxWorkGroupSize
Definition: oclengine.hpp:108
cl::CommandQueue queue
Definition: oclengine.hpp:97
size_t preferredConcurrency
Definition: oclengine.hpp:114
size_t GetMaxWorkItems()
Definition: oclengine.hpp:216
const size_t procElemCount
Definition: oclengine.hpp:106
EventVecPtr ResetWaitEvents()
Definition: oclengine.hpp:155
const cl::Context context
Definition: oclengine.hpp:91
size_t GetPreferredConcurrency()
Definition: oclengine.hpp:188
size_t preferredSizeMultiple
Definition: oclengine.hpp:113
const bool is_gpu
Definition: oclengine.hpp:94
size_t GetGlobalAllocLimit()
Definition: oclengine.hpp:221
size_t GetProcElementCount()
Definition: oclengine.hpp:215
EventVecPtr wait_events
Definition: oclengine.hpp:98
const size_t maxWorkItems
Definition: oclengine.hpp:107
const cl::Platform platform
Definition: oclengine.hpp:89
const size_t maxAlloc
Definition: oclengine.hpp:109
"Qrack::OCLEngine" manages the single OpenCL context.
Definition: oclengine.hpp:246
size_t AddToActiveAllocSize(const int64_t &dev, size_t size)
Definition: oclengine.hpp:307
static const std::string binary_file_ext
Definition: oclengine.hpp:361
std::vector< int64_t > maxActiveAllocSizes
Definition: oclengine.hpp:364
int GetDeviceCount()
Get the count of devices in the current list.
Definition: oclengine.hpp:294
std::vector< DeviceContextPtr > all_device_contexts
Definition: oclengine.hpp:366
OCLEngine(OCLEngine const &)=delete
std::vector< size_t > activeAllocSizes
Definition: oclengine.hpp:363
void ResetActiveAllocSize(const int64_t &dev)
Definition: oclengine.hpp:344
void operator=(OCLEngine const &)=delete
static const std::vector< OCLKernelHandle > kernelHandles
Definition: oclengine.hpp:359
std::vector< DeviceContextPtr > GetDeviceContextPtrVector()
Get the list of all available devices (and their supporting objects).
Definition: oclengine.cpp:151
size_t SubtractFromActiveAllocSize(const int64_t &dev, size_t size)
Definition: oclengine.hpp:324
OCLEngine()
Definition: oclengine.cpp:443
static const std::string binary_file_prefix
Definition: oclengine.hpp:360
std::mutex allocMutex
Definition: oclengine.hpp:365
static OCLEngine & Instance()
Get a pointer to the Instance of the singleton. (The instance will be instantiated,...
Definition: oclengine.hpp:250
static void SaveBinary(cl::Program program, std::string path, std::string fileName)
Save the program binary:
Definition: oclengine.cpp:241
static InitOClResult InitOCL(bool buildFromSource=false, bool saveBinaries=false, std::string home="*", std::vector< int64_t > maxAllocVec={ -1 })
Initialize the OCL environment, with the option to save the generated binaries.
Definition: oclengine.cpp:279
void SetDefaultDeviceContext(DeviceContextPtr dcp)
Pick a default device, for QEngineOCL instances that don't specify a preferred device.
Definition: oclengine.cpp:160
DeviceContextPtr default_device_context
Definition: oclengine.hpp:367
size_t GetActiveAllocSize(const int64_t &dev)
Definition: oclengine.hpp:300
static cl::Program MakeProgram(bool buildFromSource, std::string path, std::shared_ptr< OCLDeviceContext > devCntxt)
Make the program, from either source or binary.
Definition: oclengine.cpp:162
static std::string GetDefaultBinaryPath()
Get default location for precompiled binaries:
Definition: oclengine.hpp:256
size_t GetDefaultDeviceID()
Get default device ID.
Definition: oclengine.hpp:296
DeviceContextPtr GetDeviceContextPtr(const int64_t &dev=-1)
Get a pointer to one of the available OpenCL contexts, by its index in the list of all contexts.
Definition: oclengine.cpp:54
void SetDeviceContextPtrVector(std::vector< DeviceContextPtr > vec, DeviceContextPtr dcp=nullptr)
Set the list of DeviceContextPtr objects available for use.
Definition: oclengine.cpp:152
GLOSSARY: bitLenInt - "bit-length integer" - unsigned integer ID of qubit position in register bitCap...
Definition: complex16x2simd.hpp:25
std::shared_ptr< OCLDeviceContext > DeviceContextPtr
Definition: oclengine.hpp:47
std::shared_ptr< EventVec > EventVecPtr
Definition: oclengine.hpp:51
void U(quid sid, bitLenInt q, real1_f theta, real1_f phi, real1_f lambda)
(External API) 3-parameter unitary gate
Definition: wasm_api.cpp:1143
OCLAPI
Definition: oclapi.hpp:19
@ OCL_API_APPLY2X2_NORM_SINGLE
Definition: oclapi.hpp:23
std::vector< cl::Event > EventVec
Definition: oclengine.hpp:50
Definition: oclengine.hpp:226
std::vector< DeviceContextPtr > all_dev_contexts
Definition: oclengine.hpp:227
InitOClResult()
Definition: oclengine.hpp:230
InitOClResult(std::vector< DeviceContextPtr > adc, DeviceContextPtr ddc)
Definition: oclengine.hpp:237
DeviceContextPtr default_dev_context
Definition: oclengine.hpp:228
Definition: oclengine.hpp:53
std::string kernelname
Definition: oclengine.hpp:55
OCLAPI oclapi
Definition: oclengine.hpp:54
OCLKernelHandle(OCLAPI o, std::string kn)
Definition: oclengine.hpp:57