Qrack  9.13
General classical-emulating-quantum development framework
qengine_opencl.hpp
Go to the documentation of this file.
1 //
3 // (C) Daniel Strano and the Qrack contributors 2017-2023. All rights reserved.
4 //
5 // This is a multithreaded, universal quantum register simulation, allowing
6 // (nonphysical) register cloning and direct measurement of probability and
7 // phase, to leverage what advantages classical emulation of qubits can have.
8 //
9 // Licensed under the GNU Lesser General Public License V3.
10 // See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html
11 // for details.
12 
13 #pragma once
14 
15 #include "common/oclengine.hpp"
16 #include "qengine.hpp"
17 #include "qengine_gpu_util.hpp"
18 
19 #if !ENABLE_OPENCL
20 #error OpenCL has not been enabled
21 #endif
22 
23 #include <list>
24 #include <mutex>
25 
26 #define BCI_ARG_LEN 10
27 #define CMPLX_NORM_LEN 6
28 #define REAL_ARG_LEN 2
29 
30 namespace Qrack {
31 
32 typedef std::shared_ptr<cl::Buffer> BufferPtr;
33 
34 class QEngineOCL;
35 typedef std::shared_ptr<QEngineOCL> QEngineOCLPtr;
36 
// Element type of QEngineOCL::wait_queue_items. The constructors below build three kinds of item:
// a call item (API id, work sizes, buffers), an item carrying a doNorm flag (isSetDoNorm == true),
// and an item carrying a runningNorm value (isSetRunningNorm == true).
// NOTE(review): this listing is missing several lines of the struct (for example the declarations of
// api_call and runningNorm, and the signature of the first constructor) - check the real header.
37 struct QueueItem {
39  size_t workItemCount;
40  size_t localGroupSize;
41  size_t deallocSize;
42  std::vector<BufferPtr> buffers;
43  size_t localBuffSize;
44  bool isSetDoNorm;
45  bool isSetRunningNorm;
46  bool doNorm;
48 
// Initializer list of a constructor whose signature line is not shown in this listing.
// It zeroes the sizes, leaves buffers empty, and sets isSetRunningNorm to true.
50  : api_call()
51  , workItemCount(0U)
52  , localGroupSize(0U)
53  , deallocSize(0U)
54  , buffers()
55  , localBuffSize(0U)
56  , isSetDoNorm(false)
57  , isSetRunningNorm(true)
58  , doNorm(false)
60  {
61  }
62 
// Call item: stores the API id, work sizes, dealloc size, buffer list and local buffer size;
// both "is set" flags are false.
63  QueueItem(OCLAPI ac, size_t wic, size_t lgs, size_t ds, std::vector<BufferPtr> b, size_t lbs)
64  : api_call(ac)
65  , workItemCount(wic)
66  , localGroupSize(lgs)
67  , deallocSize(ds)
68  , buffers(b)
69  , localBuffSize(lbs)
70  , isSetDoNorm(false)
71  , isSetRunningNorm(false)
72  , doNorm(false)
74  {
75  }
76 
// doNorm item: isSetDoNorm is true and doNorm holds the requested value; no call data.
77  QueueItem(bool doNrm)
78  : api_call()
79  , workItemCount(0U)
80  , localGroupSize(0U)
81  , deallocSize(0U)
82  , buffers()
83  , localBuffSize(0U)
84  , isSetDoNorm(true)
85  , isSetRunningNorm(false)
86  , doNorm(doNrm)
88  {
89  }
90 
// runningNorm item: isSetRunningNorm is true and runningNorm holds the requested value; no call data.
91  QueueItem(real1_f runningNrm)
92  : api_call()
93  , workItemCount(0U)
94  , localGroupSize(0U)
95  , deallocSize(0U)
96  , buffers()
97  , localBuffSize(0U)
98  , isSetDoNorm(false)
99  , isSetRunningNorm(true)
100  , doNorm(false)
101  , runningNorm(runningNrm)
102  {
103  }
104 };
105 
106 class PoolItem {
107 public:
111 
112  std::shared_ptr<real1> probArray;
113  std::shared_ptr<real1> angleArray;
114 
115  PoolItem(cl::Context& context)
116  : probArray(NULL)
117  , angleArray(NULL)
118  {
119  cmplxBuffer = MakeBuffer(context, sizeof(complex) * CMPLX_NORM_LEN);
120  realBuffer = MakeBuffer(context, sizeof(real1) * REAL_ARG_LEN);
121  ulongBuffer = MakeBuffer(context, sizeof(bitCapIntOcl) * BCI_ARG_LEN);
122  }
123 
125 
126 protected:
127  BufferPtr MakeBuffer(const cl::Context& context, size_t size)
128  {
129  cl_int error;
130  BufferPtr toRet = std::make_shared<cl::Buffer>(context, CL_MEM_READ_ONLY, size, (void*)NULL, &error);
131  if (error != CL_SUCCESS) {
132  if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) {
133  throw bad_alloc("CL_MEM_OBJECT_ALLOCATION_FAILURE in PoolItem::MakeBuffer()");
134  }
135  if (error == CL_OUT_OF_HOST_MEMORY) {
136  throw bad_alloc("CL_OUT_OF_HOST_MEMORY in PoolItem::MakeBuffer()");
137  }
138  if (error == CL_INVALID_BUFFER_SIZE) {
139  throw bad_alloc("CL_INVALID_BUFFER_SIZE in PoolItem::MakeBuffer()");
140  }
141  throw std::runtime_error("OpenCL error code on buffer allocation attempt: " + std::to_string(error));
142  }
143 
144  return toRet;
145  }
146 };
147 
148 typedef std::shared_ptr<PoolItem> PoolItemPtr;
149 
168 class QEngineOCL : public QEngine {
169 protected:
170  bool didInit;
175  size_t nrmGroupSize;
177  int64_t deviceID;
180  std::shared_ptr<complex> stateVec;
181  std::mutex queue_mutex;
182  cl::CommandQueue queue;
183  cl::Context context;
184  // stateBuffer is allocated as a shared_ptr, because it's the only buffer that will be acted on outside of
185  // QEngineOCL itself, specifically by QEngineOCLMulti.
189  std::vector<EventVecPtr> wait_refs;
190  std::list<QueueItem> wait_queue_items;
191  std::vector<PoolItemPtr> poolItems;
192  std::unique_ptr<real1[], void (*)(real1*)> nrmArray;
193 
194 #if defined(__APPLE__)
195  real1* _aligned_nrm_array_alloc(bitCapIntOcl allocSize)
196  {
197  void* toRet;
198  posix_memalign(&toRet, QRACK_ALIGN_SIZE, allocSize);
199  return (real1*)toRet;
200  }
201 #endif
202 
204  {
// NOTE(review): the signature line of this member function is not part of this listing
// (presumably checkCallbackError() - confirm against the real header).
// Returns immediately while callbackError is CL_SUCCESS. Otherwise it drops every pending
// queue item and wait reference, then throws std::runtime_error with the stored error code.
205  if (callbackError == CL_SUCCESS) {
206  return;
207  }
208 
209  wait_queue_items.clear();
210  wait_refs.clear();
211 
212  throw std::runtime_error("Failed to enqueue kernel, error code: " + std::to_string(callbackError));
213  }
214 
215  // For std::function, cl_int use might discard int qualifiers.
// Runs oclCall and treats a CL_SUCCESS return value as success. On failure the call is retried
// after clFinish(), and once more after clFinish(true). If the third attempt also fails, the
// pending queue items and wait references are cleared and std::runtime_error is thrown with
// `message` followed by the last error code.
// NOTE(review): the listing skips one source line between the opening brace and the first attempt.
216  void tryOcl(std::string message, std::function<int()> oclCall)
217  {
219 
220  if (oclCall() == CL_SUCCESS) {
221  // Success
222  return;
223  }
224 
225  // Soft finish (just for this QEngineOCL)
226  clFinish();
227 
228  if (oclCall() == CL_SUCCESS) {
229  // Success after clearing QEngineOCL queue
230  return;
231  }
232 
233  // Hard finish (for the unique OpenCL device)
234  clFinish(true);
235 
// The result of the last attempt is kept so its code can be reported in the exception below.
236  cl_int error = oclCall();
237  if (error == CL_SUCCESS) {
238  // Success after clearing all queues for the OpenCL device
239  return;
240  }
241 
242  wait_queue_items.clear();
243  wait_refs.clear();
244 
245  // We're fatally blocked. Throw to exit.
246  throw std::runtime_error(message + ", error code: " + std::to_string(error));
247  }
248 
249  using QEngine::Copy;
// Casts the generic pointer to QEngineOCL and forwards to the typed overload below.
// NOTE(review): the cast result is not checked; a pointer that is not a QEngineOCL would be
// dereferenced as null in the typed overload.
250  void Copy(QInterfacePtr orig) { Copy(std::dynamic_pointer_cast<QEngineOCL>(orig)); }
// Copies the base-class state through QEngine::Copy(), then assigns the OpenCL-specific members
// from `orig` one by one. Shared-pointer members (state vector, buffers, pool items) are assigned,
// not deep-copied, so both engines refer to the same objects afterwards.
251  void Copy(QEngineOCLPtr orig)
252  {
253  QEngine::Copy(std::dynamic_pointer_cast<QEngine>(orig));
254  didInit = orig->didInit;
255  usingHostRam = orig->usingHostRam;
256  unlockHostMem = orig->unlockHostMem;
257  callbackError = orig->callbackError;
258  nrmGroupCount = orig->nrmGroupCount;
259  nrmGroupSize = orig->nrmGroupSize;
// NOTE(review): AddAlloc() reads deviceID and device_context, which still hold this object's
// values here because they are only overwritten further down - confirm that ordering is intended.
260  AddAlloc(orig->totalOclAllocSize);
261  deviceID = orig->deviceID;
262  lockSyncFlags = orig->lockSyncFlags;
263  permutationAmp = orig->permutationAmp;
264  stateVec = orig->stateVec;
// The mutex is intentionally left out of the copy (the assignment below is commented out).
265  // queue_mutex = orig->queue_mutex;
266  queue = orig->queue;
267  context = orig->context;
268  stateBuffer = orig->stateBuffer;
269  nrmBuffer = orig->nrmBuffer;
270  device_context = orig->device_context;
271  wait_refs = orig->wait_refs;
272  wait_queue_items = orig->wait_queue_items;
273  poolItems = orig->poolItems;
274  }
275 
276 public:
279  static const bitCapIntOcl OclMemDenom = 3U;
280 
299  QEngineOCL(bitLenInt qBitCount, const bitCapInt& initState, qrack_rand_gen_ptr rgp = nullptr,
300  const complex& phaseFac = CMPLX_DEFAULT_ARG, bool doNorm = false, bool randomGlobalPhase = true,
301  bool useHostMem = false, int64_t devID = -1, bool useHardwareRNG = true, bool ignored = false,
302  real1_f norm_thresh = REAL1_EPSILON, std::vector<int64_t> ignored2 = {}, bitLenInt ignored4 = 0U,
303  real1_f ignored3 = _qrack_qunit_sep_thresh);
304 
306  {
// NOTE(review): the signature line for this body is missing from the listing; the comment below
// indicates it is the destructor. It resets callbackError to CL_SUCCESS and then calls FreeAll().
307  // Theoretically, all user output is blocking, so don't throw in destructor.
308  callbackError = CL_SUCCESS;
309  // Make sure we track device allocation.
310  FreeAll();
311  }
312 
313  virtual bool isOpenCL() { return true; }
314 
315  bool IsZeroAmplitude() { return !stateBuffer; }
317  {
318  if (!stateBuffer) {
319  return ZERO_R1_F;
320  }
321 
323  }
324 
325  void ZeroAmplitudes();
326  void CopyStateVec(QEnginePtr src);
327 
328  void GetAmplitudePage(complex* pagePtr, bitCapIntOcl offset, bitCapIntOcl length);
329  void SetAmplitudePage(const complex* pagePtr, bitCapIntOcl offset, bitCapIntOcl length);
330  void SetAmplitudePage(
331  QEnginePtr pageEnginePtr, bitCapIntOcl srcOffset, bitCapIntOcl dstOffset, bitCapIntOcl length);
332  void ShuffleBuffers(QEnginePtr engine);
334 
335  bitCapIntOcl GetMaxSize() { return device_context->GetMaxAlloc() / sizeof(complex); };
336 
337  void SetPermutation(const bitCapInt& perm, const complex& phaseFac = CMPLX_DEFAULT_ARG);
338 
340  void UniformlyControlledSingleBit(const std::vector<bitLenInt>& controls, bitLenInt qubitIndex,
341  const complex* mtrxs, const std::vector<bitCapInt>& mtrxSkipPowers, const bitCapInt& mtrxSkipValueMask);
342  void UniformParityRZ(const bitCapInt& mask, real1_f angle);
343  void CUniformParityRZ(const std::vector<bitLenInt>& controls, const bitCapInt& mask, real1_f angle);
344 
345  /* Operations that have an improved implementation. */
346  using QEngine::X;
347  void X(bitLenInt target);
348  using QEngine::Z;
349  void Z(bitLenInt target);
350  using QEngine::Invert;
351  void Invert(const complex& topRight, const complex& bottomLeft, bitLenInt qubitIndex);
352  using QEngine::Phase;
353  void Phase(const complex& topLeft, const complex& bottomRight, bitLenInt qubitIndex);
354  void XMask(const bitCapInt& mask);
355  void PhaseParity(real1_f radians, const bitCapInt& mask);
356  void PhaseRootNMask(bitLenInt n, const bitCapInt& mask);
357 
358  using QEngine::Compose;
360  bitLenInt Compose(QInterfacePtr toCopy) { return Compose(std::dynamic_pointer_cast<QEngineOCL>(toCopy)); }
361  bitLenInt Compose(QEngineOCLPtr toCopy, bitLenInt start);
363  {
364  return Compose(std::dynamic_pointer_cast<QEngineOCL>(toCopy), start);
365  }
366  using QEngine::Decompose;
367  void Decompose(bitLenInt start, QInterfacePtr dest);
368  void Dispose(bitLenInt start, bitLenInt length);
369  void Dispose(bitLenInt start, bitLenInt length, const bitCapInt& disposedPerm);
370  using QEngine::Allocate;
371  bitLenInt Allocate(bitLenInt start, bitLenInt length);
372 
373  void ROL(bitLenInt shift, bitLenInt start, bitLenInt length);
374 
375 #if ENABLE_ALU
376  void INC(const bitCapInt& toAdd, bitLenInt start, bitLenInt length);
377  void CINC(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt length, const std::vector<bitLenInt>& controls);
378  void INCS(const bitCapInt& toAdd, bitLenInt start, bitLenInt length, bitLenInt carryIndex);
379 #if ENABLE_BCD
380  void INCBCD(const bitCapInt& toAdd, bitLenInt start, bitLenInt length);
381 #endif
382  void MUL(const bitCapInt& toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length);
383  void DIV(const bitCapInt& toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length);
384  void MULModNOut(
385  const bitCapInt& toMul, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length);
386  void IMULModNOut(
387  const bitCapInt& toMul, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length);
388  void POWModNOut(
389  const bitCapInt& base, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length);
390  void CMUL(const bitCapInt& toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length,
391  const std::vector<bitLenInt>& controls);
392  void CDIV(const bitCapInt& toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length,
393  const std::vector<bitLenInt>& controls);
394  void CMULModNOut(const bitCapInt& toMul, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart,
395  bitLenInt length, const std::vector<bitLenInt>& controls);
396  void CIMULModNOut(const bitCapInt& toMul, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart,
397  bitLenInt length, const std::vector<bitLenInt>& controls);
398  void CPOWModNOut(const bitCapInt& base, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart,
399  bitLenInt length, const std::vector<bitLenInt>& controls);
400  void FullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut);
401  void IFullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut);
402 
403  bitCapInt IndexedLDA(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength,
404  const unsigned char* values, bool resetValue = true);
405  bitCapInt IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength,
406  bitLenInt carryIndex, const unsigned char* values);
407  bitCapInt IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength,
408  bitLenInt carryIndex, const unsigned char* values);
409  void Hash(bitLenInt start, bitLenInt length, const unsigned char* values);
410 
411  void CPhaseFlipIfLess(const bitCapInt& greaterPerm, bitLenInt start, bitLenInt length, bitLenInt flagIndex);
412  void PhaseFlipIfLess(const bitCapInt& greaterPerm, bitLenInt start, bitLenInt length);
413 #endif
414 
415  real1_f Prob(bitLenInt qubit);
416  real1_f CtrlOrAntiProb(bool controlState, bitLenInt control, bitLenInt target);
417  real1_f ProbReg(bitLenInt start, bitLenInt length, const bitCapInt& permutation);
418  void ProbRegAll(bitLenInt start, bitLenInt length, real1* probsArray);
419  real1_f ProbMask(const bitCapInt& mask, const bitCapInt& permutation);
420  void ProbMaskAll(const bitCapInt& mask, real1* probsArray);
421  real1_f ProbParity(const bitCapInt& mask);
422  bool ForceMParity(const bitCapInt& mask, bool result, bool doForce = true);
423  real1_f ExpectationBitsAll(const std::vector<bitLenInt>& bits, const bitCapInt& offset = ZERO_BCI);
424 
425  void SetDevice(int64_t dID);
426  int64_t GetDevice() { return deviceID; }
427 
428  void SetQuantumState(const complex* inputState);
429  void GetQuantumState(complex* outputState);
430  void GetProbs(real1* outputProbs);
431  bitCapInt MAll();
432  complex GetAmplitude(const bitCapInt& perm);
433  void SetAmplitude(const bitCapInt& perm, const complex& amp);
434 
435  real1_f SumSqrDiff(QInterfacePtr toCompare) { return SumSqrDiff(std::dynamic_pointer_cast<QEngineOCL>(toCompare)); }
436  real1_f SumSqrDiff(QEngineOCLPtr toCompare);
437 
438  void NormalizeState(
439  real1_f nrm = REAL1_DEFAULT_ARG, real1_f norm_thresh = REAL1_DEFAULT_ARG, real1_f phaseArg = ZERO_R1_F);
440  void UpdateRunningNorm(real1_f norm_thresh = REAL1_DEFAULT_ARG);
441  void Finish() { clFinish(); };
442  bool isFinished() { return wait_queue_items.empty(); };
443 
446 
447  void PopQueue(bool isDispatch);
448  void DispatchQueue();
449 
450 protected:
// Registers `size` with the OCLEngine singleton's active-allocation counter for deviceID.
// If a device context is set and the resulting total exceeds its global allocation limit,
// bad_alloc is thrown; otherwise `size` is added to this engine's totalOclAllocSize.
// NOTE(review): the listing skips one source line immediately before the throw.
451  void AddAlloc(size_t size)
452  {
453  size_t currentAlloc = OCLEngine::Instance().AddToActiveAllocSize(deviceID, size);
454  if (device_context && (currentAlloc > device_context->GetGlobalAllocLimit())) {
456  throw bad_alloc("VRAM limits exceeded in QEngineOCL::AddAlloc()");
457  }
458  totalOclAllocSize += size;
459  }
// Reduces this engine's totalOclAllocSize by `size`.
// NOTE(review): the listing skips one source line at the top of the body, so there may be
// additional bookkeeping that is not visible here.
460  void SubtractAlloc(size_t size)
461  {
463  totalOclAllocSize -= size;
464  }
465 
// Creates a cl::Buffer in this engine's context with the given flags, size and optional host pointer.
// Creation is attempted up to three times: directly, after clFinish(), and after clFinish(true).
// If the last attempt fails, bad_alloc is thrown for CL_MEM_OBJECT_ALLOCATION_FAILURE,
// CL_OUT_OF_HOST_MEMORY and CL_INVALID_BUFFER_SIZE, and std::runtime_error for any other code.
// NOTE(review): the listing skips one source line right after the opening brace.
466  BufferPtr MakeBuffer(cl_mem_flags flags, size_t size, void* host_ptr = NULL)
467  {
469 
470  cl_int error;
471  BufferPtr toRet = std::make_shared<cl::Buffer>(context, flags, size, host_ptr, &error);
472  if (error == CL_SUCCESS) {
473  // Success
474  return toRet;
475  }
476 
477  // Soft finish (just for this QEngineOCL)
478  clFinish();
479 
480  toRet = std::make_shared<cl::Buffer>(context, flags, size, host_ptr, &error);
481  if (error == CL_SUCCESS) {
482  // Success after clearing QEngineOCL queue
483  return toRet;
484  }
485 
486  // Hard finish (for the unique OpenCL device)
487  clFinish(true);
488 
489  toRet = std::make_shared<cl::Buffer>(context, flags, size, host_ptr, &error);
490  if (error != CL_SUCCESS) {
// Allocation-related codes become bad_alloc; anything else is reported with its numeric code.
491  if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) {
492  throw bad_alloc("CL_MEM_OBJECT_ALLOCATION_FAILURE in QEngineOCL::MakeBuffer()");
493  }
494  if (error == CL_OUT_OF_HOST_MEMORY) {
495  throw bad_alloc("CL_OUT_OF_HOST_MEMORY in QEngineOCL::MakeBuffer()");
496  }
497  if (error == CL_INVALID_BUFFER_SIZE) {
498  throw bad_alloc("CL_INVALID_BUFFER_SIZE in QEngineOCL::MakeBuffer()");
499  }
500  throw std::runtime_error("OpenCL error code on buffer allocation attempt: " + std::to_string(error));
501  }
502 
503  return toRet;
504  }
505 
// Switches the engine between host-memory and device-memory state storage.
// Does nothing when `useHostMem` already equals usingHostRam. Otherwise the current state is read
// into a newly allocated host vector; when switching to host memory that vector becomes stateVec,
// and when switching away stateVec is cleared and the copy is written into *stateBuffer with a
// blocking enqueueWriteBuffer (CL_TRUE). usingHostRam is updated at the end.
// NOTE(review): the listing skips one source line in each branch (after the stateVec assignments),
// presumably where stateBuffer is rebuilt - confirm against the real header.
506  void SwitchHostPtr(bool useHostMem)
507  {
508  if (useHostMem == usingHostRam) {
509  return;
510  }
511 
512  std::shared_ptr<complex> copyVec = AllocStateVec(maxQPowerOcl, true);
513  GetQuantumState(copyVec.get());
514 
515  if (useHostMem) {
516  stateVec = copyVec;
518  } else {
519  stateVec = NULL;
521  tryOcl("Failed to write buffer", [&] {
522  return queue.enqueueWriteBuffer(
523  *stateBuffer, CL_TRUE, 0U, sizeof(complex) * maxQPowerOcl, copyVec.get(), ResetWaitEvents().get());
524  });
// The write above is blocking, so the wait references and the temporary copy can be released here.
525  wait_refs.clear();
526  copyVec.reset();
527  }
528 
529  usingHostRam = useHostMem;
530  }
531 
532  void QueueCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector<BufferPtr> args,
533  size_t localBuffSize = 0U, size_t deallocSize = 0U)
534  {
535  if (localBuffSize > device_context->GetLocalSize()) {
536  throw bad_alloc("Local memory limits exceeded in QEngineOCL::QueueCall()");
537  }
538  AddQueueItem(QueueItem(api_call, workItemCount, localGroupSize, deallocSize, args, localBuffSize));
539  }
540 
541  void QueueSetDoNormalize(bool doNorm) { AddQueueItem(QueueItem(doNorm)); }
542  void QueueSetRunningNorm(real1_f runningNrm) { AddQueueItem(QueueItem(runningNrm)); }
// Appends `item` to wait_queue_items while holding queue_mutex. If the queue was empty before
// the push, DispatchQueue() is called after the lock has been released.
// NOTE(review): the listing skips one source line inside the locked scope, before the empty() check.
543  void AddQueueItem(const QueueItem& item)
544  {
545  bool isBase;
// The always-true `if` only provides a scope so the lock_guard is released before dispatching.
546  // For lock_guard:
547  if (true) {
548  std::lock_guard<std::mutex> lock(queue_mutex);
550  isBase = wait_queue_items.empty();
551  wait_queue_items.push_back(item);
552  }
553 
554  if (isBase) {
555  DispatchQueue();
556  }
557  }
558 
559  real1_f GetExpectation(bitLenInt valueStart, bitLenInt valueLength);
560 
561  std::shared_ptr<complex> AllocStateVec(bitCapIntOcl elemCount, bool doForceAlloc = false);
562  void FreeStateVec() { stateVec = NULL; }
563  void FreeAll();
564  void ResetStateBuffer(BufferPtr nStateBuffer);
565  BufferPtr MakeStateVecBuffer(std::shared_ptr<complex> nStateVec);
566  void ReinitBuffer();
567 
568  void Compose(OCLAPI apiCall, const bitCapIntOcl* bciArgs, QEngineOCLPtr toCopy);
569 
570  void InitOCL(int64_t devID);
572 
573  real1_f ParSum(real1* toSum, bitCapIntOcl maxI);
574 
585  void LockSync(cl_map_flags flags = (CL_MAP_READ | CL_MAP_WRITE));
596  void UnlockSync();
597 
604  void clFinish(bool doHard = false);
605 
609  void clDump();
610 
611  size_t FixWorkItemCount(size_t maxI, size_t wic)
612  {
613  if (wic > maxI) {
614  // Guaranteed to be a power of two
615  return maxI;
616  }
617 
618  // Otherwise, clamp to a power of two
619  return pow2Ocl(log2Ocl(wic));
620  }
621 
/// Adjusts a requested group size `gs` against the work item count `wic`.
/// A group size larger than `wic` is reduced to `wic`. Otherwise the remainder of
/// `wic / gs` is subtracted from `gs`, which leaves `gs` unchanged when it divides `wic` evenly.
size_t FixGroupSize(size_t wic, size_t gs)
{
    const bool groupExceedsWork = gs > wic;
    return groupExceedsWork ? wic : (gs - (wic % gs));
}
630 
631  void DecomposeDispose(bitLenInt start, bitLenInt length, QEngineOCLPtr dest);
632 
633  using QEngine::Apply2x2;
// Convenience overload: forwards every argument to the Apply2x2 overload that takes a
// SPECIAL_2X2 selector, passing SPECIAL_2X2::NONE for it.
634  void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex* mtrx, bitLenInt bitCount,
635  const bitCapIntOcl* qPowersSorted, bool doCalcNorm, real1_f norm_thresh = REAL1_DEFAULT_ARG)
636  {
637  Apply2x2(offset1, offset2, mtrx, bitCount, qPowersSorted, doCalcNorm, SPECIAL_2X2::NONE, norm_thresh);
638  }
639  void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex* mtrx, bitLenInt bitCount,
640  const bitCapIntOcl* qPowersSorted, bool doCalcNorm, SPECIAL_2X2 special,
641  real1_f norm_thresh = REAL1_DEFAULT_ARG);
642 
643  void BitMask(bitCapIntOcl mask, OCLAPI api_call, real1_f phase = (real1_f)PI_R1);
644 
645  void ApplyM(const bitCapInt& mask, bool result, const complex& nrm);
646  void ApplyM(const bitCapInt& mask, const bitCapInt& result, const complex& nrm);
647 
648  /* Utility functions used by the operations above. */
649  void WaitCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector<BufferPtr> args,
650  size_t localBuffSize = 0U);
651  EventVecPtr ResetWaitEvents(bool waitQueue = true);
652  void ApplyMx(OCLAPI api_call, const bitCapIntOcl* bciArgs, const complex& nrm);
653  real1_f Probx(OCLAPI api_call, const bitCapIntOcl* bciArgs);
654 
655  void ArithmeticCall(OCLAPI api_call, const bitCapIntOcl (&bciArgs)[BCI_ARG_LEN], const unsigned char* values = NULL,
656  bitCapIntOcl valuesLength = 0U);
657  void CArithmeticCall(OCLAPI api_call, const bitCapIntOcl (&bciArgs)[BCI_ARG_LEN], bitCapIntOcl* controlPowers,
658  bitLenInt controlLen, const unsigned char* values = NULL, bitCapIntOcl valuesLength = 0U);
659  void ROx(OCLAPI api_call, bitLenInt shift, bitLenInt start, bitLenInt length);
660 
661 #if ENABLE_ALU
662  void INCDECC(const bitCapInt& toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
663  void INCDECSC(const bitCapInt& toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
664  void INCDECSC(
665  const bitCapInt& toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex, bitLenInt carryIndex);
666 #if ENABLE_BCD
667  void INCDECBCDC(const bitCapInt& toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
668 #endif
669 
670  void INT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length);
671  void CINT(
672  OCLAPI api_call, bitCapIntOcl toMod, bitLenInt start, bitLenInt length, const std::vector<bitLenInt>& controls);
673  void INTC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
674  void INTS(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex);
675  void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
676  void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex,
677  bitLenInt carryIndex);
678 #if ENABLE_BCD
679  void INTBCD(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length);
680  void INTBCDC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
681 #endif
682  void xMULx(OCLAPI api_call, const bitCapIntOcl* bciArgs, BufferPtr controlBuffer);
683  void MULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length);
684  void MULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart,
685  bitLenInt length);
686  void CMULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length,
687  const std::vector<bitLenInt>& controls);
688  void CMULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart,
689  bitLenInt length, const std::vector<bitLenInt>& controls);
690  void FullAdx(
691  bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut, OCLAPI api_call);
692  void PhaseFlipX(OCLAPI api_call, const bitCapIntOcl* bciArgs);
693 
694  bitCapIntOcl OpIndexed(OCLAPI api_call, bitCapIntOcl carryIn, bitLenInt indexStart, bitLenInt indexLength,
695  bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char* values);
696 #endif
697 
698  void ClearBuffer(BufferPtr buff, bitCapIntOcl offset, bitCapIntOcl size);
699 };
700 
701 } // namespace Qrack
size_t AddToActiveAllocSize(const int64_t &dev, size_t size)
Definition: oclengine.hpp:307
size_t SubtractFromActiveAllocSize(const int64_t &dev, size_t size)
Definition: oclengine.hpp:324
static OCLEngine & Instance()
Get a pointer to the Instance of the singleton. (The instance will be instantiated,...
Definition: oclengine.hpp:250
BufferPtr ulongBuffer
Definition: qengine_cuda.hpp:123
~PoolItem()
Definition: qengine_opencl.hpp:124
BufferPtr cmplxBuffer
Definition: qengine_cuda.hpp:121
BufferPtr MakeBuffer(size_t size)
Definition: qengine_cuda.hpp:140
BufferPtr realBuffer
Definition: qengine_cuda.hpp:122
std::shared_ptr< real1 > angleArray
Definition: qengine_cuda.hpp:126
BufferPtr MakeBuffer(const cl::Context &context, size_t size)
Definition: qengine_opencl.hpp:127
PoolItem(cl::Context &context)
Definition: qengine_opencl.hpp:115
std::shared_ptr< real1 > probArray
Definition: qengine_cuda.hpp:125
OpenCL enhanced QEngineCPU implementation.
Definition: qengine_opencl.hpp:168
real1_f ProbMask(const bitCapInt &mask, const bitCapInt &permutation)
Direct measure of masked permutation probability.
Definition: opencl.cpp:1795
void SetQuantumState(const complex *inputState)
Set arbitrary pure quantum state, in unsigned int permutation basis.
Definition: opencl.cpp:2957
cl::Context context
Definition: qengine_opencl.hpp:183
void GetProbs(real1 *outputProbs)
Get all probabilities, in unsigned int permutation basis.
Definition: opencl.cpp:3068
real1_f GetExpectation(bitLenInt valueStart, bitLenInt valueLength)
Definition: opencl.cpp:2020
void IFullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut)
Inverse of FullAdd.
Definition: opencl.cpp:2512
void Copy(QInterfacePtr orig)
Definition: qengine_opencl.hpp:250
QEnginePtr CloneEmpty()
Clone this QEngine's settings, with a zeroed state vector.
Definition: opencl.cpp:3180
void checkCallbackError()
Definition: qengine_opencl.hpp:203
void INTBCDC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Add or Subtract integer (BCD, with carry)
Definition: opencl.cpp:2402
void FullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut)
Quantum analog of classical "Full Adder" gate.
Definition: opencl.cpp:2506
size_t nrmGroupSize
Definition: qengine_opencl.hpp:175
void ArithmeticCall(OCLAPI api_call, const bitCapIntOcl(&bciArgs)[BCI_ARG_LEN], const unsigned char *values=NULL, bitCapIntOcl valuesLength=0U)
Definition: opencl.cpp:2040
int64_t GetDevice()
Get GPU device ID.
Definition: qengine_opencl.hpp:426
std::unique_ptr< real1[], void(*)(real1 *)> nrmArray
Definition: qengine_opencl.hpp:192
bool usingHostRam
Definition: qengine_opencl.hpp:171
real1_f ProbReg(bitLenInt start, bitLenInt length, const bitCapInt &permutation)
Direct measure of register permutation probability.
Definition: opencl.cpp:1745
void AddQueueItem(const QueueItem &item)
Definition: qengine_opencl.hpp:543
size_t FixWorkItemCount(size_t maxI, size_t wic)
Definition: qengine_opencl.hpp:611
void UpdateRunningNorm(real1_f norm_thresh=REAL1_DEFAULT_ARG)
Force a calculation of the norm of the state vector, in order to make it unit length before the next ...
Definition: opencl.cpp:3259
QEngineOCL(bitLenInt qBitCount, const bitCapInt &initState, qrack_rand_gen_ptr rgp=nullptr, const complex &phaseFac=CMPLX_DEFAULT_ARG, bool doNorm=false, bool randomGlobalPhase=true, bool useHostMem=false, int64_t devID=-1, bool useHardwareRNG=true, bool ignored=false, real1_f norm_thresh=REAL1_EPSILON, std::vector< int64_t > ignored2={}, bitLenInt ignored4=0U, real1_f ignored3=_qrack_qunit_sep_thresh)
Initialize a Qrack::QEngineOCL object.
Definition: opencl.cpp:67
void FullAdx(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut, OCLAPI api_call)
Definition: opencl.cpp:2517
std::list< QueueItem > wait_queue_items
Definition: qengine_opencl.hpp:190
void INCDECC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Common driver method behind INCC and DECC.
Definition: opencl.cpp:2251
DeviceContextPtr device_context
Definition: qengine_opencl.hpp:188
size_t nrmGroupCount
Definition: qengine_opencl.hpp:174
void ClearBuffer(BufferPtr buff, bitCapIntOcl offset, bitCapIntOcl size)
Definition: opencl.cpp:3352
std::vector< EventVecPtr > wait_refs
Definition: qengine_opencl.hpp:189
void PhaseRootNMask(bitLenInt n, const bitCapInt &mask)
Masked PhaseRootN gate.
Definition: opencl.cpp:760
void IMULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Inverse of multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2482
bitLenInt Compose(QInterfacePtr toCopy)
Combine another QInterface with this one, after the last bit index of this one.
Definition: qengine_opencl.hpp:360
bool unlockHostMem
Definition: qengine_opencl.hpp:172
void XMask(const bitCapInt &mask)
Masked X gate.
Definition: opencl.cpp:732
void INC(const bitCapInt &toAdd, bitLenInt start, bitLenInt length)
Increment integer (without sign, with carry)
Definition: opencl.cpp:2203
void SwitchHostPtr(bool useHostMem)
Switch to/from host/device state vector buffer.
Definition: qengine_opencl.hpp:506
void CINT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt start, bitLenInt length, const std::vector< bitLenInt > &controls)
Add or Subtract integer (without sign or carry, with controls)
Definition: opencl.cpp:2165
void INTS(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex)
Add or Subtract integer (with overflow, without carry)
Definition: opencl.cpp:2257
void ApplyMx(OCLAPI api_call, const bitCapIntOcl *bciArgs, const complex &nrm)
Definition: opencl.cpp:1224
size_t totalOclAllocSize
Definition: qengine_opencl.hpp:176
bitCapInt IndexedLDA(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, const unsigned char *values, bool resetValue=true)
Set 8 bit register bits based on read from classical memory.
Definition: opencl.cpp:2801
void ResetStateBuffer(BufferPtr nStateBuffer)
Definition: opencl.cpp:658
void ROL(bitLenInt shift, bitLenInt start, bitLenInt length)
"Circular shift left" - shift bits left, and carry last bits.
Definition: opencl.cpp:2135
void UniformlyControlledSingleBit(const std::vector< bitLenInt > &controls, bitLenInt qubitIndex, const complex *mtrxs, const std::vector< bitCapInt > &mtrxSkipPowers, const bitCapInt &mtrxSkipValueMask)
Definition: opencl.cpp:1069
real1_f CtrlOrAntiProb(bool controlState, bitLenInt control, bitLenInt target)
Definition: opencl.cpp:1710
void CMUL(const bitCapInt &toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication by integer.
Definition: opencl.cpp:2542
void WaitCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector< BufferPtr > args, size_t localBuffSize=0U)
Definition: opencl.cpp:404
PoolItemPtr GetFreePoolItem()
Definition: opencl.cpp:374
real1_f SumSqrDiff(QInterfacePtr toCompare)
Calculates (1 - <\psi_e|\psi_c>) between states |\psi_c> and |\psi_e>.
Definition: qengine_opencl.hpp:435
void SetAmplitudePage(const complex *pagePtr, bitCapIntOcl offset, bitCapIntOcl length)
Copy a "page" of amplitudes from pagePtr into this QEngine's internal state.
Definition: opencl.cpp:153
void clDump()
Dumps the remaining asynchronous wait event list or queue of OpenCL events, for the current queue.
Definition: opencl.cpp:354
void ZeroAmplitudes()
Set all amplitudes to 0, and optionally temporarily deallocate state vector RAM.
Definition: opencl.cpp:99
complex GetAmplitude(const bitCapInt &perm)
Get the representational amplitude of a full permutation.
Definition: opencl.cpp:3003
QInterfacePtr Copy()
Copy this QInterface.
Definition: opencl.cpp:3190
void InitOCL(int64_t devID)
Definition: opencl.cpp:656
std::mutex queue_mutex
Definition: qengine_opencl.hpp:181
void BitMask(bitCapIntOcl mask, OCLAPI api_call, real1_f phase=(real1_f) PI_R1)
Definition: opencl.cpp:1028
void MULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Definition: opencl.cpp:2667
std::shared_ptr< complex > AllocStateVec(bitCapIntOcl elemCount, bool doForceAlloc=false)
Definition: opencl.cpp:3306
void NormalizeState(real1_f nrm=REAL1_DEFAULT_ARG, real1_f norm_thresh=REAL1_DEFAULT_ARG, real1_f phaseArg=ZERO_R1_F)
Apply the normalization factor found by UpdateRunningNorm() or on the fly by a single bit gate.
Definition: opencl.cpp:3201
void CDIV(const bitCapInt &toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled division by integer.
Definition: opencl.cpp:2564
void PhaseParity(real1_f radians, const bitCapInt &mask)
Parity phase gate.
Definition: opencl.cpp:745
void QueueCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector< BufferPtr > args, size_t localBuffSize=0U, size_t deallocSize=0U)
Definition: qengine_opencl.hpp:532
void SetPermutation(const bitCapInt &perm, const complex &phaseFac=CMPLX_DEFAULT_ARG)
Set to a specific permutation of all qubits.
Definition: opencl.cpp:660
void CIMULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Inverse of controlled multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2606
void CMULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Definition: opencl.cpp:2758
void INT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length)
Add or Subtract integer (without sign or carry)
Definition: opencl.cpp:2139
cl_int callbackError
Definition: qengine_opencl.hpp:173
void CMULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Definition: opencl.cpp:2715
bitCapInt IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Subtract based on an indexed load from classical memory.
Definition: opencl.cpp:2891
void Finish()
If asynchronous work is still running, block until it finishes.
Definition: qengine_opencl.hpp:441
cl_map_flags lockSyncFlags
Definition: qengine_opencl.hpp:178
void CopyStateVec(QEnginePtr src)
Exactly copy the state vector of a different QEngine instance.
Definition: opencl.cpp:114
void CPOWModNOut(const bitCapInt &base, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled, raise a classical base to a quantum power, modulo N, (out of place)
Definition: opencl.cpp:2624
void MULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Definition: opencl.cpp:2688
std::shared_ptr< complex > stateVec
Definition: qengine_opencl.hpp:180
void Hash(bitLenInt start, bitLenInt length, const unsigned char *values)
Set 8 bit register bits based on read from classical memory.
Definition: opencl.cpp:2898
void clFinish(bool doHard=false)
Finishes the asynchronous wait event list or queue of OpenCL events.
Definition: opencl.cpp:330
void DIV(const bitCapInt &toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Divide by integer.
Definition: opencl.cpp:2461
QInterfacePtr Clone()
Clone this QInterface.
Definition: opencl.cpp:3155
void PhaseFlipX(OCLAPI api_call, const bitCapIntOcl *bciArgs)
Definition: opencl.cpp:2907
void CPhaseFlipIfLess(const bitCapInt &greaterPerm, bitLenInt start, bitLenInt length, bitLenInt flagIndex)
The 6502 uses its carry flag also as a greater-than/less-than flag, for the CMP operation.
Definition: opencl.cpp:2927
void INCDECSC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Increment integer (with sign, with carry)
Definition: opencl.cpp:2359
void Z(bitLenInt target)
Z gate.
Definition: opencl.cpp:696
void AddAlloc(size_t size)
Definition: qengine_opencl.hpp:451
BufferPtr MakeBuffer(cl_mem_flags flags, size_t size, void *host_ptr=NULL)
Definition: qengine_opencl.hpp:466
virtual void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, bitCapIntOcl const *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)=0
void INTC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Add or Subtract integer (without sign, with carry)
Definition: opencl.cpp:2220
void INCS(const bitCapInt &toAdd, bitLenInt start, bitLenInt length, bitLenInt carryIndex)
Increment integer (with sign, without carry)
Definition: opencl.cpp:2288
void ProbRegAll(bitLenInt start, bitLenInt length, real1 *probsArray)
Definition: opencl.cpp:1757
bitCapIntOcl GetMaxSize()
Definition: qengine_opencl.hpp:335
void CMULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2585
void Copy(QEngineOCLPtr orig)
Definition: qengine_opencl.hpp:251
size_t FixGroupSize(size_t wic, size_t gs)
Definition: qengine_opencl.hpp:622
void SubtractAlloc(size_t size)
Definition: qengine_opencl.hpp:460
void ReinitBuffer()
Definition: opencl.cpp:3345
void FreeAll()
Definition: opencl.cpp:89
void PhaseFlipIfLess(const bitCapInt &greaterPerm, bitLenInt start, bitLenInt length)
This is an expedient for an adaptive Grover's search for a function's global minimum.
Definition: opencl.cpp:2943
void GetQuantumState(complex *outputState)
Get pure quantum state, in unsigned int permutation basis.
Definition: opencl.cpp:3052
void ROx(OCLAPI api_call, bitLenInt shift, bitLenInt start, bitLenInt length)
Definition: opencl.cpp:2110
void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, const bitCapIntOcl *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)
Definition: qengine_opencl.hpp:634
virtual QInterfacePtr Decompose(bitLenInt start, bitLenInt length)
Definition: qengine.hpp:291
void CINC(const bitCapInt &toAdd, bitLenInt inOutStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Add integer (without sign, with controls)
Definition: opencl.cpp:2208
cl::CommandQueue queue
Definition: qengine_opencl.hpp:182
void SetAmplitude(const bitCapInt &perm, const complex &amp)
Sets the representational amplitude of a full permutation.
Definition: opencl.cpp:3021
void tryOcl(std::string message, std::function< int()> oclCall)
Definition: qengine_opencl.hpp:216
void QueueSetRunningNorm(real1_f runningNrm)
Add an operation to the (OpenCL) queue, to set the value of runningNorm, which is the normalization c...
Definition: qengine_opencl.hpp:542
void QueueSetDoNormalize(bool doNorm)
Add an operation to the (OpenCL) queue, to set the value of doNormalize, which controls whether to au...
Definition: qengine_opencl.hpp:541
bool IsZeroAmplitude()
Returns "true" only if amplitudes are all totally 0.
Definition: qengine_opencl.hpp:315
bitLenInt Compose(QEngineOCLPtr toCopy)
Definition: opencl.cpp:1369
void CUniformParityRZ(const std::vector< bitLenInt > &controls, const bitCapInt &mask, real1_f angle)
If the controls are set and the target qubit set parity is odd, this applies a phase factor of $e^{i \cdot \mathrm{angle}}$.
Definition: opencl.cpp:1172
BufferPtr stateBuffer
Definition: qengine_opencl.hpp:186
void MUL(const bitCapInt &toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Multiply by integer.
Definition: opencl.cpp:2444
bitCapIntOcl OpIndexed(OCLAPI api_call, bitCapIntOcl carryIn, bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Add or Subtract based on an indexed load from classical memory.
Definition: opencl.cpp:2835
void PopQueue(bool isDispatch)
Definition: opencl.cpp:413
void UnlockSync()
Unlocks synchronization between the state vector buffer and general RAM, so the state vector can be o...
Definition: opencl.cpp:310
EventVecPtr ResetWaitEvents(bool waitQueue=true)
Definition: opencl.cpp:387
void DispatchQueue()
Definition: opencl.cpp:446
bitLenInt Compose(QInterfacePtr toCopy, bitLenInt start)
Compose() a QInterface peer, inserting its qubits into index order at the start index.
Definition: qengine_opencl.hpp:362
void MULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2472
void SetDevice(int64_t dID)
Set GPU device ID.
Definition: opencl.cpp:523
bool didInit
Definition: qengine_opencl.hpp:170
void xMULx(OCLAPI api_call, const bitCapIntOcl *bciArgs, BufferPtr controlBuffer)
Definition: opencl.cpp:2639
real1_f ParSum(real1 *toSum, bitCapIntOcl maxI)
Definition: opencl.cpp:644
bool isFinished()
Returns "false" if asynchronous work is still running, and "true" if all previously dispatched asynch...
Definition: qengine_opencl.hpp:442
void GetAmplitudePage(complex *pagePtr, bitCapIntOcl offset, bitCapIntOcl length)
Copy a "page" of amplitudes from this QEngine's internal state, into pagePtr.
Definition: opencl.cpp:138
void UniformParityRZ(const bitCapInt &mask, real1_f angle)
If the target qubit set parity is odd, this applies a phase factor of $e^{i \cdot \mathrm{angle}}$.
Definition: opencl.cpp:1138
void Phase(const complex &topLeft, const complex &bottomRight, bitLenInt qubitIndex)
Apply a single bit transformation that only affects phase.
Definition: opencl.cpp:715
real1_f Probx(OCLAPI api_call, const bitCapIntOcl *bciArgs)
Definition: opencl.cpp:1659
bool ForceMParity(const bitCapInt &mask, bool result, bool doForce=true)
Act as if a measurement of parity of the masked set of qubits was applied, except force the (usual...
Definition: opencl.cpp:1951
real1_f ExpectationBitsAll(const std::vector< bitLenInt > &bits, const bitCapInt &offset=ZERO_BCI)
Get permutation expectation value of bits.
Definition: opencl.cpp:1982
void Dispose(bitLenInt start, bitLenInt length)
Minimally decompose a set of contiguous bits from the separably composed unit, and discard the separa...
Definition: opencl.cpp:1592
static const bitCapIntOcl OclMemDenom
1 / OclMemDenom is the maximum fraction of total OCL device RAM that a single state vector should occ...
Definition: qengine_opencl.hpp:279
int64_t deviceID
Definition: qengine_opencl.hpp:177
bitCapInt MAll()
Measure permutation state of all coherent bits.
Definition: opencl.cpp:2971
void ShuffleBuffers(QEnginePtr engine)
Swap the high half of this engine with the low half of another.
Definition: opencl.cpp:230
void CArithmeticCall(OCLAPI api_call, const bitCapIntOcl(&bciArgs)[BCI_ARG_LEN], bitCapIntOcl *controlPowers, bitLenInt controlLen, const unsigned char *values=NULL, bitCapIntOcl valuesLength=0U)
Definition: opencl.cpp:2045
void LockSync(cl_map_flags flags=(CL_MAP_READ|CL_MAP_WRITE))
Locks synchronization between the state vector buffer and general RAM, so the state vector can be dir...
Definition: opencl.cpp:287
~QEngineOCL()
Definition: qengine_opencl.hpp:305
void INCDECBCDC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Increment integer (BCD, with carry)
Definition: opencl.cpp:2437
complex permutationAmp
Definition: qengine_opencl.hpp:179
BufferPtr nrmBuffer
Definition: qengine_opencl.hpp:187
void INCBCD(const bitCapInt &toAdd, bitLenInt start, bitLenInt length)
Increment integer (BCD)
Definition: opencl.cpp:2396
bitCapInt IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Add based on an indexed load from classical memory.
Definition: opencl.cpp:2884
bitLenInt Allocate(bitLenInt start, bitLenInt length)
Allocate new "length" count of |0> state qubits at specified qubit index start position.
Definition: opencl.cpp:1648
real1_f ProbParity(const bitCapInt &mask)
Overall probability of any odd permutation of the masked set of bits.
Definition: opencl.cpp:1930
virtual bool isOpenCL()
Returns "true" if current simulation is OpenCL-based.
Definition: qengine_opencl.hpp:313
void ApplyM(const bitCapInt &mask, bool result, const complex &nrm)
Definition: opencl.cpp:1248
std::vector< PoolItemPtr > poolItems
Definition: qengine_opencl.hpp:191
void FreeStateVec()
Definition: qengine_opencl.hpp:562
virtual void X(bitLenInt q)
Definition: qengine.hpp:194
void INTBCD(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length)
Add or Subtract integer (BCD)
Definition: opencl.cpp:2366
void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Add or Subtract integer (with sign, with carry)
Definition: opencl.cpp:2338
BufferPtr MakeStateVecBuffer(std::shared_ptr< complex > nStateVec)
Definition: opencl.cpp:3332
void POWModNOut(const bitCapInt &base, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Raise a classical base to a quantum power, modulo N, (out of place)
Definition: opencl.cpp:2491
void ProbMaskAll(const bitCapInt &mask, real1 *probsArray)
Direct measure of masked permutation probability.
Definition: opencl.cpp:1845
real1_f FirstNonzeroPhase()
Get phase of lowest permutation nonzero amplitude.
Definition: qengine_opencl.hpp:316
void Invert(const complex &topRight, const complex &bottomLeft, bitLenInt qubitIndex)
Apply a single bit transformation that reverses bit probability and might affect phase.
Definition: opencl.cpp:703
void DecomposeDispose(bitLenInt start, bitLenInt length, QEngineOCLPtr dest)
Definition: opencl.cpp:1414
real1_f Prob(bitLenInt qubit)
PSEUDO-QUANTUM Direct measure of bit probability to be in |1> state.
Definition: opencl.cpp:1690
Abstract QEngine implementation, for all "Schroedinger method" engines.
Definition: qengine.hpp:31
virtual void Copy(QInterfacePtr orig)
Copy this QInterface.
Definition: qinterface.hpp:222
bitCapIntOcl maxQPowerOcl
Definition: qengine.hpp:40
virtual void Decompose(bitLenInt start, QInterfacePtr dest)=0
Minimally decompose a set of contiguous bits from the separably composed unit, into "destination".
virtual void X(bitLenInt qubit)
X gate.
Definition: qinterface.hpp:1084
virtual void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, bitCapIntOcl const *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)=0
virtual bitLenInt Allocate(bitLenInt length)
Allocate new "length" count of |0> state qubits at end of qubit index position.
Definition: qinterface.hpp:470
virtual bitLenInt Compose(QInterfacePtr toCopy)
Combine another QInterface with this one, after the last bit index of this one.
Definition: qinterface.hpp:364
Definition: qengine_gpu_util.hpp:21
Half-precision floating-point type.
Definition: half.hpp:2222
virtual void UniformlyControlledSingleBit(const std::vector< bitLenInt > &controls, bitLenInt qubit, const complex *mtrxs)
Apply a "uniformly controlled" arbitrary single bit unitary transformation.
Definition: qinterface.hpp:627
virtual void Phase(const complex &topLeft, const complex &bottomRight, bitLenInt qubit)
Apply a single bit transformation that only affects phase.
Definition: qinterface.hpp:516
virtual void Invert(const complex &topRight, const complex &bottomLeft, bitLenInt qubit)
Apply a single bit transformation that reverses bit probability and might affect phase.
Definition: qinterface.hpp:529
virtual void Z(bitLenInt qubit)
Z gate.
Definition: qinterface.hpp:1117
virtual void U(bitLenInt target, real1_f theta, real1_f phi, real1_f lambda)
General unitary gate.
Definition: rotational.cpp:18
virtual real1_f FirstNonzeroPhase()
Get phase of lowest permutation nonzero amplitude.
Definition: qinterface.hpp:2985
GLOSSARY: bitLenInt - "bit-length integer" - unsigned integer ID of qubit position in register bitCap...
Definition: complex16x2simd.hpp:25
std::shared_ptr< QEngine > QEnginePtr
Definition: qrack_types.hpp:151
std::shared_ptr< OCLDeviceContext > DeviceContextPtr
Definition: oclengine.hpp:47
std::shared_ptr< QInterface > QInterfacePtr
Definition: qinterface.hpp:29
const real1_f _qrack_qunit_sep_thresh
Definition: qrack_functions.hpp:235
std::shared_ptr< QEngineOCL > QEngineOCLPtr
Definition: qengine_opencl.hpp:34
std::shared_ptr< EventVec > EventVecPtr
Definition: oclengine.hpp:51
bitLenInt log2Ocl(bitCapIntOcl n)
Definition: qrack_functions.hpp:88
void U(quid sid, bitLenInt q, real1_f theta, real1_f phi, real1_f lambda)
(External API) 3-parameter unitary gate
Definition: wasm_api.cpp:1143
std::complex< real1 > complex
Definition: qrack_types.hpp:128
unsigned long cl_map_flags
Definition: qengine_cuda.hpp:31
QRACK_CONST real1 REAL1_EPSILON
Definition: qrack_types.hpp:200
QRACK_CONST real1 ONE_R1
Definition: qrack_types.hpp:185
float real1_f
Definition: qrack_types.hpp:95
QRACK_CONST complex CMPLX_DEFAULT_ARG
Definition: qrack_types.hpp:257
std::shared_ptr< PoolItem > PoolItemPtr
Definition: qengine_cuda.hpp:162
SPECIAL_2X2
Definition: qengine_gpu_util.hpp:19
@ NONE
Definition: qengine_gpu_util.hpp:19
OCLAPI
Definition: oclapi.hpp:19
std::shared_ptr< void > BufferPtr
Definition: qengine_cuda.hpp:45
QRACK_CONST real1 PI_R1
Definition: qrack_types.hpp:178
unsigned long cl_mem_flags
Definition: qengine_cuda.hpp:32
const bitCapInt ZERO_BCI
Definition: qrack_types.hpp:130
bitCapIntOcl pow2Ocl(const bitLenInt &p)
Definition: qrack_functions.hpp:137
#define CL_MAP_WRITE
Definition: qengine_cuda.hpp:36
#define CL_MEM_READ_ONLY
Definition: qengine_cuda.hpp:40
#define CL_MAP_READ
Definition: qengine_cuda.hpp:35
#define BCI_ARG_LEN
Definition: qengine_opencl.hpp:26
#define CMPLX_NORM_LEN
Definition: qengine_opencl.hpp:27
#define REAL_ARG_LEN
Definition: qengine_opencl.hpp:28
#define REAL1_DEFAULT_ARG
Definition: qrack_types.hpp:177
#define bitLenInt
Definition: qrack_types.hpp:38
#define ZERO_R1_F
Definition: qrack_types.hpp:160
#define qrack_rand_gen_ptr
Definition: qrack_types.hpp:156
#define bitCapInt
Definition: qrack_types.hpp:62
#define bitCapIntOcl
Definition: qrack_types.hpp:50
#define QRACK_ALIGN_SIZE
Definition: qrack_types.hpp:157
Definition: qengine_cuda.hpp:50
QueueItem(OCLAPI ac, size_t wic, size_t lgs, size_t ds, std::vector< BufferPtr > b, size_t lbs)
Definition: qengine_opencl.hpp:63
QueueItem(real1_f runningNrm)
Definition: qengine_opencl.hpp:91
bool doNorm
Definition: qengine_cuda.hpp:59
size_t workItemCount
Definition: qengine_cuda.hpp:52
std::vector< BufferPtr > buffers
Definition: qengine_cuda.hpp:55
size_t deallocSize
Definition: qengine_cuda.hpp:54
QueueItem()
Definition: qengine_opencl.hpp:49
bool isSetRunningNorm
Definition: qengine_cuda.hpp:58
QueueItem(bool doNrm)
Definition: qengine_opencl.hpp:77
size_t localBuffSize
Definition: qengine_cuda.hpp:56
OCLAPI api_call
Definition: qengine_cuda.hpp:51
bool isSetDoNorm
Definition: qengine_cuda.hpp:57
size_t localGroupSize
Definition: qengine_cuda.hpp:53
real1 runningNorm
Definition: qengine_cuda.hpp:60