Qrack  9.0
General classical-emulating-quantum development framework
qengine_opencl.hpp
Go to the documentation of this file.
1 //
3 // (C) Daniel Strano and the Qrack contributors 2017-2023. All rights reserved.
4 //
5 // This is a multithreaded, universal quantum register simulation, allowing
6 // (nonphysical) register cloning and direct measurement of probability and
7 // phase, to leverage what advantages classical emulation of qubits can have.
8 //
9 // Licensed under the GNU Lesser General Public License V3.
10 // See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html
11 // for details.
12 
13 #pragma once
14 
15 #include "common/oclengine.hpp"
16 #include "qengine.hpp"
17 #include "qengine_gpu_util.hpp"
18 
19 #if !ENABLE_OPENCL
20 #error OpenCL has not been enabled
21 #endif
22 
23 #include <list>
24 #include <mutex>
25 
26 #define BCI_ARG_LEN 10
27 #define CMPLX_NORM_LEN 6
28 #define REAL_ARG_LEN 2
29 
30 namespace Qrack {
31 
32 typedef std::shared_ptr<cl::Buffer> BufferPtr;
33 
34 class QEngineOCL;
35 typedef std::shared_ptr<QEngineOCL> QEngineOCLPtr;
36 
// One entry of the engine's asynchronous work queue (see wait_queue_items).
// An entry is either a kernel call (api_call plus sizes and buffer arguments),
// or a request to set the doNorm flag, or a request to set the running norm;
// isSetDoNorm / isSetRunningNorm tell which of the latter two it is.
// NOTE(review): this listing omits some original lines (the api_call and
// runningNorm member declarations and the default constructor's signature).
37 struct QueueItem {
39  size_t workItemCount;
40  size_t localGroupSize;
41  size_t deallocSize;
42  std::vector<BufferPtr> buffers;
43  size_t localBuffSize;
44  bool isSetDoNorm;
45  bool isSetRunningNorm;
46  bool doNorm;
48 
// Initializer list of a constructor whose signature line is not visible here
// (presumably the default constructor -- confirm against the header).
// Note that it sets isSetRunningNorm to true.
50  : api_call()
51  , workItemCount(0U)
52  , localGroupSize(0U)
53  , deallocSize(0U)
54  , buffers()
55  , localBuffSize(0U)
56  , isSetDoNorm(false)
57  , isSetRunningNorm(true)
58  , doNorm(false)
60  {
61  }
62 
// Kernel-call item: stores the API call, work sizes, deallocation size,
// buffer arguments and local buffer size; both "set" flags are false.
63  QueueItem(OCLAPI ac, size_t wic, size_t lgs, size_t ds, std::vector<BufferPtr> b, size_t lbs)
64  : api_call(ac)
65  , workItemCount(wic)
66  , localGroupSize(lgs)
67  , deallocSize(ds)
68  , buffers(b)
69  , localBuffSize(lbs)
70  , isSetDoNorm(false)
71  , isSetRunningNorm(false)
72  , doNorm(false)
74  {
75  }
76 
// "Set doNorm" item: isSetDoNorm is true and doNorm carries the requested value.
// Not explicit, so a bool argument selects this overload.
77  QueueItem(bool doNrm)
78  : api_call()
79  , workItemCount(0U)
80  , localGroupSize(0U)
81  , deallocSize(0U)
82  , buffers()
83  , localBuffSize(0U)
84  , isSetDoNorm(true)
85  , isSetRunningNorm(false)
86  , doNorm(doNrm)
88  {
89  }
90 
// "Set running norm" item: isSetRunningNorm is true and runningNorm carries
// the requested value. Not explicit, so a real1_f argument selects this overload.
91  QueueItem(real1_f runningNrm)
92  : api_call()
93  , workItemCount(0U)
94  , localGroupSize(0U)
95  , deallocSize(0U)
96  , buffers()
97  , localBuffSize(0U)
98  , isSetDoNorm(false)
99  , isSetRunningNorm(true)
100  , doNorm(false)
101  , runningNorm(runningNrm)
102  {
103  }
104 };
105 
106 class PoolItem {
107 public:
111 
112  std::shared_ptr<real1> probArray;
113  std::shared_ptr<real1> angleArray;
114 
115  PoolItem(cl::Context& context)
116  : probArray(NULL)
117  , angleArray(NULL)
118  {
119  cmplxBuffer = MakeBuffer(context, sizeof(complex) * CMPLX_NORM_LEN);
120  realBuffer = MakeBuffer(context, sizeof(real1) * REAL_ARG_LEN);
121  ulongBuffer = MakeBuffer(context, sizeof(bitCapIntOcl) * BCI_ARG_LEN);
122  }
123 
125 
126 protected:
127  BufferPtr MakeBuffer(const cl::Context& context, size_t size)
128  {
129  cl_int error;
130  BufferPtr toRet = std::make_shared<cl::Buffer>(context, CL_MEM_READ_ONLY, size, (void*)NULL, &error);
131  if (error != CL_SUCCESS) {
132  if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) {
133  throw bad_alloc("CL_MEM_OBJECT_ALLOCATION_FAILURE in PoolItem::MakeBuffer()");
134  }
135  if (error == CL_OUT_OF_HOST_MEMORY) {
136  throw bad_alloc("CL_OUT_OF_HOST_MEMORY in PoolItem::MakeBuffer()");
137  }
138  if (error == CL_INVALID_BUFFER_SIZE) {
139  throw bad_alloc("CL_INVALID_BUFFER_SIZE in PoolItem::MakeBuffer()");
140  }
141  throw std::runtime_error("OpenCL error code on buffer allocation attempt: " + std::to_string(error));
142  }
143 
144  return toRet;
145  }
146 };
147 
148 typedef std::shared_ptr<PoolItem> PoolItemPtr;
149 
168 class QEngineOCL : public QEngine {
169 protected:
170  bool didInit;
175  size_t nrmGroupSize;
177  int64_t deviceID;
180  std::shared_ptr<complex> stateVec;
181  std::mutex queue_mutex;
182  cl::CommandQueue queue;
183  cl::Context context;
184  // stateBuffer is allocated as a shared_ptr, because it's the only buffer that will be acted on outside of
185  // QEngineOCL itself, specifically by QEngineOCLMulti.
189  std::vector<EventVecPtr> wait_refs;
190  std::list<QueueItem> wait_queue_items;
191  std::vector<PoolItemPtr> poolItems;
192  std::unique_ptr<real1[], void (*)(real1*)> nrmArray;
193 
194 #if defined(__APPLE__)
195  real1* _aligned_nrm_array_alloc(bitCapIntOcl allocSize)
196  {
197  void* toRet;
198  posix_memalign(&toRet, QRACK_ALIGN_SIZE, allocSize);
199  return (real1*)toRet;
200  }
201 #endif
202 
204  {
205  if (callbackError == CL_SUCCESS) {
206  return;
207  }
208 
209  wait_queue_items.clear();
210  wait_refs.clear();
211 
212  throw std::runtime_error("Failed to enqueue kernel, error code: " + std::to_string(callbackError));
213  }
214 
215  // For std::function, cl_int use might discard int qualifiers.
// Run an OpenCL call with up to three attempts, escalating between attempts:
//   1. call oclCall() directly;
//   2. on failure, clFinish() (this engine only) and call again;
//   3. on failure, clFinish(true) (whole device) and call a last time.
// If the third attempt also fails, the pending queue items and wait events are
// cleared and std::runtime_error is thrown with `message` plus the error code.
// oclCall must therefore be safe to invoke more than once.
// NOTE(review): one original line at the top of the body is not shown in this listing.
216  void tryOcl(std::string message, std::function<int()> oclCall)
217  {
219 
220  if (oclCall() == CL_SUCCESS) {
221  // Success
222  return;
223  }
224 
225  // Soft finish (just for this QEngineOCL)
226  clFinish();
227 
228  if (oclCall() == CL_SUCCESS) {
229  // Success after clearing QEngineOCL queue
230  return;
231  }
232 
233  // Hard finish (for the unique OpenCL device)
234  clFinish(true);
235 
236  cl_int error = oclCall();
237  if (error == CL_SUCCESS) {
238  // Success after clearing all queues for the OpenCL device
239  return;
240  }
241 
// Drop all pending work before throwing.
242  wait_queue_items.clear();
243  wait_refs.clear();
244 
245  // We're fatally blocked. Throw to exit.
246  throw std::runtime_error(message + ", error code: " + std::to_string(error));
247  }
248 
249 public:
252  static const bitCapIntOcl OclMemDenom = 3U;
253 
272  QEngineOCL(bitLenInt qBitCount, bitCapInt initState, qrack_rand_gen_ptr rgp = nullptr,
273  complex phaseFac = CMPLX_DEFAULT_ARG, bool doNorm = false, bool randomGlobalPhase = true,
274  bool useHostMem = false, int64_t devID = -1, bool useHardwareRNG = true, bool ignored = false,
275  real1_f norm_thresh = REAL1_EPSILON, std::vector<int64_t> ignored2 = {}, bitLenInt ignored4 = 0U,
276  real1_f ignored3 = FP_NORM_EPSILON_F);
277 
279  {
280  // Theoretically, all user output is blocking, so don't throw in destructor.
281  callbackError = CL_SUCCESS;
282  // Make sure we track device allocation.
283  FreeAll();
284  }
285 
286  virtual bool isOpenCL() { return true; }
287 
288  bool IsZeroAmplitude() { return !stateBuffer; }
290  {
291  if (!stateBuffer) {
292  return ZERO_R1_F;
293  }
294 
296  }
297 
// Switch the state vector between host-memory and device-memory storage.
// Does nothing when the requested mode equals usingHostRam. Otherwise the
// current state is first read into a freshly allocated host copy (copyVec).
// NOTE(review): one original line in each branch is not shown in this listing.
298  void SwitchHostPtr(bool useHostMem)
299  {
300  if (useHostMem == usingHostRam) {
301  return;
302  }
303 
// Snapshot the current amplitudes into host memory (allocation is forced).
304  std::shared_ptr<complex> copyVec = AllocStateVec(maxQPowerOcl, true);
305  GetQuantumState(copyVec.get());
306 
307  if (useHostMem) {
// Host mode: the snapshot becomes the engine's host state vector.
308  stateVec = copyVec;
310  } else {
// Device mode: release the host vector, then write the snapshot into
// *stateBuffer with a blocking write (CL_TRUE), retried through tryOcl().
311  stateVec = NULL;
313  tryOcl("Failed to write buffer", [&] {
314  return queue.enqueueWriteBuffer(
315  *stateBuffer, CL_TRUE, 0U, sizeof(complex) * maxQPowerOcl, copyVec.get(), ResetWaitEvents().get());
316  });
317  wait_refs.clear();
318  copyVec.reset();
319  }
320 
321  usingHostRam = useHostMem;
322  }
323 
324  void FreeAll();
325  void ZeroAmplitudes();
326  void CopyStateVec(QEnginePtr src);
327 
328  void GetAmplitudePage(complex* pagePtr, bitCapIntOcl offset, bitCapIntOcl length);
329  void SetAmplitudePage(const complex* pagePtr, bitCapIntOcl offset, bitCapIntOcl length);
330  void SetAmplitudePage(
331  QEnginePtr pageEnginePtr, bitCapIntOcl srcOffset, bitCapIntOcl dstOffset, bitCapIntOcl length);
332  void ShuffleBuffers(QEnginePtr engine);
334 
335  void QueueSetDoNormalize(bool doNorm) { AddQueueItem(QueueItem(doNorm)); }
336  void QueueSetRunningNorm(real1_f runningNrm) { AddQueueItem(QueueItem(runningNrm)); }
// Append an item to wait_queue_items while holding queue_mutex.
// If the queue was empty before the push, DispatchQueue() is called
// after the lock has been released.
// NOTE(review): one original line inside the locked scope is not shown in this listing.
337  void AddQueueItem(const QueueItem& item)
338  {
339  bool isBase;
340  // For lock_guard:
341  if (true) {
342  std::lock_guard<std::mutex> lock(queue_mutex);
// isBase: true when no items were pending before this push.
344  isBase = !wait_queue_items.size();
345  wait_queue_items.push_back(item);
346  }
347 
348  if (isBase) {
349  DispatchQueue();
350  }
351  }
352  void QueueCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector<BufferPtr> args,
353  size_t localBuffSize = 0U, size_t deallocSize = 0U)
354  {
355  if (localBuffSize > device_context->GetLocalSize()) {
356  throw bad_alloc("Local memory limits exceeded in QEngineOCL::QueueCall()");
357  }
358  AddQueueItem(QueueItem(api_call, workItemCount, localGroupSize, deallocSize, args, localBuffSize));
359  }
360 
361  bitCapIntOcl GetMaxSize() { return device_context->GetMaxAlloc() / sizeof(complex); };
362 
363  void SetPermutation(bitCapInt perm, complex phaseFac = CMPLX_DEFAULT_ARG);
364 
366  void UniformlyControlledSingleBit(const std::vector<bitLenInt>& controls, bitLenInt qubitIndex,
367  const complex* mtrxs, const std::vector<bitCapInt>& mtrxSkipPowers, bitCapInt mtrxSkipValueMask);
368  void UniformParityRZ(bitCapInt mask, real1_f angle);
369  void CUniformParityRZ(const std::vector<bitLenInt>& controls, bitCapInt mask, real1_f angle);
370 
371  /* Operations that have an improved implementation. */
372  using QEngine::X;
373  void X(bitLenInt target);
374  using QEngine::Z;
375  void Z(bitLenInt target);
376  using QEngine::Invert;
377  void Invert(complex topRight, complex bottomLeft, bitLenInt qubitIndex);
378  using QEngine::Phase;
379  void Phase(complex topLeft, complex bottomRight, bitLenInt qubitIndex);
380  void XMask(bitCapInt mask);
381  void PhaseParity(real1_f radians, bitCapInt mask);
382 
383  using QEngine::Compose;
385  bitLenInt Compose(QInterfacePtr toCopy) { return Compose(std::dynamic_pointer_cast<QEngineOCL>(toCopy)); }
386  bitLenInt Compose(QEngineOCLPtr toCopy, bitLenInt start);
388  {
389  return Compose(std::dynamic_pointer_cast<QEngineOCL>(toCopy), start);
390  }
391  using QEngine::Decompose;
392  void Decompose(bitLenInt start, QInterfacePtr dest);
393  void Dispose(bitLenInt start, bitLenInt length);
394  void Dispose(bitLenInt start, bitLenInt length, bitCapInt disposedPerm);
395  using QEngine::Allocate;
396  bitLenInt Allocate(bitLenInt start, bitLenInt length);
397 
398  void ROL(bitLenInt shift, bitLenInt start, bitLenInt length);
399 
400 #if ENABLE_ALU
401  void INC(bitCapInt toAdd, bitLenInt start, bitLenInt length);
402  void CINC(bitCapInt toAdd, bitLenInt inOutStart, bitLenInt length, const std::vector<bitLenInt>& controls);
403  void INCS(bitCapInt toAdd, bitLenInt start, bitLenInt length, bitLenInt carryIndex);
404 #if ENABLE_BCD
405  void INCBCD(bitCapInt toAdd, bitLenInt start, bitLenInt length);
406 #endif
407  void MUL(bitCapInt toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length);
408  void DIV(bitCapInt toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length);
409  void MULModNOut(bitCapInt toMul, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length);
410  void IMULModNOut(bitCapInt toMul, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length);
411  void POWModNOut(bitCapInt base, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length);
412  void CMUL(bitCapInt toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length,
413  const std::vector<bitLenInt>& controls);
414  void CDIV(bitCapInt toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length,
415  const std::vector<bitLenInt>& controls);
416  void CMULModNOut(bitCapInt toMul, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length,
417  const std::vector<bitLenInt>& controls);
418  void CIMULModNOut(bitCapInt toMul, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length,
419  const std::vector<bitLenInt>& controls);
420  void CPOWModNOut(bitCapInt base, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length,
421  const std::vector<bitLenInt>& controls);
422  void FullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut);
423  void IFullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut);
424 
425  bitCapInt IndexedLDA(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength,
426  const unsigned char* values, bool resetValue = true);
427  bitCapInt IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength,
428  bitLenInt carryIndex, const unsigned char* values);
429  bitCapInt IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength,
430  bitLenInt carryIndex, const unsigned char* values);
431  void Hash(bitLenInt start, bitLenInt length, const unsigned char* values);
432 
433  void CPhaseFlipIfLess(bitCapInt greaterPerm, bitLenInt start, bitLenInt length, bitLenInt flagIndex);
434  void PhaseFlipIfLess(bitCapInt greaterPerm, bitLenInt start, bitLenInt length);
435 #endif
436 
437  real1_f Prob(bitLenInt qubit);
438  real1_f CtrlOrAntiProb(bool controlState, bitLenInt control, bitLenInt target);
439  real1_f ProbReg(bitLenInt start, bitLenInt length, bitCapInt permutation);
440  void ProbRegAll(bitLenInt start, bitLenInt length, real1* probsArray);
441  real1_f ProbMask(bitCapInt mask, bitCapInt permutation);
442  void ProbMaskAll(bitCapInt mask, real1* probsArray);
444  bool ForceMParity(bitCapInt mask, bool result, bool doForce = true);
445  real1_f ExpectationBitsAll(const std::vector<bitLenInt>& bits, bitCapInt offset = 0);
446 
447  void SetDevice(int64_t dID);
448  int64_t GetDevice() { return deviceID; }
449 
450  void SetQuantumState(const complex* inputState);
451  void GetQuantumState(complex* outputState);
452  void GetProbs(real1* outputProbs);
453  bitCapInt MAll();
455  void SetAmplitude(bitCapInt perm, complex amp);
456 
457  real1_f SumSqrDiff(QInterfacePtr toCompare) { return SumSqrDiff(std::dynamic_pointer_cast<QEngineOCL>(toCompare)); }
458  real1_f SumSqrDiff(QEngineOCLPtr toCompare);
459 
460  void NormalizeState(
461  real1_f nrm = REAL1_DEFAULT_ARG, real1_f norm_thresh = REAL1_DEFAULT_ARG, real1_f phaseArg = ZERO_R1_F);
462  ;
463  void UpdateRunningNorm(real1_f norm_thresh = REAL1_DEFAULT_ARG);
464  void Finish() { clFinish(); };
465  bool isFinished() { return !wait_queue_items.size(); };
466 
468 
469  void PopQueue(bool isDispatch);
470  void DispatchQueue();
471 
472 protected:
// Register `size` additional bytes of device allocation for this engine.
// The amount is first added to the per-device total kept by OCLEngine; if a
// device context exists and that total exceeds its global allocation limit,
// bad_alloc is thrown and totalOclAllocSize is left unchanged.
// NOTE(review): one original line just before the throw is not shown in this listing.
473  void AddAlloc(size_t size)
474  {
475  size_t currentAlloc = OCLEngine::Instance().AddToActiveAllocSize(deviceID, size);
476  if (device_context && (currentAlloc > device_context->GetGlobalAllocLimit())) {
478  throw bad_alloc("VRAM limits exceeded in QEngineOCL::AddAlloc()");
479  }
// Only reached when the limit check passed.
480  totalOclAllocSize += size;
481  }
482  void SubtractAlloc(size_t size)
483  {
485  totalOclAllocSize -= size;
486  }
487 
// Create an OpenCL buffer in this engine's context, with up to three attempts:
// directly, then after clFinish() (this engine), then after clFinish(true)
// (whole device). Returns the buffer on the first successful attempt.
// After the third failure, throws bad_alloc for CL_MEM_OBJECT_ALLOCATION_FAILURE,
// CL_OUT_OF_HOST_MEMORY or CL_INVALID_BUFFER_SIZE, and std::runtime_error for
// any other error code.
// NOTE(review): one original line at the top of the body is not shown in this listing.
488  BufferPtr MakeBuffer(cl_mem_flags flags, size_t size, void* host_ptr = NULL)
489  {
491 
492  cl_int error;
493  BufferPtr toRet = std::make_shared<cl::Buffer>(context, flags, size, host_ptr, &error);
494  if (error == CL_SUCCESS) {
495  // Success
496  return toRet;
497  }
498 
499  // Soft finish (just for this QEngineOCL)
500  clFinish();
501 
502  toRet = std::make_shared<cl::Buffer>(context, flags, size, host_ptr, &error);
503  if (error == CL_SUCCESS) {
504  // Success after clearing QEngineOCL queue
505  return toRet;
506  }
507 
508  // Hard finish (for the unique OpenCL device)
509  clFinish(true);
510 
511  toRet = std::make_shared<cl::Buffer>(context, flags, size, host_ptr, &error);
512  if (error != CL_SUCCESS) {
// Final attempt failed: translate the OpenCL error code into an exception.
513  if (error == CL_MEM_OBJECT_ALLOCATION_FAILURE) {
514  throw bad_alloc("CL_MEM_OBJECT_ALLOCATION_FAILURE in QEngineOCL::MakeBuffer()");
515  }
516  if (error == CL_OUT_OF_HOST_MEMORY) {
517  throw bad_alloc("CL_OUT_OF_HOST_MEMORY in QEngineOCL::MakeBuffer()");
518  }
519  if (error == CL_INVALID_BUFFER_SIZE) {
520  throw bad_alloc("CL_INVALID_BUFFER_SIZE in QEngineOCL::MakeBuffer()");
521  }
522  throw std::runtime_error("OpenCL error code on buffer allocation attempt: " + std::to_string(error));
523  }
524 
525  return toRet;
526  }
527 
528  real1_f GetExpectation(bitLenInt valueStart, bitLenInt valueLength);
529 
530  std::shared_ptr<complex> AllocStateVec(bitCapInt elemCount, bool doForceAlloc = false);
531  void FreeStateVec() { stateVec = NULL; }
532  void ResetStateBuffer(BufferPtr nStateBuffer);
533  BufferPtr MakeStateVecBuffer(std::shared_ptr<complex> nStateVec);
534  void ReinitBuffer();
535 
536  void Compose(OCLAPI apiCall, const bitCapIntOcl* bciArgs, QEngineOCLPtr toCopy);
537 
538  void InitOCL(int64_t devID);
540 
541  real1_f ParSum(real1* toSum, bitCapIntOcl maxI);
542 
553  void LockSync(cl_map_flags flags = (CL_MAP_READ | CL_MAP_WRITE));
564  void UnlockSync();
565 
572  void clFinish(bool doHard = false);
573 
577  void clDump();
578 
579  size_t FixWorkItemCount(size_t maxI, size_t wic)
580  {
581  if (wic > maxI) {
582  // Guaranteed to be a power of two
583  return maxI;
584  }
585 
586  // Otherwise, clamp to a power of two
587  return (size_t)pow2(log2(wic));
588  }
589 
/**
 * Adjust a local group size so that it is usable with a given work item count.
 *
 * @param wic Work item count (global size).
 * @param gs  Requested local group size.
 * @return A group size no larger than `gs` (or `wic`, if smaller) that evenly
 *         divides `wic`. Returns 0 only when `wic` is 0.
 *
 * Fixes relative to the original:
 *  - `gs == 0` no longer evaluates `wic % 0` (undefined behavior); the
 *    smallest valid group size is returned instead.
 *  - The original candidate `gs - (wic % gs)` is kept whenever it divides
 *    `wic`, so results are unchanged in those cases; when it does not
 *    (e.g. wic = 7, gs = 3 gave 2), the next smaller divisor is used.
 */
size_t FixGroupSize(size_t wic, size_t gs)
{
    if (gs > wic) {
        return wic;
    }

    if (!gs) {
        // Guard against modulo by zero; 1 always divides a non-zero count.
        return wic ? (size_t)1U : (size_t)0U;
    }

    // Here 1 <= gs <= wic, so the candidate is in [1, gs].
    size_t fixed = gs - (wic % gs);

    // Step down to the nearest divisor of wic; terminates at 1 at the latest.
    while (wic % fixed) {
        --fixed;
    }

    return fixed;
}
598 
599  void DecomposeDispose(bitLenInt start, bitLenInt length, QEngineOCLPtr dest);
600 
601  using QEngine::Apply2x2;
602  void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex* mtrx, bitLenInt bitCount,
603  const bitCapIntOcl* qPowersSorted, bool doCalcNorm, real1_f norm_thresh = REAL1_DEFAULT_ARG)
604  {
605  Apply2x2(offset1, offset2, mtrx, bitCount, qPowersSorted, doCalcNorm, SPECIAL_2X2::NONE, norm_thresh);
606  }
607  void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex* mtrx, bitLenInt bitCount,
608  const bitCapIntOcl* qPowersSorted, bool doCalcNorm, SPECIAL_2X2 special,
609  real1_f norm_thresh = REAL1_DEFAULT_ARG);
610 
611  void BitMask(bitCapIntOcl mask, OCLAPI api_call, real1_f phase = (real1_f)PI_R1);
612 
613  void ApplyM(bitCapInt mask, bool result, complex nrm);
614  void ApplyM(bitCapInt mask, bitCapInt result, complex nrm);
615 
616  /* Utility functions used by the operations above. */
617  void WaitCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector<BufferPtr> args,
618  size_t localBuffSize = 0U);
619  EventVecPtr ResetWaitEvents(bool waitQueue = true);
620  void ApplyMx(OCLAPI api_call, const bitCapIntOcl* bciArgs, complex nrm);
621  real1_f Probx(OCLAPI api_call, const bitCapIntOcl* bciArgs);
622 
623  void ArithmeticCall(OCLAPI api_call, const bitCapIntOcl (&bciArgs)[BCI_ARG_LEN], const unsigned char* values = NULL,
624  bitCapIntOcl valuesLength = 0U);
625  void CArithmeticCall(OCLAPI api_call, const bitCapIntOcl (&bciArgs)[BCI_ARG_LEN], bitCapIntOcl* controlPowers,
626  bitLenInt controlLen, const unsigned char* values = NULL, bitCapIntOcl valuesLength = 0U);
627  void ROx(OCLAPI api_call, bitLenInt shift, bitLenInt start, bitLenInt length);
628 
629 #if ENABLE_ALU
630  void INCDECC(bitCapInt toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
631  void INCDECSC(bitCapInt toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
632  void INCDECSC(
633  bitCapInt toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex, bitLenInt carryIndex);
634 #if ENABLE_BCD
635  void INCDECBCDC(bitCapInt toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
636 #endif
637 
638  void INT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length);
639  void CINT(
640  OCLAPI api_call, bitCapIntOcl toMod, bitLenInt start, bitLenInt length, const std::vector<bitLenInt>& controls);
641  void INTC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
642  void INTS(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex);
643  void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
644  void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex,
645  bitLenInt carryIndex);
646 #if ENABLE_BCD
647  void INTBCD(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length);
648  void INTBCDC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
649 #endif
650  void xMULx(OCLAPI api_call, const bitCapIntOcl* bciArgs, BufferPtr controlBuffer);
651  void MULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length);
652  void MULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart,
653  bitLenInt length);
654  void CMULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length,
655  const std::vector<bitLenInt>& controls);
656  void CMULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart,
657  bitLenInt length, const std::vector<bitLenInt>& controls);
658  void FullAdx(
659  bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut, OCLAPI api_call);
660  void PhaseFlipX(OCLAPI api_call, const bitCapIntOcl* bciArgs);
661 
662  bitCapIntOcl OpIndexed(OCLAPI api_call, bitCapIntOcl carryIn, bitLenInt indexStart, bitLenInt indexLength,
663  bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char* values);
664 #endif
665 
666  void ClearBuffer(BufferPtr buff, bitCapIntOcl offset, bitCapIntOcl size);
667 };
668 
669 } // namespace Qrack
size_t AddToActiveAllocSize(const int64_t &dev, size_t size)
Definition: oclengine.hpp:305
size_t SubtractFromActiveAllocSize(const int64_t &dev, size_t size)
Definition: oclengine.hpp:322
static OCLEngine & Instance()
Get a pointer to the Instance of the singleton. (The instance will be instantiated,...
Definition: oclengine.hpp:248
BufferPtr ulongBuffer
Definition: qengine_cuda.hpp:123
~PoolItem()
Definition: qengine_opencl.hpp:124
BufferPtr cmplxBuffer
Definition: qengine_cuda.hpp:121
BufferPtr MakeBuffer(size_t size)
Definition: qengine_cuda.hpp:140
BufferPtr realBuffer
Definition: qengine_cuda.hpp:122
std::shared_ptr< real1 > angleArray
Definition: qengine_cuda.hpp:126
BufferPtr MakeBuffer(const cl::Context &context, size_t size)
Definition: qengine_opencl.hpp:127
PoolItem(cl::Context &context)
Definition: qengine_opencl.hpp:115
std::shared_ptr< real1 > probArray
Definition: qengine_cuda.hpp:125
OpenCL enhanced QEngineCPU implementation.
Definition: qengine_opencl.hpp:168
void SetQuantumState(const complex *inputState)
Set arbitrary pure quantum state, in unsigned int permutation basis.
Definition: opencl.cpp:2913
virtual void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, complex const *mtrx, bitLenInt bitCount, bitCapIntOcl const *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)=0
cl::Context context
Definition: qengine_opencl.hpp:183
void GetProbs(real1 *outputProbs)
Get all probabilities, in unsigned int permutation basis.
Definition: opencl.cpp:3043
real1_f GetExpectation(bitLenInt valueStart, bitLenInt valueLength)
Definition: opencl.cpp:1982
void INCDECBCDC(bitCapInt toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Increment integer (BCD, with carry)
Definition: opencl.cpp:2398
void UniformlyControlledSingleBit(const std::vector< bitLenInt > &controls, bitLenInt qubitIndex, const complex *mtrxs, const std::vector< bitCapInt > &mtrxSkipPowers, bitCapInt mtrxSkipValueMask)
Definition: opencl.cpp:1029
void CMUL(bitCapInt toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication by integer.
Definition: opencl.cpp:2498
void IFullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut)
Inverse of FullAdd.
Definition: opencl.cpp:2468
QEnginePtr CloneEmpty()
Clone this QEngine's settings, with a zeroed state vector.
Definition: opencl.cpp:3155
void checkCallbackError()
Definition: qengine_opencl.hpp:203
void INTBCDC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Add or Subtract integer (BCD, with carry)
Definition: opencl.cpp:2363
void FullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut)
Quantum analog of classical "Full Adder" gate.
Definition: opencl.cpp:2462
size_t nrmGroupSize
Definition: qengine_opencl.hpp:175
void ArithmeticCall(OCLAPI api_call, const bitCapIntOcl(&bciArgs)[BCI_ARG_LEN], const unsigned char *values=NULL, bitCapIntOcl valuesLength=0U)
Definition: opencl.cpp:2002
int64_t GetDevice()
Get GPU device ID.
Definition: qengine_opencl.hpp:448
std::unique_ptr< real1[], void(*)(real1 *)> nrmArray
Definition: qengine_opencl.hpp:192
std::shared_ptr< complex > AllocStateVec(bitCapInt elemCount, bool doForceAlloc=false)
Definition: opencl.cpp:3270
bool usingHostRam
Definition: qengine_opencl.hpp:171
void CMULModNOut(bitCapInt toMul, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2540
void INCDECC(bitCapInt toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Common driver method behind INCC and DECC.
Definition: opencl.cpp:2212
void AddQueueItem(const QueueItem &item)
Definition: qengine_opencl.hpp:337
size_t FixWorkItemCount(size_t maxI, size_t wic)
Definition: qengine_opencl.hpp:579
void UpdateRunningNorm(real1_f norm_thresh=REAL1_DEFAULT_ARG)
Force a calculation of the norm of the state vector, in order to make it unit length before the next ...
Definition: opencl.cpp:3223
void FullAdx(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut, OCLAPI api_call)
Definition: opencl.cpp:2473
std::list< QueueItem > wait_queue_items
Definition: qengine_opencl.hpp:190
DeviceContextPtr device_context
Definition: qengine_opencl.hpp:188
size_t nrmGroupCount
Definition: qengine_opencl.hpp:174
void ClearBuffer(BufferPtr buff, bitCapIntOcl offset, bitCapIntOcl size)
Definition: opencl.cpp:3316
std::vector< EventVecPtr > wait_refs
Definition: qengine_opencl.hpp:189
bitLenInt Compose(QInterfacePtr toCopy)
Combine another QInterface with this one, after the last bit index of this one.
Definition: qengine_opencl.hpp:385
void POWModNOut(bitCapInt base, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Raise a classical base to a quantum power, modulo N, (out of place)
Definition: opencl.cpp:2449
void UniformParityRZ(bitCapInt mask, real1_f angle)
If the target qubit set parity is odd, this applies a phase factor of $e^{i \cdot \mathrm{angle}}$.
Definition: opencl.cpp:1098
bool unlockHostMem
Definition: qengine_opencl.hpp:172
void ProbMaskAll(bitCapInt mask, real1 *probsArray)
Direct measure of masked permutation probability.
Definition: opencl.cpp:1807
void SwitchHostPtr(bool useHostMem)
Switch to/from host/device state vector buffer.
Definition: qengine_opencl.hpp:298
void CINT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt start, bitLenInt length, const std::vector< bitLenInt > &controls)
Add or Subtract integer (without sign or carry, with controls)
Definition: opencl.cpp:2127
void INTS(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex)
Add or Subtract integer (with overflow, without carry)
Definition: opencl.cpp:2218
void MUL(bitCapInt toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Multiply by integer.
Definition: opencl.cpp:2405
void SetAmplitude(bitCapInt perm, complex amp)
Sets the representational amplitude of a full permutation.
Definition: opencl.cpp:2996
size_t totalOclAllocSize
Definition: qengine_opencl.hpp:176
bitCapInt IndexedLDA(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, const unsigned char *values, bool resetValue=true)
Set 8 bit register bits based on read from classical memory.
Definition: opencl.cpp:2757
void INCBCD(bitCapInt toAdd, bitLenInt start, bitLenInt length)
Increment integer (BCD)
Definition: opencl.cpp:2357
void ResetStateBuffer(BufferPtr nStateBuffer)
Definition: opencl.cpp:658
void ROL(bitLenInt shift, bitLenInt start, bitLenInt length)
"Circular shift left" - shift bits left, and carry last bits.
Definition: opencl.cpp:2097
real1_f CtrlOrAntiProb(bool controlState, bitLenInt control, bitLenInt target)
Definition: opencl.cpp:1671
void WaitCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector< BufferPtr > args, size_t localBuffSize=0U)
Definition: opencl.cpp:404
void CPhaseFlipIfLess(bitCapInt greaterPerm, bitLenInt start, bitLenInt length, bitLenInt flagIndex)
The 6502 uses its carry flag also as a greater-than/less-than flag, for the CMP operation.
Definition: opencl.cpp:2883
PoolItemPtr GetFreePoolItem()
Definition: opencl.cpp:374
void Phase(complex topLeft, complex bottomRight, bitLenInt qubitIndex)
Apply a single bit transformation that only affects phase.
Definition: opencl.cpp:717
real1_f SumSqrDiff(QInterfacePtr toCompare)
Definition: qengine_opencl.hpp:457
void SetAmplitudePage(const complex *pagePtr, bitCapIntOcl offset, bitCapIntOcl length)
Copy a "page" of amplitudes from pagePtr into this QEngine's internal state.
Definition: opencl.cpp:153
void clDump()
Dumps the remaining asynchronous wait event list or queue of OpenCL events, for the current queue.
Definition: opencl.cpp:354
void ZeroAmplitudes()
Set all amplitudes to 0, and optionally temporarily deallocate state vector RAM.
Definition: opencl.cpp:99
void MULModNOut(bitCapInt toMul, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2432
void InitOCL(int64_t devID)
Definition: opencl.cpp:656
bool ForceMParity(bitCapInt mask, bool result, bool doForce=true)
Act as if a measurement of parity of the masked set of qubits was applied, except force the (usual...
Definition: opencl.cpp:1913
std::mutex queue_mutex
Definition: qengine_opencl.hpp:181
void DIV(bitCapInt toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Divide by integer.
Definition: opencl.cpp:2422
void BitMask(bitCapIntOcl mask, OCLAPI api_call, real1_f phase=(real1_f) PI_R1)
Definition: opencl.cpp:988
void MULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Definition: opencl.cpp:2622
void NormalizeState(real1_f nrm=REAL1_DEFAULT_ARG, real1_f norm_thresh=REAL1_DEFAULT_ARG, real1_f phaseArg=ZERO_R1_F)
Apply the normalization factor found by UpdateRunningNorm() or on the fly by a single bit gate.
Definition: opencl.cpp:3165
void INCS(bitCapInt toAdd, bitLenInt start, bitLenInt length, bitLenInt carryIndex)
Increment integer (without sign, with carry)
Definition: opencl.cpp:2249
void INCDECSC(bitCapInt toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Increment integer (with sign, with carry)
Definition: opencl.cpp:2320
void QueueCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector< BufferPtr > args, size_t localBuffSize=0U, size_t deallocSize=0U)
Definition: qengine_opencl.hpp:352
void CMULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Definition: opencl.cpp:2714
void INT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length)
Add or Subtract integer (without sign or carry)
Definition: opencl.cpp:2101
cl_int callbackError
Definition: qengine_opencl.hpp:173
void CMULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Definition: opencl.cpp:2670
bitCapInt IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Subtract based on an indexed load from classical memory.
Definition: opencl.cpp:2847
void Finish()
If asynchronous work is still running, block until it finishes.
Definition: qengine_opencl.hpp:464
cl_map_flags lockSyncFlags
Definition: qengine_opencl.hpp:178
void CopyStateVec(QEnginePtr src)
Exactly copy the state vector of a different QEngine instance.
Definition: opencl.cpp:114
void MULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Definition: opencl.cpp:2643
std::shared_ptr< complex > stateVec
Definition: qengine_opencl.hpp:180
void Hash(bitLenInt start, bitLenInt length, const unsigned char *values)
Set 8-bit register bits based on a read from classical memory.
Definition: opencl.cpp:2854
void clFinish(bool doHard=false)
Finishes the asynchronous wait event list or queue of OpenCL events.
Definition: opencl.cpp:330
QInterfacePtr Clone()
Clone this QInterface.
Definition: opencl.cpp:3130
void PhaseFlipX(OCLAPI api_call, const bitCapIntOcl *bciArgs)
Definition: opencl.cpp:2863
void CPOWModNOut(bitCapInt base, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2579
void Z(bitLenInt target)
Apply Pauli Z matrix to bit.
Definition: opencl.cpp:698
void AddAlloc(size_t size)
Definition: qengine_opencl.hpp:473
void INC(bitCapInt toAdd, bitLenInt start, bitLenInt length)
Increment integer (without sign, without carry)
Definition: opencl.cpp:2165
BufferPtr MakeBuffer(cl_mem_flags flags, size_t size, void *host_ptr=NULL)
Definition: qengine_opencl.hpp:488
void ApplyMx(OCLAPI api_call, const bitCapIntOcl *bciArgs, complex nrm)
Definition: opencl.cpp:1184
void INTC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Add or Subtract integer (without sign, with carry)
Definition: opencl.cpp:2181
real1_f ExpectationBitsAll(const std::vector< bitLenInt > &bits, bitCapInt offset=0)
Get permutation expectation value of bits.
Definition: opencl.cpp:1944
void ProbRegAll(bitLenInt start, bitLenInt length, real1 *probsArray)
Definition: opencl.cpp:1719
bitCapIntOcl GetMaxSize()
Definition: qengine_opencl.hpp:361
void CDIV(bitCapInt toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled division by integer.
Definition: opencl.cpp:2520
size_t FixGroupSize(size_t wic, size_t gs)
Definition: qengine_opencl.hpp:590
void SubtractAlloc(size_t size)
Definition: qengine_opencl.hpp:482
void ReinitBuffer()
Definition: opencl.cpp:3309
void FreeAll()
Definition: opencl.cpp:89
void GetQuantumState(complex *outputState)
Get pure quantum state, in unsigned int permutation basis.
Definition: opencl.cpp:3027
real1_f ProbMask(bitCapInt mask, bitCapInt permutation)
Direct measure of masked permutation probability.
Definition: opencl.cpp:1757
void ROx(OCLAPI api_call, bitLenInt shift, bitLenInt start, bitLenInt length)
Definition: opencl.cpp:2072
void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, const bitCapIntOcl *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)
Definition: qengine_opencl.hpp:602
virtual QInterfacePtr Decompose(bitLenInt start, bitLenInt length)
Definition: qengine.hpp:270
cl::CommandQueue queue
Definition: qengine_opencl.hpp:182
void PhaseParity(real1_f radians, bitCapInt mask)
Parity phase gate.
Definition: opencl.cpp:747
void tryOcl(std::string message, std::function< int()> oclCall)
Definition: qengine_opencl.hpp:216
void QueueSetRunningNorm(real1_f runningNrm)
Add an operation to the (OpenCL) queue, to set the value of runningNorm, which is the normalization c...
Definition: qengine_opencl.hpp:336
void IMULModNOut(bitCapInt toMul, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Inverse of multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2441
void QueueSetDoNormalize(bool doNorm)
Add an operation to the (OpenCL) queue, to set the value of doNormalize, which controls whether to au...
Definition: qengine_opencl.hpp:335
bool IsZeroAmplitude()
Returns "true" only if amplitudes are all totally 0.
Definition: qengine_opencl.hpp:288
bitLenInt Compose(QEngineOCLPtr toCopy)
Definition: opencl.cpp:1329
BufferPtr stateBuffer
Definition: qengine_opencl.hpp:186
bitCapIntOcl OpIndexed(OCLAPI api_call, bitCapIntOcl carryIn, bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Add or Subtract based on an indexed load from classical memory.
Definition: opencl.cpp:2791
void Invert(complex topRight, complex bottomLeft, bitLenInt qubitIndex)
Apply a single bit transformation that reverses bit probability and might affect phase.
Definition: opencl.cpp:705
void PopQueue(bool isDispatch)
Definition: opencl.cpp:413
real1_f ProbReg(bitLenInt start, bitLenInt length, bitCapInt permutation)
Direct measure of register permutation probability.
Definition: opencl.cpp:1706
void UnlockSync()
Unlocks synchronization between the state vector buffer and general RAM, so the state vector can be o...
Definition: opencl.cpp:310
EventVecPtr ResetWaitEvents(bool waitQueue=true)
Definition: opencl.cpp:387
void DispatchQueue()
Definition: opencl.cpp:446
bitLenInt Compose(QInterfacePtr toCopy, bitLenInt start)
Definition: qengine_opencl.hpp:387
void SetPermutation(bitCapInt perm, complex phaseFac=CMPLX_DEFAULT_ARG)
Set to a specific permutation of all qubits.
Definition: opencl.cpp:660
void SetDevice(int64_t dID)
Set GPU device ID.
Definition: opencl.cpp:523
bool didInit
Definition: qengine_opencl.hpp:170
void xMULx(OCLAPI api_call, const bitCapIntOcl *bciArgs, BufferPtr controlBuffer)
Definition: opencl.cpp:2594
real1_f ParSum(real1 *toSum, bitCapIntOcl maxI)
Definition: opencl.cpp:644
real1_f ProbParity(bitCapInt mask)
Overall probability of any odd permutation of the masked set of bits.
Definition: opencl.cpp:1892
complex GetAmplitude(bitCapInt perm)
Get the representational amplitude of a full permutation.
Definition: opencl.cpp:2978
bool isFinished()
Returns "false" if asynchronous work is still running, and "true" if all previously dispatched asynch...
Definition: qengine_opencl.hpp:465
void GetAmplitudePage(complex *pagePtr, bitCapIntOcl offset, bitCapIntOcl length)
Copy a "page" of amplitudes from this QEngine's internal state, into pagePtr.
Definition: opencl.cpp:138
real1_f Probx(OCLAPI api_call, const bitCapIntOcl *bciArgs)
Definition: opencl.cpp:1619
void Dispose(bitLenInt start, bitLenInt length)
Minimally decompose a set of contiguous bits from the separably composed unit, and discard the separa...
Definition: opencl.cpp:1552
static const bitCapIntOcl OclMemDenom
1 / OclMemDenom is the maximum fraction of total OCL device RAM that a single state vector should occ...
Definition: qengine_opencl.hpp:252
int64_t deviceID
Definition: qengine_opencl.hpp:177
bitCapInt MAll()
Measure permutation state of all coherent bits.
Definition: opencl.cpp:2927
void ShuffleBuffers(QEnginePtr engine)
Swap the high half of this engine with the low half of another.
Definition: opencl.cpp:230
void CArithmeticCall(OCLAPI api_call, const bitCapIntOcl(&bciArgs)[BCI_ARG_LEN], bitCapIntOcl *controlPowers, bitLenInt controlLen, const unsigned char *values=NULL, bitCapIntOcl valuesLength=0U)
Definition: opencl.cpp:2007
void LockSync(cl_map_flags flags=(CL_MAP_READ|CL_MAP_WRITE))
Locks synchronization between the state vector buffer and general RAM, so the state vector can be dir...
Definition: opencl.cpp:287
void XMask(bitCapInt mask)
Masked X gate.
Definition: opencl.cpp:734
~QEngineOCL()
Definition: qengine_opencl.hpp:278
void ApplyM(bitCapInt mask, bool result, complex nrm)
Definition: opencl.cpp:1208
void CUniformParityRZ(const std::vector< bitLenInt > &controls, bitCapInt mask, real1_f angle)
If the controls are set and the target qubit set parity is odd, this applies a phase factor of \(e^{i \cdot angle}\).
Definition: opencl.cpp:1132
complex permutationAmp
Definition: qengine_opencl.hpp:179
BufferPtr nrmBuffer
Definition: qengine_opencl.hpp:187
bitCapInt IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Add based on an indexed load from classical memory.
Definition: opencl.cpp:2840
bitLenInt Allocate(bitLenInt start, bitLenInt length)
Allocate new "length" count of |0> state qubits at specified qubit index start position.
Definition: opencl.cpp:1608
void PhaseFlipIfLess(bitCapInt greaterPerm, bitLenInt start, bitLenInt length)
This is an expedient for an adaptive Grover's search for a function's global minimum.
Definition: opencl.cpp:2899
virtual bool isOpenCL()
Returns "true" if current simulation is OpenCL-based.
Definition: qengine_opencl.hpp:286
void CINC(bitCapInt toAdd, bitLenInt inOutStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Add integer (without sign, with controls)
Definition: opencl.cpp:2170
std::vector< PoolItemPtr > poolItems
Definition: qengine_opencl.hpp:191
void FreeStateVec()
Definition: qengine_opencl.hpp:531
virtual void X(bitLenInt q)
Definition: qengine.hpp:183
void INTBCD(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length)
Add or Subtract integer (BCD)
Definition: opencl.cpp:2327
void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Add or Subtract integer (with sign, with carry)
Definition: opencl.cpp:2299
QEngineOCL(bitLenInt qBitCount, bitCapInt initState, qrack_rand_gen_ptr rgp=nullptr, complex phaseFac=CMPLX_DEFAULT_ARG, bool doNorm=false, bool randomGlobalPhase=true, bool useHostMem=false, int64_t devID=-1, bool useHardwareRNG=true, bool ignored=false, real1_f norm_thresh=REAL1_EPSILON, std::vector< int64_t > ignored2={}, bitLenInt ignored4=0U, real1_f ignored3=FP_NORM_EPSILON_F)
Initialize a Qrack::QEngineOCL object.
Definition: opencl.cpp:67
BufferPtr MakeStateVecBuffer(std::shared_ptr< complex > nStateVec)
Definition: opencl.cpp:3296
real1_f FirstNonzeroPhase()
Get phase of lowest permutation nonzero amplitude.
Definition: qengine_opencl.hpp:289
void CIMULModNOut(bitCapInt toMul, bitCapInt modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Inverse of controlled multiplication modulo N by integer, (out of place)
Definition: opencl.cpp:2561
void DecomposeDispose(bitLenInt start, bitLenInt length, QEngineOCLPtr dest)
Definition: opencl.cpp:1374
real1_f Prob(bitLenInt qubit)
PSEUDO-QUANTUM Direct measure of bit probability to be in |1> state.
Definition: opencl.cpp:1650
Abstract QEngine implementation, for all "Schroedinger method" engines.
Definition: qengine.hpp:31
virtual void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, complex const *mtrx, bitLenInt bitCount, bitCapIntOcl const *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)=0
bitCapIntOcl maxQPowerOcl
Definition: qengine.hpp:40
virtual void Decompose(bitLenInt start, QInterfacePtr dest)=0
Minimally decompose a set of contiguous bits from the separably composed unit, into "destination".
virtual void X(bitLenInt qubit)
X gate.
Definition: qinterface.hpp:1054
virtual bitLenInt Allocate(bitLenInt length)
Allocate new "length" count of |0> state qubits at end of qubit index position.
Definition: qinterface.hpp:434
virtual bitLenInt Compose(QInterfacePtr toCopy)
Combine another QInterface with this one, after the last bit index of this one.
Definition: qinterface.hpp:338
Definition: qengine_gpu_util.hpp:21
Half-precision floating-point type.
Definition: half.hpp:2222
virtual void Invert(const complex topRight, const complex bottomLeft, bitLenInt qubitIndex)
Apply a single bit transformation that reverses bit probability and might affect phase.
Definition: qinterface.hpp:493
virtual void UniformlyControlledSingleBit(const std::vector< bitLenInt > &controls, bitLenInt qubitIndex, const complex *mtrxs)
Apply a "uniformly controlled" arbitrary single bit unitary transformation.
Definition: qinterface.hpp:590
virtual void Z(bitLenInt qubit)
Z gate.
Definition: qinterface.hpp:1087
virtual void U(bitLenInt target, real1_f theta, real1_f phi, real1_f lambda)
General unitary gate.
Definition: rotational.cpp:18
virtual void Phase(const complex topLeft, const complex bottomRight, bitLenInt qubitIndex)
Apply a single bit transformation that only affects phase.
Definition: qinterface.hpp:480
virtual real1_f FirstNonzeroPhase()
Get phase of lowest permutation nonzero amplitude.
Definition: qinterface.hpp:2709
Definition: complex16x2simd.hpp:25
std::complex< half_float::half > complex
Definition: qrack_types.hpp:62
std::shared_ptr< QEngine > QEnginePtr
Definition: qrack_types.hpp:141
std::shared_ptr< OCLDeviceContext > DeviceContextPtr
Definition: oclengine.hpp:47
std::shared_ptr< QInterface > QInterfacePtr
Definition: qinterface.hpp:28
constexpr real1_f ZERO_R1_F
Definition: qrack_types.hpp:152
std::shared_ptr< QEngineOCL > QEngineOCLPtr
Definition: qengine_opencl.hpp:34
std::shared_ptr< EventVec > EventVecPtr
Definition: oclengine.hpp:51
constexpr real1_f FP_NORM_EPSILON_F
Definition: qrack_types.hpp:245
const real1 ONE_R1
Definition: qrack_types.hpp:153
unsigned long cl_map_flags
Definition: qengine_cuda.hpp:31
bitCapInt pow2(const bitLenInt &p)
Definition: qrack_functions.hpp:22
const real1 REAL1_DEFAULT_ARG
Definition: qrack_types.hpp:155
const real1 PI_R1
Definition: qrack_types.hpp:158
float real1_f
Definition: qrack_types.hpp:64
QRACK_CONST complex CMPLX_DEFAULT_ARG
Definition: qrack_types.hpp:242
std::shared_ptr< PoolItem > PoolItemPtr
Definition: qengine_cuda.hpp:162
SPECIAL_2X2
Definition: qengine_gpu_util.hpp:19
@ NONE
Definition: qengine_gpu_util.hpp:19
OCLAPI
Definition: oclapi.hpp:19
const real1 REAL1_EPSILON
Definition: qrack_types.hpp:157
std::shared_ptr< void > BufferPtr
Definition: qengine_cuda.hpp:45
unsigned long cl_mem_flags
Definition: qengine_cuda.hpp:32
bitLenInt log2(bitCapInt n)
Definition: qrack_functions.hpp:26
MICROSOFT_QUANTUM_DECL void U(_In_ uintq sid, _In_ uintq q, _In_ double theta, _In_ double phi, _In_ double lambda)
(External API) 3-parameter unitary gate
Definition: pinvoke_api.cpp:1362
#define CL_MAP_WRITE
Definition: qengine_cuda.hpp:36
#define CL_MEM_READ_ONLY
Definition: qengine_cuda.hpp:40
#define CL_MAP_READ
Definition: qengine_cuda.hpp:35
#define BCI_ARG_LEN
Definition: qengine_opencl.hpp:26
#define CMPLX_NORM_LEN
Definition: qengine_opencl.hpp:27
#define REAL_ARG_LEN
Definition: qengine_opencl.hpp:28
#define bitLenInt
Definition: qrack_types.hpp:44
#define qrack_rand_gen_ptr
Definition: qrack_types.hpp:146
#define bitCapInt
Definition: qrack_types.hpp:105
#define bitCapIntOcl
Definition: qrack_types.hpp:91
#define QRACK_ALIGN_SIZE
Definition: qrack_types.hpp:147
Definition: qengine_cuda.hpp:50
QueueItem(OCLAPI ac, size_t wic, size_t lgs, size_t ds, std::vector< BufferPtr > b, size_t lbs)
Definition: qengine_opencl.hpp:63
QueueItem(real1_f runningNrm)
Definition: qengine_opencl.hpp:91
bool doNorm
Definition: qengine_cuda.hpp:59
size_t workItemCount
Definition: qengine_cuda.hpp:52
std::vector< BufferPtr > buffers
Definition: qengine_cuda.hpp:55
size_t deallocSize
Definition: qengine_cuda.hpp:54
QueueItem()
Definition: qengine_opencl.hpp:49
bool isSetRunningNorm
Definition: qengine_cuda.hpp:58
QueueItem(bool doNrm)
Definition: qengine_opencl.hpp:77
size_t localBuffSize
Definition: qengine_cuda.hpp:56
OCLAPI api_call
Definition: qengine_cuda.hpp:51
bool isSetDoNorm
Definition: qengine_cuda.hpp:57
size_t localGroupSize
Definition: qengine_cuda.hpp:53
real1 runningNorm
Definition: qengine_cuda.hpp:60