Qrack  9.13
General classical-emulating-quantum development framework
qengine_cuda.hpp
Go to the documentation of this file.
1 //
3 // (C) Daniel Strano and the Qrack contributors 2017-2023. All rights reserved.
4 //
5 // This is a multithreaded, universal quantum register simulation, allowing
6 // (nonphysical) register cloning and direct measurement of probability and
7 // phase, to leverage what advantages classical emulation of qubits can have.
8 //
9 // Licensed under the GNU Lesser General Public License V3.
10 // See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html
11 // for details.
12 
13 #pragma once
14 
15 #include "common/cudaengine.cuh"
16 #include "qengine.hpp"
17 #include "qengine_gpu_util.hpp"
18 
19 #if !ENABLE_CUDA
20 #error CUDA has not been enabled
21 #endif
22 
23 #include <list>
24 
25 #define BCI_ARG_LEN 10
26 #define CMPLX_NORM_LEN 6
27 #define REAL_ARG_LEN 2
28 
29 namespace Qrack {
30 
31 typedef unsigned long cl_map_flags;
32 typedef unsigned long cl_mem_flags;
33 
34 // clang-format off
35 #define CL_MAP_READ (1 << 0)
36 #define CL_MAP_WRITE (1 << 1)
37 
38 #define CL_MEM_READ_WRITE (1 << 0)
39 #define CL_MEM_WRITE_ONLY (1 << 1)
40 #define CL_MEM_READ_ONLY (1 << 2)
41 #define CL_MEM_USE_HOST_PTR (1 << 3)
42 #define CL_MEM_COPY_HOST_PTR (1 << 5)
43 // clang-format on
44 
45 typedef std::shared_ptr<void> BufferPtr;
46 
47 class QEngineCUDA;
48 typedef std::shared_ptr<QEngineCUDA> QEngineCUDAPtr;
49 
50 struct QueueItem {
52  size_t workItemCount;
54  size_t deallocSize;
55  std::vector<BufferPtr> buffers;
56  size_t localBuffSize;
59  bool doNorm;
61 
63  : api_call()
64  , workItemCount(0U)
65  , localGroupSize(0U)
66  , deallocSize(0U)
67  , buffers()
68  , localBuffSize(0U)
69  , isSetDoNorm(false)
70  , isSetRunningNorm(true)
71  , doNorm(false)
73  {
74  }
75 
76  QueueItem(OCLAPI ac, size_t wic, size_t lgs, size_t ds, std::vector<BufferPtr> b, size_t lbs)
77  : api_call(ac)
78  , workItemCount(wic)
79  , localGroupSize(lgs)
80  , deallocSize(ds)
81  , buffers(b)
82  , localBuffSize(lbs)
83  , isSetDoNorm(false)
84  , isSetRunningNorm(false)
85  , doNorm(false)
87  {
88  }
89 
90  QueueItem(bool doNrm)
91  : api_call()
92  , workItemCount(0U)
93  , localGroupSize(0U)
94  , deallocSize(0U)
95  , buffers()
96  , localBuffSize(0U)
97  , isSetDoNorm(true)
98  , isSetRunningNorm(false)
99  , doNorm(doNrm)
101  {
102  }
103 
104  QueueItem(real1_f runningNrm)
105  : api_call()
106  , workItemCount(0U)
107  , localGroupSize(0U)
108  , deallocSize(0U)
109  , buffers()
110  , localBuffSize(0U)
111  , isSetDoNorm(false)
112  , isSetRunningNorm(true)
113  , doNorm(false)
114  , runningNorm(runningNrm)
115  {
116  }
117 };
118 
119 class PoolItem {
120 public:
124 
125  std::shared_ptr<real1> probArray;
126  std::shared_ptr<real1> angleArray;
127 
129  : probArray(NULL)
130  , angleArray(NULL)
131  {
135  }
136 
138 
139 protected:
140  BufferPtr MakeBuffer(size_t size)
141  {
142  cudaError_t error;
143 
144  BufferPtr toRet = std::shared_ptr<void>(AllocRaw(size, &error), [](void* c) { cudaFree(c); });
145 
146  if (error != cudaSuccess) {
147  throw std::runtime_error("CUDA error code on buffer allocation attempt: " + std::to_string(error));
148  }
149 
150  return toRet;
151  }
152 
153  void* AllocRaw(size_t size, cudaError_t* errorPtr)
154  {
155  void* toRet;
156  *errorPtr = cudaMalloc(&toRet, size);
157 
158  return toRet;
159  }
160 };
161 
162 typedef std::shared_ptr<PoolItem> PoolItemPtr;
163 
182 class QEngineCUDA : public QEngine {
183 protected:
184  bool didInit;
188  size_t nrmGroupSize;
190  int64_t deviceID;
193  std::shared_ptr<complex> stateVec;
194  std::mutex queue_mutex;
195  // stateBuffer is allocated as a shared_ptr, because it's the only buffer that will be acted on outside of
196  // QEngineCUDA itself, specifically by QEngineCUDAMulti.
200  std::list<QueueItem> wait_queue_items;
201  std::vector<PoolItemPtr> poolItems;
202  std::unique_ptr<real1[], void (*)(real1*)> nrmArray;
203 
204  // For std::function, cudaError_t use might discard int qualifiers.
205  void tryCuda(std::string message, std::function<cudaError_t()> oclCall)
206  {
207  if (oclCall() == cudaSuccess) {
208  // Success
209  return;
210  }
211 
212  // Soft finish (just for this QEngineCUDA)
213  clFinish();
214 
215  if (oclCall() == cudaSuccess) {
216  // Success after clearing QEngineCUDA queue
217  return;
218  }
219 
220  // Hard finish (for the unique OpenCL device)
221  clFinish(true);
222 
223  cudaError_t error = oclCall();
224  if (error == cudaSuccess) {
225  // Success after clearing all queues for the OpenCL device
226  return;
227  }
228 
229  wait_queue_items.clear();
230 
231  // We're fatally blocked. Throw to exit.
232  throw std::runtime_error(message + ", error code: " + std::to_string(error));
233  }
234 
235  using QEngine::Copy;
236  void Copy(QInterfacePtr orig) { Copy(std::dynamic_pointer_cast<QEngineCUDA>(orig)); }
// Copy the engine-specific members of `orig` into this object by plain assignment.
// stateVec, stateBuffer and nrmBuffer are shared-pointer handles, so after this call
// both engines refer to the same underlying objects; poolItems is copied as a vector
// of shared PoolItemPtr handles. nrmArray is not assigned here.
// NOTE(review): `orig` is dereferenced without a null check; the QInterfacePtr overload
// forwards the result of a dynamic_pointer_cast, which may be null — confirm callers.
237  void Copy(QEngineCUDAPtr orig)
238  {
239  didInit = orig->didInit;
240  usingHostRam = orig->usingHostRam;
241  unlockHostMem = orig->unlockHostMem;
242  nrmGroupCount = orig->nrmGroupCount;
243  nrmGroupSize = orig->nrmGroupSize;
// NOTE(review): AddAlloc() reads deviceID, which still holds this object's previous
// value here because it is only overwritten on the next line — confirm this is intended.
244  AddAlloc(orig->totalOclAllocSize);
245  deviceID = orig->deviceID;
246  lockSyncFlags = orig->lockSyncFlags;
247  permutationAmp = orig->permutationAmp;
248  stateVec = orig->stateVec;
// The mutex is deliberately left out of the copy (std::mutex is not copy-assignable).
249  // queue_mutex = orig->queue_mutex;
250  stateBuffer = orig->stateBuffer;
251  nrmBuffer = orig->nrmBuffer;
252  device_context = orig->device_context;
253  wait_queue_items = orig->wait_queue_items;
254  poolItems = orig->poolItems;
255  }
256 
257 public:
260  static const bitCapIntOcl OclMemDenom = 3U;
261 
280  QEngineCUDA(bitLenInt qBitCount, const bitCapInt& initState, qrack_rand_gen_ptr rgp = nullptr,
281  const complex& phaseFac = CMPLX_DEFAULT_ARG, bool doNorm = false, bool randomGlobalPhase = true,
282  bool useHostMem = false, int64_t devID = -1, bool useHardwareRNG = true, bool ignored = false,
283  real1_f norm_thresh = REAL1_EPSILON, std::vector<int64_t> ignored2 = {}, bitLenInt ignored4 = 0U,
284  real1_f ignored3 = _qrack_qunit_sep_thresh);
285 
287  {
288  // Make sure we track device allocation.
289  FreeAll();
290  }
291 
292  virtual bool isOpenCL() { return true; }
293 
294  bool IsZeroAmplitude() { return !stateBuffer; }
296  {
297  if (!stateBuffer) {
298  return ZERO_R1_F;
299  }
300 
302  }
303 
306 
307  void GetAmplitudePage(complex* pagePtr, bitCapIntOcl offset, bitCapIntOcl length);
308  void SetAmplitudePage(const complex* pagePtr, bitCapIntOcl offset, bitCapIntOcl length);
310  QEnginePtr pageEnginePtr, bitCapIntOcl srcOffset, bitCapIntOcl dstOffset, bitCapIntOcl length);
313 
314  bitCapIntOcl GetMaxSize() { return device_context->GetMaxAlloc() / sizeof(complex); };
315 
316  void SetPermutation(const bitCapInt& perm, const complex& phaseFac = CMPLX_DEFAULT_ARG);
317 
319  void UniformlyControlledSingleBit(const std::vector<bitLenInt>& controls, bitLenInt qubitIndex,
320  const complex* mtrxs, const std::vector<bitCapInt>& mtrxSkipPowers, const bitCapInt& mtrxSkipValueMask);
321  void UniformParityRZ(const bitCapInt& mask, real1_f angle);
322  void CUniformParityRZ(const std::vector<bitLenInt>& controls, const bitCapInt& mask, real1_f angle);
323 
324  using QEngine::X;
325  void X(bitLenInt target);
326  using QEngine::Z;
327  void Z(bitLenInt target);
328  using QEngine::Invert;
329  void Invert(const complex& topRight, const complex& bottomLeft, bitLenInt qubitIndex);
330  using QEngine::Phase;
331  void Phase(const complex& topLeft, const complex& bottomRight, bitLenInt qubitIndex);
332 
333  void XMask(const bitCapInt& mask);
334  void PhaseParity(real1_f radians, const bitCapInt& mask);
335  void PhaseRootNMask(bitLenInt n, const bitCapInt& mask);
336 
337  using QEngine::Compose;
339  bitLenInt Compose(QInterfacePtr toCopy) { return Compose(std::dynamic_pointer_cast<QEngineCUDA>(toCopy)); }
342  {
343  return Compose(std::dynamic_pointer_cast<QEngineCUDA>(toCopy), start);
344  }
345  using QEngine::Decompose;
346  void Decompose(bitLenInt start, QInterfacePtr dest);
347  void Dispose(bitLenInt start, bitLenInt length);
348  void Dispose(bitLenInt start, bitLenInt length, const bitCapInt& disposedPerm);
349  using QEngine::Allocate;
351 
352  void ROL(bitLenInt shift, bitLenInt start, bitLenInt length);
353 
354 #if ENABLE_ALU
355  void INC(const bitCapInt& toAdd, bitLenInt start, bitLenInt length);
356  void CINC(const bitCapInt& toAdd, bitLenInt inOutStart, bitLenInt length, const std::vector<bitLenInt>& controls);
357  void INCS(const bitCapInt& toAdd, bitLenInt start, bitLenInt length, bitLenInt carryIndex);
358 #if ENABLE_BCD
359  void INCBCD(const bitCapInt& toAdd, bitLenInt start, bitLenInt length);
360 #endif
361  void MUL(const bitCapInt& toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length);
362  void DIV(const bitCapInt& toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length);
364  const bitCapInt& toMul, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length);
366  const bitCapInt& toMul, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length);
368  const bitCapInt& base, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length);
369  void CMUL(const bitCapInt& toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length,
370  const std::vector<bitLenInt>& controls);
371  void CDIV(const bitCapInt& toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length,
372  const std::vector<bitLenInt>& controls);
373  void CMULModNOut(const bitCapInt& toMul, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart,
374  bitLenInt length, const std::vector<bitLenInt>& controls);
375  void CIMULModNOut(const bitCapInt& toMul, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart,
376  bitLenInt length, const std::vector<bitLenInt>& controls);
377  void CPOWModNOut(const bitCapInt& base, const bitCapInt& modN, bitLenInt inStart, bitLenInt outStart,
378  bitLenInt length, const std::vector<bitLenInt>& controls);
379  void FullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut);
380  void IFullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut);
381 
382  bitCapInt IndexedLDA(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength,
383  const unsigned char* values, bool resetValue = true);
384  bitCapInt IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength,
385  bitLenInt carryIndex, const unsigned char* values);
386  bitCapInt IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength,
387  bitLenInt carryIndex, const unsigned char* values);
388  void Hash(bitLenInt start, bitLenInt length, const unsigned char* values);
389 
390  void CPhaseFlipIfLess(const bitCapInt& greaterPerm, bitLenInt start, bitLenInt length, bitLenInt flagIndex);
391  void PhaseFlipIfLess(const bitCapInt& greaterPerm, bitLenInt start, bitLenInt length);
392 #endif
393 
395  real1_f CtrlOrAntiProb(bool controlState, bitLenInt control, bitLenInt target);
396  real1_f ProbReg(bitLenInt start, bitLenInt length, const bitCapInt& permutation);
397  void ProbRegAll(bitLenInt start, bitLenInt length, real1* probsArray);
398  real1_f ProbMask(const bitCapInt& mask, const bitCapInt& permutation);
399  void ProbMaskAll(const bitCapInt& mask, real1* probsArray);
401  bool ForceMParity(const bitCapInt& mask, bool result, bool doForce = true);
402  real1_f ExpectationBitsAll(const std::vector<bitLenInt>& bits, const bitCapInt& offset = ZERO_BCI);
403 
404  void SetDevice(int64_t dID);
405  int64_t GetDevice() { return deviceID; }
406 
407  void SetQuantumState(const complex* inputState);
408  void GetQuantumState(complex* outputState);
409  void GetProbs(real1* outputProbs);
412  void SetAmplitude(const bitCapInt& perm, const complex& amp);
413 
415  {
416  return SumSqrDiff(std::dynamic_pointer_cast<QEngineCUDA>(toCompare));
417  }
419 
421  real1_f nrm = REAL1_DEFAULT_ARG, real1_f norm_thresh = REAL1_DEFAULT_ARG, real1_f phaseArg = ZERO_R1_F);
423  void Finish() { clFinish(); };
424  bool isFinished() { return wait_queue_items.empty(); };
425 
428 
429  void PopQueue();
431 
432 protected:
433  void AddAlloc(size_t size)
434  {
435  size_t currentAlloc = CUDAEngine::Instance().AddToActiveAllocSize(deviceID, size);
436  if (device_context && (currentAlloc > device_context->GetGlobalAllocLimit())) {
437  CUDAEngine::Instance().SubtractFromActiveAllocSize(deviceID, size);
438  throw bad_alloc("VRAM limits exceeded in QEngineCUDA::AddAlloc()");
439  }
440  totalOclAllocSize += size;
441  }
442  void SubtractAlloc(size_t size)
443  {
444  CUDAEngine::Instance().SubtractFromActiveAllocSize(deviceID, size);
445  totalOclAllocSize -= size;
446  }
447 
448  BufferPtr MakeBuffer(cl_mem_flags flags, size_t size, void* host_ptr = NULL)
449  {
450  cudaError_t error;
451 
452  BufferPtr toRet = std::shared_ptr<void>(
453  AllocRaw(flags, host_ptr, size, &error), [this, flags](void* c) { FreeRaw(flags, c); });
454 
455  if (error == cudaSuccess) {
456  // Success
457  return toRet;
458  }
459 
460  // Soft finish (just for this QEngineCUDA)
461  clFinish();
462 
463  toRet = std::shared_ptr<void>(
464  AllocRaw(flags, host_ptr, size, &error), [this, flags](void* c) { FreeRaw(flags, c); });
465 
466  if (error == cudaSuccess) {
467  // Success after clearing QEngineCUDA queue
468  return toRet;
469  }
470 
471  // Hard finish (for the unique OpenCL device)
472  clFinish(true);
473 
474  toRet = std::shared_ptr<void>(
475  AllocRaw(flags, host_ptr, size, &error), [this, flags](void* c) { FreeRaw(flags, c); });
476 
477  if (error != cudaSuccess) {
478  throw std::runtime_error("CUDA error code on buffer allocation attempt: " + std::to_string(error));
479  }
480 
481  return toRet;
482  }
483 
484  void* AllocRaw(cl_mem_flags flags, void* host_ptr, size_t size, cudaError_t* errorPtr)
485  {
486  void* toRet = host_ptr;
487  *errorPtr = (flags & CL_MEM_USE_HOST_PTR) ? cudaHostRegister(host_ptr, size, cudaHostRegisterDefault)
488  : cudaMalloc(&toRet, size);
489  if ((*errorPtr == cudaSuccess) && (flags & CL_MEM_COPY_HOST_PTR)) {
490  cudaMemcpy(toRet, host_ptr, size, cudaMemcpyHostToDevice);
491  }
492 
493  return toRet;
494  }
495 
496  void FreeRaw(cl_mem_flags flags, void* c)
497  {
498  if (flags & CL_MEM_USE_HOST_PTR) {
499  cudaHostUnregister(c);
500  } else {
501  cudaFree(c);
502  }
503  }
504 
505  void SwitchHostPtr(bool useHostMem)
506  {
507  if (useHostMem == usingHostRam) {
508  return;
509  }
510 
511  std::shared_ptr<complex> copyVec = AllocStateVec(maxQPowerOcl, true);
512  GetQuantumState(copyVec.get());
513 
514  if (useHostMem) {
515  stateVec = copyVec;
517  } else {
518  stateVec = NULL;
520  clFinish();
521  tryCuda("Failed to write buffer", [&] {
522  return cudaMemcpy(
523  stateBuffer.get(), (void*)(copyVec.get()), sizeof(complex) * maxQPowerOcl, cudaMemcpyHostToDevice);
524  });
525  copyVec.reset();
526  }
527 
528  usingHostRam = useHostMem;
529  }
530 
531  void QueueCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector<BufferPtr> args,
532  size_t localBuffSize = 0U, size_t deallocSize = 0U)
533  {
534  if (localBuffSize > device_context->GetLocalSize()) {
535  throw bad_alloc("Local memory limits exceeded in QEngineCUDA::QueueCall()");
536  }
537  cudaStreamSynchronize(device_context->params_queue);
538  AddQueueItem(QueueItem(api_call, workItemCount, localGroupSize, deallocSize, args, localBuffSize));
539  }
540 
541  void QueueSetDoNormalize(bool doNorm) { AddQueueItem(QueueItem(doNorm)); }
542  void QueueSetRunningNorm(real1_f runningNrm) { AddQueueItem(QueueItem(runningNrm)); }
543  void AddQueueItem(const QueueItem& item)
544  {
545  // For lock_guard:
546  if (true) {
547  std::lock_guard<std::mutex> lock(queue_mutex);
548  wait_queue_items.push_back(item);
549  }
550 
551  DispatchQueue();
552  }
553 
554  real1_f GetExpectation(bitLenInt valueStart, bitLenInt valueLength);
555 
556  std::shared_ptr<complex> AllocStateVec(bitCapIntOcl elemCount, bool doForceAlloc = false);
557  void FreeStateVec() { stateVec = NULL; }
558  void FreeAll();
559  void ResetStateBuffer(BufferPtr nStateBuffer);
560  BufferPtr MakeStateVecBuffer(std::shared_ptr<complex> nStateVec);
561  void ReinitBuffer();
562 
563  void Compose(OCLAPI apiCall, const bitCapIntOcl* bciArgs, QEngineCUDAPtr toCopy);
564 
565  void InitOCL(int64_t devID);
567 
569 
591  void UnlockSync();
592 
599  void clFinish(bool doHard = false);
600 
604  void clDump();
605 
606  size_t FixWorkItemCount(size_t maxI, size_t wic)
607  {
608  if (wic > maxI) {
609  // Guaranteed to be a power of two
610  return maxI;
611  }
612 
613  // Otherwise, clamp to a power of two
614  return pow2Ocl(log2Ocl(wic));
615  }
616 
/**
 * Adjust a requested group size against the work item count.
 *
 * @param wic Work item count.
 * @param gs Requested group size.
 * @return `wic` if the group would be larger than the work item count; otherwise
 *         `gs` reduced by the remainder of `wic / gs` (unchanged when `gs` divides `wic`).
 */
size_t FixGroupSize(size_t wic, size_t gs)
{
    const bool groupExceedsWork = gs > wic;
    if (groupExceedsWork) {
        return wic;
    }

    const size_t leftover = wic % gs;
    return gs - leftover;
}
625 
627 
628  using QEngine::Apply2x2;
629  void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex* mtrx, bitLenInt bitCount,
630  const bitCapIntOcl* qPowersSorted, bool doCalcNorm, real1_f norm_thresh = REAL1_DEFAULT_ARG)
631  {
632  Apply2x2(offset1, offset2, mtrx, bitCount, qPowersSorted, doCalcNorm, SPECIAL_2X2::NONE, norm_thresh);
633  }
634  void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex* mtrx, bitLenInt bitCount,
635  const bitCapIntOcl* qPowersSorted, bool doCalcNorm, SPECIAL_2X2 special,
636  real1_f norm_thresh = REAL1_DEFAULT_ARG);
637 
638  void BitMask(bitCapIntOcl mask, OCLAPI api_call, real1_f phase = (real1_f)PI_R1);
639 
640  void ApplyM(const bitCapInt& mask, bool result, const complex& nrm);
641  void ApplyM(const bitCapInt& mask, const bitCapInt& result, const complex& nrm);
642 
643  /* Utility functions used by the operations above. */
644  void WaitCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector<BufferPtr> args,
645  size_t localBuffSize = 0U);
646  EventVecPtr ResetWaitEvents(bool waitQueue = true);
647  void ApplyMx(OCLAPI api_call, const bitCapIntOcl* bciArgs, const complex& nrm);
648  real1_f Probx(OCLAPI api_call, const bitCapIntOcl* bciArgs);
649 
650  void ArithmeticCall(OCLAPI api_call, const bitCapIntOcl (&bciArgs)[BCI_ARG_LEN], const unsigned char* values = NULL,
651  bitCapIntOcl valuesLength = 0U);
652  void CArithmeticCall(OCLAPI api_call, const bitCapIntOcl (&bciArgs)[BCI_ARG_LEN], bitCapIntOcl* controlPowers,
653  bitLenInt controlLen, const unsigned char* values = NULL, bitCapIntOcl valuesLength = 0U);
654  void ROx(OCLAPI api_call, bitLenInt shift, bitLenInt start, bitLenInt length);
655 
656 #if ENABLE_ALU
657  void INCDECC(const bitCapInt& toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
658  void INCDECSC(const bitCapInt& toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
659  void INCDECSC(
660  const bitCapInt& toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex, bitLenInt carryIndex);
661 #if ENABLE_BCD
662  void INCDECBCDC(const bitCapInt& toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
663 #endif
664 
665  void INT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length);
666  void CINT(
667  OCLAPI api_call, bitCapIntOcl toMod, bitLenInt start, bitLenInt length, const std::vector<bitLenInt>& controls);
668  void INTC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
669  void INTS(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex);
670  void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
671  void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex,
672  bitLenInt carryIndex);
673 #if ENABLE_BCD
674  void INTBCD(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length);
675  void INTBCDC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex);
676 #endif
677  void xMULx(OCLAPI api_call, const bitCapIntOcl* bciArgs, BufferPtr controlBuffer);
678  void MULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length);
679  void MULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart,
680  bitLenInt length);
681  void CMULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length,
682  const std::vector<bitLenInt>& controls);
683  void CMULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart,
684  bitLenInt length, const std::vector<bitLenInt>& controls);
685  void FullAdx(
686  bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut, OCLAPI api_call);
687  void PhaseFlipX(OCLAPI api_call, const bitCapIntOcl* bciArgs);
688 
689  bitCapIntOcl OpIndexed(OCLAPI api_call, bitCapIntOcl carryIn, bitLenInt indexStart, bitLenInt indexLength,
690  bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char* values);
691 #endif
692 
693  void ClearBuffer(BufferPtr buff, bitCapIntOcl offset, bitCapIntOcl size);
694 };
695 
696 } // namespace Qrack
Definition: qengine_cuda.hpp:119
BufferPtr ulongBuffer
Definition: qengine_cuda.hpp:123
~PoolItem()
Definition: qengine_cuda.hpp:137
BufferPtr cmplxBuffer
Definition: qengine_cuda.hpp:121
BufferPtr MakeBuffer(size_t size)
Definition: qengine_cuda.hpp:140
BufferPtr realBuffer
Definition: qengine_cuda.hpp:122
std::shared_ptr< real1 > angleArray
Definition: qengine_cuda.hpp:126
PoolItem()
Definition: qengine_cuda.hpp:128
void * AllocRaw(size_t size, cudaError_t *errorPtr)
Definition: qengine_cuda.hpp:153
std::shared_ptr< real1 > probArray
Definition: qengine_cuda.hpp:125
CUDA enhanced QEngineCPU implementation.
Definition: qengine_cuda.hpp:182
real1_f Prob(bitLenInt qubit)
Direct measure of bit probability to be in |1> state.
real1_f SumSqrDiff(QInterfacePtr toCompare)
Calculates (1 - <\psi_e|\psi_c>) between states |\psi_c> and |\psi_e>.
Definition: qengine_cuda.hpp:414
void Compose(OCLAPI apiCall, const bitCapIntOcl *bciArgs, QEngineCUDAPtr toCopy)
void ProbMaskAll(const bitCapInt &mask, real1 *probsArray)
Direct measure of masked permutation probability.
real1_f ProbParity(const bitCapInt &mask)
Overall probability of any odd permutation of the masked set of bits.
void POWModNOut(const bitCapInt &base, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Raise a classical base to a quantum power, modulo N, (out of place)
virtual bool isOpenCL()
Returns "true" if current simulation is OpenCL-based.
Definition: qengine_cuda.hpp:292
void INCS(const bitCapInt &toAdd, bitLenInt start, bitLenInt length, bitLenInt carryIndex)
Add a classical integer to the register, with sign and without carry.
void UniformlyControlledSingleBit(const std::vector< bitLenInt > &controls, bitLenInt qubitIndex, const complex *mtrxs, const std::vector< bitCapInt > &mtrxSkipPowers, const bitCapInt &mtrxSkipValueMask)
void QueueCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector< BufferPtr > args, size_t localBuffSize=0U, size_t deallocSize=0U)
Definition: qengine_cuda.hpp:531
void Decompose(bitLenInt start, QInterfacePtr dest)
Minimally decompose a set of contiguous bits from the separably composed unit, into "destination".
bitLenInt Allocate(bitLenInt start, bitLenInt length)
Allocate new "length" count of |0> state qubits at specified qubit index start position.
real1_f FirstNonzeroPhase()
Get phase of lowest permutation nonzero amplitude.
Definition: qengine_cuda.hpp:295
std::shared_ptr< complex > AllocStateVec(bitCapIntOcl elemCount, bool doForceAlloc=false)
void ApplyMx(OCLAPI api_call, const bitCapIntOcl *bciArgs, const complex &nrm)
bool didInit
Definition: qengine_cuda.hpp:184
void Dispose(bitLenInt start, bitLenInt length, const bitCapInt &disposedPerm)
Dispose a contiguous set of qubits that are already in a permutation eigenstate.
void ShuffleBuffers(QEnginePtr engine)
Swap the high half of this engine with the low half of another.
void SetAmplitude(const bitCapInt &perm, const complex &amp)
Sets the representational amplitude of a full permutation.
void Finish()
If asynchronous work is still running, block until it finishes.
Definition: qengine_cuda.hpp:423
void INCDECC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Common driver method behind INCC and DECC (without sign, with carry)
void INTBCD(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length)
QInterfacePtr Copy()
Copy this QInterface.
void CUniformParityRZ(const std::vector< bitLenInt > &controls, const bitCapInt &mask, real1_f angle)
If the controls are set and the target qubit set parity is odd, this applies a phase factor of $e^{i \cdot \mathrm{angle}}$.
void DIV(const bitCapInt &toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Divide by integer.
bitLenInt Compose(QEngineCUDAPtr toCopy)
void PhaseFlipX(OCLAPI api_call, const bitCapIntOcl *bciArgs)
void SubtractAlloc(size_t size)
Definition: qengine_cuda.hpp:442
void FullAdx(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut, OCLAPI api_call)
BufferPtr stateBuffer
Definition: qengine_cuda.hpp:197
void XMask(const bitCapInt &mask)
Masked X gate.
void tryCuda(std::string message, std::function< cudaError_t()> oclCall)
Definition: qengine_cuda.hpp:205
void Dispose(bitLenInt start, bitLenInt length)
Minimally decompose a set of contiguous bits from the separably composed unit, and discard the separa...
void FullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut)
Quantum analog of classical "Full Adder" gate.
std::shared_ptr< complex > stateVec
Definition: qengine_cuda.hpp:193
bitCapInt IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Subtract from an entangled 8 bit register state with a superposed index-offset-based read from classi...
real1_f ExpectationBitsAll(const std::vector< bitLenInt > &bits, const bitCapInt &offset=ZERO_BCI)
Get permutation expectation value of bits.
void IMULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Inverse of multiplication modulo N by integer, (out of place)
void QueueSetRunningNorm(real1_f runningNrm)
Add an operation to the (OpenCL) queue, to set the value of runningNorm, which is the normalization c...
Definition: qengine_cuda.hpp:542
void NormalizeState(real1_f nrm=REAL1_DEFAULT_ARG, real1_f norm_thresh=REAL1_DEFAULT_ARG, real1_f phaseArg=ZERO_R1_F)
Apply the normalization factor found by UpdateRunningNorm() or on the fly by a single bit gate.
void CPhaseFlipIfLess(const bitCapInt &greaterPerm, bitLenInt start, bitLenInt length, bitLenInt flagIndex)
The 6502 uses its carry flag also as a greater-than/less-than flag, for the CMP operation.
void ROL(bitLenInt shift, bitLenInt start, bitLenInt length)
Circular shift left - shift bits left, and carry last bits.
void CMULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication modulo N by integer, (out of place)
std::unique_ptr< real1[], void(*)(real1 *)> nrmArray
Definition: qengine_cuda.hpp:202
static const bitCapIntOcl OclMemDenom
1 / OclMemDenom is the maximum fraction of total OCL device RAM that a single state vector should occ...
Definition: qengine_cuda.hpp:260
int64_t GetDevice()
Get GPU device ID.
Definition: qengine_cuda.hpp:405
void UnlockSync()
Unlocks synchronization between the state vector buffer and general RAM, so the state vector can be o...
void AddAlloc(size_t size)
Definition: qengine_cuda.hpp:433
bool ForceMParity(const bitCapInt &mask, bool result, bool doForce=true)
Act as if a measurement of the parity of the masked set of qubits was applied, except force the (usual...
void INCDECSC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Common driver method behind INCSC and DECSC (without overflow flag)
void ArithmeticCall(OCLAPI api_call, const bitCapIntOcl(&bciArgs)[BCI_ARG_LEN], const unsigned char *values=NULL, bitCapIntOcl valuesLength=0U)
void ClearBuffer(BufferPtr buff, bitCapIntOcl offset, bitCapIntOcl size)
size_t FixGroupSize(size_t wic, size_t gs)
Definition: qengine_cuda.hpp:617
size_t totalOclAllocSize
Definition: qengine_cuda.hpp:189
void ApplyM(const bitCapInt &mask, const bitCapInt &result, const complex &nrm)
void X(bitLenInt target)
void GetProbs(real1 *outputProbs)
Get the probability of each permutation basis state.
QEnginePtr CloneEmpty()
Clone this QEngine's settings, with a zeroed state vector.
void GetQuantumState(complex *outputState)
Get the pure quantum state representation.
void SetAmplitudePage(const complex *pagePtr, bitCapIntOcl offset, bitCapIntOcl length)
Copy a "page" of amplitudes from pagePtr into this QEngine's internal state.
bitLenInt Compose(QEngineCUDAPtr toCopy, bitLenInt start)
~QEngineCUDA()
Definition: qengine_cuda.hpp:286
bitLenInt Compose(QInterfacePtr toCopy, bitLenInt start)
Compose() a QInterface peer, inserting its qubit into index order at start index.
Definition: qengine_cuda.hpp:341
void IFullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut)
Inverse of FullAdd.
void CDIV(const bitCapInt &toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled division by power of integer.
int64_t deviceID
Definition: qengine_cuda.hpp:190
void INTBCDC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
void CMUL(const bitCapInt &toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication by integer.
void SetPermutation(const bitCapInt &perm, const complex &phaseFac=CMPLX_DEFAULT_ARG)
Set to a specific permutation of all qubits.
void ROx(OCLAPI api_call, bitLenInt shift, bitLenInt start, bitLenInt length)
std::mutex queue_mutex
Definition: qengine_cuda.hpp:194
void CMULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
bool isFinished()
Returns "false" if asynchronous work is still running, and "true" if all previously dispatched asynch...
Definition: qengine_cuda.hpp:424
void ResetStateBuffer(BufferPtr nStateBuffer)
bitCapInt IndexedLDA(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, const unsigned char *values, bool resetValue=true)
Set 8 bit register bits by a superposed index-offset-based read from classical memory.
QEngineCUDA(bitLenInt qBitCount, const bitCapInt &initState, qrack_rand_gen_ptr rgp=nullptr, const complex &phaseFac=CMPLX_DEFAULT_ARG, bool doNorm=false, bool randomGlobalPhase=true, bool useHostMem=false, int64_t devID=-1, bool useHardwareRNG=true, bool ignored=false, real1_f norm_thresh=REAL1_EPSILON, std::vector< int64_t > ignored2={}, bitLenInt ignored4=0U, real1_f ignored3=_qrack_qunit_sep_thresh)
Initialize a Qrack::QEngineCUDA object.
void Z(bitLenInt target)
Z gate.
void INT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length)
real1_f ProbReg(bitLenInt start, bitLenInt length, const bitCapInt &permutation)
Direct measure of register permutation probability.
void SetQuantumState(const complex *inputState)
Set an arbitrary pure quantum state representation.
void FreeStateVec()
Definition: qengine_cuda.hpp:557
complex GetAmplitude(const bitCapInt &perm)
Get the representational amplitude of a full permutation.
complex permutationAmp
Definition: qengine_cuda.hpp:192
void WaitCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector< BufferPtr > args, size_t localBuffSize=0U)
void ZeroAmplitudes()
Set all amplitudes to 0, and optionally temporarily deallocate state vector RAM.
bitCapIntOcl OpIndexed(OCLAPI api_call, bitCapIntOcl carryIn, bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
void INCDECSC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex, bitLenInt carryIndex)
Common driver method behind INCSC and DECSC (with overflow flag)
void SetAmplitudePage(QEnginePtr pageEnginePtr, bitCapIntOcl srcOffset, bitCapIntOcl dstOffset, bitCapIntOcl length)
Copy a "page" of amplitudes from another QEngine, pointed to by pageEnginePtr, into this QEngine's internal state.
BufferPtr nrmBuffer
Definition: qengine_cuda.hpp:198
virtual void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, bitCapIntOcl const *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)=0
void Copy(QEngineCUDAPtr orig)
Definition: qengine_cuda.hpp:237
void ApplyM(const bitCapInt &mask, bool result, const complex &nrm)
bool usingHostRam
Definition: qengine_cuda.hpp:185
void Copy(QInterfacePtr orig)
Definition: qengine_cuda.hpp:236
void CPOWModNOut(const bitCapInt &base, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled, raise a classical base to a quantum power, modulo N, (out of place)
void PhaseRootNMask(bitLenInt n, const bitCapInt &mask)
Masked PhaseRootN gate.
std::list< QueueItem > wait_queue_items
Definition: qengine_cuda.hpp:200
void CINC(const bitCapInt &toAdd, bitLenInt inOutStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Add integer (without sign, with controls)
real1_f ProbMask(const bitCapInt &mask, const bitCapInt &permutation)
Direct measure of masked permutation probability.
DeviceContextPtr device_context
Definition: qengine_cuda.hpp:199
void CINT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt start, bitLenInt length, const std::vector< bitLenInt > &controls)
void ProbRegAll(bitLenInt start, bitLenInt length, real1 *probsArray)
BufferPtr MakeStateVecBuffer(std::shared_ptr< complex > nStateVec)
bitCapInt IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Add to entangled 8 bit register state with a superposed index-offset-based read from classical memory.
void INTS(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex)
real1_f CtrlOrAntiProb(bool controlState, bitLenInt control, bitLenInt target)
void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, const bitCapIntOcl *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)
Definition: qengine_cuda.hpp:629
void QueueSetDoNormalize(bool doNorm)
Add an operation to the (OpenCL) queue, to set the value of doNormalize, which controls whether to automatically normalize the state.
Definition: qengine_cuda.hpp:541
void clDump()
Dumps the remaining asynchronous wait event list or queue of OpenCL events, for the current queue.
void MULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
bitLenInt Compose(QInterfacePtr toCopy)
Combine another QInterface with this one, after the last bit index of this one.
Definition: qengine_cuda.hpp:339
real1_f ParSum(real1 *toSum, bitCapIntOcl maxI)
void GetAmplitudePage(complex *pagePtr, bitCapIntOcl offset, bitCapIntOcl length)
Copy a "page" of amplitudes from this QEngine's internal state, into pagePtr.
bitCapInt MAll()
Measure permutation state of all coherent bits.
void MUL(const bitCapInt &toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Multiply by integer.
void MULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Multiplication modulo N by integer, (out of place)
void PhaseParity(real1_f radians, const bitCapInt &mask)
Parity phase gate.
void LockSync(cl_map_flags flags=(CL_MAP_READ|CL_MAP_WRITE))
Locks synchronization between the state vector buffer and general RAM, so the state vector can be directly read and/or written to.
void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex, bitLenInt carryIndex)
void INCBCD(const bitCapInt &toAdd, bitLenInt start, bitLenInt length)
Add classical BCD integer (without sign)
size_t FixWorkItemCount(size_t maxI, size_t wic)
Definition: qengine_cuda.hpp:606
void xMULx(OCLAPI api_call, const bitCapIntOcl *bciArgs, BufferPtr controlBuffer)
void INTC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
void UniformParityRZ(const bitCapInt &mask, real1_f angle)
If the target qubit set parity is odd, this applies a phase factor of $e^{i \cdot \mathrm{angle}}$.
real1_f SumSqrDiff(QEngineCUDAPtr toCompare)
BufferPtr MakeBuffer(cl_mem_flags flags, size_t size, void *host_ptr=NULL)
Definition: qengine_cuda.hpp:448
void InitOCL(int64_t devID)
bool IsZeroAmplitude()
Returns "true" only if amplitudes are all totally 0.
Definition: qengine_cuda.hpp:294
void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
void Phase(const complex &topLeft, const complex &bottomRight, bitLenInt qubitIndex)
Apply a single bit transformation that only affects phase.
void MULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
void FreeRaw(cl_mem_flags flags, void *c)
Definition: qengine_cuda.hpp:496
size_t nrmGroupSize
Definition: qengine_cuda.hpp:188
void UpdateRunningNorm(real1_f norm_thresh=REAL1_DEFAULT_ARG)
Force a calculation of the norm of the state vector, in order to make it unit length before the next probability or measurement operation.
EventVecPtr ResetWaitEvents(bool waitQueue=true)
void * AllocRaw(cl_mem_flags flags, void *host_ptr, size_t size, cudaError_t *errorPtr)
Definition: qengine_cuda.hpp:484
void INCDECBCDC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Common driver method behind INCBCDC and DECBCDC (without overflow flag)
size_t nrmGroupCount
Definition: qengine_cuda.hpp:187
void Invert(const complex &topRight, const complex &bottomLeft, bitLenInt qubitIndex)
Apply a single bit transformation that reverses bit probability and might affect phase.
void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, const bitCapIntOcl *qPowersSorted, bool doCalcNorm, SPECIAL_2X2 special, real1_f norm_thresh=REAL1_DEFAULT_ARG)
bool unlockHostMem
Definition: qengine_cuda.hpp:186
void CIMULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Inverse of controlled multiplication modulo N by integer, (out of place)
void CMULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
cl_map_flags lockSyncFlags
Definition: qengine_cuda.hpp:191
void SwitchHostPtr(bool useHostMem)
Switch to/from host/device state vector buffer.
Definition: qengine_cuda.hpp:505
void CopyStateVec(QEnginePtr src)
Exactly copy the state vector of a different QEngine instance.
void BitMask(bitCapIntOcl mask, OCLAPI api_call, real1_f phase=(real1_f) PI_R1)
void DecomposeDispose(bitLenInt start, bitLenInt length, QEngineCUDAPtr dest)
real1_f Probx(OCLAPI api_call, const bitCapIntOcl *bciArgs)
PoolItemPtr GetFreePoolItem()
void Hash(bitLenInt start, bitLenInt length, const unsigned char *values)
Transform a length of qubit register via lookup through a hash table.
bitCapIntOcl GetMaxSize()
Definition: qengine_cuda.hpp:314
real1_f GetExpectation(bitLenInt valueStart, bitLenInt valueLength)
void clFinish(bool doHard=false)
Finishes the asynchronous wait event list or queue of OpenCL events.
std::vector< PoolItemPtr > poolItems
Definition: qengine_cuda.hpp:201
void AddQueueItem(const QueueItem &item)
Definition: qengine_cuda.hpp:543
void CArithmeticCall(OCLAPI api_call, const bitCapIntOcl(&bciArgs)[BCI_ARG_LEN], bitCapIntOcl *controlPowers, bitLenInt controlLen, const unsigned char *values=NULL, bitCapIntOcl valuesLength=0U)
QInterfacePtr Clone()
Clone this QInterface.
void INC(const bitCapInt &toAdd, bitLenInt start, bitLenInt length)
Add integer (without sign)
void SetDevice(int64_t dID)
Set GPU device ID.
void PhaseFlipIfLess(const bitCapInt &greaterPerm, bitLenInt start, bitLenInt length)
This is an expedient for an adaptive Grover's search for a function's global minimum.
Abstract QEngine implementation, for all "Schroedinger method" engines.
Definition: qengine.hpp:31
virtual void Copy(QInterfacePtr orig)
Copy this QInterface.
Definition: qinterface.hpp:222
bitCapIntOcl maxQPowerOcl
Definition: qengine.hpp:40
virtual void Decompose(bitLenInt start, QInterfacePtr dest)=0
Minimally decompose a set of contiguous bits from the separably composed unit, into "destination".
virtual void X(bitLenInt qubit)
X gate.
Definition: qinterface.hpp:1084
virtual void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, bitCapIntOcl const *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)=0
virtual bitLenInt Allocate(bitLenInt length)
Allocate new "length" count of |0> state qubits at end of qubit index position.
Definition: qinterface.hpp:470
virtual bitLenInt Compose(QInterfacePtr toCopy)
Combine another QInterface with this one, after the last bit index of this one.
Definition: qinterface.hpp:364
Definition: qengine_gpu_util.hpp:21
Half-precision floating-point type.
Definition: half.hpp:2222
virtual void UniformlyControlledSingleBit(const std::vector< bitLenInt > &controls, bitLenInt qubit, const complex *mtrxs)
Apply a "uniformly controlled" arbitrary single bit unitary transformation.
Definition: qinterface.hpp:627
virtual void Phase(const complex &topLeft, const complex &bottomRight, bitLenInt qubit)
Apply a single bit transformation that only affects phase.
Definition: qinterface.hpp:516
virtual void Invert(const complex &topRight, const complex &bottomLeft, bitLenInt qubit)
Apply a single bit transformation that reverses bit probability and might affect phase.
Definition: qinterface.hpp:529
virtual void Z(bitLenInt qubit)
Z gate.
Definition: qinterface.hpp:1117
virtual void U(bitLenInt target, real1_f theta, real1_f phi, real1_f lambda)
General unitary gate.
Definition: rotational.cpp:18
virtual real1_f FirstNonzeroPhase()
Get phase of lowest permutation nonzero amplitude.
Definition: qinterface.hpp:2985
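GLOSSARY: bitLenInt - "bit-length integer" - unsigned integer ID of qubit position in register; bitCapInt - "bit-capacity integer" - unsigned integer single-permutation value of a qubit register (typically "big integer"); ...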
Definition: complex16x2simd.hpp:25
std::shared_ptr< QEngine > QEnginePtr
Definition: qrack_types.hpp:151
std::shared_ptr< OCLDeviceContext > DeviceContextPtr
Definition: oclengine.hpp:47
std::shared_ptr< QInterface > QInterfacePtr
Definition: qinterface.hpp:29
const real1_f _qrack_qunit_sep_thresh
Definition: qrack_functions.hpp:235
std::shared_ptr< EventVec > EventVecPtr
Definition: oclengine.hpp:51
bitLenInt log2Ocl(bitCapIntOcl n)
Definition: qrack_functions.hpp:88
void U(quid sid, bitLenInt q, real1_f theta, real1_f phi, real1_f lambda)
(External API) 3-parameter unitary gate
Definition: wasm_api.cpp:1143
std::complex< real1 > complex
Definition: qrack_types.hpp:128
unsigned long cl_map_flags
Definition: qengine_cuda.hpp:31
QRACK_CONST real1 REAL1_EPSILON
Definition: qrack_types.hpp:200
QRACK_CONST real1 ONE_R1
Definition: qrack_types.hpp:185
float real1_f
Definition: qrack_types.hpp:95
QRACK_CONST complex CMPLX_DEFAULT_ARG
Definition: qrack_types.hpp:257
std::shared_ptr< QEngineCUDA > QEngineCUDAPtr
Definition: qengine_cuda.hpp:47
std::shared_ptr< PoolItem > PoolItemPtr
Definition: qengine_cuda.hpp:162
SPECIAL_2X2
Definition: qengine_gpu_util.hpp:19
@ NONE
Definition: qengine_gpu_util.hpp:19
OCLAPI
Definition: oclapi.hpp:19
std::shared_ptr< void > BufferPtr
Definition: qengine_cuda.hpp:45
QRACK_CONST real1 PI_R1
Definition: qrack_types.hpp:178
unsigned long cl_mem_flags
Definition: qengine_cuda.hpp:32
const bitCapInt ZERO_BCI
Definition: qrack_types.hpp:130
bitCapIntOcl pow2Ocl(const bitLenInt &p)
Definition: qrack_functions.hpp:137
#define CL_MAP_WRITE
Definition: qengine_cuda.hpp:36
#define BCI_ARG_LEN
Definition: qengine_cuda.hpp:25
#define CL_MEM_USE_HOST_PTR
Definition: qengine_cuda.hpp:41
#define CL_MEM_COPY_HOST_PTR
Definition: qengine_cuda.hpp:42
#define CMPLX_NORM_LEN
Definition: qengine_cuda.hpp:26
#define CL_MAP_READ
Definition: qengine_cuda.hpp:35
#define REAL_ARG_LEN
Definition: qengine_cuda.hpp:27
#define REAL1_DEFAULT_ARG
Definition: qrack_types.hpp:177
#define bitLenInt
Definition: qrack_types.hpp:38
#define ZERO_R1_F
Definition: qrack_types.hpp:160
#define qrack_rand_gen_ptr
Definition: qrack_types.hpp:156
#define bitCapInt
Definition: qrack_types.hpp:62
#define bitCapIntOcl
Definition: qrack_types.hpp:50
Definition: qengine_cuda.hpp:50
QueueItem(OCLAPI ac, size_t wic, size_t lgs, size_t ds, std::vector< BufferPtr > b, size_t lbs)
Definition: qengine_cuda.hpp:76
QueueItem(real1_f runningNrm)
Definition: qengine_cuda.hpp:104
bool doNorm
Definition: qengine_cuda.hpp:59
size_t workItemCount
Definition: qengine_cuda.hpp:52
std::vector< BufferPtr > buffers
Definition: qengine_cuda.hpp:55
size_t deallocSize
Definition: qengine_cuda.hpp:54
QueueItem()
Definition: qengine_cuda.hpp:62
bool isSetRunningNorm
Definition: qengine_cuda.hpp:58
QueueItem(bool doNrm)
Definition: qengine_cuda.hpp:90
size_t localBuffSize
Definition: qengine_cuda.hpp:56
OCLAPI api_call
Definition: qengine_cuda.hpp:51
bool isSetDoNorm
Definition: qengine_cuda.hpp:57
size_t localGroupSize
Definition: qengine_cuda.hpp:53
real1 runningNorm
Definition: qengine_cuda.hpp:60