15 #include "common/cudaengine.cuh"
20 #error CUDA has not been enabled
25 #define BCI_ARG_LEN 10
26 #define CMPLX_NORM_LEN 6
27 #define REAL_ARG_LEN 2
35 #define CL_MAP_READ (1 << 0)
36 #define CL_MAP_WRITE (1 << 1)
38 #define CL_MEM_READ_WRITE (1 << 0)
39 #define CL_MEM_WRITE_ONLY (1 << 1)
40 #define CL_MEM_READ_ONLY (1 << 2)
41 #define CL_MEM_USE_HOST_PTR (1 << 3)
42 #define CL_MEM_COPY_HOST_PTR (1 << 5)
76 QueueItem(
OCLAPI ac,
size_t wic,
size_t lgs,
size_t ds, std::vector<BufferPtr> b,
size_t lbs)
144 BufferPtr toRet = std::shared_ptr<void>(
AllocRaw(size, &error), [](
void* c) { cudaFree(c); });
146 if (error != cudaSuccess) {
147 throw std::runtime_error(
"CUDA error code on buffer allocation attempt: " + std::to_string(error));
156 *errorPtr = cudaMalloc(&toRet, size);
205 void tryCuda(std::string message, std::function<cudaError_t()> oclCall)
207 if (oclCall() == cudaSuccess) {
215 if (oclCall() == cudaSuccess) {
223 cudaError_t error = oclCall();
224 if (error == cudaSuccess) {
232 throw std::runtime_error(message +
", error code: " + std::to_string(error));
282 bool useHostMem =
false, int64_t devID = -1,
bool useHardwareRNG =
true,
bool ignored =
false,
320 const complex* mtrxs,
const std::vector<bitCapInt>& mtrxSkipPowers,
const bitCapInt& mtrxSkipValueMask);
343 return Compose(std::dynamic_pointer_cast<QEngineCUDA>(toCopy), start);
370 const std::vector<bitLenInt>& controls);
372 const std::vector<bitLenInt>& controls);
374 bitLenInt length,
const std::vector<bitLenInt>& controls);
376 bitLenInt length,
const std::vector<bitLenInt>& controls);
378 bitLenInt length,
const std::vector<bitLenInt>& controls);
383 const unsigned char* values,
bool resetValue =
true);
385 bitLenInt carryIndex,
const unsigned char* values);
387 bitLenInt carryIndex,
const unsigned char* values);
416 return SumSqrDiff(std::dynamic_pointer_cast<QEngineCUDA>(toCompare));
435 size_t currentAlloc = CUDAEngine::Instance().AddToActiveAllocSize(
deviceID, size);
437 CUDAEngine::Instance().SubtractFromActiveAllocSize(
deviceID, size);
438 throw bad_alloc(
"VRAM limits exceeded in QEngineCUDA::AddAlloc()");
444 CUDAEngine::Instance().SubtractFromActiveAllocSize(
deviceID, size);
453 AllocRaw(flags, host_ptr, size, &error), [
this, flags](
void* c) {
FreeRaw(flags, c); });
455 if (error == cudaSuccess) {
463 toRet = std::shared_ptr<void>(
464 AllocRaw(flags, host_ptr, size, &error), [
this, flags](
void* c) {
FreeRaw(flags, c); });
466 if (error == cudaSuccess) {
474 toRet = std::shared_ptr<void>(
475 AllocRaw(flags, host_ptr, size, &error), [
this, flags](
void* c) {
FreeRaw(flags, c); });
477 if (error != cudaSuccess) {
478 throw std::runtime_error(
"CUDA error code on buffer allocation attempt: " + std::to_string(error));
486 void* toRet = host_ptr;
487 *errorPtr = (flags &
CL_MEM_USE_HOST_PTR) ? cudaHostRegister(host_ptr, size, cudaHostRegisterDefault)
488 : cudaMalloc(&toRet, size);
490 cudaMemcpy(toRet, host_ptr, size, cudaMemcpyHostToDevice);
499 cudaHostUnregister(c);
521 tryCuda(
"Failed to write buffer", [&] {
531 void QueueCall(
OCLAPI api_call,
size_t workItemCount,
size_t localGroupSize, std::vector<BufferPtr> args,
532 size_t localBuffSize = 0
U,
size_t deallocSize = 0
U)
535 throw bad_alloc(
"Local memory limits exceeded in QEngineCUDA::QueueCall()");
623 return gs - (wic % gs);
644 void WaitCall(
OCLAPI api_call,
size_t workItemCount,
size_t localGroupSize, std::vector<BufferPtr> args,
645 size_t localBuffSize = 0
U);
682 const std::vector<bitLenInt>& controls);
684 bitLenInt length,
const std::vector<bitLenInt>& controls);
Definition: qengine_cuda.hpp:119
BufferPtr ulongBuffer
Definition: qengine_cuda.hpp:123
~PoolItem()
Definition: qengine_cuda.hpp:137
BufferPtr cmplxBuffer
Definition: qengine_cuda.hpp:121
BufferPtr MakeBuffer(size_t size)
Definition: qengine_cuda.hpp:140
BufferPtr realBuffer
Definition: qengine_cuda.hpp:122
std::shared_ptr< real1 > angleArray
Definition: qengine_cuda.hpp:126
PoolItem()
Definition: qengine_cuda.hpp:128
void * AllocRaw(size_t size, cudaError_t *errorPtr)
Definition: qengine_cuda.hpp:153
std::shared_ptr< real1 > probArray
Definition: qengine_cuda.hpp:125
CUDA enhanced QEngineCPU implementation.
Definition: qengine_cuda.hpp:182
real1_f Prob(bitLenInt qubit)
Direct measure of bit probability to be in |1> state.
real1_f SumSqrDiff(QInterfacePtr toCompare)
Calculates (1 - <\psi_e|\psi_c>) between states |\psi_c> and |\psi_e>.
Definition: qengine_cuda.hpp:414
void Compose(OCLAPI apiCall, const bitCapIntOcl *bciArgs, QEngineCUDAPtr toCopy)
void ProbMaskAll(const bitCapInt &mask, real1 *probsArray)
Direct measure of masked permutation probability.
real1_f ProbParity(const bitCapInt &mask)
Overall probability of any odd permutation of the masked set of bits.
void POWModNOut(const bitCapInt &base, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Raise a classical base to a quantum power, modulo N, (out of place)
virtual bool isOpenCL()
Returns "true" if current simulation is OpenCL-based.
Definition: qengine_cuda.hpp:292
void INCS(const bitCapInt &toAdd, bitLenInt start, bitLenInt length, bitLenInt carryIndex)
Add a classical integer to the register, with sign and without carry.
void UniformlyControlledSingleBit(const std::vector< bitLenInt > &controls, bitLenInt qubitIndex, const complex *mtrxs, const std::vector< bitCapInt > &mtrxSkipPowers, const bitCapInt &mtrxSkipValueMask)
void QueueCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector< BufferPtr > args, size_t localBuffSize=0U, size_t deallocSize=0U)
Definition: qengine_cuda.hpp:531
void Decompose(bitLenInt start, QInterfacePtr dest)
Minimally decompose a set of contiguous bits from the separably composed unit, into "destination".
bitLenInt Allocate(bitLenInt start, bitLenInt length)
Allocate new "length" count of |0> state qubits at specified qubit index start position.
real1_f FirstNonzeroPhase()
Get phase of lowest permutation nonzero amplitude.
Definition: qengine_cuda.hpp:295
std::shared_ptr< complex > AllocStateVec(bitCapIntOcl elemCount, bool doForceAlloc=false)
void ApplyMx(OCLAPI api_call, const bitCapIntOcl *bciArgs, const complex &nrm)
bool didInit
Definition: qengine_cuda.hpp:184
void Dispose(bitLenInt start, bitLenInt length, const bitCapInt &disposedPerm)
Dispose a contiguous set of qubits that are already in a permutation eigenstate.
void ShuffleBuffers(QEnginePtr engine)
Swap the high half of this engine with the low half of another.
void SetAmplitude(const bitCapInt &perm, const complex &)
Sets the representational amplitude of a full permutation.
void Finish()
If asynchronous work is still running, block until it finishes.
Definition: qengine_cuda.hpp:423
void INCDECC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Common driver method behind INCC and DECC (without sign, with carry)
void INTBCD(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length)
QInterfacePtr Copy()
Copy this QInterface.
void CUniformParityRZ(const std::vector< bitLenInt > &controls, const bitCapInt &mask, real1_f angle)
If the controls are set and the target qubit set parity is odd, this applies a phase factor of $e^{i \cdot \mathrm{angle}}$.
void DIV(const bitCapInt &toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Divide by integer.
bitLenInt Compose(QEngineCUDAPtr toCopy)
void PhaseFlipX(OCLAPI api_call, const bitCapIntOcl *bciArgs)
void SubtractAlloc(size_t size)
Definition: qengine_cuda.hpp:442
void FullAdx(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut, OCLAPI api_call)
BufferPtr stateBuffer
Definition: qengine_cuda.hpp:197
void XMask(const bitCapInt &mask)
Masked X gate.
void tryCuda(std::string message, std::function< cudaError_t()> oclCall)
Definition: qengine_cuda.hpp:205
void Dispose(bitLenInt start, bitLenInt length)
Minimally decompose a set of contiguous bits from the separably composed unit, and discard the separa...
void FullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut)
Quantum analog of classical "Full Adder" gate.
std::shared_ptr< complex > stateVec
Definition: qengine_cuda.hpp:193
bitCapInt IndexedSBC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Subtract from an entangled 8 bit register state with a superposed index-offset-based read from classi...
real1_f ExpectationBitsAll(const std::vector< bitLenInt > &bits, const bitCapInt &offset=ZERO_BCI)
Get permutation expectation value of bits.
void IMULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Inverse of multiplication modulo N by integer, (out of place)
void QueueSetRunningNorm(real1_f runningNrm)
Add an operation to the (OpenCL) queue, to set the value of runningNorm, which is the normalization c...
Definition: qengine_cuda.hpp:542
void NormalizeState(real1_f nrm=REAL1_DEFAULT_ARG, real1_f norm_thresh=REAL1_DEFAULT_ARG, real1_f phaseArg=ZERO_R1_F)
Apply the normalization factor found by UpdateRunningNorm() or on the fly by a single bit gate.
void CPhaseFlipIfLess(const bitCapInt &greaterPerm, bitLenInt start, bitLenInt length, bitLenInt flagIndex)
The 6502 uses its carry flag also as a greater-than/less-than flag, for the CMP operation.
void ROL(bitLenInt shift, bitLenInt start, bitLenInt length)
Circular shift left - shift bits left, and carry last bits.
void CMULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication modulo N by integer, (out of place)
std::unique_ptr< real1[], void(*)(real1 *)> nrmArray
Definition: qengine_cuda.hpp:202
static const bitCapIntOcl OclMemDenom
1 / OclMemDenom is the maximum fraction of total OCL device RAM that a single state vector should occ...
Definition: qengine_cuda.hpp:260
int64_t GetDevice()
Get GPU device ID.
Definition: qengine_cuda.hpp:405
void UnlockSync()
Unlocks synchronization between the state vector buffer and general RAM, so the state vector can be o...
void AddAlloc(size_t size)
Definition: qengine_cuda.hpp:433
bool ForceMParity(const bitCapInt &mask, bool result, bool doForce=true)
Act as if a measurement of parity of the masked set of qubits was applied, except force the (usual...
void INCDECSC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Common driver method behind INCSC and DECSC (without overflow flag)
void ArithmeticCall(OCLAPI api_call, const bitCapIntOcl(&bciArgs)[BCI_ARG_LEN], const unsigned char *values=NULL, bitCapIntOcl valuesLength=0U)
void ClearBuffer(BufferPtr buff, bitCapIntOcl offset, bitCapIntOcl size)
size_t FixGroupSize(size_t wic, size_t gs)
Definition: qengine_cuda.hpp:617
size_t totalOclAllocSize
Definition: qengine_cuda.hpp:189
void ApplyM(const bitCapInt &mask, const bitCapInt &result, const complex &nrm)
void GetProbs(real1 *outputProbs)
Get the probabilities of all permutation basis states, written to outputProbs.
QEnginePtr CloneEmpty()
Clone this QEngine's settings, with a zeroed state vector.
void GetQuantumState(complex *outputState)
Get the pure quantum state representation.
void SetAmplitudePage(const complex *pagePtr, bitCapIntOcl offset, bitCapIntOcl length)
Copy a "page" of amplitudes from pagePtr into this QEngine's internal state.
bitLenInt Compose(QEngineCUDAPtr toCopy, bitLenInt start)
~QEngineCUDA()
Definition: qengine_cuda.hpp:286
bitLenInt Compose(QInterfacePtr toCopy, bitLenInt start)
Compose() a QInterface peer, inserting its qubit into index order at start index.
Definition: qengine_cuda.hpp:341
void IFullAdd(bitLenInt inputBit1, bitLenInt inputBit2, bitLenInt carryInSumOut, bitLenInt carryOut)
Inverse of FullAdd.
void CDIV(const bitCapInt &toDiv, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled division by power of integer.
int64_t deviceID
Definition: qengine_cuda.hpp:190
void INTBCDC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
void CMUL(const bitCapInt &toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled multiplication by integer.
void SetPermutation(const bitCapInt &perm, const complex &phaseFac=CMPLX_DEFAULT_ARG)
Set to a specific permutation of all qubits.
void ROx(OCLAPI api_call, bitLenInt shift, bitLenInt start, bitLenInt length)
std::mutex queue_mutex
Definition: qengine_cuda.hpp:194
void CMULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
bool isFinished()
Returns "false" if asynchronous work is still running, and "true" if all previously dispatched asynch...
Definition: qengine_cuda.hpp:424
void ResetStateBuffer(BufferPtr nStateBuffer)
bitCapInt IndexedLDA(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, const unsigned char *values, bool resetValue=true)
Set 8 bit register bits by a superposed index-offset-based read from classical memory.
QEngineCUDA(bitLenInt qBitCount, const bitCapInt &initState, qrack_rand_gen_ptr rgp=nullptr, const complex &phaseFac=CMPLX_DEFAULT_ARG, bool doNorm=false, bool randomGlobalPhase=true, bool useHostMem=false, int64_t devID=-1, bool useHardwareRNG=true, bool ignored=false, real1_f norm_thresh=REAL1_EPSILON, std::vector< int64_t > ignored2={}, bitLenInt ignored4=0U, real1_f ignored3=_qrack_qunit_sep_thresh)
Initialize a Qrack::QEngineCUDA object.
void Z(bitLenInt target)
Z gate.
void INT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length)
real1_f ProbReg(bitLenInt start, bitLenInt length, const bitCapInt &permutation)
Direct measure of register permutation probability.
void SetQuantumState(const complex *inputState)
Set an arbitrary pure quantum state representation.
void FreeStateVec()
Definition: qengine_cuda.hpp:557
complex GetAmplitude(const bitCapInt &perm)
Get the representational amplitude of a full permutation.
complex permutationAmp
Definition: qengine_cuda.hpp:192
void WaitCall(OCLAPI api_call, size_t workItemCount, size_t localGroupSize, std::vector< BufferPtr > args, size_t localBuffSize=0U)
void ZeroAmplitudes()
Set all amplitudes to 0, and optionally temporarily deallocate state vector RAM.
bitCapIntOcl OpIndexed(OCLAPI api_call, bitCapIntOcl carryIn, bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
void INCDECSC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex, bitLenInt carryIndex)
Common driver method behind INCSC and DECSC (with overflow flag)
void SetAmplitudePage(QEnginePtr pageEnginePtr, bitCapIntOcl srcOffset, bitCapIntOcl dstOffset, bitCapIntOcl length)
Copy a "page" of amplitudes from another QEngine, pointed to by pageEnginePtr, into this QEngine's in...
BufferPtr nrmBuffer
Definition: qengine_cuda.hpp:198
virtual void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, bitCapIntOcl const *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)=0
void Copy(QEngineCUDAPtr orig)
Definition: qengine_cuda.hpp:237
void ApplyM(const bitCapInt &mask, bool result, const complex &nrm)
bool usingHostRam
Definition: qengine_cuda.hpp:185
void Copy(QInterfacePtr orig)
Definition: qengine_cuda.hpp:236
void CPOWModNOut(const bitCapInt &base, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Controlled, raise a classical base to a quantum power, modulo N, (out of place)
void PhaseRootNMask(bitLenInt n, const bitCapInt &mask)
Masked PhaseRootN gate.
std::list< QueueItem > wait_queue_items
Definition: qengine_cuda.hpp:200
void CINC(const bitCapInt &toAdd, bitLenInt inOutStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Add integer (without sign, with controls)
real1_f ProbMask(const bitCapInt &mask, const bitCapInt &permutation)
Direct measure of masked permutation probability.
DeviceContextPtr device_context
Definition: qengine_cuda.hpp:199
void CINT(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt start, bitLenInt length, const std::vector< bitLenInt > &controls)
void ProbRegAll(bitLenInt start, bitLenInt length, real1 *probsArray)
BufferPtr MakeStateVecBuffer(std::shared_ptr< complex > nStateVec)
bitCapInt IndexedADC(bitLenInt indexStart, bitLenInt indexLength, bitLenInt valueStart, bitLenInt valueLength, bitLenInt carryIndex, const unsigned char *values)
Add to entangled 8 bit register state with a superposed index-offset-based read from classical memory...
void INTS(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex)
real1_f CtrlOrAntiProb(bool controlState, bitLenInt control, bitLenInt target)
void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, const bitCapIntOcl *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)
Definition: qengine_cuda.hpp:629
void QueueSetDoNormalize(bool doNorm)
Add an operation to the (OpenCL) queue, to set the value of doNormalize, which controls whether to au...
Definition: qengine_cuda.hpp:541
void clDump()
Dumps the remaining asynchronous wait event list or queue of OpenCL events, for the current queue.
void MULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
bitLenInt Compose(QInterfacePtr toCopy)
Combine another QInterface with this one, after the last bit index of this one.
Definition: qengine_cuda.hpp:339
real1_f ParSum(real1 *toSum, bitCapIntOcl maxI)
void GetAmplitudePage(complex *pagePtr, bitCapIntOcl offset, bitCapIntOcl length)
Copy a "page" of amplitudes from this QEngine's internal state, into pagePtr.
bitCapInt MAll()
Measure permutation state of all coherent bits.
void MUL(const bitCapInt &toMul, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
Multiply by integer.
void MULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length)
Multiplication modulo N by integer, (out of place)
void PhaseParity(real1_f radians, const bitCapInt &mask)
Parity phase gate.
void LockSync(cl_map_flags flags=(CL_MAP_READ|CL_MAP_WRITE))
Locks synchronization between the state vector buffer and general RAM, so the state vector can be dir...
void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt overflowIndex, bitLenInt carryIndex)
void INCBCD(const bitCapInt &toAdd, bitLenInt start, bitLenInt length)
Add classical BCD integer (without sign)
size_t FixWorkItemCount(size_t maxI, size_t wic)
Definition: qengine_cuda.hpp:606
void xMULx(OCLAPI api_call, const bitCapIntOcl *bciArgs, BufferPtr controlBuffer)
void INTC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
void UniformParityRZ(const bitCapInt &mask, real1_f angle)
If the target qubit set parity is odd, this applies a phase factor of $e^{i \cdot \mathrm{angle}}$.
real1_f SumSqrDiff(QEngineCUDAPtr toCompare)
BufferPtr MakeBuffer(cl_mem_flags flags, size_t size, void *host_ptr=NULL)
Definition: qengine_cuda.hpp:448
void InitOCL(int64_t devID)
bool IsZeroAmplitude()
Returns "true" only if amplitudes are all totally 0.
Definition: qengine_cuda.hpp:294
void INTSC(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
void Phase(const complex &topLeft, const complex &bottomRight, bitLenInt qubitIndex)
Apply a single bit transformation that only affects phase.
void MULx(OCLAPI api_call, bitCapIntOcl toMod, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length)
void FreeRaw(cl_mem_flags flags, void *c)
Definition: qengine_cuda.hpp:496
size_t nrmGroupSize
Definition: qengine_cuda.hpp:188
void UpdateRunningNorm(real1_f norm_thresh=REAL1_DEFAULT_ARG)
Force a calculation of the norm of the state vector, in order to make it unit length before the next ...
EventVecPtr ResetWaitEvents(bool waitQueue=true)
void * AllocRaw(cl_mem_flags flags, void *host_ptr, size_t size, cudaError_t *errorPtr)
Definition: qengine_cuda.hpp:484
void INCDECBCDC(const bitCapInt &toMod, bitLenInt inOutStart, bitLenInt length, bitLenInt carryIndex)
Common driver method behind INCBCDC and DECBCDC (BCD, with carry)
size_t nrmGroupCount
Definition: qengine_cuda.hpp:187
void Invert(const complex &topRight, const complex &bottomLeft, bitLenInt qubitIndex)
Apply a single bit transformation that reverses bit probability and might affect phase.
void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, const bitCapIntOcl *qPowersSorted, bool doCalcNorm, SPECIAL_2X2 special, real1_f norm_thresh=REAL1_DEFAULT_ARG)
bool unlockHostMem
Definition: qengine_cuda.hpp:186
void CIMULModNOut(const bitCapInt &toMul, const bitCapInt &modN, bitLenInt inStart, bitLenInt outStart, bitLenInt length, const std::vector< bitLenInt > &controls)
Inverse of controlled multiplication modulo N by integer, (out of place)
void CMULModx(OCLAPI api_call, bitCapIntOcl toMod, bitCapIntOcl modN, bitLenInt inOutStart, bitLenInt carryStart, bitLenInt length, const std::vector< bitLenInt > &controls)
cl_map_flags lockSyncFlags
Definition: qengine_cuda.hpp:191
void SwitchHostPtr(bool useHostMem)
Switch to/from host/device state vector buffer.
Definition: qengine_cuda.hpp:505
void CopyStateVec(QEnginePtr src)
Exactly copy the state vector of a different QEngine instance.
void BitMask(bitCapIntOcl mask, OCLAPI api_call, real1_f phase=(real1_f) PI_R1)
void DecomposeDispose(bitLenInt start, bitLenInt length, QEngineCUDAPtr dest)
real1_f Probx(OCLAPI api_call, const bitCapIntOcl *bciArgs)
PoolItemPtr GetFreePoolItem()
void Hash(bitLenInt start, bitLenInt length, const unsigned char *values)
Transform a length of qubit register via lookup through a hash table.
bitCapIntOcl GetMaxSize()
Definition: qengine_cuda.hpp:314
real1_f GetExpectation(bitLenInt valueStart, bitLenInt valueLength)
void clFinish(bool doHard=false)
Finishes the asynchronous wait event list or queue of OpenCL events.
std::vector< PoolItemPtr > poolItems
Definition: qengine_cuda.hpp:201
void AddQueueItem(const QueueItem &item)
Definition: qengine_cuda.hpp:543
void CArithmeticCall(OCLAPI api_call, const bitCapIntOcl(&bciArgs)[BCI_ARG_LEN], bitCapIntOcl *controlPowers, bitLenInt controlLen, const unsigned char *values=NULL, bitCapIntOcl valuesLength=0U)
QInterfacePtr Clone()
Clone this QInterface.
void INC(const bitCapInt &toAdd, bitLenInt start, bitLenInt length)
Add integer (without sign)
void SetDevice(int64_t dID)
Set GPU device ID.
void PhaseFlipIfLess(const bitCapInt &greaterPerm, bitLenInt start, bitLenInt length)
This is an expedient for an adaptive Grover's search for a function's global minimum.
Abstract QEngine implementation, for all "Schroedinger method" engines.
Definition: qengine.hpp:31
virtual void Copy(QInterfacePtr orig)
Copy this QInterface.
Definition: qinterface.hpp:222
bitCapIntOcl maxQPowerOcl
Definition: qengine.hpp:40
virtual void Decompose(bitLenInt start, QInterfacePtr dest)=0
Minimally decompose a set of contiguous bits from the separably composed unit, into "destination".
virtual void X(bitLenInt qubit)
X gate.
Definition: qinterface.hpp:1084
virtual void Apply2x2(bitCapIntOcl offset1, bitCapIntOcl offset2, const complex *mtrx, bitLenInt bitCount, bitCapIntOcl const *qPowersSorted, bool doCalcNorm, real1_f norm_thresh=REAL1_DEFAULT_ARG)=0
virtual bitLenInt Allocate(bitLenInt length)
Allocate new "length" count of |0> state qubits at end of qubit index position.
Definition: qinterface.hpp:470
virtual bitLenInt Compose(QInterfacePtr toCopy)
Combine another QInterface with this one, after the last bit index of this one.
Definition: qinterface.hpp:364
Definition: qengine_gpu_util.hpp:21
Half-precision floating-point type.
Definition: half.hpp:2222
virtual void UniformlyControlledSingleBit(const std::vector< bitLenInt > &controls, bitLenInt qubit, const complex *mtrxs)
Apply a "uniformly controlled" arbitrary single bit unitary transformation.
Definition: qinterface.hpp:627
virtual void Phase(const complex &topLeft, const complex &bottomRight, bitLenInt qubit)
Apply a single bit transformation that only affects phase.
Definition: qinterface.hpp:516
virtual void Invert(const complex &topRight, const complex &bottomLeft, bitLenInt qubit)
Apply a single bit transformation that reverses bit probability and might affect phase.
Definition: qinterface.hpp:529
virtual void Z(bitLenInt qubit)
Z gate.
Definition: qinterface.hpp:1117
virtual void U(bitLenInt target, real1_f theta, real1_f phi, real1_f lambda)
General unitary gate.
Definition: rotational.cpp:18
virtual real1_f FirstNonzeroPhase()
Get phase of lowest permutation nonzero amplitude.
Definition: qinterface.hpp:2985
GLOSSARY: bitLenInt - "bit-length integer" - unsigned integer ID of qubit position in register bitCap...
Definition: complex16x2simd.hpp:25
std::shared_ptr< QEngine > QEnginePtr
Definition: qrack_types.hpp:151
std::shared_ptr< OCLDeviceContext > DeviceContextPtr
Definition: oclengine.hpp:47
std::shared_ptr< QInterface > QInterfacePtr
Definition: qinterface.hpp:29
const real1_f _qrack_qunit_sep_thresh
Definition: qrack_functions.hpp:235
std::shared_ptr< EventVec > EventVecPtr
Definition: oclengine.hpp:51
bitLenInt log2Ocl(bitCapIntOcl n)
Definition: qrack_functions.hpp:88
void U(quid sid, bitLenInt q, real1_f theta, real1_f phi, real1_f lambda)
(External API) 3-parameter unitary gate
Definition: wasm_api.cpp:1143
std::complex< real1 > complex
Definition: qrack_types.hpp:128
unsigned long cl_map_flags
Definition: qengine_cuda.hpp:31
QRACK_CONST real1 REAL1_EPSILON
Definition: qrack_types.hpp:200
QRACK_CONST real1 ONE_R1
Definition: qrack_types.hpp:185
float real1_f
Definition: qrack_types.hpp:95
QRACK_CONST complex CMPLX_DEFAULT_ARG
Definition: qrack_types.hpp:257
std::shared_ptr< QEngineCUDA > QEngineCUDAPtr
Definition: qengine_cuda.hpp:47
std::shared_ptr< PoolItem > PoolItemPtr
Definition: qengine_cuda.hpp:162
SPECIAL_2X2
Definition: qengine_gpu_util.hpp:19
@ NONE
Definition: qengine_gpu_util.hpp:19
OCLAPI
Definition: oclapi.hpp:19
std::shared_ptr< void > BufferPtr
Definition: qengine_cuda.hpp:45
QRACK_CONST real1 PI_R1
Definition: qrack_types.hpp:178
unsigned long cl_mem_flags
Definition: qengine_cuda.hpp:32
const bitCapInt ZERO_BCI
Definition: qrack_types.hpp:130
bitCapIntOcl pow2Ocl(const bitLenInt &p)
Definition: qrack_functions.hpp:137
#define CL_MAP_WRITE
Definition: qengine_cuda.hpp:36
#define BCI_ARG_LEN
Definition: qengine_cuda.hpp:25
#define CL_MEM_USE_HOST_PTR
Definition: qengine_cuda.hpp:41
#define CL_MEM_COPY_HOST_PTR
Definition: qengine_cuda.hpp:42
#define CMPLX_NORM_LEN
Definition: qengine_cuda.hpp:26
#define CL_MAP_READ
Definition: qengine_cuda.hpp:35
#define REAL_ARG_LEN
Definition: qengine_cuda.hpp:27
#define REAL1_DEFAULT_ARG
Definition: qrack_types.hpp:177
#define bitLenInt
Definition: qrack_types.hpp:38
#define ZERO_R1_F
Definition: qrack_types.hpp:160
#define qrack_rand_gen_ptr
Definition: qrack_types.hpp:156
#define bitCapInt
Definition: qrack_types.hpp:62
#define bitCapIntOcl
Definition: qrack_types.hpp:50
Definition: qengine_cuda.hpp:50
QueueItem(OCLAPI ac, size_t wic, size_t lgs, size_t ds, std::vector< BufferPtr > b, size_t lbs)
Definition: qengine_cuda.hpp:76
QueueItem(real1_f runningNrm)
Definition: qengine_cuda.hpp:104
bool doNorm
Definition: qengine_cuda.hpp:59
size_t workItemCount
Definition: qengine_cuda.hpp:52
std::vector< BufferPtr > buffers
Definition: qengine_cuda.hpp:55
size_t deallocSize
Definition: qengine_cuda.hpp:54
QueueItem()
Definition: qengine_cuda.hpp:62
bool isSetRunningNorm
Definition: qengine_cuda.hpp:58
QueueItem(bool doNrm)
Definition: qengine_cuda.hpp:90
size_t localBuffSize
Definition: qengine_cuda.hpp:56
OCLAPI api_call
Definition: qengine_cuda.hpp:51
bool isSetDoNorm
Definition: qengine_cuda.hpp:57
size_t localGroupSize
Definition: qengine_cuda.hpp:53
real1 runningNorm
Definition: qengine_cuda.hpp:60