# Copyright 2021-2024 NVIDIA Corporation.  All rights reserved.
#
# Please refer to the NVIDIA end user license agreement (EULA) associated
# with this source code for terms and conditions that govern your use of
# this software. Any use, reproduction, disclosure, or distribution of
# this software and related documentation outside the terms of the EULA
# is strictly prohibited.
from cuda.bindings.cyruntime cimport *
from cuda.bindings._lib.cyruntime.utils cimport *
from libc.stdlib cimport malloc, free, calloc
from libc.string cimport memset, memcpy, strncmp
from libcpp cimport bool
cimport cuda.bindings._bindings.cydriver as cydriver

# Module-level handle obtained from globalGetInstance(). The wrappers in this
# file call its lazyInit* methods and read its _CUDART_VERSION field.
cdef cudaPythonGlobal m_global = globalGetInstance()

cdef cudaError_t _cudaMemcpy(void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards (dst, src, count, kind) to memcpyDispatch once
    # m_global.lazyInitContextState() has succeeded.
    # Returns the resulting status. A dispatch failure is recorded through
    # _setLastError; an initialization failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = memcpyDispatch(dst, src, count, kind)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaStreamCreate(cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Calls cydriver._cuStreamCreate(pStream, 0) after lazy context-state
    # initialization succeeds.
    # Returns the resulting status. A driver failure is recorded through
    # _setLastError; an initialization failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamCreate(pStream, 0)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaEventCreate(cudaEvent_t* event) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Calls cydriver._cuEventCreate with the CU_EVENT_DEFAULT flag after lazy
    # context-state initialization succeeds.
    # Returns the resulting status. A driver failure is recorded through
    # _setLastError; an initialization failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEventCreate(event, cydriver.CUevent_flags_enum.CU_EVENT_DEFAULT)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaEventQuery(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Passes `event` to cydriver._cuEventQuery and returns its status cast to
    # cudaError_t. Any non-success status is recorded through _setLastError.
    # No lazy initialization is performed here.
    cdef cudaError_t status = <cudaError_t>cydriver._cuEventQuery(<cydriver.CUevent>event)
    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaChannelFormatDesc _cudaCreateChannelDesc(int x, int y, int z, int w, cudaChannelFormatKind f) noexcept nogil:
    # Builds a cudaChannelFormatDesc by value, copying each argument into the
    # field of the same name (x, y, z, w, f). Performs no validation.
    cdef cudaChannelFormatDesc result
    result.f = f
    result.w = w
    result.z = z
    result.y = y
    result.x = x
    return result


cdef cudaError_t _cudaDriverGetVersion(int* driverVersion) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Passes `driverVersion` to cydriver._cuDriverGetVersion and returns its
    # status cast to cudaError_t. Any non-success status is recorded through
    # _setLastError.
    cdef cudaError_t status = <cudaError_t>cydriver._cuDriverGetVersion(driverVersion)
    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaError_t _cudaRuntimeGetVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Stores m_global._CUDART_VERSION into runtimeVersion[0] and always
    # returns cudaSuccess. The pointer is written without a NULL check.
    cdef cudaError_t status = cudaSuccess
    runtimeVersion[0] = m_global._CUDART_VERSION
    return status


cdef cudaError_t _cudaDeviceGetTexture1DLinearMaxWidth(size_t* maxWidthInElements, const cudaChannelFormatDesc* fmtDesc, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Translates the runtime channel descriptor `fmtDesc` into a driver array
    # format plus channel count via getDescInfo, then forwards the query to
    # cydriver._cuDeviceGetTexture1DLinearMaxWidth for `device`.
    #
    # Returns:
    #   cudaErrorInvalidValue (recorded) when fmtDesc is NULL;
    #   the lazyInitDriver() status (not recorded) when initialization fails;
    #   the getDescInfo or driver status otherwise, recorded on failure.
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUarray_format fmt
    cdef int numChannels = 0

    if fmtDesc == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    err = m_global.lazyInitDriver()
    if err != cudaSuccess:
        return err

    err = getDescInfo(fmtDesc, &numChannels, &fmt)
    # Bail out only when the descriptor conversion FAILED. The previous
    # `== cudaSuccess` test returned early on success, so the driver query
    # below was never reached for valid descriptors.
    if err != cudaSuccess:
        _setLastError(err)
        return err

    err = <cudaError_t>cydriver._cuDeviceGetTexture1DLinearMaxWidth(maxWidthInElements, fmt, <unsigned>numChannels, device)
    if err != cudaSuccess:
        _setLastError(err)
    return err


cdef cudaError_t _cudaMallocHost(void** ptr, size_t size) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to mallocHost(size, ptr, 0) after lazy context-state
    # initialization succeeds.
    # A NULL `ptr` yields cudaErrorInvalidValue (recorded). A mallocHost
    # failure is recorded through _setLastError; an initialization failure
    # is returned without recording.
    cdef cudaError_t status = cudaSuccess

    if ptr == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = mallocHost(size, ptr, 0)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaMallocPitch(void** devPtr, size_t* pitch, size_t width, size_t height) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to mallocPitch(width, height, 1, devPtr, pitch) after lazy
    # context-state initialization succeeds.
    #
    # Returns:
    #   cudaErrorInvalidValue (recorded) when devPtr or pitch is NULL;
    #   the lazyInitContextState() status (not recorded) when it fails;
    #   the mallocPitch status otherwise, recorded on failure.
    cdef cudaError_t err = cudaSuccess

    if devPtr == NULL or pitch == NULL:
        # Record the failure so the NULL-argument path matches the other
        # NULL checks in this file (e.g. _cudaMallocHost, _cudaMallocArray).
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    err = mallocPitch(width, height, 1, devPtr, pitch)
    if err != cudaSuccess:
        _setLastError(err)
    return err


cdef cudaError_t _cudaMallocMipmappedArray(cudaMipmappedArray_t* mipmappedArray, const cudaChannelFormatDesc* desc, cudaExtent extent, unsigned int numLevels, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to mallocMipmappedArray after lazy context-state
    # initialization succeeds. The extent is passed to the helper as
    # (depth, height, width), in that order.
    #
    # Returns:
    #   cudaErrorInvalidValue (recorded) when mipmappedArray or desc is NULL;
    #   the lazyInitContextState() status (not recorded) when it fails;
    #   the mallocMipmappedArray status otherwise, recorded on failure.
    cdef cudaError_t err = cudaSuccess

    if mipmappedArray == NULL or desc == NULL:
        # Record the failure so the NULL-argument path matches the other
        # NULL checks in this file (e.g. _cudaMallocArray, _cudaMalloc3DArray).
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    err = mallocMipmappedArray(mipmappedArray, desc, extent.depth, extent.height, extent.width, numLevels, flags)
    if err != cudaSuccess:
        _setLastError(err)
    return err


cdef cudaError_t _cudaMemcpy2D(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to memcpy2DPtr with a NULL stream and the trailing flag set
    # to False, after lazy context-state initialization succeeds.
    # A memcpy2DPtr failure is recorded through _setLastError; an
    # initialization failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = memcpy2DPtr(<char*>dst, dpitch, <const char*>src, spitch, width, height, kind, NULL, False)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaMemcpy2DAsync(void* dst, size_t dpitch, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to memcpy2DPtr with the caller's `stream` and the trailing
    # flag set to True, after lazy context-state initialization succeeds.
    # A memcpy2DPtr failure is recorded through _setLastError; an
    # initialization failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = memcpy2DPtr(<char*>dst, dpitch, <const char*>src, spitch, width, height, kind, stream, True)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaMemcpyAsync(void* dst, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards (dst, src, count, kind, stream) to memcpyAsyncDispatch once
    # lazy context-state initialization has succeeded.
    # A dispatch failure is recorded through _setLastError; an
    # initialization failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = memcpyAsyncDispatch(dst, src, count, kind, stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaGraphAddMemcpyNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemcpy3DParms* pCopyParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Converts `pCopyParams` to a driver CUDA_MEMCPY3D_v2 through
    # toDriverMemCopy3DParams and passes it, together with the context
    # returned by _cuCtxGetCurrent, to cydriver._cuGraphAddMemcpyNode.
    #
    # A NULL `pCopyParams` yields cudaErrorInvalidValue (recorded). A failing
    # lazyInitContextState() is returned without recording. Every later
    # failure stops the sequence and is recorded through _setLastError.
    cdef cydriver.CUcontext ctx
    cdef cydriver.CUDA_MEMCPY3D_v2 drvParams
    cdef cudaError_t status = cudaSuccess

    if pCopyParams == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    # Each step runs only while the previous ones succeeded.
    status = <cudaError_t>cydriver._cuCtxGetCurrent(&ctx)
    if status == cudaSuccess:
        status = toDriverMemCopy3DParams(pCopyParams, &drvParams)
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphAddMemcpyNode(pGraphNode, graph, pDependencies, numDependencies, &drvParams, ctx)

    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaError_t _cudaGraphAddMemcpyNode1D(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Expands (dst, src, count, kind) into a cudaMemcpy3DParms through
    # copy1DConvertTo3DParams, converts that to a driver CUDA_MEMCPY3D_v2,
    # and passes it with the current context to
    # cydriver._cuGraphAddMemcpyNode.
    #
    # A failing lazyInitContextState() is returned without recording. Every
    # later failure stops the sequence and is recorded through _setLastError.
    cdef cydriver.CUcontext ctx
    cdef cydriver.CUDA_MEMCPY3D_v2 drvParams
    cdef cudaMemcpy3DParms params3D
    cdef cudaError_t status = m_global.lazyInitContextState()

    if status != cudaSuccess:
        return status

    # Each step runs only while the previous ones succeeded.
    status = <cudaError_t>cydriver._cuCtxGetCurrent(&ctx)
    if status == cudaSuccess:
        copy1DConvertTo3DParams(dst, src, count, kind, &params3D)
        status = toDriverMemCopy3DParams(&params3D, &drvParams)
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphAddMemcpyNode(pGraphNode, graph, pDependencies, numDependencies, &drvParams, ctx)

    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaError_t _cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Expands (dst, src, count, kind) into a cudaMemcpy3DParms through
    # copy1DConvertTo3DParams, converts that to a driver CUDA_MEMCPY3D_v2,
    # and applies it to `node` with cydriver._cuGraphMemcpyNodeSetParams.
    #
    # A failing lazyInitContextState() is returned without recording. A
    # conversion or driver failure is recorded through _setLastError.
    cdef cydriver.CUDA_MEMCPY3D_v2 drvParams
    cdef cudaMemcpy3DParms params3D
    cdef cudaError_t status = m_global.lazyInitContextState()

    if status != cudaSuccess:
        return status

    copy1DConvertTo3DParams(dst, src, count, kind, &params3D)
    status = toDriverMemCopy3DParams(&params3D, &drvParams)
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphMemcpyNodeSetParams(node, &drvParams)

    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaError_t _cudaGraphExecMemcpyNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Converts `pNodeParams` to a driver CUDA_MEMCPY3D_v2 and passes it,
    # together with the context returned by _cuCtxGetCurrent, to
    # cydriver._cuGraphExecMemcpyNodeSetParams.
    #
    # A NULL `pNodeParams` yields cudaErrorInvalidValue (recorded). A failing
    # lazyInitContextState() is returned without recording. Every later
    # failure stops the sequence and is recorded through _setLastError.
    cdef cydriver.CUcontext ctx
    cdef cydriver.CUDA_MEMCPY3D_v2 drvParams
    cdef cudaError_t status = cudaSuccess

    if pNodeParams == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    # Each step runs only while the previous ones succeeded.
    status = <cudaError_t>cydriver._cuCtxGetCurrent(&ctx)
    if status == cudaSuccess:
        status = toDriverMemCopy3DParams(pNodeParams, &drvParams)
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExecMemcpyNodeSetParams(hGraphExec, node, &drvParams, ctx)

    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaError_t _cudaGraphExecMemcpyNodeSetParams1D(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void* dst, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Expands (dst, src, count, kind) into a cudaMemcpy3DParms through
    # copy1DConvertTo3DParams, converts that to a driver CUDA_MEMCPY3D_v2,
    # and passes it with the current context to
    # cydriver._cuGraphExecMemcpyNodeSetParams.
    #
    # A failing lazyInitContextState() is returned without recording. Every
    # later failure stops the sequence and is recorded through _setLastError.
    cdef cydriver.CUcontext ctx
    cdef cydriver.CUDA_MEMCPY3D_v2 drvParams
    cdef cudaMemcpy3DParms params3D
    cdef cudaError_t status = m_global.lazyInitContextState()

    if status != cudaSuccess:
        return status

    # Each step runs only while the previous ones succeeded.
    status = <cudaError_t>cydriver._cuCtxGetCurrent(&ctx)
    if status == cudaSuccess:
        copy1DConvertTo3DParams(dst, src, count, kind, &params3D)
        status = toDriverMemCopy3DParams(&params3D, &drvParams)
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExecMemcpyNodeSetParams(hGraphExec, node, &drvParams, ctx)

    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaError_t _cudaGetDriverEntryPoint(const char* symbol, void** funcPtr, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Looks up `symbol` through cydriver._cuGetProcAddress_v2, passing
    # m_global._CUDART_VERSION as the version argument and `driverStatus`
    # cast to the driver's query-result pointer type.
    # Any non-success status is recorded through _setLastError.
    cdef cudaError_t status = <cudaError_t>cydriver._cuGetProcAddress_v2(symbol, funcPtr, m_global._CUDART_VERSION, flags, <cydriver.CUdriverProcAddressQueryResult*>driverStatus)
    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaError_t _cudaGraphAddMemsetNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaMemsetParams* pMemsetParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Converts `pMemsetParams` to a driver CUDA_MEMSET_NODE_PARAMS through
    # toDriverMemsetNodeParams and passes it, together with the context
    # returned by _cuCtxGetCurrent, to cydriver._cuGraphAddMemsetNode.
    #
    # A NULL `pMemsetParams` yields cudaErrorInvalidValue (recorded). A
    # failing lazyInitContextState() is returned without recording. A
    # context-query or driver failure is recorded through _setLastError.
    cdef cydriver.CUcontext ctx
    cdef cydriver.CUDA_MEMSET_NODE_PARAMS drvParams
    cdef cudaError_t status = cudaSuccess

    if pMemsetParams == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = <cudaError_t>cydriver._cuCtxGetCurrent(&ctx)
    if status == cudaSuccess:
        toDriverMemsetNodeParams(pMemsetParams, &drvParams)
        status = <cudaError_t>cydriver._cuGraphAddMemsetNode(pGraphNode, graph, pDependencies, numDependencies, &drvParams, ctx)

    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaError_t _cudaGraphExecMemsetNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Converts `pNodeParams` to a driver CUDA_MEMSET_NODE_PARAMS through
    # toDriverMemsetNodeParams and passes it, together with the context
    # returned by _cuCtxGetCurrent, to
    # cydriver._cuGraphExecMemsetNodeSetParams.
    #
    # A NULL `pNodeParams` yields cudaErrorInvalidValue (recorded). A failing
    # lazyInitContextState() is returned without recording. A context-query
    # or driver failure is recorded through _setLastError.
    cdef cydriver.CUcontext ctx
    cdef cydriver.CUDA_MEMSET_NODE_PARAMS drvParams
    cdef cudaError_t status = cudaSuccess

    if pNodeParams == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = <cudaError_t>cydriver._cuCtxGetCurrent(&ctx)
    if status == cudaSuccess:
        toDriverMemsetNodeParams(pNodeParams, &drvParams)
        status = <cudaError_t>cydriver._cuGraphExecMemsetNodeSetParams(hGraphExec, node, &drvParams, ctx)

    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaError_t _cudaGraphMemcpyNodeSetParams(cudaGraphNode_t node, const cudaMemcpy3DParms* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Converts `pNodeParams` to a driver CUDA_MEMCPY3D_v2 and applies it to
    # `node` with cydriver._cuGraphMemcpyNodeSetParams.
    #
    # A NULL `pNodeParams` yields cudaErrorInvalidValue (recorded). A failing
    # lazyInitContextState() is returned without recording. A conversion or
    # driver failure is recorded through _setLastError.
    cdef cydriver.CUDA_MEMCPY3D_v2 drvParams
    cdef cudaError_t status = cudaSuccess

    if pNodeParams == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = toDriverMemCopy3DParams(pNodeParams, &drvParams)
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphMemcpyNodeSetParams(node, &drvParams)

    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaError_t _cudaGraphMemcpyNodeGetParams(cudaGraphNode_t node, cudaMemcpy3DParms* p) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Reads the driver CUDA_MEMCPY3D_v2 parameters of `node` with
    # cydriver._cuGraphMemcpyNodeGetParams and converts them into `p`
    # through toCudartMemCopy3DParams.
    #
    # A NULL `p` yields cudaErrorInvalidValue (recorded). A failing
    # lazyInitContextState() is returned without recording. A driver or
    # conversion failure is recorded through _setLastError.
    cdef cydriver.CUDA_MEMCPY3D_v2 drvParams
    cdef cudaError_t status = cudaSuccess

    if p == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = <cudaError_t>cydriver._cuGraphMemcpyNodeGetParams(node, &drvParams)
    if status == cudaSuccess:
        status = toCudartMemCopy3DParams(&drvParams, p)

    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaError_t _cudaFuncGetAttributes(cudaFuncAttributes* attr, const void* func) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Fills `attr` by issuing one cydriver._cuFuncGetAttribute query per
    # field, with `func` cast to cydriver.CUfunction.
    #
    # `attr` is zeroed with memset before the first query. The size-type
    # fields are fetched into the int `bytes` and then widened to size_t.
    # The first failing query is recorded through _setLastError and returned
    # immediately; fields after it keep their zeroed value.
    #
    # Returns:
    #   the lazyInitContextState() status (not recorded) when it fails;
    #   cudaErrorInvalidValue (recorded) when attr is NULL;
    #   the status of the first failing query, or cudaSuccess.
    cdef cudaError_t err = cudaSuccess
    cdef int bytes = 0

    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    if NULL == attr:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    memset(attr, 0, sizeof(cudaFuncAttributes))
    err = <cudaError_t>cydriver._cuFuncGetAttribute(&attr[0].maxThreadsPerBlock,     cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, <cydriver.CUfunction>func)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuFuncGetAttribute(&attr[0].numRegs,                cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_NUM_REGS, <cydriver.CUfunction>func)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuFuncGetAttribute(&attr[0].ptxVersion,             cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_PTX_VERSION, <cydriver.CUfunction>func)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuFuncGetAttribute(&attr[0].binaryVersion,          cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_BINARY_VERSION, <cydriver.CUfunction>func)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuFuncGetAttribute(&bytes,                          cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, <cydriver.CUfunction>func)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    attr[0].sharedSizeBytes = <size_t>bytes
    err = <cudaError_t>cydriver._cuFuncGetAttribute(&bytes,                          cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES, <cydriver.CUfunction>func)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    attr[0].constSizeBytes = <size_t>bytes
    err = <cudaError_t>cydriver._cuFuncGetAttribute(&bytes,                          cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, <cydriver.CUfunction>func)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    attr[0].localSizeBytes = <size_t>bytes
    err = <cudaError_t>cydriver._cuFuncGetAttribute(&attr[0].cacheModeCA,            cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_CACHE_MODE_CA, <cydriver.CUfunction>func)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuFuncGetAttribute(&bytes,                          cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, <cydriver.CUfunction>func)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    # Store the value right after its own query, like the other size fields,
    # so it is not dropped when the carveout query below fails.
    attr[0].maxDynamicSharedSizeBytes = <size_t>bytes
    err = <cudaError_t>cydriver._cuFuncGetAttribute(&attr[0].preferredShmemCarveout, cydriver.CUfunction_attribute_enum.CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT, <cydriver.CUfunction>func)
    if err != cudaSuccess:
        _setLastError(err)
    return err


cdef cudaError_t _cudaMallocArray(cudaArray_t* arrayPtr, const cudaChannelFormatDesc* desc, size_t width, size_t height, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to mallocArray(arrayPtr, desc, 0, height, width, 0, flags)
    # after lazy context-state initialization succeeds.
    # A NULL `arrayPtr` or `desc` yields cudaErrorInvalidValue (recorded).
    # A mallocArray failure is recorded through _setLastError; an
    # initialization failure is returned without recording.
    cdef cudaError_t status = cudaSuccess

    if desc == NULL or arrayPtr == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = mallocArray(arrayPtr, desc, 0, height, width, 0, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaMalloc3D(cudaPitchedPtr* pitchedDevPtr, cudaExtent extent) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to mallocPitch(extent.width, extent.height, extent.depth, ...)
    # which writes the `ptr` and `pitch` fields of `pitchedDevPtr`. On
    # success, `xsize` and `ysize` are set from extent.width / extent.height.
    #
    # Returns:
    #   cudaErrorInvalidValue (recorded) when pitchedDevPtr is NULL;
    #   the lazyInitContextState() status (not recorded) when it fails;
    #   the mallocPitch status otherwise, recorded on failure.
    cdef cudaError_t err = cudaSuccess

    if pitchedDevPtr == NULL:
        # Record the failure so the NULL-argument path matches the other
        # NULL checks in this file (e.g. _cudaMallocArray, _cudaMalloc3DArray).
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    err = mallocPitch(extent.width, extent.height, extent.depth, &pitchedDevPtr[0].ptr, &pitchedDevPtr[0].pitch)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    pitchedDevPtr[0].xsize = extent.width
    pitchedDevPtr[0].ysize = extent.height
    return err


cdef cudaError_t _cudaMalloc3DArray(cudaArray_t* arrayPtr, const cudaChannelFormatDesc* desc, cudaExtent extent, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to mallocArray after lazy context-state initialization
    # succeeds, passing the extent as (depth, height, width) followed by 0
    # and `flags`.
    # A NULL `arrayPtr` or `desc` yields cudaErrorInvalidValue (recorded).
    # A mallocArray failure is recorded through _setLastError; an
    # initialization failure is returned without recording.
    cdef cudaError_t status = cudaSuccess

    if desc == NULL or arrayPtr == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = mallocArray(arrayPtr, desc, extent.depth, extent.height, extent.width, 0, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef const char* _cudaGetErrorString(cudaError_t error) except ?NULL nogil:
    # Asks cydriver._cuGetErrorString for the text of `error` (cast to
    # cydriver.CUresult) and returns the pointer it produced.
    # A non-success lookup status is recorded through _setLastError. When
    # that status is cudaErrorInvalidValue, the fixed string
    # "unrecognized error code" is returned instead.
    cdef const char* message = NULL
    cdef cudaError_t status = <cudaError_t>cydriver._cuGetErrorString(<cydriver.CUresult>error, &message)

    if status != cudaSuccess:
        _setLastError(status)
        if status == <cudaError_t>cudaErrorInvalidValue:
            message = "unrecognized error code"
    return message


cdef cudaError_t _cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback, void* userData, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards all arguments to streamAddCallbackCommon once lazy
    # context-state initialization has succeeded.
    # A helper failure is recorded through _setLastError; an initialization
    # failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = streamAddCallbackCommon(stream, callback, userData, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaStreamGetCaptureInfo_v2(cudaStream_t stream, cudaStreamCaptureStatus* captureStatus_out, unsigned long long* id_out, cudaGraph_t* graph_out, const cudaGraphNode_t** dependencies_out, size_t* numDependencies_out) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards all arguments to streamGetCaptureInfoCommon once lazy
    # context-state initialization has succeeded.
    # A helper failure is recorded through _setLastError; an initialization
    # failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = streamGetCaptureInfoCommon(stream, captureStatus_out, id_out, graph_out, dependencies_out, numDependencies_out)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaImportExternalSemaphore(cudaExternalSemaphore_t* extSem_out, const cudaExternalSemaphoreHandleDesc* semHandleDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Builds a driver CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC from the runtime
    # descriptor `semHandleDesc` and passes it to
    # cydriver._cuImportExternalSemaphore.
    #
    # The driver descriptor starts zeroed. Each recognised runtime handle
    # type is mapped to its driver enum value and the matching handle member
    # (fd, win32 handle + name, or nvSciSyncObj) is copied. An unrecognised
    # type leaves both the type and the handle zeroed. `flags` is always
    # copied.
    #
    # A NULL `semHandleDesc` yields cudaErrorInvalidValue (recorded). A
    # failing lazyInitContextState() is returned without recording. A driver
    # failure is recorded through _setLastError.
    cdef cydriver.CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC drvDesc
    cdef cudaError_t status = cudaSuccess

    if semHandleDesc == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    memset(&drvDesc, 0, sizeof(drvDesc))
    drvDesc.flags = semHandleDesc[0].flags

    if semHandleDesc[0].type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueFd:
        drvDesc.type = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD
        drvDesc.handle.fd = semHandleDesc[0].handle.fd
    elif semHandleDesc[0].type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueWin32:
        drvDesc.type = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32
        drvDesc.handle.win32.handle = semHandleDesc[0].handle.win32.handle
        drvDesc.handle.win32.name = semHandleDesc[0].handle.win32.name
    elif semHandleDesc[0].type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt:
        drvDesc.type = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT
        drvDesc.handle.win32.handle = semHandleDesc[0].handle.win32.handle
        drvDesc.handle.win32.name = semHandleDesc[0].handle.win32.name
    elif semHandleDesc[0].type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeD3D12Fence:
        drvDesc.type = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE
        drvDesc.handle.win32.handle = semHandleDesc[0].handle.win32.handle
        drvDesc.handle.win32.name = semHandleDesc[0].handle.win32.name
    elif semHandleDesc[0].type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeD3D11Fence:
        drvDesc.type = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE
        drvDesc.handle.win32.handle = semHandleDesc[0].handle.win32.handle
        drvDesc.handle.win32.name = semHandleDesc[0].handle.win32.name
    elif semHandleDesc[0].type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeNvSciSync:
        drvDesc.type = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC
        drvDesc.handle.nvSciSyncObj = semHandleDesc[0].handle.nvSciSyncObj
    elif semHandleDesc[0].type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeKeyedMutex:
        drvDesc.type = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX
        drvDesc.handle.win32.handle = semHandleDesc[0].handle.win32.handle
        drvDesc.handle.win32.name = semHandleDesc[0].handle.win32.name
    elif semHandleDesc[0].type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeKeyedMutexKmt:
        drvDesc.type = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT
        drvDesc.handle.win32.handle = semHandleDesc[0].handle.win32.handle
        drvDesc.handle.win32.name = semHandleDesc[0].handle.win32.name
    elif semHandleDesc[0].type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd:
        drvDesc.type = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD
        drvDesc.handle.fd = semHandleDesc[0].handle.fd
    elif semHandleDesc[0].type == cudaExternalSemaphoreHandleType.cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32:
        drvDesc.type = cydriver.CUexternalSemaphoreHandleType_enum.CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32
        drvDesc.handle.win32.handle = semHandleDesc[0].handle.win32.handle
        drvDesc.handle.win32.name = semHandleDesc[0].handle.win32.name

    status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuImportExternalSemaphore(<cydriver.CUexternalSemaphore *>extSem_out, &drvDesc)
        if status != <cudaError_t>cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaSignalExternalSemaphoresAsync_v2(const cudaExternalSemaphore_t* extSemArray, const cudaExternalSemaphoreSignalParams* paramsArray, unsigned int numExtSems, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Passes the arrays, cast to their driver pointer types, to
    # cydriver._cuSignalExternalSemaphoresAsync after lazy context-state
    # initialization succeeds.
    # A driver failure is recorded through _setLastError; an initialization
    # failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuSignalExternalSemaphoresAsync(<const cydriver.CUexternalSemaphore *>extSemArray, <cydriver.CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *>paramsArray, numExtSems, stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaWaitExternalSemaphoresAsync_v2(const cudaExternalSemaphore_t* extSemArray, const cudaExternalSemaphoreWaitParams* paramsArray, unsigned int numExtSems, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Passes the arrays, cast to their driver pointer types, to
    # cydriver._cuWaitExternalSemaphoresAsync after lazy context-state
    # initialization succeeds.
    # A driver failure is recorded through _setLastError; an initialization
    # failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuWaitExternalSemaphoresAsync(<const cydriver.CUexternalSemaphore *>extSemArray, <cydriver.CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *>paramsArray, numExtSems, stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaArrayGetInfo(cudaChannelFormatDesc* desc, cudaExtent* extent, unsigned int* flags, cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Fetches the driver 3D descriptor of `array` with
    # cydriver._cuArray3DGetDescriptor_v2 and copies the requested pieces
    # into whichever of `desc`, `extent` and `flags` are non-NULL.
    #
    # All non-NULL outputs are zeroed first, so they hold zeros if the
    # descriptor query fails. A failure of the query or of
    # getChannelFormatDescFromDriverDesc is recorded through _setLastError
    # and returned; otherwise cudaSuccess is returned.
    cdef cydriver.CUDA_ARRAY3D_DESCRIPTOR_v2 drvDesc
    cdef cudaError_t status = cudaSuccess
    cdef size_t outWidth  = 0
    cdef size_t outHeight = 0
    cdef size_t outDepth  = 0

    # Clear the outputs up front in case the descriptor query fails.
    if flags != NULL:
        flags[0] = 0
    if desc != NULL:
        memset(desc, 0, sizeof(desc[0]))
    if extent != NULL:
        memset(extent, 0, sizeof(extent[0]))

    status = <cudaError_t>cydriver._cuArray3DGetDescriptor_v2(&drvDesc, <cydriver.CUarray>array)
    if status != <cudaError_t>cudaSuccess:
        _setLastError(status)
        return status

    # The flags value is taken verbatim from the driver descriptor.
    if flags != NULL:
        flags[0] = drvDesc.Flags

    # Translate the driver descriptor into the runtime channel format. The
    # size outputs of the helper are scratch values that are not used here.
    if desc != NULL:
        status = getChannelFormatDescFromDriverDesc(desc, &outDepth, &outHeight, &outWidth, &drvDesc)
        if status != <cudaError_t>cudaSuccess:
            _setLastError(status)
            return status

    # A Depth of 0 in the extent indicates a 2D array.
    if extent != NULL:
        extent[0].width  = drvDesc.Width
        extent[0].height = drvDesc.Height
        extent[0].depth  = drvDesc.Depth

    return cudaSuccess


cdef cudaError_t _cudaMemcpy2DToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to memcpy2DToArray with a NULL stream and the trailing flag
    # set to False, after lazy context-state initialization succeeds.
    # The helper receives the offsets as (hOffset, wOffset), the reverse of
    # this signature.
    # A helper failure is recorded through _setLastError; an initialization
    # failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = memcpy2DToArray(dst, hOffset, wOffset, <const char*>src, spitch, width, height, kind, NULL, False)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaMemcpy2DFromArray(void* dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to memcpy2DFromArray with a NULL stream and the trailing
    # flag set to False, after lazy context-state initialization succeeds.
    # The helper receives the offsets as (hOffset, wOffset), the reverse of
    # this signature.
    # A helper failure is recorded through _setLastError; an initialization
    # failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = memcpy2DFromArray(<char*>dst, dpitch, src, hOffset, wOffset, width, height, kind, NULL, False)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaMemcpy2DArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, size_t height, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to memcpy2DArrayToArray after lazy context-state
    # initialization succeeds. For both arrays the helper receives the
    # offsets as (hOffset, wOffset), the reverse of this signature.
    # A helper failure is recorded through _setLastError; an initialization
    # failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = memcpy2DArrayToArray(dst, hOffsetDst, wOffsetDst, src, hOffsetSrc, wOffsetSrc, width, height, kind)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaMemcpy2DToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t spitch, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to memcpy2DToArray with the caller's `stream` and the
    # trailing flag set to True, after lazy context-state initialization
    # succeeds. The helper receives the offsets as (hOffset, wOffset).
    # A helper failure is recorded through _setLastError; an initialization
    # failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = memcpy2DToArray(dst, hOffset, wOffset, <const char*>src, spitch, width, height, kind, stream, True)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaMemcpy2DFromArrayAsync(void* dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t width, size_t height, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to memcpy2DFromArray with the caller's `stream` and the
    # trailing flag set to True, after lazy context-state initialization
    # succeeds. The helper receives the offsets as (hOffset, wOffset).
    # A helper failure is recorded through _setLastError; an initialization
    # failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = memcpy2DFromArray(<char*>dst, dpitch, src, hOffset, wOffset, width, height, kind, stream, True)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaMemset3D(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to memset3DPtr with a NULL stream and the trailing flag set
    # to False, after lazy context-state initialization succeeds.
    # A helper failure is recorded through _setLastError; an initialization
    # failure is returned without recording.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = memset3DPtr(pitchedDevPtr, value, extent, NULL, False)
        if status != cudaSuccess:
            _setLastError(status)
    return status


cdef cudaError_t _cudaMemset3DAsync(cudaPitchedPtr pitchedDevPtr, int value, cudaExtent extent, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # 3D memset entry point that takes a stream.
    #
    # Lazily initializes context state, then calls memset3DPtr with the
    # caller's stream and True as the last argument. A failing helper status
    # is recorded with _setLastError; the status is returned either way.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = memset3DPtr(pitchedDevPtr, value, extent, stream, True)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Linear pointer-to-array copy without a stream.
    #
    # After lazy context initialization, delegates to memcpyToArray. The
    # helper receives the height offset before the width offset, the source
    # as const char*, a NULL stream and False as the final argument.
    # Helper failures are stored via _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = memcpyToArray(dst, hOffset, wOffset, <const char*>src, count, kind, NULL, False)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaMemcpyFromArray(void* dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Linear array-to-pointer copy without a stream.
    #
    # After lazy context initialization, delegates to memcpyFromArray. The
    # helper receives the destination as char*, the height offset before the
    # width offset, a NULL stream and 0 as the final argument.
    # Helper failures are stored via _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = memcpyFromArray(<char*>dst, src, hOffset, wOffset, count, kind, NULL, 0)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaMemcpyToArrayAsync(cudaArray_t dst, size_t wOffset, size_t hOffset, const void* src, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Linear pointer-to-array copy that takes a stream.
    #
    # After lazy context initialization, delegates to memcpyToArray with the
    # height offset before the width offset, the source as const char*, the
    # caller's stream and True as the final argument.
    # Helper failures are stored via _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = memcpyToArray(dst, hOffset, wOffset, <const char*>src, count, kind, stream, True)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaMemcpyFromArrayAsync(void* dst, cudaArray_const_t src, size_t wOffset, size_t hOffset, size_t count, cudaMemcpyKind kind, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Linear array-to-pointer copy that takes a stream.
    #
    # After lazy context initialization, delegates to memcpyFromArray with
    # the destination as char*, the height offset before the width offset,
    # the caller's stream and True as the final argument.
    # Helper failures are stored via _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = memcpyFromArray(<char*>dst, src, hOffset, wOffset, count, kind, stream, True)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaPointerGetAttributes(cudaPointerAttributes* attributes, const void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Fills `attributes[0]` with information about `ptr` obtained from a
    # single batched cuPointerGetAttributes driver query.
    #
    # Returns cudaErrorInvalidValue when `attributes` is NULL or when the
    # driver reports a memory type this function does not map; returns the
    # driver's status when the query itself fails. On the two failure paths
    # that occur after the NULL check, `attributes[0]` is zeroed and its
    # `device` field is set to -1. Every failure is recorded via
    # _setLastError.
    cdef cudaError_t err = cudaSuccess
    cdef cudaPointerAttributes attrib
    cdef cydriver.CUcontext driverContext = NULL
    cdef cydriver.CUmemorytype driverMemoryType
    cdef int isManaged = 0
    # Attributes requested from the driver; the order must match `data` below.
    cdef cydriver.CUpointer_attribute[6] query
    query[0] = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_CONTEXT
    query[1] = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_MEMORY_TYPE
    query[2] = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_POINTER
    query[3] = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_HOST_POINTER
    query[4] = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_IS_MANAGED
    query[5] = cydriver.CUpointer_attribute_enum.CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL

    # Start from all-zero results so fields the driver leaves untouched are
    # well defined (a zero memory type is treated as "unregistered" below).
    memset(&attrib, 0, sizeof(attrib))
    memset(&driverMemoryType, 0, sizeof(driverMemoryType))

    # Output slots, index-aligned with `query`: slot i receives the value of
    # query[i]. Three slots write straight into the local result struct.
    cdef void** data = [
        &driverContext,
        &driverMemoryType,
        &attrib.devicePointer,
        &attrib.hostPointer,
        &isManaged,
        &attrib.device
    ]

    if attributes == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    # Get all the attributes we need
    err = <cudaError_t>cydriver._cuPointerGetAttributes(<unsigned int>(sizeof(query)/sizeof(query[0])), query, data, <cydriver.CUdeviceptr_v2>ptr)
    if err != cudaSuccess:
        # `attributes` was already verified non-NULL above, so this check
        # always passes; the caller's struct is reset before reporting.
        if attributes != NULL:
            memset(attributes, 0, sizeof(attributes[0]))
            attributes[0].device = -1
        _setLastError(err)
        return err

    # Translate the driver memory type (plus the managed flag) into the
    # runtime's memory type; managed wins over both host and device.
    if driverMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_HOST:
        if isManaged:
            attrib.type = cudaMemoryTypeManaged
        else:
            attrib.type = cudaMemoryTypeHost
    elif driverMemoryType == cydriver.CUmemorytype_enum.CU_MEMORYTYPE_DEVICE:
        if isManaged:
            attrib.type = cudaMemoryTypeManaged
        else:
            attrib.type = cudaMemoryTypeDevice
    else:
         # Still zero after the query: reported as unregistered memory.
         if driverMemoryType == 0:
            attrib.type = cudaMemoryTypeUnregistered
         else:
            # Any other driver memory type is rejected as an invalid value.
            # As above, the NULL check here is always true at this point.
            if attributes != NULL:
                memset(attributes, 0, sizeof(attributes[0]))
                attributes[0].device = -1
            _setLastError(cudaErrorInvalidValue)
            return cudaErrorInvalidValue

    # copy to user structure
    attributes[0] = attrib

    return cudaSuccess


cdef cudaError_t _cudaGetDeviceFlags(unsigned int* flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Writes the flags of the current context into `flags[0]`, or, when no
    # context is current, the primary-context flags of device 0 OR'ed with
    # cudaDeviceMapHost.
    #
    # Returns cudaErrorInvalidValue for a NULL `flags`, cudaErrorInvalidDevice
    # when device 0 cannot be looked up, or the failing driver status.
    # All of these are recorded via _setLastError; a lazyInitDriver failure is
    # returned as-is.
    cdef cudaError_t err = cudaSuccess
    err = m_global.lazyInitDriver()
    if err != cudaSuccess:
        return err

    if flags == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    cdef cydriver.CUcontext driverContext
    err = <cudaError_t>cydriver._cuCtxGetCurrent(&driverContext)
    if err != cudaSuccess:
        _setLastError(err)
        return err

    # Get the flags from the current context
    if driverContext != NULL:
        err = <cudaError_t>cydriver._cuCtxGetFlags(flags)
        if err != cudaSuccess:
            _setLastError(err)
        return err

    # Assume first valid device and get its implicit flags
    cdef cudaPythonDevice* device
    cdef unsigned int pcFlags
    cdef int pcActive
    device = m_global.getDevice(0)
    if device == NULL:
        # Other callers of getDevice in this file treat NULL as an invalid
        # device; do the same here instead of dereferencing it.
        _setLastError(cudaErrorInvalidDevice)
        return cudaErrorInvalidDevice
    err = <cudaError_t>cydriver._cuDevicePrimaryCtxGetState(device[0].driverDevice, &pcFlags, &pcActive)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    flags[0] = pcFlags | cudaDeviceMapHost
    return cudaSuccess


cdef cudaError_t _cudaMemcpy3D(const cudaMemcpy3DParms* p) except ?cudaErrorCallRequiresNewerDriver nogil:
    # 3D copy entry point without a stream.
    #
    # A NULL `p` yields cudaErrorInvalidValue. Otherwise context state is
    # lazily initialized and the copy is delegated to memcpy3D with the peer
    # flag False, both device arguments 0, a NULL stream and False as the
    # final argument. Argument and helper failures are recorded via
    # _setLastError.
    if p == NULL:
        # Record the failure like the peer variants of this call do.
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    cdef cudaError_t err = cudaSuccess
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    err = memcpy3D(p, False, 0, 0, NULL, False)
    if err != cudaSuccess:
        _setLastError(err)
    return err


cdef cudaError_t _cudaMemcpy3DAsync(const cudaMemcpy3DParms* p, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # 3D copy entry point that takes a stream.
    #
    # A NULL `p` yields cudaErrorInvalidValue. Otherwise context state is
    # lazily initialized and the copy is delegated to memcpy3D with the peer
    # flag False, both device arguments 0, the caller's stream and True as
    # the final argument. Argument and helper failures are recorded via
    # _setLastError.
    if p == NULL:
        # Record the failure like the peer variants of this call do.
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    cdef cudaError_t err = cudaSuccess
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    err = memcpy3D(p, False, 0, 0, stream, True)
    if err != cudaSuccess:
        _setLastError(err)
    return err


cdef cudaError_t _cudaMemPoolSetAccess(cudaMemPool_t memPool, const cudaMemAccessDesc* descList, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Converts `count` runtime access descriptors into driver descriptors and
    # applies them to `memPool` through cuMemPoolSetAccess.
    #
    # Up to MAX_DEVICES descriptors are converted in a stack buffer; larger
    # lists use a heap buffer that is released on every exit path.
    # Returns cudaErrorInvalidValue for a NULL `descList` with a non-zero
    # `count`, cudaErrorMemoryAllocation if the heap buffer cannot be
    # obtained, or the driver's status. Failures are recorded via
    # _setLastError; a lazyInitDriver failure is returned as-is.
    cdef cudaError_t err = cudaSuccess
    err = m_global.lazyInitDriver()
    if err != cudaSuccess:
        return err
    cdef size_t MAX_DEVICES = 32
    cdef cydriver.CUmemAccessDesc localList[32]
    cdef cydriver.CUmemAccessDesc *cuDescList
    cdef size_t i = 0
    cdef bint heapAllocated = count > MAX_DEVICES

    if descList == NULL and count > 0:
        # The conversion loop below would dereference the NULL list.
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    if heapAllocated:
        # calloc takes (element count, element size).
        cuDescList = <cydriver.CUmemAccessDesc*>calloc(count, sizeof(cydriver.CUmemAccessDesc))
    else:
        cuDescList = localList

    if cuDescList == NULL:
        _setLastError(cudaErrorMemoryAllocation)
        return cudaErrorMemoryAllocation

    while i < count:
        cuDescList[i].location.type = <cydriver.CUmemLocationType>descList[i].location.type
        cuDescList[i].location.id = descList[i].location.id
        cuDescList[i].flags = <cydriver.CUmemAccess_flags>descList[i].flags
        i += 1

    err = <cudaError_t>cydriver._cuMemPoolSetAccess(memPool, cuDescList, count)

    # Release the heap buffer before looking at the status so that a failing
    # driver call no longer leaks it.
    if heapAllocated:
        free(cuDescList)

    if err != cudaSuccess:
        _setLastError(err)
    return err


cdef cudaError_t _cudaDeviceReset() except ?cudaErrorCallRequiresNewerDriver nogil:
    # Resets the primary context of the device that owns the current context.
    #
    # Does nothing (returns cudaSuccess) when m_global._lazyInitDriver is
    # false, and also when the current context does not map to a device
    # through getDeviceFromPrimaryCtx. Failures from cuCtxGetCurrent or
    # resetPrimaryContext are recorded via _setLastError and returned.
    cdef cudaError_t status = cudaSuccess
    cdef cydriver.CUcontext currentCtx
    cdef cudaPythonDevice* owner

    if not m_global._lazyInitDriver:
        return cudaSuccess

    status = <cudaError_t>cydriver._cuCtxGetCurrent(&currentCtx)
    if status != cudaSuccess:
        _setLastError(status)
        return status

    owner = m_global.getDeviceFromPrimaryCtx(currentCtx)
    if owner == NULL:
        return status

    status = resetPrimaryContext(owner)
    if status != cudaSuccess:
        _setLastError(status)
    return status


cdef cudaError_t _cudaGetLastError() except ?cudaErrorCallRequiresNewerDriver nogil:
    # Returns the error stored in m_global._lastError and resets that slot
    # to cudaSuccess.
    cdef cudaError_t pending
    pending = m_global._lastError
    m_global._lastError = cudaSuccess
    return pending


cdef cudaError_t _cudaPeekAtLastError() except ?cudaErrorCallRequiresNewerDriver nogil:
    # Returns the error stored in m_global._lastError without modifying it.
    cdef cudaError_t current = m_global._lastError
    return current


cdef cudaError_t _cudaGetDevice(int* deviceOrdinal) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Writes the ordinal of the device behind the current context into
    # `deviceOrdinal[0]`.
    #
    # When the driver reports cudaErrorDeviceUninitialized the ordinal
    # defaults to 0 and cudaSuccess is returned. Any other driver failure, or
    # a driver device that cannot be mapped by getDeviceFromDriver, is
    # recorded via _setLastError and returned. A lazyInitDriver failure is
    # returned as-is.
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUdevice driverDevice = 0

    err = m_global.lazyInitDriver()
    if err != cudaSuccess:
        return err

    cdef cudaPythonDevice *cudaDevice
    err = <cudaError_t>cydriver._cuCtxGetDevice(&driverDevice)
    if err == cudaSuccess:
        cudaDevice = m_global.getDeviceFromDriver(driverDevice)
        if cudaDevice == NULL:
            # No runtime record for the driver device: report it instead of
            # dereferencing a NULL pointer.
            _setLastError(cudaErrorInvalidDevice)
            return cudaErrorInvalidDevice
        deviceOrdinal[0] = cudaDevice[0].deviceOrdinal
    elif err == cudaErrorDeviceUninitialized:
        # Like the C Runtime, default to the first device and let context
        # creation happen in another call.
        deviceOrdinal[0] = 0
        err = cudaSuccess
    else:
        # Keep the last-error slot in step with the returned status, as the
        # other entry points in this file do.
        _setLastError(err)
    return err


cdef cudaError_t _cudaSetDevice(int deviceOrdinal) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Makes the primary context of device `deviceOrdinal` current,
    # initializing that primary context first if the device has none yet.
    #
    # Returns cudaErrorInvalidDevice when the ordinal does not resolve to a
    # device, otherwise the status of primary-context initialization or of
    # cuCtxSetCurrent. Failures are recorded via _setLastError; a
    # lazyInitDriver failure is returned as-is.
    cdef cudaError_t err = cudaSuccess
    cdef cudaPythonDevice *device

    err = m_global.lazyInitDriver()
    if err != cudaSuccess:
        return err

    device = m_global.getDevice(deviceOrdinal)
    if device == NULL:
        # Record the error that is actually returned; `err` is still
        # cudaSuccess at this point.
        _setLastError(cudaErrorInvalidDevice)
        return cudaErrorInvalidDevice

    if device.primaryContext == NULL:
        # Do not continue with a NULL context if initialization failed.
        err = initPrimaryContext(device)
        if err != cudaSuccess:
            _setLastError(err)
            return err

    err = <cudaError_t>cydriver._cuCtxSetCurrent(device.primaryContext)
    if err != cudaSuccess:
        _setLastError(err)
    return err


cdef cudaError_t _cudaGetDeviceProperties_v2(cudaDeviceProp* prop, int deviceOrdinal) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Copies the cached properties of device `deviceOrdinal` into `prop[0]`
    # after re-querying five attributes from the driver: kernel exec
    # timeout, compute mode, clock rate, memory clock rate and the
    # single-to-double precision performance ratio.
    #
    # Returns cudaErrorInvalidDevice when the ordinal does not resolve to a
    # device, or the first failing driver status. Failures are recorded via
    # _setLastError; a lazyInitDriver failure is returned as-is.
    cdef cudaError_t err = cudaSuccess
    cdef cudaPythonDevice* device
    err = m_global.lazyInitDriver()
    if err != cudaSuccess:
        return err

    device = m_global.getDevice(deviceOrdinal)
    if device == NULL:
        # Record the error that is actually returned; `err` is still
        # cudaSuccess at this point.
        _setLastError(cudaErrorInvalidDevice)
        return cudaErrorInvalidDevice

    # The refreshed values are written into the cached struct, so they are
    # part of the copy made at the end.
    err = <cudaError_t>cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.kernelExecTimeoutEnabled),  cydriver.CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, device[0].driverDevice)
    if err != cudaSuccess:
        _setLastError(err)
        return err

    err = <cudaError_t>cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.computeMode),  cydriver.CU_DEVICE_ATTRIBUTE_COMPUTE_MODE, device[0].driverDevice)
    if err != cudaSuccess:
        _setLastError(err)
        return err

    err = <cudaError_t>cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.clockRate), cydriver.CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device[0].driverDevice)
    if err != cudaSuccess:
        _setLastError(err)
        return err

    err = <cudaError_t>cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.memoryClockRate), cydriver.CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device[0].driverDevice)
    if err != cudaSuccess:
        _setLastError(err)
        return err

    err = <cudaError_t>cydriver._cuDeviceGetAttribute(&(device[0].deviceProperties.singleToDoublePrecisionPerfRatio), cydriver.CU_DEVICE_ATTRIBUTE_SINGLE_TO_DOUBLE_PRECISION_PERF_RATIO, device[0].driverDevice)
    if err != cudaSuccess:
        _setLastError(err)
        return err

    prop[0] = device[0].deviceProperties

    return cudaSuccess


cdef cudaError_t _cudaChooseDevice(int* device, const cudaDeviceProp* prop) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Picks the device whose cached properties best match `prop` and writes
    # its ordinal to `device[0]` (-1 if there are no devices to rank).
    #
    # Each device earns one point per criterion it satisfies; a criterion is
    # skipped when the request holds its "don't care" value (empty name,
    # major/minor of -1, totalGlobalMem of 0). The minor version is only
    # compared when the major versions are equal. On ties the lowest ordinal
    # wins because only a strictly higher score replaces the current best.
    # NULL arguments yield cudaErrorInvalidValue (recorded via _setLastError).
    if device == NULL or prop == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    cdef cudaError_t status = cudaSuccess
    status = m_global.lazyInitDriver()
    if status != cudaSuccess:
        return status
    cdef int bestOrdinal = -1
    cdef int bestScore = -1
    cdef int score = 0
    cdef char* anyName = [b'\0']
    cdef int anyMajor = -1
    cdef int anyMinor = -1
    cdef size_t anyTotalGlobalMem = 0
    cdef int ordinal = 0
    cdef cudaDeviceProp *candidate

    for ordinal in range(m_global._numDevices):
        candidate = &m_global._deviceList[ordinal].deviceProperties
        score = 0
        # Name: exact match required.
        if (strncmp(prop[0].name, anyName, sizeof(prop[0].name)) != 0):
            score += strncmp(prop[0].name, candidate[0].name, sizeof(prop[0].name)) == 0
        # Compute capability: the device must be at least what was requested.
        if (prop[0].major != anyMajor):
            score += prop[0].major <= candidate[0].major
        if (prop[0].major == candidate[0].major and prop[0].minor != anyMinor):
            score += prop[0].minor <= candidate[0].minor
        # Memory: the device must offer at least the requested amount.
        if (prop[0].totalGlobalMem != anyTotalGlobalMem):
            score += prop[0].totalGlobalMem <= candidate[0].totalGlobalMem
        if (score > bestScore):
            bestScore = score
            bestOrdinal = ordinal

    device[0] = bestOrdinal
    return cudaSuccess


cdef cudaError_t _cudaMemcpyArrayToArray(cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, cudaMemcpyKind kind) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Linear array-to-array copy entry point.
    #
    # After lazy context initialization, delegates to memcpyArrayToArray,
    # passing the height offset before the width offset for both arrays.
    # Helper failures are stored via _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = memcpyArrayToArray(dst, hOffsetDst, wOffsetDst, src, hOffsetSrc, wOffsetSrc, count, kind)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaGetChannelDesc(cudaChannelFormatDesc* desc, cudaArray_const_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Retrieves the channel format descriptor of `array` into `desc`.
    #
    # A NULL `desc` is rejected with cudaErrorInvalidValue before any
    # initialization is attempted. Otherwise context state is lazily
    # initialized and the lookup is delegated to getChannelDesc.
    # Argument and helper failures are recorded via _setLastError.
    if desc == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = getChannelDesc(array, desc)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaCreateTextureObject(cudaTextureObject_t* pTexObject, const cudaResourceDesc* pResDesc, const cudaTextureDesc* pTexDesc, const cudaResourceViewDesc* pResViewDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Creates a texture object from runtime descriptors.
    #
    # The runtime descriptors are converted to their driver counterparts by
    # getDriverResDescFromResDesc and handed to cuTexObjectCreate. The
    # texture descriptor is converted from a local copy whose
    # seamlessCubemap field is forced to 0. The resource-view descriptor is
    # optional: when `pResViewDesc` is NULL, NULL is passed for the driver
    # view descriptor in both calls.
    # NULL `pResDesc`/`pTexDesc` yield cudaErrorInvalidValue. Argument,
    # conversion and driver failures are recorded via _setLastError.
    cdef cudaError_t status = cudaSuccess

    if pResDesc == NULL or pTexDesc == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    cdef cydriver.CUDA_RESOURCE_DESC driverResDesc
    cdef cydriver.CUDA_TEXTURE_DESC driverTexDesc
    cdef cydriver.CUDA_RESOURCE_VIEW_DESC driverViewDesc
    cdef cydriver.CUDA_RESOURCE_VIEW_DESC* driverViewPtr = NULL
    cdef cudaTextureDesc localTexDesc
    memcpy(&localTexDesc, pTexDesc, sizeof(cudaTextureDesc))
    localTexDesc.seamlessCubemap = 0

    status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    # Only point at the driver view descriptor when the caller supplied one.
    if pResViewDesc:
        driverViewPtr = &driverViewDesc

    status = getDriverResDescFromResDesc(&driverResDesc, pResDesc, &driverTexDesc, &localTexDesc, driverViewPtr, pResViewDesc)
    if status != cudaSuccess:
        _setLastError(status)
        return status

    status = <cudaError_t>cydriver._cuTexObjectCreate(pTexObject, &driverResDesc, &driverTexDesc, driverViewPtr)
    if status != cudaSuccess:
        _setLastError(status)
    return status

cdef cudaError_t _cudaGetTextureObjectTextureDesc(cudaTextureDesc* pTexDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Retrieves the texture descriptor of `texObject` into `pTexDesc`.
    #
    # Queries the driver for both the resource descriptor and the texture
    # descriptor, converts them with getResDescFromDriverResDesc, and copies
    # the converted texture descriptor to the caller. The first failing step
    # stops the sequence; its status is recorded via _setLastError and
    # returned, leaving `pTexDesc` untouched.
    cdef cudaResourceDesc runtimeResDesc
    cdef cydriver.CUDA_RESOURCE_DESC driverResDesc
    cdef cydriver.CUDA_TEXTURE_DESC driverTexDesc
    cdef cudaTextureDesc runtimeTexDesc
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    # Each step only runs if everything before it succeeded.
    status = <cudaError_t>cydriver._cuTexObjectGetResourceDesc(&driverResDesc, texObject)
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuTexObjectGetTextureDesc(&driverTexDesc, texObject)
    if status == cudaSuccess:
        status = getResDescFromDriverResDesc(&runtimeResDesc, &driverResDesc, &runtimeTexDesc, &driverTexDesc, NULL, NULL)
    if status != cudaSuccess:
        _setLastError(status)
        return status

    memcpy(pTexDesc, &runtimeTexDesc, sizeof(cudaTextureDesc))
    return cudaSuccess

cdef cudaError_t _cudaGetTextureObjectResourceViewDesc(cudaResourceViewDesc* pResViewDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Retrieves the resource-view descriptor of `texObject` into
    # `pResViewDesc`.
    #
    # Queries the driver for the resource descriptor and the resource-view
    # descriptor, then lets getResDescFromDriverResDesc convert the view
    # descriptor directly into the caller's struct. The first failing step
    # is recorded via _setLastError and returned.
    cdef cudaError_t err = cudaSuccess
    cdef cudaResourceDesc resDesc
    cdef cydriver.CUDA_RESOURCE_DESC rd
    cdef cydriver.CUDA_RESOURCE_VIEW_DESC rvd

    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    # Use the underscore-prefixed driver entry points like every other
    # driver call in this file.
    err = <cudaError_t>cydriver._cuTexObjectGetResourceDesc(&rd, texObject)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuTexObjectGetResourceViewDesc(&rvd, texObject)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    err = getResDescFromDriverResDesc(&resDesc, &rd, NULL, NULL, pResViewDesc, &rvd)
    if err != cudaSuccess:
        _setLastError(err)
        return err

    return cudaSuccess


cdef cudaError_t _cudaGetExportTable(const void** ppExportTable, const cudaUUID_t* pExportTableId) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards an export-table lookup to the driver.
    #
    # After lazy driver initialization, calls cuGetExportTable with the UUID
    # pointer reinterpreted as a driver CUuuid pointer. A failing driver
    # status is recorded via _setLastError and returned.
    cdef cudaError_t status = m_global.lazyInitDriver()
    if status != cudaSuccess:
        return status

    status = <cudaError_t>cydriver._cuGetExportTable(ppExportTable, <const cydriver.CUuuid*>pExportTableId)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaMemcpy3DPeer(const cudaMemcpy3DPeerParms* p) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Peer-to-peer 3D copy without a stream.
    #
    # The peer parameters are repacked into a zero-initialized
    # cudaMemcpy3DParms with the kind fixed to cudaMemcpyDeviceToDevice, and
    # memcpy3D is called with the peer flag True, the source and destination
    # devices from `p`, a NULL stream and False as the final argument.
    # A NULL `p` yields cudaErrorInvalidValue. Argument and helper failures
    # are recorded via _setLastError.
    if p == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    cdef cudaError_t status = cudaSuccess
    cdef cudaMemcpy3DParms plain
    memset(&plain, 0, sizeof(plain))
    status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    # Source side
    plain.srcArray = p[0].srcArray
    plain.srcPos = p[0].srcPos
    plain.srcPtr = p[0].srcPtr
    # Destination side
    plain.dstArray = p[0].dstArray
    plain.dstPos = p[0].dstPos
    plain.dstPtr = p[0].dstPtr
    # Shape and direction
    plain.extent = p[0].extent
    plain.kind = cudaMemcpyKind.cudaMemcpyDeviceToDevice

    status = memcpy3D(&plain, True, p[0].srcDevice, p[0].dstDevice, NULL, False)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status

cdef cudaError_t _cudaMemcpy3DPeerAsync(const cudaMemcpy3DPeerParms* p, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Peer-to-peer 3D copy that takes a stream.
    #
    # The peer parameters are repacked into a zero-initialized
    # cudaMemcpy3DParms with the kind fixed to cudaMemcpyDeviceToDevice, and
    # memcpy3D is called with the peer flag True, the source and destination
    # devices from `p`, the caller's stream and True as the final argument.
    # A NULL `p` yields cudaErrorInvalidValue. Argument and helper failures
    # are recorded via _setLastError.
    if p == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    cdef cudaError_t status = cudaSuccess
    cdef cudaMemcpy3DParms plain
    memset(&plain, 0, sizeof(plain))

    status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    # Source side
    plain.srcArray = p[0].srcArray
    plain.srcPos = p[0].srcPos
    plain.srcPtr = p[0].srcPtr
    # Destination side
    plain.dstArray = p[0].dstArray
    plain.dstPos = p[0].dstPos
    plain.dstPtr = p[0].dstPtr
    # Shape and direction
    plain.extent = p[0].extent
    plain.kind = cudaMemcpyKind.cudaMemcpyDeviceToDevice

    status = memcpy3D(&plain, True, p[0].srcDevice, p[0].dstDevice, stream, True)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaPitchedPtr _make_cudaPitchedPtr(void* d, size_t p, size_t xsz, size_t ysz) noexcept nogil:
    # Packs a base pointer `d`, pitch `p` and the x/y sizes into a
    # cudaPitchedPtr value and returns it.
    cdef cudaPitchedPtr pitched
    pitched.xsize = xsz
    pitched.ysize = ysz
    pitched.pitch = p
    pitched.ptr = d
    return pitched


cdef cudaPos _make_cudaPos(size_t x, size_t y, size_t z) noexcept nogil:
    # Packs the three coordinates into a cudaPos value and returns it.
    cdef cudaPos position
    position.z = z
    position.y = y
    position.x = x
    return position


cdef cudaExtent _make_cudaExtent(size_t w, size_t h, size_t d) noexcept nogil:
    # Packs width, height and depth into a cudaExtent value and returns it.
    cdef cudaExtent extent
    extent.depth = d
    extent.height = h
    extent.width = w
    return extent


cdef cudaError_t _cudaSetDeviceFlags(unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Applies `flags` to the primary context of the device that owns the
    # current context.
    #
    # cudaDeviceMapHost is stripped from `flags` before validation. The call
    # fails with cudaErrorInvalidValue if any bit outside cudaDeviceMask
    # remains, or if the scheduling bits are non-zero and are not exactly one
    # of Spin, Yield or BlockingSync. It fails with
    # cudaErrorIncompatibleDriverContext when the current context is not a
    # known primary context. These failures and driver failures are recorded
    # via _setLastError; a lazyInitContextState failure is returned as-is.
    cdef cudaError_t err = cudaSuccess
    flags &= ~cudaDeviceMapHost
    if flags & ~cudaDeviceMask:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue
    cdef unsigned int scheduleFlags = flags & cudaDeviceScheduleMask
    if scheduleFlags and (scheduleFlags != cudaDeviceScheduleSpin and
                          scheduleFlags != cudaDeviceScheduleYield and
                          scheduleFlags != cudaDeviceScheduleBlockingSync):
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err

    cdef cydriver.CUcontext context
    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
    if err != cudaSuccess:
        _setLastError(err)
        return err

    cdef cudaPythonDevice* device
    device = m_global.getDeviceFromPrimaryCtx(context)
    if device == NULL:
        # We don't know if context provided is primary or not
        # cudaSetDevice may need to be called before retrying call
        # Record the failure like the other error exits of this function.
        _setLastError(cudaErrorIncompatibleDriverContext)
        return cudaErrorIncompatibleDriverContext

    err = <cudaError_t>cydriver._cuDevicePrimaryCtxSetFlags_v2(device[0].driverDevice, flags)
    if err != cudaSuccess:
        _setLastError(err)
    return err


cdef cudaError_t _cudaGraphAddMemAllocNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaMemAllocNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Adds a memory-allocation node to `graph` through the driver.
    #
    # A NULL `nodeParams` is rejected with cudaErrorInvalidValue. Otherwise
    # context state is lazily initialized and cuGraphAddMemAllocNode is
    # called with `nodeParams` reinterpreted as the driver's
    # CUDA_MEM_ALLOC_NODE_PARAMS. Argument and driver failures are recorded
    # via _setLastError.
    if nodeParams == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = <cudaError_t>cydriver._cuGraphAddMemAllocNode(pGraphNode, graph, pDependencies, numDependencies, <cydriver.CUDA_MEM_ALLOC_NODE_PARAMS *>nodeParams)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaGraphMemAllocNodeGetParams(cudaGraphNode_t node, cudaMemAllocNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Reads the parameters of a memory-allocation graph node.
    #
    # A NULL `params_out` is rejected with cudaErrorInvalidValue. Otherwise
    # context state is lazily initialized and cuGraphMemAllocNodeGetParams is
    # called with `params_out` reinterpreted as the driver's
    # CUDA_MEM_ALLOC_NODE_PARAMS. Argument and driver failures are recorded
    # via _setLastError.
    if params_out == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = <cudaError_t>cydriver._cuGraphMemAllocNodeGetParams(node, <cydriver.CUDA_MEM_ALLOC_NODE_PARAMS *>params_out)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaGraphMemFreeNodeGetParams(cudaGraphNode_t node, void* dptr_out) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Reads the device pointer associated with a memory-free graph node.
    #
    # A NULL `dptr_out` is rejected with cudaErrorInvalidValue. Otherwise
    # context state is lazily initialized and cuGraphMemFreeNodeGetParams is
    # called with `dptr_out` reinterpreted as a CUdeviceptr pointer.
    # Argument and driver failures are recorded via _setLastError.
    if dptr_out == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = <cudaError_t>cydriver._cuGraphMemFreeNodeGetParams(node, <cydriver.CUdeviceptr *>dptr_out)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaMemAdvise(const void* devPtr, size_t count, cudaMemoryAdvise advice, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards a memory-advice request to the driver's cuMemAdvise.
    #
    # After lazy context initialization, the pointer, advice and device are
    # cast to their driver types and passed through. A failing driver status
    # is recorded via _setLastError and returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = <cudaError_t>cydriver._cuMemAdvise(<cydriver.CUdeviceptr>devPtr, count, <cydriver.CUmem_advise>advice, <cydriver.CUdevice>device)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaMemAdvise_v2(const void* devPtr, size_t count, cudaMemoryAdvise advice, cudaMemLocation location) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards a location-based memory-advice request to cuMemAdvise_v2.
    #
    # The runtime location is copied field by field (type, id) into a driver
    # CUmemLocation. After lazy context initialization the driver is called;
    # a failing driver status is recorded via _setLastError and returned.
    cdef cudaError_t status = cudaSuccess
    cdef cydriver.CUmemLocation driverLoc
    driverLoc.id = location.id
    driverLoc.type = <cydriver.CUmemLocationType>location.type

    status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = <cudaError_t>cydriver._cuMemAdvise_v2(<cydriver.CUdeviceptr>devPtr, count, <cydriver.CUmem_advise>advice, driverLoc)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaMemRangeGetAttribute(void* data, size_t dataSize, cudaMemRangeAttribute attribute, const void* devPtr, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Direct pass-through to the driver's cuMemRangeGetAttribute.
    #
    # No lazy initialization is performed in this function. The attribute
    # and pointer are cast to their driver types; a failing driver status is
    # recorded via _setLastError and returned.
    cdef cudaError_t status
    status = <cudaError_t>cydriver._cuMemRangeGetAttribute(data, dataSize, <cydriver.CUmem_range_attribute>attribute, <cydriver.CUdeviceptr>devPtr, count)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaMemRangeGetAttributes(void** data, size_t* dataSizes, cudaMemRangeAttribute* attributes, size_t numAttributes, const void* devPtr, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Direct pass-through to the driver's cuMemRangeGetAttributes.
    #
    # No lazy initialization is performed in this function. The attribute
    # array and pointer are cast to their driver types; a failing driver
    # status is recorded via _setLastError and returned.
    cdef cudaError_t status
    status = <cudaError_t>cydriver._cuMemRangeGetAttributes(data, dataSizes, <cydriver.CUmem_range_attribute*>attributes, numAttributes, <cydriver.CUdeviceptr>devPtr, count)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaGetDeviceCount(int* count) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Writes m_global._numDevices to `count[0]` once the driver has been
    # lazily initialized; an initialization failure is returned as-is and
    # leaves `count` untouched.
    cdef cudaError_t status = m_global.lazyInitDriver()
    if status == cudaSuccess:
        count[0] = m_global._numDevices
        return cudaSuccess
    return status


cdef cudaError_t _cudaDeviceGetAttribute(int* value, cudaDeviceAttr attr, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Direct pass-through to the driver's cuDeviceGetAttribute.
    #
    # No lazy initialization is performed in this function, and `device` is
    # cast straight to a CUdevice rather than looked up in m_global.
    # A failing driver status is recorded via _setLastError and returned.
    cdef cudaError_t status
    status = <cudaError_t>cydriver._cuDeviceGetAttribute(value, <cydriver.CUdevice_attribute>attr, <cydriver.CUdevice>device)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaDeviceSetSharedMemConfig(cudaSharedMemConfig config) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards the shared-memory configuration to cuCtxSetSharedMemConfig.
    #
    # Context state is lazily initialized first. A failing driver status is
    # recorded via _setLastError and returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = <cudaError_t>cydriver._cuCtxSetSharedMemConfig(<cydriver.CUsharedconfig>config)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaDeviceGetByPCIBusId(int* device, const char* pciBusId) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Direct pass-through to the driver's cuDeviceGetByPCIBusId.
    #
    # No lazy initialization is performed in this function. The output
    # pointer is reinterpreted as a CUdevice pointer, so the driver writes
    # its device handle straight into `device[0]`. A failing driver status
    # is recorded via _setLastError and returned.
    cdef cudaError_t status
    status = <cudaError_t>cydriver._cuDeviceGetByPCIBusId(<cydriver.CUdevice*>device, pciBusId)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaDeviceGetPCIBusId(char* pciBusId, int length, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Direct pass-through to the driver's cuDeviceGetPCIBusId.
    #
    # No lazy initialization is performed in this function, and `device` is
    # cast straight to a CUdevice. A failing driver status is recorded via
    # _setLastError and returned.
    cdef cudaError_t status
    status = <cudaError_t>cydriver._cuDeviceGetPCIBusId(pciBusId, length, <cydriver.CUdevice>device)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaDeviceGetP2PAttribute(int* value, cudaDeviceP2PAttr attr, int srcDevice, int dstDevice) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Direct pass-through to the driver's cuDeviceGetP2PAttribute.
    #
    # No lazy initialization is performed in this function, and both device
    # arguments are cast straight to CUdevice. A failing driver status is
    # recorded via _setLastError and returned.
    cdef cudaError_t status
    status = <cudaError_t>cydriver._cuDeviceGetP2PAttribute(value, <cydriver.CUdevice_P2PAttribute>attr, <cydriver.CUdevice>srcDevice, <cydriver.CUdevice>dstDevice)
    if status == cudaSuccess:
        return cudaSuccess

    _setLastError(status)
    return status


cdef cudaError_t _cudaArrayGetSparseProperties(cudaArraySparseProperties* sparseProperties, cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Retrieves the sparse properties of `array` from the driver.
    #
    # A NULL output pointer is rejected with cudaErrorInvalidValue. The
    # caller's struct is zeroed before the driver query, so it stays
    # all-zero if the query fails. On success the driver result is copied
    # over field by field. Argument and driver failures are recorded via
    # _setLastError.
    cdef cudaError_t status = cudaSuccess
    cdef cydriver.CUDA_ARRAY_SPARSE_PROPERTIES driverProps
    if sparseProperties == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaError.cudaErrorInvalidValue
    memset(sparseProperties, 0, sizeof(cudaArraySparseProperties))

    status = <cudaError_t>cydriver._cuArrayGetSparseProperties(&driverProps, <cydriver.CUarray>array)
    if status != cudaSuccess:
        _setLastError(status)
    else:
        sparseProperties[0].tileExtent.width  = driverProps.tileExtent.width
        sparseProperties[0].tileExtent.height = driverProps.tileExtent.height
        sparseProperties[0].tileExtent.depth  = driverProps.tileExtent.depth
        sparseProperties[0].miptailFirstLevel = driverProps.miptailFirstLevel
        sparseProperties[0].miptailSize       = driverProps.miptailSize
        sparseProperties[0].flags             = driverProps.flags
    return status


cdef cudaError_t _cudaMipmappedArrayGetSparseProperties(cudaArraySparseProperties* sparseProperties, cudaMipmappedArray_t mipmap) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Retrieves the sparse properties of the mipmapped array `mipmap` from
    # the driver.
    #
    # A NULL output pointer is rejected with cudaErrorInvalidValue. The
    # caller's struct is zeroed before the driver query, so it stays
    # all-zero if the query fails. On success the driver result is copied
    # over field by field. Argument and driver failures are recorded via
    # _setLastError.
    cdef cudaError_t status = cudaSuccess
    cdef cydriver.CUDA_ARRAY_SPARSE_PROPERTIES driverProps
    if sparseProperties == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaError.cudaErrorInvalidValue
    memset(sparseProperties, 0, sizeof(cudaArraySparseProperties))

    status = <cudaError_t>cydriver._cuMipmappedArrayGetSparseProperties(&driverProps, <cydriver.CUmipmappedArray>mipmap)
    if status != cudaSuccess:
        _setLastError(status)
    else:
        sparseProperties[0].tileExtent.width  = driverProps.tileExtent.width
        sparseProperties[0].tileExtent.height = driverProps.tileExtent.height
        sparseProperties[0].tileExtent.depth  = driverProps.tileExtent.depth
        sparseProperties[0].miptailFirstLevel = driverProps.miptailFirstLevel
        sparseProperties[0].miptailSize       = driverProps.miptailSize
        sparseProperties[0].flags             = driverProps.flags
    return status


cdef cudaError_t _cudaDeviceCanAccessPeer(int* canAccessPeer, int device, int peerDevice) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Asks the driver whether `device` can access `peerDevice` and stores
    # the answer in `canAccessPeer[0]`.
    #
    # Both ordinals are resolved through m_global.getDevice; an unknown
    # ordinal yields cudaErrorInvalidDevice. A device is always reported as
    # unable to access itself (the driver's answer is overwritten with 0).
    # Lookup and driver failures are recorded via _setLastError; a
    # lazyInitDriver failure is returned as-is.
    cdef cudaError_t err = cudaSuccess
    err = m_global.lazyInitDriver()
    if err != cudaSuccess:
        return err
    cdef cudaPythonDevice *driverDevice
    cdef cudaPythonDevice *driverPeerDevice
    driverDevice = m_global.getDevice(device)
    driverPeerDevice = m_global.getDevice(peerDevice)
    if driverDevice == NULL or driverPeerDevice == NULL:
        # Record the failure like the other error exits of this function.
        _setLastError(cudaErrorInvalidDevice)
        return cudaErrorInvalidDevice

    err = <cudaError_t>cydriver._cuDeviceCanAccessPeer(canAccessPeer, driverDevice.driverDevice, driverPeerDevice.driverDevice)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    if device == peerDevice:
        canAccessPeer[0] = 0
    return err


cdef cudaError_t _cudaMemcpyPeer(void* dst, int dstDevice, const void* src, int srcDevice, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Copies `count` bytes between two devices through cuMemcpyPeer, using
    # the primary context of each device.
    #
    # A zero `count` returns right after lazy context initialization without
    # looking up either device. Otherwise both ordinals are resolved
    # (cudaErrorInvalidDevice if either is unknown) and initPrimaryContext is
    # called for both before the driver copy. Failures are recorded via
    # _setLastError; a lazyInitContextState failure is returned as-is.
    cdef cudaError_t err = cudaSuccess
    cdef cudaPythonDevice *device
    cdef cudaPythonDevice *peerDevice
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    if count > 0:
        peerDevice = m_global.getDevice(dstDevice)
        device = m_global.getDevice(srcDevice)
        if device == NULL or peerDevice == NULL:
            # Record the error that is actually returned; `err` is still
            # cudaSuccess at this point.
            _setLastError(cudaErrorInvalidDevice)
            return cudaErrorInvalidDevice
        err = initPrimaryContext(peerDevice)
        if err != cudaSuccess:
            _setLastError(err)
            return err
        err = initPrimaryContext(device)
        if err != cudaSuccess:
            _setLastError(err)
            return err
        err = <cudaError_t>cydriver._cuMemcpyPeer(<cydriver.CUdeviceptr>dst, peerDevice[0].primaryContext, <cydriver.CUdeviceptr>src, device[0].primaryContext, count)
        if err != cudaSuccess:
            _setLastError(err)
            return err
    return err


cdef cudaError_t _cudaMemcpyPeerAsync(void* dst, int dstDevice, const void* src, int srcDevice, size_t count, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Stream-taking variant of the peer copy: forwards `count` bytes from `src`
    # on srcDevice to `dst` on dstDevice to the driver's cuMemcpyPeerAsync,
    # using both primary contexts and the given stream.
    #
    # A zero-byte copy returns right after lazy initialisation without
    # resolving either device.
    #
    # Returns:
    #   - the lazyInitContextState() status when initialisation fails,
    #   - cudaErrorInvalidDevice when either ordinal cannot be resolved,
    #   - the initPrimaryContext() status when a primary context cannot be set up,
    #   - otherwise the status of the driver call.
    cdef cudaError_t err = cudaSuccess
    cdef cudaPythonDevice *device
    cdef cudaPythonDevice *peerDevice
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    if count > 0:
        peerDevice = m_global.getDevice(dstDevice)
        device = m_global.getDevice(srcDevice)
        if device == NULL or peerDevice == NULL:
            # Record the code that is actually returned; `err` is still
            # cudaSuccess at this point.
            _setLastError(cudaErrorInvalidDevice)
            return cudaErrorInvalidDevice
        err = initPrimaryContext(peerDevice)
        if err != cudaSuccess:
            _setLastError(err)
            return err
        err = initPrimaryContext(device)
        if err != cudaSuccess:
            _setLastError(err)
            return err
        err = <cudaError_t>cydriver._cuMemcpyPeerAsync(<cydriver.CUdeviceptr>dst, peerDevice[0].primaryContext, <cydriver.CUdeviceptr>src, device[0].primaryContext, count, <cydriver.CUstream>stream)
        if err != cudaSuccess:
            _setLastError(err)
            return err
    return err


cdef cudaError_t _cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Enables access from the current context to peerDevice's primary context
    # by forwarding to the driver's cuCtxEnablePeerAccess.
    #
    # The current context must be one that m_global recognises as a primary
    # context; otherwise cudaErrorIncompatibleDriverContext is returned.
    # Every error exit is also passed to _setLastError.
    #
    # Returns cudaErrorInvalidDevice when peerDevice cannot be resolved, or the
    # status of the failing initialisation / driver call.
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUcontext context
    cdef cudaPythonDevice *dev
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    dev = m_global.getDeviceFromPrimaryCtx(context)
    if dev == NULL:
        # We don't know if context provided is primary or not
        # cudaSetDevice may need to be called before retrying call
        _setLastError(cudaErrorIncompatibleDriverContext)
        return cudaErrorIncompatibleDriverContext
    # From here on `dev` refers to the peer, not to the current device.
    dev = m_global.getDevice(peerDevice)
    if dev == NULL:
        # Record this failure too, matching every other error exit here.
        _setLastError(cudaErrorInvalidDevice)
        return cudaErrorInvalidDevice
    err = initPrimaryContext(dev)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuCtxEnablePeerAccess(dev.primaryContext, flags)
    if err != cudaSuccess:
        _setLastError(err)
    return err


cdef cudaError_t _cudaDeviceDisablePeerAccess(int peerDevice) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Disables access from the current context to peerDevice's primary context
    # by forwarding to the driver's cuCtxDisablePeerAccess.
    #
    # The current context must be one that m_global recognises as a primary
    # context; otherwise cudaErrorIncompatibleDriverContext is returned.
    # Every error exit is also passed to _setLastError.
    #
    # Returns cudaErrorInvalidDevice when peerDevice cannot be resolved, or the
    # status of the failing initialisation / driver call.
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUcontext context
    cdef cudaPythonDevice *dev
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuCtxGetCurrent(&context)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    dev = m_global.getDeviceFromPrimaryCtx(context)
    if dev == NULL:
        # We don't know if context provided is primary or not
        # cudaSetDevice may need to be called before retrying call
        _setLastError(cudaErrorIncompatibleDriverContext)
        return cudaErrorIncompatibleDriverContext
    # From here on `dev` refers to the peer, not to the current device.
    dev = m_global.getDevice(peerDevice)
    if dev == NULL:
        # Record this failure too, matching every other error exit here.
        _setLastError(cudaErrorInvalidDevice)
        return cudaErrorInvalidDevice
    err = initPrimaryContext(dev)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuCtxDisablePeerAccess(dev.primaryContext)
    if err != cudaSuccess:
        _setLastError(err)
    return err

cdef cudaError_t _cudaExternalMemoryGetMappedMipmappedArray(cudaMipmappedArray_t* mipmap, cudaExternalMemory_t extMem, const cudaExternalMemoryMipmappedArrayDesc* mipmapDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Maps a mipmapped array onto imported external memory.
    #
    # The runtime descriptor is converted field by field into the driver's
    # CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC (the channel format goes
    # through getDescInfo) and handed to cuExternalMemoryGetMappedMipmappedArray.
    #
    # Returns:
    #   - the lazyInitContextState() status when initialisation fails,
    #   - cudaErrorInvalidValue when mipmapDesc is NULL,
    #   - the getDescInfo() status when the channel format cannot be converted,
    #   - otherwise the status of the driver call.
    cdef cudaError_t err = cudaSuccess
    cdef cudaError_t err_rt = cudaSuccess
    cdef cydriver.CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC _driver_mipmapDesc
    err = m_global.lazyInitContextState()
    # Honour an initialisation failure before touching the descriptor, so it
    # cannot be masked by a later conversion error.
    if err != cudaSuccess:
        return err
    if mipmapDesc == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    memset(&_driver_mipmapDesc, 0, sizeof(_driver_mipmapDesc))
    _driver_mipmapDesc.offset = mipmapDesc[0].offset
    _driver_mipmapDesc.arrayDesc.Width = mipmapDesc[0].extent.width
    _driver_mipmapDesc.arrayDesc.Height = mipmapDesc[0].extent.height
    _driver_mipmapDesc.arrayDesc.Depth = mipmapDesc[0].extent.depth
    err_rt = getDescInfo(&mipmapDesc[0].formatDesc, <int *>&_driver_mipmapDesc.arrayDesc.NumChannels, &_driver_mipmapDesc.arrayDesc.Format)
    if err_rt != cudaError.cudaSuccess:
        _setLastError(err_rt)
        return err_rt
    _driver_mipmapDesc.arrayDesc.Flags = mipmapDesc[0].flags
    _driver_mipmapDesc.numLevels = mipmapDesc[0].numLevels

    err = <cudaError_t>cydriver._cuExternalMemoryGetMappedMipmappedArray(<cydriver.CUmipmappedArray*>mipmap, <cydriver.CUexternalMemory>extMem, &_driver_mipmapDesc)
    if err != cudaSuccess:
        _setLastError(err)
    return err

cdef cudaError_t _cudaGetSurfaceObjectResourceDesc(cudaResourceDesc* pResDesc, cudaSurfaceObject_t surfObject) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Fills pResDesc with the resource descriptor of a surface object.
    #
    # The descriptor is fetched with the driver's cuSurfObjectGetResourceDesc
    # and translated into the runtime layout. pResDesc is zeroed before it is
    # populated (also when the driver call fails).
    #
    # Returns:
    #   - the lazyInitContextState() status when initialisation fails,
    #   - the driver status when the query fails (no translation is attempted),
    #   - cudaErrorInvalidChannelDescriptor for an unhandled array format or a
    #     channel count outside 1..4 on linear / pitch2D resources,
    #   - cudaSuccess otherwise.
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUDA_RESOURCE_DESC _driver_pResDesc
    cdef int channel_size = 0
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err

    # Start from a defined state so nothing below ever reads stack garbage.
    memset(&_driver_pResDesc, 0, sizeof(_driver_pResDesc))
    err = <cudaError_t>cydriver._cuSurfObjectGetResourceDesc(&_driver_pResDesc, <cydriver.CUsurfObject>surfObject)
    memset(pResDesc, 0, sizeof(cudaResourceDesc))
    if err != cudaSuccess:
        # The driver descriptor is meaningless on failure: report the real
        # error instead of translating it.
        _setLastError(err)
        return err

    if _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_ARRAY:
        pResDesc[0].resType         = cudaResourceType.cudaResourceTypeArray
        pResDesc[0].res.array.array = <cudaArray_t>_driver_pResDesc.res.array.hArray
    elif _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_MIPMAPPED_ARRAY:
        pResDesc[0].resType = cudaResourceType.cudaResourceTypeMipmappedArray
        pResDesc[0].res.mipmap.mipmap = <cudaMipmappedArray_t>_driver_pResDesc.res.mipmap.hMipmappedArray
    elif _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_LINEAR:
        pResDesc[0].resType                = cudaResourceType.cudaResourceTypeLinear
        pResDesc[0].res.linear.devPtr      = <void *>_driver_pResDesc.res.linear.devPtr
        pResDesc[0].res.linear.sizeInBytes = _driver_pResDesc.res.linear.sizeInBytes
    elif _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_PITCH2D:
        pResDesc[0].resType                  = cudaResourceType.cudaResourceTypePitch2D
        pResDesc[0].res.pitch2D.devPtr       = <void *>_driver_pResDesc.res.pitch2D.devPtr
        pResDesc[0].res.pitch2D.pitchInBytes = _driver_pResDesc.res.pitch2D.pitchInBytes
        pResDesc[0].res.pitch2D.width        = _driver_pResDesc.res.pitch2D.width
        pResDesc[0].res.pitch2D.height       = _driver_pResDesc.res.pitch2D.height
    if _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_LINEAR or _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_PITCH2D:
        # NOTE(review): format / numChannels / desc are accessed through the
        # `linear` member for pitch2D resources as well; this presumably relies
        # on both union members sharing the same leading layout - confirm.
        channel_size = 0
        if _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_UNSIGNED_INT8:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
            channel_size = 8
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_UNSIGNED_INT16:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
            channel_size = 16
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_UNSIGNED_INT32:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
            channel_size = 32
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_SIGNED_INT8:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindSigned
            channel_size = 8
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_SIGNED_INT16:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindSigned
            channel_size = 16
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_SIGNED_INT32:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindSigned
            channel_size = 32
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_HALF:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
            channel_size = 16
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_FLOAT:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
            channel_size = 32
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_NV12:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindNV12
            channel_size = 8
        else:
            _setLastError(cudaErrorInvalidChannelDescriptor)
            return cudaError.cudaErrorInvalidChannelDescriptor
        # Channels x, y, z, w get the per-channel bit width in that order, one
        # per reported channel; unused channels stay at 0.
        pResDesc[0].res.linear.desc.x = 0
        pResDesc[0].res.linear.desc.y = 0
        pResDesc[0].res.linear.desc.z = 0
        pResDesc[0].res.linear.desc.w = 0
        if _driver_pResDesc.res.linear.numChannels >= 4:
            pResDesc[0].res.linear.desc.w = channel_size
        if _driver_pResDesc.res.linear.numChannels >= 3:
            pResDesc[0].res.linear.desc.z = channel_size
        if _driver_pResDesc.res.linear.numChannels >= 2:
            pResDesc[0].res.linear.desc.y = channel_size
        if _driver_pResDesc.res.linear.numChannels >= 1:
            pResDesc[0].res.linear.desc.x = channel_size
        if _driver_pResDesc.res.linear.numChannels < 1 or _driver_pResDesc.res.linear.numChannels >= 5:
            _setLastError(cudaErrorInvalidChannelDescriptor)
            return cudaError.cudaErrorInvalidChannelDescriptor

    return err

cdef cudaError_t _cudaGraphKernelNodeGetParams(cudaGraphNode_t node, cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Reads the launch parameters of a kernel graph node.
    #
    # The parameters are fetched with the driver's cuGraphKernelNodeGetParams_v2
    # and copied field by field into the runtime structure. pNodeParams is only
    # written when the driver call succeeds.
    #
    # Returns the lazyInitContextState() status when initialisation fails,
    # otherwise the status of the driver call (also passed to _setLastError on
    # failure).
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUDA_KERNEL_NODE_PARAMS driverNodeParams
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err

    err = <cudaError_t>cydriver._cuGraphKernelNodeGetParams_v2(<cydriver.CUgraphNode>node, &driverNodeParams)
    if err != cudaSuccess:
        # driverNodeParams was never initialised here; do not leak its
        # contents into the caller's structure.
        _setLastError(err)
        return err

    pNodeParams[0].func = <void*>driverNodeParams.func
    pNodeParams[0].gridDim.x = driverNodeParams.gridDimX
    pNodeParams[0].gridDim.y = driverNodeParams.gridDimY
    pNodeParams[0].gridDim.z = driverNodeParams.gridDimZ
    pNodeParams[0].blockDim.x = driverNodeParams.blockDimX
    pNodeParams[0].blockDim.y = driverNodeParams.blockDimY
    pNodeParams[0].blockDim.z = driverNodeParams.blockDimZ
    pNodeParams[0].sharedMemBytes = driverNodeParams.sharedMemBytes
    pNodeParams[0].kernelParams = driverNodeParams.kernelParams
    pNodeParams[0].extra = driverNodeParams.extra
    return err

cdef cudaError_t _cudaExternalMemoryGetMappedBuffer(void** devPtr, cudaExternalMemory_t extMem, const cudaExternalMemoryBufferDesc* bufferDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Maps a buffer onto imported external memory.
    #
    # offset, size and flags of the runtime descriptor are copied into a zeroed
    # driver CUDA_EXTERNAL_MEMORY_BUFFER_DESC, which is then passed to
    # cuExternalMemoryGetMappedBuffer.
    #
    # Returns:
    #   - the lazyInitContextState() status when initialisation fails,
    #   - cudaErrorInvalidValue when bufferDesc is NULL,
    #   - otherwise the status of the driver call.
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUDA_EXTERNAL_MEMORY_BUFFER_DESC _driver_bufferDesc
    err = m_global.lazyInitContextState()
    # Bail out before reading the caller's descriptor if initialisation failed.
    if err != cudaSuccess:
        return err
    if bufferDesc == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    memset(&_driver_bufferDesc, 0, sizeof(_driver_bufferDesc))
    _driver_bufferDesc.offset = bufferDesc[0].offset
    _driver_bufferDesc.size = bufferDesc[0].size
    _driver_bufferDesc.flags = bufferDesc[0].flags

    err = <cudaError_t>cydriver._cuExternalMemoryGetMappedBuffer(<cydriver.CUdeviceptr*>devPtr, <cydriver.CUexternalMemory>extMem, &_driver_bufferDesc)
    if err != cudaSuccess:
        _setLastError(err)
    return err

cdef cudaError_t _cudaImportExternalMemory(cudaExternalMemory_t* extMem_out, const cudaExternalMemoryHandleDesc* memHandleDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Imports an external memory object.
    #
    # The runtime handle descriptor is translated into a zeroed driver
    # CUDA_EXTERNAL_MEMORY_HANDLE_DESC and passed to cuImportExternalMemory.
    # Only the handle member matching the handle type is copied. A handle type
    # not listed below leaves the driver type and handle zeroed, and the
    # descriptor is still forwarded to the driver.
    #
    # Returns:
    #   - the lazyInitContextState() status when initialisation fails,
    #   - cudaErrorInvalidValue when memHandleDesc is NULL,
    #   - otherwise the status of the driver call.
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUDA_EXTERNAL_MEMORY_HANDLE_DESC _driver_memHandleDesc
    err = m_global.lazyInitContextState()
    # Bail out before reading the caller's descriptor if initialisation failed.
    if err != cudaSuccess:
        return err
    if memHandleDesc == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    memset(&_driver_memHandleDesc, 0, sizeof(_driver_memHandleDesc))

    if memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueFd:
        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD
        _driver_memHandleDesc.handle.fd = memHandleDesc[0].handle.fd
    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueWin32:
        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32
        _driver_memHandleDesc.handle.win32.handle = memHandleDesc[0].handle.win32.handle
        _driver_memHandleDesc.handle.win32.name = memHandleDesc[0].handle.win32.name
    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeOpaqueWin32Kmt:
        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT
        _driver_memHandleDesc.handle.win32.handle = memHandleDesc[0].handle.win32.handle
        _driver_memHandleDesc.handle.win32.name = memHandleDesc[0].handle.win32.name
    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D12Heap:
        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP
        _driver_memHandleDesc.handle.win32.handle = memHandleDesc[0].handle.win32.handle
        _driver_memHandleDesc.handle.win32.name = memHandleDesc[0].handle.win32.name
    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D12Resource:
        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE
        _driver_memHandleDesc.handle.win32.handle = memHandleDesc[0].handle.win32.handle
        _driver_memHandleDesc.handle.win32.name = memHandleDesc[0].handle.win32.name
    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D11Resource:
        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE
        _driver_memHandleDesc.handle.win32.handle = memHandleDesc[0].handle.win32.handle
        _driver_memHandleDesc.handle.win32.name = memHandleDesc[0].handle.win32.name
    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeD3D11ResourceKmt:
        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT
        _driver_memHandleDesc.handle.win32.handle = memHandleDesc[0].handle.win32.handle
        _driver_memHandleDesc.handle.win32.name = memHandleDesc[0].handle.win32.name
    elif memHandleDesc[0].type == cudaExternalMemoryHandleType.cudaExternalMemoryHandleTypeNvSciBuf:
        _driver_memHandleDesc.type = cydriver.CUexternalMemoryHandleType_enum.CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF
        _driver_memHandleDesc.handle.nvSciBufObject = memHandleDesc[0].handle.nvSciBufObject
    _driver_memHandleDesc.size = memHandleDesc[0].size
    _driver_memHandleDesc.flags = memHandleDesc[0].flags

    err = <cudaError_t>cydriver._cuImportExternalMemory(<cydriver.CUexternalMemory*>extMem_out, &_driver_memHandleDesc)
    if err != cudaSuccess:
        _setLastError(err)
    return err

cdef cudaError_t _cudaCreateSurfaceObject(cudaSurfaceObject_t* pSurfObject, const cudaResourceDesc* pResDesc) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Creates a surface object from a runtime resource descriptor.
    #
    # The descriptor is converted with toDriverCudaResourceDesc into a zeroed
    # driver CUDA_RESOURCE_DESC and passed to the driver's cuSurfObjectCreate.
    # An initialisation failure is returned directly; a conversion or driver
    # failure is passed to _setLastError before being returned.
    cdef cydriver.CUDA_RESOURCE_DESC drv_desc
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    memset(&drv_desc, 0, sizeof(drv_desc))
    status = toDriverCudaResourceDesc(&drv_desc, pResDesc)
    if status == cudaSuccess:
        # Only reach the driver once the conversion went through.
        status = <cudaError_t>cydriver._cuSurfObjectCreate(<cydriver.CUsurfObject*>pSurfObject, &drv_desc)
    if status != cudaSuccess:
        _setLastError(status)
    return status

cdef cudaError_t _cudaGetTextureObjectResourceDesc(cudaResourceDesc* pResDesc, cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Fills pResDesc with the resource descriptor of a texture object.
    #
    # pResDesc is an output: the descriptor is fetched with the driver's
    # cuTexObjectGetResourceDesc and translated into the runtime layout. The
    # incoming contents of pResDesc are ignored and it is only written after a
    # successful driver query.
    #
    # Returns:
    #   - the lazyInitContextState() status when initialisation fails,
    #   - the driver status when the query fails,
    #   - cudaErrorInvalidChannelDescriptor for an unhandled array format or a
    #     channel count outside 1..4 on linear / pitch2D resources,
    #   - cudaSuccess otherwise.
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUDA_RESOURCE_DESC _driver_pResDesc
    cdef int channel_size = 0
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err

    memset(&_driver_pResDesc, 0, sizeof(_driver_pResDesc))
    err = <cudaError_t>cydriver._cuTexObjectGetResourceDesc(&_driver_pResDesc, <cydriver.CUtexObject>texObject)
    if err != cudaSuccess:
        _setLastError(err)
        return err

    # Translate the driver descriptor back into the caller's structure.
    memset(pResDesc, 0, sizeof(cudaResourceDesc))
    if _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_ARRAY:
        pResDesc[0].resType         = cudaResourceType.cudaResourceTypeArray
        pResDesc[0].res.array.array = <cudaArray_t>_driver_pResDesc.res.array.hArray
    elif _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_MIPMAPPED_ARRAY:
        pResDesc[0].resType = cudaResourceType.cudaResourceTypeMipmappedArray
        pResDesc[0].res.mipmap.mipmap = <cudaMipmappedArray_t>_driver_pResDesc.res.mipmap.hMipmappedArray
    elif _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_LINEAR:
        pResDesc[0].resType                = cudaResourceType.cudaResourceTypeLinear
        pResDesc[0].res.linear.devPtr      = <void *>_driver_pResDesc.res.linear.devPtr
        pResDesc[0].res.linear.sizeInBytes = _driver_pResDesc.res.linear.sizeInBytes
    elif _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_PITCH2D:
        pResDesc[0].resType                  = cudaResourceType.cudaResourceTypePitch2D
        pResDesc[0].res.pitch2D.devPtr       = <void *>_driver_pResDesc.res.pitch2D.devPtr
        pResDesc[0].res.pitch2D.pitchInBytes = _driver_pResDesc.res.pitch2D.pitchInBytes
        pResDesc[0].res.pitch2D.width        = _driver_pResDesc.res.pitch2D.width
        pResDesc[0].res.pitch2D.height       = _driver_pResDesc.res.pitch2D.height
    if _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_LINEAR or _driver_pResDesc.resType == cydriver.CU_RESOURCE_TYPE_PITCH2D:
        # NOTE(review): format / numChannels / desc are accessed through the
        # `linear` member for pitch2D resources as well, mirroring the surface
        # object query in this file; this presumably relies on both union
        # members sharing the same leading layout - confirm.
        if _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_UNSIGNED_INT8:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
            channel_size = 8
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_UNSIGNED_INT16:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
            channel_size = 16
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_UNSIGNED_INT32:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindUnsigned
            channel_size = 32
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_SIGNED_INT8:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindSigned
            channel_size = 8
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_SIGNED_INT16:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindSigned
            channel_size = 16
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_SIGNED_INT32:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindSigned
            channel_size = 32
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_HALF:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
            channel_size = 16
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_FLOAT:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindFloat
            channel_size = 32
        elif _driver_pResDesc.res.linear.format == cydriver.CU_AD_FORMAT_NV12:
            pResDesc[0].res.linear.desc.f = cudaChannelFormatKind.cudaChannelFormatKindNV12
            channel_size = 8
        else:
            _setLastError(cudaErrorInvalidChannelDescriptor)
            return cudaError.cudaErrorInvalidChannelDescriptor
        # Channels x, y, z, w get the per-channel bit width in that order, one
        # per reported channel; unused channels stay at 0.
        pResDesc[0].res.linear.desc.x = 0
        pResDesc[0].res.linear.desc.y = 0
        pResDesc[0].res.linear.desc.z = 0
        pResDesc[0].res.linear.desc.w = 0
        if _driver_pResDesc.res.linear.numChannels >= 4:
            pResDesc[0].res.linear.desc.w = channel_size
        if _driver_pResDesc.res.linear.numChannels >= 3:
            pResDesc[0].res.linear.desc.z = channel_size
        if _driver_pResDesc.res.linear.numChannels >= 2:
            pResDesc[0].res.linear.desc.y = channel_size
        if _driver_pResDesc.res.linear.numChannels >= 1:
            pResDesc[0].res.linear.desc.x = channel_size
        if _driver_pResDesc.res.linear.numChannels < 1 or _driver_pResDesc.res.linear.numChannels >= 5:
            _setLastError(cudaErrorInvalidChannelDescriptor)
            return cudaError.cudaErrorInvalidChannelDescriptor

    return cudaSuccess

cdef cudaError_t _cudaEGLStreamProducerPresentFrame(cudaEglStreamConnection* conn, cudaEglFrame eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Presents a frame on an EGL stream producer connection.
    #
    # The runtime frame is converted with getDriverEglFrame and handed to the
    # driver's cuEGLStreamProducerPresentFrame. An initialisation failure is
    # returned directly; a conversion or driver failure is passed to
    # _setLastError before being returned.
    cdef cydriver.CUeglFrame drv_frame
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = getDriverEglFrame(&drv_frame, eglframe)
    if status == cudaSuccess:
        # Only reach the driver once the frame conversion went through.
        status = <cudaError_t>cydriver._cuEGLStreamProducerPresentFrame(<cydriver.CUeglStreamConnection*>conn, drv_frame, pStream)
    if status != cudaSuccess:
        _setLastError(status)
    return status

cdef cudaError_t _cudaEGLStreamProducerReturnFrame(cudaEglStreamConnection* conn, cudaEglFrame* eglframe, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Retrieves a returned frame from an EGL stream producer connection.
    #
    # The frame is obtained from the driver's cuEGLStreamProducerReturnFrame
    # and converted into *eglframe with getRuntimeEglFrame. A NULL eglframe
    # yields cudaErrorInvalidResourceHandle without calling the driver.
    # An initialisation failure is returned directly; every later failure is
    # passed to _setLastError before being returned.
    cdef cydriver.CUeglFrame drv_frame
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    if eglframe == NULL:
        status = cudaErrorInvalidResourceHandle
    else:
        status = <cudaError_t>cydriver._cuEGLStreamProducerReturnFrame(<cydriver.CUeglStreamConnection*>conn, &drv_frame, pStream)
        if status == cudaSuccess:
            status = getRuntimeEglFrame(eglframe, drv_frame)

    if status != cudaSuccess:
        _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsResourceGetMappedEglFrame(cudaEglFrame* eglFrame, cudaGraphicsResource_t resource, unsigned int index, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Obtains the EGL frame through which a mapped graphics resource is accessed.
    #
    # A zeroed driver frame is filled by cuGraphicsResourceGetMappedEglFrame
    # and converted into *eglFrame with getRuntimeEglFrame. An initialisation
    # failure is returned directly; a driver or conversion failure is passed
    # to _setLastError before being returned.
    cdef cydriver.CUeglFrame drv_frame
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    memset(&drv_frame, 0, sizeof(drv_frame))
    status = <cudaError_t>cydriver._cuGraphicsResourceGetMappedEglFrame(&drv_frame, <cydriver.CUgraphicsResource>resource, index, mipLevel)
    if status == cudaSuccess:
        # Convert only a frame the driver actually produced.
        status = getRuntimeEglFrame(eglFrame, drv_frame)
    if status != cudaSuccess:
        _setLastError(status)
    return status

cdef cudaError_t _cudaVDPAUSetVDPAUDevice(int device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Not implemented by this layer: every call reports cudaErrorNotSupported
    # without looking at the arguments or calling _setLastError.
    cdef cudaError_t unsupported = cudaErrorNotSupported
    return unsupported

cdef cudaError_t _cudaArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaArray_t array, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Queries size and alignment requirements of a CUDA array on `device`.
    #
    # The output structure is zeroed, the driver's cuArrayGetMemoryRequirements
    # is called, and on success `size` and `alignment` are copied over.
    # A NULL output pointer yields cudaErrorInvalidValue. Every failure is
    # passed to _setLastError before being returned.
    cdef cydriver.CUDA_ARRAY_MEMORY_REQUIREMENTS drv_reqs
    cdef cudaError_t status = cudaSuccess

    if memoryRequirements == NULL:
        status = cudaErrorInvalidValue
    else:
        memset(memoryRequirements, 0, sizeof(memoryRequirements[0]))
        status = <cudaError_t>cydriver._cuArrayGetMemoryRequirements(&drv_reqs, <cydriver.CUarray>array, device)
        if status == cudaSuccess:
            memoryRequirements[0].size = drv_reqs.size
            memoryRequirements[0].alignment = drv_reqs.alignment
            return cudaSuccess

    # Only failures reach this point.
    _setLastError(status)
    return status

cdef cudaError_t _cudaMipmappedArrayGetMemoryRequirements(cudaArrayMemoryRequirements* memoryRequirements, cudaMipmappedArray_t mipmap, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Queries size and alignment requirements of a mipmapped array on `device`.
    #
    # The output structure is zeroed, the driver's
    # cuMipmappedArrayGetMemoryRequirements is called, and on success `size`
    # and `alignment` are copied over. A NULL output pointer yields
    # cudaErrorInvalidValue. Every failure is passed to _setLastError before
    # being returned.
    cdef cydriver.CUDA_ARRAY_MEMORY_REQUIREMENTS drv_reqs
    cdef cudaError_t status = cudaSuccess

    if memoryRequirements == NULL:
        status = cudaErrorInvalidValue
    else:
        memset(memoryRequirements, 0, sizeof(memoryRequirements[0]))
        status = <cudaError_t>cydriver._cuMipmappedArrayGetMemoryRequirements(&drv_reqs, <cydriver.CUmipmappedArray>mipmap, device)
        if status == cudaSuccess:
            memoryRequirements[0].size = drv_reqs.size
            memoryRequirements[0].alignment = drv_reqs.alignment
            return cudaSuccess

    # Only failures reach this point.
    _setLastError(status)
    return status

cdef cudaError_t _cudaStreamGetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, cudaStreamAttrValue* value_out) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuStreamGetAttribute; the handle, attribute
    # id and output pointer are cast to their driver types.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamGetAttribute(<cydriver.CUstream>hStream, <cydriver.CUstreamAttrID>attr, <cydriver.CUstreamAttrValue*>value_out)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamSetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, const cudaStreamAttrValue* value) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuStreamSetAttribute; the handle, attribute
    # id and value pointer are cast to their driver types.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamSetAttribute(<cydriver.CUstream>hStream, <cydriver.CUstreamAttrID>attr, <cydriver.CUstreamAttrValue*>value)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphKernelNodeGetAttribute(cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, cudaKernelNodeAttrValue* value_out) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuGraphKernelNodeGetAttribute; the node,
    # attribute id and output pointer are cast to their driver types.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphKernelNodeGetAttribute(<cydriver.CUgraphNode>hNode, <cydriver.CUkernelNodeAttrID>attr, <cydriver.CUkernelNodeAttrValue*>value_out)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphKernelNodeSetAttribute(cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, const cudaKernelNodeAttrValue* value) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuGraphKernelNodeSetAttribute; the node,
    # attribute id and value pointer are cast to their driver types.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphKernelNodeSetAttribute(<cydriver.CUgraphNode>hNode, <cydriver.CUkernelNodeAttrID>attr, <cydriver.CUkernelNodeAttrValue*>value)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaVDPAUGetDevice(int* device, VdpDevice vdpDevice, VdpGetProcAddress* vdpGetProcAddress) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuVDPAUGetDevice; the output pointer is
    # reinterpreted as a CUdevice pointer.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuVDPAUGetDevice(<cydriver.CUdevice*>device, vdpDevice, vdpGetProcAddress)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsVDPAURegisterVideoSurface(cudaGraphicsResource** resource, VdpVideoSurface vdpSurface, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuGraphicsVDPAURegisterVideoSurface; the
    # output pointer is reinterpreted as a CUgraphicsResource pointer.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphicsVDPAURegisterVideoSurface(<cydriver.CUgraphicsResource*>resource, vdpSurface, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsVDPAURegisterOutputSurface(cudaGraphicsResource** resource, VdpOutputSurface vdpSurface, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuGraphicsVDPAURegisterOutputSurface; the
    # output pointer is reinterpreted as a CUgraphicsResource pointer.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphicsVDPAURegisterOutputSurface(<cydriver.CUgraphicsResource*>resource, vdpSurface, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGLGetDevices(unsigned int* pCudaDeviceCount, int* pCudaDevices, unsigned int cudaDeviceCount, cudaGLDeviceList deviceList) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuGLGetDevices_v2; the device array is
    # reinterpreted as CUdevice* and the list selector is cast to its driver
    # enum.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGLGetDevices_v2(pCudaDeviceCount, <cydriver.CUdevice*>pCudaDevices, cudaDeviceCount, <cydriver.CUGLDeviceList>deviceList)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsGLRegisterImage(cudaGraphicsResource** resource, GLuint image, GLenum target, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuGraphicsGLRegisterImage; the output
    # pointer is reinterpreted as a CUgraphicsResource pointer.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphicsGLRegisterImage(<cydriver.CUgraphicsResource*>resource, image, target, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsGLRegisterBuffer(cudaGraphicsResource** resource, GLuint buffer, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuGraphicsGLRegisterBuffer; the output
    # pointer is reinterpreted as a CUgraphicsResource pointer.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphicsGLRegisterBuffer(<cydriver.CUgraphicsResource*>resource, buffer, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceSynchronize() except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuCtxSynchronize.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuCtxSynchronize()
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceSetLimit(cudaLimit limit, size_t value) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuCtxSetLimit; the limit selector is cast
    # to the driver's CUlimit.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuCtxSetLimit(<cydriver.CUlimit>limit, value)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceGetLimit(size_t* pValue, cudaLimit limit) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuCtxGetLimit; the limit selector is cast
    # to the driver's CUlimit and the result is written through pValue.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuCtxGetLimit(pValue, <cydriver.CUlimit>limit)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceGetCacheConfig(cudaFuncCache* pCacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuCtxGetCacheConfig; the output pointer is
    # reinterpreted as a CUfunc_cache pointer.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuCtxGetCacheConfig(<cydriver.CUfunc_cache*>pCacheConfig)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceGetStreamPriorityRange(int* leastPriority, int* greatestPriority) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuCtxGetStreamPriorityRange; both output
    # pointers are passed through unchanged.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuCtxGetStreamPriorityRange(leastPriority, greatestPriority)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceSetCacheConfig(cudaFuncCache cacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuCtxSetCacheConfig; the setting is cast to
    # the driver's CUfunc_cache.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuCtxSetCacheConfig(<cydriver.CUfunc_cache>cacheConfig)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceGetSharedMemConfig(cudaSharedMemConfig* pConfig) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuCtxGetSharedMemConfig; the output pointer
    # is reinterpreted as a CUsharedconfig pointer.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuCtxGetSharedMemConfig(<cydriver.CUsharedconfig*>pConfig)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaIpcGetEventHandle(cudaIpcEventHandle_t* handle, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuIpcGetEventHandle; the handle pointer and
    # the event are cast to their driver types.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuIpcGetEventHandle(<cydriver.CUipcEventHandle*>handle, <cydriver.CUevent>event)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaIpcOpenEventHandle(cudaEvent_t* event, cudaIpcEventHandle_t handle) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Opens an event from an IPC event handle via the driver's
    # cuIpcOpenEventHandle.
    # The by-value runtime handle is byte-copied into a driver CUipcEventHandle
    # (sizeof the driver type) because the driver call takes its own handle
    # type by value.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cydriver.CUipcEventHandle drv_handle
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        memcpy(&drv_handle, &handle, sizeof(drv_handle))
        status = <cudaError_t>cydriver._cuIpcOpenEventHandle(<cydriver.CUevent*>event, drv_handle)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaIpcGetMemHandle(cudaIpcMemHandle_t* handle, void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuIpcGetMemHandle; the handle pointer and
    # the device pointer are cast to their driver types.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuIpcGetMemHandle(<cydriver.CUipcMemHandle*>handle, <cydriver.CUdeviceptr>devPtr)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaIpcOpenMemHandle(void** devPtr, cudaIpcMemHandle_t handle, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Opens device memory from an IPC memory handle via the driver's
    # cuIpcOpenMemHandle_v2.
    # The by-value runtime handle is byte-copied into a driver CUipcMemHandle
    # (sizeof the driver type) because the driver call takes its own handle
    # type by value.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cydriver.CUipcMemHandle drv_handle
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        memcpy(&drv_handle, &handle, sizeof(drv_handle))
        status = <cudaError_t>cydriver._cuIpcOpenMemHandle_v2(<cydriver.CUdeviceptr*>devPtr, drv_handle, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaIpcCloseMemHandle(void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuIpcCloseMemHandle; the pointer is cast to
    # a CUdeviceptr.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuIpcCloseMemHandle(<cydriver.CUdeviceptr>devPtr)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceFlushGPUDirectRDMAWrites(cudaFlushGPUDirectRDMAWritesTarget target, cudaFlushGPUDirectRDMAWritesScope scope) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Thin wrapper over the driver's cuFlushGPUDirectRDMAWrites; target and
    # scope are cast to their driver enums.
    # A lazy-initialisation failure is returned directly; a driver failure is
    # passed to _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuFlushGPUDirectRDMAWrites(<cydriver.CUflushGPUDirectRDMAWritesTarget>target, <cydriver.CUflushGPUDirectRDMAWritesScope>scope)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceGetDefaultMemPool(cudaMemPool_t* memPool, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuDeviceGetDefaultMemPool.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The integer device argument is passed on as a cydriver.CUdevice.
        status = <cudaError_t>cydriver._cuDeviceGetDefaultMemPool(<cydriver.CUmemoryPool*>memPool, <cydriver.CUdevice>device)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuDeviceSetMemPool.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuDeviceSetMemPool(<cydriver.CUdevice>device, <cydriver.CUmemoryPool>memPool)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceGetMemPool(cudaMemPool_t* memPool, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuDeviceGetMemPool.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuDeviceGetMemPool(<cydriver.CUmemoryPool*>memPool, <cydriver.CUdevice>device)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceGetNvSciSyncAttributes(void* nvSciSyncAttrList, int device, int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuDeviceGetNvSciSyncAttributes.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The attribute list pointer and flags are passed through untouched.
        status = <cudaError_t>cydriver._cuDeviceGetNvSciSyncAttributes(nvSciSyncAttrList, <cydriver.CUdevice>device, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamCreateWithFlags(cudaStream_t* pStream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamCreate, forwarding the
    # caller-supplied flags.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamCreate(<cydriver.CUstream*>pStream, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamCreateWithPriority(cudaStream_t* pStream, unsigned int flags, int priority) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamCreateWithPriority.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamCreateWithPriority(<cydriver.CUstream*>pStream, flags, priority)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamGetPriority(cudaStream_t hStream, int* priority) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamGetPriority.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamGetPriority(<cydriver.CUstream>hStream, priority)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamGetFlags(cudaStream_t hStream, unsigned int* flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamGetFlags.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamGetFlags(<cydriver.CUstream>hStream, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaCtxResetPersistingL2Cache() except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuCtxResetPersistingL2Cache.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuCtxResetPersistingL2Cache()
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamCopyAttributes.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamCopyAttributes(<cydriver.CUstream>dst, <cydriver.CUstream>src)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamDestroy(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamDestroy_v2.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamDestroy_v2(<cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamWaitEvent(cudaStream_t stream, cudaEvent_t event, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamWaitEvent.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamWaitEvent(<cydriver.CUstream>stream, <cydriver.CUevent>event, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamSynchronize(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamSynchronize.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamSynchronize(<cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamQuery(cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamQuery.
    # A failed lazy context initialization is returned unchanged; any
    # non-success result of the driver call is also stored through
    # _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamQuery(<cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamAttachMemAsync(cudaStream_t stream, void* devPtr, size_t length, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamAttachMemAsync.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamAttachMemAsync(<cydriver.CUstream>stream, <cydriver.CUdeviceptr>devPtr, length, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamBeginCapture(cudaStream_t stream, cudaStreamCaptureMode mode) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamBeginCapture_v2.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamBeginCapture_v2(<cydriver.CUstream>stream, <cydriver.CUstreamCaptureMode>mode)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamBeginCaptureToGraph(cudaStream_t stream, cudaGraph_t graph, const cudaGraphNode_t* dependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, cudaStreamCaptureMode mode) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamBeginCaptureToGraph.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The dependency node and edge-data arrays are reinterpreted as the
        # corresponding driver pointer types.
        status = <cudaError_t>cydriver._cuStreamBeginCaptureToGraph(<cydriver.CUstream>stream, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>dependencies, <cydriver.CUgraphEdgeData*> dependencyData, numDependencies, <cydriver.CUstreamCaptureMode>mode)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaThreadExchangeStreamCaptureMode(cudaStreamCaptureMode* mode) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuThreadExchangeStreamCaptureMode.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuThreadExchangeStreamCaptureMode(<cydriver.CUstreamCaptureMode*>mode)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t* pGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamEndCapture.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamEndCapture(<cydriver.CUstream>stream, <cydriver.CUgraph*>pGraph)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamIsCapturing(cudaStream_t stream, cudaStreamCaptureStatus* pCaptureStatus) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamIsCapturing.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamIsCapturing(<cydriver.CUstream>stream, <cydriver.CUstreamCaptureStatus*>pCaptureStatus)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamUpdateCaptureDependencies(cudaStream_t stream, cudaGraphNode_t* dependencies, size_t numDependencies, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuStreamUpdateCaptureDependencies.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamUpdateCaptureDependencies(<cydriver.CUstream>stream, <cydriver.CUgraphNode*>dependencies, numDependencies, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEventCreateWithFlags(cudaEvent_t* event, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuEventCreate, forwarding the
    # caller-supplied flags.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEventCreate(<cydriver.CUevent*>event, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEventRecord(cudaEvent_t event, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuEventRecord.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEventRecord(<cydriver.CUevent>event, <cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuEventRecordWithFlags.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEventRecordWithFlags(<cydriver.CUevent>event, <cydriver.CUstream>stream, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEventSynchronize(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuEventSynchronize.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEventSynchronize(<cydriver.CUevent>event)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEventDestroy(cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuEventDestroy_v2.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEventDestroy_v2(<cydriver.CUevent>event)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEventElapsedTime(float* ms, cudaEvent_t start, cudaEvent_t end) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuEventElapsedTime.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEventElapsedTime(ms, <cydriver.CUevent>start, <cydriver.CUevent>end)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDestroyExternalMemory(cudaExternalMemory_t extMem) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuDestroyExternalMemory.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuDestroyExternalMemory(<cydriver.CUexternalMemory>extMem)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuDestroyExternalSemaphore.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuDestroyExternalSemaphore(<cydriver.CUexternalSemaphore>extSem)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaFuncSetCacheConfig(const void* func, cudaFuncCache cacheConfig) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuFuncSetCacheConfig.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # `func` is handed to the driver by a direct cast to CUfunction; no
        # lookup or translation happens in this wrapper.
        status = <cudaError_t>cydriver._cuFuncSetCacheConfig(<cydriver.CUfunction>func, <cydriver.CUfunc_cache>cacheConfig)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaFuncSetSharedMemConfig(const void* func, cudaSharedMemConfig config) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuFuncSetSharedMemConfig.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # `func` is handed to the driver by a direct cast to CUfunction; no
        # lookup or translation happens in this wrapper.
        status = <cudaError_t>cydriver._cuFuncSetSharedMemConfig(<cydriver.CUfunction>func, <cydriver.CUsharedconfig>config)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaFuncSetAttribute(const void* func, cudaFuncAttribute attr, int value) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuFuncSetAttribute.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # `func` is handed to the driver by a direct cast to CUfunction; no
        # lookup or translation happens in this wrapper.
        status = <cudaError_t>cydriver._cuFuncSetAttribute(<cydriver.CUfunction>func, <cydriver.CUfunction_attribute>attr, value)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaLaunchHostFunc(cudaStream_t stream, cudaHostFn_t fn, void* userData) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to the streamAddHostCallbackCommon helper rather than calling
    # a cydriver entry point directly.
    # A failed lazy context initialization is returned unchanged; a failed
    # helper call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = streamAddHostCallbackCommon(stream, fn, userData)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaOccupancyMaxActiveBlocksPerMultiprocessor(int* numBlocks, const void* func, int blockSize, size_t dynamicSMemSize) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around
    # cydriver._cuOccupancyMaxActiveBlocksPerMultiprocessor.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks, <cydriver.CUfunction>func, blockSize, dynamicSMemSize)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaOccupancyAvailableDynamicSMemPerBlock(size_t* dynamicSmemSize, const void* func, int numBlocks, int blockSize) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around
    # cydriver._cuOccupancyAvailableDynamicSMemPerBlock.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuOccupancyAvailableDynamicSMemPerBlock(dynamicSmemSize, <cydriver.CUfunction>func, numBlocks, blockSize)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int* numBlocks, const void* func, int blockSize, size_t dynamicSMemSize, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around
    # cydriver._cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks, <cydriver.CUfunction>func, blockSize, dynamicSMemSize, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMallocManaged(void** devPtr, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemAllocManaged.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The output pointer is reinterpreted as CUdeviceptr* for the driver.
        status = <cudaError_t>cydriver._cuMemAllocManaged(<cydriver.CUdeviceptr*>devPtr, size, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMalloc(void** devPtr, size_t size) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemAlloc_v2.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The output pointer is reinterpreted as CUdeviceptr* for the driver.
        status = <cudaError_t>cydriver._cuMemAlloc_v2(<cydriver.CUdeviceptr*>devPtr, size)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaFree(void* devPtr) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemFree_v2.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemFree_v2(<cydriver.CUdeviceptr>devPtr)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaFreeHost(void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemFreeHost.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemFreeHost(ptr)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaFreeArray(cudaArray_t array) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuArrayDestroy.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuArrayDestroy(<cydriver.CUarray>array)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMipmappedArrayDestroy.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMipmappedArrayDestroy(<cydriver.CUmipmappedArray>mipmappedArray)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaHostAlloc(void** pHost, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemHostAlloc; all three
    # arguments are passed through without conversion.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemHostAlloc(pHost, size, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaHostRegister(void* ptr, size_t size, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemHostRegister_v2; all three
    # arguments are passed through without conversion.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemHostRegister_v2(ptr, size, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaHostUnregister(void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemHostUnregister.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemHostUnregister(ptr)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaHostGetDevicePointer(void** pDevice, void* pHost, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemHostGetDevicePointer_v2.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The output pointer is reinterpreted as CUdeviceptr* for the driver.
        status = <cudaError_t>cydriver._cuMemHostGetDevicePointer_v2(<cydriver.CUdeviceptr*>pDevice, pHost, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaHostGetFlags(unsigned int* pFlags, void* pHost) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemHostGetFlags.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemHostGetFlags(pFlags, pHost)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGetMipmappedArrayLevel(cudaArray_t* levelArray, cudaMipmappedArray_const_t mipmappedArray, unsigned int level) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMipmappedArrayGetLevel.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMipmappedArrayGetLevel(<cydriver.CUarray*>levelArray, <cydriver.CUmipmappedArray>mipmappedArray, level)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemGetInfo(size_t* free, size_t* total) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemGetInfo_v2; both output
    # pointers are passed through without conversion.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemGetInfo_v2(free, total)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaArrayGetPlane(cudaArray_t* pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuArrayGetPlane.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuArrayGetPlane(<cydriver.CUarray*>pPlaneArray, <cydriver.CUarray>hArray, planeIdx)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemset(void* devPtr, int value, size_t count) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemsetD8_v2.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The int fill value is cast to unsigned char before being forwarded.
        status = <cudaError_t>cydriver._cuMemsetD8_v2(<cydriver.CUdeviceptr>devPtr, <unsigned char>value, count)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemset2D(void* devPtr, size_t pitch, int value, size_t width, size_t height) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemsetD2D8_v2.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The int fill value is cast to unsigned char before being forwarded.
        status = <cudaError_t>cydriver._cuMemsetD2D8_v2(<cydriver.CUdeviceptr>devPtr, pitch, <unsigned char>value, width, height)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemsetAsync(void* devPtr, int value, size_t count, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemsetD8Async.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The int fill value is cast to unsigned char before being forwarded.
        status = <cudaError_t>cydriver._cuMemsetD8Async(<cydriver.CUdeviceptr>devPtr, <unsigned char>value, count, <cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemset2DAsync(void* devPtr, size_t pitch, int value, size_t width, size_t height, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemsetD2D8Async.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The int fill value is cast to unsigned char before being forwarded.
        status = <cudaError_t>cydriver._cuMemsetD2D8Async(<cydriver.CUdeviceptr>devPtr, pitch, <unsigned char>value, width, height, <cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemPrefetchAsync(const void* devPtr, size_t count, int dstDevice, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemPrefetchAsync.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemPrefetchAsync(<cydriver.CUdeviceptr>devPtr, count, <cydriver.CUdevice>dstDevice, <cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemPrefetchAsync_v2(const void* devPtr, size_t count, cudaMemLocation location, unsigned int flags, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemPrefetchAsync_v2.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cydriver.CUmemLocation drvLocation
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # Build the driver location field by field from the runtime struct:
        # `type` needs an enum cast, `id` is assigned directly.
        drvLocation.type = <cydriver.CUmemLocationType>location.type
        drvLocation.id = location.id
        status = <cudaError_t>cydriver._cuMemPrefetchAsync_v2(<cydriver.CUdeviceptr>devPtr, count, drvLocation, flags, <cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMallocAsync(void** devPtr, size_t size, cudaStream_t hStream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemAllocAsync.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemAllocAsync(<cydriver.CUdeviceptr*>devPtr, size, <cydriver.CUstream>hStream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaFreeAsync(void* devPtr, cudaStream_t hStream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemFreeAsync.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemFreeAsync(<cydriver.CUdeviceptr>devPtr, <cydriver.CUstream>hStream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemPoolTrimTo(cudaMemPool_t memPool, size_t minBytesToKeep) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemPoolTrimTo.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemPoolTrimTo(<cydriver.CUmemoryPool>memPool, minBytesToKeep)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemPoolSetAttribute(cudaMemPool_t memPool, cudaMemPoolAttr attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemPoolSetAttribute.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # `value` is an untyped pointer and is passed through as-is.
        status = <cudaError_t>cydriver._cuMemPoolSetAttribute(<cydriver.CUmemoryPool>memPool, <cydriver.CUmemPool_attribute>attr, value)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemPoolGetAttribute(cudaMemPool_t memPool, cudaMemPoolAttr attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemPoolGetAttribute.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # `value` is an untyped pointer and is passed through as-is.
        status = <cudaError_t>cydriver._cuMemPoolGetAttribute(<cydriver.CUmemoryPool>memPool, <cydriver.CUmemPool_attribute>attr, value)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemPoolGetAccess(cudaMemAccessFlags* flags, cudaMemPool_t memPool, cudaMemLocation* location) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemPoolGetAccess.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The runtime flag and location pointers are reinterpreted in place as
        # pointers to the driver types (no copy is made).
        status = <cudaError_t>cydriver._cuMemPoolGetAccess(<cydriver.CUmemAccess_flags*>flags, <cydriver.CUmemoryPool>memPool, <cydriver.CUmemLocation*>location)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemPoolCreate(cudaMemPool_t* memPool, const cudaMemPoolProps* poolProps) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemPoolCreate.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The runtime props pointer is reinterpreted in place as a pointer to
        # the driver props struct (no copy is made).
        status = <cudaError_t>cydriver._cuMemPoolCreate(<cydriver.CUmemoryPool*>memPool, <cydriver.CUmemPoolProps*>poolProps)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemPoolDestroy(cudaMemPool_t memPool) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemPoolDestroy.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemPoolDestroy(<cydriver.CUmemoryPool>memPool)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMallocFromPoolAsync(void** ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemAllocFromPoolAsync.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemAllocFromPoolAsync(<cydriver.CUdeviceptr*>ptr, size, <cydriver.CUmemoryPool>memPool, <cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemPoolExportToShareableHandle(void* shareableHandle, cudaMemPool_t memPool, cudaMemAllocationHandleType handleType, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemPoolExportToShareableHandle.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The 32-bit runtime flags are widened to unsigned long long for the
        # driver call.
        status = <cudaError_t>cydriver._cuMemPoolExportToShareableHandle(shareableHandle, <cydriver.CUmemoryPool>memPool, <cydriver.CUmemAllocationHandleType>handleType, <unsigned long long>flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemPoolImportFromShareableHandle(cudaMemPool_t* memPool, void* shareableHandle, cudaMemAllocationHandleType handleType, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemPoolImportFromShareableHandle.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        # The 32-bit runtime flags are widened to unsigned long long for the
        # driver call.
        status = <cudaError_t>cydriver._cuMemPoolImportFromShareableHandle(<cydriver.CUmemoryPool*>memPool, shareableHandle, <cydriver.CUmemAllocationHandleType>handleType, <unsigned long long>flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemPoolExportPointer(cudaMemPoolPtrExportData* exportData, void* ptr) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemPoolExportPointer.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemPoolExportPointer(<cydriver.CUmemPoolPtrExportData*>exportData, <cydriver.CUdeviceptr>ptr)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemPoolImportPointer(void** ptr, cudaMemPool_t memPool, cudaMemPoolPtrExportData* exportData) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuMemPoolImportPointer.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemPoolImportPointer(<cydriver.CUdeviceptr*>ptr, <cydriver.CUmemoryPool>memPool, <cydriver.CUmemPoolPtrExportData*>exportData)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuGraphicsUnregisterResource.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphicsUnregisterResource(<cydriver.CUgraphicsResource>resource)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsResourceSetMapFlags(cudaGraphicsResource_t resource, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Runtime-side wrapper around cydriver._cuGraphicsResourceSetMapFlags_v2.
    # A failed lazy context initialization is returned unchanged; a failed
    # driver call is also stored through _setLastError before being returned.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphicsResourceSetMapFlags_v2(<cydriver.CUgraphicsResource>resource, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsMapResources(int count, cudaGraphicsResource_t* resources, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphicsMapResources once
    # m_global.lazyInitContextState() succeeds; `count` is cast to unsigned int
    # for the driver call. An init failure is returned untouched; a driver
    # failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphicsMapResources(<unsigned int>count, <cydriver.CUgraphicsResource*>resources, <cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsUnmapResources(int count, cudaGraphicsResource_t* resources, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphicsUnmapResources once
    # m_global.lazyInitContextState() succeeds; `count` is cast to unsigned int
    # for the driver call. An init failure is returned untouched; a driver
    # failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphicsUnmapResources(<unsigned int>count, <cydriver.CUgraphicsResource*>resources, <cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsResourceGetMappedPointer(void** devPtr, size_t* size, cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphicsResourceGetMappedPointer_v2 once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphicsResourceGetMappedPointer_v2(<cydriver.CUdeviceptr*>devPtr, size, <cydriver.CUgraphicsResource>resource)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsSubResourceGetMappedArray(cudaArray_t* array, cudaGraphicsResource_t resource, unsigned int arrayIndex, unsigned int mipLevel) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphicsSubResourceGetMappedArray once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphicsSubResourceGetMappedArray(<cydriver.CUarray*>array, <cydriver.CUgraphicsResource>resource, arrayIndex, mipLevel)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsResourceGetMappedMipmappedArray(cudaMipmappedArray_t* mipmappedArray, cudaGraphicsResource_t resource) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphicsResourceGetMappedMipmappedArray once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphicsResourceGetMappedMipmappedArray(<cydriver.CUmipmappedArray*>mipmappedArray, <cydriver.CUgraphicsResource>resource)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDestroyTextureObject(cudaTextureObject_t texObject) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuTexObjectDestroy once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuTexObjectDestroy(<cydriver.CUtexObject>texObject)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuSurfObjectDestroy once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuSurfObjectDestroy(<cydriver.CUsurfObject>surfObject)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphCreate(cudaGraph_t* pGraph, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphCreate once m_global.lazyInitContextState()
    # succeeds. An init failure is returned untouched; a driver failure is
    # also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphCreate(<cydriver.CUgraph*>pGraph, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphAddKernelNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Converts pNodeParams into a CUDA_KERNEL_NODE_PARAMS with
    # toDriverKernelNodeParams and forwards it to
    # cydriver._cuGraphAddKernelNode_v2.
    #
    # Returns the first non-success status encountered. Failures that happen
    # after context initialisation (parameter conversion or the driver call)
    # are stored through _setLastError; an init failure from
    # m_global.lazyInitContextState() is returned without being stored.
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUDA_KERNEL_NODE_PARAMS driverNodeParams
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    err = toDriverKernelNodeParams(pNodeParams, &driverNodeParams)
    if err != cudaSuccess:
        # Record the conversion failure too, so the last-error state agrees
        # with the status handed back to the caller.
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuGraphAddKernelNode_v2(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, &driverNodeParams)
    if err != cudaSuccess:
        _setLastError(err)
    return err

cdef cudaError_t _cudaGraphKernelNodeSetParams(cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Converts pNodeParams into a CUDA_KERNEL_NODE_PARAMS with
    # toDriverKernelNodeParams and forwards it to
    # cydriver._cuGraphKernelNodeSetParams_v2.
    #
    # Returns the first non-success status encountered. Failures that happen
    # after context initialisation (parameter conversion or the driver call)
    # are stored through _setLastError; an init failure from
    # m_global.lazyInitContextState() is returned without being stored.
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUDA_KERNEL_NODE_PARAMS driverNodeParams
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    err = toDriverKernelNodeParams(pNodeParams, &driverNodeParams)
    if err != cudaSuccess:
        # Record the conversion failure too, so the last-error state agrees
        # with the status handed back to the caller.
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuGraphKernelNodeSetParams_v2(<cydriver.CUgraphNode>node, &driverNodeParams)
    if err != cudaSuccess:
        _setLastError(err)
    return err

cdef cudaError_t _cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphKernelNodeCopyAttributes once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphKernelNodeCopyAttributes(<cydriver.CUgraphNode>hSrc, <cydriver.CUgraphNode>hDst)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphMemsetNodeGetParams(cudaGraphNode_t node, cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphMemsetNodeGetParams once
    # m_global.lazyInitContextState() succeeds; pNodeParams is passed through
    # as a CUDA_MEMSET_NODE_PARAMS pointer via a cast. An init failure is
    # returned untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphMemsetNodeGetParams(<cydriver.CUgraphNode>node, <cydriver.CUDA_MEMSET_NODE_PARAMS*>pNodeParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphMemsetNodeSetParams(cudaGraphNode_t node, const cudaMemsetParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphMemsetNodeSetParams once
    # m_global.lazyInitContextState() succeeds; pNodeParams is passed through
    # as a CUDA_MEMSET_NODE_PARAMS pointer via a cast. An init failure is
    # returned untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphMemsetNodeSetParams(<cydriver.CUgraphNode>node, <cydriver.CUDA_MEMSET_NODE_PARAMS*>pNodeParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphAddHostNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # After m_global.lazyInitContextState() succeeds, fills a local
    # CUDA_HOST_NODE_PARAMS through toDriverHostNodeParams and hands it to
    # cydriver._cuGraphAddHostNode. An init failure is returned untouched; a
    # driver failure is also stored through _setLastError.
    cdef cydriver.CUDA_HOST_NODE_PARAMS hostParams
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        toDriverHostNodeParams(pNodeParams, &hostParams)
        status = <cudaError_t>cydriver._cuGraphAddHostNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, &hostParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphHostNodeGetParams(cudaGraphNode_t node, cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphHostNodeGetParams once
    # m_global.lazyInitContextState() succeeds; pNodeParams is passed through
    # as a CUDA_HOST_NODE_PARAMS pointer via a cast. An init failure is
    # returned untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphHostNodeGetParams(<cydriver.CUgraphNode>node, <cydriver.CUDA_HOST_NODE_PARAMS*>pNodeParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphHostNodeSetParams(cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # After m_global.lazyInitContextState() succeeds, fills a local
    # CUDA_HOST_NODE_PARAMS through toDriverHostNodeParams and hands it to
    # cydriver._cuGraphHostNodeSetParams. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cydriver.CUDA_HOST_NODE_PARAMS hostParams
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        toDriverHostNodeParams(pNodeParams, &hostParams)
        status = <cudaError_t>cydriver._cuGraphHostNodeSetParams(<cydriver.CUgraphNode>node, &hostParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphAddChildGraphNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaGraph_t childGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphAddChildGraphNode once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphAddChildGraphNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, <cydriver.CUgraph>childGraph)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t* pGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphChildGraphNodeGetGraph once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphChildGraphNodeGetGraph(<cydriver.CUgraphNode>node, <cydriver.CUgraph*>pGraph)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphAddEmptyNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphAddEmptyNode once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphAddEmptyNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphAddEventRecordNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphAddEventRecordNode once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphAddEventRecordNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, <cydriver.CUevent>event)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t* event_out) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphEventRecordNodeGetEvent once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphEventRecordNodeGetEvent(<cydriver.CUgraphNode>node, <cydriver.CUevent*>event_out)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphEventRecordNodeSetEvent once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphEventRecordNodeSetEvent(<cydriver.CUgraphNode>node, <cydriver.CUevent>event)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphAddEventWaitNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphAddEventWaitNode once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphAddEventWaitNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, <cydriver.CUevent>event)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t* event_out) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphEventWaitNodeGetEvent once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphEventWaitNodeGetEvent(<cydriver.CUgraphNode>node, <cydriver.CUevent*>event_out)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphEventWaitNodeSetEvent once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphEventWaitNodeSetEvent(<cydriver.CUgraphNode>node, <cydriver.CUevent>event)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphAddExternalSemaphoresSignalNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphAddExternalSemaphoresSignalNode once
    # m_global.lazyInitContextState() succeeds; nodeParams is passed through
    # as a CUDA_EXT_SEM_SIGNAL_NODE_PARAMS pointer via a cast. An init failure
    # is returned untouched; a driver failure is also stored through
    # _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphAddExternalSemaphoresSignalNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, <cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*>nodeParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExternalSemaphoresSignalNodeGetParams(cudaGraphNode_t hNode, cudaExternalSemaphoreSignalNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphExternalSemaphoresSignalNodeGetParams once
    # m_global.lazyInitContextState() succeeds; params_out is passed through
    # as a CUDA_EXT_SEM_SIGNAL_NODE_PARAMS pointer via a cast. An init failure
    # is returned untouched; a driver failure is also stored through
    # _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExternalSemaphoresSignalNodeGetParams(<cydriver.CUgraphNode>hNode, <cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*>params_out)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExternalSemaphoresSignalNodeSetParams(cudaGraphNode_t hNode, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphExternalSemaphoresSignalNodeSetParams once
    # m_global.lazyInitContextState() succeeds; nodeParams is passed through
    # as a CUDA_EXT_SEM_SIGNAL_NODE_PARAMS pointer via a cast. An init failure
    # is returned untouched; a driver failure is also stored through
    # _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExternalSemaphoresSignalNodeSetParams(<cydriver.CUgraphNode>hNode, <cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*>nodeParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphAddExternalSemaphoresWaitNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphAddExternalSemaphoresWaitNode once
    # m_global.lazyInitContextState() succeeds; nodeParams is passed through
    # as a CUDA_EXT_SEM_WAIT_NODE_PARAMS pointer via a cast. An init failure
    # is returned untouched; a driver failure is also stored through
    # _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphAddExternalSemaphoresWaitNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, <cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS*>nodeParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExternalSemaphoresWaitNodeGetParams(cudaGraphNode_t hNode, cudaExternalSemaphoreWaitNodeParams* params_out) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphExternalSemaphoresWaitNodeGetParams once
    # m_global.lazyInitContextState() succeeds; params_out is passed through
    # as a CUDA_EXT_SEM_WAIT_NODE_PARAMS pointer via a cast. An init failure
    # is returned untouched; a driver failure is also stored through
    # _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExternalSemaphoresWaitNodeGetParams(<cydriver.CUgraphNode>hNode, <cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS*>params_out)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExternalSemaphoresWaitNodeSetParams(cudaGraphNode_t hNode, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphExternalSemaphoresWaitNodeSetParams once
    # m_global.lazyInitContextState() succeeds; nodeParams is passed through
    # as a CUDA_EXT_SEM_WAIT_NODE_PARAMS pointer via a cast. An init failure
    # is returned untouched; a driver failure is also stored through
    # _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExternalSemaphoresWaitNodeSetParams(<cydriver.CUgraphNode>hNode, <cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS*>nodeParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphAddMemFreeNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, void* dptr) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphAddMemFreeNode once
    # m_global.lazyInitContextState() succeeds; dptr is cast to CUdeviceptr.
    # An init failure is returned untouched; a driver failure is also stored
    # through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphAddMemFreeNode(<cydriver.CUgraphNode*>pGraphNode, <cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pDependencies, numDependencies, <cydriver.CUdeviceptr>dptr)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceGraphMemTrim(int device) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuDeviceGraphMemTrim once
    # m_global.lazyInitContextState() succeeds; `device` is cast to CUdevice.
    # An init failure is returned untouched; a driver failure is also stored
    # through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuDeviceGraphMemTrim(<cydriver.CUdevice>device)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceGetGraphMemAttribute(int device, cudaGraphMemAttributeType attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuDeviceGetGraphMemAttribute once
    # m_global.lazyInitContextState() succeeds; `device` and `attr` are cast
    # to their driver types. An init failure is returned untouched; a driver
    # failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuDeviceGetGraphMemAttribute(<cydriver.CUdevice>device, <cydriver.CUgraphMem_attribute>attr, value)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceSetGraphMemAttribute(int device, cudaGraphMemAttributeType attr, void* value) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuDeviceSetGraphMemAttribute once
    # m_global.lazyInitContextState() succeeds; `device` and `attr` are cast
    # to their driver types. An init failure is returned untouched; a driver
    # failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuDeviceSetGraphMemAttribute(<cydriver.CUdevice>device, <cydriver.CUgraphMem_attribute>attr, value)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphClone(cudaGraph_t* pGraphClone, cudaGraph_t originalGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphClone once m_global.lazyInitContextState()
    # succeeds. An init failure is returned untouched; a driver failure is
    # also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphClone(<cydriver.CUgraph*>pGraphClone, <cydriver.CUgraph>originalGraph)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphNodeFindInClone(cudaGraphNode_t* pNode, cudaGraphNode_t originalNode, cudaGraph_t clonedGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphNodeFindInClone once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphNodeFindInClone(<cydriver.CUgraphNode*>pNode, <cydriver.CUgraphNode>originalNode, <cydriver.CUgraph>clonedGraph)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphNodeGetType(cudaGraphNode_t node, cudaGraphNodeType* pType) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphNodeGetType once
    # m_global.lazyInitContextState() succeeds; pType is passed through as a
    # CUgraphNodeType pointer via a cast. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphNodeGetType(<cydriver.CUgraphNode>node, <cydriver.CUgraphNodeType*>pType)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphGetNodes(cudaGraph_t graph, cudaGraphNode_t* nodes, size_t* numNodes) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphGetNodes once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphGetNodes(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>nodes, numNodes)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphGetRootNodes(cudaGraph_t graph, cudaGraphNode_t* pRootNodes, size_t* pNumRootNodes) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphGetRootNodes once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphGetRootNodes(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>pRootNodes, pNumRootNodes)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphGetEdges(cudaGraph_t graph, cudaGraphNode_t* from_, cudaGraphNode_t* to, size_t* numEdges) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphGetEdges once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphGetEdges(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>from_, <cydriver.CUgraphNode*>to, numEdges)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphNodeGetDependencies(cudaGraphNode_t node, cudaGraphNode_t* pDependencies, size_t* pNumDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphNodeGetDependencies once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphNodeGetDependencies(<cydriver.CUgraphNode>node, <cydriver.CUgraphNode*>pDependencies, pNumDependencies)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphNodeGetDependentNodes(cudaGraphNode_t node, cudaGraphNode_t* pDependentNodes, size_t* pNumDependentNodes) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphNodeGetDependentNodes once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphNodeGetDependentNodes(<cydriver.CUgraphNode>node, <cydriver.CUgraphNode*>pDependentNodes, pNumDependentNodes)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphAddDependencies once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphAddDependencies(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>from_, <cydriver.CUgraphNode*>to, numDependencies)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphRemoveDependencies once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphRemoveDependencies(<cydriver.CUgraph>graph, <cydriver.CUgraphNode*>from_, <cydriver.CUgraphNode*>to, numDependencies)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphDestroyNode(cudaGraphNode_t node) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphDestroyNode once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphDestroyNode(<cydriver.CUgraphNode>node)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphInstantiate(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, unsigned long long flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Pure delegation: hands all three arguments to
    # _cudaGraphInstantiateWithFlags and returns its status unchanged.
    cdef cudaError_t status = _cudaGraphInstantiateWithFlags(pGraphExec, graph, flags)
    return status

cdef cudaError_t _cudaGraphInstantiateWithFlags(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, unsigned long long flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphInstantiateWithFlags once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphInstantiateWithFlags(<cydriver.CUgraphExec*>pGraphExec, <cydriver.CUgraph>graph, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExecKernelNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaKernelNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Converts pNodeParams into a CUDA_KERNEL_NODE_PARAMS with
    # toDriverKernelNodeParams and forwards it to
    # cydriver._cuGraphExecKernelNodeSetParams_v2.
    #
    # Returns the first non-success status encountered. Failures that happen
    # after context initialisation (parameter conversion or the driver call)
    # are stored through _setLastError; an init failure from
    # m_global.lazyInitContextState() is returned without being stored.
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUDA_KERNEL_NODE_PARAMS driverNodeParams
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    err = toDriverKernelNodeParams(pNodeParams, &driverNodeParams)
    if err != cudaSuccess:
        # Record the conversion failure too, so the last-error state agrees
        # with the status handed back to the caller.
        _setLastError(err)
        return err
    err = <cudaError_t>cydriver._cuGraphExecKernelNodeSetParams_v2(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>node, &driverNodeParams)
    if err != cudaSuccess:
        _setLastError(err)
    return err

cdef cudaError_t _cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const cudaHostNodeParams* pNodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # After m_global.lazyInitContextState() succeeds, fills a local
    # CUDA_HOST_NODE_PARAMS through toDriverHostNodeParams and hands it to
    # cydriver._cuGraphExecHostNodeSetParams. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cydriver.CUDA_HOST_NODE_PARAMS hostParams
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        toDriverHostNodeParams(pNodeParams, &hostParams)
        status = <cudaError_t>cydriver._cuGraphExecHostNodeSetParams(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>node, &hostParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExecChildGraphNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphExecChildGraphNodeSetParams once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExecChildGraphNodeSetParams(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>node, <cydriver.CUgraph>childGraph)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExecEventRecordNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphExecEventRecordNodeSetEvent once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExecEventRecordNodeSetEvent(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>hNode, <cydriver.CUevent>event)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExecEventWaitNodeSetEvent(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphExecEventWaitNodeSetEvent once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExecEventWaitNodeSetEvent(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>hNode, <cydriver.CUevent>event)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExecExternalSemaphoresSignalNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const cudaExternalSemaphoreSignalNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphExecExternalSemaphoresSignalNodeSetParams
    # once m_global.lazyInitContextState() succeeds; nodeParams is passed
    # through as a CUDA_EXT_SEM_SIGNAL_NODE_PARAMS pointer via a cast. An init
    # failure is returned untouched; a driver failure is also stored through
    # _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExecExternalSemaphoresSignalNodeSetParams(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>hNode, <cydriver.CUDA_EXT_SEM_SIGNAL_NODE_PARAMS*>nodeParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExecExternalSemaphoresWaitNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, const cudaExternalSemaphoreWaitNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphExecExternalSemaphoresWaitNodeSetParams
    # once m_global.lazyInitContextState() succeeds; nodeParams is passed
    # through as a CUDA_EXT_SEM_WAIT_NODE_PARAMS pointer via a cast. An init
    # failure is returned untouched; a driver failure is also stored through
    # _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExecExternalSemaphoresWaitNodeSetParams(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>hNode, <cydriver.CUDA_EXT_SEM_WAIT_NODE_PARAMS*>nodeParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphNodeSetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int isEnabled) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphNodeSetEnabled once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphNodeSetEnabled(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>hNode, isEnabled)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphNodeGetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int* isEnabled) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphNodeGetEnabled once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphNodeGetEnabled(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraphNode>hNode, isEnabled)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, cudaGraphExecUpdateResultInfo* resultInfo) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphExecUpdate_v2 once
    # m_global.lazyInitContextState() succeeds; resultInfo is passed through
    # as a CUgraphExecUpdateResultInfo pointer via a cast. An init failure is
    # returned untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExecUpdate_v2(<cydriver.CUgraphExec>hGraphExec, <cydriver.CUgraph>hGraph, <cydriver.CUgraphExecUpdateResultInfo*>resultInfo)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphUpload(cudaGraphExec_t graphExec, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphUpload once m_global.lazyInitContextState()
    # succeeds. An init failure is returned untouched; a driver failure is
    # also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphUpload(<cydriver.CUgraphExec>graphExec, <cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphLaunch(cudaGraphExec_t graphExec, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphLaunch once m_global.lazyInitContextState()
    # succeeds. An init failure is returned untouched; a driver failure is
    # also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphLaunch(<cydriver.CUgraphExec>graphExec, <cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExecDestroy(cudaGraphExec_t graphExec) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphExecDestroy once
    # m_global.lazyInitContextState() succeeds. An init failure is returned
    # untouched; a driver failure is also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExecDestroy(<cydriver.CUgraphExec>graphExec)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphDestroy(cudaGraph_t graph) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphDestroy once m_global.lazyInitContextState()
    # succeeds. An init failure is returned untouched; a driver failure is
    # also stored through _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphDestroy(<cydriver.CUgraph>graph)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphDebugDotPrint(cudaGraph_t graph, const char* path, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards `graph`, `path` and `flags` to cydriver._cuGraphDebugDotPrint
    # after lazy context initialisation. Driver failures are recorded via
    # _setLastError; initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphDebugDotPrint(<cydriver.CUgraph>graph, path, flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaUserObjectCreate(cudaUserObject_t* object_out, void* ptr, cudaHostFn_t destroy, unsigned int initialRefcount, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuUserObjectCreate after lazy context
    # initialisation, casting the output handle and destroy callback to
    # their driver types. Driver failures are recorded via _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuUserObjectCreate(
            <cydriver.CUuserObject*>object_out,
            ptr,
            <cydriver.CUhostFn>destroy,
            initialRefcount,
            flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaUserObjectRetain(cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuUserObjectRetain after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuUserObjectRetain(<cydriver.CUuserObject>object, count)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaUserObjectRelease(cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuUserObjectRelease after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuUserObjectRelease(<cydriver.CUuserObject>object, count)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphRetainUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphRetainUserObject after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphRetainUserObject(
            <cydriver.CUgraph>graph,
            <cydriver.CUuserObject>object,
            count,
            flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphReleaseUserObject(cudaGraph_t graph, cudaUserObject_t object, unsigned int count) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphReleaseUserObject after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphReleaseUserObject(
            <cydriver.CUgraph>graph,
            <cydriver.CUuserObject>object,
            count)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaProfilerStart() except ?cudaErrorCallRequiresNewerDriver nogil:
    # Calls cydriver._cuProfilerStart once the lazy context state is ready.
    # Driver failures are recorded via _setLastError; initialisation
    # failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuProfilerStart()
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaProfilerStop() except ?cudaErrorCallRequiresNewerDriver nogil:
    # Calls cydriver._cuProfilerStop once the lazy context state is ready.
    # Driver failures are recorded via _setLastError; initialisation
    # failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuProfilerStop()
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphicsEGLRegisterImage(cudaGraphicsResource_t* pCudaResource, EGLImageKHR image, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphicsEGLRegisterImage after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphicsEGLRegisterImage(
            <cydriver.CUgraphicsResource*>pCudaResource,
            image,
            flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEGLStreamConsumerConnect(cudaEglStreamConnection* conn, EGLStreamKHR eglStream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuEGLStreamConsumerConnect after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEGLStreamConsumerConnect(
            <cydriver.CUeglStreamConnection*>conn,
            eglStream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEGLStreamConsumerConnectWithFlags(cudaEglStreamConnection* conn, EGLStreamKHR eglStream, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuEGLStreamConsumerConnectWithFlags after lazy
    # context initialisation. Driver failures are recorded via
    # _setLastError; initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEGLStreamConsumerConnectWithFlags(
            <cydriver.CUeglStreamConnection*>conn,
            eglStream,
            flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEGLStreamConsumerDisconnect(cudaEglStreamConnection* conn) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuEGLStreamConsumerDisconnect after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEGLStreamConsumerDisconnect(<cydriver.CUeglStreamConnection*>conn)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEGLStreamConsumerAcquireFrame(cudaEglStreamConnection* conn, cudaGraphicsResource_t* pCudaResource, cudaStream_t* pStream, unsigned int timeout) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuEGLStreamConsumerAcquireFrame after lazy
    # context initialisation, casting each pointer to its driver type.
    # Driver failures are recorded via _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEGLStreamConsumerAcquireFrame(
            <cydriver.CUeglStreamConnection*>conn,
            <cydriver.CUgraphicsResource*>pCudaResource,
            <cydriver.CUstream*>pStream,
            timeout)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEGLStreamConsumerReleaseFrame(cudaEglStreamConnection* conn, cudaGraphicsResource_t pCudaResource, cudaStream_t* pStream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuEGLStreamConsumerReleaseFrame after lazy
    # context initialisation. Driver failures are recorded via
    # _setLastError; initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEGLStreamConsumerReleaseFrame(
            <cydriver.CUeglStreamConnection*>conn,
            <cydriver.CUgraphicsResource>pCudaResource,
            <cydriver.CUstream*>pStream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEGLStreamProducerConnect(cudaEglStreamConnection* conn, EGLStreamKHR eglStream, EGLint width, EGLint height) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuEGLStreamProducerConnect after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEGLStreamProducerConnect(
            <cydriver.CUeglStreamConnection*>conn,
            eglStream,
            width,
            height)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEGLStreamProducerDisconnect(cudaEglStreamConnection* conn) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuEGLStreamProducerDisconnect after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEGLStreamProducerDisconnect(<cydriver.CUeglStreamConnection*>conn)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaEventCreateFromEGLSync(cudaEvent_t* phEvent, EGLSyncKHR eglSync, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuEventCreateFromEGLSync after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuEventCreateFromEGLSync(
            <cydriver.CUevent*>phEvent,
            eglSync,
            flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaInitDevice(int deviceOrdinal, unsigned int deviceFlags, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Initialise the device identified by `deviceOrdinal`: make sure its
    # primary context exists and, when `flags` has cudaInitDeviceFlagsAreValid
    # set, validate `deviceFlags` and apply them to that primary context.
    #
    # Returns cudaSuccess on success; otherwise the driver-initialisation
    # error, cudaErrorInvalidDevice for an unknown ordinal,
    # cudaErrorInvalidValue for unsupported flags, or the error reported
    # while creating the primary context or setting its flags. Every error
    # except the lazyInitDriver one is also recorded via _setLastError.
    cdef cudaError_t err = cudaSuccess
    cdef cudaPythonDevice *device
    cdef unsigned int scheduleFlags

    err = m_global.lazyInitDriver()
    if err != cudaSuccess:
        return err

    device = m_global.getDevice(deviceOrdinal)
    if device == NULL:
        _setLastError(cudaErrorInvalidDevice)
        return cudaErrorInvalidDevice

    if device.primaryContext == NULL:
        # Propagate a failed primary-context creation instead of discarding it.
        # assumes initPrimaryContext returns cudaError_t like the other
        # helpers used in this file — TODO confirm against its declaration.
        err = initPrimaryContext(device)
        if err != cudaSuccess:
            _setLastError(err)
            return err

    if flags & cudaInitDeviceFlagsAreValid:
        # Remember the scheduling bits before the map-host bit is stripped.
        scheduleFlags = deviceFlags & cudaDeviceScheduleMask
        deviceFlags &= ~cudaDeviceMapHost
        if deviceFlags & ~cudaDeviceMask:
            _setLastError(cudaErrorInvalidValue)
            return cudaErrorInvalidValue
        if scheduleFlags:
            # A non-zero scheduling field must be exactly one of the three
            # named policies; any combination of them is rejected.
            if scheduleFlags != cudaDeviceScheduleSpin and scheduleFlags != cudaDeviceScheduleYield and scheduleFlags != cudaDeviceScheduleBlockingSync:
                _setLastError(cudaErrorInvalidValue)
                return cudaErrorInvalidValue

        err = <cudaError_t>cydriver._cuDevicePrimaryCtxSetFlags_v2(device[0].driverDevice, deviceFlags)
        if err != cudaSuccess:
            _setLastError(err)
            return err
    return cudaSuccess

cdef cudaError_t _cudaStreamGetId(cudaStream_t hStream, unsigned long long* streamId) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuStreamGetId after lazy context initialisation.
    # Driver failures are recorded via _setLastError; initialisation
    # failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamGetId(<cydriver.CUstream>hStream, streamId)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphInstantiateWithParams(cudaGraphExec_t* pGraphExec, cudaGraph_t graph, cudaGraphInstantiateParams* instantiateParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphInstantiateWithParams after lazy context
    # initialisation, reinterpreting the parameter struct pointer as the
    # driver's CUDA_GRAPH_INSTANTIATE_PARAMS. Driver failures are recorded
    # via _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphInstantiateWithParams(
            <cydriver.CUgraphExec*>pGraphExec,
            <cydriver.CUgraph>graph,
            <cydriver.CUDA_GRAPH_INSTANTIATE_PARAMS*>instantiateParams)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExecGetFlags(cudaGraphExec_t graphExec, unsigned long long* flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphExecGetFlags after lazy context
    # initialisation; `flags` is passed through as a cuuint64_t pointer.
    # Driver failures are recorded via _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphExecGetFlags(
            <cydriver.CUgraphExec>graphExec,
            <cydriver.cuuint64_t *>flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGetKernel(cudaKernel_t* kernelPtr, const void* entryFuncAddr) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Stores `entryFuncAddr`, reinterpreted as a cudaKernel_t, into
    # `kernelPtr[0]`. No driver call is made here.
    # A NULL `kernelPtr` yields cudaErrorInvalidValue; otherwise a NULL
    # `entryFuncAddr` yields cudaErrorInvalidDeviceFunction. Both are
    # recorded via _setLastError.
    cdef cudaError_t status = cudaSuccess
    if kernelPtr == NULL:
        status = cudaErrorInvalidValue
    elif entryFuncAddr == NULL:
        status = cudaErrorInvalidDeviceFunction

    if status != cudaSuccess:
        _setLastError(status)
        return status

    kernelPtr[0] = <cudaKernel_t>entryFuncAddr
    return cudaSuccess

cdef cudaError_t _cudaGraphAddNode(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, size_t numDependencies, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Adds a node through cydriver._cuGraphAddNode. The runtime parameter
    # struct is converted with toDriverGraphNodeParams before the call and,
    # on success, toCudartGraphNodeOutParams copies results back into it.
    # A NULL `nodeParams` yields cudaErrorInvalidValue. Every failure on
    # this path, including lazy-initialisation failure, is recorded via
    # _setLastError before being returned.
    cdef cudaError_t status
    cdef cydriver.CUgraphNodeParams drvParams

    if nodeParams == NULL:
        status = cudaErrorInvalidValue
    else:
        status = m_global.lazyInitContextState()
        if status == cudaSuccess:
            status = toDriverGraphNodeParams(nodeParams, &drvParams)
        if status == cudaSuccess:
            status = <cudaError_t>cydriver._cuGraphAddNode(pGraphNode, graph, pDependencies, numDependencies, &drvParams)
        if status == cudaSuccess:
            toCudartGraphNodeOutParams(&drvParams, nodeParams)
            return cudaSuccess

    _setLastError(status)
    return status

cdef cudaError_t _cudaGraphNodeSetParams(cudaGraphNode_t node, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Updates a node through cydriver._cuGraphNodeSetParams. The runtime
    # parameter struct is converted with toDriverGraphNodeParams before the
    # call and, on success, toCudartGraphNodeOutParams copies results back.
    # A NULL `nodeParams` yields cudaErrorInvalidValue. Every failure on
    # this path, including lazy-initialisation failure, is recorded via
    # _setLastError before being returned.
    cdef cudaError_t status
    cdef cydriver.CUgraphNodeParams drvParams

    if nodeParams == NULL:
        status = cudaErrorInvalidValue
    else:
        status = m_global.lazyInitContextState()
        if status == cudaSuccess:
            status = toDriverGraphNodeParams(nodeParams, &drvParams)
        if status == cudaSuccess:
            status = <cudaError_t>cydriver._cuGraphNodeSetParams(node, &drvParams)
        if status == cudaSuccess:
            toCudartGraphNodeOutParams(&drvParams, nodeParams)
            return cudaSuccess

    _setLastError(status)
    return status

cdef cudaError_t _cudaGraphExecNodeSetParams(cudaGraphExec_t graphExec, cudaGraphNode_t node, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Updates a node of an executable graph through
    # cydriver._cuGraphExecNodeSetParams. The runtime parameter struct is
    # converted with toDriverGraphNodeParams before the call and, on
    # success, toCudartGraphNodeOutParams copies results back.
    # A NULL `nodeParams` yields cudaErrorInvalidValue. Every failure on
    # this path, including lazy-initialisation failure, is recorded via
    # _setLastError before being returned.
    cdef cudaError_t status
    cdef cydriver.CUgraphNodeParams drvParams

    if nodeParams == NULL:
        status = cudaErrorInvalidValue
    else:
        status = m_global.lazyInitContextState()
        if status == cudaSuccess:
            status = toDriverGraphNodeParams(nodeParams, &drvParams)
        if status == cudaSuccess:
            status = <cudaError_t>cydriver._cuGraphExecNodeSetParams(graphExec, node, &drvParams)
        if status == cudaSuccess:
            toCudartGraphNodeOutParams(&drvParams, nodeParams)
            return cudaSuccess

    _setLastError(status)
    return status

cdef cudaError_t _cudaGraphConditionalHandleCreate(cudaGraphConditionalHandle* pHandle_out, cudaGraph_t graph, unsigned int defaultLaunchValue, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Creates a conditional handle through
    # cydriver._cuGraphConditionalHandleCreate, supplying the context
    # obtained from cydriver._cuCtxGetCurrent. Lazy-initialisation failures
    # are returned unrecorded; failures from either driver call are
    # recorded via _setLastError.
    cdef cydriver.CUcontext currentCtx
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status != cudaSuccess:
        return status

    status = <cudaError_t>cydriver._cuCtxGetCurrent(&currentCtx)
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphConditionalHandleCreate(
            <cydriver.CUgraphConditionalHandle *>pHandle_out,
            graph,
            currentCtx,
            defaultLaunchValue,
            flags)
    if status != cudaSuccess:
        _setLastError(status)
    return status

cdef cudaError_t _cudaStreamGetCaptureInfo_v3(cudaStream_t stream, cudaStreamCaptureStatus* captureStatus_out, unsigned long long* id_out, cudaGraph_t* graph_out, const cudaGraphNode_t** dependencies_out, const cudaGraphEdgeData** edgeData_out, size_t* numDependencies_out) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to streamGetCaptureInfoCommon_v3 after lazy context
    # initialisation, passing every argument through unchanged. Failures of
    # the helper are recorded via _setLastError; initialisation failures
    # are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = streamGetCaptureInfoCommon_v3(
            stream,
            captureStatus_out,
            id_out,
            graph_out,
            dependencies_out,
            edgeData_out,
            numDependencies_out)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamUpdateCaptureDependencies_v2(cudaStream_t stream, cudaGraphNode_t* dependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, unsigned int flags) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuStreamUpdateCaptureDependencies_v2 after lazy
    # context initialisation. Driver failures are recorded via
    # _setLastError; initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuStreamUpdateCaptureDependencies_v2(
            <cydriver.CUstream>stream,
            <cydriver.CUgraphNode*>dependencies,
            <const cydriver.CUgraphEdgeData*>dependencyData,
            numDependencies,
            flags)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphGetEdges_v2(cudaGraph_t graph, cudaGraphNode_t* from_, cudaGraphNode_t* to, cudaGraphEdgeData* edgeData, size_t* numEdges) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphGetEdges_v2 after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphGetEdges_v2(
            <cydriver.CUgraph>graph,
            <cydriver.CUgraphNode*>from_,
            <cydriver.CUgraphNode*>to,
            <cydriver.CUgraphEdgeData*>edgeData,
            numEdges)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphNodeGetDependencies_v2(cudaGraphNode_t node, cudaGraphNode_t* pDependencies, cudaGraphEdgeData* edgeData, size_t* pNumDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphNodeGetDependencies_v2 after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphNodeGetDependencies_v2(
            <cydriver.CUgraphNode>node,
            <cydriver.CUgraphNode*>pDependencies,
            <cydriver.CUgraphEdgeData*>edgeData,
            pNumDependencies)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphNodeGetDependentNodes_v2(cudaGraphNode_t node, cudaGraphNode_t* pDependentNodes, cudaGraphEdgeData* edgeData, size_t* pNumDependentNodes) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphNodeGetDependentNodes_v2 after lazy
    # context initialisation. Driver failures are recorded via
    # _setLastError; initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphNodeGetDependentNodes_v2(
            <cydriver.CUgraphNode>node,
            <cydriver.CUgraphNode*>pDependentNodes,
            <cydriver.CUgraphEdgeData*>edgeData,
            pNumDependentNodes)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphAddDependencies_v2(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, const cudaGraphEdgeData* edgeData, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphAddDependencies_v2 after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphAddDependencies_v2(
            <cydriver.CUgraph>graph,
            <cydriver.CUgraphNode*>from_,
            <cydriver.CUgraphNode*>to,
            <const cydriver.CUgraphEdgeData*>edgeData,
            numDependencies)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphRemoveDependencies_v2(cudaGraph_t graph, const cudaGraphNode_t* from_, const cudaGraphNode_t* to, const cudaGraphEdgeData* edgeData, size_t numDependencies) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuGraphRemoveDependencies_v2 after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuGraphRemoveDependencies_v2(
            <cydriver.CUgraph>graph,
            <cydriver.CUgraphNode*>from_,
            <cydriver.CUgraphNode*>to,
            <const cydriver.CUgraphEdgeData*>edgeData,
            numDependencies)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaGraphAddNode_v2(cudaGraphNode_t* pGraphNode, cudaGraph_t graph, const cudaGraphNode_t* pDependencies, const cudaGraphEdgeData* dependencyData, size_t numDependencies, cudaGraphNodeParams* nodeParams) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Adds a node, with per-edge dependency data, through
    # cydriver._cuGraphAddNode_v2. The runtime parameter struct is converted
    # with toDriverGraphNodeParams before the call and, on success,
    # toCudartGraphNodeOutParams copies results back into it.
    # A NULL `nodeParams` yields cudaErrorInvalidValue. Every failure on
    # this path, including lazy-initialisation failure, is recorded via
    # _setLastError before being returned.
    cdef cudaError_t status
    cdef cydriver.CUgraphNodeParams drvParams

    if nodeParams == NULL:
        status = cudaErrorInvalidValue
    else:
        status = m_global.lazyInitContextState()
        if status == cudaSuccess:
            status = toDriverGraphNodeParams(nodeParams, &drvParams)
        if status == cudaSuccess:
            status = <cudaError_t>cydriver._cuGraphAddNode_v2(pGraphNode, graph, pDependencies, <const cydriver.CUgraphEdgeData*>dependencyData, numDependencies, &drvParams)
        if status == cudaSuccess:
            toCudartGraphNodeOutParams(&drvParams, nodeParams)
            return cudaSuccess

    _setLastError(status)
    return status
cimport cuda.bindings._lib.dlfcn as dlfcn

cdef cudaError_t _getLocalRuntimeVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Query the version of the locally installed CUDA runtime by loading
    # libcudart.so.12 with dlopen, resolving `cudaRuntimeGetVersion` with
    # dlsym and calling it with `runtimeVersion`.
    #
    # Returns whatever `cudaRuntimeGetVersion` returns.
    # Raises RuntimeError (after acquiring the GIL) when the library cannot
    # be opened or the symbol cannot be found.
    cdef cudaError_t err = cudaSuccess

    # Load
    handle = dlfcn.dlopen('libcudart.so.12', dlfcn.RTLD_NOW)
    if handle == NULL:
        with gil:
            raise RuntimeError(f'Failed to dlopen libcudart.so.12')

    __cudaRuntimeGetVersion = dlfcn.dlsym(handle, 'cudaRuntimeGetVersion')

    if __cudaRuntimeGetVersion == NULL:
        # Close the library before raising so the handle is not leaked.
        dlfcn.dlclose(handle)
        with gil:
            raise RuntimeError(f'Function "cudaRuntimeGetVersion" not found in libcudart.so.12')

    # Call
    err = (<cudaError_t (*)(int*) except ?cudaErrorCallRequiresNewerDriver nogil> __cudaRuntimeGetVersion)(runtimeVersion)

    # Unload
    dlfcn.dlclose(handle)

    # Return
    return err

cdef cudaError_t _cudaDeviceRegisterAsyncNotification(int device, cudaAsyncCallback callbackFunc, void* userData, cudaAsyncCallbackHandle_t* callback) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to DeviceRegisterAsyncNotificationCommon with the arguments
    # unchanged; a failing result is recorded via _setLastError and then
    # returned. No lazy initialisation is performed in this wrapper itself.
    cdef cudaError_t status = DeviceRegisterAsyncNotificationCommon(device, callbackFunc, userData, callback)
    if status != cudaSuccess:
        _setLastError(status)
    return status

cdef cudaError_t _cudaDeviceUnregisterAsyncNotification(int device, cudaAsyncCallbackHandle_t callback) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Delegates to DeviceUnregisterAsyncNotificationCommon with the
    # arguments unchanged; a failing result is recorded via _setLastError
    # and then returned. No lazy initialisation is performed in this
    # wrapper itself.
    cdef cudaError_t status = DeviceUnregisterAsyncNotificationCommon(device, callback)
    if status != cudaSuccess:
        _setLastError(status)
    return status

cdef cudaError_t _cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Looks up `symbol` through cydriver._cuGetProcAddress_v2, passing
    # `driverStatus` as the driver's query-result pointer. A failing result
    # is recorded via _setLastError and then returned. No lazy
    # initialisation is performed in this wrapper itself.
    cdef cudaError_t status = <cudaError_t>cydriver._cuGetProcAddress_v2(
        symbol,
        funcPtr,
        cudaVersion,
        flags,
        <cydriver.CUdriverProcAddressQueryResult*>driverStatus)
    if status != cudaSuccess:
        _setLastError(status)
    return status

cdef cudaError_t _cudaLibraryLoadData(cudaLibrary_t* library, const void* code, cudaJitOption* jitOptions, void** jitOptionsValues, unsigned int numJitOptions, cudaLibraryOption* libraryOptions, void** libraryOptionValues, unsigned int numLibraryOptions) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuLibraryLoadData after lazy context
    # initialisation, casting the two option arrays to their driver enum
    # pointer types. Driver failures are recorded via _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuLibraryLoadData(
            library,
            code,
            <cydriver.CUjit_option*>jitOptions,
            jitOptionsValues,
            numJitOptions,
            <cydriver.CUlibraryOption*>libraryOptions,
            libraryOptionValues,
            numLibraryOptions)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaLibraryLoadFromFile(cudaLibrary_t* library, const char* fileName, cudaJitOption* jitOptions, void** jitOptionsValues, unsigned int numJitOptions, cudaLibraryOption* libraryOptions, void** libraryOptionValues, unsigned int numLibraryOptions) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuLibraryLoadFromFile after lazy context
    # initialisation, casting the two option arrays to their driver enum
    # pointer types. Driver failures are recorded via _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuLibraryLoadFromFile(
            library,
            fileName,
            <cydriver.CUjit_option*>jitOptions,
            jitOptionsValues,
            numJitOptions,
            <cydriver.CUlibraryOption*>libraryOptions,
            libraryOptionValues,
            numLibraryOptions)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaLibraryUnload(cudaLibrary_t library) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuLibraryUnload after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuLibraryUnload(library)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaLibraryGetKernel(cudaKernel_t* pKernel, cudaLibrary_t library, const char* name) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuLibraryGetKernel after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuLibraryGetKernel(pKernel, library, name)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaLibraryGetGlobal(void** dptr, size_t* numbytes, cudaLibrary_t library, const char* name) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuLibraryGetGlobal after lazy context
    # initialisation; `dptr` is reinterpreted as a CUdeviceptr pointer.
    # Driver failures are recorded via _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuLibraryGetGlobal(
            <cydriver.CUdeviceptr*>dptr,
            numbytes,
            library,
            name)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaLibraryGetManaged(void** dptr, size_t* numbytes, cudaLibrary_t library, const char* name) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuLibraryGetManaged after lazy context
    # initialisation; `dptr` is reinterpreted as a CUdeviceptr pointer.
    # Driver failures are recorded via _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuLibraryGetManaged(
            <cydriver.CUdeviceptr*>dptr,
            numbytes,
            library,
            name)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaLibraryGetUnifiedFunction(void** fptr, cudaLibrary_t library, const char* symbol) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuLibraryGetUnifiedFunction after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuLibraryGetUnifiedFunction(fptr, library, symbol)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaLibraryGetKernelCount(unsigned int* count, cudaLibrary_t lib) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuLibraryGetKernelCount after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuLibraryGetKernelCount(count, lib)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaLibraryEnumerateKernels(cudaKernel_t* kernels, unsigned int numKernels, cudaLibrary_t lib) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuLibraryEnumerateKernels after lazy context
    # initialisation. Driver failures are recorded via _setLastError;
    # initialisation failures are returned unrecorded.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuLibraryEnumerateKernels(kernels, numKernels, lib)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaKernelSetAttributeForDevice(cudaKernel_t kernel, cudaFuncAttribute attr, int value, int device) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuKernelSetAttribute after lazy context
    # initialisation. Note the driver call takes (attribute, value, kernel,
    # device), which differs from this function's parameter order.
    # Driver failures are recorded via _setLastError.
    # NOTE(review): `device` is cast straight to CUdevice without a lookup
    # through m_global — confirm the ordinal and driver handle coincide.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuKernelSetAttribute(
            <cydriver.CUfunction_attribute>attr,
            value,
            kernel,
            <cydriver.CUdevice>device)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaStreamGetDevice(cudaStream_t stream, int* device) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Report, in `device[0]`, the runtime ordinal of the device that
    # `stream` belongs to. The driver device comes from
    # cydriver._cuStreamGetDevice and is mapped back to a runtime device
    # with m_global.getDeviceFromDriver.
    #
    # Returns cudaSuccess on success; cudaErrorInvalidValue when `device`
    # is NULL; the lazy-initialisation error (unrecorded) or the driver
    # error; cudaErrorInvalidDevice when the driver device has no matching
    # runtime device. All but the initialisation error are recorded via
    # _setLastError.
    cdef cudaError_t err = cudaSuccess
    cdef cydriver.CUdevice driverDevice = 0

    # Reject a NULL output pointer up front rather than writing through it.
    if device == NULL:
        _setLastError(cudaErrorInvalidValue)
        return cudaErrorInvalidValue

    # Lazy context initialisation is only triggered for the NULL, legacy
    # and per-thread stream handles.
    if stream == <cudaStream_t>NULL or stream == <cudaStream_t>cudaStreamLegacy or stream == <cudaStream_t>cudaStreamPerThread:
        err = m_global.lazyInitContextState()
        if err != cudaSuccess:
            return err
    err = <cudaError_t>cydriver._cuStreamGetDevice(<cydriver.CUstream>stream, &driverDevice)
    if err != cudaSuccess:
        _setLastError(err)
        return err
    cudaDevice = m_global.getDeviceFromDriver(driverDevice)
    # Guard the lookup result before dereferencing it, mirroring the NULL
    # check applied to m_global.getDevice() in _cudaInitDevice.
    if cudaDevice == NULL:
        _setLastError(cudaErrorInvalidDevice)
        return cudaErrorInvalidDevice
    device[0] = cudaDevice[0].deviceOrdinal
    return cudaSuccess

cdef cudaError_t _cudaEventElapsedTime_v2(float* ms, cudaEvent_t start, cudaEvent_t end) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuEventElapsedTime_v2 unless `ms` is NULL, in
    # which case cudaErrorInvalidValue is produced without calling the
    # driver. Any non-success result is recorded via _setLastError. No lazy
    # initialisation is performed in this wrapper itself.
    cdef cudaError_t status
    if ms == NULL:
        status = cudaErrorInvalidValue
    else:
        status = <cudaError_t>cydriver._cuEventElapsedTime_v2(
            ms,
            <cydriver.CUevent>start,
            <cydriver.CUevent>end)
    if status != cudaSuccess:
        _setLastError(status)
    return status

cdef cudaError_t _cudaMemcpyBatchAsync(void** dsts, void** srcs, size_t* sizes, size_t count, cudaMemcpyAttributes* attrs, size_t* attrsIdxs, size_t numAttrs, size_t* failIdx, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Forwards to cydriver._cuMemcpyBatchAsync after lazy context
    # initialisation; the destination and source arrays are reinterpreted
    # as CUdeviceptr arrays. Driver failures are recorded via _setLastError.
    cdef cudaError_t status = m_global.lazyInitContextState()
    if status == cudaSuccess:
        status = <cudaError_t>cydriver._cuMemcpyBatchAsync(
            <cydriver.CUdeviceptr*>dsts,
            <cydriver.CUdeviceptr*>srcs,
            sizes,
            count,
            <cydriver.CUmemcpyAttributes*>attrs,
            attrsIdxs,
            numAttrs,
            failIdx,
            <cydriver.CUstream>stream)
        if status != cudaSuccess:
            _setLastError(status)
    return status

cdef cudaError_t _cudaMemcpy3DBatchAsync(size_t numOps, cudaMemcpy3DBatchOp* opList, size_t* failIdx, unsigned long long flags, cudaStream_t stream) except ?cudaErrorCallRequiresNewerDriver nogil:
    # Submit a batch of 3D copies by forwarding to the driver binding after
    # lazy context initialisation; `opList` is reinterpreted as an array of
    # CUDA_MEMCPY3D_BATCH_OP.
    #
    # Returns the lazy-initialisation error (unrecorded) or the driver
    # result; driver failures are recorded via _setLastError.
    cdef cudaError_t err = cudaSuccess
    err = m_global.lazyInitContextState()
    if err != cudaSuccess:
        return err
    # Use the underscore-prefixed binding, as every other call into the
    # cydriver module in this file does.
    err = <cudaError_t>cydriver._cuMemcpy3DBatchAsync(numOps, <cydriver.CUDA_MEMCPY3D_BATCH_OP*>opList, failIdx, flags, <cydriver.CUstream>stream)
    if err != cudaSuccess:
        _setLastError(err)
    return err
