/* ============================================================
 *
 * This file is a part of digiKam project
 * https://www.digikam.org
 *
 * Date        : 2012-10-31
 * Description : QtAV: Multimedia framework based on Qt and FFmpeg
 *               Based on NVIDIA Corporation CUDA header
 *
 * SPDX-FileCopyrightText: 2012-2022 Wang Bin <wbsecg1 at gmail dot com>
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 *
 * ============================================================ */

#ifndef QTAV_DYNLINK_CUDA_H
#define QTAV_DYNLINK_CUDA_H

#include <stddef.h>

/** Version number advertised by this header. */
#define CUDA_VERSION 7050

/**
 * CUDAAPI: calling-convention macro. It is empty everywhere except on
 * Windows and Cygwin builds, where it expands to __stdcall.
 */
#if !defined(_WIN32) && !defined(__CYGWIN__)   // krazy:exclude=cpp
#   define CUDAAPI
#else
#   define CUDAAPI __stdcall
#endif

/** CUDA device, represented as a plain int. */
typedef int CUdevice;

/** CUDA array handle; the pointed-to struct is only forward-declared here. */
typedef struct CUarray_st *CUarray;

/** CUDA context handle; the pointed-to struct is only forward-declared here. */
typedef struct CUctx_st *CUcontext;

/**
 * CUDA device pointer, stored as an unsigned integer.
 *
 * The 64-bit representation is selected on every 64-bit target: any LP64
 * ABI (x86-64, AArch64, ppc64le, ...) and 64-bit Windows, in addition to
 * the x86-64 specific macros that were already recognised. Testing only
 * the x86-64 macros left other 64-bit platforms with a 32-bit type that
 * cannot hold a 64-bit device address. All remaining targets keep the
 * 32-bit representation.
 */
#if defined(_WIN64) || defined(__LP64__) || defined(__x86_64) || defined(AMD64) || defined(_M_AMD64)   // krazy:exclude=cpp

typedef unsigned long long            CUdeviceptr;

#else

typedef unsigned int                  CUdeviceptr;

#endif

/** CUDA stream handle; the pointed-to struct is only forward-declared here. */
typedef struct CUstream_st *CUstream;

/** CUDA graphics interop resource handle; the struct is only forward-declared here. */
typedef struct CUgraphicsResource_st *CUgraphicsResource;

/**
 * Flags accepted when creating a context.
 *
 * The two mask enumerators, and the presence of CU_CTX_PRIMARY, depend on
 * __CUDA_API_VERSION. If that macro is not defined the preprocessor
 * evaluates it as 0, so the pre-4000 values are the ones selected.
 */
typedef enum CUctx_flags_enum
{
    /** Automatic scheduling */
    CU_CTX_SCHED_AUTO = 0x00,

    /** Set spin as default scheduling */
    CU_CTX_SCHED_SPIN = 0x01,

    /** Set yield as default scheduling */
    CU_CTX_SCHED_YIELD = 0x02,

    /** Set blocking synchronization as default scheduling */
    CU_CTX_SCHED_BLOCKING_SYNC = 0x04,

    /** Same value as CU_CTX_SCHED_BLOCKING_SYNC. \deprecated */
    CU_CTX_BLOCKING_SYNC = 0x04,

    /** Support mapped pinned allocations */
    CU_CTX_MAP_HOST = 0x08,

    /** Keep local memory allocation after launch */
    CU_CTX_LMEM_RESIZE_TO_MAX = 0x10,

#if defined(__CUDA_API_VERSION) && (__CUDA_API_VERSION >= 4000)

    CU_CTX_SCHED_MASK = 0x07,

    /** Initialize and return the primary context */
    CU_CTX_PRIMARY = 0x20,

    CU_CTX_FLAGS_MASK = 0x3f

#else

    /* Legacy (API version below 4000, or version macro undefined) masks. */
    CU_CTX_SCHED_MASK = 0x03,
    CU_CTX_FLAGS_MASK = 0x1f

#endif

} CUctx_flags;

/**
 * Flags accepted when creating a stream.
 */
typedef enum CUstream_flags_enum
{
    /** Default stream flag */
    CU_STREAM_DEFAULT = 0,

    /** Stream does not synchronize with stream 0 (the NULL stream) */
    CU_STREAM_NON_BLOCKING = 1
} CUstream_flags;

/**
 * Identifiers of queryable device properties.
 *
 * Attributes 36 and above are declared only when __CUDA_API_VERSION is
 * defined and at least 4000; an undefined macro evaluates to 0 in the
 * preprocessor, which leaves them out.
 */
typedef enum CUdevice_attribute_enum
{
    /** Maximum number of threads per block */
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK = 1,

    /** Maximum block dimension X */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X = 2,

    /** Maximum block dimension Y */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Y = 3,

    /** Maximum block dimension Z */
    CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_Z = 4,

    /** Maximum grid dimension X */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X = 5,

    /** Maximum grid dimension Y */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Y = 6,

    /** Maximum grid dimension Z */
    CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_Z = 7,

    /** Maximum shared memory available per block, in bytes */
    CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK = 8,

    /** Deprecated alias of CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK */
    CU_DEVICE_ATTRIBUTE_SHARED_MEMORY_PER_BLOCK = 8,

    /** Memory available on device for __constant__ variables in a CUDA C kernel, in bytes */
    CU_DEVICE_ATTRIBUTE_TOTAL_CONSTANT_MEMORY = 9,

    /** Warp size, in threads */
    CU_DEVICE_ATTRIBUTE_WARP_SIZE = 10,

    /** Maximum pitch allowed by memory copies, in bytes */
    CU_DEVICE_ATTRIBUTE_MAX_PITCH = 11,

    /** Maximum number of 32-bit registers available per block */
    CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK = 12,

    /** Deprecated alias of CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK */
    CU_DEVICE_ATTRIBUTE_REGISTERS_PER_BLOCK = 12,

    /** Peak clock frequency, in kilohertz */
    CU_DEVICE_ATTRIBUTE_CLOCK_RATE = 13,

    /** Alignment requirement for textures */
    CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,

    /** Device can possibly copy memory and execute a kernel concurrently */
    CU_DEVICE_ATTRIBUTE_GPU_OVERLAP = 15,

    /** Number of multiprocessors on the device */
    CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT = 16,

    /** Whether there is a run time limit on kernels */
    CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT = 17,

    /** Device is integrated with host memory */
    CU_DEVICE_ATTRIBUTE_INTEGRATED = 18,

    /** Device can map host memory into the CUDA address space */
    CU_DEVICE_ATTRIBUTE_CAN_MAP_HOST_MEMORY = 19,

    /** Compute mode (see ::CUcomputemode for details) */
    CU_DEVICE_ATTRIBUTE_COMPUTE_MODE = 20,

    /** Maximum 1D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_WIDTH = 21,

    /** Maximum 2D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_WIDTH = 22,

    /** Maximum 2D texture height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_HEIGHT = 23,

    /** Maximum 3D texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_WIDTH = 24,

    /** Maximum 3D texture height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_HEIGHT = 25,

    /** Maximum 3D texture depth */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE3D_DEPTH = 26,

    /** Maximum texture array width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_WIDTH = 27,

    /** Maximum texture array height */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_HEIGHT = 28,

    /** Maximum number of slices in a texture array */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE2D_ARRAY_NUMSLICES = 29,

    /** Alignment requirement for surfaces */
    CU_DEVICE_ATTRIBUTE_SURFACE_ALIGNMENT = 30,

    /** Device can possibly execute multiple kernels concurrently */
    CU_DEVICE_ATTRIBUTE_CONCURRENT_KERNELS = 31,

    /** Device has ECC support enabled */
    CU_DEVICE_ATTRIBUTE_ECC_ENABLED = 32,

    /** PCI bus ID of the device */
    CU_DEVICE_ATTRIBUTE_PCI_BUS_ID = 33,

    /** PCI device ID of the device */
    CU_DEVICE_ATTRIBUTE_PCI_DEVICE_ID = 34,

    /** Device is using the TCC driver model */
    CU_DEVICE_ATTRIBUTE_TCC_DRIVER = 35

#if defined(__CUDA_API_VERSION) && (__CUDA_API_VERSION >= 4000)

    /* The separator for the previous enumerator lives inside the guard. */
    ,

    /** Peak memory clock frequency, in kilohertz */
    CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE = 36,

    /** Global memory bus width, in bits */
    CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH = 37,

    /** Size of the L2 cache, in bytes */
    CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE = 38,

    /** Maximum resident threads per multiprocessor */
    CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR = 39,

    /** Number of asynchronous engines */
    CU_DEVICE_ATTRIBUTE_ASYNC_ENGINE_COUNT = 40,

    /** Device shares a unified address space with the host */
    CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,

    /** Maximum 1D layered texture width */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_WIDTH = 42,

    /** Maximum number of layers in a 1D layered texture */
    CU_DEVICE_ATTRIBUTE_MAXIMUM_TEXTURE1D_LAYERED_LAYERS = 43

#endif

} CUdevice_attribute;

/**
 * Result codes returned by API calls.
 */
typedef enum cudaError_enum
{
    /* ---- General ---------------------------------------------------- */

    /**
     * No error. For query calls (see ::cuEventQuery() and
     * ::cuStreamQuery()) it can also mean the queried operation is complete.
     */
    CUDA_SUCCESS = 0,

    /** At least one parameter passed to the call is outside its acceptable range. */
    CUDA_ERROR_INVALID_VALUE = 1,

    /** The call could not allocate enough memory to perform the requested operation. */
    CUDA_ERROR_OUT_OF_MEMORY = 2,

    /** The driver has not been initialized with ::cuInit(), or initialization failed. */
    CUDA_ERROR_NOT_INITIALIZED = 3,

    /** The driver is in the process of shutting down. */
    CUDA_ERROR_DEINITIALIZED = 4,

    /* ---- Profiler --------------------------------------------------- */

    /** Profiling APIs were called while the application runs in visual profiler mode. */
    CUDA_ERROR_PROFILER_DISABLED = 5,

    /**
     * Profiling has not been initialized for this context; calling
     * cuProfilerInitialize() resolves this.
     */
    CUDA_ERROR_PROFILER_NOT_INITIALIZED = 6,

    /** The profiler is already started; cuProfilerStart() was probably called incorrectly. */
    CUDA_ERROR_PROFILER_ALREADY_STARTED = 7,

    /** The profiler is already stopped; cuProfilerStop() was probably called incorrectly. */
    CUDA_ERROR_PROFILER_ALREADY_STOPPED = 8,

    /* ---- Devices ---------------------------------------------------- */

    /** The installed driver detected no CUDA-capable device. */
    CUDA_ERROR_NO_DEVICE = 100,

    /** The supplied device ordinal does not correspond to a valid CUDA device. */
    CUDA_ERROR_INVALID_DEVICE = 101,

    /* ---- Images, contexts and mappings ------------------------------ */

    /** The device kernel image is invalid; can also indicate an invalid CUDA module. */
    CUDA_ERROR_INVALID_IMAGE = 200,

    /**
     * Most frequently: no context is bound to the current thread. Also
     * returned when the context passed to a call is not a valid handle
     * (for example one that already had ::cuCtxDestroy() invoked on it),
     * or when different API versions are mixed (i.e. a 3010 context with
     * 3020 API calls). See ::cuCtxGetApiVersion() for more details.
     */
    CUDA_ERROR_INVALID_CONTEXT = 201,

    /**
     * The context supplied as a parameter was already the active context.
     * \deprecated
     * Deprecated as of CUDA 3.2: pushing the active context via
     * ::cuCtxPushCurrent() is no longer an error.
     */
    CUDA_ERROR_CONTEXT_ALREADY_CURRENT = 202,

    /** A map or register operation failed. */
    CUDA_ERROR_MAP_FAILED = 205,

    /** An unmap or unregister operation failed. */
    CUDA_ERROR_UNMAP_FAILED = 206,

    /** The specified array is currently mapped and therefore cannot be destroyed. */
    CUDA_ERROR_ARRAY_IS_MAPPED = 207,

    /** The resource is already mapped. */
    CUDA_ERROR_ALREADY_MAPPED = 208,

    /**
     * No kernel image suitable for the device is available. This can
     * happen when the code generation options chosen for a CUDA source
     * file do not include the corresponding device configuration.
     */
    CUDA_ERROR_NO_BINARY_FOR_GPU = 209,

    /** A resource has already been acquired. */
    CUDA_ERROR_ALREADY_ACQUIRED = 210,

    /** A resource is not mapped. */
    CUDA_ERROR_NOT_MAPPED = 211,

    /** A mapped resource is not available for access as an array. */
    CUDA_ERROR_NOT_MAPPED_AS_ARRAY = 212,

    /** A mapped resource is not available for access as a pointer. */
    CUDA_ERROR_NOT_MAPPED_AS_POINTER = 213,

    /** An uncorrectable ECC error was detected during execution. */
    CUDA_ERROR_ECC_UNCORRECTABLE = 214,

    /** The ::CUlimit passed to the call is not supported by the active device. */
    CUDA_ERROR_UNSUPPORTED_LIMIT = 215,

    /**
     * The ::CUcontext passed to the call can be bound to only one CPU
     * thread at a time and is already bound to a CPU thread.
     */
    CUDA_ERROR_CONTEXT_ALREADY_IN_USE = 216,

    /* ---- Sources, files and shared objects -------------------------- */

    /** The device kernel source is invalid. */
    CUDA_ERROR_INVALID_SOURCE = 300,

    /** The specified file was not found. */
    CUDA_ERROR_FILE_NOT_FOUND = 301,

    /** A link to a shared object failed to resolve. */
    CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,

    /** Initialization of a shared object failed. */
    CUDA_ERROR_SHARED_OBJECT_INIT_FAILED = 303,

    /** An OS call failed. */
    CUDA_ERROR_OPERATING_SYSTEM = 304,

    /* ---- Handles, symbols and readiness ----------------------------- */

    /**
     * A resource handle passed to the call was not valid. Resource
     * handles are opaque types like ::CUstream and ::CUevent.
     */
    CUDA_ERROR_INVALID_HANDLE = 400,

    /**
     * A named symbol was not found. Examples of symbols are
     * global/constant variable names, texture names and surface names.
     */
    CUDA_ERROR_NOT_FOUND = 500,

    /**
     * Previously issued asynchronous operations have not completed yet.
     * Not actually an error, but it has to be distinguishable from
     * ::CUDA_SUCCESS (which indicates completion). Calls that may return
     * this value include ::cuEventQuery() and ::cuStreamQuery().
     */
    CUDA_ERROR_NOT_READY = 600,

    /* ---- Launches and peer access ----------------------------------- */

    /**
     * An exception occurred on the device while executing a kernel.
     * Common causes include dereferencing an invalid device pointer and
     * accessing out of bounds shared memory. The context cannot be used
     * and must be destroyed (a new one should be created); all existing
     * device memory allocations from it are invalid and must be
     * reconstructed if the program is to continue using CUDA.
     *
     * NOTE(review): more recent NVIDIA driver headers appear to assign
     * 700 to an illegal-address error and a different number to launch
     * failure; confirm against the driver API reference before relying
     * on this value with current drivers.
     */
    CUDA_ERROR_LAUNCH_FAILED = 700,

    /**
     * The launch did not occur because appropriate resources were
     * missing. Usually the caller passed too many arguments to the device
     * kernel, or the launch specifies too many threads for the kernel's
     * register count. Passing arguments of the wrong size (i.e. a 64-bit
     * pointer when a 32-bit int is expected) is equivalent to passing too
     * many arguments and can also produce this error.
     */
    CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES = 701,

    /**
     * The device kernel took too long to execute. Only possible when
     * timeouts are enabled; see the device attribute
     * ::CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT. The context cannot be
     * used (and must be destroyed, as for ::CUDA_ERROR_LAUNCH_FAILED);
     * all existing device memory allocations from it are invalid and must
     * be reconstructed if the program is to continue using CUDA.
     */
    CUDA_ERROR_LAUNCH_TIMEOUT = 702,

    /** The kernel launch uses an incompatible texturing mode. */
    CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING = 703,

    /**
     * ::cuCtxEnablePeerAccess() tried to re-enable peer access to a
     * context that already has peer access enabled.
     */
    CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED = 704,

    /**
     * Either ::cuMemPeerRegister tried to register memory from a context
     * that has not had peer access enabled yet via
     * ::cuCtxEnablePeerAccess(), or ::cuCtxDisablePeerAccess() tried to
     * disable peer access that has not been enabled yet.
     */
    CUDA_ERROR_PEER_ACCESS_NOT_ENABLED = 705,

    /** ::cuMemPeerRegister tried to register memory that is already registered. */
    CUDA_ERROR_PEER_MEMORY_ALREADY_REGISTERED = 706,

    /** ::cuMemPeerUnregister tried to unregister memory that has not been registered. */
    CUDA_ERROR_PEER_MEMORY_NOT_REGISTERED = 707,

    /**
     * ::cuCtxCreate was called with the flag ::CU_CTX_PRIMARY on a device
     * whose primary context is already initialized.
     */
    CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE = 708,

    /**
     * The context current to the calling thread has been destroyed using
     * ::cuCtxDestroy, or is a primary context that has not yet been
     * initialized.
     */
    CUDA_ERROR_CONTEXT_IS_DESTROYED = 709,

    /* ---- Catch-all -------------------------------------------------- */

    /** An unknown internal error has occurred. */
    CUDA_ERROR_UNKNOWN = 999
} CUresult;

/**
 * Kinds of memory.
 *
 * CU_MEMORYTYPE_UNIFIED is declared only when __CUDA_API_VERSION is
 * defined and at least 4000 (an undefined macro evaluates to 0).
 */
typedef enum CUmemorytype_enum
{
    /** Host memory */
    CU_MEMORYTYPE_HOST = 0x01,

    /** Device memory */
    CU_MEMORYTYPE_DEVICE = 0x02,

    /** Array memory */
    CU_MEMORYTYPE_ARRAY = 0x03

#if defined(__CUDA_API_VERSION) && (__CUDA_API_VERSION >= 4000)

    /* The separator for the previous enumerator lives inside the guard. */
    ,

    /** Unified device or host memory */
    CU_MEMORYTYPE_UNIFIED = 0x04

#endif

} CUmemorytype;

/**
 * Compute modes of a device.
 *
 * CU_COMPUTEMODE_EXCLUSIVE_PROCESS is declared only when
 * __CUDA_API_VERSION is defined and at least 4000 (an undefined macro
 * evaluates to 0).
 */
typedef enum CUcomputemode_enum
{
    /** Default compute mode (multiple contexts allowed per device) */
    CU_COMPUTEMODE_DEFAULT = 0,

    /**
     * Compute-exclusive-thread mode (only one context, used by a single
     * thread, can be present on this device at a time)
     */
    CU_COMPUTEMODE_EXCLUSIVE = 1,

    /** Compute-prohibited mode (no contexts can be created on this device at this time) */
    CU_COMPUTEMODE_PROHIBITED = 2

#if defined(__CUDA_API_VERSION) && (__CUDA_API_VERSION >= 4000)

    /* The separator for the previous enumerator lives inside the guard. */
    ,

    /**
     * Compute-exclusive-process mode (only one context, used by a single
     * process, can be present on this device at a time)
     */
    CU_COMPUTEMODE_EXCLUSIVE_PROCESS = 3

#endif

} CUcomputemode;

/**
 * Flags used when registering a graphics resource.
 *
 * Apart from the zero "none" value, each enumerator occupies its own bit.
 */
typedef enum CUgraphicsRegisterFlags_enum
{
    CU_GRAPHICS_REGISTER_FLAGS_NONE          = 0,
    CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY     = 1 << 0,
    CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 1 << 1,
    CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST  = 1 << 2
} CUgraphicsRegisterFlags;

/**
 * Flags used when mapping and unmapping interop resources.
 *
 * Apart from the zero "none" value, each enumerator occupies its own bit.
 */
typedef enum CUgraphicsMapResourceFlags_enum
{
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE          = 0,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY     = 1 << 0,
    CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 1 << 1
} CUgraphicsMapResourceFlags;

/**
 * Parameter block for a 2D memory copy: a source description, a
 * destination description and the extent to transfer.
 *
 * NOTE(review): the per-field remarks are inferred from the member names
 * and types only; confirm exact semantics against the CUDA driver API
 * documentation. Member order and types define the binary layout and
 * must not be changed.
 */
typedef struct CUDA_MEMCPY2D_st
{
    /* ---- Source ---- */
    size_t       srcXInBytes;    /**< Source X offset, in bytes                  */
    size_t       srcY;           /**< Source Y offset                            */
    CUmemorytype srcMemoryType;  /**< Kind of source memory (see ::CUmemorytype) */
    const void  *srcHost;        /**< Source as a read-only host pointer         */
    CUdeviceptr  srcDevice;      /**< Source as a device pointer                 */
    CUarray      srcArray;       /**< Source as a CUDA array handle              */
    size_t       srcPitch;       /**< Source pitch                               */

    /* ---- Destination ---- */
    size_t       dstXInBytes;    /**< Destination X offset, in bytes                  */
    size_t       dstY;           /**< Destination Y offset                            */
    CUmemorytype dstMemoryType;  /**< Kind of destination memory (see ::CUmemorytype) */
    void        *dstHost;        /**< Destination as a writable host pointer          */
    CUdeviceptr  dstDevice;      /**< Destination as a device pointer                 */
    CUarray      dstArray;       /**< Destination as a CUDA array handle              */
    size_t       dstPitch;       /**< Destination pitch                               */

    /* ---- Extent ---- */
    size_t       WidthInBytes;   /**< Width of the copied region, in bytes */
    size_t       Height;         /**< Height of the copied region          */
} CUDA_MEMCPY2D;

#endif // QTAV_DYNLINK_CUDA_H
