diff --git a/tensorflow/compiler/xla/cpu_function_runtime.cc b/tensorflow/compiler/xla/cpu_function_runtime.cc index 517b30a8251..43ca4806711 100644 --- a/tensorflow/compiler/xla/cpu_function_runtime.cc +++ b/tensorflow/compiler/xla/cpu_function_runtime.cc @@ -16,6 +16,7 @@ limitations under the License. #include "tensorflow/compiler/xla/cpu_function_runtime.h" #include "tensorflow/core/platform/dynamic_annotations.h" +#include namespace xla { namespace { diff --git a/tensorflow/core/kernels/BUILD b/tensorflow/core/kernels/BUILD index 22d2facf5ca..de3d5d34760 100644 --- a/tensorflow/core/kernels/BUILD +++ b/tensorflow/core/kernels/BUILD @@ -1339,8 +1339,6 @@ tf_kernel_library( ], deps = if_cuda_or_rocm([ ":cuda_solvers", - ]) + if_cuda([ - "@cub_archive//:cub", ]) + if_rocm([ "@rocprim_archive//:rocprim", ]) + ARRAY_DEPS, @@ -2502,7 +2500,7 @@ tf_kernel_library( deps = DYNAMIC_DEPS + [ ":fill_functor", ":gather_functor", - ] + if_cuda(["@cub_archive//:cub"]) + if_rocm([ + ] + if_rocm([ "@rocprim_archive//:rocprim", ]), ) @@ -2905,7 +2903,7 @@ tf_kernel_library( tf_kernel_library( name = "non_max_suppression_op", prefix = "non_max_suppression_op", - deps = IMAGE_DEPS + if_cuda(["@cub_archive//:cub"]), + deps = IMAGE_DEPS, ) tf_kernel_library( @@ -3823,9 +3821,7 @@ tf_kernel_library( name = "reduction_ops", gpu_srcs = ["reduction_gpu_kernels.cu.h"], prefix = "reduction_ops", - deps = MATH_DEPS + [":transpose_functor"] + if_cuda([ - "@cub_archive//:cub", - ]) + if_rocm([ + deps = MATH_DEPS + [":transpose_functor"] + if_rocm([ "@rocprim_archive//:rocprim", ]), ) @@ -3850,9 +3846,7 @@ tf_kernel_library( "scan_ops_gpu_half.cu.cc", "scan_ops_gpu_int.cu.cc", ], - deps = MATH_DEPS + if_cuda([ - "@cub_archive//:cub", - ]) + if_rocm([ + deps = MATH_DEPS + if_rocm([ "@rocprim_archive//:rocprim", ]), ) @@ -4284,7 +4278,6 @@ tf_kernel_library( "//tensorflow/core:framework", "//tensorflow/core:lib", ] + if_cuda([ - "@cub_archive//:cub", "@local_config_cuda//cuda:cudnn_header", ]) 
+ if_rocm([ "@rocprim_archive//:rocprim", @@ -4373,7 +4366,6 @@ tf_kernel_library( ] + if_cuda_or_rocm([ ":reduction_ops", ]) + if_cuda([ - "@cub_archive//:cub", "//tensorflow/core:stream_executor", "//tensorflow/stream_executor/cuda:cuda_stream", ]) + if_rocm([ @@ -4415,8 +4407,6 @@ tf_kernel_library( prefix = "softmax_op", deps = NN_DEPS + if_cuda_or_rocm([ ":reduction_ops", - ]) + if_cuda([ - "@cub_archive//:cub", ]) + if_rocm([ "@rocprim_archive//:rocprim", ]), @@ -4451,9 +4441,7 @@ tf_kernel_library( "topk_op_gpu_int8.cu.cc", "topk_op_gpu_uint8.cu.cc", ], - deps = NN_DEPS + if_cuda([ - "@cub_archive//:cub", - ]) + if_rocm([ + deps = NN_DEPS + if_rocm([ "@rocprim_archive//:rocprim", ]), ) @@ -4478,7 +4466,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//third_party/eigen3", - ] + if_cuda(["@cub_archive//:cub"]) + if_rocm([ + ] + if_rocm([ "@rocprim_archive//:rocprim", ]), ) @@ -4491,7 +4479,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//third_party/eigen3", - ] + if_cuda(["@cub_archive//:cub"]) + if_rocm([ + ] + if_rocm([ "@rocprim_archive//:rocprim", ]), ) @@ -4506,7 +4494,7 @@ tf_kernel_library( "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:nn_grad", - ] + if_cuda(["@cub_archive//:cub"]) + if_rocm([ + ] + if_rocm([ "@rocprim_archive//:rocprim", ]), ) @@ -5084,8 +5072,6 @@ tf_kernel_library( "//third_party/eigen3", ] + if_cuda_or_rocm([ ":reduction_ops", - ]) + if_cuda([ - "@cub_archive//:cub", ]) + if_rocm([ "@rocprim_archive//:rocprim", ]), @@ -5655,8 +5641,6 @@ tf_kernel_library( "//tensorflow/core:lib_internal", ] + if_cuda_or_rocm([ ":reduction_ops", - ]) + if_cuda([ - "@cub_archive//:cub", ]) + if_rocm([ "@rocprim_archive//:rocprim", ]), diff --git a/tensorflow/core/kernels/bincount_op_gpu.cu.cc b/tensorflow/core/kernels/bincount_op_gpu.cu.cc index a2ad9cacf12..303056387b3 100644 --- a/tensorflow/core/kernels/bincount_op_gpu.cu.cc +++ 
b/tensorflow/core/kernels/bincount_op_gpu.cu.cc @@ -18,7 +18,7 @@ limitations under the License. #define EIGEN_USE_GPU #if GOOGLE_CUDA -#include "third_party/cub/device/device_histogram.cuh" +#include "third_party/gpus/cuda/include/cub/cub.cuh" #elif TENSORFLOW_USE_ROCM #include "external/rocprim_archive/hipcub/include/hipcub/hipcub.hpp" #endif diff --git a/tensorflow/core/kernels/cuda_sparse.cc b/tensorflow/core/kernels/cuda_sparse.cc index dff78bc9c7e..04d9e7294f0 100644 --- a/tensorflow/core/kernels/cuda_sparse.cc +++ b/tensorflow/core/kernels/cuda_sparse.cc @@ -209,7 +209,7 @@ static inline Status GtsvImpl(SparseFn op, cusparseHandle_t cusparse_handle, dl, d, du, B, ldb); \ } -TF_CALL_LAPACK_TYPES(GTSV_INSTANCE); +//TF_CALL_LAPACK_TYPES(GTSV_INSTANCE); #define GTSV_NO_PIVOT_INSTANCE(Scalar, sparse_prefix) \ template <> \ @@ -221,7 +221,7 @@ TF_CALL_LAPACK_TYPES(GTSV_INSTANCE); m, n, dl, d, du, B, ldb); \ } -TF_CALL_LAPACK_TYPES(GTSV_NO_PIVOT_INSTANCE); +//TF_CALL_LAPACK_TYPES(GTSV_NO_PIVOT_INSTANCE); template static inline Status GtsvStridedBatchImpl(SparseFn op, @@ -247,7 +247,7 @@ static inline Status GtsvStridedBatchImpl(SparseFn op, batchCount, batchStride); \ } -TF_CALL_LAPACK_TYPES(GTSV_STRIDED_BATCH_INSTANCE); +//TF_CALL_LAPACK_TYPES(GTSV_STRIDED_BATCH_INSTANCE); template static inline Status Gtsv2Impl(SparseFn op, cusparseHandle_t cusparse_handle, diff --git a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc index 7fe519b063c..49f31718396 100644 --- a/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc +++ b/tensorflow/core/kernels/dynamic_partition_op_gpu.cu.cc @@ -36,10 +36,7 @@ limitations under the License. 
#define EIGEN_USE_GPU #if GOOGLE_CUDA -#include "third_party/cub/device/device_radix_sort.cuh" -#include "third_party/cub/device/device_reduce.cuh" -#include "third_party/cub/iterator/constant_input_iterator.cuh" -#include "third_party/cub/thread/thread_operators.cuh" +#include "third_party/gpus/cuda/include/cub/cub.cuh" #elif TENSORFLOW_USE_ROCM #include "external/rocprim_archive/hipcub/include/hipcub/hipcub.hpp" #endif diff --git a/tensorflow/core/kernels/histogram_op_gpu.cu.cc b/tensorflow/core/kernels/histogram_op_gpu.cu.cc index 5f35adccba3..f82d8d0efbd 100644 --- a/tensorflow/core/kernels/histogram_op_gpu.cu.cc +++ b/tensorflow/core/kernels/histogram_op_gpu.cu.cc @@ -19,7 +19,7 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if GOOGLE_CUDA -#include "third_party/cub/device/device_histogram.cuh" +#include "third_party/gpus/cuda/include/cub/cub.cuh" #elif TENSORFLOW_USE_ROCM #include "external/rocprim_archive/hipcub/include/hipcub/hipcub.hpp" #endif diff --git a/tensorflow/core/kernels/non_max_suppression_op.cu.cc b/tensorflow/core/kernels/non_max_suppression_op.cu.cc index 1749a66579b..da6f0174119 100644 --- a/tensorflow/core/kernels/non_max_suppression_op.cu.cc +++ b/tensorflow/core/kernels/non_max_suppression_op.cu.cc @@ -19,9 +19,7 @@ limitations under the License. 
#include "absl/strings/str_cat.h" #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "third_party/cub/device/device_radix_sort.cuh" -#include "third_party/cub/device/device_segmented_radix_sort.cuh" -#include "third_party/cub/device/device_select.cuh" +#include "third_party/gpus/cuda/include/cub/cub.cuh" #include "tensorflow/core/framework/numeric_types.h" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/tensor_types.h" diff --git a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h index 7024d62a53e..62a9faa45d7 100644 --- a/tensorflow/core/kernels/reduction_gpu_kernels.cu.h +++ b/tensorflow/core/kernels/reduction_gpu_kernels.cu.h @@ -24,12 +24,7 @@ limitations under the License. #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if GOOGLE_CUDA -#include "third_party/cub/device/device_reduce.cuh" -#include "third_party/cub/device/device_segmented_reduce.cuh" -#include "third_party/cub/iterator/counting_input_iterator.cuh" -#include "third_party/cub/iterator/transform_input_iterator.cuh" -#include "third_party/cub/warp/warp_reduce.cuh" -#include "third_party/gpus/cuda/include/cuComplex.h" +#include "third_party/gpus/cuda/include/cub/cub.cuh" #elif TENSORFLOW_USE_ROCM #include "external/rocprim_archive/hipcub/include/hipcub/hipcub.hpp" #endif diff --git a/tensorflow/core/kernels/scan_ops_gpu.h b/tensorflow/core/kernels/scan_ops_gpu.h index eaa9360a5b7..1665d998ca6 100644 --- a/tensorflow/core/kernels/scan_ops_gpu.h +++ b/tensorflow/core/kernels/scan_ops_gpu.h @@ -25,11 +25,7 @@ limitations under the License. 
#endif // CUDA_VERSION >= 9000 #if GOOGLE_CUDA -#include "third_party/cub/block/block_load.cuh" -#include "third_party/cub/block/block_scan.cuh" -#include "third_party/cub/block/block_store.cuh" -#include "third_party/cub/iterator/counting_input_iterator.cuh" -#include "third_party/cub/iterator/transform_input_iterator.cuh" +#include "third_party/gpus/cuda/include/cub/cub.cuh" #include "third_party/gpus/cuda/include/cuComplex.h" #elif TENSORFLOW_USE_ROCM #include "external/rocprim_archive/hipcub/include/hipcub/hipcub.hpp" diff --git a/tensorflow/core/kernels/topk_op_gpu.h b/tensorflow/core/kernels/topk_op_gpu.h index 12717ca11fe..a48a86ca89d 100644 --- a/tensorflow/core/kernels/topk_op_gpu.h +++ b/tensorflow/core/kernels/topk_op_gpu.h @@ -23,9 +23,7 @@ limitations under the License. #include #include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" -#include "third_party/cub/device/device_segmented_radix_sort.cuh" -#include "third_party/cub/iterator/counting_input_iterator.cuh" -#include "third_party/cub/iterator/transform_input_iterator.cuh" +#include "third_party/gpus/cuda/include/cub/cub.cuh" #include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/register_types.h" #include "tensorflow/core/framework/tensor.h" diff --git a/tensorflow/core/kernels/where_op_gpu.cu.h b/tensorflow/core/kernels/where_op_gpu.cu.h index 8c228d60ebb..e49de52fcfd 100644 --- a/tensorflow/core/kernels/where_op_gpu.cu.h +++ b/tensorflow/core/kernels/where_op_gpu.cu.h @@ -22,10 +22,7 @@ limitations under the License. 
#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor" #if GOOGLE_CUDA -#include "third_party/cub/device/device_reduce.cuh" -#include "third_party/cub/device/device_select.cuh" -#include "third_party/cub/iterator/counting_input_iterator.cuh" -#include "third_party/cub/iterator/transform_input_iterator.cuh" +#include "third_party/gpus/cuda/include/cub/cub.cuh" #elif TENSORFLOW_USE_ROCM #include "external/rocprim_archive/hipcub/include/hipcub/hipcub.hpp" #endif @@ -246,6 +243,8 @@ class WhereOutputIterator { return *(ptr_ + (valid ? (NDIM * n) : 0)); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator int64*(){ return ptr_; } + private: int64* ptr_; const Eigen::DenseIndex max_row_; diff --git a/tensorflow/core/lib/bfloat16/bfloat16.h b/tensorflow/core/lib/bfloat16/bfloat16.h index a133f7e0f17..c2c5681417d 100644 --- a/tensorflow/core/lib/bfloat16/bfloat16.h +++ b/tensorflow/core/lib/bfloat16/bfloat16.h @@ -18,6 +18,7 @@ limitations under the License. #include #include +#include #include "tensorflow/core/platform/byte_order.h" diff --git a/tensorflow/core/lib/core/coding.h b/tensorflow/core/lib/core/coding.h index bfab80dd007..33deb95faf7 100644 --- a/tensorflow/core/lib/core/coding.h +++ b/tensorflow/core/lib/core/coding.h @@ -24,6 +24,7 @@ limitations under the License. #include "tensorflow/core/lib/core/raw_coding.h" #include "tensorflow/core/lib/core/stringpiece.h" #include "tensorflow/core/platform/types.h" +#include namespace tensorflow { namespace core { diff --git a/tensorflow/core/lib/gif/gif_io.cc b/tensorflow/core/lib/gif/gif_io.cc index dc5406920a4..ae2f0b373d4 100644 --- a/tensorflow/core/lib/gif/gif_io.cc +++ b/tensorflow/core/lib/gif/gif_io.cc @@ -17,6 +17,8 @@ limitations under the License. 
#include "tensorflow/core/lib/gif/gif_io.h" #include +#include +#include #include "tensorflow/core/lib/gtl/cleanup.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/gif.h" diff --git a/tensorflow/core/platform/posix/port.cc b/tensorflow/core/platform/posix/port.cc index 47f4abae3bb..f70d5ea5e24 100644 --- a/tensorflow/core/platform/posix/port.cc +++ b/tensorflow/core/platform/posix/port.cc @@ -37,6 +37,7 @@ limitations under the License. #include #include #include +#include #ifdef TF_USE_SNAPPY #include "snappy.h" #endif diff --git a/tensorflow/lite/kernels/internal/spectrogram.cc b/tensorflow/lite/kernels/internal/spectrogram.cc index 784e4bc99ef..0d016e27b3f 100644 --- a/tensorflow/lite/kernels/internal/spectrogram.cc +++ b/tensorflow/lite/kernels/internal/spectrogram.cc @@ -17,6 +17,7 @@ limitations under the License. #include #include +#include #include "third_party/fft2d/fft.h" diff --git a/tensorflow/python/lib/core/bfloat16.cc b/tensorflow/python/lib/core/bfloat16.cc index fde3a837702..b30bf048bcd 100644 --- a/tensorflow/python/lib/core/bfloat16.cc +++ b/tensorflow/python/lib/core/bfloat16.cc @@ -490,7 +490,7 @@ bool RegisterBfloat16Cast(int numpy_type, bool cast_is_safe) { } template -void BinaryUFunc(char** args, npy_intp* dimensions, npy_intp* steps, +void BinaryUFunc(char** args, npy_intp const* dimensions, npy_intp const* steps, void* data) { const char* i0 = args[0]; const char* i1 = args[1]; @@ -506,7 +506,7 @@ void BinaryUFunc(char** args, npy_intp* dimensions, npy_intp* steps, } template -void CompareUFunc(char** args, npy_intp* dimensions, npy_intp* steps, +void CompareUFunc(char** args, npy_intp const* dimensions, npy_intp const* steps, void* data) { BinaryUFunc(args, dimensions, steps, data); } @@ -612,7 +612,7 @@ bool Initialize() { return false; } PyUFuncObject* ufunc = reinterpret_cast(ufunc_obj.get()); - if (types.size() != ufunc->nargs) { + if (types.size() != (size_t)ufunc->nargs) { 
PyErr_Format(PyExc_AssertionError, "ufunc %s takes %d arguments, loop takes %lu", name, ufunc->nargs, types.size()); diff --git a/tensorflow/stream_executor/cuda/BUILD b/tensorflow/stream_executor/cuda/BUILD index 27b1364c6cb..700d9734ec0 100644 --- a/tensorflow/stream_executor/cuda/BUILD +++ b/tensorflow/stream_executor/cuda/BUILD @@ -88,7 +88,7 @@ cc_library( cc_library( name = "cuda_stub", srcs = if_cuda_is_configured(["cuda_stub.cc"]), - textual_hdrs = ["cuda_10_0.inc"], + textual_hdrs = ["cuda_12_0.inc"], deps = if_cuda_is_configured([ "@local_config_cuda//cuda:cuda_headers", "//tensorflow/stream_executor/lib", diff --git a/tensorflow/stream_executor/cuda/cublas_11_0.inc b/tensorflow/stream_executor/cuda/cublas_11_0.inc new file mode 100644 index 00000000000..072df757ddb --- /dev/null +++ b/tensorflow/stream_executor/cuda/cublas_11_0.inc @@ -0,0 +1,5236 @@ +// Auto-generated, do not edit. + +static inline cublasStatus_t cublasMigrateComputeType(cublasHandle_t handle, + cudaDataType_t dataType, + cublasComputeType_t* computeType) { + cublasMath_t mathMode = CUBLAS_DEFAULT_MATH; + cublasStatus_t status = CUBLAS_STATUS_SUCCESS; + + status = cublasGetMathMode(handle, &mathMode); + if (status != CUBLAS_STATUS_SUCCESS) { + return status; + } + + bool isPedantic = ((mathMode & 0xf) == CUBLAS_PEDANTIC_MATH); + + switch (dataType) { + case CUDA_R_32F: + case CUDA_C_32F: + *computeType = isPedantic ? CUBLAS_COMPUTE_32F_PEDANTIC : CUBLAS_COMPUTE_32F; + return CUBLAS_STATUS_SUCCESS; + case CUDA_R_64F: + case CUDA_C_64F: + *computeType = isPedantic ? CUBLAS_COMPUTE_64F_PEDANTIC : CUBLAS_COMPUTE_64F; + return CUBLAS_STATUS_SUCCESS; + case CUDA_R_16F: + *computeType = isPedantic ? CUBLAS_COMPUTE_16F_PEDANTIC : CUBLAS_COMPUTE_16F; + return CUBLAS_STATUS_SUCCESS; + case CUDA_R_32I: + *computeType = isPedantic ? 
CUBLAS_COMPUTE_32I_PEDANTIC : CUBLAS_COMPUTE_32I; + return CUBLAS_STATUS_SUCCESS; + default: + return CUBLAS_STATUS_NOT_SUPPORTED; + } +} + +extern "C" { +cublasStatus_t CUBLASWINAPI cublasCreate_v2(cublasHandle_t *handle) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t *); + static auto func_ptr = LoadSymbol("cublasCreate_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle); +} + +cublasStatus_t CUBLASWINAPI cublasDestroy_v2(cublasHandle_t handle) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t); + static auto func_ptr = LoadSymbol("cublasDestroy_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle); +} + +cublasStatus_t CUBLASWINAPI cublasGetVersion_v2(cublasHandle_t handle, + int *version) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int *); + static auto func_ptr = LoadSymbol("cublasGetVersion_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, version); +} + +cublasStatus_t CUBLASWINAPI cublasGetProperty(libraryPropertyType type, + int *value) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(libraryPropertyType, int *); + static auto func_ptr = LoadSymbol("cublasGetProperty"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(type, value); +} + +size_t CUBLASWINAPI cublasGetCudartVersion(void) { + using FuncPtr = size_t(CUBLASWINAPI *)(); + static auto func_ptr = LoadSymbol("cublasGetCudartVersion"); + if (!func_ptr) LogFatalSymbolNotFound("cublasGetCudartVersion"); + return func_ptr(); +} + +cublasStatus_t CUBLASWINAPI cublasSetWorkspace_v2(cublasHandle_t handle, + void *workspace, + size_t workspaceSizeInBytes) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, void *, size_t); + static auto func_ptr = LoadSymbol("cublasSetWorkspace_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, workspace, workspaceSizeInBytes); +} + +cublasStatus_t 
CUBLASWINAPI cublasSetStream_v2(cublasHandle_t handle, + cudaStream_t streamId) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cublasSetStream_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, streamId); +} + +cublasStatus_t CUBLASWINAPI cublasGetStream_v2(cublasHandle_t handle, + cudaStream_t *streamId) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cudaStream_t *); + static auto func_ptr = LoadSymbol("cublasGetStream_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, streamId); +} + +cublasStatus_t CUBLASWINAPI cublasGetPointerMode_v2(cublasHandle_t handle, + cublasPointerMode_t *mode) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t *); + static auto func_ptr = LoadSymbol("cublasGetPointerMode_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode); +} + +cublasStatus_t CUBLASWINAPI cublasSetPointerMode_v2(cublasHandle_t handle, + cublasPointerMode_t mode) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasPointerMode_t); + static auto func_ptr = LoadSymbol("cublasSetPointerMode_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode); +} + +cublasStatus_t CUBLASWINAPI cublasGetAtomicsMode(cublasHandle_t handle, + cublasAtomicsMode_t *mode) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t *); + static auto func_ptr = LoadSymbol("cublasGetAtomicsMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode); +} + +cublasStatus_t CUBLASWINAPI cublasSetAtomicsMode(cublasHandle_t handle, + cublasAtomicsMode_t mode) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasAtomicsMode_t); + static auto func_ptr = LoadSymbol("cublasSetAtomicsMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return 
func_ptr(handle, mode); +} + +cublasStatus_t CUBLASWINAPI cublasGetMathMode(cublasHandle_t handle, + cublasMath_t *mode) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasMath_t *); + static auto func_ptr = LoadSymbol("cublasGetMathMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode); +} + +cublasStatus_t CUBLASWINAPI cublasSetMathMode(cublasHandle_t handle, + cublasMath_t mode) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasMath_t); + static auto func_ptr = LoadSymbol("cublasSetMathMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode); +} + +cublasStatus_t CUBLASWINAPI cublasGetSmCountTarget(cublasHandle_t handle, + int *smCountTarget) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int *); + static auto func_ptr = LoadSymbol("cublasGetSmCountTarget"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, smCountTarget); +} + +cublasStatus_t CUBLASWINAPI cublasSetSmCountTarget(cublasHandle_t handle, + int smCountTarget) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int); + static auto func_ptr = LoadSymbol("cublasSetSmCountTarget"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, smCountTarget); +} + +const char *CUBLASWINAPI cublasGetStatusName(cublasStatus_t status) { + using FuncPtr = const char *(CUBLASWINAPI *)(cublasStatus_t); + static auto func_ptr = LoadSymbol("cublasGetStatusName"); + if (!func_ptr) return "cublasGetStatusName symbol not found."; + return func_ptr(status); +} + +const char *CUBLASWINAPI cublasGetStatusString(cublasStatus_t status) { + using FuncPtr = const char *(CUBLASWINAPI *)(cublasStatus_t); + static auto func_ptr = LoadSymbol("cublasGetStatusString"); + if (!func_ptr) return "cublasGetStatusString symbol not found."; + return func_ptr(status); +} + +cublasStatus_t CUBLASWINAPI cublasLoggerConfigure(int logIsOn, int 
logToStdOut, + int logToStdErr, + const char *logFileName) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const char *); + static auto func_ptr = LoadSymbol("cublasLoggerConfigure"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(logIsOn, logToStdOut, logToStdErr, logFileName); +} + +cublasStatus_t CUBLASWINAPI +cublasSetLoggerCallback(cublasLogCallback userCallback) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLogCallback); + static auto func_ptr = LoadSymbol("cublasSetLoggerCallback"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(userCallback); +} + +cublasStatus_t CUBLASWINAPI +cublasGetLoggerCallback(cublasLogCallback *userCallback) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasLogCallback *); + static auto func_ptr = LoadSymbol("cublasGetLoggerCallback"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(userCallback); +} + +cublasStatus_t CUBLASWINAPI cublasSetVector(int n, int elemSize, const void *x, + int incx, void *devicePtr, + int incy) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int); + static auto func_ptr = LoadSymbol("cublasSetVector"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(n, elemSize, x, incx, devicePtr, incy); +} + +cublasStatus_t CUBLASWINAPI cublasGetVector(int n, int elemSize, const void *x, + int incx, void *y, int incy) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, void *, int); + static auto func_ptr = LoadSymbol("cublasGetVector"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(n, elemSize, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasSetMatrix(int rows, int cols, int elemSize, + const void *A, int lda, void *B, + int ldb) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *, + int, void *, int); + static auto func_ptr = LoadSymbol("cublasSetMatrix"); + if (!func_ptr) 
return GetSymbolNotFoundError(); + return func_ptr(rows, cols, elemSize, A, lda, B, ldb); +} + +cublasStatus_t CUBLASWINAPI cublasGetMatrix(int rows, int cols, int elemSize, + const void *A, int lda, void *B, + int ldb) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, int, const void *, + int, void *, int); + static auto func_ptr = LoadSymbol("cublasGetMatrix"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rows, cols, elemSize, A, lda, B, ldb); +} + +cublasStatus_t CUBLASWINAPI cublasSetVectorAsync(int n, int elemSize, + const void *hostPtr, int incx, + void *devicePtr, int incy, + cudaStream_t stream) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, + void *, int, cudaStream_t); + static auto func_ptr = LoadSymbol("cublasSetVectorAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(n, elemSize, hostPtr, incx, devicePtr, incy, stream); +} + +cublasStatus_t CUBLASWINAPI cublasGetVectorAsync(int n, int elemSize, + const void *devicePtr, + int incx, void *hostPtr, + int incy, + cudaStream_t stream) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, const void *, int, + void *, int, cudaStream_t); + static auto func_ptr = LoadSymbol("cublasGetVectorAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(n, elemSize, devicePtr, incx, hostPtr, incy, stream); +} + +cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync(int rows, int cols, + int elemSize, const void *A, + int lda, void *B, int ldb, + cudaStream_t stream) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + int, int, int, const void *, int, void *, int, cudaStream_t); + static auto func_ptr = LoadSymbol("cublasSetMatrixAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream); +} + +cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync(int rows, int cols, + int elemSize, const void *A, + int lda, void *B, int ldb, + cudaStream_t 
stream) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + int, int, int, const void *, int, void *, int, cudaStream_t); + static auto func_ptr = LoadSymbol("cublasGetMatrixAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rows, cols, elemSize, A, lda, B, ldb, stream); +} + +void CUBLASWINAPI cublasXerbla(const char *srName, int info) { + using FuncPtr = void(CUBLASWINAPI *)(const char *, int); + static auto func_ptr = LoadSymbol("cublasXerbla"); + if (!func_ptr) LogFatalSymbolNotFound("cublasXerbla"); + return func_ptr(srName, info); +} + +cublasStatus_t CUBLASWINAPI cublasNrm2Ex(cublasHandle_t handle, int n, + const void *x, cudaDataType xType, + int incx, void *result, + cudaDataType resultType, + cudaDataType executionType) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const void *, cudaDataType, int, void *, + cudaDataType, cudaDataType); + static auto func_ptr = LoadSymbol("cublasNrm2Ex"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, xType, incx, result, resultType, executionType); +} + +cublasStatus_t CUBLASWINAPI cublasSnrm2_v2(cublasHandle_t handle, int n, + const float *x, int incx, + float *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const float *, int, float *); + static auto func_ptr = LoadSymbol("cublasSnrm2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasDnrm2_v2(cublasHandle_t handle, int n, + const double *x, int incx, + double *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const double *, int, double *); + static auto func_ptr = LoadSymbol("cublasDnrm2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasScnrm2_v2(cublasHandle_t handle, int n, + const cuComplex *x, int incx, + float *result) { + 
using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuComplex *, int, float *); + static auto func_ptr = LoadSymbol("cublasScnrm2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasDznrm2_v2(cublasHandle_t handle, int n, + const cuDoubleComplex *x, int incx, + double *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuDoubleComplex *, int, double *); + static auto func_ptr = LoadSymbol("cublasDznrm2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasDotEx(cublasHandle_t handle, int n, + const void *x, cudaDataType xType, + int incx, const void *y, + cudaDataType yType, int incy, + void *result, cudaDataType resultType, + cudaDataType executionType) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const void *, cudaDataType, int, const void *, + cudaDataType, int, void *, cudaDataType, cudaDataType); + static auto func_ptr = LoadSymbol("cublasDotEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, + executionType); +} + +cublasStatus_t CUBLASWINAPI cublasDotcEx(cublasHandle_t handle, int n, + const void *x, cudaDataType xType, + int incx, const void *y, + cudaDataType yType, int incy, + void *result, cudaDataType resultType, + cudaDataType executionType) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const void *, cudaDataType, int, const void *, + cudaDataType, int, void *, cudaDataType, cudaDataType); + static auto func_ptr = LoadSymbol("cublasDotcEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, xType, incx, y, yType, incy, result, resultType, + executionType); +} + +cublasStatus_t CUBLASWINAPI cublasSdot_v2(cublasHandle_t handle, int n, + const 
float *x, int incx, + const float *y, int incy, + float *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const float *, int, const float *, int, float *); + static auto func_ptr = LoadSymbol("cublasSdot_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, result); +} + +cublasStatus_t CUBLASWINAPI cublasDdot_v2(cublasHandle_t handle, int n, + const double *x, int incx, + const double *y, int incy, + double *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const double *, int, const double *, int, double *); + static auto func_ptr = LoadSymbol("cublasDdot_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, result); +} + +cublasStatus_t CUBLASWINAPI cublasCdotu_v2(cublasHandle_t handle, int n, + const cuComplex *x, int incx, + const cuComplex *y, int incy, + cuComplex *result) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, + int, const cuComplex *, int, cuComplex *); + static auto func_ptr = LoadSymbol("cublasCdotu_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, result); +} + +cublasStatus_t CUBLASWINAPI cublasCdotc_v2(cublasHandle_t handle, int n, + const cuComplex *x, int incx, + const cuComplex *y, int incy, + cuComplex *result) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, + int, const cuComplex *, int, cuComplex *); + static auto func_ptr = LoadSymbol("cublasCdotc_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, result); +} + +cublasStatus_t CUBLASWINAPI cublasZdotu_v2(cublasHandle_t handle, int n, + const cuDoubleComplex *x, int incx, + const cuDoubleComplex *y, int incy, + cuDoubleComplex *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuDoubleComplex *, 
int, + const cuDoubleComplex *, int, cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cublasZdotu_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, result); +} + +cublasStatus_t CUBLASWINAPI cublasZdotc_v2(cublasHandle_t handle, int n, + const cuDoubleComplex *x, int incx, + const cuDoubleComplex *y, int incy, + cuDoubleComplex *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cublasZdotc_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, result); +} + +cublasStatus_t CUBLASWINAPI +cublasScalEx(cublasHandle_t handle, int n, + const void *alpha, /* host or device pointer */ + cudaDataType alphaType, void *x, cudaDataType xType, int incx, + cudaDataType executionType) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const void *, cudaDataType, void *, cudaDataType, + int, cudaDataType); + static auto func_ptr = LoadSymbol("cublasScalEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, alpha, alphaType, x, xType, incx, executionType); +} + +cublasStatus_t CUBLASWINAPI +cublasSscal_v2(cublasHandle_t handle, int n, + const float *alpha, /* host or device pointer */ + float *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const float *, float *, int); + static auto func_ptr = LoadSymbol("cublasSscal_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, alpha, x, incx); +} + +cublasStatus_t CUBLASWINAPI +cublasDscal_v2(cublasHandle_t handle, int n, + const double *alpha, /* host or device pointer */ + double *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const double *, double *, int); + static auto func_ptr = LoadSymbol("cublasDscal_v2"); + if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, alpha, x, incx); +} + +cublasStatus_t CUBLASWINAPI +cublasCscal_v2(cublasHandle_t handle, int n, + const cuComplex *alpha, /* host or device pointer */ + cuComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuComplex *, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCscal_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, alpha, x, incx); +} + +cublasStatus_t CUBLASWINAPI +cublasCsscal_v2(cublasHandle_t handle, int n, + const float *alpha, /* host or device pointer */ + cuComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const float *, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCsscal_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, alpha, x, incx); +} + +cublasStatus_t CUBLASWINAPI +cublasZscal_v2(cublasHandle_t handle, int n, + const cuDoubleComplex *alpha, /* host or device pointer */ + cuDoubleComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuDoubleComplex *, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZscal_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, alpha, x, incx); +} + +cublasStatus_t CUBLASWINAPI +cublasZdscal_v2(cublasHandle_t handle, int n, + const double *alpha, /* host or device pointer */ + cuDoubleComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const double *, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZdscal_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, alpha, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasAxpyEx( + cublasHandle_t handle, int n, + const void *alpha, /* host or device pointer */ + cudaDataType alphaType, const void *x, 
cudaDataType xType, int incx, + void *y, cudaDataType yType, int incy, cudaDataType executiontype) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const void *, cudaDataType, const void *, + cudaDataType, int, void *, cudaDataType, int, cudaDataType); + static auto func_ptr = LoadSymbol("cublasAxpyEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, alpha, alphaType, x, xType, incx, y, yType, incy, + executiontype); +} + +cublasStatus_t CUBLASWINAPI +cublasSaxpy_v2(cublasHandle_t handle, int n, + const float *alpha, /* host or device pointer */ + const float *x, int incx, float *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const float *, const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasSaxpy_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, alpha, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasDaxpy_v2(cublasHandle_t handle, int n, + const double *alpha, /* host or device pointer */ + const double *x, int incx, double *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const double *, const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDaxpy_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, alpha, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasCaxpy_v2(cublasHandle_t handle, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *x, int incx, cuComplex *y, int incy) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const cuComplex *, + const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCaxpy_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, alpha, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasZaxpy_v2( + cublasHandle_t handle, int n, + const 
cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, + int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZaxpy_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, alpha, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasCopyEx(cublasHandle_t handle, int n, + const void *x, cudaDataType xType, + int incx, void *y, cudaDataType yType, + int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const void *, cudaDataType, int, void *, + cudaDataType, int); + static auto func_ptr = LoadSymbol("cublasCopyEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, xType, incx, y, yType, incy); +} + +cublasStatus_t CUBLASWINAPI cublasScopy_v2(cublasHandle_t handle, int n, + const float *x, int incx, float *y, + int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasScopy_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasDcopy_v2(cublasHandle_t handle, int n, + const double *x, int incx, double *y, + int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDcopy_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasCcopy_v2(cublasHandle_t handle, int n, + const cuComplex *x, int incx, + cuComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuComplex *, int, cuComplex *, int); + static auto func_ptr = 
LoadSymbol("cublasCcopy_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasZcopy_v2(cublasHandle_t handle, int n, + const cuDoubleComplex *x, int incx, + cuDoubleComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const cuDoubleComplex *, int, + cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZcopy_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasSswap_v2(cublasHandle_t handle, int n, + float *x, int incx, float *y, + int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *, + int, float *, int); + static auto func_ptr = LoadSymbol("cublasSswap_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasDswap_v2(cublasHandle_t handle, int n, + double *x, int incx, double *y, + int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, double *, + int, double *, int); + static auto func_ptr = LoadSymbol("cublasDswap_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasCswap_v2(cublasHandle_t handle, int n, + cuComplex *x, int incx, cuComplex *y, + int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCswap_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasZswap_v2(cublasHandle_t handle, int n, + cuDoubleComplex *x, int incx, + cuDoubleComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto 
func_ptr = LoadSymbol("cublasZswap_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasSwapEx(cublasHandle_t handle, int n, void *x, + cudaDataType xType, int incx, void *y, + cudaDataType yType, int incy) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, void *, cudaDataType, + int, void *, cudaDataType, int); + static auto func_ptr = LoadSymbol("cublasSwapEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, xType, incx, y, yType, incy); +} + +cublasStatus_t CUBLASWINAPI cublasIsamax_v2(cublasHandle_t handle, int n, + const float *x, int incx, + int *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const float *, int, int *); + static auto func_ptr = LoadSymbol("cublasIsamax_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasIdamax_v2(cublasHandle_t handle, int n, + const double *x, int incx, + int *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const double *, int, int *); + static auto func_ptr = LoadSymbol("cublasIdamax_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasIcamax_v2(cublasHandle_t handle, int n, + const cuComplex *x, int incx, + int *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const cuComplex *, int, int *); + static auto func_ptr = LoadSymbol("cublasIcamax_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasIzamax_v2(cublasHandle_t handle, int n, + const cuDoubleComplex *x, int incx, + int *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuDoubleComplex *, int, int *); + static auto 
func_ptr = LoadSymbol("cublasIzamax_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasIamaxEx( + cublasHandle_t handle, int n, const void *x, cudaDataType xType, int incx, + int *result /* host or device pointer */ +) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const void *, cudaDataType, int, int *); + static auto func_ptr = LoadSymbol("cublasIamaxEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, xType, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasIsamin_v2(cublasHandle_t handle, int n, + const float *x, int incx, + int *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const float *, int, int *); + static auto func_ptr = LoadSymbol("cublasIsamin_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasIdamin_v2(cublasHandle_t handle, int n, + const double *x, int incx, + int *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const double *, int, int *); + static auto func_ptr = LoadSymbol("cublasIdamin_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasIcamin_v2(cublasHandle_t handle, int n, + const cuComplex *x, int incx, + int *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const cuComplex *, int, int *); + static auto func_ptr = LoadSymbol("cublasIcamin_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasIzamin_v2(cublasHandle_t handle, int n, + const cuDoubleComplex *x, int incx, + int *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuDoubleComplex *, int, int *); + static auto func_ptr = 
LoadSymbol("cublasIzamin_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasIaminEx( + cublasHandle_t handle, int n, const void *x, cudaDataType xType, int incx, + int *result /* host or device pointer */ +) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const void *, cudaDataType, int, int *); + static auto func_ptr = LoadSymbol("cublasIaminEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, xType, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasAsumEx( + cublasHandle_t handle, int n, const void *x, cudaDataType xType, int incx, + void *result, cudaDataType resultType, /* host or device pointer */ + cudaDataType executiontype) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const void *, cudaDataType, int, void *, + cudaDataType, cudaDataType); + static auto func_ptr = LoadSymbol("cublasAsumEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, xType, incx, result, resultType, executiontype); +} + +cublasStatus_t CUBLASWINAPI cublasSasum_v2(cublasHandle_t handle, int n, + const float *x, int incx, + float *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const float *, int, float *); + static auto func_ptr = LoadSymbol("cublasSasum_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasDasum_v2(cublasHandle_t handle, int n, + const double *x, int incx, + double *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, + const double *, int, double *); + static auto func_ptr = LoadSymbol("cublasDasum_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasScasum_v2(cublasHandle_t handle, int n, + const cuComplex *x, int 
incx, + float *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuComplex *, int, float *); + static auto func_ptr = LoadSymbol("cublasScasum_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI cublasDzasum_v2(cublasHandle_t handle, int n, + const cuDoubleComplex *x, int incx, + double *result) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuDoubleComplex *, int, double *); + static auto func_ptr = LoadSymbol("cublasDzasum_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, result); +} + +cublasStatus_t CUBLASWINAPI +cublasSrot_v2(cublasHandle_t handle, int n, float *x, int incx, float *y, + int incy, const float *c, /* host or device pointer */ + const float *s) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, float *, int, float *, + int, const float *, const float *); + static auto func_ptr = LoadSymbol("cublasSrot_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, c, s); +} + +cublasStatus_t CUBLASWINAPI +cublasDrot_v2(cublasHandle_t handle, int n, double *x, int incx, double *y, + int incy, const double *c, /* host or device pointer */ + const double *s) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, double *, int, double *, int, const double *, + const double *); + static auto func_ptr = LoadSymbol("cublasDrot_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, c, s); +} + +cublasStatus_t CUBLASWINAPI cublasCrot_v2( + cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y, + int incy, const float *c, /* host or device pointer */ + const cuComplex *s) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, + const cuComplex *); 
+ static auto func_ptr = LoadSymbol("cublasCrot_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, c, s); +} + +cublasStatus_t CUBLASWINAPI cublasCsrot_v2( + cublasHandle_t handle, int n, cuComplex *x, int incx, cuComplex *y, + int incy, const float *c, /* host or device pointer */ + const float *s) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, cuComplex *, int, cuComplex *, int, const float *, + const float *); + static auto func_ptr = LoadSymbol("cublasCsrot_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, c, s); +} + +cublasStatus_t CUBLASWINAPI cublasZrot_v2( + cublasHandle_t handle, int n, cuDoubleComplex *x, int incx, + cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */ + const cuDoubleComplex *s) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, + const double *, const cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cublasZrot_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, c, s); +} + +cublasStatus_t CUBLASWINAPI cublasZdrot_v2( + cublasHandle_t handle, int n, cuDoubleComplex *x, int incx, + cuDoubleComplex *y, int incy, const double *c, /* host or device pointer */ + const double *s) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, cuDoubleComplex *, int, cuDoubleComplex *, int, + const double *, const double *); + static auto func_ptr = LoadSymbol("cublasZdrot_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, c, s); +} + +cublasStatus_t CUBLASWINAPI +cublasRotEx(cublasHandle_t handle, int n, void *x, cudaDataType xType, int incx, + void *y, cudaDataType yType, int incy, + const void *c, /* host or device pointer */ + const void *s, cudaDataType csType, cudaDataType executiontype) { + 
using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, void *, cudaDataType, int, void *, cudaDataType, int, + const void *, const void *, cudaDataType, cudaDataType); + static auto func_ptr = LoadSymbol("cublasRotEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, xType, incx, y, yType, incy, c, s, csType, + executiontype); +} + +cublasStatus_t CUBLASWINAPI +cublasSrotg_v2(cublasHandle_t handle, float *a, /* host or device pointer */ + float *b, /* host or device pointer */ + float *c, /* host or device pointer */ + float *s) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, float *, + float *, float *, float *); + static auto func_ptr = LoadSymbol("cublasSrotg_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, a, b, c, s); +} + +cublasStatus_t CUBLASWINAPI +cublasDrotg_v2(cublasHandle_t handle, double *a, /* host or device pointer */ + double *b, /* host or device pointer */ + double *c, /* host or device pointer */ + double *s) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, double *, + double *, double *, double *); + static auto func_ptr = LoadSymbol("cublasDrotg_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, a, b, c, s); +} + +cublasStatus_t CUBLASWINAPI +cublasCrotg_v2(cublasHandle_t handle, cuComplex *a, /* host or device pointer */ + cuComplex *b, /* host or device pointer */ + float *c, /* host or device pointer */ + cuComplex *s) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cuComplex *, cuComplex *, float *, cuComplex *); + static auto func_ptr = LoadSymbol("cublasCrotg_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, a, b, c, s); +} + +cublasStatus_t CUBLASWINAPI cublasZrotg_v2( + cublasHandle_t handle, cuDoubleComplex *a, /* host or device pointer */ + cuDoubleComplex *b, /* host or device pointer */ + double *c, /* host or device pointer */ + 
cuDoubleComplex *s) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cuDoubleComplex *, cuDoubleComplex *, double *, + cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cublasZrotg_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, a, b, c, s); +} + +cublasStatus_t CUBLASWINAPI cublasRotgEx(cublasHandle_t handle, + void *a, /* host or device pointer */ + void *b, /* host or device pointer */ + cudaDataType abType, + void *c, /* host or device pointer */ + void *s, /* host or device pointer */ + cudaDataType csType, + cudaDataType executiontype) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, void *, void *, + cudaDataType, void *, void *, + cudaDataType, cudaDataType); + static auto func_ptr = LoadSymbol("cublasRotgEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, a, b, abType, c, s, csType, executiontype); +} + +cublasStatus_t CUBLASWINAPI cublasSrotm_v2(cublasHandle_t handle, int n, + float *x, int incx, float *y, + int incy, const float *param) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, float *, int, float *, int, const float *); + static auto func_ptr = LoadSymbol("cublasSrotm_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, param); +} + +cublasStatus_t CUBLASWINAPI cublasDrotm_v2(cublasHandle_t handle, int n, + double *x, int incx, double *y, + int incy, const double *param) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, double *, int, double *, int, const double *); + static auto func_ptr = LoadSymbol("cublasDrotm_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, incx, y, incy, param); +} + +cublasStatus_t CUBLASWINAPI +cublasRotmEx(cublasHandle_t handle, int n, void *x, cudaDataType xType, + int incx, void *y, cudaDataType yType, int incy, + const void *param, /* host or device pointer */ + 
cudaDataType paramType, cudaDataType executiontype) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, void *, cudaDataType, int, void *, cudaDataType, int, + const void *, cudaDataType, cudaDataType); + static auto func_ptr = LoadSymbol("cublasRotmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, x, xType, incx, y, yType, incy, param, paramType, + executiontype); +} + +cublasStatus_t CUBLASWINAPI +cublasSrotmg_v2(cublasHandle_t handle, float *d1, /* host or device pointer */ + float *d2, /* host or device pointer */ + float *x1, /* host or device pointer */ + const float *y1, /* host or device pointer */ + float *param) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, float *, float *, float *, const float *, float *); + static auto func_ptr = LoadSymbol("cublasSrotmg_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, d1, d2, x1, y1, param); +} + +cublasStatus_t CUBLASWINAPI +cublasDrotmg_v2(cublasHandle_t handle, double *d1, /* host or device pointer */ + double *d2, /* host or device pointer */ + double *x1, /* host or device pointer */ + const double *y1, /* host or device pointer */ + double *param) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, double *, double *, double *, const double *, double *); + static auto func_ptr = LoadSymbol("cublasDrotmg_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, d1, d2, x1, y1, param); +} + +cublasStatus_t CUBLASWINAPI +cublasRotmgEx(cublasHandle_t handle, void *d1, /* host or device pointer */ + cudaDataType d1Type, void *d2, /* host or device pointer */ + cudaDataType d2Type, void *x1, /* host or device pointer */ + cudaDataType x1Type, const void *y1, /* host or device pointer */ + cudaDataType y1Type, void *param, /* host or device pointer */ + cudaDataType paramType, cudaDataType executiontype) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + 
cublasHandle_t, void *, cudaDataType, void *, cudaDataType, void *, + cudaDataType, const void *, cudaDataType, void *, cudaDataType, + cudaDataType); + static auto func_ptr = LoadSymbol("cublasRotmgEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, d1, d1Type, d2, d2Type, x1, x1Type, y1, y1Type, param, + paramType, executiontype); +} + +cublasStatus_t CUBLASWINAPI +cublasSgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, + const float *alpha, /* host or device pointer */ + const float *A, int lda, const float *x, int incx, + const float *beta, /* host or device pointer */ + float *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const float *, const float *, + int, const float *, int, const float *, float *, int); + static auto func_ptr = LoadSymbol("cublasSgemv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasDgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, + const double *alpha, /* host or device pointer */ + const double *A, int lda, const double *x, int incx, + const double *beta, /* host or device pointer */ + double *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const double *, + const double *, int, const double *, int, const double *, double *, int); + static auto func_ptr = LoadSymbol("cublasDgemv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasCgemv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *A, int lda, const cuComplex *x, int incx, + const cuComplex *beta, /* host or device pointer */ + cuComplex *y, int incy) 
{ + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const cuComplex *, + const cuComplex *, int, const cuComplex *, int, const cuComplex *, + cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCgemv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasZgemv_v2( + cublasHandle_t handle, cublasOperation_t trans, int m, int n, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, + const cuDoubleComplex *beta, /* host or device pointer */ + cuDoubleComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *, int, const cuDoubleComplex *, int, + const cuDoubleComplex *, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZgemv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasSgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, + int kl, int ku, const float *alpha, /* host or device pointer */ + const float *A, int lda, const float *x, int incx, + const float *beta, /* host or device pointer */ + float *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, int, int, const float *, + const float *, int, const float *, int, const float *, float *, int); + static auto func_ptr = LoadSymbol("cublasSgbmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, + incy); +} + +cublasStatus_t CUBLASWINAPI +cublasDgbmv_v2(cublasHandle_t handle, cublasOperation_t trans, int m, int n, + int kl, int ku, const double *alpha, 
/* host or device pointer */ + const double *A, int lda, const double *x, int incx, + const double *beta, /* host or device pointer */ + double *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, int, int, const double *, + const double *, int, const double *, int, const double *, double *, int); + static auto func_ptr = LoadSymbol("cublasDgbmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, + incy); +} + +cublasStatus_t CUBLASWINAPI cublasCgbmv_v2( + cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, + int ku, const cuComplex *alpha, /* host or device pointer */ + const cuComplex *A, int lda, const cuComplex *x, int incx, + const cuComplex *beta, /* host or device pointer */ + cuComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, int, int, const cuComplex *, + const cuComplex *, int, const cuComplex *, int, const cuComplex *, + cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCgbmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, + incy); +} + +cublasStatus_t CUBLASWINAPI cublasZgbmv_v2( + cublasHandle_t handle, cublasOperation_t trans, int m, int n, int kl, + int ku, const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, + const cuDoubleComplex *beta, /* host or device pointer */ + cuDoubleComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, int, int, + const cuDoubleComplex *, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, + int); + static auto func_ptr = LoadSymbol("cublasZgbmv_v2"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, + incy); +} + +cublasStatus_t CUBLASWINAPI cublasStrmv_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasStrmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasDtrmv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, + const double *A, int lda, double *x, + int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDtrmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasCtrmv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, + const cuComplex *A, int lda, + cuComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtrmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasZtrmv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *x, int incx) { + using 
FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtrmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasStbmv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, int k, + const float *A, int lda, float *x, + int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, int, const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasStbmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasDtbmv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, int k, + const double *A, int lda, double *x, + int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, int, const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDtbmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasCtbmv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, int k, + const cuComplex *A, int lda, + cuComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, int, const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtbmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, 
uplo, trans, diag, n, k, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasZtbmv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, int k, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtbmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasStpmv_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + cublasDiagType_t diag, int n, const float *AP, float *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const float *, float *, int); + static auto func_ptr = LoadSymbol("cublasStpmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasDtpmv_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + cublasDiagType_t diag, int n, const double *AP, double *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const double *, double *, int); + static auto func_ptr = LoadSymbol("cublasDtpmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasCtpmv_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, 
cublasDiagType_t, + int, const cuComplex *, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtpmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasZtpmv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, + const cuDoubleComplex *AP, + cuDoubleComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const cuDoubleComplex *, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtpmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasStrsv_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + cublasDiagType_t diag, int n, const float *A, int lda, float *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasStrsv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasDtrsv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, + const double *A, int lda, double *x, + int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDtrsv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasCtrsv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t 
trans, + cublasDiagType_t diag, int n, + const cuComplex *A, int lda, + cuComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtrsv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasZtrsv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtrsv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasStpsv_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + cublasDiagType_t diag, int n, const float *AP, float *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const float *, float *, int); + static auto func_ptr = LoadSymbol("cublasStpsv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasDtpsv_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + cublasDiagType_t diag, int n, const double *AP, double *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const double *, double *, int); + static auto func_ptr = LoadSymbol("cublasDtpsv_v2"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasCtpsv_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + cublasDiagType_t diag, int n, const cuComplex *AP, cuComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const cuComplex *, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtpsv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasZtpsv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, + const cuDoubleComplex *AP, + cuDoubleComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, const cuDoubleComplex *, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtpsv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, AP, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasStbsv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, int k, + const float *A, int lda, float *x, + int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, int, const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasStbsv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasDtbsv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, int k, + const double *A, int lda, double *x, + int incx) { + using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, int, const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDtbsv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasCtbsv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, int k, + const cuComplex *A, int lda, + cuComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, int, const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtbsv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI cublasZtbsv_v2(cublasHandle_t handle, + cublasFillMode_t uplo, + cublasOperation_t trans, + cublasDiagType_t diag, int n, int k, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *x, int incx) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, cublasDiagType_t, + int, int, const cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtbsv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, diag, n, k, A, lda, x, incx); +} + +cublasStatus_t CUBLASWINAPI +cublasSsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const float *alpha, /* host or device pointer */ + const float *A, int lda, const float *x, int incx, + const float *beta, /* host or device pointer */ + float *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, + const float *, int, const float *, float *, int); + static auto func_ptr = 
LoadSymbol("cublasSsymv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasDsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const double *alpha, /* host or device pointer */ + const double *A, int lda, const double *x, int incx, + const double *beta, /* host or device pointer */ + double *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const double *, const double *, + int, const double *, int, const double *, double *, int); + static auto func_ptr = LoadSymbol("cublasDsymv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasCsymv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *A, int lda, const cuComplex *x, int incx, + const cuComplex *beta, /* host or device pointer */ + cuComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuComplex *, + const cuComplex *, int, const cuComplex *, int, const cuComplex *, + cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCsymv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasZsymv_v2( + cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, + const cuDoubleComplex *beta, /* host or device pointer */ + cuDoubleComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, + const cuDoubleComplex *, int, const cuDoubleComplex *, int, + 
const cuDoubleComplex *, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZsymv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasChemv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *A, int lda, const cuComplex *x, int incx, + const cuComplex *beta, /* host or device pointer */ + cuComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuComplex *, + const cuComplex *, int, const cuComplex *, int, const cuComplex *, + cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasChemv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasZhemv_v2( + cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, + const cuDoubleComplex *beta, /* host or device pointer */ + cuDoubleComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, + const cuDoubleComplex *, int, const cuDoubleComplex *, int, + const cuDoubleComplex *, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZhemv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasSsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, + const float *alpha, /* host or device pointer */ + const float *A, int lda, const float *x, int incx, + const float *beta, /* host or device pointer */ + float *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + 
cublasHandle_t, cublasFillMode_t, int, int, const float *, const float *, + int, const float *, int, const float *, float *, int); + static auto func_ptr = LoadSymbol("cublasSsbmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasDsbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, + const double *alpha, /* host or device pointer */ + const double *A, int lda, const double *x, int incx, + const double *beta, /* host or device pointer */ + double *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, int, const double *, + const double *, int, const double *, int, const double *, double *, int); + static auto func_ptr = LoadSymbol("cublasDsbmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasChbmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *A, int lda, const cuComplex *x, int incx, + const cuComplex *beta, /* host or device pointer */ + cuComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, int, const cuComplex *, + const cuComplex *, int, const cuComplex *, int, const cuComplex *, + cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasChbmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasZhbmv_v2( + cublasHandle_t handle, cublasFillMode_t uplo, int n, int k, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, + const cuDoubleComplex *beta, /* host or device pointer */ + cuDoubleComplex 
*y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *, int, const cuDoubleComplex *, int, + const cuDoubleComplex *, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZhbmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasSspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const float *alpha, /* host or device pointer */ + const float *AP, const float *x, int incx, + const float *beta, /* host or device pointer */ + float *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const float *, const float *, + const float *, int, const float *, float *, int); + static auto func_ptr = LoadSymbol("cublasSspmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasDspmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const double *alpha, /* host or device pointer */ + const double *AP, const double *x, int incx, + const double *beta, /* host or device pointer */ + double *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const double *, const double *, + const double *, int, const double *, double *, int); + static auto func_ptr = LoadSymbol("cublasDspmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasChpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *AP, const cuComplex *x, int incx, + const cuComplex *beta, /* host or device pointer */ + cuComplex *y, int incy) { + using FuncPtr = 
cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuComplex *, + const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, + int); + static auto func_ptr = LoadSymbol("cublasChpmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI +cublasZhpmv_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *AP, const cuDoubleComplex *x, int incx, + const cuDoubleComplex *beta, /* host or device pointer */ + cuDoubleComplex *y, int incy) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, + const cuDoubleComplex *, const cuDoubleComplex *, int, + const cuDoubleComplex *, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZhpmv_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, AP, x, incx, beta, y, incy); +} + +cublasStatus_t CUBLASWINAPI cublasSger_v2( + cublasHandle_t handle, int m, int n, + const float *alpha, /* host or device pointer */ + const float *x, int incx, const float *y, int incy, float *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, int, const float *, const float *, int, + const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasSger_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); +} + +cublasStatus_t CUBLASWINAPI cublasDger_v2( + cublasHandle_t handle, int m, int n, + const double *alpha, /* host or device pointer */ + const double *x, int incx, const double *y, int incy, double *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, int, const double *, const double *, int, + const double *, int, double *, int); + static auto 
func_ptr = LoadSymbol("cublasDger_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasCgeru_v2(cublasHandle_t handle, int m, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *x, int incx, const cuComplex *y, int incy, + cuComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, + const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCgeru_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasCgerc_v2(cublasHandle_t handle, int m, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *x, int incx, const cuComplex *y, int incy, + cuComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, int, const cuComplex *, const cuComplex *, int, + const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCgerc_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasZgeru_v2(cublasHandle_t handle, int m, int n, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, + int incy, cuDoubleComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *, int, const cuDoubleComplex *, int, + cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZgeru_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasZgerc_v2(cublasHandle_t handle, int m, int n, + 
const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, + int incy, cuDoubleComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *, int, const cuDoubleComplex *, int, + cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZgerc_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, x, incx, y, incy, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasSsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const float *alpha, /* host or device pointer */ + const float *x, int incx, float *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, + float *, int); + static auto func_ptr = LoadSymbol("cublasSsyr_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasDsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const double *alpha, /* host or device pointer */ + const double *x, int incx, double *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const double *, const double *, + int, double *, int); + static auto func_ptr = LoadSymbol("cublasDsyr_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasCsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *x, int incx, cuComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuComplex *, + const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCsyr_v2"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasZsyr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, + const cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZsyr_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasCher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const float *alpha, /* host or device pointer */ + const cuComplex *x, int incx, cuComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, + int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCher_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasZher_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const double *alpha, /* host or device pointer */ + const cuDoubleComplex *x, int incx, cuDoubleComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const double *, + const cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZher_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasSspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const float *alpha, /* host or device pointer */ + const float *x, int incx, float *AP) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, 
cublasFillMode_t, int, const float *, const float *, int, + float *); + static auto func_ptr = LoadSymbol("cublasSspr_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, AP); +} + +cublasStatus_t CUBLASWINAPI +cublasDspr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const double *alpha, /* host or device pointer */ + const double *x, int incx, double *AP) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const double *, const double *, + int, double *); + static auto func_ptr = LoadSymbol("cublasDspr_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, AP); +} + +cublasStatus_t CUBLASWINAPI +cublasChpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const float *alpha, /* host or device pointer */ + const cuComplex *x, int incx, cuComplex *AP) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const float *, const cuComplex *, + int, cuComplex *); + static auto func_ptr = LoadSymbol("cublasChpr_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, AP); +} + +cublasStatus_t CUBLASWINAPI +cublasZhpr_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const double *alpha, /* host or device pointer */ + const cuDoubleComplex *x, int incx, cuDoubleComplex *AP) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const double *, + const cuDoubleComplex *, int, cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cublasZhpr_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, AP); +} + +cublasStatus_t CUBLASWINAPI cublasSsyr2_v2( + cublasHandle_t handle, cublasFillMode_t uplo, int n, + const float *alpha, /* host or device pointer */ + const float *x, int incx, const float *y, int incy, float *A, int lda) 
{ + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, + const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasSsyr2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); +} + +cublasStatus_t CUBLASWINAPI cublasDsyr2_v2( + cublasHandle_t handle, cublasFillMode_t uplo, int n, + const double *alpha, /* host or device pointer */ + const double *x, int incx, const double *y, int incy, double *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const double *, const double *, + int, const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDsyr2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasCsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *x, int incx, const cuComplex *y, int incy, + cuComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuComplex *, + const cuComplex *, int, const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCsyr2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasZsyr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, + int incy, cuDoubleComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, + const cuDoubleComplex *, int, const cuDoubleComplex *, int, + cuDoubleComplex *, int); + static 
auto func_ptr = LoadSymbol("cublasZsyr2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasCher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *x, int incx, const cuComplex *y, int incy, + cuComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuComplex *, + const cuComplex *, int, const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCher2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasZher2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, + int incy, cuDoubleComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, + const cuDoubleComplex *, int, const cuDoubleComplex *, int, + cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZher2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, A, lda); +} + +cublasStatus_t CUBLASWINAPI +cublasSspr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const float *alpha, /* host or device pointer */ + const float *x, int incx, const float *y, int incy, float *AP) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const float *, const float *, int, + const float *, int, float *); + static auto func_ptr = LoadSymbol("cublasSspr2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); +} + +cublasStatus_t CUBLASWINAPI 
cublasDspr2_v2( + cublasHandle_t handle, cublasFillMode_t uplo, int n, + const double *alpha, /* host or device pointer */ + const double *x, int incx, const double *y, int incy, double *AP) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const double *, const double *, + int, const double *, int, double *); + static auto func_ptr = LoadSymbol("cublasDspr2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); +} + +cublasStatus_t CUBLASWINAPI cublasChpr2_v2( + cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *x, int incx, const cuComplex *y, int incy, cuComplex *AP) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuComplex *, + const cuComplex *, int, const cuComplex *, int, cuComplex *); + static auto func_ptr = LoadSymbol("cublasChpr2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); +} + +cublasStatus_t CUBLASWINAPI +cublasZhpr2_v2(cublasHandle_t handle, cublasFillMode_t uplo, int n, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, + int incy, cuDoubleComplex *AP) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, + const cuDoubleComplex *, int, const cuDoubleComplex *, int, + cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cublasZhpr2_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, alpha, x, incx, y, incy, AP); +} + +cublasStatus_t CUBLASWINAPI cublasSgemvBatched( + cublasHandle_t handle, cublasOperation_t trans, int m, int n, + const float *alpha, /* host or device pointer */ + const float *const Aarray[], int lda, const float *const xarray[], int incx, + const 
float *beta, /* host or device pointer */ + float *const yarray[], int incy, int batchCount) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const float *, + const float *const[], int, const float *const[], int, const float *, + float *const[], int, int); + static auto func_ptr = LoadSymbol("cublasSgemvBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, alpha, Aarray, lda, xarray, incx, beta, + yarray, incy, batchCount); +} + +cublasStatus_t CUBLASWINAPI cublasDgemvBatched( + cublasHandle_t handle, cublasOperation_t trans, int m, int n, + const double *alpha, /* host or device pointer */ + const double *const Aarray[], int lda, const double *const xarray[], + int incx, const double *beta, /* host or device pointer */ + double *const yarray[], int incy, int batchCount) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const double *, + const double *const[], int, const double *const[], int, const double *, + double *const[], int, int); + static auto func_ptr = LoadSymbol("cublasDgemvBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, alpha, Aarray, lda, xarray, incx, beta, + yarray, incy, batchCount); +} + +cublasStatus_t CUBLASWINAPI cublasCgemvBatched( + cublasHandle_t handle, cublasOperation_t trans, int m, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *const Aarray[], int lda, const cuComplex *const xarray[], + int incx, const cuComplex *beta, /* host or device pointer */ + cuComplex *const yarray[], int incy, int batchCount) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const cuComplex *, + const cuComplex *const[], int, const cuComplex *const[], int, + const cuComplex *, cuComplex *const[], int, int); + static auto func_ptr = LoadSymbol("cublasCgemvBatched"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, alpha, Aarray, lda, xarray, incx, beta, + yarray, incy, batchCount); +} + +cublasStatus_t CUBLASWINAPI +cublasZgemvBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *const Aarray[], int lda, + const cuDoubleComplex *const xarray[], int incx, + const cuDoubleComplex *beta, /* host or device pointer */ + cuDoubleComplex *const yarray[], int incy, int batchCount) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *const[], int, const cuDoubleComplex *const[], int, + const cuDoubleComplex *, cuDoubleComplex *const[], int, int); + static auto func_ptr = LoadSymbol("cublasZgemvBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, alpha, Aarray, lda, xarray, incx, beta, + yarray, incy, batchCount); +} + +cublasStatus_t CUBLASWINAPI cublasSgemvStridedBatched( + cublasHandle_t handle, cublasOperation_t trans, int m, int n, + const float *alpha, /* host or device pointer */ + const float *A, int lda, long long int strideA, /* purposely signed */ + const float *x, int incx, long long int stridex, + const float *beta, /* host or device pointer */ + float *y, int incy, long long int stridey, int batchCount) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const float *, const float *, + int, long long, const float *, int, long long, const float *, float *, + int, long long, int); + static auto func_ptr = LoadSymbol("cublasSgemvStridedBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, alpha, A, lda, strideA, x, incx, stridex, + beta, y, incy, stridey, batchCount); +} + +cublasStatus_t CUBLASWINAPI cublasDgemvStridedBatched( + cublasHandle_t handle, cublasOperation_t trans, 
int m, int n, + const double *alpha, /* host or device pointer */ + const double *A, int lda, long long int strideA, /* purposely signed */ + const double *x, int incx, long long int stridex, + const double *beta, /* host or device pointer */ + double *y, int incy, long long int stridey, int batchCount) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const double *, + const double *, int, long long, const double *, int, long long, + const double *, double *, int, long long, int); + static auto func_ptr = LoadSymbol("cublasDgemvStridedBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, alpha, A, lda, strideA, x, incx, stridex, + beta, y, incy, stridey, batchCount); +} + +cublasStatus_t CUBLASWINAPI cublasCgemvStridedBatched( + cublasHandle_t handle, cublasOperation_t trans, int m, int n, + const cuComplex *alpha, /* host or device pointer */ + const cuComplex *A, int lda, long long int strideA, /* purposely signed */ + const cuComplex *x, int incx, long long int stridex, + const cuComplex *beta, /* host or device pointer */ + cuComplex *y, int incy, long long int stridey, int batchCount) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const cuComplex *, + const cuComplex *, int, long long, const cuComplex *, int, long long, + const cuComplex *, cuComplex *, int, long long, int); + static auto func_ptr = LoadSymbol("cublasCgemvStridedBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, alpha, A, lda, strideA, x, incx, stridex, + beta, y, incy, stridey, batchCount); +} + +cublasStatus_t CUBLASWINAPI cublasZgemvStridedBatched( + cublasHandle_t handle, cublasOperation_t trans, int m, int n, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *A, int lda, + long long int strideA, /* purposely signed */ + const cuDoubleComplex *x, int incx, long 
long int stridex, + const cuDoubleComplex *beta, /* host or device pointer */ + cuDoubleComplex *y, int incy, long long int stridey, int batchCount) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *, int, long long, const cuDoubleComplex *, int, + long long, const cuDoubleComplex *, cuDoubleComplex *, int, long long, + int); + static auto func_ptr = LoadSymbol("cublasZgemvStridedBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, alpha, A, lda, strideA, x, incx, stridex, + beta, y, incy, stridey, batchCount); +} + +cublasStatus_t CUBLASWINAPI cublasSgemm_v2( + cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, + int m, int n, int k, const float *alpha, /* host or device pointer */ + const float *A, int lda, const float *B, int ldb, + const float *beta, /* host or device pointer */ + float *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, + const float *, const float *, int, const float *, int, const float *, + float *, int); + static auto func_ptr = LoadSymbol("cublasSgemm_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, + C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasDgemm_v2( + cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, + int m, int n, int k, const double *alpha, /* host or device pointer */ + const double *A, int lda, const double *B, int ldb, + const double *beta, /* host or device pointer */ + double *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, + const double *, const double *, int, const double *, int, const double *, + double *, int); + static auto func_ptr = LoadSymbol("cublasDgemm_v2"); + if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, + C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasCgemm_v2( + cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, + int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ + const cuComplex *A, int lda, const cuComplex *B, int ldb, + const cuComplex *beta, /* host or device pointer */ + cuComplex *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, + const cuComplex *, const cuComplex *, int, const cuComplex *, int, + const cuComplex *, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCgemm_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, + C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasCgemm3m( + cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, + int m, int n, int k, const cuComplex *alpha, /* host or device pointer */ + const cuComplex *A, int lda, const cuComplex *B, int ldb, + const cuComplex *beta, /* host or device pointer */ + cuComplex *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, + const cuComplex *, const cuComplex *, int, const cuComplex *, int, + const cuComplex *, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCgemm3m"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, + C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasCgemm3mEx( + cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, + int m, int n, int k, const cuComplex *alpha, const void *A, + cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, + const cuComplex *beta, void *C, cudaDataType Ctype, int 
ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, + const cuComplex *, const void *, cudaDataType, int, const void *, + cudaDataType, int, const cuComplex *, void *, cudaDataType, int); + static auto func_ptr = LoadSymbol("cublasCgemm3mEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, + Btype, ldb, beta, C, Ctype, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasZgemm_v2( + cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, + int m, int n, int k, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, + const cuDoubleComplex *beta, /* host or device pointer */ + cuDoubleComplex *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, + const cuDoubleComplex *, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, + int); + static auto func_ptr = LoadSymbol("cublasZgemm_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, + C, ldc); +} + +cublasStatus_t CUBLASWINAPI +cublasZgemm3m(cublasHandle_t handle, cublasOperation_t transa, + cublasOperation_t transb, int m, int n, int k, + const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, + int ldb, const cuDoubleComplex *beta, /* host or device pointer */ + cuDoubleComplex *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, + const cuDoubleComplex *, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, + int); + static auto func_ptr = 
LoadSymbol("cublasZgemm3m"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, + C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasSgemmEx( + cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, + int m, int n, int k, const float *alpha, /* host or device pointer */ + const void *A, cudaDataType Atype, int lda, const void *B, + cudaDataType Btype, int ldb, const float *beta, /* host or device pointer */ + void *C, cudaDataType Ctype, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, + const float *, const void *, cudaDataType, int, const void *, + cudaDataType, int, const float *, void *, cudaDataType, int); + static auto func_ptr = LoadSymbol("cublasSgemmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, + Btype, ldb, beta, C, Ctype, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasGemmEx( + cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, + int m, int n, int k, const void *alpha, /* host or device pointer */ + const void *A, cudaDataType Atype, int lda, const void *B, + cudaDataType Btype, int ldb, const void *beta, /* host or device pointer */ + void *C, cudaDataType Ctype, int ldc, cudaDataType computeType, + cublasGemmAlgo_t algo) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, + const void *, const void *, cudaDataType, int, const void *, cudaDataType, + int, const void *, void *, cudaDataType, int, cublasComputeType_t, + cublasGemmAlgo_t); + static auto func_ptr = LoadSymbol("cublasGemmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + cublasComputeType_t migratedComputeType = CUBLAS_COMPUTE_32F; + cublasMigrateComputeType(handle, computeType, &migratedComputeType); + return func_ptr(handle, 
transa, transb, m, n, k, alpha, A, Atype, lda, B, + Btype, ldb, beta, C, Ctype, ldc, migratedComputeType, algo); +} + +cublasStatus_t CUBLASWINAPI cublasCgemmEx( + cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, + int m, int n, int k, const cuComplex *alpha, const void *A, + cudaDataType Atype, int lda, const void *B, cudaDataType Btype, int ldb, + const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int, + const cuComplex *, const void *, cudaDataType, int, const void *, + cudaDataType, int, const cuComplex *, void *, cudaDataType, int); + static auto func_ptr = LoadSymbol("cublasCgemmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda, B, + Btype, ldb, beta, C, Ctype, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasUint8gemmBias( + cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb, + cublasOperation_t transc, int m, int n, int k, const unsigned char *A, + int A_bias, int lda, const unsigned char *B, int B_bias, int ldb, + unsigned char *C, int C_bias, int ldc, int C_mult, int C_shift) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, cublasOperation_t, cublasOperation_t, + int, int, int, const unsigned char *, int, int, const unsigned char *, + int, int, unsigned char *, int, int, int, int); + static auto func_ptr = LoadSymbol("cublasUint8gemmBias"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transa, transb, transc, m, n, k, A, A_bias, lda, B, + B_bias, ldb, C, C_bias, ldc, C_mult, C_shift); +} + +cublasStatus_t CUBLASWINAPI cublasSsyrk_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const float *alpha, /* host or device pointer */ + const float *A, int lda, const float *beta, /* host or device pointer 
*/ + float *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const float *, const float *, int, const float *, float *, int); + static auto func_ptr = LoadSymbol("cublasSsyrk_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasDsyrk_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const double *alpha, /* host or device pointer */ + const double *A, int lda, const double *beta, /* host or device pointer */ + double *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const double *, const double *, int, const double *, double *, int); + static auto func_ptr = LoadSymbol("cublasDsyrk_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasCsyrk_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const cuComplex *alpha, /* host or device pointer */ + const cuComplex *A, int lda, + const cuComplex *beta, /* host or device pointer */ + cuComplex *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const cuComplex *, const cuComplex *, int, const cuComplex *, cuComplex *, + int); + static auto func_ptr = LoadSymbol("cublasCsyrk_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasZsyrk_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *beta, /* 
host or device pointer */ + cuDoubleComplex *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const cuDoubleComplex *, const cuDoubleComplex *, int, + const cuDoubleComplex *, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZsyrk_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasCsyrkEx( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const cuComplex *alpha, /* host or device pointer */ + const void *A, cudaDataType Atype, int lda, + const cuComplex *beta, /* host or device pointer */ + void *C, cudaDataType Ctype, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const cuComplex *, const void *, cudaDataType, int, const cuComplex *, + void *, cudaDataType, int); + static auto func_ptr = LoadSymbol("cublasCsyrkEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, + Ctype, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasCsyrk3mEx( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const cuComplex *alpha, const void *A, cudaDataType Atype, + int lda, const cuComplex *beta, void *C, cudaDataType Ctype, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const cuComplex *, const void *, cudaDataType, int, const cuComplex *, + void *, cudaDataType, int); + static auto func_ptr = LoadSymbol("cublasCsyrk3mEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, + Ctype, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasCherk_v2( + cublasHandle_t handle, cublasFillMode_t uplo, 
cublasOperation_t trans, + int n, int k, const float *alpha, /* host or device pointer */ + const cuComplex *A, int lda, const float *beta, /* host or device pointer */ + cuComplex *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const float *, const cuComplex *, int, const float *, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCherk_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasZherk_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const double *alpha, /* host or device pointer */ + const cuDoubleComplex *A, int lda, + const double *beta, /* host or device pointer */ + cuDoubleComplex *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const double *, const cuDoubleComplex *, int, const double *, + cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZherk_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasCherkEx( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const float *alpha, /* host or device pointer */ + const void *A, cudaDataType Atype, int lda, + const float *beta, /* host or device pointer */ + void *C, cudaDataType Ctype, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const float *, const void *, cudaDataType, int, const float *, void *, + cudaDataType, int); + static auto func_ptr = LoadSymbol("cublasCherkEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, + Ctype, ldc); +} + 
+cublasStatus_t CUBLASWINAPI cublasCherk3mEx( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const float *alpha, const void *A, cudaDataType Atype, + int lda, const float *beta, void *C, cudaDataType Ctype, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const float *, const void *, cudaDataType, int, const float *, void *, + cudaDataType, int); + static auto func_ptr = LoadSymbol("cublasCherk3mEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, Atype, lda, beta, C, + Ctype, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasSsyr2k_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const float *alpha, /* host or device pointer */ + const float *A, int lda, const float *B, int ldb, + const float *beta, /* host or device pointer */ + float *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const float *, const float *, int, const float *, int, const float *, + float *, int); + static auto func_ptr = LoadSymbol("cublasSsyr2k_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, + ldc); +} + +cublasStatus_t CUBLASWINAPI cublasDsyr2k_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const double *alpha, /* host or device pointer */ + const double *A, int lda, const double *B, int ldb, + const double *beta, /* host or device pointer */ + double *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const double *, const double *, int, const double *, int, const double *, + double *, int); + static auto func_ptr = LoadSymbol("cublasDsyr2k_v2"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, + ldc); +} + +cublasStatus_t CUBLASWINAPI cublasCsyr2k_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const cuComplex *alpha, /* host or device pointer */ + const cuComplex *A, int lda, const cuComplex *B, int ldb, + const cuComplex *beta, /* host or device pointer */ + cuComplex *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const cuComplex *, const cuComplex *, int, const cuComplex *, int, + const cuComplex *, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCsyr2k_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, + ldc); +} + +cublasStatus_t CUBLASWINAPI cublasZsyr2k_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */ + const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, + const cuDoubleComplex *beta, /* host or device pointer */ + cuDoubleComplex *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int, + const cuDoubleComplex *, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *, + int); + static auto func_ptr = LoadSymbol("cublasZsyr2k_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, + ldc); +} + +cublasStatus_t CUBLASWINAPI cublasCher2k_v2( + cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans, + int n, int k, const cuComplex *alpha, /* host or device pointer */ + const cuComplex *A, int lda, const cuComplex *B, int ldb, + const float *beta, /* host or device pointer */ + cuComplex *C, int ldc) 
{
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const float *, cuComplex *, int);
+  static auto func_ptr = LoadSymbol("cublasCher2k_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+// Wrapper pattern used by the stubs in this section: look the entry point up
+// by name with LoadSymbol<FuncPtr>() (cached in a function-local static),
+// return GetSymbolNotFoundError() when the lookup yields a null pointer, and
+// otherwise forward every argument unchanged.
+cublasStatus_t CUBLASWINAPI cublasZher2k_v2(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const double *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasZher2k_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasSsyrkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const float *, const float *, int, const float *, int, const float *,
+      float *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsyrkx");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasDsyrkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const double *, const double *, int, const double *, int, const double *,
+      double *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsyrkx");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasCsyrkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsyrkx");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasZsyrkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsyrkx");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasCherkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const float *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const float *, cuComplex *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasCherkx");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasZherkx(
+    cublasHandle_t handle, cublasFillMode_t uplo, cublasOperation_t trans,
+    int n, int k, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const double *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasFillMode_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const double *, cuDoubleComplex *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasZherkx");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasSsymm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const float *, const float *, int, const float *, int, const float *,
+      float *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasSsymm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasDsymm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const double *, const double *, int, const double *, int, const double *,
+      double *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasDsymm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasCsymm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasCsymm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasZsymm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasZsymm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasChemm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *, int,
+      const cuComplex *, cuComplex *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasChemm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasZhemm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, int m,
+    int n, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, cuDoubleComplex *,
+      int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasZhemm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, m, n, alpha, A, lda, B, ldb, beta, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasStrsm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const float *alpha, /* host or device pointer */
+    const float *A, int lda, float *B, int ldb) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const float *, const float *, int, float *,
+      int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrsm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
+}
+
+cublasStatus_t CUBLASWINAPI cublasDtrsm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const double *alpha, /* host or device pointer */
+    const double *A, int lda, double *B, int ldb) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const double *, const double *, int, double *,
+      int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrsm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
+}
+
+cublasStatus_t CUBLASWINAPI cublasCtrsm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, cuComplex *B, int ldb) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int,
+      cuComplex *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrsm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
+}
+
+cublasStatus_t CUBLASWINAPI cublasZtrsm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, cuDoubleComplex *B, int ldb) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, cuDoubleComplex *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrsm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb);
+}
+
+cublasStatus_t CUBLASWINAPI cublasStrmm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *B, int ldb, float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const float *, const float *, int,
+      const float *, int, float *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasStrmm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  C, ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasDtrmm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *B, int ldb, double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const double *, const double *, int,
+      const double *, int, double *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasDtrmm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  C, ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasCtrmm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, const cuComplex *B, int ldb, cuComplex *C,
+    int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuComplex *, const cuComplex *, int,
+      const cuComplex *, int, cuComplex *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasCtrmm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  C, ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasZtrmm_v2(
+    cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo,
+    cublasOperation_t trans, cublasDiagType_t diag, int m, int n,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
+    cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t,
+      cublasDiagType_t, int, int, const cuDoubleComplex *,
+      const cuDoubleComplex *, int, const cuDoubleComplex *, int,
+      cuDoubleComplex *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasZtrmm_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb,
+                  C, ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasSgemmBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const float *alpha, /* host or device pointer */
+    const float *const Aarray[], int lda, const float *const Barray[], int ldb,
+    const float *beta, /* host or device pointer */
+    float *const Carray[], int ldc, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const float *, const float *const[], int, const float *const[], int,
+      const float *, float *const[], int, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemmBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);
+}
+
+cublasStatus_t CUBLASWINAPI cublasDgemmBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const double *alpha, /* host or device pointer */
+    const double *const Aarray[], int lda, const double *const Barray[],
+    int ldb, const double *beta, /* host or device pointer */
+    double *const Carray[], int ldc, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const double *, const double *const[], int, const double *const[], int,
+      const double *, double *const[], int, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemmBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);
+}
+
+cublasStatus_t CUBLASWINAPI cublasCgemmBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *const Aarray[], int lda, const cuComplex *const Barray[],
+    int ldb, const cuComplex *beta, /* host or device pointer */
+    cuComplex *const Carray[], int ldc, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *const[], int,
+      const cuComplex *const[], int, const cuComplex *, cuComplex *const[], int,
+      int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemmBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);
+}
+
+cublasStatus_t CUBLASWINAPI cublasCgemm3mBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *const Aarray[], int lda, const cuComplex *const Barray[],
+    int ldb, const cuComplex *beta, /* host or device pointer */
+    cuComplex *const Carray[], int ldc, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *const[], int,
+      const cuComplex *const[], int, const cuComplex *, cuComplex *const[], int,
+      int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm3mBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);
+}
+
+cublasStatus_t CUBLASWINAPI
+cublasZgemmBatched(cublasHandle_t handle, cublasOperation_t transa,
+                   cublasOperation_t transb, int m, int n, int k,
+                   const cuDoubleComplex *alpha, /* host or device pointer */
+                   const cuDoubleComplex *const Aarray[], int lda,
+                   const cuDoubleComplex *const Barray[], int ldb,
+                   const cuDoubleComplex *beta, /* host or device pointer */
+                   cuDoubleComplex *const Carray[], int ldc, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *const[], int,
+      const cuDoubleComplex *const[], int, const cuDoubleComplex *,
+      cuDoubleComplex *const[], int, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemmBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, lda, Barray,
+                  ldb, beta, Carray, ldc, batchCount);
+}
+
+cublasStatus_t CUBLASWINAPI cublasGemmBatchedEx(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const void *alpha, /* host or device pointer */
+    const void *const Aarray[], cudaDataType Atype, int lda,
+    const void *const Barray[], cudaDataType Btype, int ldb,
+    const void *beta, /* host or device pointer */
+    void *const Carray[], cudaDataType Ctype, int ldc, int batchCount,
+    cudaDataType computeType, cublasGemmAlgo_t algo) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const void *, const void *const[], cudaDataType, int, const void *const[],
+      cudaDataType, int, const void *, void *const[], cudaDataType, int, int,
+      cublasComputeType_t, cublasGemmAlgo_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasGemmBatchedEx");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  // This wrapper receives a cudaDataType compute type, while the resolved
+  // entry point takes a cublasComputeType_t (see FuncPtr). Translate first and
+  // hand a failed translation back to the caller, so the call never runs with
+  // the CUBLAS_COMPUTE_32F placeholder by accident.
+  cublasComputeType_t migratedComputeType = CUBLAS_COMPUTE_32F;
+  const cublasStatus_t migrateStatus =
+      cublasMigrateComputeType(handle, computeType, &migratedComputeType);
+  if (migrateStatus != CUBLAS_STATUS_SUCCESS) return migrateStatus;
+  return func_ptr(handle, transa, transb, m, n, k, alpha, Aarray, Atype, lda,
+                  Barray, Btype, ldb, beta, Carray, Ctype, ldc, batchCount,
+                  migratedComputeType, algo);
+}
+
+cublasStatus_t CUBLASWINAPI cublasGemmStridedBatchedEx(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const void *alpha, /* host or device pointer */
+    const void *A, cudaDataType Atype, int lda,
+    long long int strideA, /* purposely signed */
+    const void *B, cudaDataType Btype, int ldb, long long int strideB,
+    const void *beta, /* host or device pointer */
+    void *C, cudaDataType Ctype, int ldc, long long int strideC, int batchCount,
+    cudaDataType computeType, cublasGemmAlgo_t algo) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const void *, const void *, cudaDataType, int, long long, const void *,
+      cudaDataType, int, long long, const void *, void *, cudaDataType, int,
+      long long, int, cublasComputeType_t, cublasGemmAlgo_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasGemmStridedBatchedEx");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  // Same translation as in cublasGemmBatchedEx: convert the cudaDataType
+  // compute type to the cublasComputeType_t the entry point takes, and return
+  // the translation status if it is not CUBLAS_STATUS_SUCCESS.
+  cublasComputeType_t migratedComputeType = CUBLAS_COMPUTE_32F;
+  const cublasStatus_t migrateStatus =
+      cublasMigrateComputeType(handle, computeType, &migratedComputeType);
+  if (migrateStatus != CUBLAS_STATUS_SUCCESS) return migrateStatus;
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, Atype, lda,
+                  strideA, B, Btype, ldb, strideB, beta, C, Ctype, ldc, strideC,
+                  batchCount, migratedComputeType, algo);
+}
+
+cublasStatus_t CUBLASWINAPI cublasSgemmStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const float *alpha, /* host or device pointer */
+    const float *A, int lda, long long int strideA, /* purposely signed */
+    const float *B, int ldb, long long int strideB,
+    const float *beta, /* host or device pointer */
+    float *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const float *, const float *, int, long long, const float *, int,
+      long long, const float *, float *, int, long long, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgemmStridedBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
+}
+
+cublasStatus_t CUBLASWINAPI cublasDgemmStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const double *alpha, /* host or device pointer */
+    const double *A, int lda, long long int strideA, /* purposely signed */
+    const double *B, int ldb, long long int strideB,
+    const double *beta, /* host or device pointer */
+    double *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const double *, const double *, int, long long, const double *, int,
+      long long, const double *, double *, int, long long, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgemmStridedBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
+}
+
+cublasStatus_t CUBLASWINAPI cublasCgemmStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, long long int strideA, /* purposely signed */
+    const cuComplex *B, int ldb, long long int strideB,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *, int, long long, const cuComplex *,
+      int, long long, const cuComplex *, cuComplex *, int, long long, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemmStridedBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
+}
+
+cublasStatus_t CUBLASWINAPI cublasCgemm3mStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda, long long int strideA, /* purposely signed */
+    const cuComplex *B, int ldb, long long int strideB,
+    const cuComplex *beta, /* host or device pointer */
+    cuComplex *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuComplex *, const cuComplex *, int, long long, const cuComplex *,
+      int, long long, const cuComplex *, cuComplex *, int, long long, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgemm3mStridedBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
+}
+
+cublasStatus_t CUBLASWINAPI cublasZgemmStridedBatched(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, int k,
+    const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda,
+    long long int strideA, /* purposely signed */
+    const cuDoubleComplex *B, int ldb, long long int strideB,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    cuDoubleComplex *C, int ldc, long long int strideC, int batchCount) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int, long long,
+      const cuDoubleComplex *, int, long long, const cuDoubleComplex *,
+      cuDoubleComplex *, int, long long, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgemmStridedBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B,
+                  ldb, strideB, beta, C, ldc, strideC, batchCount);
+}
+
+cublasStatus_t CUBLASWINAPI cublasSgeam(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, const float *alpha, /* host or device pointer */
+    const float *A, int lda, const float *beta, /* host or device pointer */
+    const float *B, int ldb, float *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int,
+      const float *, const float *, int, const float *, const float *, int,
+      float *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgeam");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasDgeam(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, const double *alpha, /* host or device pointer */
+    const double *A, int lda, const double *beta, /* host or device pointer */
+    const double *B, int ldb, double *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int,
+      const double *, const double *, int, const double *, const double *, int,
+      double *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgeam");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasCgeam(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, const cuComplex *alpha, /* host or device pointer */
+    const cuComplex *A, int lda,
+    const cuComplex *beta, /* host or device pointer */
+    const cuComplex *B, int ldb, cuComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int,
+      const cuComplex *, const cuComplex *, int, const cuComplex *,
+      const cuComplex *, int, cuComplex *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgeam");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasZgeam(
+    cublasHandle_t handle, cublasOperation_t transa, cublasOperation_t transb,
+    int m, int n, const cuDoubleComplex *alpha, /* host or device pointer */
+    const cuDoubleComplex *A, int lda,
+    const cuDoubleComplex *beta, /* host or device pointer */
+    const cuDoubleComplex *B, int ldb, cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, cublasOperation_t, int, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int,
+      const cuDoubleComplex *, const cuDoubleComplex *, int, cuDoubleComplex *,
+      int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgeam");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, transa, transb, m, n, alpha, A, lda, beta, B, ldb, C,
+                  ldc);
+}
+
+cublasStatus_t CUBLASWINAPI cublasSgetrfBatched(
+    cublasHandle_t handle, int n, float *const A[], /*Device pointer*/
+    int lda, int *P, /*Device Pointer*/
+    int *info, /*Device Pointer*/
+    int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, float *const[], int, int *, int *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgetrfBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, n, A, lda, P, info, batchSize);
+}
+
+cublasStatus_t CUBLASWINAPI cublasDgetrfBatched(
+    cublasHandle_t handle, int n, double *const A[], /*Device pointer*/
+    int lda, int *P, /*Device Pointer*/
+    int *info, /*Device Pointer*/
+    int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, double *const[], int, int *, int *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgetrfBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, n, A, lda, P, info, batchSize);
+}
+
+cublasStatus_t CUBLASWINAPI cublasCgetrfBatched(
+    cublasHandle_t handle, int n, cuComplex *const A[], /*Device pointer*/
+    int lda, int *P, /*Device Pointer*/
+    int *info, /*Device Pointer*/
+    int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuComplex *const[], int, int *, int *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgetrfBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, n, A, lda, P, info, batchSize);
+}
+
+cublasStatus_t CUBLASWINAPI cublasZgetrfBatched(
+    cublasHandle_t handle, int n, cuDoubleComplex *const A[], /*Device pointer*/
+    int lda, int *P, /*Device Pointer*/
+    int *info, /*Device Pointer*/
+    int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, cuDoubleComplex *const[], int, int *, int *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgetrfBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, n, A, lda, P, info, batchSize);
+}
+
+cublasStatus_t CUBLASWINAPI cublasSgetriBatched(
+    cublasHandle_t handle, int n, const float *const A[], /*Device pointer*/
+    int lda, const int *P, /*Device pointer*/
+    float *const C[], /*Device pointer*/
+    int ldc, int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const float *const[], int, const int *,
+      float *const[], int, int *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgetriBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize);
+}
+
+cublasStatus_t CUBLASWINAPI cublasDgetriBatched(
+    cublasHandle_t handle, int n, const double *const A[], /*Device pointer*/
+    int lda, const int *P, /*Device pointer*/
+    double *const C[], /*Device pointer*/
+    int ldc, int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const double *const[], int, const int *,
+      double *const[], int, int *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasDgetriBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize);
+}
+
+cublasStatus_t CUBLASWINAPI cublasCgetriBatched(
+    cublasHandle_t handle, int n, const cuComplex *const A[], /*Device pointer*/
+    int lda, const int *P, /*Device pointer*/
+    cuComplex *const C[], /*Device pointer*/
+    int ldc, int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuComplex *const[], int, const int *,
+      cuComplex *const[], int, int *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasCgetriBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize);
+}
+
+cublasStatus_t CUBLASWINAPI
+cublasZgetriBatched(cublasHandle_t handle, int n,
+                    const cuDoubleComplex *const A[], /*Device pointer*/
+                    int lda, const int *P, /*Device pointer*/
+                    cuDoubleComplex *const C[], /*Device pointer*/
+                    int ldc, int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, int, const cuDoubleComplex *const[], int, const int *,
+      cuDoubleComplex *const[], int, int *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasZgetriBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, n, A, lda, P, C, ldc, info, batchSize);
+}
+
+cublasStatus_t CUBLASWINAPI cublasSgetrsBatched(
+    cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs,
+    const float *const Aarray[], int lda, const int *devIpiv,
+    float *const Barray[], int ldb, int *info, int batchSize) {
+  using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(
+      cublasHandle_t, cublasOperation_t, int, int, const float *const[], int,
+      const int *, float *const[], int, int *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cublasSgetrsBatched");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb,
+                  info, batchSize);
+}
+
+cublasStatus_t CUBLASWINAPI cublasDgetrsBatched( +
cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, + const double *const Aarray[], int lda, const int *devIpiv, + double *const Barray[], int ldb, int *info, int batchSize) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const double *const[], int, + const int *, double *const[], int, int *, int); + static auto func_ptr = LoadSymbol("cublasDgetrsBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, + info, batchSize); +} + +cublasStatus_t CUBLASWINAPI cublasCgetrsBatched( + cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, + const cuComplex *const Aarray[], int lda, const int *devIpiv, + cuComplex *const Barray[], int ldb, int *info, int batchSize) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, const cuComplex *const[], + int, const int *, cuComplex *const[], int, int *, int); + static auto func_ptr = LoadSymbol("cublasCgetrsBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, + info, batchSize); +} + +cublasStatus_t CUBLASWINAPI cublasZgetrsBatched( + cublasHandle_t handle, cublasOperation_t trans, int n, int nrhs, + const cuDoubleComplex *const Aarray[], int lda, const int *devIpiv, + cuDoubleComplex *const Barray[], int ldb, int *info, int batchSize) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, + const cuDoubleComplex *const[], int, const int *, + cuDoubleComplex *const[], int, int *, int); + static auto func_ptr = LoadSymbol("cublasZgetrsBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, n, nrhs, Aarray, lda, devIpiv, Barray, ldb, + info, batchSize); +} + +cublasStatus_t CUBLASWINAPI cublasStrsmBatched( + cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t 
uplo, + cublasOperation_t trans, cublasDiagType_t diag, int m, int n, + const float *alpha, /*Host or Device Pointer*/ + const float *const A[], int lda, float *const B[], int ldb, + int batchCount) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, + cublasDiagType_t, int, int, const float *, const float *const[], int, + float *const[], int, int); + static auto func_ptr = LoadSymbol("cublasStrsmBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, + batchCount); +} + +cublasStatus_t CUBLASWINAPI cublasDtrsmBatched( + cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, + cublasOperation_t trans, cublasDiagType_t diag, int m, int n, + const double *alpha, /*Host or Device Pointer*/ + const double *const A[], int lda, double *const B[], int ldb, + int batchCount) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, + cublasDiagType_t, int, int, const double *, const double *const[], int, + double *const[], int, int); + static auto func_ptr = LoadSymbol("cublasDtrsmBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, + batchCount); +} + +cublasStatus_t CUBLASWINAPI cublasCtrsmBatched( + cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, + cublasOperation_t trans, cublasDiagType_t diag, int m, int n, + const cuComplex *alpha, /*Host or Device Pointer*/ + const cuComplex *const A[], int lda, cuComplex *const B[], int ldb, + int batchCount) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, + cublasDiagType_t, int, int, const cuComplex *, const cuComplex *const[], + int, cuComplex *const[], int, int); + static auto func_ptr = 
LoadSymbol("cublasCtrsmBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, + batchCount); +} + +cublasStatus_t CUBLASWINAPI cublasZtrsmBatched( + cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, + cublasOperation_t trans, cublasDiagType_t diag, int m, int n, + const cuDoubleComplex *alpha, /*Host or Device Pointer*/ + const cuDoubleComplex *const A[], int lda, cuDoubleComplex *const B[], + int ldb, int batchCount) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasSideMode_t, cublasFillMode_t, cublasOperation_t, + cublasDiagType_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *const[], int, cuDoubleComplex *const[], int, int); + static auto func_ptr = LoadSymbol("cublasZtrsmBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, side, uplo, trans, diag, m, n, alpha, A, lda, B, ldb, + batchCount); +} + +cublasStatus_t CUBLASWINAPI cublasSmatinvBatched( + cublasHandle_t handle, int n, const float *const A[], /*Device pointer*/ + int lda, float *const Ainv[], /*Device pointer*/ + int lda_inv, int *info, /*Device Pointer*/ + int batchSize) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const float *const[], + int, float *const[], int, int *, int); + static auto func_ptr = LoadSymbol("cublasSmatinvBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); +} + +cublasStatus_t CUBLASWINAPI cublasDmatinvBatched( + cublasHandle_t handle, int n, const double *const A[], /*Device pointer*/ + int lda, double *const Ainv[], /*Device pointer*/ + int lda_inv, int *info, /*Device Pointer*/ + int batchSize) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, const double *const[], + int, double *const[], int, int *, int); + static auto func_ptr = LoadSymbol("cublasDmatinvBatched"); + if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); +} + +cublasStatus_t CUBLASWINAPI cublasCmatinvBatched( + cublasHandle_t handle, int n, const cuComplex *const A[], /*Device pointer*/ + int lda, cuComplex *const Ainv[], /*Device pointer*/ + int lda_inv, int *info, /*Device Pointer*/ + int batchSize) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuComplex *const[], int, cuComplex *const[], + int, int *, int); + static auto func_ptr = LoadSymbol("cublasCmatinvBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); +} + +cublasStatus_t CUBLASWINAPI +cublasZmatinvBatched(cublasHandle_t handle, int n, + const cuDoubleComplex *const A[], /*Device pointer*/ + int lda, cuDoubleComplex *const Ainv[], /*Device pointer*/ + int lda_inv, int *info, /*Device Pointer*/ + int batchSize) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, const cuDoubleComplex *const[], int, + cuDoubleComplex *const[], int, int *, int); + static auto func_ptr = LoadSymbol("cublasZmatinvBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, n, A, lda, Ainv, lda_inv, info, batchSize); +} + +cublasStatus_t CUBLASWINAPI +cublasSgeqrfBatched(cublasHandle_t handle, int m, int n, + float *const Aarray[], /*Device pointer*/ + int lda, float *const TauArray[], /*Device pointer*/ + int *info, int batchSize) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, int, float *const[], + int, float *const[], int *, int); + static auto func_ptr = LoadSymbol("cublasSgeqrfBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); +} + +cublasStatus_t CUBLASWINAPI +cublasDgeqrfBatched(cublasHandle_t handle, int m, int n, + double *const Aarray[], /*Device pointer*/ + int lda, double *const 
TauArray[], /*Device pointer*/ + int *info, int batchSize) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, int, int, double *const[], + int, double *const[], int *, int); + static auto func_ptr = LoadSymbol("cublasDgeqrfBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); +} + +cublasStatus_t CUBLASWINAPI +cublasCgeqrfBatched(cublasHandle_t handle, int m, int n, + cuComplex *const Aarray[], /*Device pointer*/ + int lda, cuComplex *const TauArray[], /*Device pointer*/ + int *info, int batchSize) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, int, cuComplex *const[], int, cuComplex *const[], + int *, int); + static auto func_ptr = LoadSymbol("cublasCgeqrfBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); +} + +cublasStatus_t CUBLASWINAPI cublasZgeqrfBatched( + cublasHandle_t handle, int m, int n, + cuDoubleComplex *const Aarray[], /*Device pointer*/ + int lda, cuDoubleComplex *const TauArray[], /*Device pointer*/ + int *info, int batchSize) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, int, int, cuDoubleComplex *const[], int, + cuDoubleComplex *const[], int *, int); + static auto func_ptr = LoadSymbol("cublasZgeqrfBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, Aarray, lda, TauArray, info, batchSize); +} + +cublasStatus_t CUBLASWINAPI +cublasSgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, + int nrhs, float *const Aarray[], /*Device pointer*/ + int lda, float *const Carray[], /*Device pointer*/ + int ldc, int *info, int *devInfoArray, /*Device pointer*/ + int batchSize) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, int, float *const[], int, + float *const[], int, int *, int *, int); + static auto func_ptr = 
LoadSymbol("cublasSgelsBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, + devInfoArray, batchSize); +} + +cublasStatus_t CUBLASWINAPI +cublasDgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, + int nrhs, double *const Aarray[], /*Device pointer*/ + int lda, double *const Carray[], /*Device pointer*/ + int ldc, int *info, int *devInfoArray, /*Device pointer*/ + int batchSize) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, int, double *const[], int, + double *const[], int, int *, int *, int); + static auto func_ptr = LoadSymbol("cublasDgelsBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, + devInfoArray, batchSize); +} + +cublasStatus_t CUBLASWINAPI +cublasCgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, + int nrhs, cuComplex *const Aarray[], /*Device pointer*/ + int lda, cuComplex *const Carray[], /*Device pointer*/ + int ldc, int *info, int *devInfoArray, int batchSize) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, int, cuComplex *const[], int, + cuComplex *const[], int, int *, int *, int); + static auto func_ptr = LoadSymbol("cublasCgelsBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, + devInfoArray, batchSize); +} + +cublasStatus_t CUBLASWINAPI +cublasZgelsBatched(cublasHandle_t handle, cublasOperation_t trans, int m, int n, + int nrhs, cuDoubleComplex *const Aarray[], /*Device pointer*/ + int lda, cuDoubleComplex *const Carray[], /*Device pointer*/ + int ldc, int *info, int *devInfoArray, int batchSize) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasOperation_t, int, int, int, + cuDoubleComplex *const[], int, 
cuDoubleComplex *const[], int, int *, + int *, int); + static auto func_ptr = LoadSymbol("cublasZgelsBatched"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, trans, m, n, nrhs, Aarray, lda, Carray, ldc, info, + devInfoArray, batchSize); +} + +cublasStatus_t CUBLASWINAPI cublasSdgmm(cublasHandle_t handle, + cublasSideMode_t mode, int m, int n, + const float *A, int lda, const float *x, + int incx, float *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasSideMode_t, int, int, const float *, int, + const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasSdgmm"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasDdgmm(cublasHandle_t handle, + cublasSideMode_t mode, int m, int n, + const double *A, int lda, + const double *x, int incx, double *C, + int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasSideMode_t, int, int, const double *, int, + const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDdgmm"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasCdgmm(cublasHandle_t handle, + cublasSideMode_t mode, int m, int n, + const cuComplex *A, int lda, + const cuComplex *x, int incx, + cuComplex *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasSideMode_t, int, int, const cuComplex *, int, + const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCdgmm"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasZdgmm(cublasHandle_t handle, + cublasSideMode_t mode, int m, int n, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *x, int incx, + 
cuDoubleComplex *C, int ldc) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasSideMode_t, int, int, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZdgmm"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, m, n, A, lda, x, incx, C, ldc); +} + +cublasStatus_t CUBLASWINAPI cublasStpttr(cublasHandle_t handle, + cublasFillMode_t uplo, int n, + const float *AP, float *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const float *, float *, int); + static auto func_ptr = LoadSymbol("cublasStpttr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, AP, A, lda); +} + +cublasStatus_t CUBLASWINAPI cublasDtpttr(cublasHandle_t handle, + cublasFillMode_t uplo, int n, + const double *AP, double *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const double *, double *, int); + static auto func_ptr = LoadSymbol("cublasDtpttr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, AP, A, lda); +} + +cublasStatus_t CUBLASWINAPI cublasCtpttr(cublasHandle_t handle, + cublasFillMode_t uplo, int n, + const cuComplex *AP, cuComplex *A, + int lda) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, + const cuComplex *, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtpttr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, AP, A, lda); +} + +cublasStatus_t CUBLASWINAPI cublasZtpttr(cublasHandle_t handle, + cublasFillMode_t uplo, int n, + const cuDoubleComplex *AP, + cuDoubleComplex *A, int lda) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, + cuDoubleComplex *, int); + static auto func_ptr = 
LoadSymbol("cublasZtpttr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, AP, A, lda); +} + +cublasStatus_t CUBLASWINAPI cublasStrttp(cublasHandle_t handle, + cublasFillMode_t uplo, int n, + const float *A, int lda, float *AP) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const float *, int, float *); + static auto func_ptr = LoadSymbol("cublasStrttp"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, A, lda, AP); +} + +cublasStatus_t CUBLASWINAPI cublasDtrttp(cublasHandle_t handle, + cublasFillMode_t uplo, int n, + const double *A, int lda, double *AP) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const double *, int, double *); + static auto func_ptr = LoadSymbol("cublasDtrttp"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, A, lda, AP); +} + +cublasStatus_t CUBLASWINAPI cublasCtrttp(cublasHandle_t handle, + cublasFillMode_t uplo, int n, + const cuComplex *A, int lda, + cuComplex *AP) { + using FuncPtr = + cublasStatus_t(CUBLASWINAPI *)(cublasHandle_t, cublasFillMode_t, int, + const cuComplex *, int, cuComplex *); + static auto func_ptr = LoadSymbol("cublasCtrttp"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, A, lda, AP); +} + +cublasStatus_t CUBLASWINAPI cublasZtrttp(cublasHandle_t handle, + cublasFillMode_t uplo, int n, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *AP) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)( + cublasHandle_t, cublasFillMode_t, int, const cuDoubleComplex *, int, + cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cublasZtrttp"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, uplo, n, A, lda, AP); +} + +cublasStatus CUBLASWINAPI cublasInit(void) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); + static auto func_ptr = 
LoadSymbol("cublasInit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +cublasStatus CUBLASWINAPI cublasShutdown(void) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); + static auto func_ptr = LoadSymbol("cublasShutdown"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +cublasStatus CUBLASWINAPI cublasGetError(void) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(); + static auto func_ptr = LoadSymbol("cublasGetError"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +cublasStatus CUBLASWINAPI cublasGetVersion(int *version) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int *); + static auto func_ptr = LoadSymbol("cublasGetVersion"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(version); +} + +cublasStatus CUBLASWINAPI cublasAlloc(int n, int elemSize, void **devicePtr) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(int, int, void **); + static auto func_ptr = LoadSymbol("cublasAlloc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(n, elemSize, devicePtr); +} + +cublasStatus CUBLASWINAPI cublasFree(void *devicePtr) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(void *); + static auto func_ptr = LoadSymbol("cublasFree"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devicePtr); +} + +cublasStatus CUBLASWINAPI cublasSetKernelStream(cudaStream_t stream) { + using FuncPtr = cublasStatus_t(CUBLASWINAPI *)(cudaStream_t); + static auto func_ptr = LoadSymbol("cublasSetKernelStream"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream); +} + +float CUBLASWINAPI cublasSnrm2(int n, const float *x, int incx) { + using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int); + static auto func_ptr = LoadSymbol("cublasSnrm2"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSnrm2"); + return func_ptr(n, x, incx); +} + +double CUBLASWINAPI cublasDnrm2(int n, const double *x, int incx) 
{ + using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int); + static auto func_ptr = LoadSymbol("cublasDnrm2"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDnrm2"); + return func_ptr(n, x, incx); +} + +float CUBLASWINAPI cublasScnrm2(int n, const cuComplex *x, int incx) { + using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasScnrm2"); + if (!func_ptr) LogFatalSymbolNotFound("cublasScnrm2"); + return func_ptr(n, x, incx); +} + +double CUBLASWINAPI cublasDznrm2(int n, const cuDoubleComplex *x, int incx) { + using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasDznrm2"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDznrm2"); + return func_ptr(n, x, incx); +} + +float CUBLASWINAPI cublasSdot(int n, const float *x, int incx, const float *y, + int incy) { + using FuncPtr = + float(CUBLASWINAPI *)(int, const float *, int, const float *, int); + static auto func_ptr = LoadSymbol("cublasSdot"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSdot"); + return func_ptr(n, x, incx, y, incy); +} + +double CUBLASWINAPI cublasDdot(int n, const double *x, int incx, + const double *y, int incy) { + using FuncPtr = + double(CUBLASWINAPI *)(int, const double *, int, const double *, int); + static auto func_ptr = LoadSymbol("cublasDdot"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDdot"); + return func_ptr(n, x, incx, y, incy); +} + +cuComplex CUBLASWINAPI cublasCdotu(int n, const cuComplex *x, int incx, + const cuComplex *y, int incy) { + using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const cuComplex *, int, + const cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCdotu"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCdotu"); + return func_ptr(n, x, incx, y, incy); +} + +cuComplex CUBLASWINAPI cublasCdotc(int n, const cuComplex *x, int incx, + const cuComplex *y, int incy) { + using FuncPtr = cuComplex(CUBLASWINAPI *)(int, const 
cuComplex *, int, + const cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCdotc"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCdotc"); + return func_ptr(n, x, incx, y, incy); +} + +cuDoubleComplex CUBLASWINAPI cublasZdotu(int n, const cuDoubleComplex *x, + int incx, const cuDoubleComplex *y, + int incy) { + using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)( + int, const cuDoubleComplex *, int, const cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZdotu"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZdotu"); + return func_ptr(n, x, incx, y, incy); +} + +cuDoubleComplex CUBLASWINAPI cublasZdotc(int n, const cuDoubleComplex *x, + int incx, const cuDoubleComplex *y, + int incy) { + using FuncPtr = cuDoubleComplex(CUBLASWINAPI *)( + int, const cuDoubleComplex *, int, const cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZdotc"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZdotc"); + return func_ptr(n, x, incx, y, incy); +} + +void CUBLASWINAPI cublasSscal(int n, float alpha, float *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(int, float, float *, int); + static auto func_ptr = LoadSymbol("cublasSscal"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSscal"); + return func_ptr(n, alpha, x, incx); +} + +void CUBLASWINAPI cublasDscal(int n, double alpha, double *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(int, double, double *, int); + static auto func_ptr = LoadSymbol("cublasDscal"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDscal"); + return func_ptr(n, alpha, x, incx); +} + +void CUBLASWINAPI cublasCscal(int n, cuComplex alpha, cuComplex *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCscal"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCscal"); + return func_ptr(n, alpha, x, incx); +} + +void CUBLASWINAPI cublasZscal(int n, cuDoubleComplex alpha, cuDoubleComplex *x, + int incx) { + using FuncPtr = 
+ void(CUBLASWINAPI *)(int, cuDoubleComplex, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZscal"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZscal"); + return func_ptr(n, alpha, x, incx); +} + +void CUBLASWINAPI cublasCsscal(int n, float alpha, cuComplex *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(int, float, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCsscal"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCsscal"); + return func_ptr(n, alpha, x, incx); +} + +void CUBLASWINAPI cublasZdscal(int n, double alpha, cuDoubleComplex *x, + int incx) { + using FuncPtr = void(CUBLASWINAPI *)(int, double, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZdscal"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZdscal"); + return func_ptr(n, alpha, x, incx); +} + +void CUBLASWINAPI cublasSaxpy(int n, float alpha, const float *x, int incx, + float *y, int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(int, float, const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasSaxpy"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSaxpy"); + return func_ptr(n, alpha, x, incx, y, incy); +} + +void CUBLASWINAPI cublasDaxpy(int n, double alpha, const double *x, int incx, + double *y, int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(int, double, const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDaxpy"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDaxpy"); + return func_ptr(n, alpha, x, incx, y, incy); +} + +void CUBLASWINAPI cublasCaxpy(int n, cuComplex alpha, const cuComplex *x, + int incx, cuComplex *y, int incy) { + using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex, const cuComplex *, int, + cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCaxpy"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCaxpy"); + return func_ptr(n, alpha, x, incx, y, incy); +} + +void CUBLASWINAPI cublasZaxpy(int n, cuDoubleComplex alpha, + const 
cuDoubleComplex *x, int incx, + cuDoubleComplex *y, int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(int, cuDoubleComplex, const cuDoubleComplex *, int, + cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZaxpy"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZaxpy"); + return func_ptr(n, alpha, x, incx, y, incy); +} + +void CUBLASWINAPI cublasScopy(int n, const float *x, int incx, float *y, + int incy) { + using FuncPtr = void(CUBLASWINAPI *)(int, const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasScopy"); + if (!func_ptr) LogFatalSymbolNotFound("cublasScopy"); + return func_ptr(n, x, incx, y, incy); +} + +void CUBLASWINAPI cublasDcopy(int n, const double *x, int incx, double *y, + int incy) { + using FuncPtr = void(CUBLASWINAPI *)(int, const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDcopy"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDcopy"); + return func_ptr(n, x, incx, y, incy); +} + +void CUBLASWINAPI cublasCcopy(int n, const cuComplex *x, int incx, cuComplex *y, + int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(int, const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCcopy"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCcopy"); + return func_ptr(n, x, incx, y, incy); +} + +void CUBLASWINAPI cublasZcopy(int n, const cuDoubleComplex *x, int incx, + cuDoubleComplex *y, int incy) { + using FuncPtr = void(CUBLASWINAPI *)(int, const cuDoubleComplex *, int, + cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZcopy"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZcopy"); + return func_ptr(n, x, incx, y, incy); +} + +void CUBLASWINAPI cublasSswap(int n, float *x, int incx, float *y, int incy) { + using FuncPtr = void(CUBLASWINAPI *)(int, float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasSswap"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSswap"); + return func_ptr(n, x, incx, y, incy); +} + +void 
CUBLASWINAPI cublasDswap(int n, double *x, int incx, double *y, int incy) { + using FuncPtr = void(CUBLASWINAPI *)(int, double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDswap"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDswap"); + return func_ptr(n, x, incx, y, incy); +} + +void CUBLASWINAPI cublasCswap(int n, cuComplex *x, int incx, cuComplex *y, + int incy) { + using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCswap"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCswap"); + return func_ptr(n, x, incx, y, incy); +} + +void CUBLASWINAPI cublasZswap(int n, cuDoubleComplex *x, int incx, + cuDoubleComplex *y, int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZswap"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZswap"); + return func_ptr(n, x, incx, y, incy); +} + +int CUBLASWINAPI cublasIsamax(int n, const float *x, int incx) { + using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int); + static auto func_ptr = LoadSymbol("cublasIsamax"); + if (!func_ptr) LogFatalSymbolNotFound("cublasIsamax"); + return func_ptr(n, x, incx); +} + +int CUBLASWINAPI cublasIdamax(int n, const double *x, int incx) { + using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int); + static auto func_ptr = LoadSymbol("cublasIdamax"); + if (!func_ptr) LogFatalSymbolNotFound("cublasIdamax"); + return func_ptr(n, x, incx); +} + +int CUBLASWINAPI cublasIcamax(int n, const cuComplex *x, int incx) { + using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasIcamax"); + if (!func_ptr) LogFatalSymbolNotFound("cublasIcamax"); + return func_ptr(n, x, incx); +} + +int CUBLASWINAPI cublasIzamax(int n, const cuDoubleComplex *x, int incx) { + using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); + static auto func_ptr = 
LoadSymbol("cublasIzamax"); + if (!func_ptr) LogFatalSymbolNotFound("cublasIzamax"); + return func_ptr(n, x, incx); +} + +int CUBLASWINAPI cublasIsamin(int n, const float *x, int incx) { + using FuncPtr = int(CUBLASWINAPI *)(int, const float *, int); + static auto func_ptr = LoadSymbol("cublasIsamin"); + if (!func_ptr) LogFatalSymbolNotFound("cublasIsamin"); + return func_ptr(n, x, incx); +} + +int CUBLASWINAPI cublasIdamin(int n, const double *x, int incx) { + using FuncPtr = int(CUBLASWINAPI *)(int, const double *, int); + static auto func_ptr = LoadSymbol("cublasIdamin"); + if (!func_ptr) LogFatalSymbolNotFound("cublasIdamin"); + return func_ptr(n, x, incx); +} + +int CUBLASWINAPI cublasIcamin(int n, const cuComplex *x, int incx) { + using FuncPtr = int(CUBLASWINAPI *)(int, const cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasIcamin"); + if (!func_ptr) LogFatalSymbolNotFound("cublasIcamin"); + return func_ptr(n, x, incx); +} + +int CUBLASWINAPI cublasIzamin(int n, const cuDoubleComplex *x, int incx) { + using FuncPtr = int(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasIzamin"); + if (!func_ptr) LogFatalSymbolNotFound("cublasIzamin"); + return func_ptr(n, x, incx); +} + +float CUBLASWINAPI cublasSasum(int n, const float *x, int incx) { + using FuncPtr = float(CUBLASWINAPI *)(int, const float *, int); + static auto func_ptr = LoadSymbol("cublasSasum"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSasum"); + return func_ptr(n, x, incx); +} + +double CUBLASWINAPI cublasDasum(int n, const double *x, int incx) { + using FuncPtr = double(CUBLASWINAPI *)(int, const double *, int); + static auto func_ptr = LoadSymbol("cublasDasum"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDasum"); + return func_ptr(n, x, incx); +} + +float CUBLASWINAPI cublasScasum(int n, const cuComplex *x, int incx) { + using FuncPtr = float(CUBLASWINAPI *)(int, const cuComplex *, int); + static auto func_ptr = 
LoadSymbol("cublasScasum"); + if (!func_ptr) LogFatalSymbolNotFound("cublasScasum"); + return func_ptr(n, x, incx); +} + +double CUBLASWINAPI cublasDzasum(int n, const cuDoubleComplex *x, int incx) { + using FuncPtr = double(CUBLASWINAPI *)(int, const cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasDzasum"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDzasum"); + return func_ptr(n, x, incx); +} + +void CUBLASWINAPI cublasSrot(int n, float *x, int incx, float *y, int incy, + float sc, float ss) { + using FuncPtr = + void(CUBLASWINAPI *)(int, float *, int, float *, int, float, float); + static auto func_ptr = LoadSymbol("cublasSrot"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSrot"); + return func_ptr(n, x, incx, y, incy, sc, ss); +} + +void CUBLASWINAPI cublasDrot(int n, double *x, int incx, double *y, int incy, + double sc, double ss) { + using FuncPtr = + void(CUBLASWINAPI *)(int, double *, int, double *, int, double, double); + static auto func_ptr = LoadSymbol("cublasDrot"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDrot"); + return func_ptr(n, x, incx, y, incy, sc, ss); +} + +void CUBLASWINAPI cublasCrot(int n, cuComplex *x, int incx, cuComplex *y, + int incy, float c, cuComplex s) { + using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, + float, cuComplex); + static auto func_ptr = LoadSymbol("cublasCrot"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCrot"); + return func_ptr(n, x, incx, y, incy, c, s); +} + +void CUBLASWINAPI cublasZrot(int n, cuDoubleComplex *x, int incx, + cuDoubleComplex *y, int incy, double sc, + cuDoubleComplex cs) { + using FuncPtr = + void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, cuDoubleComplex *, int, + double, cuDoubleComplex); + static auto func_ptr = LoadSymbol("cublasZrot"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZrot"); + return func_ptr(n, x, incx, y, incy, sc, cs); +} + +void CUBLASWINAPI cublasCsrot(int n, cuComplex *x, int incx, cuComplex *y, + int 
incy, float c, float s) { + using FuncPtr = void(CUBLASWINAPI *)(int, cuComplex *, int, cuComplex *, int, + float, float); + static auto func_ptr = LoadSymbol("cublasCsrot"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCsrot"); + return func_ptr(n, x, incx, y, incy, c, s); +} + +void CUBLASWINAPI cublasZdrot(int n, cuDoubleComplex *x, int incx, + cuDoubleComplex *y, int incy, double c, + double s) { + using FuncPtr = void(CUBLASWINAPI *)(int, cuDoubleComplex *, int, + cuDoubleComplex *, int, double, double); + static auto func_ptr = LoadSymbol("cublasZdrot"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZdrot"); + return func_ptr(n, x, incx, y, incy, c, s); +} + +void CUBLASWINAPI cublasSrotg(float *sa, float *sb, float *sc, float *ss) { + using FuncPtr = void(CUBLASWINAPI *)(float *, float *, float *, float *); + static auto func_ptr = LoadSymbol("cublasSrotg"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSrotg"); + return func_ptr(sa, sb, sc, ss); +} + +void CUBLASWINAPI cublasDrotg(double *sa, double *sb, double *sc, double *ss) { + using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *, double *); + static auto func_ptr = LoadSymbol("cublasDrotg"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDrotg"); + return func_ptr(sa, sb, sc, ss); +} + +void CUBLASWINAPI cublasCrotg(cuComplex *ca, cuComplex cb, float *sc, + cuComplex *cs) { + using FuncPtr = + void(CUBLASWINAPI *)(cuComplex *, cuComplex, float *, cuComplex *); + static auto func_ptr = LoadSymbol("cublasCrotg"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCrotg"); + return func_ptr(ca, cb, sc, cs); +} + +void CUBLASWINAPI cublasZrotg(cuDoubleComplex *ca, cuDoubleComplex cb, + double *sc, cuDoubleComplex *cs) { + using FuncPtr = void(CUBLASWINAPI *)(cuDoubleComplex *, cuDoubleComplex, + double *, cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cublasZrotg"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZrotg"); + return func_ptr(ca, cb, sc, cs); +} + +void CUBLASWINAPI 
cublasSrotm(int n, float *x, int incx, float *y, int incy, + const float *sparam) { + using FuncPtr = + void(CUBLASWINAPI *)(int, float *, int, float *, int, const float *); + static auto func_ptr = LoadSymbol("cublasSrotm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSrotm"); + return func_ptr(n, x, incx, y, incy, sparam); +} + +void CUBLASWINAPI cublasDrotm(int n, double *x, int incx, double *y, int incy, + const double *sparam) { + using FuncPtr = + void(CUBLASWINAPI *)(int, double *, int, double *, int, const double *); + static auto func_ptr = LoadSymbol("cublasDrotm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDrotm"); + return func_ptr(n, x, incx, y, incy, sparam); +} + +void CUBLASWINAPI cublasSrotmg(float *sd1, float *sd2, float *sx1, + const float *sy1, float *sparam) { + using FuncPtr = + void(CUBLASWINAPI *)(float *, float *, float *, const float *, float *); + static auto func_ptr = LoadSymbol("cublasSrotmg"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSrotmg"); + return func_ptr(sd1, sd2, sx1, sy1, sparam); +} + +void CUBLASWINAPI cublasDrotmg(double *sd1, double *sd2, double *sx1, + const double *sy1, double *sparam) { + using FuncPtr = void(CUBLASWINAPI *)(double *, double *, double *, + const double *, double *); + static auto func_ptr = LoadSymbol("cublasDrotmg"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDrotmg"); + return func_ptr(sd1, sd2, sx1, sy1, sparam); +} + +void CUBLASWINAPI cublasSgemv(char trans, int m, int n, float alpha, + const float *A, int lda, const float *x, int incx, + float beta, float *y, int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, int, float, const float *, int, + const float *, int, float, float *, int); + static auto func_ptr = LoadSymbol("cublasSgemv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSgemv"); + return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasDgemv(char trans, int m, int n, double alpha, + const double *A, int lda, 
const double *x, + int incx, double beta, double *y, int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, int, double, const double *, int, + const double *, int, double, double *, int); + static auto func_ptr = LoadSymbol("cublasDgemv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDgemv"); + return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasCgemv(char trans, int m, int n, cuComplex alpha, + const cuComplex *A, int lda, const cuComplex *x, + int incx, cuComplex beta, cuComplex *y, + int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, cuComplex, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCgemv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCgemv"); + return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasZgemv(char trans, int m, int n, cuDoubleComplex alpha, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *x, int incx, + cuDoubleComplex beta, cuDoubleComplex *y, + int incy) { + using FuncPtr = void(CUBLASWINAPI *)( + char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZgemv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZgemv"); + return func_ptr(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasSgbmv(char trans, int m, int n, int kl, int ku, + float alpha, const float *A, int lda, + const float *x, int incx, float beta, float *y, + int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, int, int, int, float, const float *, int, + const float *, int, float, float *, int); + static auto func_ptr = LoadSymbol("cublasSgbmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSgbmv"); + return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI 
cublasDgbmv(char trans, int m, int n, int kl, int ku, + double alpha, const double *A, int lda, + const double *x, int incx, double beta, double *y, + int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, int, int, int, double, const double *, + int, const double *, int, double, double *, int); + static auto func_ptr = LoadSymbol("cublasDgbmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDgbmv"); + return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasCgbmv(char trans, int m, int n, int kl, int ku, + cuComplex alpha, const cuComplex *A, int lda, + const cuComplex *x, int incx, cuComplex beta, + cuComplex *y, int incy) { + using FuncPtr = void(CUBLASWINAPI *)( + char, int, int, int, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, cuComplex, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCgbmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCgbmv"); + return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasZgbmv(char trans, int m, int n, int kl, int ku, + cuDoubleComplex alpha, const cuDoubleComplex *A, + int lda, const cuDoubleComplex *x, int incx, + cuDoubleComplex beta, cuDoubleComplex *y, + int incy) { + using FuncPtr = void(CUBLASWINAPI *)( + char, int, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZgbmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZgbmv"); + return func_ptr(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasStrmv(char uplo, char trans, char diag, int n, + const float *A, int lda, float *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *, + int, float *, int); + static auto func_ptr = LoadSymbol("cublasStrmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasStrmv"); + return 
func_ptr(uplo, trans, diag, n, A, lda, x, incx); +} + +void CUBLASWINAPI cublasDtrmv(char uplo, char trans, char diag, int n, + const double *A, int lda, double *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, + int, double *, int); + static auto func_ptr = LoadSymbol("cublasDtrmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmv"); + return func_ptr(uplo, trans, diag, n, A, lda, x, incx); +} + +void CUBLASWINAPI cublasCtrmv(char uplo, char trans, char diag, int n, + const cuComplex *A, int lda, cuComplex *x, + int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, + int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtrmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmv"); + return func_ptr(uplo, trans, diag, n, A, lda, x, incx); +} + +void CUBLASWINAPI cublasZtrmv(char uplo, char trans, char diag, int n, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *x, int incx) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int, + cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtrmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmv"); + return func_ptr(uplo, trans, diag, n, A, lda, x, incx); +} + +void CUBLASWINAPI cublasStbmv(char uplo, char trans, char diag, int n, int k, + const float *A, int lda, float *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, + const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasStbmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasStbmv"); + return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); +} + +void CUBLASWINAPI cublasDtbmv(char uplo, char trans, char diag, int n, int k, + const double *A, int lda, double *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, + const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDtbmv"); + if 
(!func_ptr) LogFatalSymbolNotFound("cublasDtbmv"); + return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); +} + +void CUBLASWINAPI cublasCtbmv(char uplo, char trans, char diag, int n, int k, + const cuComplex *A, int lda, cuComplex *x, + int incx) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, char, int, int, const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtbmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCtbmv"); + return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); +} + +void CUBLASWINAPI cublasZtbmv(char uplo, char trans, char diag, int n, int k, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *x, int incx) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, + int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtbmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZtbmv"); + return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); +} + +void CUBLASWINAPI cublasStpmv(char uplo, char trans, char diag, int n, + const float *AP, float *x, int incx) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int); + static auto func_ptr = LoadSymbol("cublasStpmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasStpmv"); + return func_ptr(uplo, trans, diag, n, AP, x, incx); +} + +void CUBLASWINAPI cublasDtpmv(char uplo, char trans, char diag, int n, + const double *AP, double *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, + double *, int); + static auto func_ptr = LoadSymbol("cublasDtpmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDtpmv"); + return func_ptr(uplo, trans, diag, n, AP, x, incx); +} + +void CUBLASWINAPI cublasCtpmv(char uplo, char trans, char diag, int n, + const cuComplex *AP, cuComplex *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, + cuComplex *, int); + static auto func_ptr = 
LoadSymbol("cublasCtpmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCtpmv"); + return func_ptr(uplo, trans, diag, n, AP, x, incx); +} + +void CUBLASWINAPI cublasZtpmv(char uplo, char trans, char diag, int n, + const cuDoubleComplex *AP, cuDoubleComplex *x, + int incx) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtpmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZtpmv"); + return func_ptr(uplo, trans, diag, n, AP, x, incx); +} + +void CUBLASWINAPI cublasStrsv(char uplo, char trans, char diag, int n, + const float *A, int lda, float *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const float *, + int, float *, int); + static auto func_ptr = LoadSymbol("cublasStrsv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasStrsv"); + return func_ptr(uplo, trans, diag, n, A, lda, x, incx); +} + +void CUBLASWINAPI cublasDtrsv(char uplo, char trans, char diag, int n, + const double *A, int lda, double *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, + int, double *, int); + static auto func_ptr = LoadSymbol("cublasDtrsv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDtrsv"); + return func_ptr(uplo, trans, diag, n, A, lda, x, incx); +} + +void CUBLASWINAPI cublasCtrsv(char uplo, char trans, char diag, int n, + const cuComplex *A, int lda, cuComplex *x, + int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, + int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtrsv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsv"); + return func_ptr(uplo, trans, diag, n, A, lda, x, incx); +} + +void CUBLASWINAPI cublasZtrsv(char uplo, char trans, char diag, int n, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *x, int incx) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, char, int, const cuDoubleComplex *, int, + 
cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtrsv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsv"); + return func_ptr(uplo, trans, diag, n, A, lda, x, incx); +} + +void CUBLASWINAPI cublasStpsv(char uplo, char trans, char diag, int n, + const float *AP, float *x, int incx) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, char, int, const float *, float *, int); + static auto func_ptr = LoadSymbol("cublasStpsv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasStpsv"); + return func_ptr(uplo, trans, diag, n, AP, x, incx); +} + +void CUBLASWINAPI cublasDtpsv(char uplo, char trans, char diag, int n, + const double *AP, double *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const double *, + double *, int); + static auto func_ptr = LoadSymbol("cublasDtpsv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDtpsv"); + return func_ptr(uplo, trans, diag, n, AP, x, incx); +} + +void CUBLASWINAPI cublasCtpsv(char uplo, char trans, char diag, int n, + const cuComplex *AP, cuComplex *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, const cuComplex *, + cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtpsv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCtpsv"); + return func_ptr(uplo, trans, diag, n, AP, x, incx); +} + +void CUBLASWINAPI cublasZtpsv(char uplo, char trans, char diag, int n, + const cuDoubleComplex *AP, cuDoubleComplex *x, + int incx) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, char, int, const cuDoubleComplex *, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtpsv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZtpsv"); + return func_ptr(uplo, trans, diag, n, AP, x, incx); +} + +void CUBLASWINAPI cublasStbsv(char uplo, char trans, char diag, int n, int k, + const float *A, int lda, float *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, + const float *, int, float *, int); + static 
auto func_ptr = LoadSymbol("cublasStbsv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasStbsv"); + return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); +} + +void CUBLASWINAPI cublasDtbsv(char uplo, char trans, char diag, int n, int k, + const double *A, int lda, double *x, int incx) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, int, int, + const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDtbsv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDtbsv"); + return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); +} + +void CUBLASWINAPI cublasCtbsv(char uplo, char trans, char diag, int n, int k, + const cuComplex *A, int lda, cuComplex *x, + int incx) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, char, int, int, const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtbsv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCtbsv"); + return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); +} + +void CUBLASWINAPI cublasZtbsv(char uplo, char trans, char diag, int n, int k, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *x, int incx) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, char, int, int, const cuDoubleComplex *, + int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtbsv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZtbsv"); + return func_ptr(uplo, trans, diag, n, k, A, lda, x, incx); +} + +void CUBLASWINAPI cublasSsymv(char uplo, int n, float alpha, const float *A, + int lda, const float *x, int incx, float beta, + float *y, int incy) { + using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, + const float *, int, float, float *, int); + static auto func_ptr = LoadSymbol("cublasSsymv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSsymv"); + return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasDsymv(char uplo, int n, double alpha, const double *A, + int lda, const 
double *x, int incx, double beta, + double *y, int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, double, const double *, int, + const double *, int, double, double *, int); + static auto func_ptr = LoadSymbol("cublasDsymv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDsymv"); + return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasChemv(char uplo, int n, cuComplex alpha, + const cuComplex *A, int lda, const cuComplex *x, + int incx, cuComplex beta, cuComplex *y, + int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, cuComplex, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasChemv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasChemv"); + return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasZhemv(char uplo, int n, cuDoubleComplex alpha, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *x, int incx, + cuDoubleComplex beta, cuDoubleComplex *y, + int incy) { + using FuncPtr = void(CUBLASWINAPI *)( + char, int, cuDoubleComplex, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZhemv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZhemv"); + return func_ptr(uplo, n, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasSsbmv(char uplo, int n, int k, float alpha, + const float *A, int lda, const float *x, int incx, + float beta, float *y, int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, int, float, const float *, int, + const float *, int, float, float *, int); + static auto func_ptr = LoadSymbol("cublasSsbmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSsbmv"); + return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasDsbmv(char uplo, int n, int k, double alpha, + const double *A, int lda, const 
double *x, + int incx, double beta, double *y, int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, int, double, const double *, int, + const double *, int, double, double *, int); + static auto func_ptr = LoadSymbol("cublasDsbmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDsbmv"); + return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasChbmv(char uplo, int n, int k, cuComplex alpha, + const cuComplex *A, int lda, const cuComplex *x, + int incx, cuComplex beta, cuComplex *y, + int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, cuComplex, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasChbmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasChbmv"); + return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasZhbmv(char uplo, int n, int k, cuDoubleComplex alpha, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *x, int incx, + cuDoubleComplex beta, cuDoubleComplex *y, + int incy) { + using FuncPtr = void(CUBLASWINAPI *)( + char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZhbmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZhbmv"); + return func_ptr(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasSspmv(char uplo, int n, float alpha, const float *AP, + const float *x, int incx, float beta, float *y, + int incy) { + using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, + const float *, int, float, float *, int); + static auto func_ptr = LoadSymbol("cublasSspmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSspmv"); + return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasDspmv(char uplo, int n, double alpha, const double *AP, + const double *x, int 
incx, double beta, double *y, + int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, double, const double *, const double *, + int, double, double *, int); + static auto func_ptr = LoadSymbol("cublasDspmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDspmv"); + return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasChpmv(char uplo, int n, cuComplex alpha, + const cuComplex *AP, const cuComplex *x, int incx, + cuComplex beta, cuComplex *y, int incy) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, + const cuComplex *, int, cuComplex, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasChpmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasChpmv"); + return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasZhpmv(char uplo, int n, cuDoubleComplex alpha, + const cuDoubleComplex *AP, + const cuDoubleComplex *x, int incx, + cuDoubleComplex beta, cuDoubleComplex *y, + int incy) { + using FuncPtr = void(CUBLASWINAPI *)( + char, int, cuDoubleComplex, const cuDoubleComplex *, + const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZhpmv"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZhpmv"); + return func_ptr(uplo, n, alpha, AP, x, incx, beta, y, incy); +} + +void CUBLASWINAPI cublasSger(int m, int n, float alpha, const float *x, + int incx, const float *y, int incy, float *A, + int lda) { + using FuncPtr = void(CUBLASWINAPI *)(int, int, float, const float *, int, + const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasSger"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSger"); + return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); +} + +void CUBLASWINAPI cublasDger(int m, int n, double alpha, const double *x, + int incx, const double *y, int incy, double *A, + int lda) { + using FuncPtr = void(CUBLASWINAPI *)(int, int, double, const double *, int, + 
const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDger"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDger"); + return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); +} + +void CUBLASWINAPI cublasCgeru(int m, int n, cuComplex alpha, const cuComplex *x, + int incx, const cuComplex *y, int incy, + cuComplex *A, int lda) { + using FuncPtr = + void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCgeru"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCgeru"); + return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); +} + +void CUBLASWINAPI cublasCgerc(int m, int n, cuComplex alpha, const cuComplex *x, + int incx, const cuComplex *y, int incy, + cuComplex *A, int lda) { + using FuncPtr = + void(CUBLASWINAPI *)(int, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCgerc"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCgerc"); + return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); +} + +void CUBLASWINAPI cublasZgeru(int m, int n, cuDoubleComplex alpha, + const cuDoubleComplex *x, int incx, + const cuDoubleComplex *y, int incy, + cuDoubleComplex *A, int lda) { + using FuncPtr = void(CUBLASWINAPI *)( + int, int, cuDoubleComplex, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZgeru"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZgeru"); + return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); +} + +void CUBLASWINAPI cublasZgerc(int m, int n, cuDoubleComplex alpha, + const cuDoubleComplex *x, int incx, + const cuDoubleComplex *y, int incy, + cuDoubleComplex *A, int lda) { + using FuncPtr = void(CUBLASWINAPI *)( + int, int, cuDoubleComplex, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto func_ptr = 
LoadSymbol("cublasZgerc"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZgerc"); + return func_ptr(m, n, alpha, x, incx, y, incy, A, lda); +} + +void CUBLASWINAPI cublasSsyr(char uplo, int n, float alpha, const float *x, + int incx, float *A, int lda) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, float, const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasSsyr"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr"); + return func_ptr(uplo, n, alpha, x, incx, A, lda); +} + +void CUBLASWINAPI cublasDsyr(char uplo, int n, double alpha, const double *x, + int incx, double *A, int lda) { + using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, + double *, int); + static auto func_ptr = LoadSymbol("cublasDsyr"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr"); + return func_ptr(uplo, n, alpha, x, incx, A, lda); +} + +void CUBLASWINAPI cublasCher(char uplo, int n, float alpha, const cuComplex *x, + int incx, cuComplex *A, int lda) { + using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, int, + cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCher"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCher"); + return func_ptr(uplo, n, alpha, x, incx, A, lda); +} + +void CUBLASWINAPI cublasZher(char uplo, int n, double alpha, + const cuDoubleComplex *x, int incx, + cuDoubleComplex *A, int lda) { + using FuncPtr = void(CUBLASWINAPI *)( + char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZher"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZher"); + return func_ptr(uplo, n, alpha, x, incx, A, lda); +} + +void CUBLASWINAPI cublasSspr(char uplo, int n, float alpha, const float *x, + int incx, float *AP) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, float, const float *, int, float *); + static auto func_ptr = LoadSymbol("cublasSspr"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSspr"); + return 
func_ptr(uplo, n, alpha, x, incx, AP); +} + +void CUBLASWINAPI cublasDspr(char uplo, int n, double alpha, const double *x, + int incx, double *AP) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, double, const double *, int, double *); + static auto func_ptr = LoadSymbol("cublasDspr"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDspr"); + return func_ptr(uplo, n, alpha, x, incx, AP); +} + +void CUBLASWINAPI cublasChpr(char uplo, int n, float alpha, const cuComplex *x, + int incx, cuComplex *AP) { + using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const cuComplex *, int, + cuComplex *); + static auto func_ptr = LoadSymbol("cublasChpr"); + if (!func_ptr) LogFatalSymbolNotFound("cublasChpr"); + return func_ptr(uplo, n, alpha, x, incx, AP); +} + +void CUBLASWINAPI cublasZhpr(char uplo, int n, double alpha, + const cuDoubleComplex *x, int incx, + cuDoubleComplex *AP) { + using FuncPtr = void(CUBLASWINAPI *)( + char, int, double, const cuDoubleComplex *, int, cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cublasZhpr"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZhpr"); + return func_ptr(uplo, n, alpha, x, incx, AP); +} + +void CUBLASWINAPI cublasSsyr2(char uplo, int n, float alpha, const float *x, + int incx, const float *y, int incy, float *A, + int lda) { + using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, + const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasSsyr2"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2"); + return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); +} + +void CUBLASWINAPI cublasDsyr2(char uplo, int n, double alpha, const double *x, + int incx, const double *y, int incy, double *A, + int lda) { + using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, + const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDsyr2"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2"); + return func_ptr(uplo, n, alpha, x, incx, 
y, incy, A, lda); +} + +void CUBLASWINAPI cublasCher2(char uplo, int n, cuComplex alpha, + const cuComplex *x, int incx, const cuComplex *y, + int incy, cuComplex *A, int lda) { + using FuncPtr = + void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCher2"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCher2"); + return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); +} + +void CUBLASWINAPI cublasZher2(char uplo, int n, cuDoubleComplex alpha, + const cuDoubleComplex *x, int incx, + const cuDoubleComplex *y, int incy, + cuDoubleComplex *A, int lda) { + using FuncPtr = void(CUBLASWINAPI *)( + char, int, cuDoubleComplex, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZher2"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZher2"); + return func_ptr(uplo, n, alpha, x, incx, y, incy, A, lda); +} + +void CUBLASWINAPI cublasSspr2(char uplo, int n, float alpha, const float *x, + int incx, const float *y, int incy, float *AP) { + using FuncPtr = void(CUBLASWINAPI *)(char, int, float, const float *, int, + const float *, int, float *); + static auto func_ptr = LoadSymbol("cublasSspr2"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSspr2"); + return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); +} + +void CUBLASWINAPI cublasDspr2(char uplo, int n, double alpha, const double *x, + int incx, const double *y, int incy, double *AP) { + using FuncPtr = void(CUBLASWINAPI *)(char, int, double, const double *, int, + const double *, int, double *); + static auto func_ptr = LoadSymbol("cublasDspr2"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDspr2"); + return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); +} + +void CUBLASWINAPI cublasChpr2(char uplo, int n, cuComplex alpha, + const cuComplex *x, int incx, const cuComplex *y, + int incy, cuComplex *AP) { + using FuncPtr = + 
void(CUBLASWINAPI *)(char, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, cuComplex *); + static auto func_ptr = LoadSymbol("cublasChpr2"); + if (!func_ptr) LogFatalSymbolNotFound("cublasChpr2"); + return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); +} + +void CUBLASWINAPI cublasZhpr2(char uplo, int n, cuDoubleComplex alpha, + const cuDoubleComplex *x, int incx, + const cuDoubleComplex *y, int incy, + cuDoubleComplex *AP) { + using FuncPtr = void(CUBLASWINAPI *)( + char, int, cuDoubleComplex, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cublasZhpr2"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZhpr2"); + return func_ptr(uplo, n, alpha, x, incx, y, incy, AP); +} + +void CUBLASWINAPI cublasSgemm(char transa, char transb, int m, int n, int k, + float alpha, const float *A, int lda, + const float *B, int ldb, float beta, float *C, + int ldc) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, int, int, int, float, const float *, int, + const float *, int, float, float *, int); + static auto func_ptr = LoadSymbol("cublasSgemm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSgemm"); + return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasDgemm(char transa, char transb, int m, int n, int k, + double alpha, const double *A, int lda, + const double *B, int ldb, double beta, double *C, + int ldc) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, int, int, int, double, const double *, + int, const double *, int, double, double *, int); + static auto func_ptr = LoadSymbol("cublasDgemm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDgemm"); + return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasCgemm(char transa, char transb, int m, int n, int k, + cuComplex alpha, const cuComplex *A, int lda, + const cuComplex *B, int ldb, cuComplex beta, + cuComplex *C, 
int ldc) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, int, int, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, cuComplex, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCgemm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCgemm"); + return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasZgemm(char transa, char transb, int m, int n, int k, + cuDoubleComplex alpha, const cuDoubleComplex *A, + int lda, const cuDoubleComplex *B, int ldb, + cuDoubleComplex beta, cuDoubleComplex *C, + int ldc) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, int, int, int, cuDoubleComplex, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZgemm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZgemm"); + return func_ptr(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasSsyrk(char uplo, char trans, int n, int k, float alpha, + const float *A, int lda, float beta, float *C, + int ldc) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, float, + const float *, int, float, float *, int); + static auto func_ptr = LoadSymbol("cublasSsyrk"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSsyrk"); + return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +void CUBLASWINAPI cublasDsyrk(char uplo, char trans, int n, int k, double alpha, + const double *A, int lda, double beta, double *C, + int ldc) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, int, int, double, const double *, int, double, double *, int); + static auto func_ptr = LoadSymbol("cublasDsyrk"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDsyrk"); + return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +void CUBLASWINAPI cublasCsyrk(char uplo, char trans, int n, int k, + cuComplex alpha, const cuComplex *A, int lda, + cuComplex beta, 
cuComplex *C, int ldc) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, int, int, cuComplex, const cuComplex *, + int, cuComplex, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCsyrk"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCsyrk"); + return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +void CUBLASWINAPI cublasZsyrk(char uplo, char trans, int n, int k, + cuDoubleComplex alpha, const cuDoubleComplex *A, + int lda, cuDoubleComplex beta, cuDoubleComplex *C, + int ldc) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, cuDoubleComplex, + const cuDoubleComplex *, int, + cuDoubleComplex, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZsyrk"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZsyrk"); + return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +void CUBLASWINAPI cublasCherk(char uplo, char trans, int n, int k, float alpha, + const cuComplex *A, int lda, float beta, + cuComplex *C, int ldc) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, int, int, float, const cuComplex *, int, + float, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCherk"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCherk"); + return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +void CUBLASWINAPI cublasZherk(char uplo, char trans, int n, int k, double alpha, + const cuDoubleComplex *A, int lda, double beta, + cuDoubleComplex *C, int ldc) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, int, int, double, + const cuDoubleComplex *, int, double, + cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZherk"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZherk"); + return func_ptr(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); +} + +void CUBLASWINAPI cublasSsyr2k(char uplo, char trans, int n, int k, float alpha, + const float *A, int lda, const float *B, int ldb, + float beta, float *C, int ldc) { + using FuncPtr = + void(CUBLASWINAPI 
*)(char, char, int, int, float, const float *, int, + const float *, int, float, float *, int); + static auto func_ptr = LoadSymbol("cublasSsyr2k"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSsyr2k"); + return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasDsyr2k(char uplo, char trans, int n, int k, + double alpha, const double *A, int lda, + const double *B, int ldb, double beta, double *C, + int ldc) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int, + const double *, int, double, double *, int); + static auto func_ptr = LoadSymbol("cublasDsyr2k"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDsyr2k"); + return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasCsyr2k(char uplo, char trans, int n, int k, + cuComplex alpha, const cuComplex *A, int lda, + const cuComplex *B, int ldb, cuComplex beta, + cuComplex *C, int ldc) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, int, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, cuComplex, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCsyr2k"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCsyr2k"); + return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasZsyr2k(char uplo, char trans, int n, int k, + cuDoubleComplex alpha, const cuDoubleComplex *A, + int lda, const cuDoubleComplex *B, int ldb, + cuDoubleComplex beta, cuDoubleComplex *C, + int ldc) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZsyr2k"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZsyr2k"); + return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasCher2k(char uplo, char trans, int n, int k, + 
cuComplex alpha, const cuComplex *A, int lda, + const cuComplex *B, int ldb, float beta, + cuComplex *C, int ldc) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, int, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, float, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCher2k"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCher2k"); + return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasZher2k(char uplo, char trans, int n, int k, + cuDoubleComplex alpha, const cuDoubleComplex *A, + int lda, const cuDoubleComplex *B, int ldb, + double beta, cuDoubleComplex *C, int ldc) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, double, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZher2k"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZher2k"); + return func_ptr(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasSsymm(char side, char uplo, int m, int n, float alpha, + const float *A, int lda, const float *B, int ldb, + float beta, float *C, int ldc) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, int, int, float, const float *, int, + const float *, int, float, float *, int); + static auto func_ptr = LoadSymbol("cublasSsymm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasSsymm"); + return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasDsymm(char side, char uplo, int m, int n, double alpha, + const double *A, int lda, const double *B, + int ldb, double beta, double *C, int ldc) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, int, int, double, const double *, int, + const double *, int, double, double *, int); + static auto func_ptr = LoadSymbol("cublasDsymm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDsymm"); + return func_ptr(side, uplo, m, n, alpha, A, lda, B, 
ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasCsymm(char side, char uplo, int m, int n, + cuComplex alpha, const cuComplex *A, int lda, + const cuComplex *B, int ldb, cuComplex beta, + cuComplex *C, int ldc) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, int, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, cuComplex, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCsymm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCsymm"); + return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasZsymm(char side, char uplo, int m, int n, + cuDoubleComplex alpha, const cuDoubleComplex *A, + int lda, const cuDoubleComplex *B, int ldb, + cuDoubleComplex beta, cuDoubleComplex *C, + int ldc) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, int, int, cuDoubleComplex, const cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZsymm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZsymm"); + return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasChemm(char side, char uplo, int m, int n, + cuComplex alpha, const cuComplex *A, int lda, + const cuComplex *B, int ldb, cuComplex beta, + cuComplex *C, int ldc) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, int, int, cuComplex, const cuComplex *, int, + const cuComplex *, int, cuComplex, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasChemm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasChemm"); + return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasZhemm(char side, char uplo, int m, int n, + cuDoubleComplex alpha, const cuDoubleComplex *A, + int lda, const cuDoubleComplex *B, int ldb, + cuDoubleComplex beta, cuDoubleComplex *C, + int ldc) { + using FuncPtr = void(CUBLASWINAPI *)( + char, char, int, int, cuDoubleComplex, const 
cuDoubleComplex *, int, + const cuDoubleComplex *, int, cuDoubleComplex, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZhemm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZhemm"); + return func_ptr(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); +} + +void CUBLASWINAPI cublasStrsm(char side, char uplo, char transa, char diag, + int m, int n, float alpha, const float *A, + int lda, float *B, int ldb) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float, + const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasStrsm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasStrsm"); + return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); +} + +void CUBLASWINAPI cublasDtrsm(char side, char uplo, char transa, char diag, + int m, int n, double alpha, const double *A, + int lda, double *B, int ldb) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double, + const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDtrsm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDtrsm"); + return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); +} + +void CUBLASWINAPI cublasCtrsm(char side, char uplo, char transa, char diag, + int m, int n, cuComplex alpha, const cuComplex *A, + int lda, cuComplex *B, int ldb) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, + const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtrsm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCtrsm"); + return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); +} + +void CUBLASWINAPI cublasZtrsm(char side, char uplo, char transa, char diag, + int m, int n, cuDoubleComplex alpha, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *B, int ldb) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, + cuDoubleComplex, const cuDoubleComplex *, + 
int, cuDoubleComplex *, int); + static auto func_ptr = LoadSymbol("cublasZtrsm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZtrsm"); + return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); +} + +void CUBLASWINAPI cublasStrmm(char side, char uplo, char transa, char diag, + int m, int n, float alpha, const float *A, + int lda, float *B, int ldb) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, float, + const float *, int, float *, int); + static auto func_ptr = LoadSymbol("cublasStrmm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasStrmm"); + return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); +} + +void CUBLASWINAPI cublasDtrmm(char side, char uplo, char transa, char diag, + int m, int n, double alpha, const double *A, + int lda, double *B, int ldb) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, double, + const double *, int, double *, int); + static auto func_ptr = LoadSymbol("cublasDtrmm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasDtrmm"); + return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); +} + +void CUBLASWINAPI cublasCtrmm(char side, char uplo, char transa, char diag, + int m, int n, cuComplex alpha, const cuComplex *A, + int lda, cuComplex *B, int ldb) { + using FuncPtr = + void(CUBLASWINAPI *)(char, char, char, char, int, int, cuComplex, + const cuComplex *, int, cuComplex *, int); + static auto func_ptr = LoadSymbol("cublasCtrmm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasCtrmm"); + return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); +} + +void CUBLASWINAPI cublasZtrmm(char side, char uplo, char transa, char diag, + int m, int n, cuDoubleComplex alpha, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *B, int ldb) { + using FuncPtr = void(CUBLASWINAPI *)(char, char, char, char, int, int, + cuDoubleComplex, const cuDoubleComplex *, + int, cuDoubleComplex *, int); + static auto func_ptr = 
LoadSymbol("cublasZtrmm"); + if (!func_ptr) LogFatalSymbolNotFound("cublasZtrmm"); + return func_ptr(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); +} + +} diff --git a/tensorflow/stream_executor/cuda/cublas_stub.cc b/tensorflow/stream_executor/cuda/cublas_stub.cc index b8e203fe235..f9cfcc9d4b3 100644 --- a/tensorflow/stream_executor/cuda/cublas_stub.cc +++ b/tensorflow/stream_executor/cuda/cublas_stub.cc @@ -61,5 +61,5 @@ typedef enum {} cublasMath_t; #if CUDA_VERSION < 9020 #include "tensorflow/stream_executor/cuda/cublas_9_0.inc" #else -#include "tensorflow/stream_executor/cuda/cublas_10_0.inc" +#include "tensorflow/stream_executor/cuda/cublas_11_0.inc" #endif diff --git a/tensorflow/stream_executor/cuda/cuda_12_0.inc b/tensorflow/stream_executor/cuda/cuda_12_0.inc new file mode 100644 index 00000000000..9d97438c68f --- /dev/null +++ b/tensorflow/stream_executor/cuda/cuda_12_0.inc @@ -0,0 +1,3324 @@ +// Auto-generated, do not edit. + +extern "C" { + +CUresult CUDAAPI cuGetErrorString(CUresult error, const char **pStr) { + using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); + static auto func_ptr = LoadSymbol("cuGetErrorString"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(error, pStr); +} + +CUresult CUDAAPI cuGetErrorName(CUresult error, const char **pStr) { + using FuncPtr = CUresult(CUDAAPI *)(CUresult, const char **); + static auto func_ptr = LoadSymbol("cuGetErrorName"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(error, pStr); +} + +CUresult CUDAAPI cuInit(unsigned int Flags) { + using FuncPtr = CUresult(CUDAAPI *)(unsigned int); + static auto func_ptr = LoadSymbol("cuInit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(Flags); +} + +CUresult CUDAAPI cuDriverGetVersion(int *driverVersion) { + using FuncPtr = CUresult(CUDAAPI *)(int *); + static auto func_ptr = LoadSymbol("cuDriverGetVersion"); + if (!func_ptr) return GetSymbolNotFoundError(); + return 
func_ptr(driverVersion); +} + +CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal) { + using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, int); + static auto func_ptr = LoadSymbol("cuDeviceGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, ordinal); +} + +CUresult CUDAAPI cuDeviceGetCount(int *count) { + using FuncPtr = CUresult(CUDAAPI *)(int *); + static auto func_ptr = LoadSymbol("cuDeviceGetCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(count); +} + +CUresult CUDAAPI cuDeviceGetName(char *name, int len, CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceGetName"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(name, len, dev); +} + +CUresult CUDAAPI cuDeviceGetUuid(CUuuid *uuid, CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(CUuuid *, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceGetUuid"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(uuid, dev); +} + +CUresult CUDAAPI cuDeviceGetUuid_v2(CUuuid *uuid, CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(CUuuid *, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceGetUuid_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(uuid, dev); +} + +CUresult CUDAAPI cuDeviceGetLuid(char *luid, unsigned int *deviceNodeMask, + CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(char *, unsigned int *, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceGetLuid"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(luid, deviceNodeMask, dev); +} + +CUresult CUDAAPI cuDeviceTotalMem(size_t *bytes, CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceTotalMem_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(bytes, dev); +} + +CUresult CUDAAPI cuDeviceGetTexture1DLinearMaxWidth(size_t 
*maxWidthInElements, + CUarray_format format, + unsigned numChannels, + CUdevice dev) { + using FuncPtr = + CUresult(CUDAAPI *)(size_t *, CUarray_format, unsigned int, CUdevice); + static auto func_ptr = + LoadSymbol("cuDeviceGetTexture1DLinearMaxWidth"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(maxWidthInElements, format, numChannels, dev); +} + +CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, + CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice_attribute, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pi, attrib, dev); +} + +CUresult CUDAAPI cuDeviceGetNvSciSyncAttributes(void *nvSciSyncAttrList, + CUdevice dev, int flags) { + using FuncPtr = CUresult(CUDAAPI *)(void *, CUdevice, int); + static auto func_ptr = LoadSymbol("cuDeviceGetNvSciSyncAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(nvSciSyncAttrList, dev, flags); +} + +CUresult CUDAAPI cuDeviceSetMemPool(CUdevice dev, CUmemoryPool pool) { + using FuncPtr = CUresult(CUDAAPI *)(CUdevice, CUmemoryPool); + static auto func_ptr = LoadSymbol("cuDeviceSetMemPool"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dev, pool); +} + +CUresult CUDAAPI cuDeviceGetMemPool(CUmemoryPool *pool, CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool *, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceGetMemPool"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pool, dev); +} + +CUresult CUDAAPI cuDeviceGetDefaultMemPool(CUmemoryPool *pool_out, + CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool *, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceGetDefaultMemPool"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pool_out, dev); +} + +CUresult CUDAAPI cuDeviceGetExecAffinitySupport(int *pi, + CUexecAffinityType type, + 
CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(int *, CUexecAffinityType, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceGetExecAffinitySupport"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pi, type, dev); +} + +CUresult CUDAAPI +cuFlushGPUDirectRDMAWrites(CUflushGPUDirectRDMAWritesTarget target, + CUflushGPUDirectRDMAWritesScope scope) { + using FuncPtr = CUresult(CUDAAPI *)(CUflushGPUDirectRDMAWritesTarget, + CUflushGPUDirectRDMAWritesScope); + static auto func_ptr = LoadSymbol("cuFlushGPUDirectRDMAWrites"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(target, scope); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceGetProperties(CUdevprop *prop, + CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(CUdevprop *, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceGetProperties"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(prop, dev); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuDeviceComputeCapability(int *major, + int *minor, + CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceComputeCapability"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(major, minor, dev); +} + +CUresult CUDAAPI cuDevicePrimaryCtxRetain(CUcontext *pctx, CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, CUdevice); + static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRetain"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pctx, dev); +} + +CUresult CUDAAPI cuDevicePrimaryCtxRelease(CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(CUdevice); + static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxRelease_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dev); +} + +CUresult CUDAAPI cuDevicePrimaryCtxSetFlags(CUdevice dev, unsigned int flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int); + static auto func_ptr = 
LoadSymbol("cuDevicePrimaryCtxSetFlags_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dev, flags); +} + +CUresult CUDAAPI cuDevicePrimaryCtxGetState(CUdevice dev, unsigned int *flags, + int *active) { + using FuncPtr = CUresult(CUDAAPI *)(CUdevice, unsigned int *, int *); + static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxGetState"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dev, flags, active); +} + +CUresult CUDAAPI cuDevicePrimaryCtxReset(CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(CUdevice); + static auto func_ptr = LoadSymbol("cuDevicePrimaryCtxReset_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dev); +} + +CUresult CUDAAPI cuCtxCreate(CUcontext *pctx, unsigned int flags, + CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int, CUdevice); + static auto func_ptr = LoadSymbol("cuCtxCreate_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pctx, flags, dev); +} + +CUresult CUDAAPI cuCtxCreate_v3(CUcontext *pctx, + CUexecAffinityParam *paramsArray, int numParams, + unsigned int flags, CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, CUexecAffinityParam *, int, + unsigned int, CUdevice); + static auto func_ptr = LoadSymbol("cuCtxCreate_v3"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pctx, paramsArray, numParams, flags, dev); +} + +CUresult CUDAAPI cuCtxDestroy(CUcontext ctx) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext); + static auto func_ptr = LoadSymbol("cuCtxDestroy_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctx); +} + +CUresult CUDAAPI cuCtxPushCurrent(CUcontext ctx) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext); + static auto func_ptr = LoadSymbol("cuCtxPushCurrent_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctx); +} + +CUresult CUDAAPI cuCtxPopCurrent(CUcontext *pctx) { + using FuncPtr = CUresult(CUDAAPI 
*)(CUcontext *); + static auto func_ptr = LoadSymbol("cuCtxPopCurrent_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pctx); +} + +CUresult CUDAAPI cuCtxSetCurrent(CUcontext ctx) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext); + static auto func_ptr = LoadSymbol("cuCtxSetCurrent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctx); +} + +CUresult CUDAAPI cuCtxGetCurrent(CUcontext *pctx) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext *); + static auto func_ptr = LoadSymbol("cuCtxGetCurrent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pctx); +} + +CUresult CUDAAPI cuCtxGetDevice(CUdevice *device) { + using FuncPtr = CUresult(CUDAAPI *)(CUdevice *); + static auto func_ptr = LoadSymbol("cuCtxGetDevice"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device); +} + +CUresult CUDAAPI cuCtxGetFlags(unsigned int *flags) { + using FuncPtr = CUresult(CUDAAPI *)(unsigned int *); + static auto func_ptr = LoadSymbol("cuCtxGetFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(flags); +} + +CUresult CUDAAPI cuCtxGetId(CUcontext ctx, unsigned long long *ctxId) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned long long *); + static auto func_ptr = LoadSymbol("cuCtxGetId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctx, ctxId); +} + +CUresult CUDAAPI cuCtxSynchronize(void) { + using FuncPtr = CUresult(CUDAAPI *)(); + static auto func_ptr = LoadSymbol("cuCtxSynchronize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +CUresult CUDAAPI cuCtxSetLimit(CUlimit limit, size_t value) { + using FuncPtr = CUresult(CUDAAPI *)(CUlimit, size_t); + static auto func_ptr = LoadSymbol("cuCtxSetLimit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(limit, value); +} + +CUresult CUDAAPI cuCtxGetLimit(size_t *pvalue, CUlimit limit) { + using FuncPtr = CUresult(CUDAAPI *)(size_t *, 
CUlimit); + static auto func_ptr = LoadSymbol("cuCtxGetLimit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pvalue, limit); +} + +CUresult CUDAAPI cuCtxGetCacheConfig(CUfunc_cache *pconfig) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache *); + static auto func_ptr = LoadSymbol("cuCtxGetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pconfig); +} + +CUresult CUDAAPI cuCtxSetCacheConfig(CUfunc_cache config) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunc_cache); + static auto func_ptr = LoadSymbol("cuCtxSetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(config); +} + +CUresult CUDAAPI cuCtxGetSharedMemConfig(CUsharedconfig *pConfig) { + using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig *); + static auto func_ptr = LoadSymbol("cuCtxGetSharedMemConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pConfig); +} + +CUresult CUDAAPI cuCtxSetSharedMemConfig(CUsharedconfig config) { + using FuncPtr = CUresult(CUDAAPI *)(CUsharedconfig); + static auto func_ptr = LoadSymbol("cuCtxSetSharedMemConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(config); +} + +CUresult CUDAAPI cuCtxGetApiVersion(CUcontext ctx, unsigned int *version) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int *); + static auto func_ptr = LoadSymbol("cuCtxGetApiVersion"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctx, version); +} + +CUresult CUDAAPI cuCtxGetStreamPriorityRange(int *leastPriority, + int *greatestPriority) { + using FuncPtr = CUresult(CUDAAPI *)(int *, int *); + static auto func_ptr = LoadSymbol("cuCtxGetStreamPriorityRange"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(leastPriority, greatestPriority); +} + +CUresult CUDAAPI cuCtxResetPersistingL2Cache(void) { + using FuncPtr = CUresult(CUDAAPI *)(); + static auto func_ptr = LoadSymbol("cuCtxResetPersistingL2Cache"); + if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +CUresult CUDAAPI cuCtxGetExecAffinity(CUexecAffinityParam *pExecAffinity, + CUexecAffinityType type) { + using FuncPtr = + CUresult(CUDAAPI *)(CUexecAffinityParam *, CUexecAffinityType); + static auto func_ptr = LoadSymbol("cuCtxGetExecAffinity"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pExecAffinity, type); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuCtxAttach(CUcontext *pctx, + unsigned int flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext *, unsigned int); + static auto func_ptr = LoadSymbol("cuCtxAttach"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pctx, flags); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuCtxDetach(CUcontext ctx) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext); + static auto func_ptr = LoadSymbol("cuCtxDetach"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctx); +} + +CUresult CUDAAPI cuModuleLoad(CUmodule *module, const char *fname) { + using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const char *); + static auto func_ptr = LoadSymbol("cuModuleLoad"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(module, fname); +} + +CUresult CUDAAPI cuModuleLoadData(CUmodule *module, const void *image) { + using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); + static auto func_ptr = LoadSymbol("cuModuleLoadData"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(module, image); +} + +CUresult CUDAAPI cuModuleLoadDataEx(CUmodule *module, const void *image, + unsigned int numOptions, + CUjit_option *options, + void **optionValues) { + using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *, unsigned int, + CUjit_option *, void **); + static auto func_ptr = LoadSymbol("cuModuleLoadDataEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(module, image, numOptions, options, optionValues); +} + +CUresult CUDAAPI 
cuModuleLoadFatBinary(CUmodule *module, const void *fatCubin) { + using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, const void *); + static auto func_ptr = LoadSymbol("cuModuleLoadFatBinary"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(module, fatCubin); +} + +CUresult CUDAAPI cuModuleUnload(CUmodule hmod) { + using FuncPtr = CUresult(CUDAAPI *)(CUmodule); + static auto func_ptr = LoadSymbol("cuModuleUnload"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hmod); +} + +CUresult CUDAAPI cuModuleGetLoadingMode(CUmoduleLoadingMode *mode) { + using FuncPtr = CUresult(CUDAAPI *)(CUmoduleLoadingMode *); + static auto func_ptr = LoadSymbol("cuModuleGetLoadingMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mode); +} + +CUresult CUDAAPI cuModuleGetFunction(CUfunction *hfunc, CUmodule hmod, + const char *name) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction *, CUmodule, const char *); + static auto func_ptr = LoadSymbol("cuModuleGetFunction"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hfunc, hmod, name); +} + +CUresult CUDAAPI cuModuleGetGlobal(CUdeviceptr *dptr, size_t *bytes, + CUmodule hmod, const char *name) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUmodule, const char *); + static auto func_ptr = LoadSymbol("cuModuleGetGlobal_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dptr, bytes, hmod, name); +} + +CUresult CUDAAPI cuLinkCreate(unsigned int numOptions, CUjit_option *options, + void **optionValues, CUlinkState *stateOut) { + using FuncPtr = + CUresult(CUDAAPI *)(unsigned int, CUjit_option *, void **, CUlinkState *); + static auto func_ptr = LoadSymbol("cuLinkCreate_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(numOptions, options, optionValues, stateOut); +} + +CUresult CUDAAPI cuLinkAddData(CUlinkState state, CUjitInputType type, + void *data, size_t size, const char *name, + unsigned 
int numOptions, CUjit_option *options, + void **optionValues) { + using FuncPtr = + CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, void *, size_t, + const char *, unsigned int, CUjit_option *, void **); + static auto func_ptr = LoadSymbol("cuLinkAddData_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(state, type, data, size, name, numOptions, options, + optionValues); +} + +CUresult CUDAAPI cuLinkAddFile(CUlinkState state, CUjitInputType type, + const char *path, unsigned int numOptions, + CUjit_option *options, void **optionValues) { + using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, CUjitInputType, const char *, + unsigned int, CUjit_option *, void **); + static auto func_ptr = LoadSymbol("cuLinkAddFile_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(state, type, path, numOptions, options, optionValues); +} + +CUresult CUDAAPI cuLinkComplete(CUlinkState state, void **cubinOut, + size_t *sizeOut) { + using FuncPtr = CUresult(CUDAAPI *)(CUlinkState, void **, size_t *); + static auto func_ptr = LoadSymbol("cuLinkComplete"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(state, cubinOut, sizeOut); +} + +CUresult CUDAAPI cuLinkDestroy(CUlinkState state) { + using FuncPtr = CUresult(CUDAAPI *)(CUlinkState); + static auto func_ptr = LoadSymbol("cuLinkDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(state); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuModuleGetTexRef(CUtexref *pTexRef, + CUmodule hmod, + const char *name) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref *, CUmodule, const char *); + static auto func_ptr = LoadSymbol("cuModuleGetTexRef"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pTexRef, hmod, name); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuModuleGetSurfRef(CUsurfref *pSurfRef, + CUmodule hmod, + const char *name) { + using FuncPtr = CUresult(CUDAAPI *)(CUsurfref *, CUmodule, const char *); + static auto func_ptr = 
LoadSymbol("cuModuleGetSurfRef"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pSurfRef, hmod, name); +} + +CUresult CUDAAPI cuLibraryLoadData(CUlibrary *library, const void *code, + CUjit_option *jitOptions, + void **jitOptionsValues, + unsigned int numJitOptions, + CUlibraryOption *libraryOptions, + void **libraryOptionValues, + unsigned int numLibraryOptions) { + using FuncPtr = CUresult(CUDAAPI *)(CUlibrary *, const void *, CUjit_option *, + void **, unsigned int, CUlibraryOption *, + void **, unsigned int); + static auto func_ptr = LoadSymbol("cuLibraryLoadData"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(library, code, jitOptions, jitOptionsValues, numJitOptions, + libraryOptions, libraryOptionValues, numLibraryOptions); +} + +CUresult CUDAAPI cuLibraryLoadFromFile(CUlibrary *library, const char *fileName, + CUjit_option *jitOptions, + void **jitOptionsValues, + unsigned int numJitOptions, + CUlibraryOption *libraryOptions, + void **libraryOptionValues, + unsigned int numLibraryOptions) { + using FuncPtr = CUresult(CUDAAPI *)(CUlibrary *, const char *, CUjit_option *, + void **, unsigned int, CUlibraryOption *, + void **, unsigned int); + static auto func_ptr = LoadSymbol("cuLibraryLoadFromFile"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(library, fileName, jitOptions, jitOptionsValues, + numJitOptions, libraryOptions, libraryOptionValues, + numLibraryOptions); +} + +CUresult CUDAAPI cuLibraryUnload(CUlibrary library) { + using FuncPtr = CUresult(CUDAAPI *)(CUlibrary); + static auto func_ptr = LoadSymbol("cuLibraryUnload"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(library); +} + +CUresult CUDAAPI cuLibraryGetKernel(CUkernel *pKernel, CUlibrary library, + const char *name) { + using FuncPtr = CUresult(CUDAAPI *)(CUkernel *, CUlibrary, const char *); + static auto func_ptr = LoadSymbol("cuLibraryGetKernel"); + if (!func_ptr) return GetSymbolNotFoundError(); + 
return func_ptr(pKernel, library, name); +} + +CUresult CUDAAPI cuLibraryGetModule(CUmodule *pMod, CUlibrary library) { + using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, CUlibrary); + static auto func_ptr = LoadSymbol("cuLibraryGetModule"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pMod, library); +} + +CUresult CUDAAPI cuKernelGetFunction(CUfunction *pFunc, CUkernel kernel) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction *, CUkernel); + static auto func_ptr = LoadSymbol("cuKernelGetFunction"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pFunc, kernel); +} + +CUresult CUDAAPI cuLibraryGetGlobal(CUdeviceptr *dptr, size_t *bytes, + CUlibrary library, const char *name) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUlibrary, const char *); + static auto func_ptr = LoadSymbol("cuLibraryGetGlobal"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dptr, bytes, library, name); +} + +CUresult CUDAAPI cuLibraryGetManaged(CUdeviceptr *dptr, size_t *bytes, + CUlibrary library, const char *name) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUlibrary, const char *); + static auto func_ptr = LoadSymbol("cuLibraryGetManaged"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dptr, bytes, library, name); +} + +CUresult CUDAAPI cuLibraryGetUnifiedFunction(void **fptr, CUlibrary library, + const char *symbol) { + using FuncPtr = CUresult(CUDAAPI *)(void **, CUlibrary, const char *); + static auto func_ptr = LoadSymbol("cuLibraryGetUnifiedFunction"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(fptr, library, symbol); +} + +CUresult CUDAAPI cuKernelGetAttribute(int *pi, CUfunction_attribute attrib, + CUkernel kernel, CUdevice dev) { + using FuncPtr = + CUresult(CUDAAPI *)(int *, CUfunction_attribute, CUkernel, CUdevice); + static auto func_ptr = LoadSymbol("cuKernelGetAttribute"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(pi, attrib, kernel, dev); +} + +CUresult CUDAAPI cuKernelSetAttribute(CUfunction_attribute attrib, int val, + CUkernel kernel, CUdevice dev) { + using FuncPtr = + CUresult(CUDAAPI *)(CUfunction_attribute, int, CUkernel, CUdevice); + static auto func_ptr = LoadSymbol("cuKernelSetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attrib, val, kernel, dev); +} + +CUresult CUDAAPI cuKernelSetCacheConfig(CUkernel kernel, CUfunc_cache config, + CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(CUkernel, CUfunc_cache, CUdevice); + static auto func_ptr = LoadSymbol("cuKernelSetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(kernel, config, dev); +} + +CUresult CUDAAPI cuMemGetInfo(size_t *free, size_t *total) { + using FuncPtr = CUresult(CUDAAPI *)(size_t *, size_t *); + static auto func_ptr = LoadSymbol("cuMemGetInfo_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(free, total); +} + +CUresult CUDAAPI cuMemAlloc(CUdeviceptr *dptr, size_t bytesize) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t); + static auto func_ptr = LoadSymbol("cuMemAlloc_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dptr, bytesize); +} + +CUresult CUDAAPI cuMemAllocPitch(CUdeviceptr *dptr, size_t *pPitch, + size_t WidthInBytes, size_t Height, + unsigned int ElementSizeBytes) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, size_t, size_t, + unsigned int); + static auto func_ptr = LoadSymbol("cuMemAllocPitch_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dptr, pPitch, WidthInBytes, Height, ElementSizeBytes); +} + +CUresult CUDAAPI cuMemFree(CUdeviceptr dptr) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); + static auto func_ptr = LoadSymbol("cuMemFree_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dptr); +} + +CUresult CUDAAPI 
cuMemGetAddressRange(CUdeviceptr *pbase, size_t *psize, + CUdeviceptr dptr) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUdeviceptr); + static auto func_ptr = LoadSymbol("cuMemGetAddressRange_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pbase, psize, dptr); +} + +CUresult CUDAAPI cuMemAllocHost(void **pp, size_t bytesize) { + using FuncPtr = CUresult(CUDAAPI *)(void **, size_t); + static auto func_ptr = LoadSymbol("cuMemAllocHost_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pp, bytesize); +} + +CUresult CUDAAPI cuMemFreeHost(void *p) { + using FuncPtr = CUresult(CUDAAPI *)(void *); + static auto func_ptr = LoadSymbol("cuMemFreeHost"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(p); +} + +CUresult CUDAAPI cuMemHostAlloc(void **pp, size_t bytesize, + unsigned int Flags) { + using FuncPtr = CUresult(CUDAAPI *)(void **, size_t, unsigned int); + static auto func_ptr = LoadSymbol("cuMemHostAlloc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pp, bytesize, Flags); +} + +CUresult CUDAAPI cuMemHostGetDevicePointer(CUdeviceptr *pdptr, void *p, + unsigned int Flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, void *, unsigned int); + static auto func_ptr = LoadSymbol("cuMemHostGetDevicePointer_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pdptr, p, Flags); +} + +CUresult CUDAAPI cuMemHostGetFlags(unsigned int *pFlags, void *p) { + using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, void *); + static auto func_ptr = LoadSymbol("cuMemHostGetFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pFlags, p); +} + +CUresult CUDAAPI cuMemAllocManaged(CUdeviceptr *dptr, size_t bytesize, + unsigned int flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, unsigned int); + static auto func_ptr = LoadSymbol("cuMemAllocManaged"); + if (!func_ptr) return GetSymbolNotFoundError(); + 
return func_ptr(dptr, bytesize, flags); +} + +CUresult CUDAAPI cuDeviceGetByPCIBusId(CUdevice *dev, const char *pciBusId) { + using FuncPtr = CUresult(CUDAAPI *)(CUdevice *, const char *); + static auto func_ptr = LoadSymbol("cuDeviceGetByPCIBusId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dev, pciBusId); +} + +CUresult CUDAAPI cuDeviceGetPCIBusId(char *pciBusId, int len, CUdevice dev) { + using FuncPtr = CUresult(CUDAAPI *)(char *, int, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceGetPCIBusId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pciBusId, len, dev); +} + +CUresult CUDAAPI cuIpcGetEventHandle(CUipcEventHandle *pHandle, CUevent event) { + using FuncPtr = CUresult(CUDAAPI *)(CUipcEventHandle *, CUevent); + static auto func_ptr = LoadSymbol("cuIpcGetEventHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pHandle, event); +} + +CUresult CUDAAPI cuIpcOpenEventHandle(CUevent *phEvent, + CUipcEventHandle handle) { + using FuncPtr = CUresult(CUDAAPI *)(CUevent *, CUipcEventHandle); + static auto func_ptr = LoadSymbol("cuIpcOpenEventHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phEvent, handle); +} + +CUresult CUDAAPI cuIpcGetMemHandle(CUipcMemHandle *pHandle, CUdeviceptr dptr) { + using FuncPtr = CUresult(CUDAAPI *)(CUipcMemHandle *, CUdeviceptr); + static auto func_ptr = LoadSymbol("cuIpcGetMemHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pHandle, dptr); +} + +CUresult CUDAAPI cuIpcOpenMemHandle(CUdeviceptr *pdptr, CUipcMemHandle handle, + unsigned int Flags) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr *, CUipcMemHandle, unsigned int); + static auto func_ptr = LoadSymbol("cuIpcOpenMemHandle_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pdptr, handle, Flags); +} + +CUresult CUDAAPI cuIpcCloseMemHandle(CUdeviceptr dptr) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr); + 
static auto func_ptr = LoadSymbol("cuIpcCloseMemHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dptr); +} + +CUresult CUDAAPI cuMemHostRegister(void *p, size_t bytesize, + unsigned int Flags) { + using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, unsigned int); + static auto func_ptr = LoadSymbol("cuMemHostRegister_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(p, bytesize, Flags); +} + +CUresult CUDAAPI cuMemHostUnregister(void *p) { + using FuncPtr = CUresult(CUDAAPI *)(void *); + static auto func_ptr = LoadSymbol("cuMemHostUnregister"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(p); +} + +CUresult CUDAAPI cuMemcpy(CUdeviceptr dst, CUdeviceptr src, size_t ByteCount) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); + static auto func_ptr = LoadSymbol("cuMemcpy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src, ByteCount); +} + +CUresult CUDAAPI cuMemcpyPeer(CUdeviceptr dstDevice, CUcontext dstContext, + CUdeviceptr srcDevice, CUcontext srcContext, + size_t ByteCount) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, + CUcontext, size_t); + static auto func_ptr = LoadSymbol("cuMemcpyPeer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount); +} + +CUresult CUDAAPI cuMemcpyHtoD(CUdeviceptr dstDevice, const void *srcHost, + size_t ByteCount) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t); + static auto func_ptr = LoadSymbol("cuMemcpyHtoD_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, srcHost, ByteCount); +} + +CUresult CUDAAPI cuMemcpyDtoH(void *dstHost, CUdeviceptr srcDevice, + size_t ByteCount) { + using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t); + static auto func_ptr = LoadSymbol("cuMemcpyDtoH_v2"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(dstHost, srcDevice, ByteCount); +} + +CUresult CUDAAPI cuMemcpyDtoD(CUdeviceptr dstDevice, CUdeviceptr srcDevice, + size_t ByteCount) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t); + static auto func_ptr = LoadSymbol("cuMemcpyDtoD_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, srcDevice, ByteCount); +} + +CUresult CUDAAPI cuMemcpyDtoA(CUarray dstArray, size_t dstOffset, + CUdeviceptr srcDevice, size_t ByteCount) { + using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, CUdeviceptr, size_t); + static auto func_ptr = LoadSymbol("cuMemcpyDtoA_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstArray, dstOffset, srcDevice, ByteCount); +} + +CUresult CUDAAPI cuMemcpyAtoD(CUdeviceptr dstDevice, CUarray srcArray, + size_t srcOffset, size_t ByteCount) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUarray, size_t, size_t); + static auto func_ptr = LoadSymbol("cuMemcpyAtoD_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, srcArray, srcOffset, ByteCount); +} + +CUresult CUDAAPI cuMemcpyHtoA(CUarray dstArray, size_t dstOffset, + const void *srcHost, size_t ByteCount) { + using FuncPtr = CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t); + static auto func_ptr = LoadSymbol("cuMemcpyHtoA_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstArray, dstOffset, srcHost, ByteCount); +} + +CUresult CUDAAPI cuMemcpyAtoH(void *dstHost, CUarray srcArray, size_t srcOffset, + size_t ByteCount) { + using FuncPtr = CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t); + static auto func_ptr = LoadSymbol("cuMemcpyAtoH_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstHost, srcArray, srcOffset, ByteCount); +} + +CUresult CUDAAPI cuMemcpyAtoA(CUarray dstArray, size_t dstOffset, + CUarray srcArray, size_t srcOffset, + size_t ByteCount) { + using FuncPtr = 
CUresult(CUDAAPI *)(CUarray, size_t, CUarray, size_t, size_t); + static auto func_ptr = LoadSymbol("cuMemcpyAtoA_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstArray, dstOffset, srcArray, srcOffset, ByteCount); +} + +CUresult CUDAAPI cuMemcpy2D(const CUDA_MEMCPY2D *pCopy) { + using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); + static auto func_ptr = LoadSymbol("cuMemcpy2D_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pCopy); +} + +CUresult CUDAAPI cuMemcpy2DUnaligned(const CUDA_MEMCPY2D *pCopy) { + using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *); + static auto func_ptr = LoadSymbol("cuMemcpy2DUnaligned_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pCopy); +} + +CUresult CUDAAPI cuMemcpy3D(const CUDA_MEMCPY3D *pCopy) { + using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *); + static auto func_ptr = LoadSymbol("cuMemcpy3D_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pCopy); +} + +CUresult CUDAAPI cuMemcpy3DPeer(const CUDA_MEMCPY3D_PEER *pCopy) { + using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *); + static auto func_ptr = LoadSymbol("cuMemcpy3DPeer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pCopy); +} + +CUresult CUDAAPI cuMemcpyAsync(CUdeviceptr dst, CUdeviceptr src, + size_t ByteCount, CUstream hStream) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemcpyAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src, ByteCount, hStream); +} + +CUresult CUDAAPI cuMemcpyPeerAsync(CUdeviceptr dstDevice, CUcontext dstContext, + CUdeviceptr srcDevice, CUcontext srcContext, + size_t ByteCount, CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUcontext, CUdeviceptr, + CUcontext, size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemcpyPeerAsync"); + if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, dstContext, srcDevice, srcContext, ByteCount, + hStream); +} + +CUresult CUDAAPI cuMemcpyHtoDAsync(CUdeviceptr dstDevice, const void *srcHost, + size_t ByteCount, CUstream hStream) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr, const void *, size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemcpyHtoDAsync_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, srcHost, ByteCount, hStream); +} + +CUresult CUDAAPI cuMemcpyDtoHAsync(void *dstHost, CUdeviceptr srcDevice, + size_t ByteCount, CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemcpyDtoHAsync_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstHost, srcDevice, ByteCount, hStream); +} + +CUresult CUDAAPI cuMemcpyDtoDAsync(CUdeviceptr dstDevice, CUdeviceptr srcDevice, + size_t ByteCount, CUstream hStream) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr, CUdeviceptr, size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemcpyDtoDAsync_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, srcDevice, ByteCount, hStream); +} + +CUresult CUDAAPI cuMemcpyHtoAAsync(CUarray dstArray, size_t dstOffset, + const void *srcHost, size_t ByteCount, + CUstream hStream) { + using FuncPtr = + CUresult(CUDAAPI *)(CUarray, size_t, const void *, size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemcpyHtoAAsync_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstArray, dstOffset, srcHost, ByteCount, hStream); +} + +CUresult CUDAAPI cuMemcpyAtoHAsync(void *dstHost, CUarray srcArray, + size_t srcOffset, size_t ByteCount, + CUstream hStream) { + using FuncPtr = + CUresult(CUDAAPI *)(void *, CUarray, size_t, size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemcpyAtoHAsync_v2"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(dstHost, srcArray, srcOffset, ByteCount, hStream); +} + +CUresult CUDAAPI cuMemcpy2DAsync(const CUDA_MEMCPY2D *pCopy, CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY2D *, CUstream); + static auto func_ptr = LoadSymbol("cuMemcpy2DAsync_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pCopy, hStream); +} + +CUresult CUDAAPI cuMemcpy3DAsync(const CUDA_MEMCPY3D *pCopy, CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D *, CUstream); + static auto func_ptr = LoadSymbol("cuMemcpy3DAsync_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pCopy, hStream); +} + +CUresult CUDAAPI cuMemcpy3DPeerAsync(const CUDA_MEMCPY3D_PEER *pCopy, + CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(const CUDA_MEMCPY3D_PEER *, CUstream); + static auto func_ptr = LoadSymbol("cuMemcpy3DPeerAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pCopy, hStream); +} + +CUresult CUDAAPI cuMemsetD8(CUdeviceptr dstDevice, unsigned char uc, size_t N) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t); + static auto func_ptr = LoadSymbol("cuMemsetD8_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, uc, N); +} + +CUresult CUDAAPI cuMemsetD16(CUdeviceptr dstDevice, unsigned short us, + size_t N) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t); + static auto func_ptr = LoadSymbol("cuMemsetD16_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, us, N); +} + +CUresult CUDAAPI cuMemsetD32(CUdeviceptr dstDevice, unsigned int ui, size_t N) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t); + static auto func_ptr = LoadSymbol("cuMemsetD32_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, ui, N); +} + +CUresult CUDAAPI cuMemsetD2D8(CUdeviceptr dstDevice, 
size_t dstPitch, + unsigned char uc, size_t Width, size_t Height) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, size_t, size_t); + static auto func_ptr = LoadSymbol("cuMemsetD2D8_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, dstPitch, uc, Width, Height); +} + +CUresult CUDAAPI cuMemsetD2D16(CUdeviceptr dstDevice, size_t dstPitch, + unsigned short us, size_t Width, size_t Height) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, size_t, size_t); + static auto func_ptr = LoadSymbol("cuMemsetD2D16_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, dstPitch, us, Width, Height); +} + +CUresult CUDAAPI cuMemsetD2D32(CUdeviceptr dstDevice, size_t dstPitch, + unsigned int ui, size_t Width, size_t Height) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, size_t); + static auto func_ptr = LoadSymbol("cuMemsetD2D32_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, dstPitch, ui, Width, Height); +} + +CUresult CUDAAPI cuMemsetD8Async(CUdeviceptr dstDevice, unsigned char uc, + size_t N, CUstream hStream) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr, unsigned char, size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemsetD8Async"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, uc, N, hStream); +} + +CUresult CUDAAPI cuMemsetD16Async(CUdeviceptr dstDevice, unsigned short us, + size_t N, CUstream hStream) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr, unsigned short, size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemsetD16Async"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, us, N, hStream); +} + +CUresult CUDAAPI cuMemsetD32Async(CUdeviceptr dstDevice, unsigned int ui, + size_t N, CUstream hStream) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr, unsigned int, size_t, 
CUstream); + static auto func_ptr = LoadSymbol("cuMemsetD32Async"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, ui, N, hStream); +} + +CUresult CUDAAPI cuMemsetD2D8Async(CUdeviceptr dstDevice, size_t dstPitch, + unsigned char uc, size_t Width, + size_t Height, CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned char, + size_t, size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemsetD2D8Async"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, dstPitch, uc, Width, Height, hStream); +} + +CUresult CUDAAPI cuMemsetD2D16Async(CUdeviceptr dstDevice, size_t dstPitch, + unsigned short us, size_t Width, + size_t Height, CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned short, + size_t, size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemsetD2D16Async"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, dstPitch, us, Width, Height, hStream); +} + +CUresult CUDAAPI cuMemsetD2D32Async(CUdeviceptr dstDevice, size_t dstPitch, + unsigned int ui, size_t Width, + size_t Height, CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, unsigned int, size_t, + size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemsetD2D32Async"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dstDevice, dstPitch, ui, Width, Height, hStream); +} + +CUresult CUDAAPI cuArrayCreate(CUarray *pHandle, + const CUDA_ARRAY_DESCRIPTOR *pAllocateArray) { + using FuncPtr = CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY_DESCRIPTOR *); + static auto func_ptr = LoadSymbol("cuArrayCreate_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pHandle, pAllocateArray); +} + +CUresult CUDAAPI cuArrayGetDescriptor(CUDA_ARRAY_DESCRIPTOR *pArrayDescriptor, + CUarray hArray) { + using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY_DESCRIPTOR *, CUarray); + static auto func_ptr = 
LoadSymbol("cuArrayGetDescriptor_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pArrayDescriptor, hArray); +} + +CUresult CUDAAPI cuArrayGetSparseProperties( + CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUarray array) { + using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY_SPARSE_PROPERTIES *, CUarray); + static auto func_ptr = LoadSymbol("cuArrayGetSparseProperties"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(sparseProperties, array); +} + +CUresult CUDAAPI cuMipmappedArrayGetSparseProperties( + CUDA_ARRAY_SPARSE_PROPERTIES *sparseProperties, CUmipmappedArray mipmap) { + using FuncPtr = + CUresult(CUDAAPI *)(CUDA_ARRAY_SPARSE_PROPERTIES *, CUmipmappedArray); + static auto func_ptr = + LoadSymbol("cuMipmappedArrayGetSparseProperties"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(sparseProperties, mipmap); +} + +CUresult CUDAAPI +cuArrayGetMemoryRequirements(CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements, + CUarray array, CUdevice device) { + using FuncPtr = + CUresult(CUDAAPI *)(CUDA_ARRAY_MEMORY_REQUIREMENTS *, CUarray, CUdevice); + static auto func_ptr = LoadSymbol("cuArrayGetMemoryRequirements"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memoryRequirements, array, device); +} + +CUresult CUDAAPI cuMipmappedArrayGetMemoryRequirements( + CUDA_ARRAY_MEMORY_REQUIREMENTS *memoryRequirements, CUmipmappedArray mipmap, + CUdevice device) { + using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY_MEMORY_REQUIREMENTS *, + CUmipmappedArray, CUdevice); + static auto func_ptr = + LoadSymbol("cuMipmappedArrayGetMemoryRequirements"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memoryRequirements, mipmap, device); +} + +CUresult CUDAAPI cuArrayGetPlane(CUarray *pPlaneArray, CUarray hArray, + unsigned int planeIdx) { + using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUarray, unsigned int); + static auto func_ptr = LoadSymbol("cuArrayGetPlane"); + if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pPlaneArray, hArray, planeIdx); +} + +CUresult CUDAAPI cuArrayDestroy(CUarray hArray) { + using FuncPtr = CUresult(CUDAAPI *)(CUarray); + static auto func_ptr = LoadSymbol("cuArrayDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hArray); +} + +CUresult CUDAAPI cuArray3DCreate( + CUarray *pHandle, const CUDA_ARRAY3D_DESCRIPTOR *pAllocateArray) { + using FuncPtr = + CUresult(CUDAAPI *)(CUarray *, const CUDA_ARRAY3D_DESCRIPTOR *); + static auto func_ptr = LoadSymbol("cuArray3DCreate_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pHandle, pAllocateArray); +} + +CUresult CUDAAPI cuArray3DGetDescriptor( + CUDA_ARRAY3D_DESCRIPTOR *pArrayDescriptor, CUarray hArray) { + using FuncPtr = CUresult(CUDAAPI *)(CUDA_ARRAY3D_DESCRIPTOR *, CUarray); + static auto func_ptr = LoadSymbol("cuArray3DGetDescriptor_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pArrayDescriptor, hArray); +} + +CUresult CUDAAPI +cuMipmappedArrayCreate(CUmipmappedArray *pHandle, + const CUDA_ARRAY3D_DESCRIPTOR *pMipmappedArrayDesc, + unsigned int numMipmapLevels) { + using FuncPtr = CUresult(CUDAAPI *)( + CUmipmappedArray *, const CUDA_ARRAY3D_DESCRIPTOR *, unsigned int); + static auto func_ptr = LoadSymbol("cuMipmappedArrayCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pHandle, pMipmappedArrayDesc, numMipmapLevels); +} + +CUresult CUDAAPI cuMipmappedArrayGetLevel(CUarray *pLevelArray, + CUmipmappedArray hMipmappedArray, + unsigned int level) { + using FuncPtr = + CUresult(CUDAAPI *)(CUarray *, CUmipmappedArray, unsigned int); + static auto func_ptr = LoadSymbol("cuMipmappedArrayGetLevel"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pLevelArray, hMipmappedArray, level); +} + +CUresult CUDAAPI cuMipmappedArrayDestroy(CUmipmappedArray hMipmappedArray) { + using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray); 
+ static auto func_ptr = LoadSymbol("cuMipmappedArrayDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hMipmappedArray); +} + +CUresult CUDAAPI cuMemGetHandleForAddressRange(void *handle, CUdeviceptr dptr, + size_t size, + CUmemRangeHandleType handleType, + unsigned long long flags) { + using FuncPtr = CUresult(CUDAAPI *)(void *, CUdeviceptr, size_t, + CUmemRangeHandleType, unsigned long long); + static auto func_ptr = LoadSymbol("cuMemGetHandleForAddressRange"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dptr, size, handleType, flags); +} + +CUresult CUDAAPI cuMemAddressReserve(CUdeviceptr *ptr, size_t size, + size_t alignment, CUdeviceptr addr, + unsigned long long flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, size_t, + CUdeviceptr, unsigned long long); + static auto func_ptr = LoadSymbol("cuMemAddressReserve"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr, size, alignment, addr, flags); +} + +CUresult CUDAAPI cuMemAddressFree(CUdeviceptr ptr, size_t size) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t); + static auto func_ptr = LoadSymbol("cuMemAddressFree"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr, size); +} + +CUresult CUDAAPI cuMemCreate(CUmemGenericAllocationHandle *handle, size_t size, + const CUmemAllocationProp *prop, + unsigned long long flags) { + using FuncPtr = + CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, size_t, + const CUmemAllocationProp *, unsigned long long); + static auto func_ptr = LoadSymbol("cuMemCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, size, prop, flags); +} + +CUresult CUDAAPI cuMemRelease(CUmemGenericAllocationHandle handle) { + using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle); + static auto func_ptr = LoadSymbol("cuMemRelease"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle); +} + 
+CUresult CUDAAPI cuMemMap(CUdeviceptr ptr, size_t size, size_t offset, + CUmemGenericAllocationHandle handle, + unsigned long long flags) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr, size_t, size_t, + CUmemGenericAllocationHandle, unsigned long long); + static auto func_ptr = LoadSymbol("cuMemMap"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr, size, offset, handle, flags); +} + +CUresult CUDAAPI cuMemMapArrayAsync(CUarrayMapInfo *mapInfoList, + unsigned int count, CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUarrayMapInfo *, unsigned int, CUstream); + static auto func_ptr = LoadSymbol("cuMemMapArrayAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mapInfoList, count, hStream); +} + +CUresult CUDAAPI cuMemUnmap(CUdeviceptr ptr, size_t size) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t); + static auto func_ptr = LoadSymbol("cuMemUnmap"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr, size); +} + +CUresult CUDAAPI cuMemSetAccess(CUdeviceptr ptr, size_t size, + const CUmemAccessDesc *desc, size_t count) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr, size_t, const CUmemAccessDesc *, size_t); + static auto func_ptr = LoadSymbol("cuMemSetAccess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr, size, desc, count); +} + +CUresult CUDAAPI cuMemGetAccess(unsigned long long *flags, + const CUmemLocation *location, + CUdeviceptr ptr) { + using FuncPtr = CUresult(CUDAAPI *)(unsigned long long *, + const CUmemLocation *, CUdeviceptr); + static auto func_ptr = LoadSymbol("cuMemGetAccess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(flags, location, ptr); +} + +CUresult CUDAAPI cuMemExportToShareableHandle( + void *shareableHandle, CUmemGenericAllocationHandle handle, + CUmemAllocationHandleType handleType, unsigned long long flags) { + using FuncPtr = + CUresult(CUDAAPI *)(void *, 
CUmemGenericAllocationHandle, + CUmemAllocationHandleType, unsigned long long); + static auto func_ptr = LoadSymbol("cuMemExportToShareableHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(shareableHandle, handle, handleType, flags); +} + +CUresult CUDAAPI cuMemImportFromShareableHandle( + CUmemGenericAllocationHandle *handle, void *osHandle, + CUmemAllocationHandleType shHandleType) { + using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, void *, + CUmemAllocationHandleType); + static auto func_ptr = LoadSymbol("cuMemImportFromShareableHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, osHandle, shHandleType); +} + +CUresult CUDAAPI cuMemGetAllocationGranularity( + size_t *granularity, const CUmemAllocationProp *prop, + CUmemAllocationGranularity_flags option) { + using FuncPtr = CUresult(CUDAAPI *)(size_t *, const CUmemAllocationProp *, + CUmemAllocationGranularity_flags); + static auto func_ptr = LoadSymbol("cuMemGetAllocationGranularity"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(granularity, prop, option); +} + +CUresult CUDAAPI cuMemGetAllocationPropertiesFromHandle( + CUmemAllocationProp *prop, CUmemGenericAllocationHandle handle) { + using FuncPtr = + CUresult(CUDAAPI *)(CUmemAllocationProp *, CUmemGenericAllocationHandle); + static auto func_ptr = + LoadSymbol("cuMemGetAllocationPropertiesFromHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(prop, handle); +} + +CUresult CUDAAPI +cuMemRetainAllocationHandle(CUmemGenericAllocationHandle *handle, void *addr) { + using FuncPtr = CUresult(CUDAAPI *)(CUmemGenericAllocationHandle *, void *); + static auto func_ptr = LoadSymbol("cuMemRetainAllocationHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, addr); +} + +CUresult CUDAAPI cuMemFreeAsync(CUdeviceptr dptr, CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, CUstream); + 
static auto func_ptr = LoadSymbol("cuMemFreeAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dptr, hStream); +} + +CUresult CUDAAPI cuMemAllocAsync(CUdeviceptr *dptr, size_t bytesize, + CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, size_t, CUstream); + static auto func_ptr = LoadSymbol("cuMemAllocAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dptr, bytesize, hStream); +} + +CUresult CUDAAPI cuMemPoolTrimTo(CUmemoryPool pool, size_t minBytesToKeep) { + using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool, size_t); + static auto func_ptr = LoadSymbol("cuMemPoolTrimTo"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pool, minBytesToKeep); +} + +CUresult CUDAAPI cuMemPoolSetAttribute(CUmemoryPool pool, + CUmemPool_attribute attr, void *value) { + using FuncPtr = + CUresult(CUDAAPI *)(CUmemoryPool, CUmemPool_attribute, void *); + static auto func_ptr = LoadSymbol("cuMemPoolSetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pool, attr, value); +} + +CUresult CUDAAPI cuMemPoolGetAttribute(CUmemoryPool pool, + CUmemPool_attribute attr, void *value) { + using FuncPtr = + CUresult(CUDAAPI *)(CUmemoryPool, CUmemPool_attribute, void *); + static auto func_ptr = LoadSymbol("cuMemPoolGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pool, attr, value); +} + +CUresult CUDAAPI cuMemPoolSetAccess(CUmemoryPool pool, + const CUmemAccessDesc *map, size_t count) { + using FuncPtr = + CUresult(CUDAAPI *)(CUmemoryPool, const CUmemAccessDesc *, size_t); + static auto func_ptr = LoadSymbol("cuMemPoolSetAccess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pool, map, count); +} + +CUresult CUDAAPI cuMemPoolGetAccess(CUmemAccess_flags *flags, + CUmemoryPool memPool, + CUmemLocation *location) { + using FuncPtr = + CUresult(CUDAAPI *)(CUmemAccess_flags *, CUmemoryPool, CUmemLocation *); + static auto 
func_ptr = LoadSymbol("cuMemPoolGetAccess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(flags, memPool, location); +} + +CUresult CUDAAPI cuMemPoolCreate(CUmemoryPool *pool, + const CUmemPoolProps *poolProps) { + using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool *, const CUmemPoolProps *); + static auto func_ptr = LoadSymbol("cuMemPoolCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pool, poolProps); +} + +CUresult CUDAAPI cuMemPoolDestroy(CUmemoryPool pool) { + using FuncPtr = CUresult(CUDAAPI *)(CUmemoryPool); + static auto func_ptr = LoadSymbol("cuMemPoolDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pool); +} + +CUresult CUDAAPI cuMemAllocFromPoolAsync(CUdeviceptr *dptr, size_t bytesize, + CUmemoryPool pool, CUstream hStream) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr *, size_t, CUmemoryPool, CUstream); + static auto func_ptr = LoadSymbol("cuMemAllocFromPoolAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dptr, bytesize, pool, hStream); +} + +CUresult CUDAAPI cuMemPoolExportToShareableHandle( + void *handle_out, CUmemoryPool pool, CUmemAllocationHandleType handleType, + unsigned long long flags) { + using FuncPtr = CUresult(CUDAAPI *)( + void *, CUmemoryPool, CUmemAllocationHandleType, unsigned long long); + static auto func_ptr = + LoadSymbol("cuMemPoolExportToShareableHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle_out, pool, handleType, flags); +} + +CUresult CUDAAPI cuMemPoolImportFromShareableHandle( + CUmemoryPool *pool_out, void *handle, CUmemAllocationHandleType handleType, + unsigned long long flags) { + using FuncPtr = CUresult(CUDAAPI *)( + CUmemoryPool *, void *, CUmemAllocationHandleType, unsigned long long); + static auto func_ptr = + LoadSymbol("cuMemPoolImportFromShareableHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pool_out, handle, handleType, flags); 
+} + +CUresult CUDAAPI cuMemPoolExportPointer(CUmemPoolPtrExportData *shareData_out, + CUdeviceptr ptr) { + using FuncPtr = CUresult(CUDAAPI *)(CUmemPoolPtrExportData *, CUdeviceptr); + static auto func_ptr = LoadSymbol("cuMemPoolExportPointer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(shareData_out, ptr); +} + +CUresult CUDAAPI cuMemPoolImportPointer(CUdeviceptr *ptr_out, CUmemoryPool pool, + CUmemPoolPtrExportData *shareData) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUmemoryPool, + CUmemPoolPtrExportData *); + static auto func_ptr = LoadSymbol("cuMemPoolImportPointer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr_out, pool, shareData); +} + +CUresult CUDAAPI cuPointerGetAttribute(void *data, + CUpointer_attribute attribute, + CUdeviceptr ptr) { + using FuncPtr = CUresult(CUDAAPI *)(void *, CUpointer_attribute, CUdeviceptr); + static auto func_ptr = LoadSymbol("cuPointerGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(data, attribute, ptr); +} + +CUresult CUDAAPI cuMemPrefetchAsync(CUdeviceptr devPtr, size_t count, + CUdevice dstDevice, CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUdevice, CUstream); + static auto func_ptr = LoadSymbol("cuMemPrefetchAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, count, dstDevice, hStream); +} + +CUresult CUDAAPI cuMemAdvise(CUdeviceptr devPtr, size_t count, + CUmem_advise advice, CUdevice device) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr, size_t, CUmem_advise, CUdevice); + static auto func_ptr = LoadSymbol("cuMemAdvise"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, count, advice, device); +} + +CUresult CUDAAPI cuMemRangeGetAttribute(void *data, size_t dataSize, + CUmem_range_attribute attribute, + CUdeviceptr devPtr, size_t count) { + using FuncPtr = CUresult(CUDAAPI *)(void *, size_t, CUmem_range_attribute, + 
CUdeviceptr, size_t); + static auto func_ptr = LoadSymbol("cuMemRangeGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(data, dataSize, attribute, devPtr, count); +} + +CUresult CUDAAPI cuMemRangeGetAttributes(void **data, size_t *dataSizes, + CUmem_range_attribute *attributes, + size_t numAttributes, + CUdeviceptr devPtr, size_t count) { + using FuncPtr = CUresult(CUDAAPI *)( + void **, size_t *, CUmem_range_attribute *, size_t, CUdeviceptr, size_t); + static auto func_ptr = LoadSymbol("cuMemRangeGetAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); +} + +CUresult CUDAAPI cuPointerSetAttribute(const void *value, + CUpointer_attribute attribute, + CUdeviceptr ptr) { + using FuncPtr = + CUresult(CUDAAPI *)(const void *, CUpointer_attribute, CUdeviceptr); + static auto func_ptr = LoadSymbol("cuPointerSetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(value, attribute, ptr); +} + +CUresult CUDAAPI cuPointerGetAttributes(unsigned int numAttributes, + CUpointer_attribute *attributes, + void **data, CUdeviceptr ptr) { + using FuncPtr = CUresult(CUDAAPI *)(unsigned int, CUpointer_attribute *, + void **, CUdeviceptr); + static auto func_ptr = LoadSymbol("cuPointerGetAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(numAttributes, attributes, data, ptr); +} + +CUresult CUDAAPI cuStreamCreate(CUstream *phStream, unsigned int Flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int); + static auto func_ptr = LoadSymbol("cuStreamCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phStream, Flags); +} + +CUresult CUDAAPI cuStreamCreateWithPriority(CUstream *phStream, + unsigned int flags, int priority) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream *, unsigned int, int); + static auto func_ptr = LoadSymbol("cuStreamCreateWithPriority"); + if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phStream, flags, priority); +} + +CUresult CUDAAPI cuStreamGetPriority(CUstream hStream, int *priority) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream, int *); + static auto func_ptr = LoadSymbol("cuStreamGetPriority"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, priority); +} + +CUresult CUDAAPI cuStreamGetFlags(CUstream hStream, unsigned int *flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int *); + static auto func_ptr = LoadSymbol("cuStreamGetFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, flags); +} + +CUresult CUDAAPI cuStreamGetId(CUstream hStream, unsigned long long *streamId) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned long long *); + static auto func_ptr = LoadSymbol("cuStreamGetId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, streamId); +} + +CUresult CUDAAPI cuStreamGetCtx(CUstream hStream, CUcontext *pctx) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUcontext *); + static auto func_ptr = LoadSymbol("cuStreamGetCtx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, pctx); +} + +CUresult CUDAAPI cuStreamWaitEvent(CUstream hStream, CUevent hEvent, + unsigned int Flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUevent, unsigned int); + static auto func_ptr = LoadSymbol("cuStreamWaitEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, hEvent, Flags); +} + +CUresult CUDAAPI cuStreamAddCallback(CUstream hStream, + CUstreamCallback callback, void *userData, + unsigned int flags) { + using FuncPtr = + CUresult(CUDAAPI *)(CUstream, CUstreamCallback, void *, unsigned int); + static auto func_ptr = LoadSymbol("cuStreamAddCallback"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, callback, userData, flags); +} + +CUresult CUDAAPI cuStreamBeginCapture(CUstream 
hStream, + CUstreamCaptureMode mode) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureMode); + static auto func_ptr = LoadSymbol("cuStreamBeginCapture_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, mode); +} + +CUresult CUDAAPI cuThreadExchangeStreamCaptureMode(CUstreamCaptureMode *mode) { + using FuncPtr = CUresult(CUDAAPI *)(CUstreamCaptureMode *); + static auto func_ptr = + LoadSymbol("cuThreadExchangeStreamCaptureMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mode); +} + +CUresult CUDAAPI cuStreamEndCapture(CUstream hStream, CUgraph *phGraph) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUgraph *); + static auto func_ptr = LoadSymbol("cuStreamEndCapture"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, phGraph); +} + +CUresult CUDAAPI cuStreamIsCapturing(CUstream hStream, + CUstreamCaptureStatus *captureStatus) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *); + static auto func_ptr = LoadSymbol("cuStreamIsCapturing"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, captureStatus); +} + +CUresult CUDAAPI cuStreamGetCaptureInfo( + CUstream hStream, CUstreamCaptureStatus *captureStatus_out, + cuuint64_t *id_out, CUgraph *graph_out, + const CUgraphNode **dependencies_out, size_t *numDependencies_out) { + using FuncPtr = + CUresult(CUDAAPI *)(CUstream, CUstreamCaptureStatus *, cuuint64_t *, + CUgraph *, const CUgraphNode **, size_t *); + static auto func_ptr = LoadSymbol("cuStreamGetCaptureInfo_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, captureStatus_out, id_out, graph_out, + dependencies_out, numDependencies_out); +} + +CUresult CUDAAPI cuStreamUpdateCaptureDependencies(CUstream hStream, + CUgraphNode *dependencies, + size_t numDependencies, + unsigned int flags) { + using FuncPtr = + CUresult(CUDAAPI *)(CUstream, CUgraphNode *, size_t, unsigned int); + 
static auto func_ptr = + LoadSymbol("cuStreamUpdateCaptureDependencies"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, dependencies, numDependencies, flags); +} + +CUresult CUDAAPI cuStreamAttachMemAsync(CUstream hStream, CUdeviceptr dptr, + size_t length, unsigned int flags) { + using FuncPtr = + CUresult(CUDAAPI *)(CUstream, CUdeviceptr, size_t, unsigned int); + static auto func_ptr = LoadSymbol("cuStreamAttachMemAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, dptr, length, flags); +} + +CUresult CUDAAPI cuStreamQuery(CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream); + static auto func_ptr = LoadSymbol("cuStreamQuery"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream); +} + +CUresult CUDAAPI cuStreamSynchronize(CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream); + static auto func_ptr = LoadSymbol("cuStreamSynchronize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream); +} + +CUresult CUDAAPI cuStreamDestroy(CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream); + static auto func_ptr = LoadSymbol("cuStreamDestroy_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream); +} + +CUresult CUDAAPI cuStreamCopyAttributes(CUstream dst, CUstream src) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUstream); + static auto func_ptr = LoadSymbol("cuStreamCopyAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src); +} + +CUresult CUDAAPI cuStreamGetAttribute(CUstream hStream, CUstreamAttrID attr, + CUstreamAttrValue *value_out) { + using FuncPtr = + CUresult(CUDAAPI *)(CUstream, CUstreamAttrID, CUstreamAttrValue *); + static auto func_ptr = LoadSymbol("cuStreamGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, attr, value_out); +} + +CUresult CUDAAPI cuStreamSetAttribute(CUstream hStream, 
CUstreamAttrID attr, + const CUstreamAttrValue *value) { + using FuncPtr = + CUresult(CUDAAPI *)(CUstream, CUstreamAttrID, const CUstreamAttrValue *); + static auto func_ptr = LoadSymbol("cuStreamSetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, attr, value); +} + +CUresult CUDAAPI cuEventCreate(CUevent *phEvent, unsigned int Flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUevent *, unsigned int); + static auto func_ptr = LoadSymbol("cuEventCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phEvent, Flags); +} + +CUresult CUDAAPI cuEventRecord(CUevent hEvent, CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUevent, CUstream); + static auto func_ptr = LoadSymbol("cuEventRecord"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hEvent, hStream); +} + +CUresult CUDAAPI cuEventRecordWithFlags(CUevent hEvent, CUstream hStream, + unsigned int flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUevent, CUstream, unsigned int); + static auto func_ptr = LoadSymbol("cuEventRecordWithFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hEvent, hStream, flags); +} + +CUresult CUDAAPI cuEventQuery(CUevent hEvent) { + using FuncPtr = CUresult(CUDAAPI *)(CUevent); + static auto func_ptr = LoadSymbol("cuEventQuery"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hEvent); +} + +CUresult CUDAAPI cuEventSynchronize(CUevent hEvent) { + using FuncPtr = CUresult(CUDAAPI *)(CUevent); + static auto func_ptr = LoadSymbol("cuEventSynchronize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hEvent); +} + +CUresult CUDAAPI cuEventDestroy(CUevent hEvent) { + using FuncPtr = CUresult(CUDAAPI *)(CUevent); + static auto func_ptr = LoadSymbol("cuEventDestroy_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hEvent); +} + +CUresult CUDAAPI cuEventElapsedTime(float *pMilliseconds, CUevent hStart, + CUevent hEnd) { 
+ using FuncPtr = CUresult(CUDAAPI *)(float *, CUevent, CUevent); + static auto func_ptr = LoadSymbol("cuEventElapsedTime"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pMilliseconds, hStart, hEnd); +} + +CUresult CUDAAPI +cuImportExternalMemory(CUexternalMemory *extMem_out, + const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *memHandleDesc) { + using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory *, + const CUDA_EXTERNAL_MEMORY_HANDLE_DESC *); + static auto func_ptr = LoadSymbol("cuImportExternalMemory"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(extMem_out, memHandleDesc); +} + +CUresult CUDAAPI cuExternalMemoryGetMappedBuffer( + CUdeviceptr *devPtr, CUexternalMemory extMem, + const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *bufferDesc) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUexternalMemory, + const CUDA_EXTERNAL_MEMORY_BUFFER_DESC *); + static auto func_ptr = LoadSymbol("cuExternalMemoryGetMappedBuffer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, extMem, bufferDesc); +} + +CUresult CUDAAPI cuExternalMemoryGetMappedMipmappedArray( + CUmipmappedArray *mipmap, CUexternalMemory extMem, + const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *mipmapDesc) { + using FuncPtr = + CUresult(CUDAAPI *)(CUmipmappedArray *, CUexternalMemory, + const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC *); + static auto func_ptr = + LoadSymbol("cuExternalMemoryGetMappedMipmappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mipmap, extMem, mipmapDesc); +} + +CUresult CUDAAPI cuDestroyExternalMemory(CUexternalMemory extMem) { + using FuncPtr = CUresult(CUDAAPI *)(CUexternalMemory); + static auto func_ptr = LoadSymbol("cuDestroyExternalMemory"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(extMem); +} + +CUresult CUDAAPI cuImportExternalSemaphore( + CUexternalSemaphore *extSem_out, + const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *semHandleDesc) { + using FuncPtr = 
CUresult(CUDAAPI *)( + CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC *); + static auto func_ptr = LoadSymbol("cuImportExternalSemaphore"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(extSem_out, semHandleDesc); +} + +CUresult CUDAAPI cuSignalExternalSemaphoresAsync( + const CUexternalSemaphore *extSemArray, + const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *paramsArray, + unsigned int numExtSems, CUstream stream) { + using FuncPtr = CUresult(CUDAAPI *)( + const CUexternalSemaphore *, + const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS *, unsigned int, CUstream); + static auto func_ptr = LoadSymbol("cuSignalExternalSemaphoresAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(extSemArray, paramsArray, numExtSems, stream); +} + +CUresult CUDAAPI cuWaitExternalSemaphoresAsync( + const CUexternalSemaphore *extSemArray, + const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *paramsArray, + unsigned int numExtSems, CUstream stream) { + using FuncPtr = CUresult(CUDAAPI *)( + const CUexternalSemaphore *, const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS *, + unsigned int, CUstream); + static auto func_ptr = LoadSymbol("cuWaitExternalSemaphoresAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(extSemArray, paramsArray, numExtSems, stream); +} + +CUresult CUDAAPI cuDestroyExternalSemaphore(CUexternalSemaphore extSem) { + using FuncPtr = CUresult(CUDAAPI *)(CUexternalSemaphore); + static auto func_ptr = LoadSymbol("cuDestroyExternalSemaphore"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(extSem); +} + +CUresult CUDAAPI cuStreamWaitValue32(CUstream stream, CUdeviceptr addr, + cuuint32_t value, unsigned int flags) { + using FuncPtr = + CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); + static auto func_ptr = LoadSymbol("cuStreamWaitValue32_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream, addr, value, flags); +} + +CUresult CUDAAPI 
cuStreamWaitValue64(CUstream stream, CUdeviceptr addr, + cuuint64_t value, unsigned int flags) { + using FuncPtr = + CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned int); + static auto func_ptr = LoadSymbol("cuStreamWaitValue64_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream, addr, value, flags); +} + +CUresult CUDAAPI cuStreamWriteValue32(CUstream stream, CUdeviceptr addr, + cuuint32_t value, unsigned int flags) { + using FuncPtr = + CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint32_t, unsigned int); + static auto func_ptr = LoadSymbol("cuStreamWriteValue32_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream, addr, value, flags); +} + +CUresult CUDAAPI cuStreamWriteValue64(CUstream stream, CUdeviceptr addr, + cuuint64_t value, unsigned int flags) { + using FuncPtr = + CUresult(CUDAAPI *)(CUstream, CUdeviceptr, cuuint64_t, unsigned int); + static auto func_ptr = LoadSymbol("cuStreamWriteValue64_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream, addr, value, flags); +} + +CUresult CUDAAPI cuStreamBatchMemOp(CUstream stream, unsigned int count, + CUstreamBatchMemOpParams *paramArray, + unsigned int flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream, unsigned int, + CUstreamBatchMemOpParams *, unsigned int); + static auto func_ptr = LoadSymbol("cuStreamBatchMemOp_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream, count, paramArray, flags); +} + +CUresult CUDAAPI cuFuncGetAttribute(int *pi, CUfunction_attribute attrib, + CUfunction hfunc) { + using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction_attribute, CUfunction); + static auto func_ptr = LoadSymbol("cuFuncGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pi, attrib, hfunc); +} + +CUresult CUDAAPI cuFuncSetAttribute(CUfunction hfunc, + CUfunction_attribute attrib, int value) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction, 
CUfunction_attribute, int); + static auto func_ptr = LoadSymbol("cuFuncSetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hfunc, attrib, value); +} + +CUresult CUDAAPI cuFuncSetCacheConfig(CUfunction hfunc, CUfunc_cache config) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUfunc_cache); + static auto func_ptr = LoadSymbol("cuFuncSetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hfunc, config); +} + +CUresult CUDAAPI cuFuncSetSharedMemConfig(CUfunction hfunc, + CUsharedconfig config) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction, CUsharedconfig); + static auto func_ptr = LoadSymbol("cuFuncSetSharedMemConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hfunc, config); +} + +CUresult CUDAAPI cuFuncGetModule(CUmodule *hmod, CUfunction hfunc) { + using FuncPtr = CUresult(CUDAAPI *)(CUmodule *, CUfunction); + static auto func_ptr = LoadSymbol("cuFuncGetModule"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hmod, hfunc); +} + +CUresult CUDAAPI cuLaunchKernel(CUfunction f, unsigned int gridDimX, + unsigned int gridDimY, unsigned int gridDimZ, + unsigned int blockDimX, unsigned int blockDimY, + unsigned int blockDimZ, + unsigned int sharedMemBytes, CUstream hStream, + void **kernelParams, void **extra) { + using FuncPtr = CUresult(CUDAAPI *)( + CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, + unsigned int, unsigned int, unsigned int, CUstream, void **, void **); + static auto func_ptr = LoadSymbol("cuLaunchKernel"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, + blockDimZ, sharedMemBytes, hStream, kernelParams, extra); +} + +CUresult CUDAAPI cuLaunchKernelEx(const CUlaunchConfig *config, CUfunction f, + void **kernelParams, void **extra) { + using FuncPtr = + CUresult(CUDAAPI *)(const CUlaunchConfig *, CUfunction, void **, void **); + static auto 
func_ptr = LoadSymbol("cuLaunchKernelEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(config, f, kernelParams, extra); +} + +CUresult CUDAAPI cuLaunchCooperativeKernel( + CUfunction f, unsigned int gridDimX, unsigned int gridDimY, + unsigned int gridDimZ, unsigned int blockDimX, unsigned int blockDimY, + unsigned int blockDimZ, unsigned int sharedMemBytes, CUstream hStream, + void **kernelParams) { + using FuncPtr = CUresult(CUDAAPI *)( + CUfunction, unsigned int, unsigned int, unsigned int, unsigned int, + unsigned int, unsigned int, unsigned int, CUstream, void **); + static auto func_ptr = LoadSymbol("cuLaunchCooperativeKernel"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, + blockDimZ, sharedMemBytes, hStream, kernelParams); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchCooperativeKernelMultiDevice( + CUDA_LAUNCH_PARAMS *launchParamsList, unsigned int numDevices, + unsigned int flags) { + using FuncPtr = + CUresult(CUDAAPI *)(CUDA_LAUNCH_PARAMS *, unsigned int, unsigned int); + static auto func_ptr = + LoadSymbol("cuLaunchCooperativeKernelMultiDevice"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(launchParamsList, numDevices, flags); +} + +CUresult CUDAAPI cuLaunchHostFunc(CUstream hStream, CUhostFn fn, + void *userData) { + using FuncPtr = CUresult(CUDAAPI *)(CUstream, CUhostFn, void *); + static auto func_ptr = LoadSymbol("cuLaunchHostFunc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, fn, userData); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetBlockShape(CUfunction hfunc, int x, + int y, int z) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, int); + static auto func_ptr = LoadSymbol("cuFuncSetBlockShape"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hfunc, x, y, z); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuFuncSetSharedSize(CUfunction hfunc, + unsigned 
int bytes) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); + static auto func_ptr = LoadSymbol("cuFuncSetSharedSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hfunc, bytes); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetSize(CUfunction hfunc, + unsigned int numbytes) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction, unsigned int); + static auto func_ptr = LoadSymbol("cuParamSetSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hfunc, numbytes); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuParamSeti(CUfunction hfunc, int offset, + unsigned int value) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, unsigned int); + static auto func_ptr = LoadSymbol("cuParamSeti"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hfunc, offset, value); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetf(CUfunction hfunc, int offset, + float value) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, float); + static auto func_ptr = LoadSymbol("cuParamSetf"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hfunc, offset, value); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetv(CUfunction hfunc, int offset, + void *ptr, + unsigned int numbytes) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, void *, unsigned int); + static auto func_ptr = LoadSymbol("cuParamSetv"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hfunc, offset, ptr, numbytes); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuLaunch(CUfunction f) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction); + static auto func_ptr = LoadSymbol("cuLaunch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(f); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGrid(CUfunction f, int grid_width, + int grid_height) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int); + static auto func_ptr = LoadSymbol("cuLaunchGrid"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(f, grid_width, grid_height); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuLaunchGridAsync(CUfunction f, + int grid_width, + int grid_height, + CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, int, CUstream); + static auto func_ptr = LoadSymbol("cuLaunchGridAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(f, grid_width, grid_height, hStream); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuParamSetTexRef(CUfunction hfunc, + int texunit, + CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(CUfunction, int, CUtexref); + static auto func_ptr = LoadSymbol("cuParamSetTexRef"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hfunc, texunit, hTexRef); +} + +CUresult CUDAAPI cuGraphCreate(CUgraph *phGraph, unsigned int flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, unsigned int); + static auto func_ptr = LoadSymbol("cuGraphCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraph, flags); +} + +CUresult CUDAAPI cuGraphAddKernelNode( + CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, + size_t numDependencies, const CUDA_KERNEL_NODE_PARAMS *nodeParams) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, + const CUDA_KERNEL_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphAddKernelNode_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, + nodeParams); +} + +CUresult CUDAAPI cuGraphKernelNodeGetParams( + CUgraphNode hNode, CUDA_KERNEL_NODE_PARAMS *nodeParams) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_KERNEL_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphKernelNodeGetParams_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphKernelNodeSetParams( + CUgraphNode hNode, const 
CUDA_KERNEL_NODE_PARAMS *nodeParams) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode, const CUDA_KERNEL_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphKernelNodeSetParams_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphAddMemcpyNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, + size_t numDependencies, + const CUDA_MEMCPY3D *copyParams, + CUcontext ctx) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, + const CUDA_MEMCPY3D *, CUcontext); + static auto func_ptr = LoadSymbol("cuGraphAddMemcpyNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, + copyParams, ctx); +} + +CUresult CUDAAPI cuGraphMemcpyNodeGetParams(CUgraphNode hNode, + CUDA_MEMCPY3D *nodeParams) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMCPY3D *); + static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphMemcpyNodeSetParams(CUgraphNode hNode, + const CUDA_MEMCPY3D *nodeParams) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMCPY3D *); + static auto func_ptr = LoadSymbol("cuGraphMemcpyNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphAddMemsetNode( + CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, + size_t numDependencies, const CUDA_MEMSET_NODE_PARAMS *memsetParams, + CUcontext ctx) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, + const CUDA_MEMSET_NODE_PARAMS *, CUcontext); + static auto func_ptr = LoadSymbol("cuGraphAddMemsetNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, 
numDependencies, + memsetParams, ctx); +} + +CUresult CUDAAPI cuGraphMemsetNodeGetParams( + CUgraphNode hNode, CUDA_MEMSET_NODE_PARAMS *nodeParams) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEMSET_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphMemsetNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphMemsetNodeSetParams( + CUgraphNode hNode, const CUDA_MEMSET_NODE_PARAMS *nodeParams) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphMemsetNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphAddHostNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, + size_t numDependencies, + const CUDA_HOST_NODE_PARAMS *nodeParams) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, + const CUDA_HOST_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphAddHostNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, + nodeParams); +} + +CUresult CUDAAPI cuGraphHostNodeGetParams(CUgraphNode hNode, + CUDA_HOST_NODE_PARAMS *nodeParams) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUDA_HOST_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphHostNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphHostNodeSetParams( + CUgraphNode hNode, const CUDA_HOST_NODE_PARAMS *nodeParams) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode, const CUDA_HOST_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphHostNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +CUresult CUDAAPI 
cuGraphAddChildGraphNode(CUgraphNode *phGraphNode, + CUgraph hGraph, + const CUgraphNode *dependencies, + size_t numDependencies, + CUgraph childGraph) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, + const CUgraphNode *, size_t, CUgraph); + static auto func_ptr = LoadSymbol("cuGraphAddChildGraphNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, + childGraph); +} + +CUresult CUDAAPI cuGraphChildGraphNodeGetGraph(CUgraphNode hNode, + CUgraph *phGraph) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraph *); + static auto func_ptr = LoadSymbol("cuGraphChildGraphNodeGetGraph"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, phGraph); +} + +CUresult CUDAAPI cuGraphAddEmptyNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, + size_t numDependencies) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t); + static auto func_ptr = LoadSymbol("cuGraphAddEmptyNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, numDependencies); +} + +CUresult CUDAAPI cuGraphAddEventRecordNode(CUgraphNode *phGraphNode, + CUgraph hGraph, + const CUgraphNode *dependencies, + size_t numDependencies, + CUevent event) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, + const CUgraphNode *, size_t, CUevent); + static auto func_ptr = LoadSymbol("cuGraphAddEventRecordNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, event); +} + +CUresult CUDAAPI cuGraphEventRecordNodeGetEvent(CUgraphNode hNode, + CUevent *event_out) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent *); + static auto func_ptr = LoadSymbol("cuGraphEventRecordNodeGetEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, event_out); +} + +CUresult 
CUDAAPI cuGraphEventRecordNodeSetEvent(CUgraphNode hNode, + CUevent event) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent); + static auto func_ptr = LoadSymbol("cuGraphEventRecordNodeSetEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, event); +} + +CUresult CUDAAPI cuGraphAddEventWaitNode(CUgraphNode *phGraphNode, + CUgraph hGraph, + const CUgraphNode *dependencies, + size_t numDependencies, + CUevent event) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, + const CUgraphNode *, size_t, CUevent); + static auto func_ptr = LoadSymbol("cuGraphAddEventWaitNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, event); +} + +CUresult CUDAAPI cuGraphEventWaitNodeGetEvent(CUgraphNode hNode, + CUevent *event_out) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent *); + static auto func_ptr = LoadSymbol("cuGraphEventWaitNodeGetEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, event_out); +} + +CUresult CUDAAPI cuGraphEventWaitNodeSetEvent(CUgraphNode hNode, + CUevent event) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUevent); + static auto func_ptr = LoadSymbol("cuGraphEventWaitNodeSetEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, event); +} + +CUresult CUDAAPI cuGraphAddExternalSemaphoresSignalNode( + CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, + size_t numDependencies, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, + const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); + static auto func_ptr = + LoadSymbol("cuGraphAddExternalSemaphoresSignalNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, + nodeParams); +} + +CUresult CUDAAPI 
cuGraphExternalSemaphoresSignalNodeGetParams( + CUgraphNode hNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *params_out) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode, CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); + static auto func_ptr = + LoadSymbol("cuGraphExternalSemaphoresSignalNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, params_out); +} + +CUresult CUDAAPI cuGraphExternalSemaphoresSignalNodeSetParams( + CUgraphNode hNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode, const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); + static auto func_ptr = + LoadSymbol("cuGraphExternalSemaphoresSignalNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphAddExternalSemaphoresWaitNode( + CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, + size_t numDependencies, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, + const CUDA_EXT_SEM_WAIT_NODE_PARAMS *); + static auto func_ptr = + LoadSymbol("cuGraphAddExternalSemaphoresWaitNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, + nodeParams); +} + +CUresult CUDAAPI cuGraphExternalSemaphoresWaitNodeGetParams( + CUgraphNode hNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS *params_out) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode, CUDA_EXT_SEM_WAIT_NODE_PARAMS *); + static auto func_ptr = + LoadSymbol("cuGraphExternalSemaphoresWaitNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, params_out); +} + +CUresult CUDAAPI cuGraphExternalSemaphoresWaitNodeSetParams( + CUgraphNode hNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode, const CUDA_EXT_SEM_WAIT_NODE_PARAMS *); + static auto 
func_ptr = + LoadSymbol("cuGraphExternalSemaphoresWaitNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphAddBatchMemOpNode( + CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode *dependencies, + size_t numDependencies, const CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, + const CUDA_BATCH_MEM_OP_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphAddBatchMemOpNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, + nodeParams); +} + +CUresult CUDAAPI cuGraphBatchMemOpNodeGetParams( + CUgraphNode hNode, CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams_out) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode, CUDA_BATCH_MEM_OP_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphBatchMemOpNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams_out); +} + +CUresult CUDAAPI cuGraphBatchMemOpNodeSetParams( + CUgraphNode hNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode, const CUDA_BATCH_MEM_OP_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphBatchMemOpNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphExecBatchMemOpNodeSetParams( + CUgraphExec hGraphExec, CUgraphNode hNode, + const CUDA_BATCH_MEM_OP_NODE_PARAMS *nodeParams) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, + const CUDA_BATCH_MEM_OP_NODE_PARAMS *); + static auto func_ptr = + LoadSymbol("cuGraphExecBatchMemOpNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphAddMemAllocNode( + CUgraphNode *phGraphNode, CUgraph hGraph, const CUgraphNode 
*dependencies, + size_t numDependencies, CUDA_MEM_ALLOC_NODE_PARAMS *nodeParams) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, const CUgraphNode *, size_t, + CUDA_MEM_ALLOC_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphAddMemAllocNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, + nodeParams); +} + +CUresult CUDAAPI cuGraphMemAllocNodeGetParams( + CUgraphNode hNode, CUDA_MEM_ALLOC_NODE_PARAMS *params_out) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphNode, CUDA_MEM_ALLOC_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphMemAllocNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, params_out); +} + +CUresult CUDAAPI cuGraphAddMemFreeNode(CUgraphNode *phGraphNode, CUgraph hGraph, + const CUgraphNode *dependencies, + size_t numDependencies, + CUdeviceptr dptr) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraph, + const CUgraphNode *, size_t, CUdeviceptr); + static auto func_ptr = LoadSymbol("cuGraphAddMemFreeNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphNode, hGraph, dependencies, numDependencies, dptr); +} + +CUresult CUDAAPI cuGraphMemFreeNodeGetParams(CUgraphNode hNode, + CUdeviceptr *dptr_out) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUdeviceptr *); + static auto func_ptr = LoadSymbol("cuGraphMemFreeNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, dptr_out); +} + +CUresult CUDAAPI cuDeviceGraphMemTrim(CUdevice device) { + using FuncPtr = CUresult(CUDAAPI *)(CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceGraphMemTrim"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device); +} + +CUresult CUDAAPI cuDeviceGetGraphMemAttribute(CUdevice device, + CUgraphMem_attribute attr, + void *value) { + using FuncPtr = CUresult(CUDAAPI *)(CUdevice, CUgraphMem_attribute, void *); + 
static auto func_ptr = LoadSymbol("cuDeviceGetGraphMemAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, attr, value); +} + +CUresult CUDAAPI cuDeviceSetGraphMemAttribute(CUdevice device, + CUgraphMem_attribute attr, + void *value) { + using FuncPtr = CUresult(CUDAAPI *)(CUdevice, CUgraphMem_attribute, void *); + static auto func_ptr = LoadSymbol("cuDeviceSetGraphMemAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, attr, value); +} + +CUresult CUDAAPI cuGraphClone(CUgraph *phGraphClone, CUgraph originalGraph) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraph *, CUgraph); + static auto func_ptr = LoadSymbol("cuGraphClone"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphClone, originalGraph); +} + +CUresult CUDAAPI cuGraphNodeFindInClone(CUgraphNode *phNode, + CUgraphNode hOriginalNode, + CUgraph hClonedGraph) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode *, CUgraphNode, CUgraph); + static auto func_ptr = LoadSymbol("cuGraphNodeFindInClone"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phNode, hOriginalNode, hClonedGraph); +} + +CUresult CUDAAPI cuGraphNodeGetType(CUgraphNode hNode, CUgraphNodeType *type) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNodeType *); + static auto func_ptr = LoadSymbol("cuGraphNodeGetType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, type); +} + +CUresult CUDAAPI cuGraphGetNodes(CUgraph hGraph, CUgraphNode *nodes, + size_t *numNodes) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); + static auto func_ptr = LoadSymbol("cuGraphGetNodes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraph, nodes, numNodes); +} + +CUresult CUDAAPI cuGraphGetRootNodes(CUgraph hGraph, CUgraphNode *rootNodes, + size_t *numRootNodes) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, size_t *); + static auto func_ptr = 
LoadSymbol("cuGraphGetRootNodes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraph, rootNodes, numRootNodes); +} + +CUresult CUDAAPI cuGraphGetEdges(CUgraph hGraph, CUgraphNode *from, + CUgraphNode *to, size_t *numEdges) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraph, CUgraphNode *, CUgraphNode *, size_t *); + static auto func_ptr = LoadSymbol("cuGraphGetEdges"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraph, from, to, numEdges); +} + +CUresult CUDAAPI cuGraphNodeGetDependencies(CUgraphNode hNode, + CUgraphNode *dependencies, + size_t *numDependencies) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); + static auto func_ptr = LoadSymbol("cuGraphNodeGetDependencies"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, dependencies, numDependencies); +} + +CUresult CUDAAPI cuGraphNodeGetDependentNodes(CUgraphNode hNode, + CUgraphNode *dependentNodes, + size_t *numDependentNodes) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode *, size_t *); + static auto func_ptr = LoadSymbol("cuGraphNodeGetDependentNodes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, dependentNodes, numDependentNodes); +} + +CUresult CUDAAPI cuGraphAddDependencies(CUgraph hGraph, const CUgraphNode *from, + const CUgraphNode *to, + size_t numDependencies) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const CUgraphNode *, + const CUgraphNode *, size_t); + static auto func_ptr = LoadSymbol("cuGraphAddDependencies"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraph, from, to, numDependencies); +} + +CUresult CUDAAPI cuGraphRemoveDependencies(CUgraph hGraph, + const CUgraphNode *from, + const CUgraphNode *to, + size_t numDependencies) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const CUgraphNode *, + const CUgraphNode *, size_t); + static auto func_ptr = LoadSymbol("cuGraphRemoveDependencies"); + if (!func_ptr) 
return GetSymbolNotFoundError(); + return func_ptr(hGraph, from, to, numDependencies); +} + +CUresult CUDAAPI cuGraphDestroyNode(CUgraphNode hNode) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode); + static auto func_ptr = LoadSymbol("cuGraphDestroyNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode); +} + +CUresult CUDAAPI cuGraphInstantiate(CUgraphExec *phGraphExec, CUgraph hGraph, + unsigned long long flags) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphExec *, CUgraph, unsigned long long); + static auto func_ptr = LoadSymbol("cuGraphInstantiateWithFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphExec, hGraph, flags); +} + +CUresult CUDAAPI +cuGraphInstantiateWithParams(CUgraphExec *phGraphExec, CUgraph hGraph, + CUDA_GRAPH_INSTANTIATE_PARAMS *instantiateParams) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec *, CUgraph, + CUDA_GRAPH_INSTANTIATE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphInstantiateWithParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phGraphExec, hGraph, instantiateParams); +} + +CUresult CUDAAPI cuGraphExecGetFlags(CUgraphExec hGraphExec, + cuuint64_t *flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, cuuint64_t *); + static auto func_ptr = LoadSymbol("cuGraphExecGetFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, flags); +} + +CUresult CUDAAPI +cuGraphExecKernelNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, + const CUDA_KERNEL_NODE_PARAMS *nodeParams) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, + const CUDA_KERNEL_NODE_PARAMS *); + static auto func_ptr = + LoadSymbol("cuGraphExecKernelNodeSetParams_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphExecMemcpyNodeSetParams(CUgraphExec hGraphExec, + CUgraphNode hNode, + const CUDA_MEMCPY3D *copyParams, + CUcontext 
ctx) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, + const CUDA_MEMCPY3D *, CUcontext); + static auto func_ptr = LoadSymbol("cuGraphExecMemcpyNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, copyParams, ctx); +} + +CUresult CUDAAPI cuGraphExecMemsetNodeSetParams( + CUgraphExec hGraphExec, CUgraphNode hNode, + const CUDA_MEMSET_NODE_PARAMS *memsetParams, CUcontext ctx) { + using FuncPtr = CUresult(CUDAAPI *)( + CUgraphExec, CUgraphNode, const CUDA_MEMSET_NODE_PARAMS *, CUcontext); + static auto func_ptr = LoadSymbol("cuGraphExecMemsetNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, memsetParams, ctx); +} + +CUresult CUDAAPI +cuGraphExecHostNodeSetParams(CUgraphExec hGraphExec, CUgraphNode hNode, + const CUDA_HOST_NODE_PARAMS *nodeParams) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, + const CUDA_HOST_NODE_PARAMS *); + static auto func_ptr = LoadSymbol("cuGraphExecHostNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphExecChildGraphNodeSetParams(CUgraphExec hGraphExec, + CUgraphNode hNode, + CUgraph childGraph) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, CUgraph); + static auto func_ptr = + LoadSymbol("cuGraphExecChildGraphNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, childGraph); +} + +CUresult CUDAAPI cuGraphExecEventRecordNodeSetEvent(CUgraphExec hGraphExec, + CUgraphNode hNode, + CUevent event) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, CUevent); + static auto func_ptr = + LoadSymbol("cuGraphExecEventRecordNodeSetEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, event); +} + +CUresult CUDAAPI cuGraphExecEventWaitNodeSetEvent(CUgraphExec hGraphExec, + CUgraphNode hNode, + 
CUevent event) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, CUevent); + static auto func_ptr = + LoadSymbol("cuGraphExecEventWaitNodeSetEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, event); +} + +CUresult CUDAAPI cuGraphExecExternalSemaphoresSignalNodeSetParams( + CUgraphExec hGraphExec, CUgraphNode hNode, + const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *nodeParams) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, + const CUDA_EXT_SEM_SIGNAL_NODE_PARAMS *); + static auto func_ptr = + LoadSymbol("cuGraphExecExternalSemaphoresSignalNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphExecExternalSemaphoresWaitNodeSetParams( + CUgraphExec hGraphExec, CUgraphNode hNode, + const CUDA_EXT_SEM_WAIT_NODE_PARAMS *nodeParams) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, + const CUDA_EXT_SEM_WAIT_NODE_PARAMS *); + static auto func_ptr = + LoadSymbol("cuGraphExecExternalSemaphoresWaitNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, nodeParams); +} + +CUresult CUDAAPI cuGraphNodeSetEnabled(CUgraphExec hGraphExec, + CUgraphNode hNode, + unsigned int isEnabled) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, unsigned int); + static auto func_ptr = LoadSymbol("cuGraphNodeSetEnabled"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, isEnabled); +} + +CUresult CUDAAPI cuGraphNodeGetEnabled(CUgraphExec hGraphExec, + CUgraphNode hNode, + unsigned int *isEnabled) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUgraphNode, unsigned int *); + static auto func_ptr = LoadSymbol("cuGraphNodeGetEnabled"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, isEnabled); +} + +CUresult CUDAAPI cuGraphUpload(CUgraphExec hGraphExec, CUstream hStream) { + 
using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream); + static auto func_ptr = LoadSymbol("cuGraphUpload"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hStream); +} + +CUresult CUDAAPI cuGraphLaunch(CUgraphExec hGraphExec, CUstream hStream) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec, CUstream); + static auto func_ptr = LoadSymbol("cuGraphLaunch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hStream); +} + +CUresult CUDAAPI cuGraphExecDestroy(CUgraphExec hGraphExec) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphExec); + static auto func_ptr = LoadSymbol("cuGraphExecDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec); +} + +CUresult CUDAAPI cuGraphDestroy(CUgraph hGraph) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraph); + static auto func_ptr = LoadSymbol("cuGraphDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraph); +} + +CUresult CUDAAPI cuGraphExecUpdate(CUgraphExec hGraphExec, CUgraph hGraph, + CUgraphExecUpdateResultInfo *resultInfo) { + using FuncPtr = + CUresult(CUDAAPI *)(CUgraphExec, CUgraph, CUgraphExecUpdateResultInfo *); + static auto func_ptr = LoadSymbol("cuGraphExecUpdate_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hGraph, resultInfo); +} + +CUresult CUDAAPI cuGraphKernelNodeCopyAttributes(CUgraphNode dst, + CUgraphNode src) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUgraphNode); + static auto func_ptr = LoadSymbol("cuGraphKernelNodeCopyAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src); +} + +CUresult CUDAAPI +cuGraphKernelNodeGetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, + CUkernelNodeAttrValue *value_out) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUkernelNodeAttrID, + CUkernelNodeAttrValue *); + static auto func_ptr = LoadSymbol("cuGraphKernelNodeGetAttribute"); + if (!func_ptr) 
return GetSymbolNotFoundError(); + return func_ptr(hNode, attr, value_out); +} + +CUresult CUDAAPI +cuGraphKernelNodeSetAttribute(CUgraphNode hNode, CUkernelNodeAttrID attr, + const CUkernelNodeAttrValue *value) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphNode, CUkernelNodeAttrID, + const CUkernelNodeAttrValue *); + static auto func_ptr = LoadSymbol("cuGraphKernelNodeSetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, attr, value); +} + +CUresult CUDAAPI cuGraphDebugDotPrint(CUgraph hGraph, const char *path, + unsigned int flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraph, const char *, unsigned int); + static auto func_ptr = LoadSymbol("cuGraphDebugDotPrint"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraph, path, flags); +} + +CUresult CUDAAPI cuUserObjectCreate(CUuserObject *object_out, void *ptr, + CUhostFn destroy, + unsigned int initialRefcount, + unsigned int flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUuserObject *, void *, CUhostFn, + unsigned int, unsigned int); + static auto func_ptr = LoadSymbol("cuUserObjectCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(object_out, ptr, destroy, initialRefcount, flags); +} + +CUresult CUDAAPI cuUserObjectRetain(CUuserObject object, unsigned int count) { + using FuncPtr = CUresult(CUDAAPI *)(CUuserObject, unsigned int); + static auto func_ptr = LoadSymbol("cuUserObjectRetain"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(object, count); +} + +CUresult CUDAAPI cuUserObjectRelease(CUuserObject object, unsigned int count) { + using FuncPtr = CUresult(CUDAAPI *)(CUuserObject, unsigned int); + static auto func_ptr = LoadSymbol("cuUserObjectRelease"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(object, count); +} + +CUresult CUDAAPI cuGraphRetainUserObject(CUgraph graph, CUuserObject object, + unsigned int count, + unsigned int flags) { + using FuncPtr = + 
CUresult(CUDAAPI *)(CUgraph, CUuserObject, unsigned int, unsigned int); + static auto func_ptr = LoadSymbol("cuGraphRetainUserObject"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graph, object, count, flags); +} + +CUresult CUDAAPI cuGraphReleaseUserObject(CUgraph graph, CUuserObject object, + unsigned int count) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraph, CUuserObject, unsigned int); + static auto func_ptr = LoadSymbol("cuGraphReleaseUserObject"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graph, object, count); +} + +CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessor( + int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize) { + using FuncPtr = CUresult(CUDAAPI *)(int *, CUfunction, int, size_t); + static auto func_ptr = + LoadSymbol("cuOccupancyMaxActiveBlocksPerMultiprocessor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(numBlocks, func, blockSize, dynamicSMemSize); +} + +CUresult CUDAAPI cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags( + int *numBlocks, CUfunction func, int blockSize, size_t dynamicSMemSize, + unsigned int flags) { + using FuncPtr = + CUresult(CUDAAPI *)(int *, CUfunction, int, size_t, unsigned int); + static auto func_ptr = LoadSymbol( + "cuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags); +} + +CUresult CUDAAPI cuOccupancyMaxPotentialBlockSize( + int *minGridSize, int *blockSize, CUfunction func, + CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, + int blockSizeLimit) { + using FuncPtr = CUresult(CUDAAPI *)(int *, int *, CUfunction, + CUoccupancyB2DSize, size_t, int); + static auto func_ptr = + LoadSymbol("cuOccupancyMaxPotentialBlockSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, + dynamicSMemSize, 
blockSizeLimit); +} + +CUresult CUDAAPI cuOccupancyMaxPotentialBlockSizeWithFlags( + int *minGridSize, int *blockSize, CUfunction func, + CUoccupancyB2DSize blockSizeToDynamicSMemSize, size_t dynamicSMemSize, + int blockSizeLimit, unsigned int flags) { + using FuncPtr = CUresult(CUDAAPI *)( + int *, int *, CUfunction, CUoccupancyB2DSize, size_t, int, unsigned int); + static auto func_ptr = + LoadSymbol("cuOccupancyMaxPotentialBlockSizeWithFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(minGridSize, blockSize, func, blockSizeToDynamicSMemSize, + dynamicSMemSize, blockSizeLimit, flags); +} + +CUresult CUDAAPI cuOccupancyAvailableDynamicSMemPerBlock( + size_t *dynamicSmemSize, CUfunction func, int numBlocks, int blockSize) { + using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUfunction, int, int); + static auto func_ptr = + LoadSymbol("cuOccupancyAvailableDynamicSMemPerBlock"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dynamicSmemSize, func, numBlocks, blockSize); +} + +CUresult CUDAAPI cuOccupancyMaxPotentialClusterSize( + int *clusterSize, CUfunction func, const CUlaunchConfig *config) { + using FuncPtr = + CUresult(CUDAAPI *)(int *, CUfunction, const CUlaunchConfig *); + static auto func_ptr = + LoadSymbol("cuOccupancyMaxPotentialClusterSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(clusterSize, func, config); +} + +CUresult CUDAAPI cuOccupancyMaxActiveClusters(int *numClusters, CUfunction func, + const CUlaunchConfig *config) { + using FuncPtr = + CUresult(CUDAAPI *)(int *, CUfunction, const CUlaunchConfig *); + static auto func_ptr = LoadSymbol("cuOccupancyMaxActiveClusters"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(numClusters, func, config); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetArray(CUtexref hTexRef, + CUarray hArray, + unsigned int Flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray, unsigned int); + static auto func_ptr = 
LoadSymbol("cuTexRefSetArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hTexRef, hArray, Flags); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmappedArray( + CUtexref hTexRef, CUmipmappedArray hMipmappedArray, unsigned int Flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUmipmappedArray, unsigned int); + static auto func_ptr = LoadSymbol("cuTexRefSetMipmappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hTexRef, hMipmappedArray, Flags); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetAddress(size_t *ByteOffset, + CUtexref hTexRef, + CUdeviceptr dptr, + size_t bytes) { + using FuncPtr = CUresult(CUDAAPI *)(size_t *, CUtexref, CUdeviceptr, size_t); + static auto func_ptr = LoadSymbol("cuTexRefSetAddress_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ByteOffset, hTexRef, dptr, bytes); +} + +__CUDA_DEPRECATED CUresult CUDAAPI +cuTexRefSetAddress2D(CUtexref hTexRef, const CUDA_ARRAY_DESCRIPTOR *desc, + CUdeviceptr dptr, size_t Pitch) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref, const CUDA_ARRAY_DESCRIPTOR *, + CUdeviceptr, size_t); + static auto func_ptr = LoadSymbol("cuTexRefSetAddress2D_v3"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hTexRef, desc, dptr, Pitch); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFormat(CUtexref hTexRef, + CUarray_format fmt, + int NumPackedComponents) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUarray_format, int); + static auto func_ptr = LoadSymbol("cuTexRefSetFormat"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hTexRef, fmt, NumPackedComponents); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetAddressMode(CUtexref hTexRef, + int dim, + CUaddress_mode am) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref, int, CUaddress_mode); + static auto func_ptr = LoadSymbol("cuTexRefSetAddressMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return 
func_ptr(hTexRef, dim, am); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFilterMode(CUtexref hTexRef, + CUfilter_mode fm) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); + static auto func_ptr = LoadSymbol("cuTexRefSetFilterMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hTexRef, fm); +} + +__CUDA_DEPRECATED CUresult CUDAAPI +cuTexRefSetMipmapFilterMode(CUtexref hTexRef, CUfilter_mode fm) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref, CUfilter_mode); + static auto func_ptr = LoadSymbol("cuTexRefSetMipmapFilterMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hTexRef, fm); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmapLevelBias(CUtexref hTexRef, + float bias) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float); + static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelBias"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hTexRef, bias); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetMipmapLevelClamp( + CUtexref hTexRef, float minMipmapLevelClamp, float maxMipmapLevelClamp) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float, float); + static auto func_ptr = LoadSymbol("cuTexRefSetMipmapLevelClamp"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hTexRef, minMipmapLevelClamp, maxMipmapLevelClamp); +} + +__CUDA_DEPRECATED CUresult CUDAAPI +cuTexRefSetMaxAnisotropy(CUtexref hTexRef, unsigned int maxAniso) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); + static auto func_ptr = LoadSymbol("cuTexRefSetMaxAnisotropy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hTexRef, maxAniso); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetBorderColor(CUtexref hTexRef, + float *pBorderColor) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref, float *); + static auto func_ptr = LoadSymbol("cuTexRefSetBorderColor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hTexRef, 
pBorderColor); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefSetFlags(CUtexref hTexRef, + unsigned int Flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref, unsigned int); + static auto func_ptr = LoadSymbol("cuTexRefSetFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hTexRef, Flags); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetAddress(CUdeviceptr *pdptr, + CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(CUdeviceptr *, CUtexref); + static auto func_ptr = LoadSymbol("cuTexRefGetAddress_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pdptr, hTexRef); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetArray(CUarray *phArray, + CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUtexref); + static auto func_ptr = LoadSymbol("cuTexRefGetArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phArray, hTexRef); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMipmappedArray( + CUmipmappedArray *phMipmappedArray, CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUtexref); + static auto func_ptr = LoadSymbol("cuTexRefGetMipmappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phMipmappedArray, hTexRef); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetAddressMode(CUaddress_mode *pam, + CUtexref hTexRef, + int dim) { + using FuncPtr = CUresult(CUDAAPI *)(CUaddress_mode *, CUtexref, int); + static auto func_ptr = LoadSymbol("cuTexRefGetAddressMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pam, hTexRef, dim); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFilterMode(CUfilter_mode *pfm, + CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); + static auto func_ptr = LoadSymbol("cuTexRefGetFilterMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pfm, hTexRef); +} + +__CUDA_DEPRECATED CUresult CUDAAPI 
cuTexRefGetFormat(CUarray_format *pFormat, + int *pNumChannels, + CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(CUarray_format *, int *, CUtexref); + static auto func_ptr = LoadSymbol("cuTexRefGetFormat"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pFormat, pNumChannels, hTexRef); +} + +__CUDA_DEPRECATED CUresult CUDAAPI +cuTexRefGetMipmapFilterMode(CUfilter_mode *pfm, CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(CUfilter_mode *, CUtexref); + static auto func_ptr = LoadSymbol("cuTexRefGetMipmapFilterMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pfm, hTexRef); +} + +__CUDA_DEPRECATED CUresult CUDAAPI +cuTexRefGetMipmapLevelBias(float *pbias, CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); + static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelBias"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pbias, hTexRef); +} + +__CUDA_DEPRECATED CUresult CUDAAPI +cuTexRefGetMipmapLevelClamp(float *pminMipmapLevelClamp, + float *pmaxMipmapLevelClamp, CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(float *, float *, CUtexref); + static auto func_ptr = LoadSymbol("cuTexRefGetMipmapLevelClamp"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pminMipmapLevelClamp, pmaxMipmapLevelClamp, hTexRef); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetMaxAnisotropy(int *pmaxAniso, + CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(int *, CUtexref); + static auto func_ptr = LoadSymbol("cuTexRefGetMaxAnisotropy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pmaxAniso, hTexRef); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetBorderColor(float *pBorderColor, + CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(float *, CUtexref); + static auto func_ptr = LoadSymbol("cuTexRefGetBorderColor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pBorderColor, hTexRef); 
+} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefGetFlags(unsigned int *pFlags, + CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(unsigned int *, CUtexref); + static auto func_ptr = LoadSymbol("cuTexRefGetFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pFlags, hTexRef); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefCreate(CUtexref *pTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref *); + static auto func_ptr = LoadSymbol("cuTexRefCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pTexRef); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuTexRefDestroy(CUtexref hTexRef) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexref); + static auto func_ptr = LoadSymbol("cuTexRefDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hTexRef); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuSurfRefSetArray(CUsurfref hSurfRef, + CUarray hArray, + unsigned int Flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUsurfref, CUarray, unsigned int); + static auto func_ptr = LoadSymbol("cuSurfRefSetArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hSurfRef, hArray, Flags); +} + +__CUDA_DEPRECATED CUresult CUDAAPI cuSurfRefGetArray(CUarray *phArray, + CUsurfref hSurfRef) { + using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUsurfref); + static auto func_ptr = LoadSymbol("cuSurfRefGetArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(phArray, hSurfRef); +} + +CUresult CUDAAPI +cuTexObjectCreate(CUtexObject *pTexObject, const CUDA_RESOURCE_DESC *pResDesc, + const CUDA_TEXTURE_DESC *pTexDesc, + const CUDA_RESOURCE_VIEW_DESC *pResViewDesc) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexObject *, const CUDA_RESOURCE_DESC *, + const CUDA_TEXTURE_DESC *, + const CUDA_RESOURCE_VIEW_DESC *); + static auto func_ptr = LoadSymbol("cuTexObjectCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); +} + 
+CUresult CUDAAPI cuTexObjectDestroy(CUtexObject texObject) { + using FuncPtr = CUresult(CUDAAPI *)(CUtexObject); + static auto func_ptr = LoadSymbol("cuTexObjectDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(texObject); +} + +CUresult CUDAAPI cuTexObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, + CUtexObject texObject) { + using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUtexObject); + static auto func_ptr = LoadSymbol("cuTexObjectGetResourceDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pResDesc, texObject); +} + +CUresult CUDAAPI cuTexObjectGetTextureDesc(CUDA_TEXTURE_DESC *pTexDesc, + CUtexObject texObject) { + using FuncPtr = CUresult(CUDAAPI *)(CUDA_TEXTURE_DESC *, CUtexObject); + static auto func_ptr = LoadSymbol("cuTexObjectGetTextureDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pTexDesc, texObject); +} + +CUresult CUDAAPI cuTexObjectGetResourceViewDesc( + CUDA_RESOURCE_VIEW_DESC *pResViewDesc, CUtexObject texObject) { + using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_VIEW_DESC *, CUtexObject); + static auto func_ptr = LoadSymbol("cuTexObjectGetResourceViewDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pResViewDesc, texObject); +} + +CUresult CUDAAPI cuSurfObjectCreate(CUsurfObject *pSurfObject, + const CUDA_RESOURCE_DESC *pResDesc) { + using FuncPtr = + CUresult(CUDAAPI *)(CUsurfObject *, const CUDA_RESOURCE_DESC *); + static auto func_ptr = LoadSymbol("cuSurfObjectCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pSurfObject, pResDesc); +} + +CUresult CUDAAPI cuSurfObjectDestroy(CUsurfObject surfObject) { + using FuncPtr = CUresult(CUDAAPI *)(CUsurfObject); + static auto func_ptr = LoadSymbol("cuSurfObjectDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(surfObject); +} + +CUresult CUDAAPI cuSurfObjectGetResourceDesc(CUDA_RESOURCE_DESC *pResDesc, + CUsurfObject 
surfObject) { + using FuncPtr = CUresult(CUDAAPI *)(CUDA_RESOURCE_DESC *, CUsurfObject); + static auto func_ptr = LoadSymbol("cuSurfObjectGetResourceDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pResDesc, surfObject); +} + +CUresult CUDAAPI cuTensorMapEncodeTiled( + CUtensorMap *tensorMap, CUtensorMapDataType tensorDataType, + cuuint32_t tensorRank, void *globalAddress, const cuuint64_t *globalDim, + const cuuint64_t *globalStrides, const cuuint32_t *boxDim, + const cuuint32_t *elementStrides, CUtensorMapInterleave interleave, + CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion, + CUtensorMapFloatOOBfill oobFill) { + using FuncPtr = CUresult(CUDAAPI *)( + CUtensorMap *, CUtensorMapDataType, cuuint32_t, void *, + const cuuint64_t *, const cuuint64_t *, const cuuint32_t *, + const cuuint32_t *, CUtensorMapInterleave, CUtensorMapSwizzle, + CUtensorMapL2promotion, CUtensorMapFloatOOBfill); + static auto func_ptr = LoadSymbol("cuTensorMapEncodeTiled"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorMap, tensorDataType, tensorRank, globalAddress, + globalDim, globalStrides, boxDim, elementStrides, interleave, + swizzle, l2Promotion, oobFill); +} + +CUresult CUDAAPI cuTensorMapEncodeIm2col( + CUtensorMap *tensorMap, CUtensorMapDataType tensorDataType, + cuuint32_t tensorRank, void *globalAddress, const cuuint64_t *globalDim, + const cuuint64_t *globalStrides, const int *pixelBoxLowerCorner, + const int *pixelBoxUpperCorner, cuuint32_t channelsPerPixel, + cuuint32_t pixelsPerColumn, const cuuint32_t *elementStrides, + CUtensorMapInterleave interleave, CUtensorMapSwizzle swizzle, + CUtensorMapL2promotion l2Promotion, CUtensorMapFloatOOBfill oobFill) { + using FuncPtr = CUresult(CUDAAPI *)( + CUtensorMap *, CUtensorMapDataType, cuuint32_t, void *, + const cuuint64_t *, const cuuint64_t *, const int *, const int *, + cuuint32_t, cuuint32_t, const cuuint32_t *, CUtensorMapInterleave, + CUtensorMapSwizzle, 
CUtensorMapL2promotion, CUtensorMapFloatOOBfill); + static auto func_ptr = LoadSymbol("cuTensorMapEncodeIm2col"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorMap, tensorDataType, tensorRank, globalAddress, + globalDim, globalStrides, pixelBoxLowerCorner, + pixelBoxUpperCorner, channelsPerPixel, pixelsPerColumn, + elementStrides, interleave, swizzle, l2Promotion, oobFill); +} + +CUresult CUDAAPI cuTensorMapReplaceAddress(CUtensorMap *tensorMap, + void *globalAddress) { + using FuncPtr = CUresult(CUDAAPI *)(CUtensorMap *, void *); + static auto func_ptr = LoadSymbol("cuTensorMapReplaceAddress"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorMap, globalAddress); +} + +CUresult CUDAAPI cuDeviceCanAccessPeer(int *canAccessPeer, CUdevice dev, + CUdevice peerDev) { + using FuncPtr = CUresult(CUDAAPI *)(int *, CUdevice, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceCanAccessPeer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(canAccessPeer, dev, peerDev); +} + +CUresult CUDAAPI cuCtxEnablePeerAccess(CUcontext peerContext, + unsigned int Flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext, unsigned int); + static auto func_ptr = LoadSymbol("cuCtxEnablePeerAccess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(peerContext, Flags); +} + +CUresult CUDAAPI cuCtxDisablePeerAccess(CUcontext peerContext) { + using FuncPtr = CUresult(CUDAAPI *)(CUcontext); + static auto func_ptr = LoadSymbol("cuCtxDisablePeerAccess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(peerContext); +} + +CUresult CUDAAPI cuDeviceGetP2PAttribute(int *value, + CUdevice_P2PAttribute attrib, + CUdevice srcDevice, + CUdevice dstDevice) { + using FuncPtr = + CUresult(CUDAAPI *)(int *, CUdevice_P2PAttribute, CUdevice, CUdevice); + static auto func_ptr = LoadSymbol("cuDeviceGetP2PAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(value, 
attrib, srcDevice, dstDevice); +} + +CUresult CUDAAPI cuGraphicsUnregisterResource(CUgraphicsResource resource) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource); + static auto func_ptr = LoadSymbol("cuGraphicsUnregisterResource"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(resource); +} + +CUresult CUDAAPI cuGraphicsSubResourceGetMappedArray( + CUarray *pArray, CUgraphicsResource resource, unsigned int arrayIndex, + unsigned int mipLevel) { + using FuncPtr = CUresult(CUDAAPI *)(CUarray *, CUgraphicsResource, + unsigned int, unsigned int); + static auto func_ptr = + LoadSymbol("cuGraphicsSubResourceGetMappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pArray, resource, arrayIndex, mipLevel); +} + +CUresult CUDAAPI cuGraphicsResourceGetMappedMipmappedArray( + CUmipmappedArray *pMipmappedArray, CUgraphicsResource resource) { + using FuncPtr = CUresult(CUDAAPI *)(CUmipmappedArray *, CUgraphicsResource); + static auto func_ptr = + LoadSymbol("cuGraphicsResourceGetMappedMipmappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pMipmappedArray, resource); +} + +CUresult CUDAAPI cuGraphicsResourceGetMappedPointer( + CUdeviceptr *pDevPtr, size_t *pSize, CUgraphicsResource resource) { + using FuncPtr = + CUresult(CUDAAPI *)(CUdeviceptr *, size_t *, CUgraphicsResource); + static auto func_ptr = + LoadSymbol("cuGraphicsResourceGetMappedPointer_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pDevPtr, pSize, resource); +} + +CUresult CUDAAPI cuGraphicsResourceSetMapFlags(CUgraphicsResource resource, + unsigned int flags) { + using FuncPtr = CUresult(CUDAAPI *)(CUgraphicsResource, unsigned int); + static auto func_ptr = + LoadSymbol("cuGraphicsResourceSetMapFlags_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(resource, flags); +} + +CUresult CUDAAPI cuGraphicsMapResources(unsigned int count, + CUgraphicsResource *resources, + 
CUstream hStream) { + using FuncPtr = + CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); + static auto func_ptr = LoadSymbol("cuGraphicsMapResources"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(count, resources, hStream); +} + +CUresult CUDAAPI cuGraphicsUnmapResources(unsigned int count, + CUgraphicsResource *resources, + CUstream hStream) { + using FuncPtr = + CUresult(CUDAAPI *)(unsigned int, CUgraphicsResource *, CUstream); + static auto func_ptr = LoadSymbol("cuGraphicsUnmapResources"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(count, resources, hStream); +} + +CUresult CUDAAPI cuGetProcAddress( + const char *symbol, void **pfn, int cudaVersion, cuuint64_t flags, + CUdriverProcAddressQueryResult *symbolStatus) { + using FuncPtr = CUresult(CUDAAPI *)(const char *, void **, int, cuuint64_t, + CUdriverProcAddressQueryResult *); + static auto func_ptr = LoadSymbol("cuGetProcAddress_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(symbol, pfn, cudaVersion, flags, symbolStatus); +} + +CUresult CUDAAPI cuGetExportTable(const void **ppExportTable, + const CUuuid *pExportTableId) { + using FuncPtr = CUresult(CUDAAPI *)(const void **, const CUuuid *); + static auto func_ptr = LoadSymbol("cuGetExportTable"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ppExportTable, pExportTableId); +} + +} diff --git a/tensorflow/stream_executor/cuda/cuda_dnn.cc b/tensorflow/stream_executor/cuda/cuda_dnn.cc index 076831671ea..760aa4c71f4 100755 --- a/tensorflow/stream_executor/cuda/cuda_dnn.cc +++ b/tensorflow/stream_executor/cuda/cuda_dnn.cc @@ -181,7 +181,7 @@ class CudnnAccess { // therefore a bad idea (performance wise) to call any cuDNN APIs that // enqueue work in the stream. 
CudnnHandle GetHandle(GpuExecutor* executor, Stream* stream) { - auto lock = absl::make_unique(&mutex_); + auto lock = std::make_unique(&mutex_); mutex_.AssertHeld(); gpu::ScopedActivateExecutorContext context(executor); CUstream cu_stream = stream ? AsGpuStreamValue(stream) : cudaStreamLegacy; @@ -1208,7 +1208,7 @@ port::Status CheckAndFetchProjectionWeights( cudnnRNNMode_t mode; cudnnRNNAlgo_t algo; cudnnDataType_t data_type; - RETURN_IF_CUDNN_ERROR(cudnnGetRNNDescriptor( + RETURN_IF_CUDNN_ERROR(cudnnGetRNNDescriptor_v6( /*handle=*/cudnn.handle(), /*rnnDesc=*/rnn_desc, /*hiddenSize=*/&hidden_size_v, /*numLayers=*/&num_layers_v, @@ -2314,7 +2314,7 @@ namespace { // TODO(csigg): Merge a lot of duplicate code below for forward, backward data, // and backward filter. - +/* port::StatusOr GetCudnnConvolutionForwardAlgo( const CudnnHandle& cudnn, const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter, const CudnnConvolutionDescriptor& conv, @@ -2367,7 +2367,7 @@ GetCudnnConvolutionBackwardFilterAlgo(const CudnnHandle& cudnn, filter.handle(), preference, memory_limit_bytes, &algo_to_use)); return algo_to_use; } - +*/ port::StatusOr> AllocateCudnnConvolutionForwardWorkspace( Stream* stream, const CudnnHandle& cudnn, const CudnnTensorDescriptor& input_nd, const CudnnFilterDescriptor& filter, @@ -2515,6 +2515,7 @@ port::StatusOr GetCudnnConvolutionForwardAlgorithm( if (!algo_desc.has_value()) { // Pick fastest algorithm within memory limit according to cuDNN's // heuristics. 
+ /* bool specify_workspace_limit = scratch_allocator != nullptr; auto memory_limit_bytes = specify_workspace_limit @@ -2524,12 +2525,13 @@ port::StatusOr GetCudnnConvolutionForwardAlgorithm( GetCudnnConvolutionForwardAlgo( cudnn, input_nd, filter, conv, output_nd, specify_workspace_limit, memory_limit_bytes)); + */ + cudnnConvolutionFwdAlgo_t algo = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM; int cc_major, cc_minor; std::tie(cc_major, cc_minor) = GetCcMajorMinor(stream); algo_desc = dnn::AlgorithmDesc( algo, /*use_tensor_ops=*/TensorOpMathAvailable(cc_major)); } - const auto scratch_or = AllocateCudnnConvolutionForwardWorkspace( stream, cudnn, input_nd, filter, conv, output_nd, *algo_desc, scratch_allocator); @@ -2568,6 +2570,7 @@ port::StatusOr GetCudnnConvolutionBackwardDataAlgorithm( if (!algo_desc.has_value()) { // Pick fastest algorithm within memory limit according to cuDNN's // heuristics. + /* bool specify_workspace_limit = scratch_allocator != nullptr; auto memory_limit_bytes = specify_workspace_limit @@ -2577,12 +2580,13 @@ port::StatusOr GetCudnnConvolutionBackwardDataAlgorithm( GetCudnnConvolutionBackwardDataAlgo( cudnn, input_nd, filter, conv, output_nd, specify_workspace_limit, memory_limit_bytes)); + */ + cudnnConvolutionBwdDataAlgo_t algo = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0; int cc_major, cc_minor; std::tie(cc_major, cc_minor) = GetCcMajorMinor(stream); algo_desc = dnn::AlgorithmDesc( algo, /*use_tensor_ops=*/TensorOpMathAvailable(cc_major)); } - const auto scratch_or = AllocateCudnnConvolutionBackwardDataWorkspace( stream, cudnn, input_nd, filter, conv, output_nd, *algo_desc, scratch_allocator); @@ -2620,6 +2624,7 @@ port::StatusOr GetCudnnConvolutionBackwardFilterAlgorithm( if (!algo_desc.has_value()) { // Pick fastest algorithm within memory limit according to cuDNN's // heuristics. 
+ /* bool specify_workspace_limit = scratch_allocator != nullptr; auto memory_limit_bytes = specify_workspace_limit @@ -2629,12 +2634,13 @@ port::StatusOr GetCudnnConvolutionBackwardFilterAlgorithm( GetCudnnConvolutionBackwardFilterAlgo( cudnn, input_nd, filter, conv, output_nd, specify_workspace_limit, memory_limit_bytes)); + */ + cudnnConvolutionBwdFilterAlgo_t algo = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0; int cc_major, cc_minor; std::tie(cc_major, cc_minor) = GetCcMajorMinor(stream); algo_desc = dnn::AlgorithmDesc( algo, /*use_tensor_ops=*/TensorOpMathAvailable(cc_major)); } - auto scratch_or = AllocateCudnnConvolutionBackwardFilterWorkspace( stream, cudnn, input_nd, filter, conv, output_nd, *algo_desc, scratch_allocator); diff --git a/tensorflow/stream_executor/cuda/cuda_driver.cc b/tensorflow/stream_executor/cuda/cuda_driver.cc index f7a69fc086a..92a4e911da0 100644 --- a/tensorflow/stream_executor/cuda/cuda_driver.cc +++ b/tensorflow/stream_executor/cuda/cuda_driver.cc @@ -178,7 +178,7 @@ void CheckPointerIsValid(const PtrT ptr, absl::string_view name) { // If we failed, reset cuda error status to avoid poisoning cuda streams. if (err != cudaSuccess) cudaGetLastError(); bool points_to_host_memory = (err == cudaErrorInvalidValue || - attributes.memoryType != cudaMemoryTypeDevice); + attributes.type != cudaMemoryTypeDevice); CHECK_EQ(is_host_ptr, points_to_host_memory) << absl::StreamFormat( "%s pointer is not actually on %s: %p", name, is_host_ptr ? "CPU" : "GPU", reinterpret_cast(ptr)); diff --git a/tensorflow/stream_executor/cuda/cuda_runtime_12_0.inc b/tensorflow/stream_executor/cuda/cuda_runtime_12_0.inc new file mode 100644 index 00000000000..343db231322 --- /dev/null +++ b/tensorflow/stream_executor/cuda/cuda_runtime_12_0.inc @@ -0,0 +1,2676 @@ +// Auto-generated, do not edit. 
+ +extern "C" { +extern __host__ cudaError_t CUDARTAPI cudaDeviceReset(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaDeviceReset"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceSynchronize(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaDeviceSynchronize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __host__ cudaError_t CUDARTAPI cudaDeviceSetLimit(enum cudaLimit limit, + size_t value) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); + static auto func_ptr = LoadSymbol("cudaDeviceSetLimit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(limit, value); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetLimit(size_t *pValue, enum cudaLimit limit) { + using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); + static auto func_ptr = LoadSymbol("cudaDeviceGetLimit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pValue, limit); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetTexture1DLinearMaxWidth( + size_t *maxWidthInElements, const struct cudaChannelFormatDesc *fmtDesc, + int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + size_t *, const struct cudaChannelFormatDesc *, int); + static auto func_ptr = + LoadSymbol("cudaDeviceGetTexture1DLinearMaxWidth"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(maxWidthInElements, fmtDesc, device); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetCacheConfig(enum cudaFuncCache *pCacheConfig) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); + static auto func_ptr = LoadSymbol("cudaDeviceGetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pCacheConfig); +} + 
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetStreamPriorityRange(int *leastPriority, int *greatestPriority) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int *); + static auto func_ptr = + LoadSymbol("cudaDeviceGetStreamPriorityRange"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(leastPriority, greatestPriority); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceSetCacheConfig(enum cudaFuncCache cacheConfig) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); + static auto func_ptr = LoadSymbol("cudaDeviceSetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(cacheConfig); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetSharedMemConfig(enum cudaSharedMemConfig *pConfig) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig *); + static auto func_ptr = LoadSymbol("cudaDeviceGetSharedMemConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pConfig); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceSetSharedMemConfig(enum cudaSharedMemConfig config) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaSharedMemConfig); + static auto func_ptr = LoadSymbol("cudaDeviceSetSharedMemConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(config); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceGetByPCIBusId(int *device, const char *pciBusId) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const char *); + static auto func_ptr = LoadSymbol("cudaDeviceGetByPCIBusId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, pciBusId); +} + +extern __host__ cudaError_t CUDARTAPI cudaDeviceGetPCIBusId(char *pciBusId, + int len, + int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(char *, int, int); + static auto func_ptr = LoadSymbol("cudaDeviceGetPCIBusId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pciBusId, len, 
device); +} + +extern __host__ cudaError_t CUDARTAPI +cudaIpcGetEventHandle(cudaIpcEventHandle_t *handle, cudaEvent_t event) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcEventHandle_t *, cudaEvent_t); + static auto func_ptr = LoadSymbol("cudaIpcGetEventHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, event); +} + +extern __host__ cudaError_t CUDARTAPI +cudaIpcOpenEventHandle(cudaEvent_t *event, cudaIpcEventHandle_t handle) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, cudaIpcEventHandle_t); + static auto func_ptr = LoadSymbol("cudaIpcOpenEventHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(event, handle); +} + +extern __host__ cudaError_t CUDARTAPI +cudaIpcGetMemHandle(cudaIpcMemHandle_t *handle, void *devPtr) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaIpcMemHandle_t *, void *); + static auto func_ptr = LoadSymbol("cudaIpcGetMemHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, devPtr); +} + +extern __host__ cudaError_t CUDARTAPI cudaIpcOpenMemHandle( + void **devPtr, cudaIpcMemHandle_t handle, unsigned int flags) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void **, cudaIpcMemHandle_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaIpcOpenMemHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, handle, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaIpcCloseMemHandle(void *devPtr) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *); + static auto func_ptr = LoadSymbol("cudaIpcCloseMemHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr); +} + +extern __host__ cudaError_t CUDARTAPI cudaDeviceFlushGPUDirectRDMAWrites( + enum cudaFlushGPUDirectRDMAWritesTarget target, + enum cudaFlushGPUDirectRDMAWritesScope scope) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(enum cudaFlushGPUDirectRDMAWritesTarget, + enum cudaFlushGPUDirectRDMAWritesScope); + static auto 
func_ptr = + LoadSymbol("cudaDeviceFlushGPUDirectRDMAWrites"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(target, scope); +} + +extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadExit(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaThreadExit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI +cudaThreadSynchronize(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaThreadSynchronize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI +cudaThreadSetLimit(enum cudaLimit limit, size_t value) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaLimit, size_t); + static auto func_ptr = LoadSymbol("cudaThreadSetLimit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(limit, value); +} + +extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI +cudaThreadGetLimit(size_t *pValue, enum cudaLimit limit) { + using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, enum cudaLimit); + static auto func_ptr = LoadSymbol("cudaThreadGetLimit"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pValue, limit); +} + +extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI +cudaThreadGetCacheConfig(enum cudaFuncCache *pCacheConfig) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache *); + static auto func_ptr = LoadSymbol("cudaThreadGetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pCacheConfig); +} + +extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI +cudaThreadSetCacheConfig(enum cudaFuncCache cacheConfig) { + using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaFuncCache); + static auto func_ptr = LoadSymbol("cudaThreadSetCacheConfig"); + if (!func_ptr) return GetSymbolNotFoundError(); + return 
func_ptr(cacheConfig); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaGetLastError(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaGetLastError"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaPeekAtLastError(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaPeekAtLastError"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __host__ __cudart_builtin__ const char *CUDARTAPI +cudaGetErrorName(cudaError_t error) { + using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); + static auto func_ptr = LoadSymbol("cudaGetErrorName"); + if (!func_ptr) return "cudaGetErrorName symbol not found."; + return func_ptr(error); +} + +extern __host__ __cudart_builtin__ const char *CUDARTAPI +cudaGetErrorString(cudaError_t error) { + using FuncPtr = const char *(CUDARTAPI *)(cudaError_t); + static auto func_ptr = LoadSymbol("cudaGetErrorString"); + if (!func_ptr) return "cudaGetErrorString symbol not found."; + return func_ptr(error); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaGetDeviceCount(int *count) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *); + static auto func_ptr = LoadSymbol("cudaGetDeviceCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(count); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaGetDeviceProperties(struct cudaDeviceProp *prop, int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaDeviceProp *, int); + static auto func_ptr = LoadSymbol("cudaGetDeviceProperties_v2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(prop, device); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetAttribute(int *value, enum cudaDeviceAttr attr, int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *, 
enum cudaDeviceAttr, int); + static auto func_ptr = LoadSymbol("cudaDeviceGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(value, attr, device); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceGetDefaultMemPool(cudaMemPool_t *memPool, int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); + static auto func_ptr = LoadSymbol("cudaDeviceGetDefaultMemPool"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memPool, device); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceSetMemPool(int device, cudaMemPool_t memPool) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int, cudaMemPool_t); + static auto func_ptr = LoadSymbol("cudaDeviceSetMemPool"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, memPool); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceGetMemPool(cudaMemPool_t *memPool, int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, int); + static auto func_ptr = LoadSymbol("cudaDeviceGetMemPool"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memPool, device); +} + +extern __host__ cudaError_t CUDARTAPI cudaDeviceGetNvSciSyncAttributes( + void *nvSciSyncAttrList, int device, int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, int); + static auto func_ptr = + LoadSymbol("cudaDeviceGetNvSciSyncAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(nvSciSyncAttrList, device, flags); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaDeviceGetP2PAttribute(int *value, enum cudaDeviceP2PAttr attr, + int srcDevice, int dstDevice) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(int *, enum cudaDeviceP2PAttr, int, int); + static auto func_ptr = LoadSymbol("cudaDeviceGetP2PAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(value, attr, srcDevice, dstDevice); +} + +extern __host__ cudaError_t CUDARTAPI +cudaChooseDevice(int 
*device, const struct cudaDeviceProp *prop) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(int *, const struct cudaDeviceProp *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaChooseDevice");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(device, prop);
+}
+
+// Forwarding stubs. Each function below resolves the runtime symbol named in
+// its LoadSymbol call once (function-local static), returns
+// GetSymbolNotFoundError() if that pointer is null, and otherwise forwards
+// its arguments unchanged.
+// NOTE(review): LoadSymbol<FuncPtr> assumes LoadSymbol is a template on the
+// pointer type; its definition is outside this excerpt -- confirm.
+extern __host__ cudaError_t CUDARTAPI cudaInitDevice(int device,
+                                                     unsigned int deviceFlags,
+                                                     unsigned int flags) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int, unsigned int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaInitDevice");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(device, deviceFlags, flags);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaSetDevice(int device) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaSetDevice");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(device);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaGetDevice(int *device) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGetDevice");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(device);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaSetValidDevices(int *device_arr,
+                                                          int len) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaSetValidDevices");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(device_arr, len);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaSetDeviceFlags(unsigned int flags) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaSetDeviceFlags");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(flags);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaGetDeviceFlags(unsigned int *flags) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaGetDeviceFlags");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return
func_ptr(flags);
+}
+
+// Forwarding stubs. Each function below resolves the runtime symbol named in
+// its LoadSymbol call once (function-local static), returns
+// GetSymbolNotFoundError() if that pointer is null, and otherwise forwards
+// its arguments unchanged.
+// NOTE(review): LoadSymbol<FuncPtr> assumes LoadSymbol is a template on the
+// pointer type; its definition is outside this excerpt -- confirm.
+extern __host__ cudaError_t CUDARTAPI cudaStreamCreate(cudaStream_t *pStream) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamCreate");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(pStream);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaStreamCreateWithFlags(cudaStream_t *pStream, unsigned int flags) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamCreateWithFlags");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(pStream, flags);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaStreamCreateWithPriority(cudaStream_t *pStream, unsigned int flags,
+                             int priority) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t *, unsigned int, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamCreateWithPriority");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(pStream, flags, priority);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaStreamGetPriority(cudaStream_t hStream, int *priority) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamGetPriority");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(hStream, priority);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaStreamGetFlags(cudaStream_t hStream, unsigned int *flags) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamGetFlags");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(hStream, flags);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaStreamGetId(cudaStream_t hStream, unsigned long long *streamId) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, unsigned long long *);
+  static auto func_ptr =
LoadSymbol("cudaStreamGetId"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, streamId); +} + +extern __host__ cudaError_t CUDARTAPI cudaCtxResetPersistingL2Cache(void) { + using FuncPtr = cudaError_t(CUDARTAPI *)(); + static auto func_ptr = LoadSymbol("cudaCtxResetPersistingL2Cache"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaStreamCopyAttributes(cudaStream_t dst, cudaStream_t src) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaStreamCopyAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaStreamGetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, + cudaStreamAttrValue *value_out) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamAttrID, + cudaStreamAttrValue *); + static auto func_ptr = LoadSymbol("cudaStreamGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, attr, value_out); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaStreamSetAttribute(cudaStream_t hStream, cudaStreamAttrID attr, + const cudaStreamAttrValue *value) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamAttrID, + const cudaStreamAttrValue *); + static auto func_ptr = LoadSymbol("cudaStreamSetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hStream, attr, value); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaStreamDestroy(cudaStream_t stream) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t); + static auto func_ptr = LoadSymbol("cudaStreamDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stream); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaStreamWaitEvent( + cudaStream_t stream, 
cudaEvent_t event, unsigned int flags __dv(0)) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(cudaStream_t, cudaEvent_t, unsigned int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamWaitEvent");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(stream, event, flags);
+}
+
+// Forwarding stubs. Each function below resolves the runtime symbol named in
+// its LoadSymbol call once (function-local static), returns
+// GetSymbolNotFoundError() if that pointer is null, and otherwise forwards
+// its arguments unchanged.
+// NOTE(review): LoadSymbol<FuncPtr> assumes LoadSymbol is a template on the
+// pointer type; its definition is outside this excerpt -- confirm.
+extern __host__ cudaError_t CUDARTAPI
+cudaStreamAddCallback(cudaStream_t stream, cudaStreamCallback_t callback,
+                      void *userData, unsigned int flags) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaStreamCallback_t,
+                                           void *, unsigned int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamAddCallback");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(stream, callback, userData, flags);
+}
+
+extern __host__ cudaError_t CUDARTAPI
+cudaStreamSynchronize(cudaStream_t stream) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamSynchronize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(stream);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaStreamQuery(cudaStream_t stream) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamQuery");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(stream);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaStreamAttachMemAsync(cudaStream_t stream, void *devPtr,
+                         size_t length __dv(0), unsigned int flags) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(cudaStream_t, void *, size_t, unsigned int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamAttachMemAsync");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(stream, devPtr, length, flags);
+}
+
+extern __host__ cudaError_t CUDARTAPI
+cudaStreamBeginCapture(cudaStream_t stream, enum cudaStreamCaptureMode mode) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureMode);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamBeginCapture");
+  if (!func_ptr)
return GetSymbolNotFoundError();
+  return func_ptr(stream, mode);
+}
+
+// Forwarding stubs. Each function below resolves the runtime symbol named in
+// its LoadSymbol call once (function-local static), returns
+// GetSymbolNotFoundError() if that pointer is null, and otherwise forwards
+// its arguments unchanged.
+// NOTE(review): LoadSymbol<FuncPtr> assumes LoadSymbol is a template on the
+// pointer type; its definition is outside this excerpt -- confirm.
+extern __host__ cudaError_t CUDARTAPI
+cudaThreadExchangeStreamCaptureMode(enum cudaStreamCaptureMode *mode) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(enum cudaStreamCaptureMode *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudaThreadExchangeStreamCaptureMode");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(mode);
+}
+
+extern __host__ cudaError_t CUDARTAPI
+cudaStreamEndCapture(cudaStream_t stream, cudaGraph_t *pGraph) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraph_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamEndCapture");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(stream, pGraph);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaStreamIsCapturing(
+    cudaStream_t stream, enum cudaStreamCaptureStatus *pCaptureStatus) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(cudaStream_t, enum cudaStreamCaptureStatus *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamIsCapturing");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(stream, pCaptureStatus);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaStreamGetCaptureInfo(
+    cudaStream_t stream, enum cudaStreamCaptureStatus *captureStatus_out,
+    unsigned long long *id_out __dv(0), cudaGraph_t *graph_out __dv(0),
+    const cudaGraphNode_t **dependencies_out __dv(0),
+    size_t *numDependencies_out __dv(0)) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(
+      cudaStream_t, enum cudaStreamCaptureStatus *, unsigned long long *,
+      cudaGraph_t *, const cudaGraphNode_t **, size_t *);
+  // The lookup name must be a plain symbol name: a macro invocation written
+  // inside a string literal is never expanded, so the previous
+  // "__CUDART_API_PTSZ(...)" text could not match any exported symbol.
+  // NOTE(review): uses the non-per-thread-stream name; confirm whether a
+  // _ptsz variant is needed for per-thread default stream builds.
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaStreamGetCaptureInfo_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(stream, captureStatus_out, id_out, graph_out,
+                  dependencies_out, numDependencies_out);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaStreamUpdateCaptureDependencies(
+    cudaStream_t stream, cudaGraphNode_t *dependencies, size_t
numDependencies,
+    unsigned int flags __dv(0)) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaGraphNode_t *,
+                                           size_t, unsigned int);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudaStreamUpdateCaptureDependencies");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(stream, dependencies, numDependencies, flags);
+}
+
+// Forwarding stubs. Each function below resolves the runtime symbol named in
+// its LoadSymbol call once (function-local static), returns
+// GetSymbolNotFoundError() if that pointer is null, and otherwise forwards
+// its arguments unchanged.
+// NOTE(review): LoadSymbol<FuncPtr> assumes LoadSymbol is a template on the
+// pointer type; its definition is outside this excerpt -- confirm.
+extern __host__ cudaError_t CUDARTAPI cudaEventCreate(cudaEvent_t *event) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaEventCreate");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(event);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaEventCreateWithFlags(cudaEvent_t *event, unsigned int flags) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t *, unsigned int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaEventCreateWithFlags");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(event, flags);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaEventRecord(cudaEvent_t event, cudaStream_t stream __dv(0)) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaEventRecord");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(event, stream);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaEventRecordWithFlags(cudaEvent_t event, cudaStream_t stream __dv(0),
+                         unsigned int flags __dv(0)) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(cudaEvent_t, cudaStream_t, unsigned int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaEventRecordWithFlags");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(event, stream, flags);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaEventQuery(cudaEvent_t event) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaEventQuery");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(event);
+}
+
+// Forwarding stubs. Each function below resolves the runtime symbol named in
+// its LoadSymbol call once (function-local static), returns
+// GetSymbolNotFoundError() if that pointer is null, and otherwise forwards
+// its arguments unchanged.
+// NOTE(review): LoadSymbol<FuncPtr> assumes LoadSymbol is a template on the
+// pointer type; its definition is outside this excerpt -- confirm.
+extern __host__ cudaError_t CUDARTAPI cudaEventSynchronize(cudaEvent_t event) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaEventSynchronize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(event);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaEventDestroy(cudaEvent_t event) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaEvent_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaEventDestroy");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(event);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaEventElapsedTime(float *ms,
+                                                           cudaEvent_t start,
+                                                           cudaEvent_t end) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(float *, cudaEvent_t, cudaEvent_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaEventElapsedTime");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(ms, start, end);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaImportExternalMemory(
+    cudaExternalMemory_t *extMem_out,
+    const struct cudaExternalMemoryHandleDesc *memHandleDesc) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(
+      cudaExternalMemory_t *, const struct cudaExternalMemoryHandleDesc *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaImportExternalMemory");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(extMem_out, memHandleDesc);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedBuffer(
+    void **devPtr, cudaExternalMemory_t extMem,
+    const struct cudaExternalMemoryBufferDesc *bufferDesc) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(void **, cudaExternalMemory_t,
+                               const struct cudaExternalMemoryBufferDesc *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudaExternalMemoryGetMappedBuffer");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(devPtr, extMem, bufferDesc);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaExternalMemoryGetMappedMipmappedArray(
+    cudaMipmappedArray_t *mipmap, cudaExternalMemory_t extMem,
+    const struct
cudaExternalMemoryMipmappedArrayDesc *mipmapDesc) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(
+      cudaMipmappedArray_t *, cudaExternalMemory_t,
+      const struct cudaExternalMemoryMipmappedArrayDesc *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudaExternalMemoryGetMappedMipmappedArray");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(mipmap, extMem, mipmapDesc);
+}
+
+// Forwarding stubs. Each function below resolves the runtime symbol named in
+// its LoadSymbol call once (function-local static), returns
+// GetSymbolNotFoundError() if that pointer is null, and otherwise forwards
+// its arguments unchanged.
+// NOTE(review): LoadSymbol<FuncPtr> assumes LoadSymbol is a template on the
+// pointer type; its definition is outside this excerpt -- confirm.
+extern __host__ cudaError_t CUDARTAPI
+cudaDestroyExternalMemory(cudaExternalMemory_t extMem) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalMemory_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaDestroyExternalMemory");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(extMem);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaImportExternalSemaphore(
+    cudaExternalSemaphore_t *extSem_out,
+    const struct cudaExternalSemaphoreHandleDesc *semHandleDesc) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t *,
+                               const struct cudaExternalSemaphoreHandleDesc *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaImportExternalSemaphore");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(extSem_out, semHandleDesc);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaSignalExternalSemaphoresAsync(
+    const cudaExternalSemaphore_t *extSemArray,
+    const struct cudaExternalSemaphoreSignalParams *paramsArray,
+    unsigned int numExtSems, cudaStream_t stream __dv(0)) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *,
+                               const struct cudaExternalSemaphoreSignalParams *,
+                               unsigned int, cudaStream_t);
+  // The lookup name must be a plain symbol name: a macro invocation written
+  // inside a string literal is never expanded, so the previous
+  // "__CUDART_API_PTSZ(...)" text could not match any exported symbol.
+  // NOTE(review): uses the non-per-thread-stream name; confirm whether a
+  // _ptsz variant is needed for per-thread default stream builds.
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudaSignalExternalSemaphoresAsync_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(extSemArray, paramsArray, numExtSems, stream);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaWaitExternalSemaphoresAsync(
+    const cudaExternalSemaphore_t *extSemArray,
+    const struct cudaExternalSemaphoreWaitParams *paramsArray,
+    unsigned int numExtSems, cudaStream_t stream __dv(0)) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(const cudaExternalSemaphore_t *,
+                               const struct cudaExternalSemaphoreWaitParams *,
+                               unsigned int, cudaStream_t);
+  // Same fix as cudaSignalExternalSemaphoresAsync: look up the plain symbol
+  // name instead of un-expanded macro text.
+  // NOTE(review): confirm whether a _ptsz variant is needed for per-thread
+  // default stream builds.
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudaWaitExternalSemaphoresAsync_v2");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(extSemArray, paramsArray, numExtSems, stream);
+}
+
+extern __host__ cudaError_t CUDARTAPI
+cudaDestroyExternalSemaphore(cudaExternalSemaphore_t extSem) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaExternalSemaphore_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaDestroyExternalSemaphore");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(extSem);
+}
+
+extern __host__ cudaError_t CUDARTAPI
+cudaLaunchKernel(const void *func, dim3 gridDim, dim3 blockDim, void **args,
+                 size_t sharedMem, cudaStream_t stream) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **,
+                                           size_t, cudaStream_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaLaunchKernel");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(func, gridDim, blockDim, args, sharedMem, stream);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaLaunchKernelExC(
+    const cudaLaunchConfig_t *config, const void *func, void **args) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(const cudaLaunchConfig_t *,
+                                           const void *, void **);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaLaunchKernelExC");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(config, func, args);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaLaunchCooperativeKernel(
+    const void *func, dim3 gridDim, dim3 blockDim, void **args,
+    size_t sharedMem, cudaStream_t stream) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, dim3, dim3, void **,
+                                           size_t, cudaStream_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaLaunchCooperativeKernel");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(func, gridDim, blockDim, args,
sharedMem, stream);
+}
+
+// Forwarding stubs. Each function below resolves the runtime symbol named in
+// its LoadSymbol call once (function-local static), returns
+// GetSymbolNotFoundError() if that pointer is null, and otherwise forwards
+// its arguments unchanged.
+// NOTE(review): LoadSymbol<FuncPtr> assumes LoadSymbol is a template on the
+// pointer type; its definition is outside this excerpt -- confirm.
+extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI
+cudaLaunchCooperativeKernelMultiDevice(
+    struct cudaLaunchParams *launchParamsList, unsigned int numDevices,
+    unsigned int flags __dv(0)) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaLaunchParams *,
+                                           unsigned int, unsigned int);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudaLaunchCooperativeKernelMultiDevice");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(launchParamsList, numDevices, flags);
+}
+
+extern __host__ cudaError_t CUDARTAPI
+cudaFuncSetCacheConfig(const void *func, enum cudaFuncCache cacheConfig) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncCache);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaFuncSetCacheConfig");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(func, cacheConfig);
+}
+
+extern __host__ cudaError_t CUDARTAPI
+cudaFuncSetSharedMemConfig(const void *func, enum cudaSharedMemConfig config) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(const void *, enum cudaSharedMemConfig);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaFuncSetSharedMemConfig");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(func, config);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaFuncGetAttributes(struct cudaFuncAttributes *attr, const void *func) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(struct cudaFuncAttributes *, const void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaFuncGetAttributes");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(attr, func);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaFuncSetAttribute(const void *func, enum cudaFuncAttribute attr, int value) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(const void *, enum cudaFuncAttribute, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaFuncSetAttribute");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(func, attr, value);
+}
+
+extern
__CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI
+cudaSetDoubleForDevice(double *d) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(double *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaSetDoubleForDevice");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(d);
+}
+
+// Forwarding stubs. Each function here resolves the runtime symbol named in
+// its LoadSymbol call once (function-local static), returns
+// GetSymbolNotFoundError() if that pointer is null, and otherwise forwards
+// its arguments unchanged.
+// NOTE(review): LoadSymbol<FuncPtr> assumes LoadSymbol is a template on the
+// pointer type; its definition is outside this excerpt -- confirm.
+extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI
+cudaSetDoubleForHost(double *d) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(double *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaSetDoubleForHost");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(d);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaLaunchHostFunc(cudaStream_t stream,
+                                                         cudaHostFn_t fn,
+                                                         void *userData) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaStream_t, cudaHostFn_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaLaunchHostFunc");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(stream, fn, userData);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaOccupancyMaxActiveBlocksPerMultiprocessor(int *numBlocks, const void *func,
+                                              int blockSize,
+                                              size_t dynamicSMemSize) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudaOccupancyMaxActiveBlocksPerMultiprocessor");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(numBlocks, func, blockSize, dynamicSMemSize);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaOccupancyAvailableDynamicSMemPerBlock(size_t *dynamicSmemSize,
+                                          const void *func, int numBlocks,
+                                          int blockSize) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *, int, int);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudaOccupancyAvailableDynamicSMemPerBlock");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(dynamicSmemSize, func, numBlocks, blockSize);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(int *numBlocks,
+                                                       const void *func,
+
int blockSize,
+                                                       size_t dynamicSMemSize,
+                                                       unsigned int flags) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(int *, const void *, int, size_t, unsigned int);
+  static auto func_ptr = LoadSymbol<FuncPtr>(
+      "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(numBlocks, func, blockSize, dynamicSMemSize, flags);
+}
+
+// Forwarding stubs. Each function below resolves the runtime symbol named in
+// its LoadSymbol call once (function-local static), returns
+// GetSymbolNotFoundError() if that pointer is null, and otherwise forwards
+// its arguments unchanged.
+// NOTE(review): LoadSymbol<FuncPtr> assumes LoadSymbol is a template on the
+// pointer type; its definition is outside this excerpt -- confirm.
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaOccupancyMaxPotentialClusterSize(int *clusterSize, const void *func,
+                                     const cudaLaunchConfig_t *launchConfig) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(int *, const void *, const cudaLaunchConfig_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudaOccupancyMaxPotentialClusterSize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(clusterSize, func, launchConfig);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaOccupancyMaxActiveClusters(int *numClusters, const void *func,
+                               const cudaLaunchConfig_t *launchConfig) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(int *, const void *, const cudaLaunchConfig_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaOccupancyMaxActiveClusters");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(numClusters, func, launchConfig);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaMallocManaged(void **devPtr, size_t size, unsigned int flags) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaMallocManaged");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(devPtr, size, flags);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaMalloc(void **devPtr, size_t size) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaMalloc");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(devPtr, size);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaMallocHost(void
**ptr, size_t size) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaMallocHost");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(ptr, size);
+}
+
+// Forwarding stubs. Each function below resolves the runtime symbol named in
+// its LoadSymbol call once (function-local static), returns
+// GetSymbolNotFoundError() if that pointer is null, and otherwise forwards
+// its arguments unchanged.
+// NOTE(review): LoadSymbol<FuncPtr> assumes LoadSymbol is a template on the
+// pointer type; its definition is outside this excerpt -- confirm.
+extern __host__ cudaError_t CUDARTAPI cudaMallocPitch(void **devPtr,
+                                                      size_t *pitch,
+                                                      size_t width,
+                                                      size_t height) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t *, size_t, size_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaMallocPitch");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(devPtr, pitch, width, height);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaMallocArray(
+    cudaArray_t *array, const struct cudaChannelFormatDesc *desc, size_t width,
+    size_t height __dv(0), unsigned int flags __dv(0)) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *,
+                                           const struct cudaChannelFormatDesc *,
+                                           size_t, size_t, unsigned int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaMallocArray");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(array, desc, width, height, flags);
+}
+
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI
+cudaFree(void *devPtr) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaFree");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(devPtr);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaFreeHost(void *ptr) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaFreeHost");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(ptr);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaFreeArray(cudaArray_t array) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaFreeArray");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(array);
+}
+
+extern __host__ cudaError_t CUDARTAPI
+cudaFreeMipmappedArray(cudaMipmappedArray_t mipmappedArray) {
+  using FuncPtr = cudaError_t(CUDARTAPI
*)(cudaMipmappedArray_t); + static auto func_ptr = LoadSymbol("cudaFreeMipmappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mipmappedArray); +} + +extern __host__ cudaError_t CUDARTAPI cudaHostAlloc(void **pHost, size_t size, + unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaHostAlloc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pHost, size, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaHostRegister(void *ptr, size_t size, + unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaHostRegister"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr, size, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaHostUnregister(void *ptr) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *); + static auto func_ptr = LoadSymbol("cudaHostUnregister"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr); +} + +extern __host__ cudaError_t CUDARTAPI +cudaHostGetDevicePointer(void **pDevice, void *pHost, unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void **, void *, unsigned int); + static auto func_ptr = LoadSymbol("cudaHostGetDevicePointer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pDevice, pHost, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaHostGetFlags(unsigned int *pFlags, + void *pHost) { + using FuncPtr = cudaError_t(CUDARTAPI *)(unsigned int *, void *); + static auto func_ptr = LoadSymbol("cudaHostGetFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pFlags, pHost); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMalloc3D(struct cudaPitchedPtr *pitchedDevPtr, struct cudaExtent extent) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr *, struct cudaExtent); + static auto func_ptr = 
LoadSymbol("cudaMalloc3D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pitchedDevPtr, extent); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMalloc3DArray(cudaArray_t *array, const struct cudaChannelFormatDesc *desc, + struct cudaExtent extent, unsigned int flags __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t *, + const struct cudaChannelFormatDesc *, + struct cudaExtent, unsigned int); + static auto func_ptr = LoadSymbol("cudaMalloc3DArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(array, desc, extent, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaMallocMipmappedArray( + cudaMipmappedArray_t *mipmappedArray, + const struct cudaChannelFormatDesc *desc, struct cudaExtent extent, + unsigned int numLevels, unsigned int flags __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaMipmappedArray_t *, const struct cudaChannelFormatDesc *, + struct cudaExtent, unsigned int, unsigned int); + static auto func_ptr = LoadSymbol("cudaMallocMipmappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mipmappedArray, desc, extent, numLevels, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetMipmappedArrayLevel( + cudaArray_t *levelArray, cudaMipmappedArray_const_t mipmappedArray, + unsigned int level) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaArray_t *, cudaMipmappedArray_const_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaGetMipmappedArrayLevel"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(levelArray, mipmappedArray, level); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemcpy3D(const struct cudaMemcpy3DParms *p) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *); + static auto func_ptr = LoadSymbol("cudaMemcpy3D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(p); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemcpy3DPeer(const struct 
cudaMemcpy3DPeerParms *p) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaMemcpy3DPeer");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(p);
+}
+
+// Forwarding stubs. Each function below resolves the runtime symbol named in
+// its LoadSymbol call once (function-local static), returns
+// GetSymbolNotFoundError() if that pointer is null, and otherwise forwards
+// its arguments unchanged.
+// NOTE(review): LoadSymbol<FuncPtr> assumes LoadSymbol is a template on the
+// pointer type; its definition is outside this excerpt -- confirm.
+extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy3DAsync(
+    const struct cudaMemcpy3DParms *p, cudaStream_t stream __dv(0)) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DParms *, cudaStream_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaMemcpy3DAsync");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(p, stream);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaMemcpy3DPeerAsync(
+    const struct cudaMemcpy3DPeerParms *p, cudaStream_t stream __dv(0)) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(const struct cudaMemcpy3DPeerParms *,
+                                           cudaStream_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaMemcpy3DPeerAsync");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(p, stream);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaMemGetInfo(size_t *free,
+                                                     size_t *total) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaMemGetInfo");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(free, total);
+}
+
+extern __host__ cudaError_t CUDARTAPI
+cudaArrayGetInfo(struct cudaChannelFormatDesc *desc, struct cudaExtent *extent,
+                 unsigned int *flags, cudaArray_t array) {
+  using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *,
+                                           struct cudaExtent *, unsigned int *,
+                                           cudaArray_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudaArrayGetInfo");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(desc, extent, flags, array);
+}
+
+extern __host__ cudaError_t CUDARTAPI cudaArrayGetPlane(
+    cudaArray_t *pPlaneArray, cudaArray_t hArray, unsigned int planeIdx) {
+  using FuncPtr =
+      cudaError_t(CUDARTAPI *)(cudaArray_t *, cudaArray_t, unsigned int);
+  static auto func_ptr =
LoadSymbol("cudaArrayGetPlane"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pPlaneArray, hArray, planeIdx); +} + +extern __host__ cudaError_t CUDARTAPI cudaArrayGetMemoryRequirements( + struct cudaArrayMemoryRequirements *memoryRequirements, cudaArray_t array, + int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArrayMemoryRequirements *, + cudaArray_t, int); + static auto func_ptr = LoadSymbol("cudaArrayGetMemoryRequirements"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memoryRequirements, array, device); +} + +extern __host__ cudaError_t CUDARTAPI cudaMipmappedArrayGetMemoryRequirements( + struct cudaArrayMemoryRequirements *memoryRequirements, + cudaMipmappedArray_t mipmap, int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArrayMemoryRequirements *, + cudaMipmappedArray_t, int); + static auto func_ptr = + LoadSymbol("cudaMipmappedArrayGetMemoryRequirements"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memoryRequirements, mipmap, device); +} + +extern __host__ cudaError_t CUDARTAPI cudaArrayGetSparseProperties( + struct cudaArraySparseProperties *sparseProperties, cudaArray_t array) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, cudaArray_t); + static auto func_ptr = LoadSymbol("cudaArrayGetSparseProperties"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(sparseProperties, array); +} + +extern __host__ cudaError_t CUDARTAPI cudaMipmappedArrayGetSparseProperties( + struct cudaArraySparseProperties *sparseProperties, + cudaMipmappedArray_t mipmap) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaArraySparseProperties *, + cudaMipmappedArray_t); + static auto func_ptr = + LoadSymbol("cudaMipmappedArrayGetSparseProperties"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(sparseProperties, mipmap); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy(void *dst, const void 
*src, + size_t count, + enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, + enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src, count, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyPeer(void *dst, int dstDevice, + const void *src, + int srcDevice, + size_t count) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void *, int, const void *, int, size_t); + static auto func_ptr = LoadSymbol("cudaMemcpyPeer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, dstDevice, src, srcDevice, count); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy2D(void *dst, size_t dpitch, + const void *src, + size_t spitch, size_t width, + size_t height, + enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, + size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpy2D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, dpitch, src, spitch, width, height, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArray( + cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, + size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, + size_t, size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpy2DToArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArray( + void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, + size_t hOffset, size_t width, size_t height, enum cudaMemcpyKind kind) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, size_t, + size_t, size_t, 
size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DArrayToArray( + cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, + cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t width, + size_t height, enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, + cudaArray_const_t, size_t, size_t, + size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpy2DArrayToArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, + width, height, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbol( + const void *symbol, const void *src, size_t count, size_t offset __dv(0), + enum cudaMemcpyKind kind __dv(cudaMemcpyHostToDevice)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, + size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpyToSymbol"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(symbol, src, count, offset, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbol( + void *dst, const void *symbol, size_t count, size_t offset __dv(0), + enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToHost)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, + enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbol"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, symbol, count, offset, kind); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaMemcpyAsync(void *dst, const void *src, size_t count, + enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { + using FuncPtr = 
cudaError_t(CUDARTAPI *)(void *, const void *, size_t, + enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpyAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src, count, kind, stream); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemcpyPeerAsync(void *dst, int dstDevice, const void *src, int srcDevice, + size_t count, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, const void *, int, + size_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpyPeerAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, dstDevice, src, srcDevice, count, stream); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemcpy2DAsync( + void *dst, size_t dpitch, const void *src, size_t spitch, size_t width, + size_t height, enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void *, size_t, const void *, size_t, size_t, + size_t, enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpy2DAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, dpitch, src, spitch, width, height, kind, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DToArrayAsync( + cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, + size_t spitch, size_t width, size_t height, enum cudaMemcpyKind kind, + cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, + const void *, size_t, size_t, size_t, + enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpy2DToArrayAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, wOffset, hOffset, src, spitch, width, height, kind, + stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpy2DFromArrayAsync( + void *dst, size_t dpitch, cudaArray_const_t src, size_t wOffset, + size_t hOffset, 
size_t width, size_t height, enum cudaMemcpyKind kind, + cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, cudaArray_const_t, + size_t, size_t, size_t, size_t, + enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpy2DFromArrayAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, dpitch, src, wOffset, hOffset, width, height, kind, + stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyToSymbolAsync( + const void *symbol, const void *src, size_t count, size_t offset, + enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(const void *, const void *, size_t, size_t, + enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpyToSymbolAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(symbol, src, count, offset, kind, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemcpyFromSymbolAsync( + void *dst, const void *symbol, size_t count, size_t offset, + enum cudaMemcpyKind kind, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, const void *, size_t, size_t, + enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpyFromSymbolAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, symbol, count, offset, kind, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemset(void *devPtr, int value, + size_t count) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t); + static auto func_ptr = LoadSymbol("cudaMemset"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, value, count); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemset2D(void *devPtr, size_t pitch, + int value, size_t width, + size_t height) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t); + static auto func_ptr = LoadSymbol("cudaMemset2D"); + 
if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, pitch, value, width, height); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemset3D( + struct cudaPitchedPtr pitchedDevPtr, int value, struct cudaExtent extent) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, struct cudaExtent); + static auto func_ptr = LoadSymbol("cudaMemset3D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pitchedDevPtr, value, extent); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI cudaMemsetAsync( + void *devPtr, int value, size_t count, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, int, size_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemsetAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, value, count, stream); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaMemset2DAsync(void *devPtr, size_t pitch, int value, size_t width, + size_t height, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, size_t, int, size_t, size_t, + cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemset2DAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, pitch, value, width, height, stream); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaMemset3DAsync(struct cudaPitchedPtr pitchedDevPtr, int value, + struct cudaExtent extent, cudaStream_t stream __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaPitchedPtr, int, + struct cudaExtent, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemset3DAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pitchedDevPtr, value, extent, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetSymbolAddress(void **devPtr, + const void *symbol) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void **, const void *); + static auto func_ptr = 
LoadSymbol("cudaGetSymbolAddress"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, symbol); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetSymbolSize(size_t *size, + const void *symbol) { + using FuncPtr = cudaError_t(CUDARTAPI *)(size_t *, const void *); + static auto func_ptr = LoadSymbol("cudaGetSymbolSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(size, symbol); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemPrefetchAsync(const void *devPtr, size_t count, int dstDevice, + cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(const void *, size_t, int, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemPrefetchAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, count, dstDevice, stream); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemAdvise(const void *devPtr, size_t count, enum cudaMemoryAdvise advice, + int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const void *, size_t, + enum cudaMemoryAdvise, int); + static auto func_ptr = LoadSymbol("cudaMemAdvise"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, count, advice, device); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttribute( + void *data, size_t dataSize, enum cudaMemRangeAttribute attribute, + const void *devPtr, size_t count) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + void *, size_t, enum cudaMemRangeAttribute, const void *, size_t); + static auto func_ptr = LoadSymbol("cudaMemRangeGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(data, dataSize, attribute, devPtr, count); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemRangeGetAttributes( + void **data, size_t *dataSizes, enum cudaMemRangeAttribute *attributes, + size_t numAttributes, const void *devPtr, size_t count) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void **, size_t *, enum cudaMemRangeAttribute *, + size_t, const 
void *, size_t); + static auto func_ptr = LoadSymbol("cudaMemRangeGetAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(data, dataSizes, attributes, numAttributes, devPtr, count); +} + +extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI +cudaMemcpyToArray(cudaArray_t dst, size_t wOffset, size_t hOffset, + const void *src, size_t count, enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaArray_t, size_t, size_t, const void *, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpyToArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, wOffset, hOffset, src, count, kind); +} + +extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI +cudaMemcpyFromArray(void *dst, cudaArray_const_t src, size_t wOffset, + size_t hOffset, size_t count, enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, + size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpyFromArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src, wOffset, hOffset, count, kind); +} + +extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyArrayToArray( + cudaArray_t dst, size_t wOffsetDst, size_t hOffsetDst, + cudaArray_const_t src, size_t wOffsetSrc, size_t hOffsetSrc, size_t count, + enum cudaMemcpyKind kind __dv(cudaMemcpyDeviceToDevice)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, cudaArray_const_t, + size_t, size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaMemcpyArrayToArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, wOffsetDst, hOffsetDst, src, wOffsetSrc, hOffsetSrc, + count, kind); +} + +extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaMemcpyToArrayAsync( + cudaArray_t dst, size_t wOffset, size_t hOffset, const void *src, + size_t count, enum cudaMemcpyKind kind, 
cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaArray_t, size_t, size_t, const void *, + size_t, enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpyToArrayAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, wOffset, hOffset, src, count, kind, stream); +} + +extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI +cudaMemcpyFromArrayAsync(void *dst, cudaArray_const_t src, size_t wOffset, + size_t hOffset, size_t count, enum cudaMemcpyKind kind, + cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void *, cudaArray_const_t, size_t, size_t, + size_t, enum cudaMemcpyKind, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMemcpyFromArrayAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dst, src, wOffset, hOffset, count, kind, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMallocAsync(void **devPtr, + size_t size, + cudaStream_t hStream) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void **, size_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMallocAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, size, hStream); +} + +extern __host__ cudaError_t CUDARTAPI cudaFreeAsync(void *devPtr, + cudaStream_t hStream) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void *, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaFreeAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, hStream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemPoolTrimTo(cudaMemPool_t memPool, + size_t minBytesToKeep) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t, size_t); + static auto func_ptr = LoadSymbol("cudaMemPoolTrimTo"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memPool, minBytesToKeep); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemPoolSetAttribute( + cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) 
{ + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); + static auto func_ptr = LoadSymbol("cudaMemPoolSetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memPool, attr, value); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemPoolGetAttribute( + cudaMemPool_t memPool, enum cudaMemPoolAttr attr, void *value) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaMemPool_t, enum cudaMemPoolAttr, void *); + static auto func_ptr = LoadSymbol("cudaMemPoolGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memPool, attr, value); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemPoolSetAccess(cudaMemPool_t memPool, + const struct cudaMemAccessDesc *descList, size_t count) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaMemPool_t, const struct cudaMemAccessDesc *, size_t); + static auto func_ptr = LoadSymbol("cudaMemPoolSetAccess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memPool, descList, count); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemPoolGetAccess(enum cudaMemAccessFlags *flags, cudaMemPool_t memPool, + struct cudaMemLocation *location) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + enum cudaMemAccessFlags *, cudaMemPool_t, struct cudaMemLocation *); + static auto func_ptr = LoadSymbol("cudaMemPoolGetAccess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(flags, memPool, location); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemPoolCreate( + cudaMemPool_t *memPool, const struct cudaMemPoolProps *poolProps) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t *, + const struct cudaMemPoolProps *); + static auto func_ptr = LoadSymbol("cudaMemPoolCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memPool, poolProps); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemPoolDestroy(cudaMemPool_t memPool) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaMemPool_t); + 
static auto func_ptr = LoadSymbol("cudaMemPoolDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memPool); +} + +extern __host__ cudaError_t CUDARTAPI cudaMallocFromPoolAsync( + void **ptr, size_t size, cudaMemPool_t memPool, cudaStream_t stream) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void **, size_t, cudaMemPool_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaMallocFromPoolAsync"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr, size, memPool, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportToShareableHandle( + void *shareableHandle, cudaMemPool_t memPool, + enum cudaMemAllocationHandleType handleType, unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + void *, cudaMemPool_t, enum cudaMemAllocationHandleType, unsigned int); + static auto func_ptr = + LoadSymbol("cudaMemPoolExportToShareableHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(shareableHandle, memPool, handleType, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemPoolImportFromShareableHandle( + cudaMemPool_t *memPool, void *shareableHandle, + enum cudaMemAllocationHandleType handleType, unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaMemPool_t *, void *, enum cudaMemAllocationHandleType, unsigned int); + static auto func_ptr = + LoadSymbol("cudaMemPoolImportFromShareableHandle"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(memPool, shareableHandle, handleType, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaMemPoolExportPointer( + struct cudaMemPoolPtrExportData *exportData, void *ptr) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaMemPoolPtrExportData *, void *); + static auto func_ptr = LoadSymbol("cudaMemPoolExportPointer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(exportData, ptr); +} + +extern __host__ cudaError_t CUDARTAPI +cudaMemPoolImportPointer(void 
**ptr, cudaMemPool_t memPool, + struct cudaMemPoolPtrExportData *exportData) { + using FuncPtr = cudaError_t(CUDARTAPI *)(void **, cudaMemPool_t, + struct cudaMemPoolPtrExportData *); + static auto func_ptr = LoadSymbol("cudaMemPoolImportPointer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ptr, memPool, exportData); +} + +extern __host__ cudaError_t CUDARTAPI cudaPointerGetAttributes( + struct cudaPointerAttributes *attributes, const void *ptr) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaPointerAttributes *, const void *); + static auto func_ptr = LoadSymbol("cudaPointerGetAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attributes, ptr); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceCanAccessPeer(int *canAccessPeer, int device, int peerDevice) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *, int, int); + static auto func_ptr = LoadSymbol("cudaDeviceCanAccessPeer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(canAccessPeer, device, peerDevice); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceEnablePeerAccess(int peerDevice, unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int, unsigned int); + static auto func_ptr = LoadSymbol("cudaDeviceEnablePeerAccess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(peerDevice, flags); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDeviceDisablePeerAccess(int peerDevice) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int); + static auto func_ptr = LoadSymbol("cudaDeviceDisablePeerAccess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(peerDevice); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphicsUnregisterResource(cudaGraphicsResource_t resource) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t); + static auto func_ptr = LoadSymbol("cudaGraphicsUnregisterResource"); + if (!func_ptr) return GetSymbolNotFoundError(); + return 
func_ptr(resource); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceSetMapFlags( + cudaGraphicsResource_t resource, unsigned int flags) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphicsResource_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaGraphicsResourceSetMapFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(resource, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphicsMapResources( + int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaGraphicsMapResources"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(count, resources, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphicsUnmapResources( + int count, cudaGraphicsResource_t *resources, cudaStream_t stream __dv(0)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(int, cudaGraphicsResource_t *, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaGraphicsUnmapResources"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(count, resources, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphicsResourceGetMappedPointer( + void **devPtr, size_t *size, cudaGraphicsResource_t resource) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(void **, size_t *, cudaGraphicsResource_t); + static auto func_ptr = + LoadSymbol("cudaGraphicsResourceGetMappedPointer"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(devPtr, size, resource); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphicsSubResourceGetMappedArray( + cudaArray_t *array, cudaGraphicsResource_t resource, + unsigned int arrayIndex, unsigned int mipLevel) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaArray_t *, cudaGraphicsResource_t, unsigned int, unsigned int); + static auto func_ptr = + LoadSymbol("cudaGraphicsSubResourceGetMappedArray"); + if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(array, resource, arrayIndex, mipLevel); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphicsResourceGetMappedMipmappedArray( + cudaMipmappedArray_t *mipmappedArray, cudaGraphicsResource_t resource) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaMipmappedArray_t *, cudaGraphicsResource_t); + static auto func_ptr = + LoadSymbol("cudaGraphicsResourceGetMappedMipmappedArray"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mipmappedArray, resource); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetChannelDesc( + struct cudaChannelFormatDesc *desc, cudaArray_const_t array) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaChannelFormatDesc *, + cudaArray_const_t); + static auto func_ptr = LoadSymbol("cudaGetChannelDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(desc, array); +} + +extern __host__ struct cudaChannelFormatDesc CUDARTAPI cudaCreateChannelDesc( + int x, int y, int z, int w, enum cudaChannelFormatKind f) { + using FuncPtr = struct cudaChannelFormatDesc(CUDARTAPI *)( + int, int, int, int, enum cudaChannelFormatKind); + static auto func_ptr = LoadSymbol("cudaCreateChannelDesc"); + return func_ptr(x, y, z, w, f); +} + +extern __host__ cudaError_t CUDARTAPI cudaCreateTextureObject( + cudaTextureObject_t *pTexObject, const struct cudaResourceDesc *pResDesc, + const struct cudaTextureDesc *pTexDesc, + const struct cudaResourceViewDesc *pResViewDesc) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaTextureObject_t *, const struct cudaResourceDesc *, + const struct cudaTextureDesc *, const struct cudaResourceViewDesc *); + static auto func_ptr = LoadSymbol("cudaCreateTextureObject"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pTexObject, pResDesc, pTexDesc, pResViewDesc); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDestroyTextureObject(cudaTextureObject_t texObject) { + using FuncPtr = cudaError_t(CUDARTAPI 
*)(cudaTextureObject_t); + static auto func_ptr = LoadSymbol("cudaDestroyTextureObject"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(texObject); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceDesc( + struct cudaResourceDesc *pResDesc, cudaTextureObject_t texObject) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaTextureObject_t); + static auto func_ptr = + LoadSymbol("cudaGetTextureObjectResourceDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pResDesc, texObject); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectTextureDesc( + struct cudaTextureDesc *pTexDesc, cudaTextureObject_t texObject) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaTextureDesc *, cudaTextureObject_t); + static auto func_ptr = LoadSymbol("cudaGetTextureObjectTextureDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pTexDesc, texObject); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetTextureObjectResourceViewDesc( + struct cudaResourceViewDesc *pResViewDesc, cudaTextureObject_t texObject) { + using FuncPtr = cudaError_t(CUDARTAPI *)(struct cudaResourceViewDesc *, + cudaTextureObject_t); + static auto func_ptr = + LoadSymbol("cudaGetTextureObjectResourceViewDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pResViewDesc, texObject); +} + +extern __host__ cudaError_t CUDARTAPI cudaCreateSurfaceObject( + cudaSurfaceObject_t *pSurfObject, const struct cudaResourceDesc *pResDesc) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t *, + const struct cudaResourceDesc *); + static auto func_ptr = LoadSymbol("cudaCreateSurfaceObject"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pSurfObject, pResDesc); +} + +extern __host__ cudaError_t CUDARTAPI +cudaDestroySurfaceObject(cudaSurfaceObject_t surfObject) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaSurfaceObject_t); + static auto 
func_ptr = LoadSymbol("cudaDestroySurfaceObject"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(surfObject); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetSurfaceObjectResourceDesc( + struct cudaResourceDesc *pResDesc, cudaSurfaceObject_t surfObject) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(struct cudaResourceDesc *, cudaSurfaceObject_t); + static auto func_ptr = + LoadSymbol("cudaGetSurfaceObjectResourceDesc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pResDesc, surfObject); +} + +extern __host__ cudaError_t CUDARTAPI cudaDriverGetVersion(int *driverVersion) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *); + static auto func_ptr = LoadSymbol("cudaDriverGetVersion"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(driverVersion); +} + +extern __host__ __cudart_builtin__ cudaError_t CUDARTAPI +cudaRuntimeGetVersion(int *runtimeVersion) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int *); + static auto func_ptr = LoadSymbol("cudaRuntimeGetVersion"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(runtimeVersion); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphCreate(cudaGraph_t *pGraph, + unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, unsigned int); + static auto func_ptr = LoadSymbol("cudaGraphCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraph, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphAddKernelNode( + cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, size_t numDependencies, + const struct cudaKernelNodeParams *pNodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, + const cudaGraphNode_t *, size_t, + const struct cudaKernelNodeParams *); + static auto func_ptr = LoadSymbol("cudaGraphAddKernelNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, 
numDependencies, + pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetParams( + cudaGraphNode_t node, struct cudaKernelNodeParams *pNodeParams) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaKernelNodeParams *); + static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetParams( + cudaGraphNode_t node, const struct cudaKernelNodeParams *pNodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, + const struct cudaKernelNodeParams *); + static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphKernelNodeCopyAttributes(cudaGraphNode_t hSrc, cudaGraphNode_t hDst) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t); + static auto func_ptr = + LoadSymbol("cudaGraphKernelNodeCopyAttributes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hSrc, hDst); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeGetAttribute( + cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, + cudaKernelNodeAttrValue *value_out) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphNode_t, cudaKernelNodeAttrID, cudaKernelNodeAttrValue *); + static auto func_ptr = LoadSymbol("cudaGraphKernelNodeGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, attr, value_out); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphKernelNodeSetAttribute( + cudaGraphNode_t hNode, cudaKernelNodeAttrID attr, + const cudaKernelNodeAttrValue *value) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphNode_t, cudaKernelNodeAttrID, const cudaKernelNodeAttrValue *); + static auto func_ptr = LoadSymbol("cudaGraphKernelNodeSetAttribute"); 
+ if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, attr, value); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode( + cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, size_t numDependencies, + const struct cudaMemcpy3DParms *pCopyParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, + const cudaGraphNode_t *, size_t, + const struct cudaMemcpy3DParms *); + static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies, + pCopyParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeToSymbol( + cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, size_t numDependencies, + const void *symbol, const void *src, size_t count, size_t offset, + enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, + const void *, const void *, size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNodeToSymbol"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies, symbol, + src, count, offset, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNodeFromSymbol( + cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, + const void *symbol, size_t count, size_t offset, enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, + const void *, size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = + LoadSymbol("cudaGraphAddMemcpyNodeFromSymbol"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, 
numDependencies, dst, + symbol, count, offset, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemcpyNode1D( + cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dst, + const void *src, size_t count, enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, void *, + const void *, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaGraphAddMemcpyNode1D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dst, src, + count, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeGetParams( + cudaGraphNode_t node, struct cudaMemcpy3DParms *pNodeParams) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemcpy3DParms *); + static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParams( + cudaGraphNode_t node, const struct cudaMemcpy3DParms *pNodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, + const struct cudaMemcpy3DParms *); + static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsToSymbol( + cudaGraphNode_t node, const void *symbol, const void *src, size_t count, + size_t offset, enum cudaMemcpyKind kind) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t, const void *, const void *, + size_t, size_t, enum cudaMemcpyKind); + static auto func_ptr = + LoadSymbol("cudaGraphMemcpyNodeSetParamsToSymbol"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, symbol, src, count, offset, kind); +} + 
+extern __host__ cudaError_t CUDARTAPI cudaGraphMemcpyNodeSetParamsFromSymbol( + cudaGraphNode_t node, void *dst, const void *symbol, size_t count, + size_t offset, enum cudaMemcpyKind kind) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t, void *, const void *, size_t, + size_t, enum cudaMemcpyKind); + static auto func_ptr = + LoadSymbol("cudaGraphMemcpyNodeSetParamsFromSymbol"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, dst, symbol, count, offset, kind); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphMemcpyNodeSetParams1D(cudaGraphNode_t node, void *dst, const void *src, + size_t count, enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphNode_t, void *, const void *, size_t, enum cudaMemcpyKind); + static auto func_ptr = LoadSymbol("cudaGraphMemcpyNodeSetParams1D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, dst, src, count, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemsetNode( + cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, size_t numDependencies, + const struct cudaMemsetParams *pMemsetParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, + const cudaGraphNode_t *, size_t, + const struct cudaMemsetParams *); + static auto func_ptr = LoadSymbol("cudaGraphAddMemsetNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies, + pMemsetParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeGetParams( + cudaGraphNode_t node, struct cudaMemsetParams *pNodeParams) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaMemsetParams *); + static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphMemsetNodeSetParams( + 
cudaGraphNode_t node, const struct cudaMemsetParams *pNodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, + const struct cudaMemsetParams *); + static auto func_ptr = LoadSymbol("cudaGraphMemsetNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphAddHostNode( + cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, size_t numDependencies, + const struct cudaHostNodeParams *pNodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, + const cudaGraphNode_t *, size_t, + const struct cudaHostNodeParams *); + static auto func_ptr = LoadSymbol("cudaGraphAddHostNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies, + pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeGetParams( + cudaGraphNode_t node, struct cudaHostNodeParams *pNodeParams) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t, struct cudaHostNodeParams *); + static auto func_ptr = LoadSymbol("cudaGraphHostNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphHostNodeSetParams( + cudaGraphNode_t node, const struct cudaHostNodeParams *pNodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, + const struct cudaHostNodeParams *); + static auto func_ptr = LoadSymbol("cudaGraphHostNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphAddChildGraphNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, + size_t numDependencies, cudaGraph_t childGraph) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, + const cudaGraphNode_t *, 
size_t, cudaGraph_t); + static auto func_ptr = LoadSymbol("cudaGraphAddChildGraphNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies, + childGraph); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphChildGraphNodeGetGraph(cudaGraphNode_t node, cudaGraph_t *pGraph) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraph_t *); + static auto func_ptr = LoadSymbol("cudaGraphChildGraphNodeGetGraph"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, pGraph); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphAddEmptyNode( + cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, size_t numDependencies) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, + const cudaGraphNode_t *, size_t); + static auto func_ptr = LoadSymbol("cudaGraphAddEmptyNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphAddEventRecordNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, + size_t numDependencies, cudaEvent_t event) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, + const cudaGraphNode_t *, size_t, cudaEvent_t); + static auto func_ptr = LoadSymbol("cudaGraphAddEventRecordNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphEventRecordNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); + static auto func_ptr = + LoadSymbol("cudaGraphEventRecordNodeGetEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, event_out); +} + +extern __host__ cudaError_t CUDARTAPI 
+cudaGraphEventRecordNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); + static auto func_ptr = + LoadSymbol("cudaGraphEventRecordNodeSetEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, event); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphAddEventWaitNode(cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, + size_t numDependencies, cudaEvent_t event) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, + const cudaGraphNode_t *, size_t, cudaEvent_t); + static auto func_ptr = LoadSymbol("cudaGraphAddEventWaitNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies, event); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphEventWaitNodeGetEvent(cudaGraphNode_t node, cudaEvent_t *event_out) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t *); + static auto func_ptr = LoadSymbol("cudaGraphEventWaitNodeGetEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, event_out); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphEventWaitNodeSetEvent(cudaGraphNode_t node, cudaEvent_t event) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaEvent_t); + static auto func_ptr = LoadSymbol("cudaGraphEventWaitNodeSetEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, event); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresSignalNode( + cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, size_t numDependencies, + const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, + const struct cudaExternalSemaphoreSignalNodeParams *); + static auto func_ptr = + 
LoadSymbol("cudaGraphAddExternalSemaphoresSignalNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies, + nodeParams); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphExternalSemaphoresSignalNodeGetParams( + cudaGraphNode_t hNode, + struct cudaExternalSemaphoreSignalNodeParams *params_out) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphNode_t, struct cudaExternalSemaphoreSignalNodeParams *); + static auto func_ptr = + LoadSymbol("cudaGraphExternalSemaphoresSignalNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, params_out); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphExternalSemaphoresSignalNodeSetParams( + cudaGraphNode_t hNode, + const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphNode_t, const struct cudaExternalSemaphoreSignalNodeParams *); + static auto func_ptr = + LoadSymbol("cudaGraphExternalSemaphoresSignalNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphAddExternalSemaphoresWaitNode( + cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, size_t numDependencies, + const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphNode_t *, cudaGraph_t, const cudaGraphNode_t *, size_t, + const struct cudaExternalSemaphoreWaitNodeParams *); + static auto func_ptr = + LoadSymbol("cudaGraphAddExternalSemaphoresWaitNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies, + nodeParams); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphExternalSemaphoresWaitNodeGetParams( + cudaGraphNode_t hNode, + struct cudaExternalSemaphoreWaitNodeParams *params_out) { + using FuncPtr = 
cudaError_t(CUDARTAPI *)( + cudaGraphNode_t, struct cudaExternalSemaphoreWaitNodeParams *); + static auto func_ptr = + LoadSymbol("cudaGraphExternalSemaphoresWaitNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, params_out); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphExternalSemaphoresWaitNodeSetParams( + cudaGraphNode_t hNode, + const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphNode_t, const struct cudaExternalSemaphoreWaitNodeParams *); + static auto func_ptr = + LoadSymbol("cudaGraphExternalSemaphoresWaitNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hNode, nodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemAllocNode( + cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, size_t numDependencies, + struct cudaMemAllocNodeParams *nodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraph_t, + const cudaGraphNode_t *, size_t, + struct cudaMemAllocNodeParams *); + static auto func_ptr = LoadSymbol("cudaGraphAddMemAllocNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies, + nodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphMemAllocNodeGetParams( + cudaGraphNode_t node, struct cudaMemAllocNodeParams *params_out) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, + struct cudaMemAllocNodeParams *); + static auto func_ptr = LoadSymbol("cudaGraphMemAllocNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, params_out); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphAddMemFreeNode( + cudaGraphNode_t *pGraphNode, cudaGraph_t graph, + const cudaGraphNode_t *pDependencies, size_t numDependencies, void *dptr) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphNode_t *, cudaGraph_t, const 
cudaGraphNode_t *, size_t, void *); + static auto func_ptr = LoadSymbol("cudaGraphAddMemFreeNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphNode, graph, pDependencies, numDependencies, dptr); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphMemFreeNodeGetParams(cudaGraphNode_t node, void *dptr_out) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t, void *); + static auto func_ptr = LoadSymbol("cudaGraphMemFreeNodeGetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, dptr_out); +} + +extern __host__ cudaError_t CUDARTAPI cudaDeviceGraphMemTrim(int device) { + using FuncPtr = cudaError_t(CUDARTAPI *)(int); + static auto func_ptr = LoadSymbol("cudaDeviceGraphMemTrim"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device); +} + +extern __host__ cudaError_t CUDARTAPI cudaDeviceGetGraphMemAttribute( + int device, enum cudaGraphMemAttributeType attr, void *value) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(int, enum cudaGraphMemAttributeType, void *); + static auto func_ptr = LoadSymbol("cudaDeviceGetGraphMemAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, attr, value); +} + +extern __host__ cudaError_t CUDARTAPI cudaDeviceSetGraphMemAttribute( + int device, enum cudaGraphMemAttributeType attr, void *value) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(int, enum cudaGraphMemAttributeType, void *); + static auto func_ptr = LoadSymbol("cudaDeviceSetGraphMemAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(device, attr, value); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphClone(cudaGraph_t *pGraphClone, cudaGraph_t originalGraph) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t *, cudaGraph_t); + static auto func_ptr = LoadSymbol("cudaGraphClone"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphClone, originalGraph); +} + +extern __host__ cudaError_t 
CUDARTAPI +cudaGraphNodeFindInClone(cudaGraphNode_t *pNode, cudaGraphNode_t originalNode, + cudaGraph_t clonedGraph) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t *, cudaGraphNode_t, cudaGraph_t); + static auto func_ptr = LoadSymbol("cudaGraphNodeFindInClone"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pNode, originalNode, clonedGraph); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphNodeGetType(cudaGraphNode_t node, enum cudaGraphNodeType *pType) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t, enum cudaGraphNodeType *); + static auto func_ptr = LoadSymbol("cudaGraphNodeGetType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, pType); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphGetNodes(cudaGraph_t graph, + cudaGraphNode_t *nodes, + size_t *numNodes) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); + static auto func_ptr = LoadSymbol("cudaGraphGetNodes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graph, nodes, numNodes); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphGetRootNodes( + cudaGraph_t graph, cudaGraphNode_t *pRootNodes, size_t *pNumRootNodes) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, size_t *); + static auto func_ptr = LoadSymbol("cudaGraphGetRootNodes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graph, pRootNodes, pNumRootNodes); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphGetEdges(cudaGraph_t graph, + cudaGraphNode_t *from, + cudaGraphNode_t *to, + size_t *numEdges) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaGraphNode_t *, + cudaGraphNode_t *, size_t *); + static auto func_ptr = LoadSymbol("cudaGraphGetEdges"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graph, from, to, numEdges); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependencies( + 
cudaGraphNode_t node, cudaGraphNode_t *pDependencies, + size_t *pNumDependencies) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); + static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependencies"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, pDependencies, pNumDependencies); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphNodeGetDependentNodes( + cudaGraphNode_t node, cudaGraphNode_t *pDependentNodes, + size_t *pNumDependentNodes) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphNode_t, cudaGraphNode_t *, size_t *); + static auto func_ptr = LoadSymbol("cudaGraphNodeGetDependentNodes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(node, pDependentNodes, pNumDependentNodes); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphAddDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, + const cudaGraphNode_t *to, size_t numDependencies) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, + const cudaGraphNode_t *, size_t); + static auto func_ptr = LoadSymbol("cudaGraphAddDependencies"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graph, from, to, numDependencies); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphRemoveDependencies(cudaGraph_t graph, const cudaGraphNode_t *from, + const cudaGraphNode_t *to, size_t numDependencies) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, const cudaGraphNode_t *, + const cudaGraphNode_t *, size_t); + static auto func_ptr = LoadSymbol("cudaGraphRemoveDependencies"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graph, from, to, numDependencies); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphDestroyNode(cudaGraphNode_t node) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphNode_t); + static auto func_ptr = LoadSymbol("cudaGraphDestroyNode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return 
func_ptr(node); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphInstantiate(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, + unsigned long long flags __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, + unsigned long long); + static auto func_ptr = LoadSymbol("cudaGraphInstantiate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphExec, graph, flags); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphInstantiateWithFlags(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, + unsigned long long flags __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, + unsigned long long); + static auto func_ptr = LoadSymbol("cudaGraphInstantiateWithFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphExec, graph, flags); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphInstantiateWithParams(cudaGraphExec_t *pGraphExec, cudaGraph_t graph, + cudaGraphInstantiateParams *instantiateParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t *, cudaGraph_t, + cudaGraphInstantiateParams *); + static auto func_ptr = LoadSymbol("cudaGraphInstantiateWithParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(pGraphExec, graph, instantiateParams); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphExecGetFlags(cudaGraphExec_t graphExec, unsigned long long *flags) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphExec_t, unsigned long long *); + static auto func_ptr = LoadSymbol("cudaGraphExecGetFlags"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graphExec, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphExecKernelNodeSetParams( + cudaGraphExec_t hGraphExec, cudaGraphNode_t node, + const struct cudaKernelNodeParams *pNodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, + const struct cudaKernelNodeParams *); + static auto func_ptr = + 
LoadSymbol("cudaGraphExecKernelNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, node, pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams( + cudaGraphExec_t hGraphExec, cudaGraphNode_t node, + const struct cudaMemcpy3DParms *pNodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, + const struct cudaMemcpy3DParms *); + static auto func_ptr = + LoadSymbol("cudaGraphExecMemcpyNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, node, pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParamsToSymbol( + cudaGraphExec_t hGraphExec, cudaGraphNode_t node, const void *symbol, + const void *src, size_t count, size_t offset, enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, + const void *, const void *, size_t, + size_t, enum cudaMemcpyKind); + static auto func_ptr = + LoadSymbol("cudaGraphExecMemcpyNodeSetParamsToSymbol"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, node, symbol, src, count, offset, kind); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphExecMemcpyNodeSetParamsFromSymbol(cudaGraphExec_t hGraphExec, + cudaGraphNode_t node, void *dst, + const void *symbol, size_t count, + size_t offset, + enum cudaMemcpyKind kind) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, + void *, const void *, size_t, size_t, + enum cudaMemcpyKind); + static auto func_ptr = + LoadSymbol("cudaGraphExecMemcpyNodeSetParamsFromSymbol"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, node, dst, symbol, count, offset, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemcpyNodeSetParams1D( + cudaGraphExec_t hGraphExec, cudaGraphNode_t node, void *dst, + const void *src, size_t count, enum cudaMemcpyKind kind) { + using 
FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, void *, + const void *, size_t, enum cudaMemcpyKind); + static auto func_ptr = + LoadSymbol("cudaGraphExecMemcpyNodeSetParams1D"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, node, dst, src, count, kind); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphExecMemsetNodeSetParams( + cudaGraphExec_t hGraphExec, cudaGraphNode_t node, + const struct cudaMemsetParams *pNodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, + const struct cudaMemsetParams *); + static auto func_ptr = + LoadSymbol("cudaGraphExecMemsetNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, node, pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphExecHostNodeSetParams(cudaGraphExec_t hGraphExec, cudaGraphNode_t node, + const struct cudaHostNodeParams *pNodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, + const struct cudaHostNodeParams *); + static auto func_ptr = LoadSymbol("cudaGraphExecHostNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, node, pNodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphExecChildGraphNodeSetParams( + cudaGraphExec_t hGraphExec, cudaGraphNode_t node, cudaGraph_t childGraph) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaGraph_t); + static auto func_ptr = + LoadSymbol("cudaGraphExecChildGraphNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, node, childGraph); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventRecordNodeSetEvent( + cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); + static auto func_ptr = + LoadSymbol("cudaGraphExecEventRecordNodeSetEvent"); 
+ if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, event); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphExecEventWaitNodeSetEvent( + cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, cudaEvent_t event) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, cudaEvent_t); + static auto func_ptr = + LoadSymbol("cudaGraphExecEventWaitNodeSetEvent"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, event); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphExecExternalSemaphoresSignalNodeSetParams( + cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, + const struct cudaExternalSemaphoreSignalNodeParams *nodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphExec_t, cudaGraphNode_t, + const struct cudaExternalSemaphoreSignalNodeParams *); + static auto func_ptr = + LoadSymbol("cudaGraphExecExternalSemaphoresSignalNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, nodeParams); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphExecExternalSemaphoresWaitNodeSetParams( + cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, + const struct cudaExternalSemaphoreWaitNodeParams *nodeParams) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaGraphExec_t, cudaGraphNode_t, + const struct cudaExternalSemaphoreWaitNodeParams *); + static auto func_ptr = + LoadSymbol("cudaGraphExecExternalSemaphoresWaitNodeSetParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, nodeParams); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphNodeSetEnabled( + cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, unsigned int isEnabled) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaGraphNodeSetEnabled"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, 
isEnabled); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphNodeGetEnabled(cudaGraphExec_t hGraphExec, cudaGraphNode_t hNode, + unsigned int *isEnabled) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraphNode_t, + unsigned int *); + static auto func_ptr = LoadSymbol("cudaGraphNodeGetEnabled"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hNode, isEnabled); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphExecUpdate(cudaGraphExec_t hGraphExec, cudaGraph_t hGraph, + cudaGraphExecUpdateResultInfo *resultInfo) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaGraph_t, + cudaGraphExecUpdateResultInfo *); + static auto func_ptr = LoadSymbol("cudaGraphExecUpdate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(hGraphExec, hGraph, resultInfo); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphUpload(cudaGraphExec_t graphExec, + cudaStream_t stream) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaGraphUpload"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graphExec, stream); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphLaunch(cudaGraphExec_t graphExec, + cudaStream_t stream) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudaGraphLaunch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graphExec, stream); +} + +extern __host__ cudaError_t CUDARTAPI +cudaGraphExecDestroy(cudaGraphExec_t graphExec) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraphExec_t); + static auto func_ptr = LoadSymbol("cudaGraphExecDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graphExec); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphDestroy(cudaGraph_t graph) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t); + static auto func_ptr = 
LoadSymbol("cudaGraphDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graph); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphDebugDotPrint( + cudaGraph_t graph, const char *path, unsigned int flags) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraph_t, const char *, unsigned int); + static auto func_ptr = LoadSymbol("cudaGraphDebugDotPrint"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graph, path, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaUserObjectCreate( + cudaUserObject_t *object_out, void *ptr, cudaHostFn_t destroy, + unsigned int initialRefcount, unsigned int flags) { + using FuncPtr = cudaError_t(CUDARTAPI *)( + cudaUserObject_t *, void *, cudaHostFn_t, unsigned int, unsigned int); + static auto func_ptr = LoadSymbol("cudaUserObjectCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(object_out, ptr, destroy, initialRefcount, flags); +} + +extern __host__ cudaError_t CUDARTAPI +cudaUserObjectRetain(cudaUserObject_t object, unsigned int count __dv(1)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaUserObjectRetain"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(object, count); +} + +extern __host__ cudaError_t CUDARTAPI +cudaUserObjectRelease(cudaUserObject_t object, unsigned int count __dv(1)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaUserObject_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaUserObjectRelease"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(object, count); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphRetainUserObject( + cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1), + unsigned int flags __dv(0)) { + using FuncPtr = cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, + unsigned int, unsigned int); + static auto func_ptr = LoadSymbol("cudaGraphRetainUserObject"); + if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graph, object, count, flags); +} + +extern __host__ cudaError_t CUDARTAPI cudaGraphReleaseUserObject( + cudaGraph_t graph, cudaUserObject_t object, unsigned int count __dv(1)) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(cudaGraph_t, cudaUserObject_t, unsigned int); + static auto func_ptr = LoadSymbol("cudaGraphReleaseUserObject"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(graph, object, count); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetDriverEntryPoint( + const char *symbol, void **funcPtr, unsigned long long flags, + enum cudaDriverEntryPointQueryResult *driverStatus) { + using FuncPtr = + cudaError_t(CUDARTAPI *)(const char *, void **, unsigned long long, + enum cudaDriverEntryPointQueryResult *); + static auto func_ptr = LoadSymbol("cudaGetDriverEntryPoint"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(symbol, funcPtr, flags, driverStatus); +} + +extern __host__ cudaError_t CUDARTAPI cudaGetExportTable( + const void **ppExportTable, const cudaUUID_t *pExportTableId) { + using FuncPtr = cudaError_t(CUDARTAPI *)(const void **, const cudaUUID_t *); + static auto func_ptr = LoadSymbol("cudaGetExportTable"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ppExportTable, pExportTableId); +} + +} // extern "C" diff --git a/tensorflow/stream_executor/cuda/cuda_stub.cc b/tensorflow/stream_executor/cuda/cuda_stub.cc index 3248c9ddefd..db9948d5a07 100644 --- a/tensorflow/stream_executor/cuda/cuda_stub.cc +++ b/tensorflow/stream_executor/cuda/cuda_stub.cc @@ -61,6 +61,7 @@ typedef struct CUDA_LAUNCH_PARAMS_st CUDA_LAUNCH_PARAMS; #define __CUDA_DEPRECATED #endif +#if CUDA_VERSION < 12000 #if CUDA_VERSION < 10000 // Define fake enums introduced in CUDA 10.0. 
typedef enum CUgraphNodeType_enum {} CUgraphNodeType; @@ -93,7 +94,9 @@ typedef struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS; typedef void(CUDA_CB* CUhostFn)(void* userData); +#endif + // For now only one stub implementation is needed. If a function that is not // available in the given CUDA release, the corresponding wrapper returns // CUDA_ERROR_SHARED_OBJECT_INIT_FAILED. -#include "tensorflow/stream_executor/cuda/cuda_10_0.inc" +#include "tensorflow/stream_executor/cuda/cuda_12_0.inc" diff --git a/tensorflow/stream_executor/cuda/cudart_stub.cc b/tensorflow/stream_executor/cuda/cudart_stub.cc index 0c6b274f88b..d19eeea7dc9 100644 --- a/tensorflow/stream_executor/cuda/cudart_stub.cc +++ b/tensorflow/stream_executor/cuda/cudart_stub.cc @@ -56,7 +56,7 @@ cudaError_t GetSymbolNotFoundError() { #elif CUDART_VERSION < 10010 #include "tensorflow/stream_executor/cuda/cuda_runtime_10_0.inc" #else -#include "tensorflow/stream_executor/cuda/cuda_runtime_10_1.inc" +#include "tensorflow/stream_executor/cuda/cuda_runtime_12_0.inc" #endif #undef __dv #undef __CUDA_DEPRECATED diff --git a/tensorflow/stream_executor/cuda/cudnn_8_0.inc b/tensorflow/stream_executor/cuda/cudnn_8_0.inc new file mode 100644 index 00000000000..d9bf35184e4 --- /dev/null +++ b/tensorflow/stream_executor/cuda/cudnn_8_0.inc @@ -0,0 +1,3213 @@ +// Auto-generated, do not edit. 
+ +extern "C" { +size_t CUDNNWINAPI cudnnGetVersion(void) { + using FuncPtr = size_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnGetVersion"); + if (!func_ptr) return 0; + return func_ptr(); +} + +size_t CUDNNWINAPI cudnnGetCudartVersion(void) { + using FuncPtr = size_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnGetCudartVersion"); + if (!func_ptr) return 0; + return func_ptr(); +} + +const char *CUDNNWINAPI cudnnGetErrorString(cudnnStatus_t status) { + using FuncPtr = const char *(CUDNNWINAPI *)(cudnnStatus_t); + static auto func_ptr = LoadSymbol("cudnnGetErrorString"); + if (!func_ptr) return "cudnnGetErrorString symbol not found."; + return func_ptr(status); +} + +cudnnStatus_t CUDNNWINAPI cudnnQueryRuntimeError(cudnnHandle_t handle, + cudnnStatus_t *rstatus, + cudnnErrQueryMode_t mode, + cudnnRuntimeTag_t *tag) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnStatus_t *, cudnnErrQueryMode_t, cudnnRuntimeTag_t *); + static auto func_ptr = LoadSymbol("cudnnQueryRuntimeError"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rstatus, mode, tag); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetProperty(libraryPropertyType type, + int *value) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(libraryPropertyType, int *); + static auto func_ptr = LoadSymbol("cudnnGetProperty"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(type, value); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreate(cudnnHandle_t *handle) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t *); + static auto func_ptr = LoadSymbol("cudnnCreate"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle); +} + +cudnnStatus_t CUDNNWINAPI cudnnDestroy(cudnnHandle_t handle) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t); + static auto func_ptr = LoadSymbol("cudnnDestroy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle); +} + + +#if 
CUDNN_MAJOR>=8 && (CUDNN_MINOR > 0 || CUDNN_PATCHLEVEL >= 4) +cudnnStatus_t CUDNNWINAPI cudnnCnnInferVersionCheck(void) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnCnnInferVersionCheck"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +cudnnStatus_t CUDNNWINAPI cudnnCnnTrainVersionCheck(void) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnCnnTrainVersionCheck"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} +#endif + +cudnnStatus_t CUDNNWINAPI cudnnSetStream(cudnnHandle_t handle, + cudaStream_t streamId) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t); + static auto func_ptr = LoadSymbol("cudnnSetStream"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, streamId); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetStream(cudnnHandle_t handle, + cudaStream_t *streamId) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, cudaStream_t *); + static auto func_ptr = LoadSymbol("cudnnGetStream"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, streamId); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptor( + cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, + cudnnDataType_t dataType, /* image data type */ + int n, /* number of inputs (batch size) */ + int c, /* number of input feature maps */ + int h, /* height of input section */ + int w) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, + cudnnDataType_t, int, int, int, int); + static auto func_ptr 
= LoadSymbol("cudnnSetTensor4dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, format, dataType, n, c, h, w); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetTensor4dDescriptorEx( + cudnnTensorDescriptor_t tensorDesc, + cudnnDataType_t dataType, /* image data type */ + int n, /* number of inputs (batch size) */ + int c, /* number of input feature maps */ + int h, /* height of input section */ + int w, /* width of input section */ + int nStride, int cStride, int hStride, int wStride) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnDataType_t, + int, int, int, int, int, int, int, int); + static auto func_ptr = LoadSymbol("cudnnSetTensor4dDescriptorEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, + wStride); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetTensor4dDescriptor( + const cudnnTensorDescriptor_t tensorDesc, + cudnnDataType_t *dataType, /* image data type */ + int *n, /* number of inputs (batch size) */ + int *c, /* number of input feature maps */ + int *h, /* height of input section */ + int *w, /* width of input section */ + int *nStride, int *cStride, int *hStride, int *wStride) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnTensorDescriptor_t, cudnnDataType_t *, int *, int *, int *, + int *, int *, int *, int *, int *); + static auto func_ptr = LoadSymbol("cudnnGetTensor4dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, dataType, n, c, h, w, nStride, cStride, hStride, + wStride); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptor( + cudnnTensorDescriptor_t tensorDesc, cudnnDataType_t dataType, int nbDims, + const int dimA[], const int strideA[]) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnTensorDescriptor_t, cudnnDataType_t, int, const int[], const int[]); + static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptor"); + if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, dataType, nbDims, dimA, strideA); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetTensorNdDescriptorEx( + cudnnTensorDescriptor_t tensorDesc, cudnnTensorFormat_t format, + cudnnDataType_t dataType, int nbDims, const int dimA[]) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, cudnnTensorFormat_t, + cudnnDataType_t, int, const int[]); + static auto func_ptr = LoadSymbol("cudnnSetTensorNdDescriptorEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, format, dataType, nbDims, dimA); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetTensorNdDescriptor( + const cudnnTensorDescriptor_t tensorDesc, int nbDimsRequested, + cudnnDataType_t *dataType, int *nbDims, int dimA[], int strideA[]) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, int, + cudnnDataType_t *, int *, int[], int[]); + static auto func_ptr = LoadSymbol("cudnnGetTensorNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, nbDimsRequested, dataType, nbDims, dimA, strideA); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetTensorSizeInBytes( + const cudnnTensorDescriptor_t tensorDesc, size_t *size) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(const cudnnTensorDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetTensorSizeInBytes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc, size); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(tensorDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnInitTransformDest( + const cudnnTensorTransformDescriptor_t transformDesc, + const cudnnTensorDescriptor_t srcDesc, cudnnTensorDescriptor_t 
destDesc, + size_t *destSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnTensorTransformDescriptor_t, const cudnnTensorDescriptor_t, + cudnnTensorDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnInitTransformDest"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(transformDesc, srcDesc, destDesc, destSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreateTensorTransformDescriptor( + cudnnTensorTransformDescriptor_t *transformDesc) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t *); + static auto func_ptr = + LoadSymbol("cudnnCreateTensorTransformDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(transformDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetTensorTransformDescriptor( + cudnnTensorTransformDescriptor_t transformDesc, const uint32_t nbDims, + const cudnnTensorFormat_t destFormat, const int32_t padBeforeA[], + const int32_t padAfterA[], const uint32_t foldA[], + const cudnnFoldingDirection_t direction) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnTensorTransformDescriptor_t, const uint32_t, + const cudnnTensorFormat_t, const int32_t[], const int32_t[], + const uint32_t[], const cudnnFoldingDirection_t); + static auto func_ptr = + LoadSymbol("cudnnSetTensorTransformDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(transformDesc, nbDims, destFormat, padBeforeA, padAfterA, + foldA, direction); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetTensorTransformDescriptor( + cudnnTensorTransformDescriptor_t transformDesc, uint32_t nbDimsRequested, + cudnnTensorFormat_t *destFormat, int32_t padBeforeA[], int32_t padAfterA[], + uint32_t foldA[], cudnnFoldingDirection_t *direction) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnTensorTransformDescriptor_t, uint32_t, cudnnTensorFormat_t *, + int32_t[], int32_t[], uint32_t[], cudnnFoldingDirection_t *); + static auto func_ptr = + 
LoadSymbol("cudnnGetTensorTransformDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(transformDesc, nbDimsRequested, destFormat, padBeforeA, + padAfterA, foldA, direction); +} + +cudnnStatus_t CUDNNWINAPI cudnnDestroyTensorTransformDescriptor( + cudnnTensorTransformDescriptor_t transformDesc) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorTransformDescriptor_t); + static auto func_ptr = + LoadSymbol("cudnnDestroyTensorTransformDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(transformDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnTransformTensor( + cudnnHandle_t handle, const void *alpha, + const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, + const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, + const void *, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnTransformTensor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, alpha, xDesc, x, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI cudnnTransformTensorEx( + cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, + const void *alpha, const cudnnTensorDescriptor_t srcDesc, + const void *srcData, const void *beta, + const cudnnTensorDescriptor_t destDesc, void *destData) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorTransformDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnTransformTensorEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transDesc, alpha, srcDesc, srcData, beta, destDesc, + destData); +} + +cudnnStatus_t CUDNNWINAPI cudnnAddTensor(cudnnHandle_t handle, + const void *alpha, + const cudnnTensorDescriptor_t aDesc, + const 
void *A, const void *beta, + const cudnnTensorDescriptor_t cDesc, + void *C) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, + const void *, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnAddTensor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, alpha, aDesc, A, beta, cDesc, C); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateOpTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(opTensorDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetOpTensorDescriptor( + cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t opTensorOp, + cudnnDataType_t opTensorCompType, cudnnNanPropagation_t opTensorNanOpt) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t, + cudnnDataType_t, cudnnNanPropagation_t); + static auto func_ptr = LoadSymbol("cudnnSetOpTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetOpTensorDescriptor( + const cudnnOpTensorDescriptor_t opTensorDesc, cudnnOpTensorOp_t *opTensorOp, + cudnnDataType_t *opTensorCompType, cudnnNanPropagation_t *opTensorNanOpt) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnOpTensorDescriptor_t, cudnnOpTensorOp_t *, cudnnDataType_t *, + cudnnNanPropagation_t *); + static auto func_ptr = LoadSymbol("cudnnGetOpTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(opTensorDesc, opTensorOp, opTensorCompType, opTensorNanOpt); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc) { + using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)(cudnnOpTensorDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyOpTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(opTensorDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnOpTensor( + cudnnHandle_t handle, const cudnnOpTensorDescriptor_t opTensorDesc, + const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A, + const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B, + const void *beta, const cudnnTensorDescriptor_t cDesc, void *C) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnOpTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnOpTensor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opTensorDesc, alpha1, aDesc, A, alpha2, bDesc, B, + beta, cDesc, C); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreateReduceTensorDescriptor( + cudnnReduceTensorDescriptor_t *reduceTensorDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t *); + static auto func_ptr = + LoadSymbol("cudnnCreateReduceTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(reduceTensorDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetReduceTensorDescriptor( + cudnnReduceTensorDescriptor_t reduceTensorDesc, + cudnnReduceTensorOp_t reduceTensorOp, cudnnDataType_t reduceTensorCompType, + cudnnNanPropagation_t reduceTensorNanOpt, + cudnnReduceTensorIndices_t reduceTensorIndices, + cudnnIndicesType_t reduceTensorIndicesType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t, cudnnDataType_t, + cudnnNanPropagation_t, cudnnReduceTensorIndices_t, cudnnIndicesType_t); + static auto func_ptr = LoadSymbol("cudnnSetReduceTensorDescriptor"); + if (!func_ptr) 
return GetSymbolNotFoundError(); + return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, + reduceTensorNanOpt, reduceTensorIndices, + reduceTensorIndicesType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetReduceTensorDescriptor( + const cudnnReduceTensorDescriptor_t reduceTensorDesc, + cudnnReduceTensorOp_t *reduceTensorOp, + cudnnDataType_t *reduceTensorCompType, + cudnnNanPropagation_t *reduceTensorNanOpt, + cudnnReduceTensorIndices_t *reduceTensorIndices, + cudnnIndicesType_t *reduceTensorIndicesType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnReduceTensorDescriptor_t, cudnnReduceTensorOp_t *, + cudnnDataType_t *, cudnnNanPropagation_t *, cudnnReduceTensorIndices_t *, + cudnnIndicesType_t *); + static auto func_ptr = LoadSymbol("cudnnGetReduceTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(reduceTensorDesc, reduceTensorOp, reduceTensorCompType, + reduceTensorNanOpt, reduceTensorIndices, + reduceTensorIndicesType); +} + +cudnnStatus_t CUDNNWINAPI cudnnDestroyReduceTensorDescriptor( + cudnnReduceTensorDescriptor_t reduceTensorDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnReduceTensorDescriptor_t); + static auto func_ptr = + LoadSymbol("cudnnDestroyReduceTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(reduceTensorDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetReductionIndicesSize( + cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, + const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnReduceTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetReductionIndicesSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); +} + +cudnnStatus_t 
CUDNNWINAPI cudnnGetReductionWorkspaceSize( + cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, + const cudnnTensorDescriptor_t aDesc, const cudnnTensorDescriptor_t cDesc, + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnReduceTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetReductionWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, reduceTensorDesc, aDesc, cDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnReduceTensor( + cudnnHandle_t handle, const cudnnReduceTensorDescriptor_t reduceTensorDesc, + void *indices, size_t indicesSizeInBytes, void *workspace, + size_t workspaceSizeInBytes, const void *alpha, + const cudnnTensorDescriptor_t aDesc, const void *A, const void *beta, + const cudnnTensorDescriptor_t cDesc, void *C) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnReduceTensorDescriptor_t, void *, size_t, + void *, size_t, const void *, const cudnnTensorDescriptor_t, const void *, + const void *, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnReduceTensor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, reduceTensorDesc, indices, indicesSizeInBytes, + workspace, workspaceSizeInBytes, alpha, aDesc, A, beta, cDesc, + C); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetTensor(cudnnHandle_t handle, + const cudnnTensorDescriptor_t yDesc, + void *y, const void *valuePtr) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); + static auto func_ptr = LoadSymbol("cudnnSetTensor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, yDesc, y, valuePtr); +} + +cudnnStatus_t CUDNNWINAPI cudnnScaleTensor(cudnnHandle_t handle, + const cudnnTensorDescriptor_t yDesc, + void *y, const 
void *alpha) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, void *, const void *); + static auto func_ptr = LoadSymbol("cudnnScaleTensor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, yDesc, y, alpha); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateFilterDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetFilter4dDescriptor( + cudnnFilterDescriptor_t filterDesc, + cudnnDataType_t dataType, /* image data type */ + cudnnTensorFormat_t format, int k, /* number of output feature maps */ + int c, /* number of input feature maps */ + int h, /* height of each input filter */ + int w) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, + cudnnTensorFormat_t, int, int, int, int); + static auto func_ptr = LoadSymbol("cudnnSetFilter4dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc, dataType, format, k, c, h, w); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetFilter4dDescriptor( + const cudnnFilterDescriptor_t filterDesc, + cudnnDataType_t *dataType, /* image data type */ + cudnnTensorFormat_t *format, int *k, /* number of output feature maps */ + int *c, /* number of input feature maps */ + int *h, /* height of each input filter */ + int *w) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnFilterDescriptor_t, cudnnDataType_t *, cudnnTensorFormat_t *, + int *, int *, int *, int *); + static auto func_ptr = LoadSymbol("cudnnGetFilter4dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc, dataType, format, k, c, h, w); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetFilterNdDescriptor( + cudnnFilterDescriptor_t 
filterDesc, + cudnnDataType_t dataType, /* image data type */ + cudnnTensorFormat_t format, int nbDims, const int filterDimA[]) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t, cudnnDataType_t, + cudnnTensorFormat_t, int, const int[]); + static auto func_ptr = LoadSymbol("cudnnSetFilterNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc, dataType, format, nbDims, filterDimA); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetFilterNdDescriptor( + const cudnnFilterDescriptor_t filterDesc, int nbDimsRequested, + cudnnDataType_t *dataType, /* image data type */ + cudnnTensorFormat_t *format, int *nbDims, int filterDimA[]) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnFilterDescriptor_t, int, cudnnDataType_t *, + cudnnTensorFormat_t *, int *, int[]); + static auto func_ptr = LoadSymbol("cudnnGetFilterNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc, nbDimsRequested, dataType, format, nbDims, + filterDimA); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetFilterSizeInBytes( + const cudnnFilterDescriptor_t filterDesc, size_t *size) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(const cudnnFilterDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetFilterSizeInBytes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc, size); +} + +cudnnStatus_t CUDNNWINAPI cudnnTransformFilter( + cudnnHandle_t handle, const cudnnTensorTransformDescriptor_t transDesc, + const void *alpha, const cudnnFilterDescriptor_t srcDesc, + const void *srcData, const void *beta, + const cudnnFilterDescriptor_t destDesc, void *destData) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorTransformDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, const void *, + const cudnnFilterDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnTransformFilter"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, transDesc, alpha, srcDesc, srcData, beta, destDesc, + destData); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFilterDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyFilterDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(filterDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSoftmaxForward( + cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, + const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, + const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnSoftmaxForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, mode, alpha, xDesc, x, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreatePoolingDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetPooling2dDescriptor( + cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t mode, + cudnnNanPropagation_t maxpoolingNanOpt, int windowHeight, int windowWidth, + int verticalPadding, int horizontalPadding, int verticalStride, + int horizontalStride) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnPoolingDescriptor_t, cudnnPoolingMode_t, cudnnNanPropagation_t, int, + int, int, int, int, int); + static auto func_ptr = LoadSymbol("cudnnSetPooling2dDescriptor"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, + windowWidth, verticalPadding, horizontalPadding, + verticalStride, horizontalStride); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetPooling2dDescriptor( + const cudnnPoolingDescriptor_t poolingDesc, cudnnPoolingMode_t *mode, + cudnnNanPropagation_t *maxpoolingNanOpt, int *windowHeight, + int *windowWidth, int *verticalPadding, int *horizontalPadding, + int *verticalStride, int *horizontalStride) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnPoolingDescriptor_t, cudnnPoolingMode_t *, + cudnnNanPropagation_t *, int *, int *, int *, int *, int *, int *); + static auto func_ptr = LoadSymbol("cudnnGetPooling2dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc, mode, maxpoolingNanOpt, windowHeight, + windowWidth, verticalPadding, horizontalPadding, + verticalStride, horizontalStride); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetPoolingNdDescriptor( + cudnnPoolingDescriptor_t poolingDesc, const cudnnPoolingMode_t mode, + const cudnnNanPropagation_t maxpoolingNanOpt, int nbDims, + const int windowDimA[], const int paddingA[], const int strideA[]) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnPoolingDescriptor_t, const cudnnPoolingMode_t, + const cudnnNanPropagation_t, int, const int[], const int[], const int[]); + static auto func_ptr = LoadSymbol("cudnnSetPoolingNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc, mode, maxpoolingNanOpt, nbDims, windowDimA, + paddingA, strideA); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetPoolingNdDescriptor( + const cudnnPoolingDescriptor_t poolingDesc, int nbDimsRequested, + cudnnPoolingMode_t *mode, cudnnNanPropagation_t *maxpoolingNanOpt, + int *nbDims, int windowDimA[], int paddingA[], int strideA[]) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnPoolingDescriptor_t, int, cudnnPoolingMode_t *, + cudnnNanPropagation_t *, int 
*, int[], int[], int[]); + static auto func_ptr = LoadSymbol("cudnnGetPoolingNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc, nbDimsRequested, mode, maxpoolingNanOpt, nbDims, + windowDimA, paddingA, strideA); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, + const cudnnTensorDescriptor_t inputTensorDesc, + int nbDims, int outputTensorDimA[]) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, + const cudnnTensorDescriptor_t, int, int[]); + static auto func_ptr = + LoadSymbol("cudnnGetPoolingNdForwardOutputDim"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc, inputTensorDesc, nbDims, outputTensorDimA); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc, + const cudnnTensorDescriptor_t inputTensorDesc, + int *n, int *c, int *h, int *w) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(const cudnnPoolingDescriptor_t, + const cudnnTensorDescriptor_t, + int *, int *, int *, int *); + static auto func_ptr = + LoadSymbol("cudnnGetPooling2dForwardOutputDim"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc, inputTensorDesc, n, c, h, w); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPoolingDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyPoolingDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(poolingDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnPoolingForward( + cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, + const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, + const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnPoolingDescriptor_t, 
const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnPoolingForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, poolingDesc, alpha, xDesc, x, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateActivationDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(activationDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetActivationDescriptor( + cudnnActivationDescriptor_t activationDesc, cudnnActivationMode_t mode, + cudnnNanPropagation_t reluNanOpt, double coef) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t, + cudnnActivationMode_t, + cudnnNanPropagation_t, double); + static auto func_ptr = LoadSymbol("cudnnSetActivationDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(activationDesc, mode, reluNanOpt, coef); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc, + cudnnActivationMode_t *mode, + cudnnNanPropagation_t *reluNanOpt, double *coef) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnActivationDescriptor_t, cudnnActivationMode_t *, + cudnnNanPropagation_t *, double *); + static auto func_ptr = LoadSymbol("cudnnGetActivationDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(activationDesc, mode, reluNanOpt, coef); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnActivationDescriptor_t); + static auto func_ptr = + LoadSymbol("cudnnDestroyActivationDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return 
func_ptr(activationDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnActivationForward( + cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, + const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, + const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnActivationDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnActivationForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, activationDesc, alpha, xDesc, x, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateLRNDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(normDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, + unsigned lrnN, double lrnAlpha, + double lrnBeta, double lrnK) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnLRNDescriptor_t, unsigned int, double, double, double); + static auto func_ptr = LoadSymbol("cudnnSetLRNDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, + unsigned *lrnN, + double *lrnAlpha, + double *lrnBeta, double *lrnK) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnLRNDescriptor_t, unsigned int *, double *, double *, double *); + static auto func_ptr = LoadSymbol("cudnnGetLRNDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(normDesc, lrnN, lrnAlpha, lrnBeta, lrnK); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc) { + using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)(cudnnLRNDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyLRNDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(lrnDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelForward( + cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, + const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, + const void *beta, const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, normDesc, lrnMode, alpha, xDesc, x, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationForward( + cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, + cudnnDivNormMode_t mode, const void *alpha, + const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */ + const void *x, + const void *means, /* if NULL, means are assumed to be zero */ + void *temp, void *temp2, const void *beta, + const cudnnTensorDescriptor_t yDesc, void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, void *, void *, + const void *, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = + LoadSymbol("cudnnDivisiveNormalizationForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, temp, temp2, + beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI cudnnDeriveBNTensorDescriptor( + cudnnTensorDescriptor_t derivedBnDesc, const cudnnTensorDescriptor_t xDesc, + cudnnBatchNormMode_t mode) { + using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, + cudnnBatchNormMode_t); + static auto func_ptr = LoadSymbol("cudnnDeriveBNTensorDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(derivedBnDesc, xDesc, mode); +} + +cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardInference( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, + const void *alpha, /* alpha[0] = result blend factor */ + const void *beta, /* beta[0] = dest layer blend factor */ + const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ + const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ + const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, + const void *bnBias, const void *estimatedMean, + const void *estimatedVariance, double epsilon) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, + const void *, const void *, const void *, const void *, double); + static auto func_ptr = + LoadSymbol("cudnnBatchNormalizationForwardInference"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, alpha, beta, xDesc, x, yDesc, y, + bnScaleBiasMeanVarDesc, bnScale, bnBias, estimatedMean, + estimatedVariance, epsilon); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreateSpatialTransformerDescriptor( + cudnnSpatialTransformerDescriptor_t *stDesc) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t *); + static auto func_ptr = + LoadSymbol("cudnnCreateSpatialTransformerDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetSpatialTransformerNdDescriptor( + cudnnSpatialTransformerDescriptor_t stDesc, cudnnSamplerType_t samplerType, + cudnnDataType_t dataType, const int nbDims, const int dimA[]) 
{ + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnSpatialTransformerDescriptor_t, cudnnSamplerType_t, cudnnDataType_t, + const int, const int[]); + static auto func_ptr = + LoadSymbol("cudnnSetSpatialTransformerNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stDesc, samplerType, dataType, nbDims, dimA); +} + +cudnnStatus_t CUDNNWINAPI cudnnDestroySpatialTransformerDescriptor( + cudnnSpatialTransformerDescriptor_t stDesc) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnSpatialTransformerDescriptor_t); + static auto func_ptr = + LoadSymbol("cudnnDestroySpatialTransformerDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(stDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorForward( + cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, + const void *theta, void *grid) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, + void *); + static auto func_ptr = + LoadSymbol("cudnnSpatialTfGridGeneratorForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, stDesc, theta, grid); +} + +cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerForward( + cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, + const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, + const void *grid, const void *beta, cudnnTensorDescriptor_t yDesc, + void *y) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, const void *, + cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, stDesc, alpha, xDesc, x, grid, beta, yDesc, y); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t 
*dropoutDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateDropoutDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dropoutDesc); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyDropoutDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dropoutDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnDropoutGetStatesSize(cudnnHandle_t handle, + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnDropoutGetStatesSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnDropoutGetReserveSpaceSize( + cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnTensorDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnDropoutGetReserveSpaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(xdesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetDropoutDescriptor( + cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, + void *states, size_t stateSizeInBytes, unsigned long long seed) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, + float, void *, size_t, unsigned long long); + static auto func_ptr = LoadSymbol("cudnnSetDropoutDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); +} + +cudnnStatus_t CUDNNWINAPI cudnnRestoreDropoutDescriptor( + cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float dropout, + void *states, size_t stateSizeInBytes, unsigned 
long long seed) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, + float, void *, size_t, unsigned long long); + static auto func_ptr = LoadSymbol("cudnnRestoreDropoutDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dropoutDesc, handle, dropout, states, stateSizeInBytes, seed); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetDropoutDescriptor( + cudnnDropoutDescriptor_t dropoutDesc, cudnnHandle_t handle, float *dropout, + void **states, unsigned long long *seed) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnDropoutDescriptor_t, cudnnHandle_t, + float *, void **, unsigned long long *); + static auto func_ptr = LoadSymbol("cudnnGetDropoutDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dropoutDesc, handle, dropout, states, seed); +} + +cudnnStatus_t CUDNNWINAPI cudnnDropoutForward( + cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, + const cudnnTensorDescriptor_t xdesc, const void *x, + const cudnnTensorDescriptor_t ydesc, void *y, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnDropoutDescriptor_t, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnDropoutForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dropoutDesc, xdesc, x, ydesc, y, reserveSpace, + reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreateAlgorithmPerformance( + cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); + static auto func_ptr = LoadSymbol("cudnnCreateAlgorithmPerformance"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(algoPerf, numberToCreate); +} + +cudnnStatus_t CUDNNWINAPI cudnnDestroyAlgorithmPerformance( + 
cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnAlgorithmPerformance_t *, int); + static auto func_ptr = + LoadSymbol("cudnnDestroyAlgorithmPerformance"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(algoPerf, numberToDestroy); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetCallback(unsigned mask, void *udata, + cudnnCallback_t fptr) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(unsigned int, void *, cudnnCallback_t); + static auto func_ptr = LoadSymbol("cudnnSetCallback"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mask, udata, fptr); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetCallback(unsigned *mask, void **udata, + cudnnCallback_t *fptr) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(unsigned int *, void **, cudnnCallback_t *); + static auto func_ptr = LoadSymbol("cudnnGetCallback"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(mask, udata, fptr); +} + +cudnnStatus_t CUDNNWINAPI cudnnOpsInferVersionCheck(void) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnOpsInferVersionCheck"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateConvolutionDescriptor(cudnnConvolutionDescriptor_t *convDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t *); + static auto func_ptr = + LoadSymbol("cudnnCreateConvolutionDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyConvolutionDescriptor(cudnnConvolutionDescriptor_t convDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t); + static auto func_ptr = + LoadSymbol("cudnnDestroyConvolutionDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionMathType( + 
cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t mathType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, + cudnnMathType_t); + static auto func_ptr = LoadSymbol("cudnnSetConvolutionMathType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, mathType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionMathType( + cudnnConvolutionDescriptor_t convDesc, cudnnMathType_t *mathType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, + cudnnMathType_t *); + static auto func_ptr = LoadSymbol("cudnnGetConvolutionMathType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, mathType); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionGroupCount( + cudnnConvolutionDescriptor_t convDesc, int groupCount) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int); + static auto func_ptr = LoadSymbol("cudnnSetConvolutionGroupCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, groupCount); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionGroupCount( + cudnnConvolutionDescriptor_t convDesc, int *groupCount) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, int *); + static auto func_ptr = LoadSymbol("cudnnGetConvolutionGroupCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, groupCount); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetConvolutionReorderType( + cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t reorderType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, + cudnnReorderType_t); + static auto func_ptr = LoadSymbol("cudnnSetConvolutionReorderType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, reorderType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionReorderType( + cudnnConvolutionDescriptor_t convDesc, cudnnReorderType_t *reorderType) { + using 
FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnConvolutionDescriptor_t, + cudnnReorderType_t *); + static auto func_ptr = LoadSymbol("cudnnGetConvolutionReorderType"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, reorderType); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetConvolution2dDescriptor( + cudnnConvolutionDescriptor_t convDesc, int pad_h, /* zero-padding height */ + int pad_w, /* zero-padding width */ + int u, /* vertical filter stride */ + int v, /* horizontal filter stride */ + int dilation_h, /* filter dilation in the vertical dimension */ + int dilation_w, /* filter dilation in the horizontal dimension */ + cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnConvolutionDescriptor_t, int, int, int, int, int, int, + cudnnConvolutionMode_t, cudnnDataType_t); + static auto func_ptr = LoadSymbol("cudnnSetConvolution2dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, + computeType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dDescriptor( + const cudnnConvolutionDescriptor_t convDesc, + int *pad_h, /* zero-padding height */ + int *pad_w, /* zero-padding width */ + int *u, /* vertical filter stride */ + int *v, /* horizontal filter stride */ + int *dilation_h, /* filter dilation in the vertical dimension */ + int *dilation_w, /* filter dilation in the horizontal dimension */ + cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnConvolutionDescriptor_t, int *, int *, int *, int *, int *, + int *, cudnnConvolutionMode_t *, cudnnDataType_t *); + static auto func_ptr = LoadSymbol("cudnnGetConvolution2dDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, pad_h, pad_w, u, v, dilation_h, dilation_w, mode, + computeType); +} + +cudnnStatus_t CUDNNWINAPI 
cudnnSetConvolutionNdDescriptor( + cudnnConvolutionDescriptor_t convDesc, int arrayLength, /* nbDims-2 size */ + const int padA[], const int filterStrideA[], const int dilationA[], + cudnnConvolutionMode_t mode, cudnnDataType_t computeType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnConvolutionDescriptor_t, int, const int[], const int[], const int[], + cudnnConvolutionMode_t, cudnnDataType_t); + static auto func_ptr = LoadSymbol("cudnnSetConvolutionNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, arrayLength, padA, filterStrideA, dilationA, mode, + computeType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdDescriptor( + const cudnnConvolutionDescriptor_t convDesc, int arrayLengthRequested, + int *arrayLength, int padA[], int strideA[], int dilationA[], + cudnnConvolutionMode_t *mode, cudnnDataType_t *computeType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnConvolutionDescriptor_t, int, int *, int[], int[], int[], + cudnnConvolutionMode_t *, cudnnDataType_t *); + static auto func_ptr = LoadSymbol("cudnnGetConvolutionNdDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, arrayLengthRequested, arrayLength, padA, strideA, + dilationA, mode, computeType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolution2dForwardOutputDim( + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t inputTensorDesc, + const cudnnFilterDescriptor_t filterDesc, int *n, int *c, int *h, int *w) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnFilterDescriptor_t, int *, int *, int *, int *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolution2dForwardOutputDim"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, inputTensorDesc, filterDesc, n, c, h, w); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionNdForwardOutputDim( + const 
cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t inputTensorDesc, + const cudnnFilterDescriptor_t filterDesc, int nbDims, + int tensorOuputDimA[]) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnFilterDescriptor_t, int, int[]); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionNdForwardOutputDim"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(convDesc, inputTensorDesc, filterDesc, nbDims, + tensorOuputDimA); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetConvolutionForwardAlgorithmMaxCount(cudnnHandle_t handle, int *count) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionForwardAlgorithmMaxCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, count); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardAlgorithm_v7( + cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, + const cudnnFilterDescriptor_t filterDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t destDesc, const int requestedAlgoCount, + int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnTensorDescriptor_t, const int, int *, + cudnnConvolutionFwdAlgoPerf_t *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionForwardAlgorithm_v7"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, srcDesc, filterDesc, convDesc, destDesc, + requestedAlgoCount, returnedAlgoCount, perfResults); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithm( + cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, + const cudnnFilterDescriptor_t wDesc, + const cudnnConvolutionDescriptor_t convDesc, + 
const cudnnTensorDescriptor_t yDesc, const int requestedAlgoCount, + int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnTensorDescriptor_t, const int, int *, + cudnnConvolutionFwdAlgoPerf_t *); + static auto func_ptr = + LoadSymbol("cudnnFindConvolutionForwardAlgorithm"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, requestedAlgoCount, + returnedAlgoCount, perfResults); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionForwardAlgorithmEx( + cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t yDesc, void *y, const int requestedAlgoCount, + int *returnedAlgoCount, cudnnConvolutionFwdAlgoPerf_t *perfResults, + void *workSpace, size_t workSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *, + const int, int *, cudnnConvolutionFwdAlgoPerf_t *, void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnFindConvolutionForwardAlgorithmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, x, wDesc, w, convDesc, yDesc, y, + requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, + workSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI +cudnnIm2Col(cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, + const void *x, const cudnnFilterDescriptor_t wDesc, + const cudnnConvolutionDescriptor_t convDesc, void *colBuffer) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnTensorDescriptor_t, + const 
void *, const cudnnFilterDescriptor_t, + const cudnnConvolutionDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnIm2Col"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, x, wDesc, convDesc, colBuffer); +} + +cudnnStatus_t CUDNNWINAPI cudnnReorderFilterAndBias( + cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc, + cudnnReorderType_t reorderType, const void *filterData, + void *reorderedFilterData, int reorderBias, const void *biasData, + void *reorderedBiasData) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnFilterDescriptor_t, cudnnReorderType_t, + const void *, void *, int, const void *, void *); + static auto func_ptr = LoadSymbol("cudnnReorderFilterAndBias"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, filterDesc, reorderType, filterData, + reorderedFilterData, reorderBias, biasData, + reorderedBiasData); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionForwardWorkspaceSize( + cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, + const cudnnFilterDescriptor_t wDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t yDesc, cudnnConvolutionFwdAlgo_t algo, + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnFilterDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnTensorDescriptor_t, cudnnConvolutionFwdAlgo_t, size_t *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionForwardWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, wDesc, convDesc, yDesc, algo, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnConvolutionForward( + cudnnHandle_t handle, const void *alpha, + const cudnnTensorDescriptor_t xDesc, const void *x, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo, + 
void *workSpace, size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionForward");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, alpha, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, beta, yDesc, y);
+}
+
+// cudnnConvolutionBiasActivationForward: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBiasActivationForward(
+    cudnnHandle_t handle, const void *alpha1,
+    const cudnnTensorDescriptor_t xDesc, const void *x,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnConvolutionDescriptor_t convDesc, cudnnConvolutionFwdAlgo_t algo,
+    void *workSpace, size_t workSpaceSizeInBytes, const void *alpha2,
+    const cudnnTensorDescriptor_t zDesc, const void *z,
+    const cudnnTensorDescriptor_t biasDesc, const void *bias,
+    const cudnnActivationDescriptor_t activationDesc,
+    const cudnnTensorDescriptor_t yDesc, void *y) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnFilterDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionFwdAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnConvolutionBiasActivationForward");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, alpha1, xDesc, x, wDesc, w, convDesc, algo, workSpace,
+                  workSpaceSizeInBytes, alpha2, zDesc, z, biasDesc, bias,
+                  activationDesc, yDesc, y);
+}
+
+// cudnnBackendCreateDescriptor: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t
CUDNNWINAPI
+cudnnBackendCreateDescriptor(cudnnBackendDescriptorType_t descriptorType,
+                             cudnnBackendDescriptor_t *descriptor) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnBackendDescriptorType_t,
+                                               cudnnBackendDescriptor_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBackendCreateDescriptor");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(descriptorType, descriptor);
+}
+
+// cudnnBackendDestroyDescriptor: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI
+cudnnBackendDestroyDescriptor(cudnnBackendDescriptor_t descriptor) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnBackendDescriptor_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBackendDestroyDescriptor");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(descriptor);
+}
+
+// cudnnBackendFinalize: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI
+cudnnBackendFinalize(cudnnBackendDescriptor_t descriptor) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnBackendDescriptor_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBackendFinalize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(descriptor);
+}
+
+// cudnnBackendSetAttribute: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI
+cudnnBackendSetAttribute(cudnnBackendDescriptor_t descriptor,
+                         cudnnBackendAttributeName_t attributeName,
+                         cudnnBackendAttributeType_t attributeType,
+                         int64_t elementCount, const void *arrayOfElements) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnBackendDescriptor_t, cudnnBackendAttributeName_t,
+      cudnnBackendAttributeType_t, int64_t, const void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnBackendSetAttribute");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(descriptor, attributeName, attributeType, elementCount,
+                  arrayOfElements);
+}
+
+// cudnnBackendGetAttribute: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnBackendGetAttribute(
+    cudnnBackendDescriptor_t const descriptor,
+    cudnnBackendAttributeName_t attributeName,
+    cudnnBackendAttributeType_t attributeType, int64_t requestedElementCount,
+    int64_t *elementCount, void *arrayOfElements) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnBackendDescriptor_t
const, cudnnBackendAttributeName_t, + cudnnBackendAttributeType_t, int64_t, int64_t *, void *); + static auto func_ptr = LoadSymbol("cudnnBackendGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(descriptor, attributeName, attributeType, + requestedElementCount, elementCount, arrayOfElements); +} + +cudnnStatus_t CUDNNWINAPI cudnnBackendExecute( + cudnnHandle_t handle, cudnnBackendDescriptor_t executionPlan, + cudnnBackendDescriptor_t variantPack) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBackendDescriptor_t, cudnnBackendDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnBackendExecute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, executionPlan, variantPack); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithmMaxCount( + cudnnHandle_t handle, int *count) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionBackwardDataAlgorithmMaxCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, count); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithm( + cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, + const cudnnTensorDescriptor_t dyDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnTensorDescriptor_t dxDesc, const int requestedAlgoCount, + int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnFilterDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnTensorDescriptor_t, const int, int *, + cudnnConvolutionBwdDataAlgoPerf_t *); + static auto func_ptr = + LoadSymbol("cudnnFindConvolutionBackwardDataAlgorithm"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, requestedAlgoCount, + returnedAlgoCount, 
perfResults);
+}
+
+// cudnnFindConvolutionBackwardDataAlgorithmEx: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardDataAlgorithmEx(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, void *dx,
+    const int requestedAlgoCount, int *returnedAlgoCount,
+    cudnnConvolutionBwdDataAlgoPerf_t *perfResults, void *workSpace,
+    size_t workSpaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, const cudnnTensorDescriptor_t, void *,
+      const int, int *, cudnnConvolutionBwdDataAlgoPerf_t *, void *, size_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnFindConvolutionBackwardDataAlgorithmEx");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, wDesc, w, dyDesc, dy, convDesc, dxDesc, dx,
+                  requestedAlgoCount, returnedAlgoCount, perfResults, workSpace,
+                  workSpaceSizeInBytes);
+}
+
+// cudnnGetConvolutionBackwardDataAlgorithm_v7: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardDataAlgorithm_v7(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t gradDesc, const int requestedAlgoCount,
+    int *returnedAlgoCount, cudnnConvolutionBwdDataAlgoPerf_t *perfResults) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const int, int *,
+      cudnnConvolutionBwdDataAlgoPerf_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataAlgorithm_v7");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc,
+                  requestedAlgoCount, returnedAlgoCount, perfResults);
+}
+
+// cudnnGetConvolutionBackwardDataWorkspaceSize: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t
CUDNNWINAPI cudnnGetConvolutionBackwardDataWorkspaceSize(
+    cudnnHandle_t handle, const cudnnFilterDescriptor_t wDesc,
+    const cudnnTensorDescriptor_t dyDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const cudnnTensorDescriptor_t dxDesc, cudnnConvolutionBwdDataAlgo_t algo,
+    size_t *sizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, cudnnConvolutionBwdDataAlgo_t, size_t *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetConvolutionBackwardDataWorkspaceSize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, wDesc, dyDesc, convDesc, dxDesc, algo, sizeInBytes);
+}
+
+// cudnnConvolutionBackwardData: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardData(
+    cudnnHandle_t handle, const void *alpha,
+    const cudnnFilterDescriptor_t wDesc, const void *w,
+    const cudnnTensorDescriptor_t dyDesc, const void *dy,
+    const cudnnConvolutionDescriptor_t convDesc,
+    cudnnConvolutionBwdDataAlgo_t algo, void *workSpace,
+    size_t workSpaceSizeInBytes, const void *beta,
+    const cudnnTensorDescriptor_t dxDesc, void *dx) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, const void *, const cudnnFilterDescriptor_t, const void *,
+      const cudnnTensorDescriptor_t, const void *,
+      const cudnnConvolutionDescriptor_t, cudnnConvolutionBwdDataAlgo_t, void *,
+      size_t, const void *, const cudnnTensorDescriptor_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnConvolutionBackwardData");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, alpha, wDesc, w, dyDesc, dy, convDesc, algo,
+                  workSpace, workSpaceSizeInBytes, beta, dxDesc, dx);
+}
+
+// cudnnGetFoldedConvBackwardDataDescriptors: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnGetFoldedConvBackwardDataDescriptors(
+    const cudnnHandle_t handle, const cudnnFilterDescriptor_t filterDesc,
+    const cudnnTensorDescriptor_t diffDesc,
+    const cudnnConvolutionDescriptor_t convDesc,
+    const
cudnnTensorDescriptor_t gradDesc,
+    const cudnnTensorFormat_t transformFormat,
+    cudnnFilterDescriptor_t foldedFilterDesc,
+    cudnnTensorDescriptor_t paddedDiffDesc,
+    cudnnConvolutionDescriptor_t foldedConvDesc,
+    cudnnTensorDescriptor_t foldedGradDesc,
+    cudnnTensorTransformDescriptor_t filterFoldTransDesc,
+    cudnnTensorTransformDescriptor_t diffPadTransDesc,
+    cudnnTensorTransformDescriptor_t gradFoldTransDesc,
+    cudnnTensorTransformDescriptor_t gradUnfoldTransDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      const cudnnHandle_t, const cudnnFilterDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t,
+      const cudnnTensorDescriptor_t, const cudnnTensorFormat_t,
+      cudnnFilterDescriptor_t, cudnnTensorDescriptor_t,
+      cudnnConvolutionDescriptor_t, cudnnTensorDescriptor_t,
+      cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t,
+      cudnnTensorTransformDescriptor_t, cudnnTensorTransformDescriptor_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetFoldedConvBackwardDataDescriptors");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, filterDesc, diffDesc, convDesc, gradDesc,
+                  transformFormat, foldedFilterDesc, paddedDiffDesc,
+                  foldedConvDesc, foldedGradDesc, filterFoldTransDesc,
+                  diffPadTransDesc, gradFoldTransDesc, gradUnfoldTransDesc);
+}
+
+// cudnnCreateFusedOpsConstParamPack: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsConstParamPack(
+    cudnnFusedOpsConstParamPack_t *constPack, cudnnFusedOps_t ops) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t *,
+                                               cudnnFusedOps_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnCreateFusedOpsConstParamPack");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(constPack, ops);
+}
+
+// cudnnDestroyFusedOpsConstParamPack: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyFusedOpsConstParamPack(cudnnFusedOpsConstParamPack_t constPack) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnDestroyFusedOpsConstParamPack");
+  if
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(constPack); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetFusedOpsConstParamPackAttribute( + cudnnFusedOpsConstParamPack_t constPack, + cudnnFusedOpsConstParamLabel_t paramLabel, const void *param) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsConstParamPack_t, + cudnnFusedOpsConstParamLabel_t, + const void *); + static auto func_ptr = + LoadSymbol("cudnnSetFusedOpsConstParamPackAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(constPack, paramLabel, param); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetFusedOpsConstParamPackAttribute( + const cudnnFusedOpsConstParamPack_t constPack, + cudnnFusedOpsConstParamLabel_t paramLabel, void *param, int *isNULL) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnFusedOpsConstParamPack_t, cudnnFusedOpsConstParamLabel_t, + void *, int *); + static auto func_ptr = + LoadSymbol("cudnnGetFusedOpsConstParamPackAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(constPack, paramLabel, param, isNULL); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsVariantParamPack( + cudnnFusedOpsVariantParamPack_t *varPack, cudnnFusedOps_t ops) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnFusedOpsVariantParamPack_t *, cudnnFusedOps_t); + static auto func_ptr = + LoadSymbol("cudnnCreateFusedOpsVariantParamPack"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(varPack, ops); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyFusedOpsVariantParamPack(cudnnFusedOpsVariantParamPack_t varPack) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t); + static auto func_ptr = + LoadSymbol("cudnnDestroyFusedOpsVariantParamPack"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(varPack); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetFusedOpsVariantParamPackAttribute( + cudnnFusedOpsVariantParamPack_t varPack, + cudnnFusedOpsVariantParamLabel_t 
paramLabel, void *ptr) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsVariantParamPack_t,
+                                   cudnnFusedOpsVariantParamLabel_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnSetFusedOpsVariantParamPackAttribute");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(varPack, paramLabel, ptr);
+}
+
+// cudnnGetFusedOpsVariantParamPackAttribute: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnGetFusedOpsVariantParamPackAttribute(
+    const cudnnFusedOpsVariantParamPack_t varPack,
+    cudnnFusedOpsVariantParamLabel_t paramLabel, void *ptr) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(const cudnnFusedOpsVariantParamPack_t,
+                                   cudnnFusedOpsVariantParamLabel_t, void *);
+  static auto func_ptr =
+      LoadSymbol<FuncPtr>("cudnnGetFusedOpsVariantParamPackAttribute");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(varPack, paramLabel, ptr);
+}
+
+// cudnnCreateFusedOpsPlan: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnCreateFusedOpsPlan(cudnnFusedOpsPlan_t *plan,
+                                                  cudnnFusedOps_t ops) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsPlan_t *, cudnnFusedOps_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateFusedOpsPlan");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(plan, ops);
+}
+
+// cudnnDestroyFusedOpsPlan: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnDestroyFusedOpsPlan(cudnnFusedOpsPlan_t plan) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnFusedOpsPlan_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyFusedOpsPlan");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(plan);
+}
+
+// cudnnMakeFusedOpsPlan: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI
+cudnnMakeFusedOpsPlan(cudnnHandle_t handle, cudnnFusedOpsPlan_t plan,
+                      const cudnnFusedOpsConstParamPack_t constPack,
+                      size_t *workspaceSizeInBytes) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnFusedOpsPlan_t, const cudnnFusedOpsConstParamPack_t,
+      size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnMakeFusedOpsPlan");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, plan, constPack, workspaceSizeInBytes);
+}
+
+// cudnnFusedOpsExecute: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI
+cudnnFusedOpsExecute(cudnnHandle_t handle, const cudnnFusedOpsPlan_t plan,
+                     cudnnFusedOpsVariantParamPack_t varPack) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, const cudnnFusedOpsPlan_t,
+                                   cudnnFusedOpsVariantParamPack_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnFusedOpsExecute");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, plan, varPack);
+}
+
+// cudnnCreateRNNDescriptor: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI
+cudnnCreateRNNDescriptor(cudnnRNNDescriptor_t *rnnDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnCreateRNNDescriptor");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(rnnDesc);
+}
+
+// cudnnDestroyRNNDescriptor: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI
+cudnnDestroyRNNDescriptor(cudnnRNNDescriptor_t rnnDesc) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnDestroyRNNDescriptor");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(rnnDesc);
+}
+
+// cudnnSetRNNDescriptor_v8: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v8(
+    cudnnRNNDescriptor_t rnnDesc, cudnnRNNAlgo_t algo, cudnnRNNMode_t cellMode,
+    cudnnRNNBiasMode_t biasMode, cudnnDirectionMode_t dirMode,
+    cudnnRNNInputMode_t inputMode, cudnnDataType_t dataType,
+    cudnnDataType_t mathPrec, cudnnMathType_t mathType, int32_t inputSize,
+    int32_t hiddenSize, int32_t projSize, int32_t numLayers,
+    cudnnDropoutDescriptor_t dropoutDesc, uint32_t auxFlags) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDescriptor_t, cudnnRNNAlgo_t, cudnnRNNMode_t, cudnnRNNBiasMode_t,
+      cudnnDirectionMode_t, cudnnRNNInputMode_t, cudnnDataType_t,
+      cudnnDataType_t, cudnnMathType_t, int32_t, int32_t, int32_t, int32_t,
+      cudnnDropoutDescriptor_t, uint32_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v8");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(rnnDesc, algo, cellMode, biasMode, dirMode, inputMode,
+                  dataType, mathPrec, mathType, inputSize,
hiddenSize, projSize,
+                  numLayers, dropoutDesc, auxFlags);
+}
+
+// cudnnGetRNNDescriptor_v8: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNDescriptor_v8(
+    cudnnRNNDescriptor_t rnnDesc, cudnnRNNAlgo_t *algo,
+    cudnnRNNMode_t *cellMode, cudnnRNNBiasMode_t *biasMode,
+    cudnnDirectionMode_t *dirMode, cudnnRNNInputMode_t *inputMode,
+    cudnnDataType_t *dataType, cudnnDataType_t *mathPrec,
+    cudnnMathType_t *mathType, int32_t *inputSize, int32_t *hiddenSize,
+    int32_t *projSize, int32_t *numLayers,
+    cudnnDropoutDescriptor_t *dropoutDesc, uint32_t *auxFlags) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnRNNDescriptor_t, cudnnRNNAlgo_t *, cudnnRNNMode_t *,
+      cudnnRNNBiasMode_t *, cudnnDirectionMode_t *, cudnnRNNInputMode_t *,
+      cudnnDataType_t *, cudnnDataType_t *, cudnnMathType_t *, int32_t *,
+      int32_t *, int32_t *, int32_t *, cudnnDropoutDescriptor_t *, uint32_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNDescriptor_v8");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(rnnDesc, algo, cellMode, biasMode, dirMode, inputMode,
+                  dataType, mathPrec, mathType, inputSize, hiddenSize, projSize,
+                  numLayers, dropoutDesc, auxFlags);
+}
+
+// cudnnSetRNNDescriptor_v6: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNDescriptor_v6(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, const int hiddenSize,
+    const int numLayers, cudnnDropoutDescriptor_t dropoutDesc,
+    cudnnRNNInputMode_t inputMode, cudnnDirectionMode_t direction,
+    cudnnRNNMode_t cellMode, cudnnRNNAlgo_t algo, cudnnDataType_t mathPrec) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int,
+      cudnnDropoutDescriptor_t, cudnnRNNInputMode_t, cudnnDirectionMode_t,
+      cudnnRNNMode_t, cudnnRNNAlgo_t, cudnnDataType_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNDescriptor_v6");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, cellMode, algo, mathPrec);
+}
+
+// cudnnGetRNNDescriptor_v6: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI
cudnnGetRNNDescriptor_v6(
+    cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, int *hiddenSize,
+    int *numLayers, cudnnDropoutDescriptor_t *dropoutDesc,
+    cudnnRNNInputMode_t *inputMode, cudnnDirectionMode_t *direction,
+    cudnnRNNMode_t *cellMode, cudnnRNNAlgo_t *algo, cudnnDataType_t *mathPrec) {
+  using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(
+      cudnnHandle_t, cudnnRNNDescriptor_t, int *, int *,
+      cudnnDropoutDescriptor_t *, cudnnRNNInputMode_t *, cudnnDirectionMode_t *,
+      cudnnRNNMode_t *, cudnnRNNAlgo_t *, cudnnDataType_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNDescriptor_v6");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, rnnDesc, hiddenSize, numLayers, dropoutDesc,
+                  inputMode, direction, cellMode, algo, mathPrec);
+}
+
+// cudnnSetRNNMatrixMathType: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI
+cudnnSetRNNMatrixMathType(cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t mType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNMatrixMathType");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(rnnDesc, mType);
+}
+
+// cudnnGetRNNMatrixMathType: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNMatrixMathType(
+    cudnnRNNDescriptor_t rnnDesc, cudnnMathType_t *mType) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnMathType_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnGetRNNMatrixMathType");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(rnnDesc, mType);
+}
+
+// cudnnSetRNNBiasMode: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnSetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc,
+                                              cudnnRNNBiasMode_t biasMode) {
+  using FuncPtr =
+      cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNBiasMode_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cudnnSetRNNBiasMode");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(rnnDesc, biasMode);
+}
+
+// cudnnGetRNNBiasMode: forwards to the pointer
+// returned by LoadSymbol, or returns GetSymbolNotFoundError() if it is null.
+cudnnStatus_t CUDNNWINAPI cudnnGetRNNBiasMode(cudnnRNNDescriptor_t rnnDesc,
+                                              cudnnRNNBiasMode_t *biasMode) {
+  using FuncPtr =
+
cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, cudnnRNNBiasMode_t *); + static auto func_ptr = LoadSymbol("cudnnGetRNNBiasMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, biasMode); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNSetClip(cudnnHandle_t handle, + cudnnRNNDescriptor_t rnnDesc, + cudnnRNNClipMode_t clipMode, + cudnnNanPropagation_t clipNanOpt, + double lclip, double rclip) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t, + cudnnNanPropagation_t, double, double); + static auto func_ptr = LoadSymbol("cudnnRNNSetClip"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNGetClip(cudnnHandle_t handle, + cudnnRNNDescriptor_t rnnDesc, + cudnnRNNClipMode_t *clipMode, + cudnnNanPropagation_t *clipNanOpt, + double *lclip, double *rclip) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, cudnnRNNClipMode_t *, + cudnnNanPropagation_t *, double *, double *); + static auto func_ptr = LoadSymbol("cudnnRNNGetClip"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, clipMode, clipNanOpt, lclip, rclip); +} + +cudnnStatus_t CUDNNWINAPI +cudnnSetRNNProjectionLayers(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, + const int recProjSize, const int outProjSize) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, const int, const int); + static auto func_ptr = LoadSymbol("cudnnSetRNNProjectionLayers"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, recProjSize, outProjSize); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNProjectionLayers( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, int *recProjSize, + int *outProjSize) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, 
int *, int *); + static auto func_ptr = LoadSymbol("cudnnGetRNNProjectionLayers"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, recProjSize, outProjSize); +} + +cudnnStatus_t CUDNNWINAPI cudnnCreatePersistentRNNPlan( + cudnnRNNDescriptor_t rnnDesc, const int minibatch, + const cudnnDataType_t dataType, cudnnPersistentRNNPlan_t *plan) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, const int, + const cudnnDataType_t, + cudnnPersistentRNNPlan_t *); + static auto func_ptr = LoadSymbol("cudnnCreatePersistentRNNPlan"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, minibatch, dataType, plan); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyPersistentRNNPlan(cudnnPersistentRNNPlan_t plan) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnPersistentRNNPlan_t); + static auto func_ptr = LoadSymbol("cudnnDestroyPersistentRNNPlan"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(plan); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetPersistentRNNPlan( + cudnnRNNDescriptor_t rnnDesc, cudnnPersistentRNNPlan_t plan) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, + cudnnPersistentRNNPlan_t); + static auto func_ptr = LoadSymbol("cudnnSetPersistentRNNPlan"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, plan); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetRNNWeightSpaceSize(cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, + size_t *weightSpaceSize) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, + cudnnRNNDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetRNNWeightSpaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, weightSpaceSize); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNWorkspaceSize( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, + size_t *sizeInBytes) { + using 
FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetRNNWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNTempSpaceSizes( + cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, + cudnnForwardMode_t fMode, cudnnRNNDataDescriptor_t xDesc, + size_t *workSpaceSize, size_t *reserveSpaceSize) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, cudnnForwardMode_t, + cudnnRNNDataDescriptor_t, size_t *, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetRNNTempSpaceSizes"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, fMode, xDesc, workSpaceSize, + reserveSpaceSize); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetRNNParamsSize(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes, + cudnnDataType_t dataType) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnTensorDescriptor_t, + size_t *, cudnnDataType_t); + static auto func_ptr = LoadSymbol("cudnnGetRNNParamsSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, xDesc, sizeInBytes, dataType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerMatrixParams( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, + const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, + cudnnFilterDescriptor_t linLayerMatDesc, void **linLayerMat) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, + const void *, const int, cudnnFilterDescriptor_t, void **); 
+ static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerMatrixParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, + linLayerMatDesc, linLayerMat); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNLinLayerBiasParams( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int pseudoLayer, const cudnnTensorDescriptor_t xDesc, + const cudnnFilterDescriptor_t wDesc, const void *w, const int linLayerID, + cudnnFilterDescriptor_t linLayerBiasDesc, void **linLayerBias) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t, const cudnnFilterDescriptor_t, + const void *, const int, cudnnFilterDescriptor_t, void **); + static auto func_ptr = LoadSymbol("cudnnGetRNNLinLayerBiasParams"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, pseudoLayer, xDesc, wDesc, w, linLayerID, + linLayerBiasDesc, linLayerBias); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInference( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnTensorDescriptor_t *yDesc, void *y, + const cudnnTensorDescriptor_t hyDesc, void *hy, + const cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, + size_t workSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, 
void *, size_t); + static auto func_ptr = LoadSymbol("cudnnRNNForwardInference"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, + wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, + workSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, + unsigned paddingMode) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, unsigned int); + static auto func_ptr = LoadSymbol("cudnnSetRNNPaddingMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, paddingMode); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNPaddingMode(cudnnRNNDescriptor_t rnnDesc, + unsigned *paddingMode) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDescriptor_t, unsigned int *); + static auto func_ptr = LoadSymbol("cudnnGetRNNPaddingMode"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDesc, paddingMode); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateRNNDataDescriptor(cudnnRNNDataDescriptor_t *rnnDataDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateRNNDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDataDesc); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyRNNDataDescriptor(cudnnRNNDataDescriptor_t rnnDataDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnRNNDataDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyRNNDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDataDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetRNNDataDescriptor( + cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t dataType, + cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, + int vectorSize, + const int seqLengthArray[], /* length of each sequence in the batch */ + void *paddingFill) { + using FuncPtr = 
cudnnStatus_t(CUDNNWINAPI *)( + cudnnRNNDataDescriptor_t, cudnnDataType_t, cudnnRNNDataLayout_t, int, int, + int, const int[], void *); + static auto func_ptr = LoadSymbol("cudnnSetRNNDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDataDesc, dataType, layout, maxSeqLength, batchSize, + vectorSize, seqLengthArray, paddingFill); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNDataDescriptor( + cudnnRNNDataDescriptor_t rnnDataDesc, cudnnDataType_t *dataType, + cudnnRNNDataLayout_t *layout, int *maxSeqLength, int *batchSize, + int *vectorSize, int arrayLengthRequested, int seqLengthArray[], + void *paddingFill) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnRNNDataDescriptor_t, cudnnDataType_t *, cudnnRNNDataLayout_t *, + int *, int *, int *, int, int[], void *); + static auto func_ptr = LoadSymbol("cudnnGetRNNDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(rnnDataDesc, dataType, layout, maxSeqLength, batchSize, + vectorSize, arrayLengthRequested, seqLengthArray, + paddingFill); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNForwardInferenceEx( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const cudnnRNNDataDescriptor_t xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnRNNDataDescriptor_t yDesc, void *y, + const cudnnTensorDescriptor_t hyDesc, void *hy, + const cudnnTensorDescriptor_t cyDesc, void *cy, + const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */ + const void *keys, /* reserved, should pass NULL */ + const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */ + void *cAttn, /* reserved, should pass NULL */ + const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */ + void *iAttn, /* reserved, should pass NULL */ + const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass 
NULL */ + void *queries, /* reserved, should pass NULL */ + void *workSpace, size_t workSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, + const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, + const cudnnRNNDataDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, + void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnRNNForwardInferenceEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, + yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, + iDesc, iAttn, qDesc, queries, workSpace, + workSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateSeqDataDescriptor(cudnnSeqDataDescriptor_t *seqDataDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnSeqDataDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateSeqDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(seqDataDesc); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroySeqDataDescriptor(cudnnSeqDataDescriptor_t seqDataDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnSeqDataDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroySeqDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(seqDataDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetSeqDataDescriptor( + cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t dataType, int nbDims, + const int dimA[], const cudnnSeqDataAxis_t axes[], + size_t seqLengthArraySize, const int seqLengthArray[], void *paddingFill) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI 
*)( + cudnnSeqDataDescriptor_t, cudnnDataType_t, int, const int[], + const cudnnSeqDataAxis_t[], size_t, const int[], void *); + static auto func_ptr = LoadSymbol("cudnnSetSeqDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(seqDataDesc, dataType, nbDims, dimA, axes, seqLengthArraySize, + seqLengthArray, paddingFill); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetSeqDataDescriptor( + const cudnnSeqDataDescriptor_t seqDataDesc, cudnnDataType_t *dataType, + int *nbDims, int nbDimsRequested, int dimA[], cudnnSeqDataAxis_t axes[], + size_t *seqLengthArraySize, size_t seqLengthSizeRequested, + int seqLengthArray[], void *paddingFill) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + const cudnnSeqDataDescriptor_t, cudnnDataType_t *, int *, int, int[], + cudnnSeqDataAxis_t[], size_t *, size_t, int[], void *); + static auto func_ptr = LoadSymbol("cudnnGetSeqDataDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(seqDataDesc, dataType, nbDims, nbDimsRequested, dimA, axes, + seqLengthArraySize, seqLengthSizeRequested, seqLengthArray, + paddingFill); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateAttnDescriptor(cudnnAttnDescriptor_t *attnDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAttnDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateAttnDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attnDesc); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyAttnDescriptor(cudnnAttnDescriptor_t attnDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnAttnDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyAttnDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attnDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetAttnDescriptor( + cudnnAttnDescriptor_t attnDesc, unsigned attnMode, int nHeads, + double smScaler, cudnnDataType_t dataType, cudnnDataType_t computePrec, + cudnnMathType_t mathType, cudnnDropoutDescriptor_t 
attnDropoutDesc, + cudnnDropoutDescriptor_t postDropoutDesc, int qSize, int kSize, int vSize, + int qProjSize, int kProjSize, int vProjSize, int oProjSize, + int qoMaxSeqLength, int kvMaxSeqLength, int maxBatchSize, int maxBeamSize) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnAttnDescriptor_t, unsigned int, int, double, cudnnDataType_t, + cudnnDataType_t, cudnnMathType_t, cudnnDropoutDescriptor_t, + cudnnDropoutDescriptor_t, int, int, int, int, int, int, int, int, int, + int, int); + static auto func_ptr = LoadSymbol("cudnnSetAttnDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attnDesc, attnMode, nHeads, smScaler, dataType, computePrec, + mathType, attnDropoutDesc, postDropoutDesc, qSize, kSize, + vSize, qProjSize, kProjSize, vProjSize, oProjSize, + qoMaxSeqLength, kvMaxSeqLength, maxBatchSize, maxBeamSize); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetAttnDescriptor( + cudnnAttnDescriptor_t attnDesc, unsigned *attnMode, int *nHeads, + double *smScaler, cudnnDataType_t *dataType, cudnnDataType_t *computePrec, + cudnnMathType_t *mathType, cudnnDropoutDescriptor_t *attnDropoutDesc, + cudnnDropoutDescriptor_t *postDropoutDesc, int *qSize, int *kSize, + int *vSize, int *qProjSize, int *kProjSize, int *vProjSize, int *oProjSize, + int *qoMaxSeqLength, int *kvMaxSeqLength, int *maxBatchSize, + int *maxBeamSize) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnAttnDescriptor_t, unsigned int *, int *, double *, cudnnDataType_t *, + cudnnDataType_t *, cudnnMathType_t *, cudnnDropoutDescriptor_t *, + cudnnDropoutDescriptor_t *, int *, int *, int *, int *, int *, int *, + int *, int *, int *, int *, int *); + static auto func_ptr = LoadSymbol("cudnnGetAttnDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(attnDesc, attnMode, nHeads, smScaler, dataType, computePrec, + mathType, attnDropoutDesc, postDropoutDesc, qSize, kSize, + vSize, qProjSize, kProjSize, vProjSize, oProjSize, + qoMaxSeqLength, 
kvMaxSeqLength, maxBatchSize, maxBeamSize); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetMultiHeadAttnBuffers( + cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, + size_t *weightSizeInBytes, size_t *workSpaceSizeInBytes, + size_t *reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnAttnDescriptor_t, size_t *, size_t *, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetMultiHeadAttnBuffers"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, attnDesc, weightSizeInBytes, workSpaceSizeInBytes, + reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetMultiHeadAttnWeights( + cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, + cudnnMultiHeadAttnWeightKind_t wKind, size_t weightSizeInBytes, + const void *weights, cudnnTensorDescriptor_t wDesc, void **wAddr) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnAttnDescriptor_t, + cudnnMultiHeadAttnWeightKind_t, size_t, const void *, + cudnnTensorDescriptor_t, void **); + static auto func_ptr = LoadSymbol("cudnnGetMultiHeadAttnWeights"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, attnDesc, wKind, weightSizeInBytes, weights, wDesc, + wAddr); +} + +cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnForward( + cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, int currIdx, + const int loWinIdx[], const int hiWinIdx[], const int devSeqLengthsQO[], + const int devSeqLengthsKV[], const cudnnSeqDataDescriptor_t qDesc, + const void *queries, const void *residuals, + const cudnnSeqDataDescriptor_t kDesc, const void *keys, + const cudnnSeqDataDescriptor_t vDesc, const void *values, + const cudnnSeqDataDescriptor_t oDesc, void *out, size_t weightSizeInBytes, + const void *weights, size_t workSpaceSizeInBytes, void *workSpace, + size_t reserveSpaceSizeInBytes, void *reserveSpace) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const 
cudnnAttnDescriptor_t, int, const int[], const int[], + const int[], const int[], const cudnnSeqDataDescriptor_t, const void *, + const void *, const cudnnSeqDataDescriptor_t, const void *, + const cudnnSeqDataDescriptor_t, const void *, + const cudnnSeqDataDescriptor_t, void *, size_t, const void *, size_t, + void *, size_t, void *); + static auto func_ptr = LoadSymbol("cudnnMultiHeadAttnForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, attnDesc, currIdx, loWinIdx, hiWinIdx, + devSeqLengthsQO, devSeqLengthsKV, qDesc, queries, residuals, + kDesc, keys, vDesc, values, oDesc, out, weightSizeInBytes, + weights, workSpaceSizeInBytes, workSpace, + reserveSpaceSizeInBytes, reserveSpace); +} + +cudnnStatus_t CUDNNWINAPI cudnnAdvInferVersionCheck(void) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnAdvInferVersionCheck"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +cudnnStatus_t CUDNNWINAPI cudnnSoftmaxBackward( + cudnnHandle_t handle, cudnnSoftmaxAlgorithm_t algo, cudnnSoftmaxMode_t mode, + const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, + const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, + const cudnnTensorDescriptor_t dxDesc, void *dx) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnSoftmaxAlgorithm_t, cudnnSoftmaxMode_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnSoftmaxBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, mode, alpha, yDesc, y, dyDesc, dy, beta, dxDesc, + dx); +} + +cudnnStatus_t CUDNNWINAPI cudnnPoolingBackward( + cudnnHandle_t handle, const cudnnPoolingDescriptor_t poolingDesc, + const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, + const 
cudnnTensorDescriptor_t dyDesc, const void *dy, + const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, + const cudnnTensorDescriptor_t dxDesc, void *dx) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnPoolingDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnPoolingBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, poolingDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, + beta, dxDesc, dx); +} + +cudnnStatus_t CUDNNWINAPI cudnnActivationBackward( + cudnnHandle_t handle, cudnnActivationDescriptor_t activationDesc, + const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, + const cudnnTensorDescriptor_t dyDesc, const void *dy, + const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, + const cudnnTensorDescriptor_t dxDesc, void *dx) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnActivationDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnActivationBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, activationDesc, alpha, yDesc, y, dyDesc, dy, xDesc, x, + beta, dxDesc, dx); +} + +cudnnStatus_t CUDNNWINAPI cudnnLRNCrossChannelBackward( + cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, cudnnLRNMode_t lrnMode, + const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y, + const cudnnTensorDescriptor_t dyDesc, const void *dy, + const cudnnTensorDescriptor_t xDesc, const void *x, const void *beta, + const cudnnTensorDescriptor_t dxDesc, void *dx) { + using FuncPtr 
= cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnLRNDescriptor_t, cudnnLRNMode_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, + const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnLRNCrossChannelBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, normDesc, lrnMode, alpha, yDesc, y, dyDesc, dy, xDesc, + x, beta, dxDesc, dx); +} + +cudnnStatus_t CUDNNWINAPI cudnnDivisiveNormalizationBackward( + cudnnHandle_t handle, cudnnLRNDescriptor_t normDesc, + cudnnDivNormMode_t mode, const void *alpha, + const cudnnTensorDescriptor_t + xDesc, /* same desc for x, means, dy, temp, temp2 */ + const void *x, + const void *means, /* if NULL, means are assumed to be zero */ + const void *dy, void *temp, void *temp2, const void *beta, + const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */ + void *dx, /* output x differential */ + void *dMeans) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnLRNDescriptor_t, cudnnDivNormMode_t, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, const void *, + void *, void *, const void *, const cudnnTensorDescriptor_t, void *, + void *); + static auto func_ptr = + LoadSymbol("cudnnDivisiveNormalizationBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, normDesc, mode, alpha, xDesc, x, means, dy, temp, + temp2, beta, dXdMeansDesc, dx, dMeans); +} + +cudnnStatus_t CUDNNWINAPI +cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, + const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t zDesc, + const cudnnTensorDescriptor_t yDesc, + const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, + const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) { + 
using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnActivationDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol( + "cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, bnOps, xDesc, zDesc, yDesc, + bnScaleBiasMeanVarDesc, activationDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationBackwardExWorkspaceSize( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, + const cudnnTensorDescriptor_t xDesc, const cudnnTensorDescriptor_t yDesc, + const cudnnTensorDescriptor_t dyDesc, const cudnnTensorDescriptor_t dzDesc, + const cudnnTensorDescriptor_t dxDesc, + const cudnnTensorDescriptor_t dBnScaleBiasDesc, + const cudnnActivationDescriptor_t activationDesc, size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, + const cudnnActivationDescriptor_t, size_t *); + static auto func_ptr = + LoadSymbol("cudnnGetBatchNormalizationBackwardExWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, bnOps, xDesc, yDesc, dyDesc, dzDesc, dxDesc, + dBnScaleBiasDesc, activationDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetBatchNormalizationTrainingExReserveSpaceSize( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, + const cudnnActivationDescriptor_t activationDesc, + const cudnnTensorDescriptor_t xDesc, size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, 
cudnnBatchNormMode_t, cudnnBatchNormOps_t, + const cudnnActivationDescriptor_t, const cudnnTensorDescriptor_t, + size_t *); + static auto func_ptr = LoadSymbol( + "cudnnGetBatchNormalizationTrainingExReserveSpaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, bnOps, activationDesc, xDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTraining( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, + + const void *alpha, /* alpha[0] = result blend factor */ + const void *beta, /* beta[0] = dest layer blend factor */ + + const cudnnTensorDescriptor_t xDesc, const void *x, /* NxCxHxW */ + const cudnnTensorDescriptor_t yDesc, void *y, /* NxCxHxW */ + + /* Shared desc for the next 6 tensors in the argument list. + Data type to be set as follows: + type = (typeOf(x) == double) ? double : float + Dimensions for this descriptor depend on normalization mode + - Spatial Normalization : tensors are expected to have dims 1xCx1x1 + (normalization is performed across NxHxW) + - Per-Activation Normalization : tensors are expected to have dims of + 1xCxHxW (normalization is performed across N) */ + const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, + + /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation + */ + const void *bnScale, const void *bnBias, + + /* MUST use factor=1 in the very first call of a complete training cycle. + Use a factor=1/(1+n) at N-th call to the function to get + Cumulative Moving Average (CMA) behavior + CMA[n] = (x[1]+...+x[n])/n + Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1) = + ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1) = + CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */ + double exponentialAverageFactor, + + /* Used in Training phase only. + runningMean = newMean*factor + runningMean*(1-factor) */ + void *resultRunningMean, + /* Output in training mode, input in inference. 
Is the moving average + of variance[x] (factor is applied in the same way as for runningMean) */ + void *resultRunningVariance, + + /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and + backward functions. */ + double epsilon, + + /* Optionally save intermediate results from the forward pass here + - can be reused to speed up backward pass. NULL if unused */ + void *resultSaveMean, void *resultSaveInvVariance) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, + const void *, const void *, double, void *, void *, double, void *, + void *); + static auto func_ptr = + LoadSymbol("cudnnBatchNormalizationForwardTraining"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr( + handle, mode, alpha, beta, xDesc, x, yDesc, y, bnScaleBiasMeanVarDesc, + bnScale, bnBias, exponentialAverageFactor, resultRunningMean, + resultRunningVariance, epsilon, resultSaveMean, resultSaveInvVariance); +} + +cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationForwardTrainingEx( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, + + const void *alpha, /* alpha[0] = result blend factor */ + const void *beta, /* beta[0] = dest layer blend factor */ + + const cudnnTensorDescriptor_t xDesc, const void *xData, + const cudnnTensorDescriptor_t zDesc, const void *zData, + const cudnnTensorDescriptor_t yDesc, void *yData, + + const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc, const void *bnScale, + const void *bnBias, + + double exponentialAverageFactor, void *resultRunningMean, + void *resultRunningVariance, + + /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and + backward functions. */ + double epsilon, + + /* Optionally save intermediate results from the forward pass here + - can be reused to speed up backward pass. 
NULL if unused */ + void *resultSaveMean, void *resultSaveInvVariance, + + cudnnActivationDescriptor_t activationDesc, void *workspace, + size_t workSpaceSizeInBytes, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, + const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, + const void *, const void *, double, void *, void *, double, void *, + void *, cudnnActivationDescriptor_t, void *, size_t, void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnBatchNormalizationForwardTrainingEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, bnOps, alpha, beta, xDesc, xData, zDesc, zData, + yDesc, yData, bnScaleBiasMeanVarDesc, bnScale, bnBias, + exponentialAverageFactor, resultRunningMean, + resultRunningVariance, epsilon, resultSaveMean, + resultSaveInvVariance, activationDesc, workspace, + workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackward( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, const void *alphaDataDiff, + const void *betaDataDiff, const void *alphaParamDiff, + const void *betaParamDiff, + const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */ + const void *x, const cudnnTensorDescriptor_t dyDesc, const void *dy, + const cudnnTensorDescriptor_t dxDesc, void *dx, + /* Shared tensor desc for the 4 tensors below */ + const cudnnTensorDescriptor_t dBnScaleBiasDesc, + const void *bnScale, /* bnBias doesn't affect backpropagation */ + /* scale and bias diff are not backpropagated below this layer */ + void *dBnScaleResult, void *dBnBiasResult, + /* Same epsilon as forward pass */ + double epsilon, + + /* Optionally cached intermediate results from + forward pass */ + const void *savedMean, 
const void *savedInvVariance) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, const void *, const void *, + const void *, const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, + const void *, void *, void *, double, const void *, const void *); + static auto func_ptr = LoadSymbol("cudnnBatchNormalizationBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mode, alphaDataDiff, betaDataDiff, alphaParamDiff, + betaParamDiff, xDesc, x, dyDesc, dy, dxDesc, dx, + dBnScaleBiasDesc, bnScale, dBnScaleResult, dBnBiasResult, + epsilon, savedMean, savedInvVariance); +} + +cudnnStatus_t CUDNNWINAPI cudnnBatchNormalizationBackwardEx( + cudnnHandle_t handle, cudnnBatchNormMode_t mode, cudnnBatchNormOps_t bnOps, + + const void *alphaDataDiff, const void *betaDataDiff, + const void *alphaParamDiff, const void *betaParamDiff, + const cudnnTensorDescriptor_t xDesc, const void *xData, + const cudnnTensorDescriptor_t yDesc, const void *yData, + const cudnnTensorDescriptor_t dyDesc, const void *dyData, + const cudnnTensorDescriptor_t dzDesc, void *dzData, + const cudnnTensorDescriptor_t dxDesc, void *dxData, + + /* Shared tensor desc for the 4 tensors below */ + const cudnnTensorDescriptor_t dBnScaleBiasDesc, const void *bnScaleData, + const void *bnBiasData, /* needed if there is activation */ + void *dBnScaleData, void *dBnBiasData, + double epsilon, /* Same epsilon as forward pass */ + + /* Optionally cached intermediate results from + forward pass */ + const void *savedMean, const void *savedInvVariance, + cudnnActivationDescriptor_t activationDesc, void *workSpace, + size_t workSpaceSizeInBytes, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnBatchNormMode_t, cudnnBatchNormOps_t, const void *, + const void *, 
const void *, const void *, const cudnnTensorDescriptor_t, + const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, const void *, const void *, void *, + void *, double, const void *, const void *, cudnnActivationDescriptor_t, + void *, size_t, void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnBatchNormalizationBackwardEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr( + handle, mode, bnOps, alphaDataDiff, betaDataDiff, alphaParamDiff, + betaParamDiff, xDesc, xData, yDesc, yData, dyDesc, dyData, dzDesc, dzData, + dxDesc, dxData, dBnScaleBiasDesc, bnScaleData, bnBiasData, dBnScaleData, + dBnBiasData, epsilon, savedMean, savedInvVariance, activationDesc, + workSpace, workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnSpatialTfGridGeneratorBackward( + cudnnHandle_t handle, const cudnnSpatialTransformerDescriptor_t stDesc, + const void *dgrid, void *dtheta) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnSpatialTransformerDescriptor_t, const void *, + void *); + static auto func_ptr = + LoadSymbol("cudnnSpatialTfGridGeneratorBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, stDesc, dgrid, dtheta); +} + +cudnnStatus_t CUDNNWINAPI cudnnSpatialTfSamplerBackward( + cudnnHandle_t handle, cudnnSpatialTransformerDescriptor_t stDesc, + const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x, + const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx, + const void *alphaDgrid, const cudnnTensorDescriptor_t dyDesc, + const void *dy, const void *grid, const void *betaDgrid, void *dgrid) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnSpatialTransformerDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, 
const void *, + const cudnnTensorDescriptor_t, void *, const void *, + const cudnnTensorDescriptor_t, const void *, const void *, const void *, + void *); + static auto func_ptr = LoadSymbol("cudnnSpatialTfSamplerBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, stDesc, alpha, xDesc, x, beta, dxDesc, dx, alphaDgrid, + dyDesc, dy, grid, betaDgrid, dgrid); +} + +cudnnStatus_t CUDNNWINAPI cudnnDropoutBackward( + cudnnHandle_t handle, const cudnnDropoutDescriptor_t dropoutDesc, + const cudnnTensorDescriptor_t dydesc, const void *dy, + const cudnnTensorDescriptor_t dxdesc, void *dx, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnDropoutDescriptor_t, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, void *, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnDropoutBackward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dropoutDesc, dydesc, dy, dxdesc, dx, reserveSpace, + reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnOpsTrainVersionCheck(void) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnOpsTrainVersionCheck"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithmMaxCount( + cudnnHandle_t handle, int *count) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnHandle_t, int *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithmMaxCount"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, count); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithm( + cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, + const cudnnTensorDescriptor_t dyDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnFilterDescriptor_t dwDesc, const int 
requestedAlgoCount, + int *returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnFilterDescriptor_t, const int, int *, + cudnnConvolutionBwdFilterAlgoPerf_t *); + static auto func_ptr = + LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithm"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, dyDesc, convDesc, dwDesc, requestedAlgoCount, + returnedAlgoCount, perfResults); +} + +cudnnStatus_t CUDNNWINAPI cudnnFindConvolutionBackwardFilterAlgorithmEx( + cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, const void *x, + const cudnnTensorDescriptor_t dyDesc, const void *y, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnFilterDescriptor_t dwDesc, void *dw, + const int requestedAlgoCount, int *returnedAlgoCount, + cudnnConvolutionBwdFilterAlgoPerf_t *perfResults, void *workSpace, + size_t workSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnConvolutionDescriptor_t, const cudnnFilterDescriptor_t, void *, + const int, int *, cudnnConvolutionBwdFilterAlgoPerf_t *, void *, size_t); + static auto func_ptr = + LoadSymbol("cudnnFindConvolutionBackwardFilterAlgorithmEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, x, dyDesc, y, convDesc, dwDesc, dw, + requestedAlgoCount, returnedAlgoCount, perfResults, workSpace, + workSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterAlgorithm_v7( + cudnnHandle_t handle, const cudnnTensorDescriptor_t srcDesc, + const cudnnTensorDescriptor_t diffDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnFilterDescriptor_t gradDesc, const int requestedAlgoCount, + int 
*returnedAlgoCount, cudnnConvolutionBwdFilterAlgoPerf_t *perfResults) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnFilterDescriptor_t, const int, int *, + cudnnConvolutionBwdFilterAlgoPerf_t *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionBackwardFilterAlgorithm_v7"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, srcDesc, diffDesc, convDesc, gradDesc, + requestedAlgoCount, returnedAlgoCount, perfResults); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetConvolutionBackwardFilterWorkspaceSize( + cudnnHandle_t handle, const cudnnTensorDescriptor_t xDesc, + const cudnnTensorDescriptor_t dyDesc, + const cudnnConvolutionDescriptor_t convDesc, + const cudnnFilterDescriptor_t gradDesc, + cudnnConvolutionBwdFilterAlgo_t algo, size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnConvolutionDescriptor_t, + const cudnnFilterDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, size_t *); + static auto func_ptr = + LoadSymbol("cudnnGetConvolutionBackwardFilterWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, xDesc, dyDesc, convDesc, gradDesc, algo, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardFilter( + cudnnHandle_t handle, const void *alpha, + const cudnnTensorDescriptor_t xDesc, const void *x, + const cudnnTensorDescriptor_t dyDesc, const void *dy, + const cudnnConvolutionDescriptor_t convDesc, + cudnnConvolutionBwdFilterAlgo_t algo, void *workSpace, + size_t workSpaceSizeInBytes, const void *beta, + const cudnnFilterDescriptor_t dwDesc, void *dw) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const 
cudnnConvolutionDescriptor_t, cudnnConvolutionBwdFilterAlgo_t, + void *, size_t, const void *, const cudnnFilterDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardFilter"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, alpha, xDesc, x, dyDesc, dy, convDesc, algo, + workSpace, workSpaceSizeInBytes, beta, dwDesc, dw); +} + +cudnnStatus_t CUDNNWINAPI cudnnConvolutionBackwardBias( + cudnnHandle_t handle, const void *alpha, + const cudnnTensorDescriptor_t dyDesc, const void *dy, const void *beta, + const cudnnTensorDescriptor_t dbDesc, void *db) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const void *, const cudnnTensorDescriptor_t, const void *, + const void *, const cudnnTensorDescriptor_t, void *); + static auto func_ptr = LoadSymbol("cudnnConvolutionBackwardBias"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, alpha, dyDesc, dy, beta, dbDesc, db); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetRNNTrainingReserveSize( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetRNNTrainingReserveSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTraining( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnTensorDescriptor_t *yDesc, void *y, + const cudnnTensorDescriptor_t hyDesc, void *hy, + const 
cudnnTensorDescriptor_t cyDesc, void *cy, void *workspace, + size_t workSpaceSizeInBytes, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, + size_t); + static auto func_ptr = LoadSymbol("cudnnRNNForwardTraining"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, cxDesc, cx, + wDesc, w, yDesc, y, hyDesc, hy, cyDesc, cy, workspace, + workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI +cudnnRNNBackwardData(cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *yDesc, + const void *y, const cudnnTensorDescriptor_t *dyDesc, + const void *dy, const cudnnTensorDescriptor_t dhyDesc, + const void *dhy, const cudnnTensorDescriptor_t dcyDesc, + const void *dcy, const cudnnFilterDescriptor_t wDesc, + const void *w, const cudnnTensorDescriptor_t hxDesc, + const void *hx, const cudnnTensorDescriptor_t cxDesc, + const void *cx, const cudnnTensorDescriptor_t *dxDesc, + void *dx, const cudnnTensorDescriptor_t dhxDesc, void *dhx, + const cudnnTensorDescriptor_t dcxDesc, void *dcx, + void *workspace, size_t workSpaceSizeInBytes, + void *reserveSpace, size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + 
const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t *, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, void *, size_t, void *, + size_t); + static auto func_ptr = LoadSymbol("cudnnRNNBackwardData"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, yDesc, y, dyDesc, dy, dhyDesc, + dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, dxDesc, + dx, dhxDesc, dhx, dcxDesc, dcx, workspace, + workSpaceSizeInBytes, reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const int seqLength, const cudnnTensorDescriptor_t *xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t *yDesc, const void *y, const void *workspace, + size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, + const void *reserveSpace, size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const int, + const cudnnTensorDescriptor_t *, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t *, const void *, const void *, size_t, + const cudnnFilterDescriptor_t, void *, const void *, size_t); + static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeights"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, seqLength, xDesc, x, hxDesc, hx, yDesc, y, + workspace, workSpaceSizeInBytes, dwDesc, dw, reserveSpace, + reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNForwardTrainingEx( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const cudnnRNNDataDescriptor_t xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const 
cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnRNNDataDescriptor_t yDesc, void *y, + const cudnnTensorDescriptor_t hyDesc, void *hy, + const cudnnTensorDescriptor_t cyDesc, void *cy, + const cudnnRNNDataDescriptor_t kDesc, /* reserved, should pass NULL */ + const void *keys, /* reserved, should pass NULL */ + const cudnnRNNDataDescriptor_t cDesc, /* reserved, should pass NULL */ + void *cAttn, /* reserved, should pass NULL */ + const cudnnRNNDataDescriptor_t iDesc, /* reserved, should pass NULL */ + void *iAttn, /* reserved, should pass NULL */ + const cudnnRNNDataDescriptor_t qDesc, /* reserved, should pass NULL */ + void *queries, /* reserved, should pass NULL */ + void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace, + size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, + const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, + const cudnnRNNDataDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, void *, const cudnnRNNDataDescriptor_t, + void *, const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, + size_t); + static auto func_ptr = LoadSymbol("cudnnRNNForwardTrainingEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, cxDesc, cx, wDesc, w, + yDesc, y, hyDesc, hy, cyDesc, cy, kDesc, keys, cDesc, cAttn, + iDesc, iAttn, qDesc, queries, workSpace, workSpaceSizeInBytes, + reserveSpace, reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNForward( + cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, + cudnnForwardMode_t fwdMode, const int32_t devSeqLengths[], + 
cudnnRNNDataDescriptor_t xDesc, const void *x, + cudnnRNNDataDescriptor_t yDesc, void *y, cudnnTensorDescriptor_t hDesc, + const void *hx, void *hy, cudnnTensorDescriptor_t cDesc, const void *cx, + void *cy, size_t weightSpaceSize, const void *weightSpace, + size_t workSpaceSize, void *workSpace, size_t reserveSpaceSize, + void *reserveSpace) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, cudnnForwardMode_t, const int32_t[], + cudnnRNNDataDescriptor_t, const void *, cudnnRNNDataDescriptor_t, void *, + cudnnTensorDescriptor_t, const void *, void *, cudnnTensorDescriptor_t, + const void *, void *, size_t, const void *, size_t, void *, size_t, + void *); + static auto func_ptr = LoadSymbol("cudnnRNNForward"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, fwdMode, devSeqLengths, xDesc, x, yDesc, y, + hDesc, hx, hy, cDesc, cx, cy, weightSpaceSize, weightSpace, + workSpaceSize, workSpace, reserveSpaceSize, reserveSpace); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardDataEx( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const cudnnRNNDataDescriptor_t yDesc, const void *y, + const cudnnRNNDataDescriptor_t dyDesc, const void *dy, + const cudnnRNNDataDescriptor_t dcDesc, /* reserved, should pass NULL */ + const void *dcAttn, /* reserved, should pass NULL */ + const cudnnTensorDescriptor_t dhyDesc, const void *dhy, + const cudnnTensorDescriptor_t dcyDesc, const void *dcy, + const cudnnFilterDescriptor_t wDesc, const void *w, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnTensorDescriptor_t cxDesc, const void *cx, + const cudnnRNNDataDescriptor_t dxDesc, void *dx, + const cudnnTensorDescriptor_t dhxDesc, void *dhx, + const cudnnTensorDescriptor_t dcxDesc, void *dcx, + const cudnnRNNDataDescriptor_t dkDesc, /* reserved, should pass NULL */ + void *dkeys, /* reserved, should pass NULL */ + void *workSpace, size_t workSpaceSizeInBytes, void *reserveSpace, + 
size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, + const void *, const cudnnRNNDataDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnFilterDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnTensorDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, void *, const cudnnTensorDescriptor_t, + void *, const cudnnTensorDescriptor_t, void *, + const cudnnRNNDataDescriptor_t, void *, void *, size_t, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnRNNBackwardDataEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, yDesc, y, dyDesc, dy, dcDesc, dcAttn, + dhyDesc, dhy, dcyDesc, dcy, wDesc, w, hxDesc, hx, cxDesc, cx, + dxDesc, dx, dhxDesc, dhx, dcxDesc, dcx, dkDesc, dkeys, + workSpace, workSpaceSizeInBytes, reserveSpace, + reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardData_v8( + cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, + const int32_t devSeqLengths[], cudnnRNNDataDescriptor_t yDesc, + const void *y, const void *dy, cudnnRNNDataDescriptor_t xDesc, void *dx, + cudnnTensorDescriptor_t hDesc, const void *hx, const void *dhy, void *dhx, + cudnnTensorDescriptor_t cDesc, const void *cx, const void *dcy, void *dcx, + size_t weightSpaceSize, const void *weightSpace, size_t workSpaceSize, + void *workSpace, size_t reserveSpaceSize, void *reserveSpace) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, const int32_t[], + cudnnRNNDataDescriptor_t, const void *, const void *, + cudnnRNNDataDescriptor_t, void *, cudnnTensorDescriptor_t, const void *, + const void *, void *, cudnnTensorDescriptor_t, const void *, const void *, + void *, size_t, const void *, size_t, void *, size_t, 
void *); + static auto func_ptr = LoadSymbol("cudnnRNNBackwardData_v8"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, devSeqLengths, yDesc, y, dy, xDesc, dx, + hDesc, hx, dhy, dhx, cDesc, cx, dcy, dcx, weightSpaceSize, + weightSpace, workSpaceSize, workSpace, reserveSpaceSize, + reserveSpace); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeightsEx( + cudnnHandle_t handle, const cudnnRNNDescriptor_t rnnDesc, + const cudnnRNNDataDescriptor_t xDesc, const void *x, + const cudnnTensorDescriptor_t hxDesc, const void *hx, + const cudnnRNNDataDescriptor_t yDesc, const void *y, void *workSpace, + size_t workSpaceSizeInBytes, const cudnnFilterDescriptor_t dwDesc, void *dw, + void *reserveSpace, size_t reserveSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnRNNDescriptor_t, const cudnnRNNDataDescriptor_t, + const void *, const cudnnTensorDescriptor_t, const void *, + const cudnnRNNDataDescriptor_t, const void *, void *, size_t, + const cudnnFilterDescriptor_t, void *, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeightsEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, xDesc, x, hxDesc, hx, yDesc, y, workSpace, + workSpaceSizeInBytes, dwDesc, dw, reserveSpace, + reserveSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnRNNBackwardWeights_v8( + cudnnHandle_t handle, cudnnRNNDescriptor_t rnnDesc, + cudnnWgradMode_t addGrad, const int32_t devSeqLengths[], + cudnnRNNDataDescriptor_t xDesc, const void *x, + cudnnTensorDescriptor_t hDesc, const void *hx, + cudnnRNNDataDescriptor_t yDesc, const void *y, size_t weightSpaceSize, + void *dweightSpace, size_t workSpaceSize, void *workSpace, + size_t reserveSpaceSize, void *reserveSpace) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnRNNDescriptor_t, cudnnWgradMode_t, const int32_t[], + cudnnRNNDataDescriptor_t, const void *, cudnnTensorDescriptor_t, + 
const void *, cudnnRNNDataDescriptor_t, const void *, size_t, void *, + size_t, void *, size_t, void *); + static auto func_ptr = LoadSymbol("cudnnRNNBackwardWeights_v8"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, rnnDesc, addGrad, devSeqLengths, xDesc, x, hDesc, hx, + yDesc, y, weightSpaceSize, dweightSpace, workSpaceSize, + workSpace, reserveSpaceSize, reserveSpace); +} + +cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnBackwardData( + cudnnHandle_t handle, const cudnnAttnDescriptor_t attnDesc, + const int loWinIdx[], const int hiWinIdx[], const int devSeqLengthsDQDO[], + const int devSeqLengthsDKDV[], const cudnnSeqDataDescriptor_t doDesc, + const void *dout, const cudnnSeqDataDescriptor_t dqDesc, void *dqueries, + const void *queries, const cudnnSeqDataDescriptor_t dkDesc, void *dkeys, + const void *keys, const cudnnSeqDataDescriptor_t dvDesc, void *dvalues, + const void *values, size_t weightSizeInBytes, const void *weights, + size_t workSpaceSizeInBytes, void *workSpace, + size_t reserveSpaceSizeInBytes, void *reserveSpace) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnAttnDescriptor_t, const int[], const int[], + const int[], const int[], const cudnnSeqDataDescriptor_t, const void *, + const cudnnSeqDataDescriptor_t, void *, const void *, + const cudnnSeqDataDescriptor_t, void *, const void *, + const cudnnSeqDataDescriptor_t, void *, const void *, size_t, + const void *, size_t, void *, size_t, void *); + static auto func_ptr = LoadSymbol("cudnnMultiHeadAttnBackwardData"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, attnDesc, loWinIdx, hiWinIdx, devSeqLengthsDQDO, + devSeqLengthsDKDV, doDesc, dout, dqDesc, dqueries, queries, + dkDesc, dkeys, keys, dvDesc, dvalues, values, + weightSizeInBytes, weights, workSpaceSizeInBytes, workSpace, + reserveSpaceSizeInBytes, reserveSpace); +} + +cudnnStatus_t CUDNNWINAPI cudnnMultiHeadAttnBackwardWeights( + cudnnHandle_t handle, 
const cudnnAttnDescriptor_t attnDesc, + cudnnWgradMode_t addGrad, const cudnnSeqDataDescriptor_t qDesc, + const void *queries, const cudnnSeqDataDescriptor_t kDesc, const void *keys, + const cudnnSeqDataDescriptor_t vDesc, const void *values, + const cudnnSeqDataDescriptor_t doDesc, const void *dout, + size_t weightSizeInBytes, const void *weights, void *dweights, + size_t workSpaceSizeInBytes, void *workSpace, + size_t reserveSpaceSizeInBytes, void *reserveSpace) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnAttnDescriptor_t, cudnnWgradMode_t, + const cudnnSeqDataDescriptor_t, const void *, + const cudnnSeqDataDescriptor_t, const void *, + const cudnnSeqDataDescriptor_t, const void *, + const cudnnSeqDataDescriptor_t, const void *, size_t, const void *, + void *, size_t, void *, size_t, void *); + static auto func_ptr = + LoadSymbol("cudnnMultiHeadAttnBackwardWeights"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, attnDesc, addGrad, qDesc, queries, kDesc, keys, vDesc, + values, doDesc, dout, weightSizeInBytes, weights, dweights, + workSpaceSizeInBytes, workSpace, reserveSpaceSizeInBytes, + reserveSpace); +} + +cudnnStatus_t CUDNNWINAPI +cudnnCreateCTCLossDescriptor(cudnnCTCLossDescriptor_t *ctcLossDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t *); + static auto func_ptr = LoadSymbol("cudnnCreateCTCLossDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor( + cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t); + static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc, compType); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptorEx( + cudnnCTCLossDescriptor_t ctcLossDesc, 
cudnnDataType_t compType, + cudnnLossNormalizationMode_t normMode, cudnnNanPropagation_t gradMode) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnCTCLossDescriptor_t, cudnnDataType_t, cudnnLossNormalizationMode_t, + cudnnNanPropagation_t); + static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptorEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc, compType, normMode, gradMode); +} + +cudnnStatus_t CUDNNWINAPI cudnnSetCTCLossDescriptor_v8( + cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t compType, + cudnnLossNormalizationMode_t normMode, cudnnNanPropagation_t gradMode, + int maxLabelLength) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnCTCLossDescriptor_t, cudnnDataType_t, cudnnLossNormalizationMode_t, + cudnnNanPropagation_t, int); + static auto func_ptr = LoadSymbol("cudnnSetCTCLossDescriptor_v8"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc, compType, normMode, gradMode, maxLabelLength); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor( + cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType) { + using FuncPtr = + cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t, cudnnDataType_t *); + static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc, compType); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptorEx( + cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType, + cudnnLossNormalizationMode_t *normMode, cudnnNanPropagation_t *gradMode) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnCTCLossDescriptor_t, cudnnDataType_t *, + cudnnLossNormalizationMode_t *, cudnnNanPropagation_t *); + static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptorEx"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc, compType, normMode, gradMode); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossDescriptor_v8( + 
cudnnCTCLossDescriptor_t ctcLossDesc, cudnnDataType_t *compType, + cudnnLossNormalizationMode_t *normMode, cudnnNanPropagation_t *gradMode, + int *maxLabelLength) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnCTCLossDescriptor_t, cudnnDataType_t *, + cudnnLossNormalizationMode_t *, cudnnNanPropagation_t *, int *); + static auto func_ptr = LoadSymbol("cudnnGetCTCLossDescriptor_v8"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc, compType, normMode, gradMode, maxLabelLength); +} + +cudnnStatus_t CUDNNWINAPI +cudnnDestroyCTCLossDescriptor(cudnnCTCLossDescriptor_t ctcLossDesc) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(cudnnCTCLossDescriptor_t); + static auto func_ptr = LoadSymbol("cudnnDestroyCTCLossDescriptor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(ctcLossDesc); +} + +cudnnStatus_t CUDNNWINAPI cudnnCTCLoss( + cudnnHandle_t handle, + const cudnnTensorDescriptor_t + probsDesc, /* Tensor descriptor for probabilities, the dimensions are + T,N,A (T is the timing steps, N is the + mini batch size, A is the alphabet size) */ + const void *probs, /* probabilities after softmax, in GPU memory */ + const int hostLabels[], /* labels, in CPU memory */ + const int hostLabelLengths[], /* the length of each label, in CPU memory */ + const int hostInputLengths[], /* the lengths of timing steps in each batch, + in CPU memory */ + void *costs, /* the returned costs of CTC, in GPU memory */ + const cudnnTensorDescriptor_t + gradientsDesc, /* Tensor descriptor for gradients, the dimensions are + T,N,A */ + void *gradients, /* the returned CTC gradients, in GPU memory, to compute + costs only, set it to NULL */ + cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ + cudnnCTCLossDescriptor_t ctcLossDesc, + void *workspace, /* pointer to the workspace, in GPU memory */ + size_t workSpaceSizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const 
cudnnTensorDescriptor_t, const void *, const int[], + const int[], const int[], void *, const cudnnTensorDescriptor_t, void *, + cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, void *, size_t); + static auto func_ptr = LoadSymbol("cudnnCTCLoss"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, probsDesc, probs, hostLabels, hostLabelLengths, + hostInputLengths, costs, gradientsDesc, gradients, algo, + ctcLossDesc, workspace, workSpaceSizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnCTCLoss_v8( + cudnnHandle_t handle, + cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ + cudnnCTCLossDescriptor_t ctcLossDesc, + const cudnnTensorDescriptor_t + probsDesc, /* Tensor descriptor for probabilities, the dimensions are + T,N,A (T is the timing steps, N is the + mini batch size, A is the alphabet size) */ + const void *probs, /* probabilities after softmax, in GPU memory */ + const int labels[], /* labels, in GPU memory */ + const int labelLengths[], /* the length of each label, in GPU memory */ + const int inputLengths[], /* the lengths of timing steps in each batch, in + GPU memory */ + void *costs, /* the returned costs of CTC, in GPU memory */ + const cudnnTensorDescriptor_t + gradientsDesc, /* Tensor descriptor for gradients, the dimensions are + T,N,A */ + void *gradients, /* the returned CTC gradients, in GPU memory, to compute + costs only, set it to NULL */ + size_t workSpaceSizeInBytes, /* size of the workspace */ + void *workspace) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, + const cudnnTensorDescriptor_t, const void *, const int[], const int[], + const int[], void *, const cudnnTensorDescriptor_t, void *, size_t, + void *); + static auto func_ptr = LoadSymbol("cudnnCTCLoss_v8"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, ctcLossDesc, probsDesc, probs, labels, + labelLengths, inputLengths, costs, 
gradientsDesc, gradients, + workSpaceSizeInBytes, workspace); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize( + cudnnHandle_t handle, + const cudnnTensorDescriptor_t + probsDesc, /* Tensor descriptor for probabilities, the dimensions are + T,N,A (T is the + timing steps, N is the mini batch size, A is the alphabet + size) */ + const cudnnTensorDescriptor_t + gradientsDesc, /* Tensor descriptor for gradients, the + dimensions are T,N,A. To compute costs + only, set it to NULL */ + const int *labels, /* labels, in CPU memory */ + const int *labelLengths, /* the length of each label, in CPU memory */ + const int *inputLengths, /* the lengths of timing steps in each batch, in + CPU memory */ + cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ + cudnnCTCLossDescriptor_t ctcLossDesc, size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, const cudnnTensorDescriptor_t, + const cudnnTensorDescriptor_t, const int *, const int *, const int *, + cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetCTCLossWorkspaceSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, probsDesc, gradientsDesc, labels, labelLengths, + inputLengths, algo, ctcLossDesc, sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnGetCTCLossWorkspaceSize_v8( + cudnnHandle_t handle, + cudnnCTCLossAlgo_t algo, /* algorithm selected, supported now 0 and 1 */ + cudnnCTCLossDescriptor_t ctcLossDesc, + const cudnnTensorDescriptor_t + probsDesc, /* Tensor descriptor for probabilities, the dimensions are + T,N,A (T is the + timing steps, N is the mini batch size, A is the alphabet + size) */ + const cudnnTensorDescriptor_t + gradientsDesc, /* Tensor descriptor for gradients, the + dimensions are T,N,A. 
To compute costs + only, set it to NULL */ + size_t *sizeInBytes) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)( + cudnnHandle_t, cudnnCTCLossAlgo_t, cudnnCTCLossDescriptor_t, + const cudnnTensorDescriptor_t, const cudnnTensorDescriptor_t, size_t *); + static auto func_ptr = LoadSymbol("cudnnGetCTCLossWorkspaceSize_v8"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, ctcLossDesc, probsDesc, gradientsDesc, + sizeInBytes); +} + +cudnnStatus_t CUDNNWINAPI cudnnAdvTrainVersionCheck(void) { + using FuncPtr = cudnnStatus_t(CUDNNWINAPI *)(); + static auto func_ptr = LoadSymbol("cudnnAdvTrainVersionCheck"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(); +} + +} // extern "C" diff --git a/tensorflow/stream_executor/cuda/cudnn_stub.cc b/tensorflow/stream_executor/cuda/cudnn_stub.cc index 5a05437480e..85141973ed4 100644 --- a/tensorflow/stream_executor/cuda/cudnn_stub.cc +++ b/tensorflow/stream_executor/cuda/cudnn_stub.cc @@ -60,5 +60,5 @@ cudnnStatus_t GetSymbolNotFoundError() { return CUDNN_STATUS_INTERNAL_ERROR; } #elif CUDNN_MINOR < 6 #include "tensorflow/stream_executor/cuda/cudnn_7_4.inc" #else -#include "tensorflow/stream_executor/cuda/cudnn_7_6.inc" +#include "tensorflow/stream_executor/cuda/cudnn_8_0.inc" #endif diff --git a/tensorflow/stream_executor/cuda/cusparse_12_0.inc b/tensorflow/stream_executor/cuda/cusparse_12_0.inc new file mode 100644 index 00000000000..91641482860 --- /dev/null +++ b/tensorflow/stream_executor/cuda/cusparse_12_0.inc @@ -0,0 +1,6080 @@ +// Auto-generated, do not edit. 
+
+extern "C" {
+// Forwarding wrappers for the cuSPARSE management API.
+//
+// Every wrapper in this group follows one pattern: it names the signature of
+// the like-named library entry point as FuncPtr, resolves that entry point
+// once through LoadSymbol<FuncPtr>() (the result is cached in a function-local
+// static), and forwards its arguments unchanged.
+//
+// When the lookup yields a null pointer:
+//   * wrappers returning cusparseStatus_t return GetSymbolNotFoundError();
+//   * the two error-text wrappers return a fixed message string;
+//   * the four descriptor getters, which have no status return, return a
+//     fixed enumerator instead of calling through the null pointer.
+cusparseStatus_t CUSPARSEAPI cusparseCreate(cusparseHandle_t *handle) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCreate");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseDestroy(cusparseHandle_t handle) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDestroy");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseGetVersion(cusparseHandle_t handle,
+                                                int *version) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetVersion");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, version);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseGetProperty(libraryPropertyType type,
+                                                 int *value) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(libraryPropertyType, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetProperty");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(type, value);
+}
+
+// Returns a fixed message instead of a status when the symbol is missing.
+const char *CUSPARSEAPI cusparseGetErrorName(cusparseStatus_t status) {
+  using FuncPtr = const char *(CUSPARSEAPI *)(cusparseStatus_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetErrorName");
+  if (!func_ptr) return "cusparseGetErrorName symbol not found.";
+  return func_ptr(status);
+}
+
+// Returns a fixed message instead of a status when the symbol is missing.
+const char *CUSPARSEAPI cusparseGetErrorString(cusparseStatus_t status) {
+  using FuncPtr = const char *(CUSPARSEAPI *)(cusparseStatus_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetErrorString");
+  if (!func_ptr) return "cusparseGetErrorString symbol not found.";
+  return func_ptr(status);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseSetStream(cusparseHandle_t handle,
+                                               cudaStream_t streamId) {
+  using FuncPtr =
+      cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSetStream");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, streamId);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseGetStream(cusparseHandle_t handle,
+                                               cudaStream_t *streamId) {
+  using FuncPtr =
+      cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cudaStream_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetStream");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, streamId);
+}
+
+cusparseStatus_t CUSPARSEAPI
+cusparseGetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t *mode) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t,
+                                                  cusparsePointerMode_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetPointerMode");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, mode);
+}
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSetPointerMode(cusparseHandle_t handle, cusparsePointerMode_t mode) {
+  using FuncPtr =
+      cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, cusparsePointerMode_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSetPointerMode");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, mode);
+}
+
+cusparseStatus_t CUSPARSEAPI
+cusparseLoggerSetCallback(cusparseLoggerCallback_t callback) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseLoggerCallback_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseLoggerSetCallback");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(callback);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseLoggerSetFile(FILE *file) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(FILE *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseLoggerSetFile");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(file);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseLoggerOpenFile(const char *logFile) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(const char *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseLoggerOpenFile");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(logFile);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseLoggerSetLevel(int level) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseLoggerSetLevel");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(level);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseLoggerSetMask(int mask) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseLoggerSetMask");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(mask);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseLoggerForceDisable(void) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)();
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseLoggerForceDisable");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr();
+}
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateMatDescr(cusparseMatDescr_t *descrA) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCreateMatDescr");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(descrA);
+}
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyMatDescr(cusparseMatDescr_t descrA) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDestroyMatDescr");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(descrA);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseSetMatType(cusparseMatDescr_t descrA,
+                                                cusparseMatrixType_t type) {
+  using FuncPtr =
+      cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseMatrixType_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSetMatType");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(descrA, type);
+}
+
+cusparseMatrixType_t CUSPARSEAPI
+cusparseGetMatType(const cusparseMatDescr_t descrA) {
+  using FuncPtr = cusparseMatrixType_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatType");
+  // The return type carries no error status, so a missing symbol cannot be
+  // reported; return a fixed enumerator rather than call a null pointer.
+  if (!func_ptr) return CUSPARSE_MATRIX_TYPE_GENERAL;
+  return func_ptr(descrA);
+}
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSetMatFillMode(cusparseMatDescr_t descrA, cusparseFillMode_t fillMode) {
+  using FuncPtr =
+      cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseFillMode_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSetMatFillMode");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(descrA, fillMode);
+}
+
+cusparseFillMode_t CUSPARSEAPI
+cusparseGetMatFillMode(const cusparseMatDescr_t descrA) {
+  using FuncPtr = cusparseFillMode_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatFillMode");
+  // No status return: fall back to a fixed enumerator when the symbol is
+  // missing instead of calling through a null pointer.
+  if (!func_ptr) return CUSPARSE_FILL_MODE_LOWER;
+  return func_ptr(descrA);
+}
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSetMatDiagType(cusparseMatDescr_t descrA, cusparseDiagType_t diagType) {
+  using FuncPtr =
+      cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseDiagType_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSetMatDiagType");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(descrA, diagType);
+}
+
+cusparseDiagType_t CUSPARSEAPI
+cusparseGetMatDiagType(const cusparseMatDescr_t descrA) {
+  using FuncPtr = cusparseDiagType_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatDiagType");
+  // No status return: fall back to a fixed enumerator when the symbol is
+  // missing instead of calling through a null pointer.
+  if (!func_ptr) return CUSPARSE_DIAG_TYPE_NON_UNIT;
+  return func_ptr(descrA);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseSetMatIndexBase(cusparseMatDescr_t descrA,
+                                                     cusparseIndexBase_t base) {
+  using FuncPtr =
+      cusparseStatus_t(CUSPARSEAPI *)(cusparseMatDescr_t, cusparseIndexBase_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSetMatIndexBase");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(descrA, base);
+}
+
+cusparseIndexBase_t CUSPARSEAPI
+cusparseGetMatIndexBase(const cusparseMatDescr_t descrA) {
+  using FuncPtr = cusparseIndexBase_t(CUSPARSEAPI *)(const cusparseMatDescr_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetMatIndexBase");
+  // No status return: fall back to a fixed enumerator when the symbol is
+  // missing instead of calling through a null pointer.
+  if (!func_ptr) return CUSPARSE_INDEX_BASE_ZERO;
+  return func_ptr(descrA);
+}
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateCsric02Info(csric02Info_t *info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCreateCsric02Info");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseDestroyCsric02Info(csric02Info_t info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csric02Info_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDestroyCsric02Info");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseCreateBsric02Info(bsric02Info_t *info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCreateBsric02Info");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseDestroyBsric02Info(bsric02Info_t info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsric02Info_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDestroyBsric02Info");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseCreateCsrilu02Info(csrilu02Info_t *info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCreateCsrilu02Info");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseDestroyCsrilu02Info(csrilu02Info_t info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csrilu02Info_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDestroyCsrilu02Info");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseCreateBsrilu02Info(bsrilu02Info_t *info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCreateBsrilu02Info");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrilu02Info(bsrilu02Info_t info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrilu02Info_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDestroyBsrilu02Info");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsv2Info(bsrsv2Info_t *info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCreateBsrsv2Info");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsv2Info(bsrsv2Info_t info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsv2Info_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDestroyBsrsv2Info");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseCreateBsrsm2Info(bsrsm2Info_t *info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCreateBsrsm2Info");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseDestroyBsrsm2Info(bsrsm2Info_t info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(bsrsm2Info_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDestroyBsrsm2Info");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseCreateCsru2csrInfo(csru2csrInfo_t *info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCreateCsru2csrInfo");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseDestroyCsru2csrInfo(csru2csrInfo_t info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(csru2csrInfo_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDestroyCsru2csrInfo");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI
+cusparseCreateColorInfo(cusparseColorInfo_t *info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCreateColorInfo");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI
+cusparseDestroyColorInfo(cusparseColorInfo_t info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDestroyColorInfo");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseSetColorAlgs(cusparseColorInfo_t info,
+                                                  cusparseColorAlg_t alg) {
+  using FuncPtr =
+      cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t, cusparseColorAlg_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSetColorAlgs");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info, alg);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseGetColorAlgs(cusparseColorInfo_t info,
+                                                  cusparseColorAlg_t *alg) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseColorInfo_t,
+                                                  cusparseColorAlg_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseGetColorAlgs");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info, alg);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseCreatePruneInfo(pruneInfo_t *info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCreatePruneInfo");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseDestroyPruneInfo(pruneInfo_t info) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(pruneInfo_t);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDestroyPruneInfo");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(info);
+}
+
+cusparseStatus_t CUSPARSEAPI
+cusparseSgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, + int n, const float *alpha, const float *A, int lda, int nnz, + const float *xVal, const int *xInd, const float *beta, float *y, + cusparseIndexBase_t idxBase, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, int, int, const float *, + const float *, int, int, const float *, const int *, const float *, + float *, cusparseIndexBase_t, void *); + static auto func_ptr = LoadSymbol("cusparseSgemvi"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, + idxBase, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, + int m, int n, int nnz, int *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, int, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseSgemvi_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transA, m, n, nnz, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDgemvi(cusparseHandle_t handle, cusparseOperation_t transA, int m, + int n, const double *alpha, const double *A, int lda, int nnz, + const double *xVal, const int *xInd, const double *beta, + double *y, cusparseIndexBase_t idxBase, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, int, int, const double *, + const double *, int, int, const double *, const int *, const double *, + double *, cusparseIndexBase_t, void *); + static auto func_ptr = LoadSymbol("cusparseDgemvi"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, + idxBase, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, + int m, 
int n, int nnz, int *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, int, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseDgemvi_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transA, m, n, nnz, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgemvi( + cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, + const cuComplex *alpha, const cuComplex *A, int lda, int nnz, + const cuComplex *xVal, const int *xInd, const cuComplex *beta, cuComplex *y, + cusparseIndexBase_t idxBase, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, int, int, const cuComplex *, + const cuComplex *, int, int, const cuComplex *, const int *, + const cuComplex *, cuComplex *, cusparseIndexBase_t, void *); + static auto func_ptr = LoadSymbol("cusparseCgemvi"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, + idxBase, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI +cusparseCgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, + int m, int n, int nnz, int *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, int, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseCgemvi_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transA, m, n, nnz, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgemvi( + cusparseHandle_t handle, cusparseOperation_t transA, int m, int n, + const cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, int nnz, + const cuDoubleComplex *xVal, const int *xInd, const cuDoubleComplex *beta, + cuDoubleComplex *y, cusparseIndexBase_t idxBase, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, int, int, const 
cuDoubleComplex *, + const cuDoubleComplex *, int, int, const cuDoubleComplex *, const int *, + const cuDoubleComplex *, cuDoubleComplex *, cusparseIndexBase_t, void *); + static auto func_ptr = LoadSymbol("cusparseZgemvi"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transA, m, n, alpha, A, lda, nnz, xVal, xInd, beta, y, + idxBase, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI +cusparseZgemvi_bufferSize(cusparseHandle_t handle, cusparseOperation_t transA, + int m, int n, int nnz, int *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, int, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseZgemvi_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, transA, m, n, nnz, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsrmv( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int mb, int nb, int nnzb, const float *alpha, + const cusparseMatDescr_t descrA, const float *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, + const float *x, const float *beta, float *y) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, + const float *, const cusparseMatDescr_t, const float *, const int *, + const int *, int, const float *, const float *, float *); + static auto func_ptr = LoadSymbol("cusparseSbsrmv"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, + bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, + x, beta, y); +} + +cusparseStatus_t CUSPARSEAPI cusparseDbsrmv( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int mb, int nb, int nnzb, const double *alpha, + const cusparseMatDescr_t descrA, const double *bsrSortedValA, + const int *bsrSortedRowPtrA, const int 
*bsrSortedColIndA, int blockDim, + const double *x, const double *beta, double *y) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, + const double *, const cusparseMatDescr_t, const double *, const int *, + const int *, int, const double *, const double *, double *); + static auto func_ptr = LoadSymbol("cusparseDbsrmv"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, + bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, + x, beta, y); +} + +cusparseStatus_t CUSPARSEAPI +cusparseCbsrmv(cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int mb, int nb, int nnzb, + const cuComplex *alpha, const cusparseMatDescr_t descrA, + const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA, + const int *bsrSortedColIndA, int blockDim, const cuComplex *x, + const cuComplex *beta, cuComplex *y) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, + const cuComplex *, const cusparseMatDescr_t, const cuComplex *, + const int *, const int *, int, const cuComplex *, const cuComplex *, + cuComplex *); + static auto func_ptr = LoadSymbol("cusparseCbsrmv"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, + bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, + x, beta, y); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsrmv( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int mb, int nb, int nnzb, + const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, + const cuDoubleComplex *bsrSortedValA, const int *bsrSortedRowPtrA, + const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, + const cuDoubleComplex *beta, cuDoubleComplex *y) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + 
cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, + const cuDoubleComplex *, const cusparseMatDescr_t, + const cuDoubleComplex *, const int *, const int *, int, + const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cusparseZbsrmv"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, mb, nb, nnzb, alpha, descrA, + bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockDim, + x, beta, y); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int sizeOfMask, int mb, int nb, + int nnzb, const float *alpha, const cusparseMatDescr_t descrA, + const float *bsrSortedValA, const int *bsrSortedMaskPtrA, + const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, + const int *bsrSortedColIndA, int blockDim, const float *x, + const float *beta, float *y) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, + int, const float *, const cusparseMatDescr_t, const float *, const int *, + const int *, const int *, const int *, int, const float *, const float *, + float *); + static auto func_ptr = LoadSymbol("cusparseSbsrxmv"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, + bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, + bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDbsrxmv(cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int sizeOfMask, int mb, int nb, + int nnzb, const double *alpha, const cusparseMatDescr_t descrA, + const double *bsrSortedValA, const int *bsrSortedMaskPtrA, + const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, + const int *bsrSortedColIndA, int blockDim, const double *x, + const double *beta, double *y) { + 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, + int, const double *, const cusparseMatDescr_t, const double *, + const int *, const int *, const int *, const int *, int, const double *, + const double *, double *); + static auto func_ptr = LoadSymbol("cusparseDbsrxmv"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, + bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, + bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsrxmv( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, + const cuComplex *alpha, const cusparseMatDescr_t descrA, + const cuComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, + const int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, + const int *bsrSortedColIndA, int blockDim, const cuComplex *x, + const cuComplex *beta, cuComplex *y) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, + int, const cuComplex *, const cusparseMatDescr_t, const cuComplex *, + const int *, const int *, const int *, const int *, int, + const cuComplex *, const cuComplex *, cuComplex *); + static auto func_ptr = LoadSymbol("cusparseCbsrxmv"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, + bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, + bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsrxmv( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int sizeOfMask, int mb, int nb, int nnzb, + const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA, + const cuDoubleComplex *bsrSortedValA, const int *bsrSortedMaskPtrA, + const 
int *bsrSortedRowPtrA, const int *bsrSortedEndPtrA, + const int *bsrSortedColIndA, int blockDim, const cuDoubleComplex *x, + const cuDoubleComplex *beta, cuDoubleComplex *y) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, int, + int, const cuDoubleComplex *, const cusparseMatDescr_t, + const cuDoubleComplex *, const int *, const int *, const int *, + const int *, int, const cuDoubleComplex *, const cuDoubleComplex *, + cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cusparseZbsrxmv"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, sizeOfMask, mb, nb, nnzb, alpha, descrA, + bsrSortedValA, bsrSortedMaskPtrA, bsrSortedRowPtrA, + bsrSortedEndPtrA, bsrSortedColIndA, blockDim, x, beta, y); +} + +cusparseStatus_t CUSPARSEAPI cusparseXbsrsv2_zeroPivot(cusparseHandle_t handle, + bsrsv2Info_t info, + int *position) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsv2Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseXbsrsv2_zeroPivot"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, info, position); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int mb, int nnzb, + const cusparseMatDescr_t descrA, float *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, + bsrsv2Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, + const cusparseMatDescr_t, float *, const int *, const int *, int, + bsrsv2Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, blockDim, 
info, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int mb, int nnzb, + const cusparseMatDescr_t descrA, double *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, + bsrsv2Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, + const cusparseMatDescr_t, double *, const int *, const int *, int, + bsrsv2Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, + bsrsv2Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, + const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, + bsrsv2Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, + 
bsrsv2Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, + const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, + int, bsrsv2Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int mb, int nnzb, + const cusparseMatDescr_t descrA, float *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, + bsrsv2Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, + const cusparseMatDescr_t, float *, const int *, const int *, int, + bsrsv2Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseSbsrsv2_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, + pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int mb, int nnzb, + const cusparseMatDescr_t descrA, double *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, + bsrsv2Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, + const cusparseMatDescr_t, double *, const int *, const int *, int, + bsrsv2Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseDbsrsv2_bufferSizeExt"); + 
if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, + pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, + bsrsv2Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, + const cusparseMatDescr_t, cuComplex *, const int *, const int *, int, + bsrsv2Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseCbsrsv2_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, + pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, + cusparseOperation_t transA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockSize, + bsrsv2Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int, + const cusparseMatDescr_t, cuDoubleComplex *, const int *, const int *, + int, bsrsv2Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseZbsrsv2_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, blockSize, info, + pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_analysis( + cusparseHandle_t handle, cusparseDirection_t dirA, + 
cusparseOperation_t transA, int mb, int nnzb,
+    const cusparseMatDescr_t descrA, const float *bsrSortedValA,
+    const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
+    bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int,
+      const cusparseMatDescr_t, const float *, const int *, const int *, int,
+      bsrsv2Info_t, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSbsrsv2_analysis");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA,
+                  bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy,
+                  pBuffer);
+}
+
+// Every stub below follows one pattern: it passes its own name to
+// LoadSymbol<FuncPtr>, keeps the result in a function-local static, returns
+// GetSymbolNotFoundError() when that result is null, and otherwise forwards
+// all of its arguments unchanged. FuncPtr spells out the pointer type that
+// matches the stub's own parameter list.
+
+// Forwards to the cusparseDbsrsv2_analysis symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_analysis(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, int mb, int nnzb,
+    const cusparseMatDescr_t descrA, const double *bsrSortedValA,
+    const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
+    bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int,
+      const cusparseMatDescr_t, const double *, const int *, const int *, int,
+      bsrsv2Info_t, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDbsrsv2_analysis");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA,
+                  bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy,
+                  pBuffer);
+}
+
+// Forwards to the cusparseCbsrsv2_analysis symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_analysis(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, int mb, int nnzb,
+    const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA,
+    const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
+    bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int,
+      const cusparseMatDescr_t, const cuComplex *, const int *, const int *,
+      int, bsrsv2Info_t, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCbsrsv2_analysis");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA,
+                  bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy,
+                  pBuffer);
+}
+
+// Forwards to the cusparseZbsrsv2_analysis symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_analysis(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, int mb, int nnzb,
+    const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA,
+    const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
+    bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int,
+      const cusparseMatDescr_t, const cuDoubleComplex *, const int *,
+      const int *, int, bsrsv2Info_t, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseZbsrsv2_analysis");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, mb, nnzb, descrA, bsrSortedValA,
+                  bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, policy,
+                  pBuffer);
+}
+
+// Forwards to the cusparseSbsrsv2_solve symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseSbsrsv2_solve(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, int mb, int nnzb, const float *alpha,
+    const cusparseMatDescr_t descrA, const float *bsrSortedValA,
+    const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
+    bsrsv2Info_t info, const float *f, float *x, cusparseSolvePolicy_t policy,
+    void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int,
+      const float *, const cusparseMatDescr_t, const float *, const int *,
+      const int *, int, bsrsv2Info_t, const float *, float *,
+      cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSbsrsv2_solve");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA,
+                  bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x,
+                  policy, pBuffer);
+}
+
+// Forwards to the cusparseDbsrsv2_solve symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseDbsrsv2_solve(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, int mb, int nnzb, const double *alpha,
+    const cusparseMatDescr_t descrA, const double *bsrSortedValA,
+    const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
+    bsrsv2Info_t info, const double *f, double *x, cusparseSolvePolicy_t policy,
+    void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int,
+      const double *, const cusparseMatDescr_t, const double *, const int *,
+      const int *, int, bsrsv2Info_t, const double *, double *,
+      cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDbsrsv2_solve");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA,
+                  bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x,
+                  policy, pBuffer);
+}
+
+// Forwards to the cusparseCbsrsv2_solve symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseCbsrsv2_solve(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, int mb, int nnzb, const cuComplex *alpha,
+    const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA,
+    const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
+    bsrsv2Info_t info, const cuComplex *f, cuComplex *x,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int,
+      const cuComplex *, const cusparseMatDescr_t, const cuComplex *,
+      const int *, const int *, int, bsrsv2Info_t, const cuComplex *,
+      cuComplex *, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCbsrsv2_solve");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA,
+                  bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x,
+                  policy, pBuffer);
+}
+
+// Forwards to the cusparseZbsrsv2_solve symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseZbsrsv2_solve(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, int mb, int nnzb, const cuDoubleComplex *alpha,
+    const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA,
+    const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim,
+    bsrsv2Info_t info, const cuDoubleComplex *f, cuDoubleComplex *x,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t, int, int,
+      const cuDoubleComplex *, const cusparseMatDescr_t,
+      const cuDoubleComplex *, const int *, const int *, int, bsrsv2Info_t,
+      const cuDoubleComplex *, cuDoubleComplex *, cusparseSolvePolicy_t,
+      void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseZbsrsv2_solve");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, mb, nnzb, alpha, descrA, bsrSortedValA,
+                  bsrSortedRowPtrA, bsrSortedColIndA, blockDim, info, f, x,
+                  policy, pBuffer);
+}
+
+// Forwards to the cusparseSbsrmm symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseSbsrmm(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n,
+    int kb, int nnzb, const float *alpha, const cusparseMatDescr_t descrA,
+    const float *bsrSortedValA, const int *bsrSortedRowPtrA,
+    const int *bsrSortedColIndA, const int blockSize, const float *B,
+    const int ldb, const float *beta, float *C, int ldc) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, int, const float *,
+      const cusparseMatDescr_t, const float *, const int *, const int *,
+      const int, const float *, const int, const float *, float *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSbsrmm");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA,
+                  bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize,
+                  B, ldb, beta, C, ldc);
+}
+
+// Forwards to the cusparseDbsrmm symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseDbsrmm(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n,
+    int kb, int nnzb, const double *alpha, const cusparseMatDescr_t descrA,
+    const double *bsrSortedValA, const int *bsrSortedRowPtrA,
+    const int *bsrSortedColIndA, const int blockSize, const double *B,
+    const int ldb, const double *beta, double *C, int ldc) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, int, const double *,
+      const cusparseMatDescr_t, const double *, const int *, const int *,
+      const int, const double *, const int, const double *, double *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDbsrmm");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA,
+                  bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize,
+                  B, ldb, beta, C, ldc);
+}
+
+// Forwards to the cusparseCbsrmm symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseCbsrmm(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n,
+    int kb, int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA,
+    const cuComplex *bsrSortedValA, const int *bsrSortedRowPtrA,
+    const int *bsrSortedColIndA, const int blockSize, const cuComplex *B,
+    const int ldb, const cuComplex *beta, cuComplex *C, int ldc) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, int, const cuComplex *,
+      const cusparseMatDescr_t, const cuComplex *, const int *, const int *,
+      const int, const cuComplex *, const int, const cuComplex *, cuComplex *,
+      int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCbsrmm");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA,
+                  bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize,
+                  B, ldb, beta, C, ldc);
+}
+
+// Forwards to the cusparseZbsrmm symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseZbsrmm(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n,
+    int kb, int nnzb, const cuDoubleComplex *alpha,
+    const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA,
+    const int *bsrSortedRowPtrA, const int *bsrSortedColIndA,
+    const int blockSize, const cuDoubleComplex *B, const int ldb,
+    const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, int, const cuDoubleComplex *,
+      const cusparseMatDescr_t, const cuDoubleComplex *, const int *,
+      const int *, const int, const cuDoubleComplex *, const int,
+      const cuDoubleComplex *, cuDoubleComplex *, int);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseZbsrmm");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transB, mb, n, kb, nnzb, alpha, descrA,
+                  bsrSortedValA, bsrSortedRowPtrA, bsrSortedColIndA, blockSize,
+                  B, ldb, beta, C, ldc);
+}
+
+// Forwards to the cusparseXbsrsm2_zeroPivot symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseXbsrsm2_zeroPivot(cusparseHandle_t handle,
+                                                       bsrsm2Info_t info,
+                                                       int *position) {
+  using FuncPtr =
+      cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrsm2Info_t, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseXbsrsm2_zeroPivot");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, info, position);
+}
+
+// Forwards to the cusparseSbsrsm2_bufferSize symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSize(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n,
+    int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal,
+    const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize,
+    bsrsm2Info_t info, int *pBufferSizeInBytes) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *,
+      const int *, const int *, int, bsrsm2Info_t, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSbsrsm2_bufferSize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, pBufferSizeInBytes);
+}
+
+// Forwards to the cusparseDbsrsm2_bufferSize symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSize(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n,
+    int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal,
+    const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize,
+    bsrsm2Info_t info, int *pBufferSizeInBytes) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *,
+      const int *, const int *, int, bsrsm2Info_t, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDbsrsm2_bufferSize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, pBufferSizeInBytes);
+}
+
+// Forwards to the cusparseCbsrsm2_bufferSize symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSize(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n,
+    int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal,
+    const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize,
+    bsrsm2Info_t info, int *pBufferSizeInBytes) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cusparseMatDescr_t, cuComplex *,
+      const int *, const int *, int, bsrsm2Info_t, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCbsrsm2_bufferSize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, pBufferSizeInBytes);
+}
+
+// Forwards to the cusparseZbsrsm2_bufferSize symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSize(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n,
+    int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal,
+    const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize,
+    bsrsm2Info_t info, int *pBufferSizeInBytes) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cusparseMatDescr_t,
+      cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseZbsrsm2_bufferSize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, pBufferSizeInBytes);
+}
+
+// Forwards to the cusparseSbsrsm2_bufferSizeExt symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_bufferSizeExt(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n,
+    int nnzb, const cusparseMatDescr_t descrA, float *bsrSortedVal,
+    const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize,
+    bsrsm2Info_t info, size_t *pBufferSize) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cusparseMatDescr_t, float *,
+      const int *, const int *, int, bsrsm2Info_t, size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSbsrsm2_bufferSizeExt");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, pBufferSize);
+}
+
+// Forwards to the cusparseDbsrsm2_bufferSizeExt symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_bufferSizeExt(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n,
+    int nnzb, const cusparseMatDescr_t descrA, double *bsrSortedVal,
+    const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize,
+    bsrsm2Info_t info, size_t *pBufferSize) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cusparseMatDescr_t, double *,
+      const int *, const int *, int, bsrsm2Info_t, size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDbsrsm2_bufferSizeExt");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, pBufferSize);
+}
+
+// Forwards to the cusparseCbsrsm2_bufferSizeExt symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_bufferSizeExt(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n,
+    int nnzb, const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal,
+    const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize,
+    bsrsm2Info_t info, size_t *pBufferSize) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cusparseMatDescr_t, cuComplex *,
+      const int *, const int *, int, bsrsm2Info_t, size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCbsrsm2_bufferSizeExt");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, pBufferSize);
+}
+
+// Forwards to the cusparseZbsrsm2_bufferSizeExt symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_bufferSizeExt(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n,
+    int nnzb, const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal,
+    const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize,
+    bsrsm2Info_t info, size_t *pBufferSize) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cusparseMatDescr_t,
+      cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t, size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseZbsrsm2_bufferSizeExt");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transB, mb, n, nnzb, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, pBufferSize);
+}
+
+// Forwards to the cusparseSbsrsm2_analysis symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_analysis(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n,
+    int nnzb, const cusparseMatDescr_t descrA, const float *bsrSortedVal,
+    const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize,
+    bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cusparseMatDescr_t,
+      const float *, const int *, const int *, int, bsrsm2Info_t,
+      cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSbsrsm2_analysis");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, policy, pBuffer);
+}
+
+// Forwards to the cusparseDbsrsm2_analysis symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_analysis(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n,
+    int nnzb, const cusparseMatDescr_t descrA, const double *bsrSortedVal,
+    const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize,
+    bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cusparseMatDescr_t,
+      const double *, const int *, const int *, int, bsrsm2Info_t,
+      cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDbsrsm2_analysis");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, policy, pBuffer);
+}
+
+// Forwards to the cusparseCbsrsm2_analysis symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_analysis(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n,
+    int nnzb, const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal,
+    const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize,
+    bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cusparseMatDescr_t,
+      const cuComplex *, const int *, const int *, int, bsrsm2Info_t,
+      cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCbsrsm2_analysis");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, policy, pBuffer);
+}
+
+// Forwards to the cusparseZbsrsm2_analysis symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_analysis(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n,
+    int nnzb, const cusparseMatDescr_t descrA,
+    const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr,
+    const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cusparseMatDescr_t,
+      const cuDoubleComplex *, const int *, const int *, int, bsrsm2Info_t,
+      cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseZbsrsm2_analysis");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, policy, pBuffer);
+}
+
+// Forwards to the cusparseSbsrsm2_solve symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseSbsrsm2_solve(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n,
+    int nnzb, const float *alpha, const cusparseMatDescr_t descrA,
+    const float *bsrSortedVal, const int *bsrSortedRowPtr,
+    const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info,
+    const float *B, int ldb, float *X, int ldx, cusparseSolvePolicy_t policy,
+    void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const float *,
+      const cusparseMatDescr_t, const float *, const int *, const int *, int,
+      bsrsm2Info_t, const float *, int, float *, int, cusparseSolvePolicy_t,
+      void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSbsrsm2_solve");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, B, ldb, X, ldx, policy, pBuffer);
+}
+
+// Forwards to the cusparseDbsrsm2_solve symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseDbsrsm2_solve(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n,
+    int nnzb, const double *alpha, const cusparseMatDescr_t descrA,
+    const double *bsrSortedVal, const int *bsrSortedRowPtr,
+    const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info,
+    const double *B, int ldb, double *X, int ldx, cusparseSolvePolicy_t policy,
+    void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const double *,
+      const cusparseMatDescr_t, const double *, const int *, const int *, int,
+      bsrsm2Info_t, const double *, int, double *, int, cusparseSolvePolicy_t,
+      void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDbsrsm2_solve");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, B, ldb, X, ldx, policy, pBuffer);
+}
+
+// Forwards to the cusparseCbsrsm2_solve symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseCbsrsm2_solve(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n,
+    int nnzb, const cuComplex *alpha, const cusparseMatDescr_t descrA,
+    const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr,
+    const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info,
+    const cuComplex *B, int ldb, cuComplex *X, int ldx,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cuComplex *,
+      const cusparseMatDescr_t, const cuComplex *, const int *, const int *,
+      int, bsrsm2Info_t, const cuComplex *, int, cuComplex *, int,
+      cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCbsrsm2_solve");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, B, ldb, X, ldx, policy, pBuffer);
+}
+
+// Forwards to the cusparseZbsrsm2_solve symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseZbsrsm2_solve(
+    cusparseHandle_t handle, cusparseDirection_t dirA,
+    cusparseOperation_t transA, cusparseOperation_t transXY, int mb, int n,
+    int nnzb, const cuDoubleComplex *alpha, const cusparseMatDescr_t descrA,
+    const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr,
+    const int *bsrSortedColInd, int blockSize, bsrsm2Info_t info,
+    const cuDoubleComplex *B, int ldb, cuDoubleComplex *X, int ldx,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, cusparseDirection_t, cusparseOperation_t,
+      cusparseOperation_t, int, int, int, const cuDoubleComplex *,
+      const cusparseMatDescr_t, const cuDoubleComplex *, const int *,
+      const int *, int, bsrsm2Info_t, const cuDoubleComplex *, int,
+      cuDoubleComplex *, int, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseZbsrsm2_solve");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, dirA, transA, transXY, mb, n, nnzb, alpha, descrA,
+                  bsrSortedVal, bsrSortedRowPtr, bsrSortedColInd, blockSize,
+                  info, B, ldb, X, ldx, policy, pBuffer);
+}
+
+// Forwards to the cusparseScsrilu02_numericBoost symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_numericBoost(
+    cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol,
+    float *boost_val) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, csrilu02Info_t, int, double *, float *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseScsrilu02_numericBoost");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, info, enable_boost, tol, boost_val);
+}
+
+// Every stub below follows one pattern: it passes its own name to
+// LoadSymbol<FuncPtr>, keeps the result in a function-local static, returns
+// GetSymbolNotFoundError() when that result is null, and otherwise forwards
+// all of its arguments unchanged. FuncPtr spells out the pointer type that
+// matches the stub's own parameter list.
+
+// Forwards to the cusparseDcsrilu02_numericBoost symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_numericBoost(
+    cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol,
+    double *boost_val) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, csrilu02Info_t, int, double *, double *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDcsrilu02_numericBoost");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, info, enable_boost, tol, boost_val);
+}
+
+// Forwards to the cusparseCcsrilu02_numericBoost symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_numericBoost(
+    cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol,
+    cuComplex *boost_val) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, csrilu02Info_t, int, double *, cuComplex *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCcsrilu02_numericBoost");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, info, enable_boost, tol, boost_val);
+}
+
+// Forwards to the cusparseZcsrilu02_numericBoost symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_numericBoost(
+    cusparseHandle_t handle, csrilu02Info_t info, int enable_boost, double *tol,
+    cuDoubleComplex *boost_val) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, csrilu02Info_t, int, double *, cuDoubleComplex *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseZcsrilu02_numericBoost");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, info, enable_boost, tol, boost_val);
+}
+
+// Forwards to the cusparseXcsrilu02_zeroPivot symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseXcsrilu02_zeroPivot(
+    cusparseHandle_t handle, csrilu02Info_t info, int *position) {
+  using FuncPtr =
+      cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csrilu02Info_t, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseXcsrilu02_zeroPivot");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, info, position);
+}
+
+// Forwards to the cusparseScsrilu02_bufferSize symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSize(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    float *csrSortedValA, const int *csrSortedRowPtrA,
+    const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, float *,
+      const int *, const int *, csrilu02Info_t, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseScsrilu02_bufferSize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA,
+                  csrSortedColIndA, info, pBufferSizeInBytes);
+}
+
+// Forwards to the cusparseDcsrilu02_bufferSize symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSize(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    double *csrSortedValA, const int *csrSortedRowPtrA,
+    const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, double *,
+      const int *, const int *, csrilu02Info_t, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDcsrilu02_bufferSize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA,
+                  csrSortedColIndA, info, pBufferSizeInBytes);
+}
+
+// Forwards to the cusparseCcsrilu02_bufferSize symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSize(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    cuComplex *csrSortedValA, const int *csrSortedRowPtrA,
+    const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *,
+      const int *, const int *, csrilu02Info_t, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCcsrilu02_bufferSize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA,
+                  csrSortedColIndA, info, pBufferSizeInBytes);
+}
+
+// Forwards to the cusparseZcsrilu02_bufferSize symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSize(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA,
+    const int *csrSortedColIndA, csrilu02Info_t info, int *pBufferSizeInBytes) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *,
+      const int *, const int *, csrilu02Info_t, int *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseZcsrilu02_bufferSize");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA,
+                  csrSortedColIndA, info, pBufferSizeInBytes);
+}
+
+// Forwards to the cusparseScsrilu02_bufferSizeExt symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_bufferSizeExt(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd,
+    csrilu02Info_t info, size_t *pBufferSize) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, float *,
+      const int *, const int *, csrilu02Info_t, size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseScsrilu02_bufferSizeExt");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr,
+                  csrSortedColInd, info, pBufferSize);
+}
+
+// Forwards to the cusparseDcsrilu02_bufferSizeExt symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_bufferSizeExt(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    double *csrSortedVal, const int *csrSortedRowPtr,
+    const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, double *,
+      const int *, const int *, csrilu02Info_t, size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDcsrilu02_bufferSizeExt");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr,
+                  csrSortedColInd, info, pBufferSize);
+}
+
+// Forwards to the cusparseCcsrilu02_bufferSizeExt symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_bufferSizeExt(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    cuComplex *csrSortedVal, const int *csrSortedRowPtr,
+    const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *,
+      const int *, const int *, csrilu02Info_t, size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCcsrilu02_bufferSizeExt");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr,
+                  csrSortedColInd, info, pBufferSize);
+}
+
+// Forwards to the cusparseZcsrilu02_bufferSizeExt symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_bufferSizeExt(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr,
+    const int *csrSortedColInd, csrilu02Info_t info, size_t *pBufferSize) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *,
+      const int *, const int *, csrilu02Info_t, size_t *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseZcsrilu02_bufferSizeExt");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr,
+                  csrSortedColInd, info, pBufferSize);
+}
+
+// Forwards to the cusparseScsrilu02_analysis symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseScsrilu02_analysis(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    const float *csrSortedValA, const int *csrSortedRowPtrA,
+    const int *csrSortedColIndA, csrilu02Info_t info,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *,
+      const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseScsrilu02_analysis");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA,
+                  csrSortedColIndA, info, policy, pBuffer);
+}
+
+// Forwards to the cusparseDcsrilu02_analysis symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02_analysis(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    const double *csrSortedValA, const int *csrSortedRowPtrA,
+    const int *csrSortedColIndA, csrilu02Info_t info,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *,
+      const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDcsrilu02_analysis");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA,
+                  csrSortedColIndA, info, policy, pBuffer);
+}
+
+// Forwards to the cusparseCcsrilu02_analysis symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02_analysis(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    const cuComplex *csrSortedValA, const int *csrSortedRowPtrA,
+    const int *csrSortedColIndA, csrilu02Info_t info,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *,
+      const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCcsrilu02_analysis");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA,
+                  csrSortedColIndA, info, policy, pBuffer);
+}
+
+// Forwards to the cusparseZcsrilu02_analysis symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02_analysis(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA,
+    const int *csrSortedColIndA, csrilu02Info_t info,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t,
+      const cuDoubleComplex *, const int *, const int *, csrilu02Info_t,
+      cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseZcsrilu02_analysis");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA,
+                  csrSortedColIndA, info, policy, pBuffer);
+}
+
+// Forwards to the cusparseScsrilu02 symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseScsrilu02(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    float *csrSortedValA_valM, const int *csrSortedRowPtrA,
+    const int *csrSortedColIndA, csrilu02Info_t info,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, float *,
+      const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseScsrilu02");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA,
+                  csrSortedColIndA, info, policy, pBuffer);
+}
+
+// Forwards to the cusparseDcsrilu02 symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseDcsrilu02(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    double *csrSortedValA_valM, const int *csrSortedRowPtrA,
+    const int *csrSortedColIndA, csrilu02Info_t info,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, double *,
+      const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDcsrilu02");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA,
+                  csrSortedColIndA, info, policy, pBuffer);
+}
+
+// Forwards to the cusparseCcsrilu02 symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseCcsrilu02(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    cuComplex *csrSortedValA_valM, const int *csrSortedRowPtrA,
+    const int *csrSortedColIndA, csrilu02Info_t info,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *,
+      const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseCcsrilu02");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA,
+                  csrSortedColIndA, info, policy, pBuffer);
+}
+
+// Forwards to the cusparseZcsrilu02 symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseZcsrilu02(
+    cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA,
+    cuDoubleComplex *csrSortedValA_valM, const int *csrSortedRowPtrA,
+    const int *csrSortedColIndA, csrilu02Info_t info,
+    cusparseSolvePolicy_t policy, void *pBuffer) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *,
+      const int *, const int *, csrilu02Info_t, cusparseSolvePolicy_t, void *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseZcsrilu02");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA,
+                  csrSortedColIndA, info, policy, pBuffer);
+}
+
+// Forwards to the cusparseSbsrilu02_numericBoost symbol from LoadSymbol.
+cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_numericBoost(
+    cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol,
+    float *boost_val) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, bsrilu02Info_t, int, double *, float *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseSbsrilu02_numericBoost");
+  if (!func_ptr) return GetSymbolNotFoundError();
+  return func_ptr(handle, info, enable_boost, tol, boost_val);
+}
+
+cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_numericBoost(
+    cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol,
+    double *boost_val) {
+  using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(
+      cusparseHandle_t, bsrilu02Info_t, int, double *, double *);
+  static auto func_ptr = LoadSymbol<FuncPtr>("cusparseDbsrilu02_numericBoost");
+  if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, info, enable_boost, tol, boost_val); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_numericBoost( + cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, + cuComplex *boost_val) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, bsrilu02Info_t, int, double *, cuComplex *); + static auto func_ptr = LoadSymbol("cusparseCbsrilu02_numericBoost"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, info, enable_boost, tol, boost_val); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_numericBoost( + cusparseHandle_t handle, bsrilu02Info_t info, int enable_boost, double *tol, + cuDoubleComplex *boost_val) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, bsrilu02Info_t, int, double *, cuDoubleComplex *); + static auto func_ptr = LoadSymbol("cusparseZbsrilu02_numericBoost"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, info, enable_boost, tol, boost_val); +} + +cusparseStatus_t CUSPARSEAPI cusparseXbsrilu02_zeroPivot( + cusparseHandle_t handle, bsrilu02Info_t info, int *position) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsrilu02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseXbsrilu02_zeroPivot"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, info, position); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, float *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsrilu02Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + float *, const int *, const int *, int, bsrilu02Info_t, int *); + static auto func_ptr = 
LoadSymbol("cusparseSbsrilu02_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, double *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsrilu02Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + double *, const int *, const int *, int, bsrilu02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsrilu02Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuComplex *, const int *, const int *, int, bsrilu02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, + const int 
*bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsrilu02Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, float *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, + bsrilu02Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + float *, const int *, const int *, int, bsrilu02Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseSbsrilu02_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockSize, info, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, double *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, + bsrilu02Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + double *, const int *, const int *, int, bsrilu02Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseDbsrilu02_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, 
dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockSize, info, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, + bsrilu02Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuComplex *, const int *, const int *, int, bsrilu02Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseCbsrilu02_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockSize, info, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, + bsrilu02Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, + size_t *); + static auto func_ptr = LoadSymbol("cusparseZbsrilu02_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockSize, info, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02_analysis( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, float *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { + 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + float *, const int *, const int *, int, bsrilu02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseSbsrilu02_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02_analysis( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, double *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + double *, const int *, const int *, int, bsrilu02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseDbsrilu02_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02_analysis( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuComplex *, const int *, const int *, int, bsrilu02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseCbsrilu02_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, 
descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02_analysis( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseZbsrilu02_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsrilu02( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, float *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + float *, const int *, const int *, int, bsrilu02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseSbsrilu02"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDbsrilu02( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, double *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsrilu02Info_t info, cusparseSolvePolicy_t 
policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + double *, const int *, const int *, int, bsrilu02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseDbsrilu02"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsrilu02( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuComplex *, const int *, const int *, int, bsrilu02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseCbsrilu02"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsrilu02( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsrilu02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuDoubleComplex *, const int *, const int *, int, bsrilu02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseZbsrilu02"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, 
descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcsric02_zeroPivot(cusparseHandle_t handle, + csric02Info_t info, + int *position) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, csric02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseXcsric02_zeroPivot"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, info, position); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSize( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + float *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, + const int *, const int *, csric02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSize( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + double *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, + const int *, const int *, csric02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSize( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + cuComplex *csrSortedValA, 
const int *csrSortedRowPtrA, + const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, + const int *, const int *, csric02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSize( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, csric02Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, + const int *, const int *, csric02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsric02_bufferSizeExt( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + float *csrSortedVal, const int *csrSortedRowPtr, const int *csrSortedColInd, + csric02Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, + const int *, const int *, csric02Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseScsric02_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, + csrSortedColInd, info, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsric02_bufferSizeExt( + cusparseHandle_t handle, int m, int nnz, 
const cusparseMatDescr_t descrA, + double *csrSortedVal, const int *csrSortedRowPtr, + const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, double *, + const int *, const int *, csric02Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseDcsric02_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, + csrSortedColInd, info, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsric02_bufferSizeExt( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + cuComplex *csrSortedVal, const int *csrSortedRowPtr, + const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, + const int *, const int *, csric02Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseCcsric02_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, + csrSortedColInd, info, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsric02_bufferSizeExt( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + cuDoubleComplex *csrSortedVal, const int *csrSortedRowPtr, + const int *csrSortedColInd, csric02Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, + const int *, const int *, csric02Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseZcsric02_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedVal, csrSortedRowPtr, + csrSortedColInd, info, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsric02_analysis( + 
cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + const float *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, csric02Info_t info, + cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, + const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseScsric02_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsric02_analysis( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + const double *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, csric02Info_t info, + cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, + const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseDcsric02_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsric02_analysis( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, csric02Info_t info, + cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, + const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseCcsric02_analysis"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsric02_analysis( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, csric02Info_t info, + cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, + const cuDoubleComplex *, const int *, const int *, csric02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseZcsric02_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsric02( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + float *csrSortedValA_valM, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, csric02Info_t info, + cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, float *, + const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseScsric02"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, + csrSortedColIndA, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsric02( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + double *csrSortedValA_valM, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, csric02Info_t info, + cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, 
double *, + const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseDcsric02"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, + csrSortedColIndA, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsric02( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + cuComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, csric02Info_t info, + cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, cuComplex *, + const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseCcsric02"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, + csrSortedColIndA, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsric02( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + cuDoubleComplex *csrSortedValA_valM, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, csric02Info_t info, + cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, cuDoubleComplex *, + const int *, const int *, csric02Info_t, cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseZcsric02"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA_valM, csrSortedRowPtrA, + csrSortedColIndA, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseXbsric02_zeroPivot(cusparseHandle_t handle, + bsric02Info_t info, + int *position) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, bsric02Info_t, int *); + static auto 
func_ptr = LoadSymbol("cusparseXbsric02_zeroPivot"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, info, position); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, float *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsric02Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + float *, const int *, const int *, int, bsric02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDbsric02_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, double *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsric02Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + double *, const int *, const int *, int, bsric02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsric02Info_t info, int *pBufferSizeInBytes) { + using FuncPtr 
= cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuComplex *, const int *, const int *, int, bsric02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsric02Info_t info, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, int *); + static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsric02_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, float *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, + bsric02Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + float *, const int *, const int *, int, bsric02Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseSbsric02_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockSize, info, pBufferSize); +} + +cusparseStatus_t 
CUSPARSEAPI cusparseDbsric02_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, double *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, + bsric02Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + double *, const int *, const int *, int, bsric02Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseDbsric02_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockSize, info, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsric02_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, + bsric02Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuComplex *, const int *, const int *, int, bsric02Info_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseCbsric02_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockSize, info, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsric02_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockSize, + bsric02Info_t info, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuDoubleComplex *, const int *, const 
int *, int, bsric02Info_t, + size_t *); + static auto func_ptr = LoadSymbol("cusparseZbsric02_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockSize, info, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsric02_analysis( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, const float *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const float *, const int *, const int *, int, bsric02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseSbsric02_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pInputBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDbsric02_analysis( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, const double *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const double *, const int *, const int *, int, bsric02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseDbsric02_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pInputBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsric02_analysis( + 
cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, const cuComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuComplex *, const int *, const int *, int, bsric02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseCbsric02_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pInputBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsric02_analysis( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsric02Info_t info, cusparseSolvePolicy_t policy, void *pInputBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseZbsric02_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pInputBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsric02( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, float *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, 
cusparseDirection_t, int, int, const cusparseMatDescr_t, + float *, const int *, const int *, int, bsric02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseSbsric02"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDbsric02( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, double *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + double *, const int *, const int *, int, bsric02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseDbsric02"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsric02( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuComplex *, const int *, const int *, int, bsric02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseCbsric02"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI 
cusparseZbsric02( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nnzb, + const cusparseMatDescr_t descrA, cuDoubleComplex *bsrSortedVal, + const int *bsrSortedRowPtr, const int *bsrSortedColInd, int blockDim, + bsric02Info_t info, cusparseSolvePolicy_t policy, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + cuDoubleComplex *, const int *, const int *, int, bsric02Info_t, + cusparseSolvePolicy_t, void *); + static auto func_ptr = LoadSymbol("cusparseZbsric02"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nnzb, descrA, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, blockDim, info, policy, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const float *dl, const float *d, + const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, const float *, const float *, + const float *, int, size_t *); + static auto func_ptr = LoadSymbol("cusparseSgtsv2_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const double *dl, const double *d, + const double *du, const double *B, int ldb, size_t *bufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, const double *, + const double *, const double *, int, size_t *); + static auto func_ptr = LoadSymbol("cusparseDgtsv2_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_bufferSizeExt( + cusparseHandle_t handle, int m, int 
n, const cuComplex *dl, + const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, + size_t *bufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, + const cuComplex *, const cuComplex *, int, size_t *); + static auto func_ptr = LoadSymbol("cusparseCgtsv2_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, + const cuDoubleComplex *d, const cuDoubleComplex *du, + const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, + int, size_t *); + static auto func_ptr = LoadSymbol("cusparseZgtsv2_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgtsv2(cusparseHandle_t handle, int m, + int n, const float *dl, + const float *d, const float *du, + float *B, int ldb, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, const float *, const float *, + float *, int, void *); + static auto func_ptr = LoadSymbol("cusparseSgtsv2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgtsv2(cusparseHandle_t handle, int m, + int n, const double *dl, + const double *d, const double *du, + double *B, int ldb, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, const double *, + const double *, double *, int, void *); + static auto func_ptr = 
LoadSymbol("cusparseDgtsv2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgtsv2(cusparseHandle_t handle, int m, + int n, const cuComplex *dl, + const cuComplex *d, + const cuComplex *du, cuComplex *B, + int ldb, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, + const cuComplex *, cuComplex *, int, void *); + static auto func_ptr = LoadSymbol("cusparseCgtsv2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgtsv2(cusparseHandle_t handle, int m, + int n, const cuDoubleComplex *dl, + const cuDoubleComplex *d, + const cuDoubleComplex *du, + cuDoubleComplex *B, int ldb, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, + void *); + static auto func_ptr = LoadSymbol("cusparseZgtsv2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const float *dl, const float *d, + const float *du, const float *B, int ldb, size_t *bufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, const float *, const float *, + const float *, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseSgtsv2_nopivot_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const double *dl, const double *d, + const double 
*du, const double *B, int ldb, size_t *bufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, const double *, + const double *, const double *, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseDgtsv2_nopivot_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const cuComplex *dl, + const cuComplex *d, const cuComplex *du, const cuComplex *B, int ldb, + size_t *bufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, + const cuComplex *, const cuComplex *, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseCgtsv2_nopivot_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, + const cuDoubleComplex *d, const cuDoubleComplex *du, + const cuDoubleComplex *B, int ldb, size_t *bufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, + int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseZgtsv2_nopivot_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, bufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgtsv2_nopivot( + cusparseHandle_t handle, int m, int n, const float *dl, const float *d, + const float *du, float *B, int ldb, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, const float *, 
const float *, + float *, int, void *); + static auto func_ptr = LoadSymbol("cusparseSgtsv2_nopivot"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgtsv2_nopivot( + cusparseHandle_t handle, int m, int n, const double *dl, const double *d, + const double *du, double *B, int ldb, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, const double *, + const double *, double *, int, void *); + static auto func_ptr = LoadSymbol("cusparseDgtsv2_nopivot"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgtsv2_nopivot( + cusparseHandle_t handle, int m, int n, const cuComplex *dl, + const cuComplex *d, const cuComplex *du, cuComplex *B, int ldb, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, + const cuComplex *, cuComplex *, int, void *); + static auto func_ptr = LoadSymbol("cusparseCgtsv2_nopivot"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgtsv2_nopivot( + cusparseHandle_t handle, int m, int n, const cuDoubleComplex *dl, + const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *B, + int ldb, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *, const cuDoubleComplex *, cuDoubleComplex *, int, + void *); + static auto func_ptr = LoadSymbol("cusparseZgtsv2_nopivot"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, dl, d, du, B, ldb, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch_bufferSizeExt( + cusparseHandle_t handle, int m, const 
float *dl, const float *d, + const float *du, const float *x, int batchCount, int batchStride, + size_t *bufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, const float *, const float *, const float *, + const float *, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseSgtsv2StridedBatch_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, + bufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgtsv2StridedBatch_bufferSizeExt( + cusparseHandle_t handle, int m, const double *dl, const double *d, + const double *du, const double *x, int batchCount, int batchStride, + size_t *bufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, const double *, const double *, const double *, + const double *, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseDgtsv2StridedBatch_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, + bufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch_bufferSizeExt( + cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, + const cuComplex *du, const cuComplex *x, int batchCount, int batchStride, + size_t *bufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, const cuComplex *, const cuComplex *, + const cuComplex *, const cuComplex *, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseCgtsv2StridedBatch_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, + bufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch_bufferSizeExt( + cusparseHandle_t handle, int m, const cuDoubleComplex *dl, + const cuDoubleComplex *d, const cuDoubleComplex *du, + const 
cuDoubleComplex *x, int batchCount, int batchStride, + size_t *bufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, + const cuDoubleComplex *, const cuDoubleComplex *, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseZgtsv2StridedBatch_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, + bufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgtsv2StridedBatch( + cusparseHandle_t handle, int m, const float *dl, const float *d, + const float *du, float *x, int batchCount, int batchStride, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, const float *, const float *, const float *, + float *, int, int, void *); + static auto func_ptr = LoadSymbol("cusparseSgtsv2StridedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDgtsv2StridedBatch(cusparseHandle_t handle, int m, const double *dl, + const double *d, const double *du, double *x, + int batchCount, int batchStride, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, const double *, const double *, const double *, + double *, int, int, void *); + static auto func_ptr = LoadSymbol("cusparseDgtsv2StridedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgtsv2StridedBatch( + cusparseHandle_t handle, int m, const cuComplex *dl, const cuComplex *d, + const cuComplex *du, cuComplex *x, int batchCount, int batchStride, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, const cuComplex *, const cuComplex *, + const cuComplex *, cuComplex *, 
int, int, void *); + static auto func_ptr = LoadSymbol("cusparseCgtsv2StridedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgtsv2StridedBatch( + cusparseHandle_t handle, int m, const cuDoubleComplex *dl, + const cuDoubleComplex *d, const cuDoubleComplex *du, cuDoubleComplex *x, + int batchCount, int batchStride, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, const cuDoubleComplex *, const cuDoubleComplex *, + const cuDoubleComplex *, cuDoubleComplex *, int, int, void *); + static auto func_ptr = LoadSymbol("cusparseZgtsv2StridedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, dl, d, du, x, batchCount, batchStride, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch_bufferSizeExt( + cusparseHandle_t handle, int algo, int m, const float *dl, const float *d, + const float *du, const float *x, int batchCount, + size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, const float *, const float *, + const float *, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseSgtsvInterleavedBatch_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, dl, d, du, x, batchCount, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgtsvInterleavedBatch_bufferSizeExt( + cusparseHandle_t handle, int algo, int m, const double *dl, const double *d, + const double *du, const double *x, int batchCount, + size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, const double *, + const double *, const double *, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseDgtsvInterleavedBatch_bufferSizeExt"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, dl, d, du, x, batchCount, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgtsvInterleavedBatch_bufferSizeExt( + cusparseHandle_t handle, int algo, int m, const cuComplex *dl, + const cuComplex *d, const cuComplex *du, const cuComplex *x, int batchCount, + size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, + const cuComplex *, const cuComplex *, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseCgtsvInterleavedBatch_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, dl, d, du, x, batchCount, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch_bufferSizeExt( + cusparseHandle_t handle, int algo, int m, const cuDoubleComplex *dl, + const cuDoubleComplex *d, const cuDoubleComplex *du, + const cuDoubleComplex *x, int batchCount, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, + int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseZgtsvInterleavedBatch_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, dl, d, du, x, batchCount, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgtsvInterleavedBatch( + cusparseHandle_t handle, int algo, int m, float *dl, float *d, float *du, + float *x, int batchCount, void *pBuffer) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, float *, + float *, float *, float *, int, void *); + static auto func_ptr = LoadSymbol("cusparseSgtsvInterleavedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); +} + +cusparseStatus_t 
CUSPARSEAPI cusparseDgtsvInterleavedBatch( + cusparseHandle_t handle, int algo, int m, double *dl, double *d, double *du, + double *x, int batchCount, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int, + double *, double *, double *, + double *, int, void *); + static auto func_ptr = LoadSymbol("cusparseDgtsvInterleavedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgtsvInterleavedBatch( + cusparseHandle_t handle, int algo, int m, cuComplex *dl, cuComplex *d, + cuComplex *du, cuComplex *x, int batchCount, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, cuComplex *, cuComplex *, cuComplex *, + cuComplex *, int, void *); + static auto func_ptr = LoadSymbol("cusparseCgtsvInterleavedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgtsvInterleavedBatch( + cusparseHandle_t handle, int algo, int m, cuDoubleComplex *dl, + cuDoubleComplex *d, cuDoubleComplex *du, cuDoubleComplex *x, int batchCount, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, cuDoubleComplex *, cuDoubleComplex *, + cuDoubleComplex *, cuDoubleComplex *, int, void *); + static auto func_ptr = LoadSymbol("cusparseZgtsvInterleavedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, dl, d, du, x, batchCount, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch_bufferSizeExt( + cusparseHandle_t handle, int algo, int m, const float *ds, const float *dl, + const float *d, const float *du, const float *dw, const float *x, + int batchCount, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float 
*, const float *, const float *, + const float *, const float *, const float *, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseSgpsvInterleavedBatch_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch_bufferSizeExt( + cusparseHandle_t handle, int algo, int m, const double *ds, + const double *dl, const double *d, const double *du, const double *dw, + const double *x, int batchCount, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, const double *, + const double *, const double *, const double *, const double *, int, + size_t *); + static auto func_ptr = + LoadSymbol("cusparseDgpsvInterleavedBatch_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch_bufferSizeExt( + cusparseHandle_t handle, int algo, int m, const cuComplex *ds, + const cuComplex *dl, const cuComplex *d, const cuComplex *du, + const cuComplex *dw, const cuComplex *x, int batchCount, + size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuComplex *, const cuComplex *, + const cuComplex *, const cuComplex *, const cuComplex *, + const cuComplex *, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseCgpsvInterleavedBatch_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch_bufferSizeExt( + cusparseHandle_t handle, int algo, int m, const cuDoubleComplex *ds, + const cuDoubleComplex *dl, const cuDoubleComplex *d, + const 
cuDoubleComplex *du, const cuDoubleComplex *dw, + const cuDoubleComplex *x, int batchCount, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuDoubleComplex *, + const cuDoubleComplex *, const cuDoubleComplex *, const cuDoubleComplex *, + const cuDoubleComplex *, const cuDoubleComplex *, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseZgpsvInterleavedBatch_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgpsvInterleavedBatch( + cusparseHandle_t handle, int algo, int m, float *ds, float *dl, float *d, + float *du, float *dw, float *x, int batchCount, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, float *, float *, float *, float *, float *, + float *, int, void *); + static auto func_ptr = LoadSymbol("cusparseSgpsvInterleavedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgpsvInterleavedBatch( + cusparseHandle_t handle, int algo, int m, double *ds, double *dl, double *d, + double *du, double *dw, double *x, int batchCount, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, double *, double *, double *, double *, + double *, double *, int, void *); + static auto func_ptr = LoadSymbol("cusparseDgpsvInterleavedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgpsvInterleavedBatch( + cusparseHandle_t handle, int algo, int m, cuComplex *ds, cuComplex *dl, + cuComplex *d, cuComplex *du, cuComplex *dw, cuComplex *x, int batchCount, + void *pBuffer) { + using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, cuComplex *, cuComplex *, cuComplex *, + cuComplex *, cuComplex *, cuComplex *, int, void *); + static auto func_ptr = LoadSymbol("cusparseCgpsvInterleavedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgpsvInterleavedBatch( + cusparseHandle_t handle, int algo, int m, cuDoubleComplex *ds, + cuDoubleComplex *dl, cuDoubleComplex *d, cuDoubleComplex *du, + cuDoubleComplex *dw, cuDoubleComplex *x, int batchCount, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, cuDoubleComplex *, cuDoubleComplex *, + cuDoubleComplex *, cuDoubleComplex *, cuDoubleComplex *, + cuDoubleComplex *, int, void *); + static auto func_ptr = LoadSymbol("cusparseZgpsvInterleavedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, algo, m, ds, dl, d, du, dw, x, batchCount, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const float *alpha, + const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, + const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, + const int *csrSortedRowPtrB, const int *csrSortedColIndB, + const cusparseMatDescr_t descrC, const float *csrSortedValC, + const int *csrSortedRowPtrC, const int *csrSortedColIndC, + size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, const cusparseMatDescr_t, int, + const float *, const int *, const int *, const float *, + const cusparseMatDescr_t, int, const float *, const int *, const int *, + const cusparseMatDescr_t, const float *, const int *, const int *, + size_t *); + static auto func_ptr = 
LoadSymbol("cusparseScsrgeam2_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, + csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, + csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const double *alpha, + const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const double *beta, const cusparseMatDescr_t descrB, int nnzB, + const double *csrSortedValB, const int *csrSortedRowPtrB, + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, + const double *csrSortedValC, const int *csrSortedRowPtrC, + const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, + const double *, const int *, const int *, const double *, + const cusparseMatDescr_t, int, const double *, const int *, const int *, + const cusparseMatDescr_t, const double *, const int *, const int *, + size_t *); + static auto func_ptr = LoadSymbol("cusparseDcsrgeam2_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, + csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, + csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const cuComplex *alpha, + const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const cuComplex *beta, const cusparseMatDescr_t descrB, int 
nnzB, + const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, + const cuComplex *csrSortedValC, const int *csrSortedRowPtrC, + const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, + int, const cuComplex *, const int *, const int *, const cuComplex *, + const cusparseMatDescr_t, int, const cuComplex *, const int *, + const int *, const cusparseMatDescr_t, const cuComplex *, const int *, + const int *, size_t *); + static auto func_ptr = LoadSymbol("cusparseCcsrgeam2_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, + csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, + csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam2_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, + const cusparseMatDescr_t descrA, int nnzA, + const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, const cuDoubleComplex *beta, + const cusparseMatDescr_t descrB, int nnzB, + const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, + const cuDoubleComplex *csrSortedValC, const int *csrSortedRowPtrC, + const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuDoubleComplex *, + const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, + const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, + const cuDoubleComplex *, const int *, const int *, + const cusparseMatDescr_t, const cuDoubleComplex *, const int *, + const 
int *, size_t *); + static auto func_ptr = LoadSymbol("cusparseZcsrgeam2_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, + csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, + csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcsrgeam2Nnz( + cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, + int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const cusparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, + int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *workspace) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, int, const int *, + const int *, const cusparseMatDescr_t, int, const int *, const int *, + const cusparseMatDescr_t, int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseXcsrgeam2Nnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, descrA, nnzA, csrSortedRowPtrA, + csrSortedColIndA, descrB, nnzB, csrSortedRowPtrB, + csrSortedColIndB, descrC, csrSortedRowPtrC, + nnzTotalDevHostPtr, workspace); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsrgeam2( + cusparseHandle_t handle, int m, int n, const float *alpha, + const cusparseMatDescr_t descrA, int nnzA, const float *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, const float *beta, + const cusparseMatDescr_t descrB, int nnzB, const float *csrSortedValB, + const int *csrSortedRowPtrB, const int *csrSortedColIndB, + const cusparseMatDescr_t descrC, float *csrSortedValC, + int *csrSortedRowPtrC, int *csrSortedColIndC, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, const 
cusparseMatDescr_t, int, + const float *, const int *, const int *, const float *, + const cusparseMatDescr_t, int, const float *, const int *, const int *, + const cusparseMatDescr_t, float *, int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseScsrgeam2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, + csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, + csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsrgeam2( + cusparseHandle_t handle, int m, int n, const double *alpha, + const cusparseMatDescr_t descrA, int nnzA, const double *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const double *beta, const cusparseMatDescr_t descrB, int nnzB, + const double *csrSortedValB, const int *csrSortedRowPtrB, + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, + double *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, const cusparseMatDescr_t, int, + const double *, const int *, const int *, const double *, + const cusparseMatDescr_t, int, const double *, const int *, const int *, + const cusparseMatDescr_t, double *, int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseDcsrgeam2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, + csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, + csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsrgeam2( + cusparseHandle_t handle, int m, int n, const cuComplex *alpha, + const cusparseMatDescr_t descrA, int nnzA, const cuComplex *csrSortedValA, + const int 
*csrSortedRowPtrA, const int *csrSortedColIndA, + const cuComplex *beta, const cusparseMatDescr_t descrB, int nnzB, + const cuComplex *csrSortedValB, const int *csrSortedRowPtrB, + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, + cuComplex *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuComplex *, const cusparseMatDescr_t, + int, const cuComplex *, const int *, const int *, const cuComplex *, + const cusparseMatDescr_t, int, const cuComplex *, const int *, + const int *, const cusparseMatDescr_t, cuComplex *, int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseCcsrgeam2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, + csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, + csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsrgeam2( + cusparseHandle_t handle, int m, int n, const cuDoubleComplex *alpha, + const cusparseMatDescr_t descrA, int nnzA, + const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, const cuDoubleComplex *beta, + const cusparseMatDescr_t descrB, int nnzB, + const cuDoubleComplex *csrSortedValB, const int *csrSortedRowPtrB, + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, + cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, + int *csrSortedColIndC, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cuDoubleComplex *, + const cusparseMatDescr_t, int, const cuDoubleComplex *, const int *, + const int *, const cuDoubleComplex *, const cusparseMatDescr_t, int, + const cuDoubleComplex *, const int *, const int *, + const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, void *); + static auto 
func_ptr = LoadSymbol("cusparseZcsrgeam2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, alpha, descrA, nnzA, csrSortedValA, + csrSortedRowPtrA, csrSortedColIndA, beta, descrB, nnzB, + csrSortedValB, csrSortedRowPtrB, csrSortedColIndB, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsrcolor( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + const float *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, + int *coloring, int *reordering, const cusparseColorInfo_t info) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, + const int *, const int *, const float *, int *, int *, int *, + const cusparseColorInfo_t); + static auto func_ptr = LoadSymbol("cusparseScsrcolor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, fractionToColor, ncolors, coloring, + reordering, info); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsrcolor( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + const double *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, + int *coloring, int *reordering, const cusparseColorInfo_t info) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, + const int *, const int *, const double *, int *, int *, int *, + const cusparseColorInfo_t); + static auto func_ptr = LoadSymbol("cusparseDcsrcolor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, fractionToColor, ncolors, coloring, + reordering, info); +} + +cusparseStatus_t CUSPARSEAPI 
cusparseCcsrcolor( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, const float *fractionToColor, int *ncolors, + int *coloring, int *reordering, const cusparseColorInfo_t info) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, + const int *, const int *, const float *, int *, int *, int *, + const cusparseColorInfo_t); + static auto func_ptr = LoadSymbol("cusparseCcsrcolor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, fractionToColor, ncolors, coloring, + reordering, info); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsrcolor( + cusparseHandle_t handle, int m, int nnz, const cusparseMatDescr_t descrA, + const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, const double *fractionToColor, int *ncolors, + int *coloring, int *reordering, const cusparseColorInfo_t info) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, + const cuDoubleComplex *, const int *, const int *, const double *, int *, + int *, int *, const cusparseColorInfo_t); + static auto func_ptr = LoadSymbol("cusparseZcsrcolor"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, nnz, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, fractionToColor, ncolors, coloring, + reordering, info); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const float *A, int lda, + int *nnzPerRowCol, int *nnzTotalDevHostPtr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const float *, int, int *, int *); + 
static auto func_ptr = LoadSymbol("cusparseSnnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, + nnzTotalDevHostPtr); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const double *A, int lda, + int *nnzPerRowCol, int *nnzTotalDevHostPtr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const double *, int, int *, int *); + static auto func_ptr = LoadSymbol("cusparseDnnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, + nnzTotalDevHostPtr); +} + +cusparseStatus_t CUSPARSEAPI +cusparseCnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const cuComplex *A, int lda, + int *nnzPerRowCol, int *nnzTotalDevHostPtr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuComplex *, int, int *, int *); + static auto func_ptr = LoadSymbol("cusparseCnnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, + nnzTotalDevHostPtr); +} + +cusparseStatus_t CUSPARSEAPI +cusparseZnnz(cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const cuDoubleComplex *A, int lda, + int *nnzPerRowCol, int *nnzTotalDevHostPtr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuDoubleComplex *, int, int *, int *); + static auto func_ptr = LoadSymbol("cusparseZnnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, A, lda, nnzPerRowCol, + nnzTotalDevHostPtr); +} + +cusparseStatus_t CUSPARSEAPI 
cusparseSnnz_compress( + cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, + const float *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, + int *nnzC, float tol) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, const cusparseMatDescr_t, const float *, + const int *, int *, int *, float); + static auto func_ptr = LoadSymbol("cusparseSnnz_compress"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, + nnzC, tol); +} + +cusparseStatus_t CUSPARSEAPI cusparseDnnz_compress( + cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, + const double *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, + int *nnzC, double tol) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, const cusparseMatDescr_t, const double *, + const int *, int *, int *, double); + static auto func_ptr = LoadSymbol("cusparseDnnz_compress"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, + nnzC, tol); +} + +cusparseStatus_t CUSPARSEAPI cusparseCnnz_compress( + cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, + const cuComplex *csrSortedValA, const int *csrSortedRowPtrA, int *nnzPerRow, + int *nnzC, cuComplex tol) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, const cusparseMatDescr_t, const cuComplex *, + const int *, int *, int *, cuComplex); + static auto func_ptr = LoadSymbol("cusparseCnnz_compress"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, + nnzC, tol); +} + +cusparseStatus_t CUSPARSEAPI cusparseZnnz_compress( + cusparseHandle_t handle, int m, const cusparseMatDescr_t descr, + const cuDoubleComplex *csrSortedValA, const int *csrSortedRowPtrA, + int *nnzPerRow, int *nnzC, cuDoubleComplex tol) { + using 
FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, const cusparseMatDescr_t, const cuDoubleComplex *, + const int *, int *, int *, cuDoubleComplex); + static auto func_ptr = LoadSymbol("cusparseZnnz_compress"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, descr, csrSortedValA, csrSortedRowPtrA, nnzPerRow, + nnzC, tol); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsr2csr_compress( + cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, + const float *csrSortedValA, const int *csrSortedColIndA, + const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, + float *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, + float tol) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, const float *, + const int *, const int *, int, const int *, float *, int *, int *, float); + static auto func_ptr = LoadSymbol("cusparseScsr2csr_compress"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, + csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, + csrSortedColIndC, csrSortedRowPtrC, tol); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsr2csr_compress( + cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, + const double *csrSortedValA, const int *csrSortedColIndA, + const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, + double *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, + double tol) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, const double *, + const int *, const int *, int, const int *, double *, int *, int *, + double); + static auto func_ptr = LoadSymbol("cusparseDcsr2csr_compress"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, + csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, + 
csrSortedColIndC, csrSortedRowPtrC, tol); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsr2csr_compress( + cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, + const cuComplex *csrSortedValA, const int *csrSortedColIndA, + const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, + cuComplex *csrSortedValC, int *csrSortedColIndC, int *csrSortedRowPtrC, + cuComplex tol) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, const cuComplex *, + const int *, const int *, int, const int *, cuComplex *, int *, int *, + cuComplex); + static auto func_ptr = LoadSymbol("cusparseCcsr2csr_compress"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, + csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, + csrSortedColIndC, csrSortedRowPtrC, tol); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsr2csr_compress( + cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, + const cuDoubleComplex *csrSortedValA, const int *csrSortedColIndA, + const int *csrSortedRowPtrA, int nnzA, const int *nnzPerRow, + cuDoubleComplex *csrSortedValC, int *csrSortedColIndC, + int *csrSortedRowPtrC, cuDoubleComplex tol) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const cusparseMatDescr_t, + const cuDoubleComplex *, const int *, const int *, int, const int *, + cuDoubleComplex *, int *, int *, cuDoubleComplex); + static auto func_ptr = LoadSymbol("cusparseZcsr2csr_compress"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, descrA, csrSortedValA, csrSortedColIndA, + csrSortedRowPtrA, nnzA, nnzPerRow, csrSortedValC, + csrSortedColIndC, csrSortedRowPtrC, tol); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcoo2csr(cusparseHandle_t handle, + const int *cooRowInd, int nnz, + int m, int *csrSortedRowPtr, + cusparseIndexBase_t idxBase) { + using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); + static auto func_ptr = LoadSymbol("cusparseXcoo2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, cooRowInd, nnz, m, csrSortedRowPtr, idxBase); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcsr2coo(cusparseHandle_t handle, + const int *csrSortedRowPtr, + int nnz, int m, int *cooRowInd, + cusparseIndexBase_t idxBase) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, const int *, int, int, int *, cusparseIndexBase_t); + static auto func_ptr = LoadSymbol("cusparseXcsr2coo"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, csrSortedRowPtr, nnz, m, cooRowInd, idxBase); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcsr2bsrNnz( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, int blockDim, const cusparseMatDescr_t descrC, + int *bsrSortedRowPtrC, int *nnzTotalDevHostPtr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const int *, const int *, int, const cusparseMatDescr_t, int *, int *); + static auto func_ptr = LoadSymbol("cusparseXcsr2bsrNnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, + csrSortedColIndA, blockDim, descrC, bsrSortedRowPtrC, + nnzTotalDevHostPtr); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsr2bsr( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const float *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, + const cusparseMatDescr_t descrC, float *bsrSortedValC, + int *bsrSortedRowPtrC, int *bsrSortedColIndC) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, 
const cusparseMatDescr_t, + const float *, const int *, const int *, int, const cusparseMatDescr_t, + float *, int *, int *); + static auto func_ptr = LoadSymbol("cusparseScsr2bsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, blockDim, descrC, bsrSortedValC, + bsrSortedRowPtrC, bsrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsr2bsr( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const double *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, + const cusparseMatDescr_t descrC, double *bsrSortedValC, + int *bsrSortedRowPtrC, int *bsrSortedColIndC) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const double *, const int *, const int *, int, const cusparseMatDescr_t, + double *, int *, int *); + static auto func_ptr = LoadSymbol("cusparseDcsr2bsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, blockDim, descrC, bsrSortedValC, + bsrSortedRowPtrC, bsrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsr2bsr( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, + const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, + int *bsrSortedRowPtrC, int *bsrSortedColIndC) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuComplex *, const int *, const int *, int, + const cusparseMatDescr_t, cuComplex *, int *, int *); + static auto func_ptr = LoadSymbol("cusparseCcsr2bsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + 
return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, blockDim, descrC, bsrSortedValC, + bsrSortedRowPtrC, bsrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsr2bsr( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, int blockDim, + const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, + int *bsrSortedRowPtrC, int *bsrSortedColIndC) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuDoubleComplex *, const int *, const int *, int, + const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); + static auto func_ptr = LoadSymbol("cusparseZcsr2bsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, blockDim, descrC, bsrSortedValC, + bsrSortedRowPtrC, bsrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI cusparseSbsr2csr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, + const cusparseMatDescr_t descrA, const float *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, + const cusparseMatDescr_t descrC, float *csrSortedValC, + int *csrSortedRowPtrC, int *csrSortedColIndC) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const float *, const int *, const int *, int, const cusparseMatDescr_t, + float *, int *, int *); + static auto func_ptr = LoadSymbol("cusparseSbsr2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, + bsrSortedColIndA, blockDim, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI 
cusparseDbsr2csr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, + const cusparseMatDescr_t descrA, const double *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, + const cusparseMatDescr_t descrC, double *csrSortedValC, + int *csrSortedRowPtrC, int *csrSortedColIndC) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const double *, const int *, const int *, int, const cusparseMatDescr_t, + double *, int *, int *); + static auto func_ptr = LoadSymbol("cusparseDbsr2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, + bsrSortedColIndA, blockDim, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI cusparseCbsr2csr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, + const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int blockDim, + const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, + int *csrSortedRowPtrC, int *csrSortedColIndC) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuComplex *, const int *, const int *, int, + const cusparseMatDescr_t, cuComplex *, int *, int *); + static auto func_ptr = LoadSymbol("cusparseCbsr2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, + bsrSortedColIndA, blockDim, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI cusparseZbsr2csr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, + const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int 
blockDim, + const cusparseMatDescr_t descrC, cuDoubleComplex *csrSortedValC, + int *csrSortedRowPtrC, int *csrSortedColIndC) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuDoubleComplex *, const int *, const int *, int, + const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); + static auto func_ptr = LoadSymbol("cusparseZbsr2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, + bsrSortedColIndA, blockDim, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSize( + cusparseHandle_t handle, int mb, int nb, int nnzb, + const float *bsrSortedVal, const int *bsrSortedRowPtr, + const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, + int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const float *, const int *, const int *, + int, int, int *); + static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, rowBlockDim, colBlockDim, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSize( + cusparseHandle_t handle, int mb, int nb, int nnzb, + const double *bsrSortedVal, const int *bsrSortedRowPtr, + const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, + int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const double *, const int *, const int *, + int, int, int *); + static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, rowBlockDim, colBlockDim, + 
pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSize( + cusparseHandle_t handle, int mb, int nb, int nnzb, + const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, + const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, + int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cuComplex *, const int *, + const int *, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, rowBlockDim, colBlockDim, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSize( + cusparseHandle_t handle, int mb, int nb, int nnzb, + const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, + const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, + int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, + const int *, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, rowBlockDim, colBlockDim, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc_bufferSizeExt( + cusparseHandle_t handle, int mb, int nb, int nnzb, + const float *bsrSortedVal, const int *bsrSortedRowPtr, + const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, + size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const float *, const int *, const int *, + int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseSgebsr2gebsc_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mb, nb, nnzb, 
bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc_bufferSizeExt( + cusparseHandle_t handle, int mb, int nb, int nnzb, + const double *bsrSortedVal, const int *bsrSortedRowPtr, + const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, + size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const double *, const int *, const int *, + int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseDgebsr2gebsc_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc_bufferSizeExt( + cusparseHandle_t handle, int mb, int nb, int nnzb, + const cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, + const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, + size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cuComplex *, const int *, + const int *, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseCgebsr2gebsc_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc_bufferSizeExt( + cusparseHandle_t handle, int mb, int nb, int nnzb, + const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, + const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, + size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, + const int *, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseZgebsr2gebsc_bufferSizeExt"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, rowBlockDim, colBlockDim, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsc( + cusparseHandle_t handle, int mb, int nb, int nnzb, + const float *bsrSortedVal, const int *bsrSortedRowPtr, + const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, float *bscVal, + int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const float *, const int *, const int *, + int, int, float *, int *, int *, cusparseAction_t, cusparseIndexBase_t, + void *); + static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, + bscColPtr, copyValues, idxBase, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsc( + cusparseHandle_t handle, int mb, int nb, int nnzb, + const double *bsrSortedVal, const int *bsrSortedRowPtr, + const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, + double *bscVal, int *bscRowInd, int *bscColPtr, cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const double *, const int *, const int *, + int, int, double *, int *, int *, cusparseAction_t, cusparseIndexBase_t, + void *); + static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, + bscColPtr, copyValues, idxBase, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsc( + cusparseHandle_t handle, int mb, int nb, int nnzb, + const 
cuComplex *bsrSortedVal, const int *bsrSortedRowPtr, + const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, + cuComplex *bscVal, int *bscRowInd, int *bscColPtr, + cusparseAction_t copyValues, cusparseIndexBase_t idxBase, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cuComplex *, const int *, + const int *, int, int, cuComplex *, int *, int *, cusparseAction_t, + cusparseIndexBase_t, void *); + static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, + bscColPtr, copyValues, idxBase, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsc( + cusparseHandle_t handle, int mb, int nb, int nnzb, + const cuDoubleComplex *bsrSortedVal, const int *bsrSortedRowPtr, + const int *bsrSortedColInd, int rowBlockDim, int colBlockDim, + cuDoubleComplex *bscVal, int *bscRowInd, int *bscColPtr, + cusparseAction_t copyValues, cusparseIndexBase_t idxBase, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cuDoubleComplex *, const int *, + const int *, int, int, cuDoubleComplex *, int *, int *, cusparseAction_t, + cusparseIndexBase_t, void *); + static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, mb, nb, nnzb, bsrSortedVal, bsrSortedRowPtr, + bsrSortedColInd, rowBlockDim, colBlockDim, bscVal, bscRowInd, + bscColPtr, copyValues, idxBase, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseXgebsr2csr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, + const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, + const int *bsrSortedColIndA, int rowBlockDim, int colBlockDim, + const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, + int *csrSortedColIndC) { + 
using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const int *, const int *, int, int, const cusparseMatDescr_t, int *, + int *); + static auto func_ptr = LoadSymbol("cusparseXgebsr2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedRowPtrA, + bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, + csrSortedRowPtrC, csrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgebsr2csr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, + const cusparseMatDescr_t descrA, const float *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, + int colBlockDim, const cusparseMatDescr_t descrC, float *csrSortedValC, + int *csrSortedRowPtrC, int *csrSortedColIndC) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const float *, const int *, const int *, int, int, + const cusparseMatDescr_t, float *, int *, int *); + static auto func_ptr = LoadSymbol("cusparseSgebsr2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, + bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgebsr2csr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, + const cusparseMatDescr_t descrA, const double *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, + int colBlockDim, const cusparseMatDescr_t descrC, double *csrSortedValC, + int *csrSortedRowPtrC, int *csrSortedColIndC) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const double *, const int *, const int *, int, int, + const cusparseMatDescr_t, 
double *, int *, int *); + static auto func_ptr = LoadSymbol("cusparseDgebsr2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, + bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgebsr2csr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, + const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, + int colBlockDim, const cusparseMatDescr_t descrC, cuComplex *csrSortedValC, + int *csrSortedRowPtrC, int *csrSortedColIndC) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuComplex *, const int *, const int *, int, int, + const cusparseMatDescr_t, cuComplex *, int *, int *); + static auto func_ptr = LoadSymbol("cusparseCgebsr2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, + bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgebsr2csr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, + const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDim, + int colBlockDim, const cusparseMatDescr_t descrC, + cuDoubleComplex *csrSortedValC, int *csrSortedRowPtrC, + int *csrSortedColIndC) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuDoubleComplex *, const int *, const int *, int, int, + const cusparseMatDescr_t, cuDoubleComplex *, int *, int *); + static auto func_ptr = LoadSymbol("cusparseZgebsr2csr"); + if (!func_ptr) 
return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, descrA, bsrSortedValA, bsrSortedRowPtrA, + bsrSortedColIndA, rowBlockDim, colBlockDim, descrC, + csrSortedValC, csrSortedRowPtrC, csrSortedColIndC); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const float *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, + int colBlockDim, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const float *, const int *, const int *, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseScsr2gebsr_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, rowBlockDim, colBlockDim, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const double *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, + int colBlockDim, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const double *, const int *, const int *, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseDcsr2gebsr_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, rowBlockDim, colBlockDim, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, + const int 
*csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, + int colBlockDim, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuComplex *, const int *, const int *, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, rowBlockDim, colBlockDim, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, + int colBlockDim, int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuDoubleComplex *, const int *, const int *, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, rowBlockDim, colBlockDim, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const float *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, + int colBlockDim, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const float *, const int *, const int *, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseScsr2gebsr_bufferSizeExt"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const double *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, + int colBlockDim, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const double *, const int *, const int *, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseDcsr2gebsr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, int rowBlockDim, + int colBlockDim, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuComplex *, const int *, const int *, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseCcsr2gebsr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, int 
rowBlockDim, + int colBlockDim, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuDoubleComplex *, const int *, const int *, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseZcsr2gebsr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, rowBlockDim, colBlockDim, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcsr2gebsrNnz( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const int *csrSortedRowPtrA, + const int *csrSortedColIndA, const cusparseMatDescr_t descrC, + int *bsrSortedRowPtrC, int rowBlockDim, int colBlockDim, + int *nnzTotalDevHostPtr, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const int *, const int *, const cusparseMatDescr_t, int *, int, int, + int *, void *); + static auto func_ptr = LoadSymbol("cusparseXcsr2gebsrNnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedRowPtrA, + csrSortedColIndA, descrC, bsrSortedRowPtrC, rowBlockDim, + colBlockDim, nnzTotalDevHostPtr, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsr2gebsr( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const float *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const cusparseMatDescr_t descrC, float *bsrSortedValC, + int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, + int colBlockDim, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const float *, const int *, const int *, const cusparseMatDescr_t, + float *, int *, int *, 
int, int, void *); + static auto func_ptr = LoadSymbol("cusparseScsr2gebsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, + bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsr2gebsr( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const double *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const cusparseMatDescr_t descrC, double *bsrSortedValC, + int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, + int colBlockDim, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const double *, const int *, const int *, const cusparseMatDescr_t, + double *, int *, int *, int, int, void *); + static auto func_ptr = LoadSymbol("cusparseDcsr2gebsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, + bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsr2gebsr( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const cuComplex *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, + int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, + int colBlockDim, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuComplex *, const int *, const int *, const cusparseMatDescr_t, + cuComplex *, int *, int *, int, int, void *); + static auto func_ptr = LoadSymbol("cusparseCcsr2gebsr"); + 
if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, + bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsr2gebsr( + cusparseHandle_t handle, cusparseDirection_t dirA, int m, int n, + const cusparseMatDescr_t descrA, const cuDoubleComplex *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const cusparseMatDescr_t descrC, cuDoubleComplex *bsrSortedValC, + int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDim, + int colBlockDim, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, const cusparseMatDescr_t, + const cuDoubleComplex *, const int *, const int *, + const cusparseMatDescr_t, cuDoubleComplex *, int *, int *, int, int, + void *); + static auto func_ptr = LoadSymbol("cusparseZcsr2gebsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, m, n, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, descrC, bsrSortedValC, bsrSortedRowPtrC, + bsrSortedColIndC, rowBlockDim, colBlockDim, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const float *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, + int colBlockDimA, int rowBlockDimC, int colBlockDimC, + int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const cusparseMatDescr_t, const float *, const int *, const int *, int, + int, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, 
bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, + colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const double *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, + int colBlockDimA, int rowBlockDimC, int colBlockDimC, + int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const cusparseMatDescr_t, const double *, const int *, const int *, int, + int, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, + colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSize( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, + int colBlockDimA, int rowBlockDimC, int colBlockDimC, + int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const cusparseMatDescr_t, const cuComplex *, const int *, const int *, + int, int, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, + colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSize( + 
cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, + int colBlockDimA, int rowBlockDimC, int colBlockDimC, + int *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const cusparseMatDescr_t, const cuDoubleComplex *, const int *, + const int *, int, int, int, int, int *); + static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, + colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const float *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, + int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const cusparseMatDescr_t, const float *, const int *, const int *, int, + int, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseSgebsr2gebsr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, + colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const double *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int 
rowBlockDimA, + int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const cusparseMatDescr_t, const double *, const int *, const int *, int, + int, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseDgebsr2gebsr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, + colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, + int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const cusparseMatDescr_t, const cuComplex *, const int *, const int *, + int, int, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseCgebsr2gebsr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, + colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr_bufferSizeExt( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, + int colBlockDimA, int rowBlockDimC, int colBlockDimC, size_t *pBufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const 
cusparseMatDescr_t, const cuDoubleComplex *, const int *, + const int *, int, int, int, int, size_t *); + static auto func_ptr = + LoadSymbol("cusparseZgebsr2gebsr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, + colBlockDimA, rowBlockDimC, colBlockDimC, pBufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseXgebsr2gebsrNnz( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const int *bsrSortedRowPtrA, + const int *bsrSortedColIndA, int rowBlockDimA, int colBlockDimA, + const cusparseMatDescr_t descrC, int *bsrSortedRowPtrC, int rowBlockDimC, + int colBlockDimC, int *nnzTotalDevHostPtr, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const cusparseMatDescr_t, const int *, const int *, int, int, + const cusparseMatDescr_t, int *, int, int, int *, void *); + static auto func_ptr = LoadSymbol("cusparseXgebsr2gebsrNnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedRowPtrA, + bsrSortedColIndA, rowBlockDimA, colBlockDimA, descrC, + bsrSortedRowPtrC, rowBlockDimC, colBlockDimC, + nnzTotalDevHostPtr, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSgebsr2gebsr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const float *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, + int colBlockDimA, const cusparseMatDescr_t descrC, float *bsrSortedValC, + int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, + int colBlockDimC, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const cusparseMatDescr_t, const float *, const int *, const int 
*, int, + int, const cusparseMatDescr_t, float *, int *, int *, int, int, void *); + static auto func_ptr = LoadSymbol("cusparseSgebsr2gebsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, + colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, + bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDgebsr2gebsr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const double *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, + int colBlockDimA, const cusparseMatDescr_t descrC, double *bsrSortedValC, + int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, + int colBlockDimC, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const cusparseMatDescr_t, const double *, const int *, const int *, int, + int, const cusparseMatDescr_t, double *, int *, int *, int, int, void *); + static auto func_ptr = LoadSymbol("cusparseDgebsr2gebsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, + colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, + bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCgebsr2gebsr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const cuComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, + int colBlockDimA, const cusparseMatDescr_t descrC, cuComplex *bsrSortedValC, + int *bsrSortedRowPtrC, int *bsrSortedColIndC, int rowBlockDimC, + int colBlockDimC, void *pBuffer) { + using FuncPtr = 
cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const cusparseMatDescr_t, const cuComplex *, const int *, const int *, + int, int, const cusparseMatDescr_t, cuComplex *, int *, int *, int, int, + void *); + static auto func_ptr = LoadSymbol("cusparseCgebsr2gebsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, + colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, + bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZgebsr2gebsr( + cusparseHandle_t handle, cusparseDirection_t dirA, int mb, int nb, int nnzb, + const cusparseMatDescr_t descrA, const cuDoubleComplex *bsrSortedValA, + const int *bsrSortedRowPtrA, const int *bsrSortedColIndA, int rowBlockDimA, + int colBlockDimA, const cusparseMatDescr_t descrC, + cuDoubleComplex *bsrSortedValC, int *bsrSortedRowPtrC, + int *bsrSortedColIndC, int rowBlockDimC, int colBlockDimC, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseDirection_t, int, int, int, + const cusparseMatDescr_t, const cuDoubleComplex *, const int *, + const int *, int, int, const cusparseMatDescr_t, cuDoubleComplex *, int *, + int *, int, int, void *); + static auto func_ptr = LoadSymbol("cusparseZgebsr2gebsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, dirA, mb, nb, nnzb, descrA, bsrSortedValA, + bsrSortedRowPtrA, bsrSortedColIndA, rowBlockDimA, + colBlockDimA, descrC, bsrSortedValC, bsrSortedRowPtrC, + bsrSortedColIndC, rowBlockDimC, colBlockDimC, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI +cusparseCreateIdentityPermutation(cusparseHandle_t handle, int n, int *p) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseHandle_t, int, int *); + static auto func_ptr = + LoadSymbol("cusparseCreateIdentityPermutation"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, n, p); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcoosort_bufferSizeExt( + cusparseHandle_t handle, int m, int n, int nnz, const int *cooRowsA, + const int *cooColsA, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const int *, const int *, size_t *); + static auto func_ptr = LoadSymbol("cusparseXcoosort_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcoosortByRow(cusparseHandle_t handle, + int m, int n, int nnz, + int *cooRowsA, int *cooColsA, + int *P, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, int *, int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseXcoosortByRow"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcoosortByColumn(cusparseHandle_t handle, + int m, int n, int nnz, + int *cooRowsA, + int *cooColsA, int *P, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, int *, int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseXcoosortByColumn"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, cooRowsA, cooColsA, P, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcsrsort_bufferSizeExt( + cusparseHandle_t handle, int m, int n, int nnz, const int *csrRowPtrA, + const int *csrColIndA, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const int *, const int *, size_t *); + static auto func_ptr = LoadSymbol("cusparseXcsrsort_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, csrRowPtrA, 
csrColIndA, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcsrsort(cusparseHandle_t handle, int m, + int n, int nnz, + const cusparseMatDescr_t descrA, + const int *csrRowPtrA, + int *csrColIndA, int *P, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, + int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseXcsrsort"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, descrA, csrRowPtrA, csrColIndA, P, + pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcscsort_bufferSizeExt( + cusparseHandle_t handle, int m, int n, int nnz, const int *cscColPtrA, + const int *cscRowIndA, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const int *, const int *, size_t *); + static auto func_ptr = LoadSymbol("cusparseXcscsort_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, cscColPtrA, cscRowIndA, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseXcscsort(cusparseHandle_t handle, int m, + int n, int nnz, + const cusparseMatDescr_t descrA, + const int *cscColPtrA, + int *cscRowIndA, int *P, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const int *, + int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseXcscsort"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, descrA, cscColPtrA, cscRowIndA, P, + pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsru2csr_bufferSizeExt( + cusparseHandle_t handle, int m, int n, int nnz, float *csrVal, + const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, + size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, float *, const int *, int *, + 
csru2csrInfo_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseScsru2csr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr_bufferSizeExt( + cusparseHandle_t handle, int m, int n, int nnz, double *csrVal, + const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, + size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, double *, const int *, int *, + csru2csrInfo_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseDcsru2csr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr_bufferSizeExt( + cusparseHandle_t handle, int m, int n, int nnz, cuComplex *csrVal, + const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, + size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, cuComplex *, const int *, int *, + csru2csrInfo_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseCcsru2csr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, info, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr_bufferSizeExt( + cusparseHandle_t handle, int m, int n, int nnz, cuDoubleComplex *csrVal, + const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, + size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, cuDoubleComplex *, const int *, int *, + csru2csrInfo_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseZcsru2csr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, 
csrColInd, info, + pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsru2csr( + cusparseHandle_t handle, int m, int n, int nnz, + const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, + int *csrColInd, csru2csrInfo_t info, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, + const int *, int *, csru2csrInfo_t, void *); + static auto func_ptr = LoadSymbol("cusparseScsru2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, + pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsru2csr( + cusparseHandle_t handle, int m, int n, int nnz, + const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, + int *csrColInd, csru2csrInfo_t info, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, + const int *, int *, csru2csrInfo_t, void *); + static auto func_ptr = LoadSymbol("cusparseDcsru2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, + pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsru2csr( + cusparseHandle_t handle, int m, int n, int nnz, + const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, + int *csrColInd, csru2csrInfo_t info, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, cuComplex *, + const int *, int *, csru2csrInfo_t, void *); + static auto func_ptr = LoadSymbol("cusparseCcsru2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, + pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsru2csr( + cusparseHandle_t handle, int m, int n, int nnz, + const cusparseMatDescr_t descrA, 
cuDoubleComplex *csrVal, + const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, + cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); + static auto func_ptr = LoadSymbol("cusparseZcsru2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, + pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseScsr2csru( + cusparseHandle_t handle, int m, int n, int nnz, + const cusparseMatDescr_t descrA, float *csrVal, const int *csrRowPtr, + int *csrColInd, csru2csrInfo_t info, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, float *, + const int *, int *, csru2csrInfo_t, void *); + static auto func_ptr = LoadSymbol("cusparseScsr2csru"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, + pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDcsr2csru( + cusparseHandle_t handle, int m, int n, int nnz, + const cusparseMatDescr_t descrA, double *csrVal, const int *csrRowPtr, + int *csrColInd, csru2csrInfo_t info, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, double *, + const int *, int *, csru2csrInfo_t, void *); + static auto func_ptr = LoadSymbol("cusparseDcsr2csru"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, + pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCcsr2csru( + cusparseHandle_t handle, int m, int n, int nnz, + const cusparseMatDescr_t descrA, cuComplex *csrVal, const int *csrRowPtr, + int *csrColInd, csru2csrInfo_t info, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, 
int, int, const cusparseMatDescr_t, cuComplex *, + const int *, int *, csru2csrInfo_t, void *); + static auto func_ptr = LoadSymbol("cusparseCcsr2csru"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, + pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseZcsr2csru( + cusparseHandle_t handle, int m, int n, int nnz, + const cusparseMatDescr_t descrA, cuDoubleComplex *csrVal, + const int *csrRowPtr, int *csrColInd, csru2csrInfo_t info, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, + cuDoubleComplex *, const int *, int *, csru2csrInfo_t, void *); + static auto func_ptr = LoadSymbol("cusparseZcsr2csru"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, descrA, csrVal, csrRowPtr, csrColInd, info, + pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const float *A, int lda, + const float *threshold, const cusparseMatDescr_t descrC, + const float *csrSortedValC, const int *csrSortedRowPtrC, + const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, int, const float *, + const cusparseMatDescr_t, const float *, const int *, const int *, + size_t *); + static auto func_ptr = + LoadSymbol("cusparseSpruneDense2csr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const double *A, int lda, + const double *threshold, const cusparseMatDescr_t descrC, + const double *csrSortedValC, const int *csrSortedRowPtrC, + const int *csrSortedColIndC, size_t 
*pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, int, const double *, + const cusparseMatDescr_t, const double *, const int *, const int *, + size_t *); + static auto func_ptr = + LoadSymbol("cusparseDpruneDense2csr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnz( + cusparseHandle_t handle, int m, int n, const float *A, int lda, + const float *threshold, const cusparseMatDescr_t descrC, int *csrRowPtrC, + int *nnzTotalDevHostPtr, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, int, const float *, + const cusparseMatDescr_t, int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseSpruneDense2csrNnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, A, lda, threshold, descrC, csrRowPtrC, + nnzTotalDevHostPtr, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnz( + cusparseHandle_t handle, int m, int n, const double *A, int lda, + const double *threshold, const cusparseMatDescr_t descrC, + int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, int, const double *, + const cusparseMatDescr_t, int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseDpruneDense2csrNnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedRowPtrC, + nnzTotalDevHostPtr, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csr( + cusparseHandle_t handle, int m, int n, const float *A, int lda, + const float *threshold, const cusparseMatDescr_t descrC, + float *csrSortedValC, const int 
*csrSortedRowPtrC, int *csrSortedColIndC, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, int, const float *, + const cusparseMatDescr_t, float *, const int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseSpruneDense2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csr( + cusparseHandle_t handle, int m, int n, const double *A, int lda, + const double *threshold, const cusparseMatDescr_t descrC, + double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, int, const double *, + const cusparseMatDescr_t, double *, const int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseDpruneDense2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, A, lda, threshold, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr_bufferSizeExt( + cusparseHandle_t handle, int m, int n, int nnzA, + const cusparseMatDescr_t descrA, const float *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const float *threshold, const cusparseMatDescr_t descrC, + const float *csrSortedValC, const int *csrSortedRowPtrC, + const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, + const int *, const int *, const float *, const cusparseMatDescr_t, + const float *, const int *, const int *, size_t *); + static auto func_ptr = + LoadSymbol("cusparseSpruneCsr2csr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return 
func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, threshold, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr_bufferSizeExt( + cusparseHandle_t handle, int m, int n, int nnzA, + const cusparseMatDescr_t descrA, const double *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const double *threshold, const cusparseMatDescr_t descrC, + const double *csrSortedValC, const int *csrSortedRowPtrC, + const int *csrSortedColIndC, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, + const int *, const int *, const double *, const cusparseMatDescr_t, + const double *, const int *, const int *, size_t *); + static auto func_ptr = + LoadSymbol("cusparseDpruneCsr2csr_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, threshold, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnz( + cusparseHandle_t handle, int m, int n, int nnzA, + const cusparseMatDescr_t descrA, const float *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const float *threshold, const cusparseMatDescr_t descrC, + int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, + const int *, const int *, const float *, const cusparseMatDescr_t, int *, + int *, void *); + static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csrNnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, threshold, descrC, 
csrSortedRowPtrC, + nnzTotalDevHostPtr, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnz( + cusparseHandle_t handle, int m, int n, int nnzA, + const cusparseMatDescr_t descrA, const double *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const double *threshold, const cusparseMatDescr_t descrC, + int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, + const int *, const int *, const double *, const cusparseMatDescr_t, int *, + int *, void *); + static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csrNnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, threshold, descrC, csrSortedRowPtrC, + nnzTotalDevHostPtr, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csr( + cusparseHandle_t handle, int m, int n, int nnzA, + const cusparseMatDescr_t descrA, const float *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const float *threshold, const cusparseMatDescr_t descrC, + float *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, + const int *, const int *, const float *, const cusparseMatDescr_t, + float *, const int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseSpruneCsr2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, threshold, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csr( + cusparseHandle_t handle, int m, int n, int nnzA, + const cusparseMatDescr_t descrA, const double 
*csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, + const double *threshold, const cusparseMatDescr_t descrC, + double *csrSortedValC, const int *csrSortedRowPtrC, int *csrSortedColIndC, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, + const int *, const int *, const double *, const cusparseMatDescr_t, + double *, const int *, int *, void *); + static auto func_ptr = LoadSymbol("cusparseDpruneCsr2csr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, threshold, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const float *A, int lda, + float percentage, const cusparseMatDescr_t descrC, + const float *csrSortedValC, const int *csrSortedRowPtrC, + const int *csrSortedColIndC, pruneInfo_t info, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, int, float, + const cusparseMatDescr_t, const float *, const int *, const int *, + pruneInfo_t, size_t *); + static auto func_ptr = + LoadSymbol("cusparseSpruneDense2csrByPercentage_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage_bufferSizeExt( + cusparseHandle_t handle, int m, int n, const double *A, int lda, + float percentage, const cusparseMatDescr_t descrC, + const double *csrSortedValC, const int *csrSortedRowPtrC, + const int *csrSortedColIndC, pruneInfo_t info, size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + 
cusparseHandle_t, int, int, const double *, int, float, + const cusparseMatDescr_t, const double *, const int *, const int *, + pruneInfo_t, size_t *); + static auto func_ptr = + LoadSymbol("cusparseDpruneDense2csrByPercentage_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrNnzByPercentage( + cusparseHandle_t handle, int m, int n, const float *A, int lda, + float percentage, const cusparseMatDescr_t descrC, int *csrRowPtrC, + int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, int, float, + const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); + static auto func_ptr = + LoadSymbol("cusparseSpruneDense2csrNnzByPercentage"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, + nnzTotalDevHostPtr, info, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrNnzByPercentage( + cusparseHandle_t handle, int m, int n, const double *A, int lda, + float percentage, const cusparseMatDescr_t descrC, int *csrRowPtrC, + int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, int, float, + const cusparseMatDescr_t, int *, int *, pruneInfo_t, void *); + static auto func_ptr = + LoadSymbol("cusparseDpruneDense2csrNnzByPercentage"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, A, lda, percentage, descrC, csrRowPtrC, + nnzTotalDevHostPtr, info, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpruneDense2csrByPercentage( + cusparseHandle_t handle, int m, int n, const float *A, int lda, + float percentage, const cusparseMatDescr_t descrC, 
float *csrSortedValC, + const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const float *, int, float, + const cusparseMatDescr_t, float *, const int *, int *, pruneInfo_t, + void *); + static auto func_ptr = + LoadSymbol("cusparseSpruneDense2csrByPercentage"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDpruneDense2csrByPercentage( + cusparseHandle_t handle, int m, int n, const double *A, int lda, + float percentage, const cusparseMatDescr_t descrC, double *csrSortedValC, + const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, const double *, int, float, + const cusparseMatDescr_t, double *, const int *, int *, pruneInfo_t, + void *); + static auto func_ptr = + LoadSymbol("cusparseDpruneDense2csrByPercentage"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, A, lda, percentage, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage_bufferSizeExt( + cusparseHandle_t handle, int m, int n, int nnzA, + const cusparseMatDescr_t descrA, const float *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, + const cusparseMatDescr_t descrC, const float *csrSortedValC, + const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, + size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, + const int *, const int *, float, const cusparseMatDescr_t, const float *, + const int *, const int *, 
pruneInfo_t, size_t *); + static auto func_ptr = + LoadSymbol("cusparseSpruneCsr2csrByPercentage_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, percentage, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage_bufferSizeExt( + cusparseHandle_t handle, int m, int n, int nnzA, + const cusparseMatDescr_t descrA, const double *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, + const cusparseMatDescr_t descrC, const double *csrSortedValC, + const int *csrSortedRowPtrC, const int *csrSortedColIndC, pruneInfo_t info, + size_t *pBufferSizeInBytes) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, + const int *, const int *, float, const cusparseMatDescr_t, const double *, + const int *, const int *, pruneInfo_t, size_t *); + static auto func_ptr = + LoadSymbol("cusparseDpruneCsr2csrByPercentage_bufferSizeExt"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, percentage, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, info, pBufferSizeInBytes); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrNnzByPercentage( + cusparseHandle_t handle, int m, int n, int nnzA, + const cusparseMatDescr_t descrA, const float *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, + const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, + int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, + const int *, const int *, float, const cusparseMatDescr_t, int *, int *, + 
pruneInfo_t, void *); + static auto func_ptr = + LoadSymbol("cusparseSpruneCsr2csrNnzByPercentage"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, percentage, descrC, csrSortedRowPtrC, + nnzTotalDevHostPtr, info, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrNnzByPercentage( + cusparseHandle_t handle, int m, int n, int nnzA, + const cusparseMatDescr_t descrA, const double *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, + const cusparseMatDescr_t descrC, int *csrSortedRowPtrC, + int *nnzTotalDevHostPtr, pruneInfo_t info, void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, + const int *, const int *, float, const cusparseMatDescr_t, int *, int *, + pruneInfo_t, void *); + static auto func_ptr = + LoadSymbol("cusparseDpruneCsr2csrNnzByPercentage"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, percentage, descrC, csrSortedRowPtrC, + nnzTotalDevHostPtr, info, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpruneCsr2csrByPercentage( + cusparseHandle_t handle, int m, int n, int nnzA, + const cusparseMatDescr_t descrA, const float *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, + const cusparseMatDescr_t descrC, float *csrSortedValC, + const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const float *, + const int *, const int *, float, const cusparseMatDescr_t, float *, + const int *, int *, pruneInfo_t, void *); + static auto func_ptr = + LoadSymbol("cusparseSpruneCsr2csrByPercentage"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, percentage, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDpruneCsr2csrByPercentage( + cusparseHandle_t handle, int m, int n, int nnzA, + const cusparseMatDescr_t descrA, const double *csrSortedValA, + const int *csrSortedRowPtrA, const int *csrSortedColIndA, float percentage, + const cusparseMatDescr_t descrC, double *csrSortedValC, + const int *csrSortedRowPtrC, int *csrSortedColIndC, pruneInfo_t info, + void *pBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const cusparseMatDescr_t, const double *, + const int *, const int *, float, const cusparseMatDescr_t, double *, + const int *, int *, pruneInfo_t, void *); + static auto func_ptr = + LoadSymbol("cusparseDpruneCsr2csrByPercentage"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnzA, descrA, csrSortedValA, csrSortedRowPtrA, + csrSortedColIndA, percentage, descrC, csrSortedValC, + csrSortedRowPtrC, csrSortedColIndC, info, pBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx2( + cusparseHandle_t handle, int m, int n, int nnz, const void *csrVal, + const int *csrRowPtr, const int *csrColInd, void *cscVal, int *cscColPtr, + int *cscRowInd, cudaDataType valType, cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, void *buffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const void *, const int *, const int *, + void *, int *, int *, cudaDataType, cusparseAction_t, cusparseIndexBase_t, + cusparseCsr2CscAlg_t, void *); + static auto func_ptr = LoadSymbol("cusparseCsr2cscEx2"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, + cscColPtr, cscRowInd, valType, copyValues, idxBase, 
alg, + buffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseCsr2cscEx2_bufferSize( + cusparseHandle_t handle, int m, int n, int nnz, const void *csrVal, + const int *csrRowPtr, const int *csrColInd, void *cscVal, int *cscColPtr, + int *cscRowInd, cudaDataType valType, cusparseAction_t copyValues, + cusparseIndexBase_t idxBase, cusparseCsr2CscAlg_t alg, size_t *bufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, int, int, int, const void *, const int *, const int *, + void *, int *, int *, cudaDataType, cusparseAction_t, cusparseIndexBase_t, + cusparseCsr2CscAlg_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseCsr2cscEx2_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, m, n, nnz, csrVal, csrRowPtr, csrColInd, cscVal, + cscColPtr, cscRowInd, valType, copyValues, idxBase, alg, + bufferSize); +} + +cusparseStatus_t CUSPARSEAPI +cusparseCreateSpVec(cusparseSpVecDescr_t *spVecDescr, int64_t size, int64_t nnz, + void *indices, void *values, cusparseIndexType_t idxType, + cusparseIndexBase_t idxBase, cudaDataType valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseSpVecDescr_t *, int64_t, int64_t, void *, void *, + cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); + static auto func_ptr = LoadSymbol("cusparseCreateSpVec"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, + valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseCreateConstSpVec( + cusparseConstSpVecDescr_t *spVecDescr, int64_t size, int64_t nnz, + const void *indices, const void *values, cusparseIndexType_t idxType, + cusparseIndexBase_t idxBase, cudaDataType valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstSpVecDescr_t *, int64_t, int64_t, const void *, const void *, + cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); + static auto func_ptr = LoadSymbol("cusparseCreateConstSpVec"); + 
if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, + valueType); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDestroySpVec(cusparseConstSpVecDescr_t spVecDescr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpVecDescr_t); + static auto func_ptr = LoadSymbol("cusparseDestroySpVec"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spVecDescr); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpVecGet(cusparseSpVecDescr_t spVecDescr, + int64_t *size, int64_t *nnz, + void **indices, void **values, + cusparseIndexType_t *idxType, + cusparseIndexBase_t *idxBase, + cudaDataType *valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseSpVecDescr_t, int64_t *, int64_t *, void **, void **, + cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); + static auto func_ptr = LoadSymbol("cusparseSpVecGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, + valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseConstSpVecGet( + cusparseConstSpVecDescr_t spVecDescr, int64_t *size, int64_t *nnz, + const void **indices, const void **values, cusparseIndexType_t *idxType, + cusparseIndexBase_t *idxBase, cudaDataType *valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstSpVecDescr_t, int64_t *, int64_t *, const void **, + const void **, cusparseIndexType_t *, cusparseIndexBase_t *, + cudaDataType *); + static auto func_ptr = LoadSymbol("cusparseConstSpVecGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spVecDescr, size, nnz, indices, values, idxType, idxBase, + valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpVecGetIndexBase( + cusparseConstSpVecDescr_t spVecDescr, cusparseIndexBase_t *idxBase) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpVecDescr_t, + cusparseIndexBase_t *); + static auto func_ptr = 
LoadSymbol("cusparseSpVecGetIndexBase"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spVecDescr, idxBase); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpVecGetValues(cusparseSpVecDescr_t spVecDescr, void **values) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseSpVecDescr_t, void **); + static auto func_ptr = LoadSymbol("cusparseSpVecGetValues"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spVecDescr, values); +} + +cusparseStatus_t CUSPARSEAPI cusparseConstSpVecGetValues( + cusparseConstSpVecDescr_t spVecDescr, const void **values) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpVecDescr_t, const void **); + static auto func_ptr = LoadSymbol("cusparseConstSpVecGetValues"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spVecDescr, values); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpVecSetValues(cusparseSpVecDescr_t spVecDescr, void *values) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpVecDescr_t, void *); + static auto func_ptr = LoadSymbol("cusparseSpVecSetValues"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spVecDescr, values); +} + +cusparseStatus_t CUSPARSEAPI +cusparseCreateDnVec(cusparseDnVecDescr_t *dnVecDescr, int64_t size, + void *values, cudaDataType valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseDnVecDescr_t *, int64_t, void *, cudaDataType); + static auto func_ptr = LoadSymbol("cusparseCreateDnVec"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnVecDescr, size, values, valueType); +} + +cusparseStatus_t CUSPARSEAPI +cusparseCreateConstDnVec(cusparseConstDnVecDescr_t *dnVecDescr, int64_t size, + const void *values, cudaDataType valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstDnVecDescr_t *, int64_t, const void *, cudaDataType); + static auto func_ptr = LoadSymbol("cusparseCreateConstDnVec"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(dnVecDescr, size, values, valueType); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDestroyDnVec(cusparseConstDnVecDescr_t dnVecDescr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstDnVecDescr_t); + static auto func_ptr = LoadSymbol("cusparseDestroyDnVec"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnVecDescr); +} + +cusparseStatus_t CUSPARSEAPI cusparseDnVecGet(cusparseDnVecDescr_t dnVecDescr, + int64_t *size, void **values, + cudaDataType *valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseDnVecDescr_t, int64_t *, void **, cudaDataType *); + static auto func_ptr = LoadSymbol("cusparseDnVecGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnVecDescr, size, values, valueType); +} + +cusparseStatus_t CUSPARSEAPI +cusparseConstDnVecGet(cusparseConstDnVecDescr_t dnVecDescr, int64_t *size, + const void **values, cudaDataType *valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstDnVecDescr_t, int64_t *, const void **, cudaDataType *); + static auto func_ptr = LoadSymbol("cusparseConstDnVecGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnVecDescr, size, values, valueType); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDnVecGetValues(cusparseDnVecDescr_t dnVecDescr, void **values) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseDnVecDescr_t, void **); + static auto func_ptr = LoadSymbol("cusparseDnVecGetValues"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnVecDescr, values); +} + +cusparseStatus_t CUSPARSEAPI cusparseConstDnVecGetValues( + cusparseConstDnVecDescr_t dnVecDescr, const void **values) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseConstDnVecDescr_t, const void **); + static auto func_ptr = LoadSymbol("cusparseConstDnVecGetValues"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnVecDescr, values); +} 
+ +cusparseStatus_t CUSPARSEAPI +cusparseDnVecSetValues(cusparseDnVecDescr_t dnVecDescr, void *values) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnVecDescr_t, void *); + static auto func_ptr = LoadSymbol("cusparseDnVecSetValues"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnVecDescr, values); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDestroySpMat(cusparseConstSpMatDescr_t spMatDescr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpMatDescr_t); + static auto func_ptr = LoadSymbol("cusparseDestroySpMat"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpMatGetFormat( + cusparseConstSpMatDescr_t spMatDescr, cusparseFormat_t *format) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpMatDescr_t, + cusparseFormat_t *); + static auto func_ptr = LoadSymbol("cusparseSpMatGetFormat"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, format); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpMatGetIndexBase( + cusparseConstSpMatDescr_t spMatDescr, cusparseIndexBase_t *idxBase) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpMatDescr_t, + cusparseIndexBase_t *); + static auto func_ptr = LoadSymbol("cusparseSpMatGetIndexBase"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, idxBase); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpMatGetValues(cusparseSpMatDescr_t spMatDescr, void **values) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void **); + static auto func_ptr = LoadSymbol("cusparseSpMatGetValues"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, values); +} + +cusparseStatus_t CUSPARSEAPI cusparseConstSpMatGetValues( + cusparseConstSpMatDescr_t spMatDescr, const void **values) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpMatDescr_t, const void **); + 
static auto func_ptr = LoadSymbol("cusparseConstSpMatGetValues"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, values); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpMatSetValues(cusparseSpMatDescr_t spMatDescr, void *values) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *); + static auto func_ptr = LoadSymbol("cusparseSpMatSetValues"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, values); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpMatGetSize(cusparseConstSpMatDescr_t spMatDescr, int64_t *rows, + int64_t *cols, int64_t *nnz) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstSpMatDescr_t, int64_t *, int64_t *, int64_t *); + static auto func_ptr = LoadSymbol("cusparseSpMatGetSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpMatGetStridedBatch( + cusparseConstSpMatDescr_t spMatDescr, int *batchCount) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseConstSpMatDescr_t, int *); + static auto func_ptr = LoadSymbol("cusparseSpMatGetStridedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, batchCount); +} + +cusparseStatus_t CUSPARSEAPI cusparseCooSetStridedBatch( + cusparseSpMatDescr_t spMatDescr, int batchCount, int64_t batchStride) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, int, int64_t); + static auto func_ptr = LoadSymbol("cusparseCooSetStridedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, batchCount, batchStride); +} + +cusparseStatus_t CUSPARSEAPI cusparseCsrSetStridedBatch( + cusparseSpMatDescr_t spMatDescr, int batchCount, int64_t offsetsBatchStride, + int64_t columnsValuesBatchStride) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, int, + int64_t, int64_t); + static auto func_ptr = 
LoadSymbol("cusparseCsrSetStridedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, batchCount, offsetsBatchStride, + columnsValuesBatchStride); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpMatGetAttribute( + cusparseConstSpMatDescr_t spMatDescr, cusparseSpMatAttribute_t attribute, + void *data, size_t dataSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstSpMatDescr_t, cusparseSpMatAttribute_t, void *, size_t); + static auto func_ptr = LoadSymbol("cusparseSpMatGetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, attribute, data, dataSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpMatSetAttribute( + cusparseSpMatDescr_t spMatDescr, cusparseSpMatAttribute_t attribute, + void *data, size_t dataSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseSpMatDescr_t, cusparseSpMatAttribute_t, void *, size_t); + static auto func_ptr = LoadSymbol("cusparseSpMatSetAttribute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, attribute, data, dataSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseCreateCsr( + cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, + void *csrRowOffsets, void *csrColInd, void *csrValues, + cusparseIndexType_t csrRowOffsetsType, cusparseIndexType_t csrColIndType, + cusparseIndexBase_t idxBase, cudaDataType valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, + cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t, + cudaDataType); + static auto func_ptr = LoadSymbol("cusparseCreateCsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, + csrValues, csrRowOffsetsType, csrColIndType, idxBase, + valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseCreateConstCsr( + cusparseConstSpMatDescr_t *spMatDescr, int64_t 
rows, int64_t cols, + int64_t nnz, const void *csrRowOffsets, const void *csrColInd, + const void *csrValues, cusparseIndexType_t csrRowOffsetsType, + cusparseIndexType_t csrColIndType, cusparseIndexBase_t idxBase, + cudaDataType valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstSpMatDescr_t *, int64_t, int64_t, int64_t, const void *, + const void *, const void *, cusparseIndexType_t, cusparseIndexType_t, + cusparseIndexBase_t, cudaDataType); + static auto func_ptr = LoadSymbol("cusparseCreateConstCsr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, + csrValues, csrRowOffsetsType, csrColIndType, idxBase, + valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseCreateCsc( + cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, int64_t nnz, + void *cscColOffsets, void *cscRowInd, void *cscValues, + cusparseIndexType_t cscColOffsetsType, cusparseIndexType_t cscRowIndType, + cusparseIndexBase_t idxBase, cudaDataType valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, + cusparseIndexType_t, cusparseIndexType_t, cusparseIndexBase_t, + cudaDataType); + static auto func_ptr = LoadSymbol("cusparseCreateCsc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz, cscColOffsets, cscRowInd, + cscValues, cscColOffsetsType, cscRowIndType, idxBase, + valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseCreateConstCsc( + cusparseConstSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, + int64_t nnz, const void *cscColOffsets, const void *cscRowInd, + const void *cscValues, cusparseIndexType_t cscColOffsetsType, + cusparseIndexType_t cscRowIndType, cusparseIndexBase_t idxBase, + cudaDataType valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstSpMatDescr_t *, int64_t, int64_t, int64_t, const void *, + const 
void *, const void *, cusparseIndexType_t, cusparseIndexType_t, + cusparseIndexBase_t, cudaDataType); + static auto func_ptr = LoadSymbol("cusparseCreateConstCsc"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz, cscColOffsets, cscRowInd, + cscValues, cscColOffsetsType, cscRowIndType, idxBase, + valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseCsrGet( + cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, int64_t *nnz, + void **csrRowOffsets, void **csrColInd, void **csrValues, + cusparseIndexType_t *csrRowOffsetsType, cusparseIndexType_t *csrColIndType, + cusparseIndexBase_t *idxBase, cudaDataType *valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, void **, + void **, cusparseIndexType_t *, cusparseIndexType_t *, + cusparseIndexBase_t *, cudaDataType *); + static auto func_ptr = LoadSymbol("cusparseCsrGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, + csrValues, csrRowOffsetsType, csrColIndType, idxBase, + valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseConstCsrGet( + cusparseConstSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, + int64_t *nnz, const void **csrRowOffsets, const void **csrColInd, + const void **csrValues, cusparseIndexType_t *csrRowOffsetsType, + cusparseIndexType_t *csrColIndType, cusparseIndexBase_t *idxBase, + cudaDataType *valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstSpMatDescr_t, int64_t *, int64_t *, int64_t *, const void **, + const void **, const void **, cusparseIndexType_t *, + cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); + static auto func_ptr = LoadSymbol("cusparseConstCsrGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz, csrRowOffsets, csrColInd, + csrValues, csrRowOffsetsType, csrColIndType, 
idxBase, + valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseCscGet( + cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, int64_t *nnz, + void **cscColOffsets, void **cscRowInd, void **cscValues, + cusparseIndexType_t *cscColOffsetsType, cusparseIndexType_t *cscRowIndType, + cusparseIndexBase_t *idxBase, cudaDataType *valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, void **, void **, + void **, cusparseIndexType_t *, cusparseIndexType_t *, + cusparseIndexBase_t *, cudaDataType *); + static auto func_ptr = LoadSymbol("cusparseCscGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz, cscColOffsets, cscRowInd, + cscValues, cscColOffsetsType, cscRowIndType, idxBase, + valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseConstCscGet( + cusparseConstSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, + int64_t *nnz, const void **cscColOffsets, const void **cscRowInd, + const void **cscValues, cusparseIndexType_t *cscColOffsetsType, + cusparseIndexType_t *cscRowIndType, cusparseIndexBase_t *idxBase, + cudaDataType *valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstSpMatDescr_t, int64_t *, int64_t *, int64_t *, const void **, + const void **, const void **, cusparseIndexType_t *, + cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); + static auto func_ptr = LoadSymbol("cusparseConstCscGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz, cscColOffsets, cscRowInd, + cscValues, cscColOffsetsType, cscRowIndType, idxBase, + valueType); +} + +cusparseStatus_t CUSPARSEAPI +cusparseCsrSetPointers(cusparseSpMatDescr_t spMatDescr, void *csrRowOffsets, + void *csrColInd, void *csrValues) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *, + void *, void *); + static auto func_ptr = LoadSymbol("cusparseCsrSetPointers"); + if 
(!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, csrRowOffsets, csrColInd, csrValues); +} + +cusparseStatus_t CUSPARSEAPI +cusparseCscSetPointers(cusparseSpMatDescr_t spMatDescr, void *cscColOffsets, + void *cscRowInd, void *cscValues) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *, + void *, void *); + static auto func_ptr = LoadSymbol("cusparseCscSetPointers"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, cscColOffsets, cscRowInd, cscValues); +} + +cusparseStatus_t CUSPARSEAPI cusparseCreateCoo(cusparseSpMatDescr_t *spMatDescr, + int64_t rows, int64_t cols, + int64_t nnz, void *cooRowInd, + void *cooColInd, void *cooValues, + cusparseIndexType_t cooIdxType, + cusparseIndexBase_t idxBase, + cudaDataType valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, void *, void *, void *, + cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); + static auto func_ptr = LoadSymbol("cusparseCreateCoo"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, + cooIdxType, idxBase, valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseCreateConstCoo( + cusparseConstSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, + int64_t nnz, const void *cooRowInd, const void *cooColInd, + const void *cooValues, cusparseIndexType_t cooIdxType, + cusparseIndexBase_t idxBase, cudaDataType valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstSpMatDescr_t *, int64_t, int64_t, int64_t, const void *, + const void *, const void *, cusparseIndexType_t, cusparseIndexBase_t, + cudaDataType); + static auto func_ptr = LoadSymbol("cusparseCreateConstCoo"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, + cooIdxType, idxBase, valueType); +} + +cusparseStatus_t 
CUSPARSEAPI cusparseCooGet( + cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, int64_t *nnz, + void **cooRowInd, // COO row indices + void **cooColInd, // COO column indices + void **cooValues, // COO values + cusparseIndexType_t *idxType, cusparseIndexBase_t *idxBase, + cudaDataType *valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, + void **, // COO row indices + void **, // COO column indices + void **, // COO values + cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); + static auto func_ptr = LoadSymbol("cusparseCooGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, + idxType, idxBase, valueType); +} + +cusparseStatus_t CUSPARSEAPI +cusparseConstCooGet(cusparseConstSpMatDescr_t spMatDescr, int64_t *rows, + int64_t *cols, int64_t *nnz, + const void **cooRowInd, // COO row indices + const void **cooColInd, // COO column indices + const void **cooValues, // COO values + cusparseIndexType_t *idxType, cusparseIndexBase_t *idxBase, + cudaDataType *valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstSpMatDescr_t, int64_t *, int64_t *, int64_t *, + const void **, // COO row indices + const void **, // COO column indices + const void **, // COO values + cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); + static auto func_ptr = LoadSymbol("cusparseConstCooGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, nnz, cooRowInd, cooColInd, cooValues, + idxType, idxBase, valueType); +} + +cusparseStatus_t CUSPARSEAPI +cusparseCooSetPointers(cusparseSpMatDescr_t spMatDescr, void *cooRows, + void *cooColumns, void *cooValues) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMatDescr_t, void *, + void *, void *); + static auto func_ptr = LoadSymbol("cusparseCooSetPointers"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(spMatDescr, cooRows, cooColumns, cooValues); +} + +cusparseStatus_t CUSPARSEAPI cusparseCreateBlockedEll( + cusparseSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, + int64_t ellBlockSize, int64_t ellCols, void *ellColInd, void *ellValue, + cusparseIndexType_t ellIdxType, cusparseIndexBase_t idxBase, + cudaDataType valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseSpMatDescr_t *, int64_t, int64_t, int64_t, int64_t, void *, + void *, cusparseIndexType_t, cusparseIndexBase_t, cudaDataType); + static auto func_ptr = LoadSymbol("cusparseCreateBlockedEll"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, ellBlockSize, ellCols, ellColInd, + ellValue, ellIdxType, idxBase, valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseCreateConstBlockedEll( + cusparseConstSpMatDescr_t *spMatDescr, int64_t rows, int64_t cols, + int64_t ellBlockSize, int64_t ellCols, const void *ellColInd, + const void *ellValue, cusparseIndexType_t ellIdxType, + cusparseIndexBase_t idxBase, cudaDataType valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstSpMatDescr_t *, int64_t, int64_t, int64_t, int64_t, + const void *, const void *, cusparseIndexType_t, cusparseIndexBase_t, + cudaDataType); + static auto func_ptr = LoadSymbol("cusparseCreateConstBlockedEll"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, ellBlockSize, ellCols, ellColInd, + ellValue, ellIdxType, idxBase, valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseBlockedEllGet( + cusparseSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, + int64_t *ellBlockSize, int64_t *ellCols, void **ellColInd, void **ellValue, + cusparseIndexType_t *ellIdxType, cusparseIndexBase_t *idxBase, + cudaDataType *valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseSpMatDescr_t, int64_t *, int64_t *, int64_t *, int64_t *, void **, + void **, 
cusparseIndexType_t *, cusparseIndexBase_t *, cudaDataType *); + static auto func_ptr = LoadSymbol("cusparseBlockedEllGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, ellBlockSize, ellCols, ellColInd, + ellValue, ellIdxType, idxBase, valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseConstBlockedEllGet( + cusparseConstSpMatDescr_t spMatDescr, int64_t *rows, int64_t *cols, + int64_t *ellBlockSize, int64_t *ellCols, const void **ellColInd, + const void **ellValue, cusparseIndexType_t *ellIdxType, + cusparseIndexBase_t *idxBase, cudaDataType *valueType) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstSpMatDescr_t, int64_t *, int64_t *, int64_t *, int64_t *, + const void **, const void **, cusparseIndexType_t *, + cusparseIndexBase_t *, cudaDataType *); + static auto func_ptr = LoadSymbol("cusparseConstBlockedEllGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spMatDescr, rows, cols, ellBlockSize, ellCols, ellColInd, + ellValue, ellIdxType, idxBase, valueType); +} + +cusparseStatus_t CUSPARSEAPI cusparseCreateDnMat( + cusparseDnMatDescr_t *dnMatDescr, int64_t rows, int64_t cols, int64_t ld, + void *values, cudaDataType valueType, cusparseOrder_t order) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseDnMatDescr_t *, int64_t, int64_t, int64_t, void *, cudaDataType, + cusparseOrder_t); + static auto func_ptr = LoadSymbol("cusparseCreateDnMat"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnMatDescr, rows, cols, ld, values, valueType, order); +} + +cusparseStatus_t CUSPARSEAPI +cusparseCreateConstDnMat(cusparseConstDnMatDescr_t *dnMatDescr, int64_t rows, + int64_t cols, int64_t ld, const void *values, + cudaDataType valueType, cusparseOrder_t order) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstDnMatDescr_t *, int64_t, int64_t, int64_t, const void *, + cudaDataType, cusparseOrder_t); + static auto func_ptr = 
LoadSymbol("cusparseCreateConstDnMat"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnMatDescr, rows, cols, ld, values, valueType, order); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDestroyDnMat(cusparseConstDnMatDescr_t dnMatDescr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstDnMatDescr_t); + static auto func_ptr = LoadSymbol("cusparseDestroyDnMat"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnMatDescr); +} + +cusparseStatus_t CUSPARSEAPI cusparseDnMatGet(cusparseDnMatDescr_t dnMatDescr, + int64_t *rows, int64_t *cols, + int64_t *ld, void **values, + cudaDataType *type, + cusparseOrder_t *order) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseDnMatDescr_t, int64_t *, int64_t *, int64_t *, void **, + cudaDataType *, cusparseOrder_t *); + static auto func_ptr = LoadSymbol("cusparseDnMatGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnMatDescr, rows, cols, ld, values, type, order); +} + +cusparseStatus_t CUSPARSEAPI +cusparseConstDnMatGet(cusparseConstDnMatDescr_t dnMatDescr, int64_t *rows, + int64_t *cols, int64_t *ld, const void **values, + cudaDataType *type, cusparseOrder_t *order) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseConstDnMatDescr_t, int64_t *, int64_t *, int64_t *, const void **, + cudaDataType *, cusparseOrder_t *); + static auto func_ptr = LoadSymbol("cusparseConstDnMatGet"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnMatDescr, rows, cols, ld, values, type, order); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDnMatGetValues(cusparseDnMatDescr_t dnMatDescr, void **values) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, void **); + static auto func_ptr = LoadSymbol("cusparseDnMatGetValues"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnMatDescr, values); +} + +cusparseStatus_t CUSPARSEAPI cusparseConstDnMatGetValues( + cusparseConstDnMatDescr_t 
dnMatDescr, const void **values) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseConstDnMatDescr_t, const void **); + static auto func_ptr = LoadSymbol("cusparseConstDnMatGetValues"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnMatDescr, values); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDnMatSetValues(cusparseDnMatDescr_t dnMatDescr, void *values) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, void *); + static auto func_ptr = LoadSymbol("cusparseDnMatSetValues"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnMatDescr, values); +} + +cusparseStatus_t CUSPARSEAPI cusparseDnMatSetStridedBatch( + cusparseDnMatDescr_t dnMatDescr, int batchCount, int64_t batchStride) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseDnMatDescr_t, int, int64_t); + static auto func_ptr = LoadSymbol("cusparseDnMatSetStridedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnMatDescr, batchCount, batchStride); +} + +cusparseStatus_t CUSPARSEAPI +cusparseDnMatGetStridedBatch(cusparseConstDnMatDescr_t dnMatDescr, + int *batchCount, int64_t *batchStride) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseConstDnMatDescr_t, + int *, int64_t *); + static auto func_ptr = LoadSymbol("cusparseDnMatGetStridedBatch"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(dnMatDescr, batchCount, batchStride); +} + +cusparseStatus_t CUSPARSEAPI cusparseAxpby(cusparseHandle_t handle, + const void *alpha, + cusparseConstSpVecDescr_t vecX, + const void *beta, + cusparseDnVecDescr_t vecY) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, const void *, cusparseConstSpVecDescr_t, const void *, + cusparseDnVecDescr_t); + static auto func_ptr = LoadSymbol("cusparseAxpby"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, alpha, vecX, beta, vecY); +} + +cusparseStatus_t CUSPARSEAPI 
cusparseGather(cusparseHandle_t handle, + cusparseConstDnVecDescr_t vecY, + cusparseSpVecDescr_t vecX) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseConstDnVecDescr_t, cusparseSpVecDescr_t); + static auto func_ptr = LoadSymbol("cusparseGather"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, vecY, vecX); +} + +cusparseStatus_t CUSPARSEAPI cusparseScatter(cusparseHandle_t handle, + cusparseConstSpVecDescr_t vecX, + cusparseDnVecDescr_t vecY) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseConstSpVecDescr_t, cusparseDnVecDescr_t); + static auto func_ptr = LoadSymbol("cusparseScatter"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, vecX, vecY); +} + +cusparseStatus_t CUSPARSEAPI cusparseRot(cusparseHandle_t handle, + const void *c_coeff, + const void *s_coeff, + cusparseSpVecDescr_t vecX, + cusparseDnVecDescr_t vecY) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, const void *, const void *, cusparseSpVecDescr_t, + cusparseDnVecDescr_t); + static auto func_ptr = LoadSymbol("cusparseRot"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, c_coeff, s_coeff, vecX, vecY); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpVV_bufferSize( + cusparseHandle_t handle, cusparseOperation_t opX, + cusparseConstSpVecDescr_t vecX, cusparseConstDnVecDescr_t vecY, + const void *result, cudaDataType computeType, size_t *bufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseConstSpVecDescr_t, + cusparseConstDnVecDescr_t, const void *, cudaDataType, size_t *); + static auto func_ptr = LoadSymbol("cusparseSpVV_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opX, vecX, vecY, result, computeType, bufferSize); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpVV(cusparseHandle_t handle, cusparseOperation_t opX, + 
cusparseConstSpVecDescr_t vecX, cusparseConstDnVecDescr_t vecY, + void *result, cudaDataType computeType, void *externalBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseConstSpVecDescr_t, + cusparseConstDnVecDescr_t, void *, cudaDataType, void *); + static auto func_ptr = LoadSymbol("cusparseSpVV"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opX, vecX, vecY, result, computeType, externalBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSparseToDense_bufferSize( + cusparseHandle_t handle, cusparseConstSpMatDescr_t matA, + cusparseDnMatDescr_t matB, cusparseSparseToDenseAlg_t alg, + size_t *bufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseConstSpMatDescr_t, cusparseDnMatDescr_t, + cusparseSparseToDenseAlg_t, size_t *); + static auto func_ptr = + LoadSymbol("cusparseSparseToDense_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, matA, matB, alg, bufferSize); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSparseToDense(cusparseHandle_t handle, cusparseConstSpMatDescr_t matA, + cusparseDnMatDescr_t matB, cusparseSparseToDenseAlg_t alg, + void *externalBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseConstSpMatDescr_t, cusparseDnMatDescr_t, + cusparseSparseToDenseAlg_t, void *); + static auto func_ptr = LoadSymbol("cusparseSparseToDense"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, matA, matB, alg, externalBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDenseToSparse_bufferSize( + cusparseHandle_t handle, cusparseConstDnMatDescr_t matA, + cusparseSpMatDescr_t matB, cusparseDenseToSparseAlg_t alg, + size_t *bufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseConstDnMatDescr_t, cusparseSpMatDescr_t, + cusparseDenseToSparseAlg_t, size_t *); + static auto func_ptr = + 
LoadSymbol("cusparseDenseToSparse_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, matA, matB, alg, bufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseDenseToSparse_analysis( + cusparseHandle_t handle, cusparseConstDnMatDescr_t matA, + cusparseSpMatDescr_t matB, cusparseDenseToSparseAlg_t alg, + void *externalBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseConstDnMatDescr_t, cusparseSpMatDescr_t, + cusparseDenseToSparseAlg_t, void *); + static auto func_ptr = LoadSymbol("cusparseDenseToSparse_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, matA, matB, alg, externalBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseDenseToSparse_convert( + cusparseHandle_t handle, cusparseConstDnMatDescr_t matA, + cusparseSpMatDescr_t matB, cusparseDenseToSparseAlg_t alg, + void *externalBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseConstDnMatDescr_t, cusparseSpMatDescr_t, + cusparseDenseToSparseAlg_t, void *); + static auto func_ptr = LoadSymbol("cusparseDenseToSparse_convert"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, matA, matB, alg, externalBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpMV( + cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, + cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, + const void *beta, cusparseDnVecDescr_t vecY, cudaDataType computeType, + cusparseSpMVAlg_t alg, void *externalBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstDnVecDescr_t, const void *, + cusparseDnVecDescr_t, cudaDataType, cusparseSpMVAlg_t, void *); + static auto func_ptr = LoadSymbol("cusparseSpMV"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, + 
externalBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpMV_bufferSize( + cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, + cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, + const void *beta, cusparseDnVecDescr_t vecY, cudaDataType computeType, + cusparseSpMVAlg_t alg, size_t *bufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstDnVecDescr_t, const void *, + cusparseDnVecDescr_t, cudaDataType, cusparseSpMVAlg_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseSpMV_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, alpha, matA, vecX, beta, vecY, computeType, alg, + bufferSize); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpSV_createDescr(cusparseSpSVDescr_t *descr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpSVDescr_t *); + static auto func_ptr = LoadSymbol("cusparseSpSV_createDescr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(descr); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpSV_destroyDescr(cusparseSpSVDescr_t descr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpSVDescr_t); + static auto func_ptr = LoadSymbol("cusparseSpSV_destroyDescr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(descr); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpSV_bufferSize( + cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, + cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, + cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpSVAlg_t alg, + cusparseSpSVDescr_t spsvDescr, size_t *bufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstDnVecDescr_t, + cusparseDnVecDescr_t, cudaDataType, cusparseSpSVAlg_t, + cusparseSpSVDescr_t, size_t *); + static auto 
func_ptr = LoadSymbol("cusparseSpSV_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, alpha, matA, vecX, vecY, computeType, alg, + spsvDescr, bufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpSV_analysis( + cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, + cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, + cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpSVAlg_t alg, + cusparseSpSVDescr_t spsvDescr, void *externalBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstDnVecDescr_t, + cusparseDnVecDescr_t, cudaDataType, cusparseSpSVAlg_t, + cusparseSpSVDescr_t, void *); + static auto func_ptr = LoadSymbol("cusparseSpSV_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, alpha, matA, vecX, vecY, computeType, alg, + spsvDescr, externalBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpSV_solve( + cusparseHandle_t handle, cusparseOperation_t opA, const void *alpha, + cusparseConstSpMatDescr_t matA, cusparseConstDnVecDescr_t vecX, + cusparseDnVecDescr_t vecY, cudaDataType computeType, cusparseSpSVAlg_t alg, + cusparseSpSVDescr_t spsvDescr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstDnVecDescr_t, + cusparseDnVecDescr_t, cudaDataType, cusparseSpSVAlg_t, + cusparseSpSVDescr_t); + static auto func_ptr = LoadSymbol("cusparseSpSV_solve"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, alpha, matA, vecX, vecY, computeType, alg, + spsvDescr); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpSM_createDescr(cusparseSpSMDescr_t *descr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpSMDescr_t *); + static auto func_ptr = LoadSymbol("cusparseSpSM_createDescr"); + if (!func_ptr) return 
GetSymbolNotFoundError(); + return func_ptr(descr); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpSM_destroyDescr(cusparseSpSMDescr_t descr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpSMDescr_t); + static auto func_ptr = LoadSymbol("cusparseSpSM_destroyDescr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(descr); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpSM_bufferSize( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstSpMatDescr_t matA, + cusparseConstDnMatDescr_t matB, cusparseDnMatDescr_t matC, + cudaDataType computeType, cusparseSpSMAlg_t alg, + cusparseSpSMDescr_t spsmDescr, size_t *bufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, + cusparseDnMatDescr_t, cudaDataType, cusparseSpSMAlg_t, + cusparseSpSMDescr_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseSpSM_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, matC, computeType, alg, + spsmDescr, bufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpSM_analysis( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstSpMatDescr_t matA, + cusparseConstDnMatDescr_t matB, cusparseDnMatDescr_t matC, + cudaDataType computeType, cusparseSpSMAlg_t alg, + cusparseSpSMDescr_t spsmDescr, void *externalBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, + cusparseDnMatDescr_t, cudaDataType, cusparseSpSMAlg_t, + cusparseSpSMDescr_t, void *); + static auto func_ptr = LoadSymbol("cusparseSpSM_analysis"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, 
matB, matC, computeType, alg, + spsmDescr, externalBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpSM_solve( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstSpMatDescr_t matA, + cusparseConstDnMatDescr_t matB, cusparseDnMatDescr_t matC, + cudaDataType computeType, cusparseSpSMAlg_t alg, + cusparseSpSMDescr_t spsmDescr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, + cusparseDnMatDescr_t, cudaDataType, cusparseSpSMAlg_t, + cusparseSpSMDescr_t); + static auto func_ptr = LoadSymbol("cusparseSpSM_solve"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, matC, computeType, alg, + spsmDescr); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpMM_bufferSize( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstSpMatDescr_t matA, + cusparseConstDnMatDescr_t matB, const void *beta, cusparseDnMatDescr_t matC, + cudaDataType computeType, cusparseSpMMAlg_t alg, size_t *bufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, const void *, + cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseSpMM_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, + alg, bufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpMM_preprocess( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstSpMatDescr_t matA, + cusparseConstDnMatDescr_t matB, const void *beta, cusparseDnMatDescr_t matC, + cudaDataType computeType, cusparseSpMMAlg_t alg, 
void *externalBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, const void *, + cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, void *); + static auto func_ptr = LoadSymbol("cusparseSpMM_preprocess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, + alg, externalBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpMM( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstSpMatDescr_t matA, + cusparseConstDnMatDescr_t matB, const void *beta, cusparseDnMatDescr_t matC, + cudaDataType computeType, cusparseSpMMAlg_t alg, void *externalBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, const void *, + cusparseDnMatDescr_t, cudaDataType, cusparseSpMMAlg_t, void *); + static auto func_ptr = LoadSymbol("cusparseSpMM"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, + alg, externalBuffer); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpGEMM_createDescr(cusparseSpGEMMDescr_t *descr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpGEMMDescr_t *); + static auto func_ptr = LoadSymbol("cusparseSpGEMM_createDescr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(descr); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpGEMM_destroyDescr(cusparseSpGEMMDescr_t descr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpGEMMDescr_t); + static auto func_ptr = LoadSymbol("cusparseSpGEMM_destroyDescr"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(descr); +} + +cusparseStatus_t CUSPARSEAPI 
cusparseSpGEMM_workEstimation( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstSpMatDescr_t matA, + cusparseConstSpMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, + cudaDataType computeType, cusparseSpGEMMAlg_t alg, + cusparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize1, + void *externalBuffer1) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, const void *, + cusparseSpMatDescr_t, cudaDataType, cusparseSpGEMMAlg_t, + cusparseSpGEMMDescr_t, size_t *, void *); + static auto func_ptr = LoadSymbol("cusparseSpGEMM_workEstimation"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, + alg, spgemmDescr, bufferSize1, externalBuffer1); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpGEMM_getNumProducts( + cusparseSpGEMMDescr_t spgemmDescr, int64_t *num_prods) { + using FuncPtr = + cusparseStatus_t(CUSPARSEAPI *)(cusparseSpGEMMDescr_t, int64_t *); + static auto func_ptr = LoadSymbol("cusparseSpGEMM_getNumProducts"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(spgemmDescr, num_prods); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpGEMM_estimateMemory( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstSpMatDescr_t matA, + cusparseConstSpMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, + cudaDataType computeType, cusparseSpGEMMAlg_t alg, + cusparseSpGEMMDescr_t spgemmDescr, float chunk_fraction, + size_t *bufferSize3, void *externalBuffer3, size_t *bufferSize2) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, const void *, + cusparseSpMatDescr_t, cudaDataType, 
cusparseSpGEMMAlg_t, + cusparseSpGEMMDescr_t, float, size_t *, void *, size_t *); + static auto func_ptr = LoadSymbol("cusparseSpGEMM_estimateMemory"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, + alg, spgemmDescr, chunk_fraction, bufferSize3, + externalBuffer3, bufferSize2); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpGEMM_compute( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstSpMatDescr_t matA, + cusparseConstSpMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, + cudaDataType computeType, cusparseSpGEMMAlg_t alg, + cusparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize2, + void *externalBuffer2) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, const void *, + cusparseSpMatDescr_t, cudaDataType, cusparseSpGEMMAlg_t, + cusparseSpGEMMDescr_t, size_t *, void *); + static auto func_ptr = LoadSymbol("cusparseSpGEMM_compute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, + alg, spgemmDescr, bufferSize2, externalBuffer2); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpGEMM_copy( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstSpMatDescr_t matA, + cusparseConstSpMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, + cudaDataType computeType, cusparseSpGEMMAlg_t alg, + cusparseSpGEMMDescr_t spgemmDescr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, const void *, + cusparseSpMatDescr_t, cudaDataType, cusparseSpGEMMAlg_t, + cusparseSpGEMMDescr_t); + static auto func_ptr = 
LoadSymbol("cusparseSpGEMM_copy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, + alg, spgemmDescr); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpGEMMreuse_workEstimation( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + cusparseConstSpMatDescr_t matA, cusparseConstSpMatDescr_t matB, + cusparseSpMatDescr_t matC, cusparseSpGEMMAlg_t alg, + cusparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize1, + void *externalBuffer1) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, + cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, + cusparseSpMatDescr_t, cusparseSpGEMMAlg_t, cusparseSpGEMMDescr_t, + size_t *, void *); + static auto func_ptr = + LoadSymbol("cusparseSpGEMMreuse_workEstimation"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, matA, matB, matC, alg, spgemmDescr, + bufferSize1, externalBuffer1); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpGEMMreuse_nnz( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + cusparseConstSpMatDescr_t matA, cusparseConstSpMatDescr_t matB, + cusparseSpMatDescr_t matC, cusparseSpGEMMAlg_t alg, + cusparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize2, + void *externalBuffer2, size_t *bufferSize3, void *externalBuffer3, + size_t *bufferSize4, void *externalBuffer4) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, + cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, + cusparseSpMatDescr_t, cusparseSpGEMMAlg_t, cusparseSpGEMMDescr_t, + size_t *, void *, size_t *, void *, size_t *, void *); + static auto func_ptr = LoadSymbol("cusparseSpGEMMreuse_nnz"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, matA, matB, matC, alg, spgemmDescr, + bufferSize2, externalBuffer2, bufferSize3, 
externalBuffer3, + bufferSize4, externalBuffer4); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpGEMMreuse_copy( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + cusparseConstSpMatDescr_t matA, cusparseConstSpMatDescr_t matB, + cusparseSpMatDescr_t matC, cusparseSpGEMMAlg_t alg, + cusparseSpGEMMDescr_t spgemmDescr, size_t *bufferSize5, + void *externalBuffer5) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, + cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, + cusparseSpMatDescr_t, cusparseSpGEMMAlg_t, cusparseSpGEMMDescr_t, + size_t *, void *); + static auto func_ptr = LoadSymbol("cusparseSpGEMMreuse_copy"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, matA, matB, matC, alg, spgemmDescr, + bufferSize5, externalBuffer5); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpGEMMreuse_compute( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstSpMatDescr_t matA, + cusparseConstSpMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, + cudaDataType computeType, cusparseSpGEMMAlg_t alg, + cusparseSpGEMMDescr_t spgemmDescr) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstSpMatDescr_t, cusparseConstSpMatDescr_t, const void *, + cusparseSpMatDescr_t, cudaDataType, cusparseSpGEMMAlg_t, + cusparseSpGEMMDescr_t); + static auto func_ptr = LoadSymbol("cusparseSpGEMMreuse_compute"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, + alg, spgemmDescr); +} + +cusparseStatus_t CUSPARSEAPI cusparseSDDMM_bufferSize( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstDnMatDescr_t matA, + cusparseConstDnMatDescr_t matB, const void *beta, 
cusparseSpMatDescr_t matC, + cudaDataType computeType, cusparseSDDMMAlg_t alg, size_t *bufferSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstDnMatDescr_t, cusparseConstDnMatDescr_t, const void *, + cusparseSpMatDescr_t, cudaDataType, cusparseSDDMMAlg_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseSDDMM_bufferSize"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, + alg, bufferSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseSDDMM_preprocess( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstDnMatDescr_t matA, + cusparseConstDnMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, + cudaDataType computeType, cusparseSDDMMAlg_t alg, void *externalBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstDnMatDescr_t, cusparseConstDnMatDescr_t, const void *, + cusparseSpMatDescr_t, cudaDataType, cusparseSDDMMAlg_t, void *); + static auto func_ptr = LoadSymbol("cusparseSDDMM_preprocess"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, + alg, externalBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSDDMM( + cusparseHandle_t handle, cusparseOperation_t opA, cusparseOperation_t opB, + const void *alpha, cusparseConstDnMatDescr_t matA, + cusparseConstDnMatDescr_t matB, const void *beta, cusparseSpMatDescr_t matC, + cudaDataType computeType, cusparseSDDMMAlg_t alg, void *externalBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseOperation_t, cusparseOperation_t, const void *, + cusparseConstDnMatDescr_t, cusparseConstDnMatDescr_t, const void *, + cusparseSpMatDescr_t, cudaDataType, 
cusparseSDDMMAlg_t, void *); + static auto func_ptr = LoadSymbol("cusparseSDDMM"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, opA, opB, alpha, matA, matB, beta, matC, computeType, + alg, externalBuffer); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpMMOp_createPlan( + cusparseHandle_t handle, cusparseSpMMOpPlan_t *plan, + cusparseOperation_t opA, cusparseOperation_t opB, + cusparseConstSpMatDescr_t matA, cusparseConstDnMatDescr_t matB, + cusparseDnMatDescr_t matC, cudaDataType computeType, + cusparseSpMMOpAlg_t alg, const void *addOperationNvvmBuffer, + size_t addOperationBufferSize, const void *mulOperationNvvmBuffer, + size_t mulOperationBufferSize, const void *epilogueNvvmBuffer, + size_t epilogueBufferSize, size_t *SpMMWorkspaceSize) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)( + cusparseHandle_t, cusparseSpMMOpPlan_t *, cusparseOperation_t, + cusparseOperation_t, cusparseConstSpMatDescr_t, cusparseConstDnMatDescr_t, + cusparseDnMatDescr_t, cudaDataType, cusparseSpMMOpAlg_t, const void *, + size_t, const void *, size_t, const void *, size_t, size_t *); + static auto func_ptr = LoadSymbol("cusparseSpMMOp_createPlan"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(handle, plan, opA, opB, matA, matB, matC, computeType, alg, + addOperationNvvmBuffer, addOperationBufferSize, + mulOperationNvvmBuffer, mulOperationBufferSize, + epilogueNvvmBuffer, epilogueBufferSize, SpMMWorkspaceSize); +} + +cusparseStatus_t CUSPARSEAPI cusparseSpMMOp(cusparseSpMMOpPlan_t plan, + void *externalBuffer) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMMOpPlan_t, void *); + static auto func_ptr = LoadSymbol("cusparseSpMMOp"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(plan, externalBuffer); +} + +cusparseStatus_t CUSPARSEAPI +cusparseSpMMOp_destroyPlan(cusparseSpMMOpPlan_t plan) { + using FuncPtr = cusparseStatus_t(CUSPARSEAPI *)(cusparseSpMMOpPlan_t); + static auto func_ptr = 
LoadSymbol("cusparseSpMMOp_destroyPlan"); + if (!func_ptr) return GetSymbolNotFoundError(); + return func_ptr(plan); +} + +} // extern "C" diff --git a/tensorflow/stream_executor/cuda/cusparse_stub.cc b/tensorflow/stream_executor/cuda/cusparse_stub.cc index 439de5eb83a..751ecb838eb 100644 --- a/tensorflow/stream_executor/cuda/cusparse_stub.cc +++ b/tensorflow/stream_executor/cuda/cusparse_stub.cc @@ -50,8 +50,4 @@ cusparseStatus_t GetSymbolNotFoundError() { } } // namespace -#if CUDA_VERSION < 9020 -#include "tensorflow/stream_executor/cuda/cusparse_9_0.inc" -#else -#include "tensorflow/stream_executor/cuda/cusparse_10_0.inc" -#endif +#include "tensorflow/stream_executor/cuda/cusparse_12_0.inc" diff --git a/tensorflow/stream_executor/platform/default/dso_loader.cc b/tensorflow/stream_executor/platform/default/dso_loader.cc index 9ae8b41ccf4..8f4d9904322 100644 --- a/tensorflow/stream_executor/platform/default/dso_loader.cc +++ b/tensorflow/stream_executor/platform/default/dso_loader.cc @@ -73,7 +73,7 @@ port::StatusOr GetCudaDriverDsoHandle() { } port::StatusOr GetCudaRuntimeDsoHandle() { - return GetDsoHandle("cudart", GetCudaVersion()); + return GetDsoHandle("cudart", GetCudaLibVersion()); } port::StatusOr GetCublasDsoHandle() { @@ -81,11 +81,11 @@ port::StatusOr GetCublasDsoHandle() { } port::StatusOr GetCufftDsoHandle() { - return GetDsoHandle("cufft", GetCudaLibVersion()); + return GetDsoHandle("cufft", "11"); } port::StatusOr GetCusolverDsoHandle() { - return GetDsoHandle("cusolver", GetCudaLibVersion()); + return GetDsoHandle("cusolver", "11"); } port::StatusOr GetCusparseDsoHandle() { @@ -93,7 +93,7 @@ port::StatusOr GetCusparseDsoHandle() { } port::StatusOr GetCurandDsoHandle() { - return GetDsoHandle("curand", GetCudaLibVersion()); + return GetDsoHandle("curand", "10"); } port::StatusOr GetCuptiDsoHandle() { diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h index efa4034c88a..aae01ce94a7 
100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.h +++ b/tensorflow/stream_executor/stream_executor_pimpl.h @@ -772,7 +772,7 @@ inline port::StatusOr>> StreamExecutor::CreateTypedKernel(absl::string_view kernel_name, absl::string_view ptx, absl::Span cubin_data) { - auto kernel_base = absl::make_unique>(this); + auto kernel_base = std::make_unique>(this); MultiKernelLoaderSpec loader_spec(kernel_base->kNumberOfParameters); loader_spec.AddCudaPtxInMemory(ptx, kernel_name); diff --git a/tensorflow/tools/lib_package/BUILD b/tensorflow/tools/lib_package/BUILD index f7b9255092f..c6cbb2cdb62 100644 --- a/tensorflow/tools/lib_package/BUILD +++ b/tensorflow/tools/lib_package/BUILD @@ -187,7 +187,6 @@ genrule( ], "//conditions:default": [], }) + if_cuda([ - "@cub_archive//:LICENSE.TXT", "@local_config_nccl//:LICENSE", ]) + if_mkl([ "//third_party/mkl:LICENSE", @@ -261,7 +260,6 @@ genrule( ], "//conditions:default": [], }) + if_cuda([ - "@cub_archive//:LICENSE.TXT", "@local_config_nccl//:LICENSE", ]) + if_mkl([ "//third_party/mkl:LICENSE", diff --git a/tensorflow/tools/pip_package/BUILD b/tensorflow/tools/pip_package/BUILD index 1573618f7c3..f8c006bf76b 100644 --- a/tensorflow/tools/pip_package/BUILD +++ b/tensorflow/tools/pip_package/BUILD @@ -199,7 +199,6 @@ filegroup( ], "//conditions:default": [], }) + if_cuda([ - "@cub_archive//:LICENSE.TXT", "@local_config_nccl//:LICENSE", ]) + if_mkl([ "//third_party/mkl:LICENSE", diff --git a/tensorflow/tools/pip_package/setup.py b/tensorflow/tools/pip_package/setup.py index f47878fdd7f..65819587f92 100644 --- a/tensorflow/tools/pip_package/setup.py +++ b/tensorflow/tools/pip_package/setup.py @@ -65,7 +65,7 @@ REQUIRED_PACKAGES = [ 'numpy >= 1.16.0, < 1.19.0', 'opt_einsum >= 2.3.2', 'six >= 1.10.0', - 'protobuf >= 3.6.1', + 'protobuf >= 3.6.1, < 4', 'tensorboard >= 1.15.0, < 1.16.0', 'tensorflow-estimator == 1.15.1', 'termcolor >= 1.1.0', diff --git a/tensorflow/workspace.bzl b/tensorflow/workspace.bzl index 
0616aa91fa6..b6d9461eb73 100755 --- a/tensorflow/workspace.bzl +++ b/tensorflow/workspace.bzl @@ -157,6 +157,7 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): tf_http_archive( name = "com_google_absl", build_file = clean_dep("//third_party:com_google_absl.BUILD"), + patch_file = clean_dep("//third_party:com_google_absl.patch"), sha256 = "acd93f6baaedc4414ebd08b33bebca7c7a46888916101d8c0b8083573526d070", strip_prefix = "abseil-cpp-43ef2148c0936ebf7cb4be6b19927a9d9d145b8f", urls = [ @@ -215,6 +216,7 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): sha256 = "fd0c3e3b50f32af332b53857f8cd1bfa009e33d1eeecabc5c79a4825d906a90c", strip_prefix = "google-cloud-cpp-0.10.0", system_build_file = clean_dep("//third_party/systemlibs:google_cloud_cpp.BUILD"), + patch_file = "//third_party:google_cloud_cpp.patch", system_link_files = { "//third_party/systemlibs:google_cloud_cpp.google.cloud.bigtable.BUILD": "google/cloud/bigtable/BUILD", }, @@ -514,6 +516,7 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): sha256 = "67a6c26db56f345f7cee846e681db2c23f919eba46dd639b09462d1b6203d28c", strip_prefix = "grpc-4566c2a29ebec0835643b972eb99f4306c4234a3", system_build_file = clean_dep("//third_party/systemlibs:grpc.BUILD"), + patch_file = clean_dep("//third_party/systemlibs:grpc.patch"), urls = [ "https://storage.googleapis.com/mirror.tensorflow.org/github.com/grpc/grpc/archive/4566c2a29ebec0835643b972eb99f4306c4234a3.tar.gz", "https://github.com/grpc/grpc/archive/4566c2a29ebec0835643b972eb99f4306c4234a3.tar.gz", @@ -584,6 +587,7 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): sha256 = "1188e29000013ed6517168600fc35a010d58c5d321846d6a6dfee74e4c788b45", strip_prefix = "boringssl-7f634429a04abc48e2eb041c81c5235816c96514", system_build_file = clean_dep("//third_party/systemlibs:boringssl.BUILD"), + patch_file = clean_dep("//third_party/boringssl:patch"), urls = [ 
"https://storage.googleapis.com/mirror.tensorflow.org/github.com/google/boringssl/archive/7f634429a04abc48e2eb041c81c5235816c96514.tar.gz", "https://github.com/google/boringssl/archive/7f634429a04abc48e2eb041c81c5235816c96514.tar.gz", @@ -728,17 +732,6 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): ], ) - tf_http_archive( - name = "cub_archive", - build_file = clean_dep("//third_party:cub.BUILD"), - sha256 = "6bfa06ab52a650ae7ee6963143a0bbc667d6504822cbd9670369b598f18c58c3", - strip_prefix = "cub-1.8.0", - urls = [ - "https://storage.googleapis.com/mirror.tensorflow.org/github.com/NVlabs/cub/archive/1.8.0.zip", - "https://github.com/NVlabs/cub/archive/1.8.0.zip", - ], - ) - tf_http_archive( name = "rocprim_archive", build_file = clean_dep("//third_party:rocprim.BUILD"), @@ -931,6 +924,7 @@ def tf_repositories(path_prefix = "", tf_repo_name = ""): sha256 = "0f34838f2c8024a6765168227ba587b3687729ebf03dc912f88ff75c7aa9cfe8", strip_prefix = "pybind11-2.3.0", build_file = clean_dep("//third_party:pybind11.BUILD"), + patch_file = clean_dep("//third_party:pybind11.patch"), ) tf_http_archive( diff --git a/third_party/aws/aws.patch b/third_party/aws/aws.patch new file mode 100644 index 00000000000..192a4f6a376 --- /dev/null +++ b/third_party/aws/aws.patch @@ -0,0 +1,10 @@ +--- ./aws-cpp-sdk-core/include/aws/core/utils/crypto/Hash.org.h 2026-04-28 16:22:19.764053626 +0000 ++++ ./aws-cpp-sdk-core/include/aws/core/utils/crypto/Hash.h 2026-04-28 16:22:47.655858646 +0000 +@@ -15,6 +15,7 @@ + + #pragma once + ++#include + #include + #include + #include diff --git a/third_party/aws/workspace.bzl b/third_party/aws/workspace.bzl index f37699e34c5..811ca23d719 100644 --- a/third_party/aws/workspace.bzl +++ b/third_party/aws/workspace.bzl @@ -15,4 +15,5 @@ def repo(): sha256 = "89905075fe50aa13e0337ff905c2e8c1ce9caf77a3504484a7cda39179120ffc", strip_prefix = "aws-sdk-cpp-1.5.8", build_file = "//third_party/aws:BUILD.bazel", + patch_file = "//third_party/aws:aws.patch", ) 
diff --git a/third_party/boringssl/patch b/third_party/boringssl/patch new file mode 100644 index 00000000000..2de1f004bcf --- /dev/null +++ b/third_party/boringssl/patch @@ -0,0 +1,48 @@ +--- boringssl/src/crypto/x509/t_x509.org.c 2026-04-28 16:32:42.664037294 +0000 ++++ boringssl/src/crypto/x509/t_x509.c 2026-04-28 16:34:56.391899068 +0000 +@@ -500,9 +500,7 @@ + int X509_NAME_print(BIO *bp, X509_NAME *name, int obase) + { + char *s, *c, *b; +- int ret = 0, l, i; +- +- l = 80 - 2 - obase; ++ int ret = 0, i; + + b = X509_NAME_oneline(name, NULL, 0); + if (!b) +@@ -529,12 +527,10 @@ + if (BIO_write(bp, ", ", 2) != 2) + goto err; + } +- l--; + } + if (*s == '\0') + break; + s++; +- l--; + } + + ret = 1; +--- boringssl/src/third_party/fiat/curve25519.org.c 2026-04-28 18:08:53.752703283 +0000 ++++ boringssl/src/third_party/fiat/curve25519.c 2026-04-28 18:11:07.433554030 +0000 +@@ -1902,7 +1902,7 @@ + // + // Preconditions: + // a[31] <= 127 +-void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t *a) { ++void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]) { + signed char e[64]; + signed char carry; + ge_p1p1 r; +--- boringssl/src/crypto/fipsmodule/bn/internal.org.h 2026-04-29 06:30:13.414347370 +0000 ++++ boringssl/src/crypto/fipsmodule/bn/internal.h 2026-04-29 06:30:31.495865508 +0000 +@@ -285,7 +285,7 @@ + void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]); + + // bn_sqr_comba8 sets |r| to |a|^2. +-void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[4]); ++void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]); + + // bn_sqr_comba4 sets |r| to |a|^2. 
+ void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]); diff --git a/third_party/com_google_absl.patch b/third_party/com_google_absl.patch new file mode 100644 index 00000000000..3c09ff0d5c9 --- /dev/null +++ b/third_party/com_google_absl.patch @@ -0,0 +1,20 @@ +--- com_google_absl/absl/synchronization/internal/graphcycles_org.cc 2019-06-18 20:10:39.000000000 +0000 ++++ com_google_absl/absl/synchronization/internal/graphcycles.cc 2026-04-28 04:49:36.608653670 +0000 +@@ -37,6 +37,7 @@ + + #include + #include ++#include + #include "absl/base/internal/hide_ptr.h" + #include "absl/base/internal/raw_logging.h" + #include "absl/base/internal/spinlock.h" +--- com_google_absl/absl/strings/internal/str_format/extension.org.h 2026-04-28 16:09:20.784594158 +0000 ++++ com_google_absl/absl/strings/internal/str_format/extension.h 2026-04-28 16:10:04.683857902 +0000 +@@ -20,6 +20,7 @@ + #include + #include + #include ++#include + + #include "absl/base/port.h" + #include "absl/strings/internal/str_format/output.h" diff --git a/third_party/google_cloud_cpp.patch b/third_party/google_cloud_cpp.patch new file mode 100644 index 00000000000..fdb96ee7acf --- /dev/null +++ b/third_party/google_cloud_cpp.patch @@ -0,0 +1,11 @@ +--- com_github_googlecloudplatform_google_cloud_cpp/google/cloud/iam_policy.org.h 2026-04-28 23:45:05.612601190 +0000 ++++ com_github_googlecloudplatform_google_cloud_cpp/google/cloud/iam_policy.h 2026-04-28 23:45:33.705603481 +0000 +@@ -16,6 +16,7 @@ + #define GOOGLE_CLOUD_CPP_GOOGLE_CLOUD_IAM_POLICY_H_ + + #include "google/cloud/iam_bindings.h" ++#include + + namespace google { + namespace cloud { + diff --git a/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl b/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl index 282ba08cda5..1ca6ba12908 100644 --- a/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl +++ b/third_party/gpus/crosstool/cc_toolchain_config.bzl.tpl @@ -1248,12 +1248,12 @@ def _impl(ctx): ], ) - cpp11_feature = feature( - name = 
"c++11", + cpp14_feature = feature( + name = "c++14", flag_sets = [ flag_set( actions = [ACTION_NAMES.cpp_compile], - flag_groups = [flag_group(flags = ["-std=c++11"])], + flag_groups = [flag_group(flags = ["-std=c++14"])], ), ], ) @@ -1263,7 +1263,7 @@ def _impl(ctx): name = "common", implies = [ "stdlib", - "c++11", + "c++14", "determinism", "alwayslink", "hardening", @@ -1279,7 +1279,7 @@ def _impl(ctx): name = "common", implies = [ "stdlib", - "c++11", + "c++14", "determinism", "hardening", "warnings", @@ -1294,7 +1294,7 @@ def _impl(ctx): if (ctx.attr.cpu == "local"): features = [ - cpp11_feature, + cpp14_feature, stdlib_feature, determinism_feature, alwayslink_feature, @@ -1315,7 +1315,7 @@ def _impl(ctx): ] elif (ctx.attr.cpu == "darwin"): features = [ - cpp11_feature, + cpp14_feature, stdlib_feature, determinism_feature, pic_feature, diff --git a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl index a69be47945b..6039a9bba7a 100755 --- a/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl +++ b/third_party/gpus/crosstool/clang/bin/crosstool_wrapper_driver_is_not_gcc.tpl @@ -170,7 +170,7 @@ def InvokeNvcc(argv, log=False): undefines = ''.join([' -U' + define for define in undefines]) std_options = GetOptionValue(argv, 'std') # currently only c++11 is supported by Cuda 7.0 std argument - nvcc_allowed_std_options = ["c++11"] + nvcc_allowed_std_options = ["c++14"] std_options = ''.join([' -std=' + define for define in std_options if define in nvcc_allowed_std_options]) diff --git a/third_party/gpus/cuda_configure.bzl b/third_party/gpus/cuda_configure.bzl index cf63adcbaa2..dfb23daa442 100644 --- a/third_party/gpus/cuda_configure.bzl +++ b/third_party/gpus/cuda_configure.bzl @@ -617,14 +617,14 @@ def _find_libs(repository_ctx, cuda_config): repository_ctx, cpu_value, cuda_config.config["cuda_library_dir"], - 
cuda_config.cuda_version, + None, ), "cudart_static": _find_cuda_lib( "cudart_static", repository_ctx, cpu_value, cuda_config.config["cuda_library_dir"], - cuda_config.cuda_version, + None, static = True, ), "cublas": _find_cuda_lib( @@ -632,49 +632,49 @@ def _find_libs(repository_ctx, cuda_config): repository_ctx, cpu_value, cuda_config.config["cublas_library_dir"], - cuda_config.cuda_lib_version, + None, ), "cusolver": _find_cuda_lib( "cusolver", repository_ctx, cpu_value, cuda_config.config["cuda_library_dir"], - cuda_config.cuda_lib_version, + None, ), "curand": _find_cuda_lib( "curand", repository_ctx, cpu_value, cuda_config.config["cuda_library_dir"], - cuda_config.cuda_lib_version, + None, ), "cufft": _find_cuda_lib( "cufft", repository_ctx, cpu_value, cuda_config.config["cuda_library_dir"], - cuda_config.cuda_lib_version, + None, ), "cudnn": _find_cuda_lib( "cudnn", repository_ctx, cpu_value, cuda_config.config["cudnn_library_dir"], - cuda_config.cudnn_version, + None, ), "cupti": _find_cuda_lib( "cupti", repository_ctx, cpu_value, cuda_config.config["cupti_library_dir"], - cuda_config.cuda_version, + None, ), "cusparse": _find_cuda_lib( "cusparse", repository_ctx, cpu_value, cuda_config.config["cuda_library_dir"], - cuda_config.cuda_lib_version, + None, ), } @@ -1053,8 +1053,26 @@ def _create_local_cuda_repository(repository_ctx): copy_rules.append(make_copy_files_rule( repository_ctx, name = "cudnn-include", - srcs = [cudnn_header_dir + "/cudnn.h"], - outs = ["cudnn/include/cudnn.h"], + srcs = [ + cudnn_header_dir + "/cudnn.h", + cudnn_header_dir + "/cudnn_ops_infer.h", + cudnn_header_dir + "/cudnn_ops_train.h", + cudnn_header_dir + "/cudnn_adv_infer.h", + cudnn_header_dir + "/cudnn_adv_train.h", + cudnn_header_dir + "/cudnn_cnn_infer.h", + cudnn_header_dir + "/cudnn_cnn_train.h", + cudnn_header_dir + "/cudnn_backend.h", + ], + outs = [ + "cudnn/include/cudnn.h", + "cudnn/include/cudnn_ops_infer.h", + "cudnn/include/cudnn_ops_train.h", + 
"cudnn/include/cudnn_adv_infer.h", + "cudnn/include/cudnn_adv_train.h", + "cudnn/include/cudnn_cnn_infer.h", + "cudnn/include/cudnn_cnn_train.h", + "cudnn/include/cudnn_backend.h", + ], )) # Set up BUILD file for cuda/ @@ -1159,7 +1177,7 @@ def _create_local_cuda_repository(repository_ctx): # .d file - given that includes that are prefixed with "../" multiple # time quickly grow longer than the root of the tree, this can lead to # bazel's header check failing. - cuda_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-fno-canonical-system-headers\"" + #cuda_defines["%{extra_no_canonical_prefixes_flags}"] = "\"-fno-canonical-system-headers\"" nvcc_path = str( repository_ctx.path("%s/nvcc%s" % ( diff --git a/third_party/gpus/find_cuda_config.py b/third_party/gpus/find_cuda_config.py index be59515661a..2bae2d1d2f7 100644 --- a/third_party/gpus/find_cuda_config.py +++ b/third_party/gpus/find_cuda_config.py @@ -344,7 +344,7 @@ def _find_cudnn_config(base_paths, required_version): for name in ("CUDNN_MAJOR", "CUDNN_MINOR", "CUDNN_PATCHLEVEL")) return ".".join(version) - header_path, header_version = _find_header(base_paths, "cudnn.h", + header_path, header_version = _find_header(base_paths, "cudnn_version.h", required_version, get_header_version) cudnn_version = header_version.split(".")[0] diff --git a/third_party/mlir/include/mlir/Support/STLExtras.h b/third_party/mlir/include/mlir/Support/STLExtras.h index 3448b080d03..64ac556d61e 100644 --- a/third_party/mlir/include/mlir/Support/STLExtras.h +++ b/third_party/mlir/include/mlir/Support/STLExtras.h @@ -26,6 +26,7 @@ #include "mlir/Support/LLVM.h" #include "llvm/ADT/iterator.h" #include +#include namespace mlir { diff --git a/third_party/nccl/build_defs.bzl.tpl b/third_party/nccl/build_defs.bzl.tpl index 57191398553..5f5c3a1008a 100644 --- a/third_party/nccl/build_defs.bzl.tpl +++ b/third_party/nccl/build_defs.bzl.tpl @@ -113,7 +113,6 @@ def _device_link_impl(ctx): "--cmdline=--compile-only", "--link", 
"--compress-all", - "--bin2c-path=%s" % bin2c.dirname, "--create=%s" % tmp_fatbin.path, "--embedded-fatbin=%s" % fatbin_h.path, ] + images, diff --git a/third_party/pybind11.patch b/third_party/pybind11.patch new file mode 100644 index 00000000000..17170e41ff9 --- /dev/null +++ b/third_party/pybind11.patch @@ -0,0 +1,10 @@ +--- ./include/pybind11/pybind11.org.h 2026-04-28 17:49:47.435849420 +0000 ++++ ./include/pybind11/pybind11.h 2026-04-28 17:51:05.006053099 +0000 +@@ -45,6 +45,7 @@ + #include + #endif + ++#include + + #include "attr.h" + #include "options.h" diff --git a/third_party/systemlibs/grpc.patch b/third_party/systemlibs/grpc.patch new file mode 100644 index 00000000000..f478742ee94 --- /dev/null +++ b/third_party/systemlibs/grpc.patch @@ -0,0 +1,12 @@ +--- grpc/src/core/lib/gpr/log_linux.org.cc 2026-05-02 14:09:32.079548021 +0900 ++++ grpc/src/core/lib/gpr/log_linux.cc 2026-05-02 14:07:02.037469605 +0900 +@@ -40,7 +40,9 @@ + #include + #include + ++#ifndef __USE_GNU + static long gettid(void) { return syscall(__NR_gettid); } ++#endif + + void gpr_log(const char* file, int line, gpr_log_severity severity, + const char* format, ...) {