/*
 * Copyright 2014-2023 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee. Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users. These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item. Consistent with 48 C.F.R. 12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */

/*
 * cudnn_ops : cuDNN's basic definitions and basic operations.
 */

#if !defined(CUDNN_OPS_H_)
#define CUDNN_OPS_H_

/* The system include lost its target during extraction; <cuda_runtime.h> and <stdint.h>
   are restored here because this header uses CUDA runtime types, int32_t/uint32_t and size_t. */
#include <cuda_runtime.h>
#include <stdint.h>

#include "cudnn_version.h"
#include "cudnn_graph.h"

/* These version numbers are autogenerated, do not edit manually. */
#define CUDNN_OPS_MAJOR 9
#define CUDNN_OPS_MINOR 1
#define CUDNN_OPS_PATCH 0

#if (CUDNN_OPS_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_MINOR != CUDNN_MINOR) || (CUDNN_OPS_PATCH != CUDNN_PATCHLEVEL)
#error Version mismatch in cuDNN OPS!!!
#endif

#if defined(__cplusplus)
extern "C" {
#endif

/* Data structures to represent Image/Filter and the Neural Network Layer */
typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t;
typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t CUDNN_DEPRECATED;
typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t CUDNN_DEPRECATED;
typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t;
typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t CUDNN_DEPRECATED;
typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t;
typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t CUDNN_DEPRECATED;
typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t CUDNN_DEPRECATED;
typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t;
typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t CUDNN_DEPRECATED;

/*
 * CUDNN Determinism
 */
typedef enum {
    CUDNN_NON_DETERMINISTIC = 0,
    CUDNN_DETERMINISTIC = 1,
} cudnnDeterminism_t;

/* Create an instance of a generic Tensor descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
                           cudnnTensorFormat_t format,
                           cudnnDataType_t dataType, /* image data type */
                           int n,  /* number of inputs (batch size) */
                           int c,  /* number of input feature maps */
                           int h,  /* height of input section */
                           int w); /* width of input section */

cudnnStatus_t CUDNNWINAPI
cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
                             cudnnDataType_t dataType, /* image data type */
                             int n,  /* number of inputs (batch size) */
                             int c,  /* number of input feature maps */
                             int h,  /* height of input section */
                             int w,  /* width of input section */
                             int nStride, int cStride, int hStride, int wStride);

cudnnStatus_t CUDNNWINAPI
cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc,
                           cudnnDataType_t *dataType, /* image data type */
                           int *n,  /* number of inputs (batch size) */
                           int *c,  /* number of input feature maps */
                           int *h,  /* height of input section */
                           int *w,  /* width of input section */
                           int *nStride, int *cStride, int *hStride, int *wStride);

cudnnStatus_t CUDNNWINAPI
cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc,
                           cudnnDataType_t dataType,
                           int nbDims, const int dimA[], const int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
                             cudnnTensorFormat_t format,
                             cudnnDataType_t dataType,
                             int nbDims, const int dimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc,
                           int nbDimsRequested,
                           cudnnDataType_t *dataType,
                           int *nbDims, int dimA[], int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size);

/* PixelOffset( n, c, h, w ) = n * input_stride + c * feature_stride + h * h_stride + w * w_stride

   1) Example of all images in row major order, one batch of features after the other
      (with an optional padding on rows)
      input_stride   : c x h x h_stride
      feature_stride : h x h_stride
      h_stride       : >= w  (h_stride = w if no padding)
      w_stride       : 1

   2) Example of all images in row major order with feature maps interleaved
      input_stride   : c x h x h_stride
      feature_stride : 1
      h_stride       : w x c
      w_stride       : c

   3) Example of all images in column major order, one batch of features after the other
      (with optional padding on columns)
      input_stride   : c x w x w_stride
      feature_stride : w x w_stride
      h_stride       : 1
      w_stride       : >= h
*/
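/*
 * Usage sketch (illustrative only, not part of the API): describing the same fully-packed
 * NCHW tensor (N=16, C=3, H=224, W=224) twice, once through the format-based setter and
 * once with the explicit strides from example 1 above (w_stride = 1, h_stride = W,
 * feature_stride = H*W, input_stride = C*H*W). Error checking is omitted for brevity.
 *
 *   cudnnTensorDescriptor_t desc;
 *   cudnnCreateTensorDescriptor(&desc);
 *
 *   cudnnSetTensor4dDescriptor(desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, 16, 3, 224, 224);
 *
 *   // Equivalent descriptor with explicit strides:
 *   cudnnSetTensor4dDescriptorEx(desc, CUDNN_DATA_FLOAT, 16, 3, 224, 224,
 *                                3 * 224 * 224, // nStride
 *                                224 * 224,     // cStride
 *                                224,           // hStride
 *                                1);            // wStride
 *
 *   size_t bytes;
 *   cudnnGetTensorSizeInBytes(desc, &bytes);    // 16*3*224*224*sizeof(float)
 *   cudnnDestroyTensorDescriptor(desc);
 */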
/* Destroy an instance of Tensor4d descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc);

/* Fold/unfold transforms */
typedef enum {
    CUDNN_TRANSFORM_FOLD = 0U,
    CUDNN_TRANSFORM_UNFOLD = 1U,
} cudnnFoldingDirection_t;

/** Create a destination descriptor for cudnnTransformTensor */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc,
                       const cudnnTensorDescriptor_t srcDesc,
                       cudnnTensorDescriptor_t destDesc,
                       size_t *destSizeInBytes);

/** Create an empty tensor transform descriptor */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc);

/** Initialize a previously created tensor transform descriptor. */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
                                  const uint32_t nbDims,
                                  const cudnnTensorFormat_t destFormat,
                                  const int32_t padBeforeA[], const int32_t padAfterA[],
                                  const uint32_t foldA[],
                                  const cudnnFoldingDirection_t direction);

/**
 * Retrieves the values stored in a previously initialized tensor transform
 * descriptor.
 */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
                                  uint32_t nbDimsRequested,
                                  cudnnTensorFormat_t *destFormat,
                                  int32_t padBeforeA[], int32_t padAfterA[],
                                  uint32_t foldA[],
                                  cudnnFoldingDirection_t *direction);

/**
 * Destroys a previously created tensor transform descriptor.
 */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);

/* Tensor layout conversion helper (y = alpha * x + beta * y) */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnTransformTensor(cudnnHandle_t handle,
                     const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
                     const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnTransformTensorEx(cudnnHandle_t handle,
                       const cudnnTensorTransformDescriptor_t transDesc,
                       const void *alpha, const cudnnTensorDescriptor_t srcDesc, const void *srcData,
                       const void *beta, const cudnnTensorDescriptor_t destDesc, void *destData);

/* Tensor Bias addition : C = alpha * A + beta * C */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnAddTensor(cudnnHandle_t handle,
               const void *alpha, const cudnnTensorDescriptor_t aDesc, const void *A,
               const void *beta, const cudnnTensorDescriptor_t cDesc, void *C);

/*
 * CUDNN OpTensor op type
 */
typedef enum {
    CUDNN_OP_TENSOR_ADD = 0,
    CUDNN_OP_TENSOR_MUL = 1,
    CUDNN_OP_TENSOR_MIN = 2,
    CUDNN_OP_TENSOR_MAX = 3,
    CUDNN_OP_TENSOR_SQRT = 4,
    CUDNN_OP_TENSOR_NOT = 5,
} cudnnOpTensorOp_t;

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
                           cudnnOpTensorOp_t opTensorOp,
                           cudnnDataType_t opTensorCompType,
                           cudnnNanPropagation_t opTensorNanOpt);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
                           cudnnOpTensorOp_t *opTensorOp,
                           cudnnDataType_t *opTensorCompType,
                           cudnnNanPropagation_t *opTensorNanOpt);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);

/* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */
/* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnOpTensor(cudnnHandle_t handle,
              const cudnnOpTensorDescriptor_t opTensorDesc,
              const void *alpha1, const cudnnTensorDescriptor_t aDesc, const void *A,
              const void *alpha2, const cudnnTensorDescriptor_t bDesc, const void *B,
              const void *beta, const cudnnTensorDescriptor_t cDesc, void *C);
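/*
 * Usage sketch (illustrative only, not part of the API): elementwise C = A + B through the
 * legacy (deprecated) OpTensor path, i.e. C = op(alpha1*A, alpha2*B) + beta*C with
 * op = CUDNN_OP_TENSOR_ADD, alpha1 = alpha2 = 1 and beta = 0. `handle` is a cudnnHandle_t
 * created elsewhere (cudnnCreate); aDesc/bDesc/cDesc are float tensor descriptors of
 * matching shape and d_A/d_B/d_C are device buffers. Error checking is omitted.
 *
 *   cudnnOpTensorDescriptor_t opDesc;
 *   cudnnCreateOpTensorDescriptor(&opDesc);
 *   cudnnSetOpTensorDescriptor(opDesc, CUDNN_OP_TENSOR_ADD, CUDNN_DATA_FLOAT,
 *                              CUDNN_PROPAGATE_NAN);
 *
 *   float alpha1 = 1.0f, alpha2 = 1.0f, beta = 0.0f;
 *   cudnnOpTensor(handle, opDesc, &alpha1, aDesc, d_A, &alpha2, bDesc, d_B,
 *                 &beta, cDesc, d_C);
 *
 *   cudnnDestroyOpTensorDescriptor(opDesc);
 */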
/*
 * CUDNN ReduceTensor indices type
 */
typedef enum {
    CUDNN_REDUCE_TENSOR_NO_INDICES = 0,
    CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
} cudnnReduceTensorIndices_t CUDNN_DEPRECATED;

/*
 * CUDNN tensor indices type size (all unsigned)
 * Currently not supported, default is 32 bit unsigned.
 */
typedef enum {
    CUDNN_32BIT_INDICES = 0,
    CUDNN_64BIT_INDICES = 1,
    CUDNN_16BIT_INDICES = 2,
    CUDNN_8BIT_INDICES = 3,
} cudnnIndicesType_t CUDNN_DEPRECATED;

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
                               cudnnReduceTensorOp_t reduceTensorOp,
                               cudnnDataType_t reduceTensorCompType,
                               cudnnNanPropagation_t reduceTensorNanOpt,
                               cudnnReduceTensorIndices_t reduceTensorIndices,
                               cudnnIndicesType_t reduceTensorIndicesType);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                               cudnnReduceTensorOp_t *reduceTensorOp,
                               cudnnDataType_t *reduceTensorCompType,
                               cudnnNanPropagation_t *reduceTensorNanOpt,
                               cudnnReduceTensorIndices_t *reduceTensorIndices,
                               cudnnIndicesType_t *reduceTensorIndicesType);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);

/* Helper function to return the minimum size of the index space to be passed to the reduction
 * given the input and output tensors */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetReductionIndicesSize(cudnnHandle_t handle,
                             const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                             const cudnnTensorDescriptor_t aDesc,
                             const cudnnTensorDescriptor_t cDesc,
                             size_t *sizeInBytes);

/* Helper function to return the minimum size of the workspace to be passed to the reduction
 * given the input and output tensors */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
                               const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                               const cudnnTensorDescriptor_t aDesc,
                               const cudnnTensorDescriptor_t cDesc,
                               size_t *sizeInBytes);

/* Tensor operation : C = reduce op( alpha * A ) + beta * C */
/* The NaN propagation enum applies to only the min and max reduce ops;
 * the other reduce ops propagate NaN as usual. */
/* The indices space is ignored for reduce ops other than min or max. */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnReduceTensor(cudnnHandle_t handle,
                  const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                  void *indices, size_t indicesSizeInBytes,
                  void *workspace, size_t workspaceSizeInBytes,
                  const void *alpha, const cudnnTensorDescriptor_t aDesc, const void *A,
                  const void *beta, const cudnnTensorDescriptor_t cDesc, void *C);
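/*
 * Usage sketch (illustrative only, not part of the API): averaging an NxCxHxW tensor down to
 * NxCx1x1 with the legacy (deprecated) reduction path. aDesc describes the input and cDesc the
 * 4D output with H = W = 1; dimensions whose output extent is 1 are the ones being reduced.
 * `handle`, the descriptors and the device buffers d_A/d_C are assumed to be set up elsewhere.
 *
 *   cudnnReduceTensorDescriptor_t redDesc;
 *   cudnnCreateReduceTensorDescriptor(&redDesc);
 *   cudnnSetReduceTensorDescriptor(redDesc, CUDNN_REDUCE_TENSOR_AVG, CUDNN_DATA_FLOAT,
 *                                  CUDNN_PROPAGATE_NAN, CUDNN_REDUCE_TENSOR_NO_INDICES,
 *                                  CUDNN_32BIT_INDICES);
 *
 *   size_t wsBytes = 0;
 *   cudnnGetReductionWorkspaceSize(handle, redDesc, aDesc, cDesc, &wsBytes);
 *   void *workspace = NULL;
 *   cudaMalloc(&workspace, wsBytes);
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   cudnnReduceTensor(handle, redDesc, NULL, 0, workspace, wsBytes,
 *                     &alpha, aDesc, d_A, &beta, cDesc, d_C);
 *
 *   cudaFree(workspace);
 *   cudnnDestroyReduceTensorDescriptor(redDesc);
 */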
/* Set all values of a tensor to a given value : y[i] = value[0] */
cudnnStatus_t CUDNNWINAPI
cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr);

/* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha);

/* Create an instance of FilterStruct */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
                           cudnnDataType_t dataType, /* image data type */
                           cudnnTensorFormat_t format,
                           int k,  /* number of output feature maps */
                           int c,  /* number of input feature maps */
                           int h,  /* height of each input filter */
                           int w); /* width of each input filter */

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
                           cudnnDataType_t *dataType, /* image data type */
                           cudnnTensorFormat_t *format,
                           int *k,  /* number of output feature maps */
                           int *c,  /* number of input feature maps */
                           int *h,  /* height of each input filter */
                           int *w); /* width of each input filter */

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
                           cudnnDataType_t dataType, /* image data type */
                           cudnnTensorFormat_t format,
                           int nbDims, const int filterDimA[]);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
                           int nbDimsRequested,
                           cudnnDataType_t *dataType, /* image data type */
                           cudnnTensorFormat_t *format,
                           int *nbDims, int filterDimA[]);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnTransformFilter(cudnnHandle_t handle,
                     const cudnnTensorTransformDescriptor_t transDesc,
                     const void *alpha, const cudnnFilterDescriptor_t srcDesc, const void *srcData,
                     const void *beta, const cudnnFilterDescriptor_t destDesc, void *destData);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);

/*
 * softmax algorithm
 */
typedef enum {
    CUDNN_SOFTMAX_FAST = 0,     /* straightforward implementation */
    CUDNN_SOFTMAX_ACCURATE = 1, /* subtract max from every point to avoid overflow */
    CUDNN_SOFTMAX_LOG = 2
} cudnnSoftmaxAlgorithm_t;

typedef enum {
    CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */
    CUDNN_SOFTMAX_MODE_CHANNEL = 1   /* compute the softmax over all C for each H, W, N */
} cudnnSoftmaxMode_t;

/* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform forward softmax */
cudnnStatus_t CUDNNWINAPI
cudnnSoftmaxForward(cudnnHandle_t handle,
                    cudnnSoftmaxAlgorithm_t algo,
                    cudnnSoftmaxMode_t mode,
                    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
                    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);
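/*
 * Usage sketch (illustrative only, not part of the API): numerically-stable softmax over the
 * C dimension of an NxCx1x1 logits tensor, i.e. y = 1 * softmax(x) + 0 * y. `handle` is a
 * cudnnHandle_t created elsewhere, xDesc/yDesc describe identical NxCx1x1 float tensors, and
 * d_x/d_y are device buffers. Error checking is omitted.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   cudnnSoftmaxForward(handle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_CHANNEL,
 *                       &alpha, xDesc, d_x, &beta, yDesc, d_y);
 */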
/*
 * pooling mode
 */
typedef enum {
    CUDNN_POOLING_MAX = 0,
    CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */
    CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */
    CUDNN_POOLING_MAX_DETERMINISTIC = 3
} cudnnPoolingMode_t CUDNN_DEPRECATED;

/* Create an instance of pooling descriptor */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                            cudnnPoolingMode_t mode,
                            cudnnNanPropagation_t maxpoolingNanOpt,
                            int windowHeight, int windowWidth,
                            int verticalPadding, int horizontalPadding,
                            int verticalStride, int horizontalStride);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                            cudnnPoolingMode_t *mode,
                            cudnnNanPropagation_t *maxpoolingNanOpt,
                            int *windowHeight, int *windowWidth,
                            int *verticalPadding, int *horizontalPadding,
                            int *verticalStride, int *horizontalStride);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                            const cudnnPoolingMode_t mode,
                            const cudnnNanPropagation_t maxpoolingNanOpt,
                            int nbDims,
                            const int windowDimA[], const int paddingA[], const int strideA[]);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                            int nbDimsRequested,
                            cudnnPoolingMode_t *mode,
                            cudnnNanPropagation_t *maxpoolingNanOpt,
                            int *nbDims,
                            int windowDimA[], int paddingA[], int strideA[]);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                  const cudnnTensorDescriptor_t inputTensorDesc,
                                  int nbDims, int outputTensorDimA[]);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                  const cudnnTensorDescriptor_t inputTensorDesc,
                                  int *n, int *c, int *h, int *w);

/* Destroy an instance of pooling descriptor */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);

/* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform forward pooling */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnPoolingForward(cudnnHandle_t handle,
                    const cudnnPoolingDescriptor_t poolingDesc,
                    const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
                    const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);
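/*
 * Usage sketch (illustrative only, not part of the API): 2x2 max pooling with stride 2 and no
 * padding through the legacy (deprecated) pooling path, including the output-dimension query
 * used to build the output tensor descriptor. `handle`, xDesc and the device buffers d_x/d_y
 * are assumed to be set up elsewhere; error checking is omitted.
 *
 *   cudnnPoolingDescriptor_t poolDesc;
 *   cudnnCreatePoolingDescriptor(&poolDesc);
 *   cudnnSetPooling2dDescriptor(poolDesc, CUDNN_POOLING_MAX, CUDNN_PROPAGATE_NAN,
 *                               2, 2,   // window
 *                               0, 0,   // padding
 *                               2, 2);  // stride
 *
 *   int n, c, h, w;
 *   cudnnGetPooling2dForwardOutputDim(poolDesc, xDesc, &n, &c, &h, &w);
 *
 *   cudnnTensorDescriptor_t yDesc;
 *   cudnnCreateTensorDescriptor(&yDesc);
 *   cudnnSetTensor4dDescriptor(yDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, n, c, h, w);
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   cudnnPoolingForward(handle, poolDesc, &alpha, xDesc, d_x, &beta, yDesc, d_y);
 *
 *   cudnnDestroyTensorDescriptor(yDesc);
 *   cudnnDestroyPoolingDescriptor(poolDesc);
 */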
/* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
                             cudnnActivationMode_t mode,
                             cudnnNanPropagation_t reluNanOpt,
                             double coef); /* ceiling for clipped RELU, alpha for ELU */

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
                             cudnnActivationMode_t *mode,
                             cudnnNanPropagation_t *reluNanOpt,
                             double *coef); /* ceiling for clipped RELU, alpha for ELU */

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double swish_beta);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetActivationDescriptorSwishBeta(cudnnActivationDescriptor_t activationDesc, double *swish_beta);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);

/* Function to perform forward activation */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnActivationForward(cudnnHandle_t handle,
                       cudnnActivationDescriptor_t activationDesc,
                       const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
                       const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);

/*
 * Create an instance of LRN (Local Response Normalization) descriptor
 * Uses lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0 as defaults from Krizhevsky'12 ImageNet paper
 */
cudnnStatus_t CUDNNWINAPI
cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc);

#define CUDNN_LRN_MIN_N 1       /* minimum allowed lrnN */
#define CUDNN_LRN_MAX_N 16      /* maximum allowed lrnN */
#define CUDNN_LRN_MIN_K 1e-5    /* minimum allowed lrnK */
#define CUDNN_LRN_MIN_BETA 0.01 /* minimum allowed lrnBeta */

/* LRN layer mode */
typedef enum {
    CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0, /* Normalize across tensor's dimA[1] dimension */
} cudnnLRNMode_t;

/*
 * Uses a window [center-lookBehind, center+lookAhead], where
 * lookBehind = floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1.
 * Values of double parameters cast to tensor data type.
 */
cudnnStatus_t CUDNNWINAPI
cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK);

/*
 * Retrieve the settings currently stored in an LRN layer descriptor
 * Any of the provided pointers can be NULL (no corresponding value will be returned)
 */
cudnnStatus_t CUDNNWINAPI
cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK);

/* Destroy an instance of LRN descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc);

/* LRN functions: output = alpha * normalize(x) + beta * old_y */

/* LRN cross-channel forward computation. Double parameters cast to tensor data type */
cudnnStatus_t CUDNNWINAPI
cudnnLRNCrossChannelForward(cudnnHandle_t handle,
                            cudnnLRNDescriptor_t normDesc,
                            cudnnLRNMode_t lrnMode,
                            const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
                            const void *beta, const cudnnTensorDescriptor_t yDesc, void *y);
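/*
 * Usage sketch (illustrative only, not part of the API): cross-channel LRN with the default
 * Krizhevsky'12 parameters noted above (lrnN = 5, alpha = 1e-4, beta = 0.75, k = 2.0).
 * xDesc/yDesc describe identical NCHW float tensors, d_x/d_y are device buffers, and `handle`
 * is created elsewhere. Error checking is omitted.
 *
 *   cudnnLRNDescriptor_t lrnDesc;
 *   cudnnCreateLRNDescriptor(&lrnDesc);
 *   cudnnSetLRNDescriptor(lrnDesc, 5, 1e-4, 0.75, 2.0);
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   cudnnLRNCrossChannelForward(handle, lrnDesc, CUDNN_LRN_CROSS_CHANNEL_DIM1,
 *                               &alpha, xDesc, d_x, &beta, yDesc, d_y);
 *
 *   cudnnDestroyLRNDescriptor(lrnDesc);
 */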
typedef enum {
    CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0,
} cudnnDivNormMode_t;

/* LCN/divisive normalization functions: y = alpha * normalize(x) + beta * y */
cudnnStatus_t CUDNNWINAPI
cudnnDivisiveNormalizationForward(cudnnHandle_t handle,
                                  cudnnLRNDescriptor_t normDesc,
                                  cudnnDivNormMode_t mode,
                                  const void *alpha,
                                  const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
                                  const void *x,
                                  const void *means, /* if NULL, means are assumed to be zero */
                                  void *temp, void *temp2,
                                  const void *beta,
                                  const cudnnTensorDescriptor_t yDesc, void *y);

typedef enum {
    /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
    CUDNN_BATCHNORM_PER_ACTIVATION = 0,
    /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
    CUDNN_BATCHNORM_SPATIAL = 1,
    /*
     * bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors).
     * May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values
     */
    CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
} cudnnBatchNormMode_t CUDNN_DEPRECATED;

#define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */

/*
 * Derives a tensor descriptor from layer data descriptor for BatchNormalization
 * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
 * bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions.
 */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
                              const cudnnTensorDescriptor_t xDesc,
                              cudnnBatchNormMode_t mode);

typedef enum {
    CUDNN_BATCHNORM_OPS_BN = 0,                /* do batch normalization only */
    CUDNN_BATCHNORM_OPS_BN_ACTIVATION = 1,     /* do batchNorm, then activation */
    CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */
} cudnnBatchNormOps_t CUDNN_DEPRECATED;

/*
 * Performs Batch Normalization during Inference:
 * y[i] = bnScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + bnBias[k]
 * with bnScale, bnBias, runningMean, runningInvVariance tensors indexed
 * according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
 * below for notes on function arguments.
 */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
                                        cudnnBatchNormMode_t mode,
                                        const void *alpha, /* alpha[0] = result blend factor */
                                        const void *beta,  /* beta[0] = dest layer blend factor */
                                        const cudnnTensorDescriptor_t xDesc,
                                        const void *x, /* NxCxHxW */
                                        const cudnnTensorDescriptor_t yDesc,
                                        void *y, /* NxCxHxW */
                                        const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
                                        const void *bnScale, const void *bnBias,
                                        const void *estimatedMean, const void *estimatedVariance,
                                        double epsilon);
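/*
 * Usage sketch (illustrative only, not part of the API): spatial batch normalization at
 * inference time through the legacy (deprecated) API, applying
 * y = bnScale*(x - mean)/sqrt(epsilon + var) + bnBias per channel. bnDesc is the 1xCx1x1
 * descriptor produced by cudnnDeriveBNTensorDescriptor above; d_scale/d_bias/d_mean/d_var are
 * assumed to hold the trained per-channel parameters, and `handle`, xDesc/yDesc, d_x/d_y are
 * set up elsewhere. Error checking is omitted.
 *
 *   cudnnTensorDescriptor_t bnDesc;
 *   cudnnCreateTensorDescriptor(&bnDesc);
 *   cudnnDeriveBNTensorDescriptor(bnDesc, xDesc, CUDNN_BATCHNORM_SPATIAL);
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   cudnnBatchNormalizationForwardInference(handle, CUDNN_BATCHNORM_SPATIAL,
 *                                           &alpha, &beta,
 *                                           xDesc, d_x, yDesc, d_y,
 *                                           bnDesc, d_scale, d_bias,
 *                                           d_mean, d_var,
 *                                           1e-5); // must be >= CUDNN_BN_MIN_EPSILON
 *
 *   cudnnDestroyTensorDescriptor(bnDesc);
 */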
typedef enum {
    /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
    CUDNN_NORM_PER_ACTIVATION = 0,
    /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
    CUDNN_NORM_PER_CHANNEL = 1,
} cudnnNormMode_t CUDNN_DEPRECATED;

typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t CUDNN_DEPRECATED;

/*
 * Derives a tensor descriptor from layer data descriptor for Normalization
 * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
 * normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in Normalization forward and backward functions.
 */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc,
                                cudnnTensorDescriptor_t derivedNormMeanVarDesc,
                                const cudnnTensorDescriptor_t xDesc,
                                cudnnNormMode_t mode,
                                int groupCnt); /* Placeholder for future work, should be set to 1 for now */

typedef enum {
    CUDNN_NORM_OPS_NORM = 0,                /* do normalization only */
    CUDNN_NORM_OPS_NORM_ACTIVATION = 1,     /* do Norm, then activation */
    CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */
} cudnnNormOps_t CUDNN_DEPRECATED;

/*
 * Performs Normalization during Inference:
 * y[i] = normScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + normBias[k]
 * with normScale, normBias, runningMean, runningInvVariance tensors indexed
 * according to per-channel or per-activation mode. Refer to cudnnNormalizationForwardTraining
 * below for notes on function arguments.
 */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnNormalizationForwardInference(cudnnHandle_t handle,
                                   cudnnNormMode_t mode,
                                   cudnnNormOps_t normOps,
                                   cudnnNormAlgo_t algo,
                                   const void *alpha, /* alpha[0] = result blend factor */
                                   const void *beta,  /* beta[0] = dest layer blend factor */
                                   const cudnnTensorDescriptor_t xDesc,
                                   const void *x, /* NxCxHxW */
                                   const cudnnTensorDescriptor_t normScaleBiasDesc,
                                   const void *normScale, const void *normBias,
                                   const cudnnTensorDescriptor_t normMeanVarDesc,
                                   const void *estimatedMean, const void *estimatedVariance,
                                   const cudnnTensorDescriptor_t zDesc, const void *z,
                                   cudnnActivationDescriptor_t activationDesc,
                                   const cudnnTensorDescriptor_t yDesc,
                                   void *y, /* NxCxHxW */
                                   double epsilon,
                                   int groupCnt); /* Placeholder for future work */

/* APIs for spatial transformer network */
typedef enum {
    CUDNN_SAMPLER_BILINEAR = 0,
} cudnnSamplerType_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc,
                                       cudnnSamplerType_t samplerType,
                                       cudnnDataType_t dataType,
                                       const int nbDims, const int dimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle,
                                   const cudnnSpatialTransformerDescriptor_t stDesc,
                                   const void *theta, void *grid);

cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfSamplerForward(cudnnHandle_t handle,
                             cudnnSpatialTransformerDescriptor_t stDesc,
                             const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
                             const void *grid,
                             const void *beta, cudnnTensorDescriptor_t yDesc, void *y);

typedef struct cudnnDropoutStruct *cudnnDropoutDescriptor_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc);

/* helper function to determine size of the states to be passed to cudnnSetDropoutDescriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes);

/* helper function to determine size of the reserve space to be passed to dropout forward/backward calls */
cudnnStatus_t CUDNNWINAPI
cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes);

cudnnStatus_t CUDNNWINAPI
cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                          cudnnHandle_t handle,
                          float dropout,
                          void *states, size_t stateSizeInBytes,
                          unsigned long long seed);

/* Restores the dropout descriptor to a previously saved-off state */
cudnnStatus_t CUDNNWINAPI
cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                              cudnnHandle_t handle,
                              float dropout,
                              void *states, size_t stateSizeInBytes,
                              unsigned long long seed);

cudnnStatus_t CUDNNWINAPI
cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                          cudnnHandle_t handle,
                          float *dropout,
                          void **states,
                          unsigned long long *seed);

cudnnStatus_t CUDNNWINAPI
cudnnDropoutForward(cudnnHandle_t handle,
                    const cudnnDropoutDescriptor_t dropoutDesc,
                    const cudnnTensorDescriptor_t xdesc, const void *x,
                    const cudnnTensorDescriptor_t ydesc, void *y,
                    void *reserveSpace, size_t reserveSpaceSizeInBytes);
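/*
 * Usage sketch (illustrative only, not part of the API): 50% dropout in the forward direction.
 * The states buffer seeds the per-handle RNG and the reserve space records the mask so that
 * cudnnDropoutBackward (declared later in this header) can replay it. `handle`, xDesc/yDesc
 * and the device buffers d_x/d_y are assumed to be set up elsewhere; error checking is omitted.
 *
 *   size_t stateBytes = 0, reserveBytes = 0;
 *   cudnnDropoutGetStatesSize(handle, &stateBytes);
 *   cudnnDropoutGetReserveSpaceSize(xDesc, &reserveBytes);
 *
 *   void *d_states = NULL, *d_reserve = NULL;
 *   cudaMalloc(&d_states, stateBytes);
 *   cudaMalloc(&d_reserve, reserveBytes);
 *
 *   cudnnDropoutDescriptor_t dropDesc;
 *   cudnnCreateDropoutDescriptor(&dropDesc);
 *   cudnnSetDropoutDescriptor(dropDesc, handle, 0.5f, d_states, stateBytes, 1234ULL);
 *
 *   cudnnDropoutForward(handle, dropDesc, xDesc, d_x, yDesc, d_y, d_reserve, reserveBytes);
 *
 *   cudnnDestroyDropoutDescriptor(dropDesc);
 *   cudaFree(d_states);
 *   cudaFree(d_reserve);
 */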
/* TODO: move these enums out to the appropriate submodule */
typedef enum {
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM = 0,
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
    CUDNN_CONVOLUTION_FWD_ALGO_GEMM = 2,
    CUDNN_CONVOLUTION_FWD_ALGO_DIRECT = 3,
    CUDNN_CONVOLUTION_FWD_ALGO_FFT = 4,
    CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING = 5,
    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD = 6,
    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED = 7,
    CUDNN_CONVOLUTION_FWD_ALGO_COUNT = 8
} cudnnConvolutionFwdAlgo_t;

typedef enum {
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 = 0, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1 = 1,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT = 2,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3 = 3, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD = 4, /* not implemented */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING = 6,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT = 7
} cudnnConvolutionBwdFilterAlgo_t;

typedef enum {
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_0 = 0, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_1 = 1,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT = 2,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING = 3,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD = 4,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT = 6
} cudnnConvolutionBwdDataAlgo_t;

typedef enum {
    CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0,
    CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1
} cudnnCTCLossAlgo_t;

/*
 * \brief Cross-library version checker.
 * This function is implemented differently in each sub-library. Each sublib
 * checks whether its own version matches that of its dependencies.
 * \returns CUDNN_STATUS_SUCCESS if the version check passes,
 *          CUDNN_STATUS_SUBLIBRARY_VERSION_MISMATCH if the versions are inconsistent.
 */
cudnnStatus_t CUDNNWINAPI
cudnnOpsVersionCheck(void);
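/*
 * Usage sketch (illustrative only, not part of the API): verifying at application startup
 * that the ops sub-library was built against the same cuDNN version as its dependencies.
 *
 *   if (cudnnOpsVersionCheck() != CUDNN_STATUS_SUCCESS) {
 *       // Mixed cuDNN sub-library versions detected; abort initialization.
 *   }
 */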
/* Function to perform backward softmax */
cudnnStatus_t CUDNNWINAPI
cudnnSoftmaxBackward(cudnnHandle_t handle,
                     cudnnSoftmaxAlgorithm_t algo,
                     cudnnSoftmaxMode_t mode,
                     const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
                     const cudnnTensorDescriptor_t dyDesc, const void *dy,
                     const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx);

/* Function to perform backward pooling */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnPoolingBackward(cudnnHandle_t handle,
                     const cudnnPoolingDescriptor_t poolingDesc,
                     const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
                     const cudnnTensorDescriptor_t dyDesc, const void *dy,
                     const cudnnTensorDescriptor_t xDesc, const void *x,
                     const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx);

/* Function to perform backward activation */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnActivationBackward(cudnnHandle_t handle,
                        cudnnActivationDescriptor_t activationDesc,
                        const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
                        const cudnnTensorDescriptor_t dyDesc, const void *dy,
                        const cudnnTensorDescriptor_t xDesc, const void *x,
                        const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx);

/* LRN cross-channel backward computation. Double parameters cast to tensor data type */
cudnnStatus_t CUDNNWINAPI
cudnnLRNCrossChannelBackward(cudnnHandle_t handle,
                             cudnnLRNDescriptor_t normDesc,
                             cudnnLRNMode_t lrnMode,
                             const void *alpha, const cudnnTensorDescriptor_t yDesc, const void *y,
                             const cudnnTensorDescriptor_t dyDesc, const void *dy,
                             const cudnnTensorDescriptor_t xDesc, const void *x,
                             const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx);

cudnnStatus_t CUDNNWINAPI
cudnnDivisiveNormalizationBackward(cudnnHandle_t handle,
                                   cudnnLRNDescriptor_t normDesc,
                                   cudnnDivNormMode_t mode,
                                   const void *alpha,
                                   const cudnnTensorDescriptor_t xDesc, /* same desc for x, means, dy, temp, temp2 */
                                   const void *x,
                                   const void *means, /* if NULL, means are assumed to be zero */
                                   const void *dy,
                                   void *temp, void *temp2,
                                   const void *beta,
                                   const cudnnTensorDescriptor_t dXdMeansDesc, /* same desc for dx, dMeans */
                                   void *dx,      /* output x differential */
                                   void *dMeans); /* output means differential, can be NULL */

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetBatchNormalizationForwardTrainingExWorkspaceSize(cudnnHandle_t handle,
                                                         cudnnBatchNormMode_t mode,
                                                         cudnnBatchNormOps_t bnOps,
                                                         const cudnnTensorDescriptor_t xDesc,
                                                         const cudnnTensorDescriptor_t zDesc,
                                                         const cudnnTensorDescriptor_t yDesc,
                                                         const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
                                                         const cudnnActivationDescriptor_t activationDesc,
                                                         size_t *sizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetBatchNormalizationBackwardExWorkspaceSize(cudnnHandle_t handle,
                                                  cudnnBatchNormMode_t mode,
                                                  cudnnBatchNormOps_t bnOps,
                                                  const cudnnTensorDescriptor_t xDesc,
                                                  const cudnnTensorDescriptor_t yDesc,
                                                  const cudnnTensorDescriptor_t dyDesc,
                                                  const cudnnTensorDescriptor_t dzDesc,
                                                  const cudnnTensorDescriptor_t dxDesc,
                                                  const cudnnTensorDescriptor_t dBnScaleBiasDesc,
                                                  const cudnnActivationDescriptor_t activationDesc,
                                                  size_t *sizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetBatchNormalizationTrainingExReserveSpaceSize(cudnnHandle_t handle,
                                                     cudnnBatchNormMode_t mode,
                                                     cudnnBatchNormOps_t bnOps,
                                                     const cudnnActivationDescriptor_t activationDesc,
                                                     const cudnnTensorDescriptor_t xDesc,
                                                     size_t *sizeInBytes);

/* Computes y = BN(x). Also accumulates moving averages of mean and inverse variances */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationForwardTraining(
    cudnnHandle_t handle,
    cudnnBatchNormMode_t mode,

    const void *alpha, /* alpha[0] = result blend factor */
    const void *beta,  /* beta[0] = dest layer blend factor */

    const cudnnTensorDescriptor_t xDesc,
    const void *x, /* NxCxHxW */
    const cudnnTensorDescriptor_t yDesc,
    void *y, /* NxCxHxW */

    /* Shared desc for the next 6 tensors in the argument list.
       Data type to be set as follows:
       type = (typeOf(x) == double) ? double : float
       Dimensions for this descriptor depend on normalization mode
       - Spatial Normalization : tensors are expected to have dims 1xCx1x1
         (normalization is performed across NxHxW)
       - Per-Activation Normalization : tensors are expected to have dims of 1xCxHxW
         (normalization is performed across N) */
    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,

    /* 'Gamma' and 'Beta' respectively in Ioffe and Szegedy's paper's notation */
    const void *bnScale,
    const void *bnBias,

    /* MUST use factor=1 in the very first call of a complete training cycle.
       Use a factor=1/(1+n) at the n-th call to the function to get
       Cumulative Moving Average (CMA) behavior
       CMA[n] = (x[1]+...+x[n])/n
       Since CMA[n+1] = (n*CMA[n]+x[n+1])/(n+1)
                      = ((n+1)*CMA[n]-CMA[n])/(n+1) + x[n+1]/(n+1)
                      = CMA[n]*(1-1/(n+1)) + x[n+1]*1/(n+1) */
    double exponentialAverageFactor,

    /* Used in Training phase only.
       runningMean = newMean*factor + runningMean*(1-factor) */
    void *resultRunningMean,
    /* Output in training mode, input in inference. Is the moving average
       of variance[x] (factor is applied in the same way as for runningMean) */
    void *resultRunningVariance,

    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
    double epsilon,

    /* Optionally save intermediate results from the forward pass here
       - can be reused to speed up backward pass. NULL if unused */
    void *resultSaveMean,
    void *resultSaveInvVariance);
/* Computes y = relu(BN(x) + z). Also accumulates moving averages of mean and inverse variances */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationForwardTrainingEx(
    cudnnHandle_t handle,
    cudnnBatchNormMode_t mode,
    cudnnBatchNormOps_t bnOps,

    const void *alpha, /* alpha[0] = result blend factor */
    const void *beta,  /* beta[0] = dest layer blend factor */

    const cudnnTensorDescriptor_t xDesc,
    const void *xData,
    const cudnnTensorDescriptor_t zDesc,
    const void *zData,
    const cudnnTensorDescriptor_t yDesc,
    void *yData,

    const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
    const void *bnScale,
    const void *bnBias,

    double exponentialAverageFactor,
    void *resultRunningMean,
    void *resultRunningVariance,

    /* Has to be >= CUDNN_BN_MIN_EPSILON. Should be the same in forward and backward functions. */
    double epsilon,

    /* Optionally save intermediate results from the forward pass here
       - can be reused to speed up backward pass. NULL if unused */
    void *resultSaveMean,
    void *resultSaveInvVariance,

    cudnnActivationDescriptor_t activationDesc,
    void *workspace,
    size_t workSpaceSizeInBytes,
    void *reserveSpace,
    size_t reserveSpaceSizeInBytes);
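/*
 * Usage sketch (illustrative only, not part of the API): the exponentialAverageFactor
 * convention described above, using the legacy (deprecated) training entry point. On the very
 * first call of a training run the factor must be 1 so the running statistics are overwritten;
 * using factor = 1/(1+n) on the n-th call afterwards yields a cumulative moving average.
 * The descriptors, parameter buffers and `handle` are assumed to be set up as for
 * cudnnBatchNormalizationForwardInference; error checking is omitted.
 *
 *   float alpha = 1.0f, beta = 0.0f;
 *   for (int n = 0; n < numIterations; ++n) {
 *       double factor = 1.0 / (1.0 + n); // n = 0 -> factor = 1 on the first call
 *       cudnnBatchNormalizationForwardTraining(handle, CUDNN_BATCHNORM_SPATIAL,
 *                                              &alpha, &beta,
 *                                              xDesc, d_x, yDesc, d_y,
 *                                              bnDesc, d_scale, d_bias,
 *                                              factor,
 *                                              d_runningMean, d_runningVar,
 *                                              1e-5,
 *                                              d_saveMean, d_saveInvVar);
 *   }
 */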
/* Performs backward pass of Batch Normalization layer. Returns x gradient,
 * bnScale gradient and bnBias gradient */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationBackward(cudnnHandle_t handle,
                                cudnnBatchNormMode_t mode,
                                const void *alphaDataDiff,
                                const void *betaDataDiff,
                                const void *alphaParamDiff,
                                const void *betaParamDiff,
                                const cudnnTensorDescriptor_t xDesc, /* same desc for x, dx, dy */
                                const void *x,
                                const cudnnTensorDescriptor_t dyDesc,
                                const void *dy,
                                const cudnnTensorDescriptor_t dxDesc,
                                void *dx,
                                /* Shared tensor desc for the 4 tensors below */
                                const cudnnTensorDescriptor_t dBnScaleBiasDesc,
                                const void *bnScale, /* bnBias doesn't affect backpropagation */
                                /* scale and bias diff are not backpropagated below this layer */
                                void *dBnScaleResult,
                                void *dBnBiasResult,
                                /* Same epsilon as forward pass */
                                double epsilon,
                                /* Optionally cached intermediate results from forward pass */
                                const void *savedMean,
                                const void *savedInvVariance);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationBackwardEx(cudnnHandle_t handle,
                                  cudnnBatchNormMode_t mode,
                                  cudnnBatchNormOps_t bnOps,
                                  const void *alphaDataDiff,
                                  const void *betaDataDiff,
                                  const void *alphaParamDiff,
                                  const void *betaParamDiff,
                                  const cudnnTensorDescriptor_t xDesc,
                                  const void *xData,
                                  const cudnnTensorDescriptor_t yDesc,
                                  const void *yData,
                                  const cudnnTensorDescriptor_t dyDesc,
                                  const void *dyData,
                                  const cudnnTensorDescriptor_t dzDesc,
                                  void *dzData,
                                  const cudnnTensorDescriptor_t dxDesc,
                                  void *dxData,
                                  /* Shared tensor desc for the 4 tensors below */
                                  const cudnnTensorDescriptor_t dBnScaleBiasDesc,
                                  const void *bnScaleData,
                                  const void *bnBiasData, /* needed if there is activation */
                                  void *dBnScaleData,
                                  void *dBnBiasData,
                                  double epsilon, /* Same epsilon as forward pass */
                                  /* Optionally cached intermediate results from forward pass */
                                  const void *savedMean,
                                  const void *savedInvVariance,
                                  cudnnActivationDescriptor_t activationDesc,
                                  void *workSpace,
                                  size_t workSpaceSizeInBytes,
                                  void *reserveSpace,
                                  size_t reserveSpaceSizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetNormalizationForwardTrainingWorkspaceSize(cudnnHandle_t handle,
                                                  cudnnNormMode_t mode,
                                                  cudnnNormOps_t normOps,
                                                  cudnnNormAlgo_t algo,
                                                  const cudnnTensorDescriptor_t xDesc,
                                                  const cudnnTensorDescriptor_t zDesc,
                                                  const cudnnTensorDescriptor_t yDesc,
                                                  const cudnnTensorDescriptor_t normScaleBiasDesc,
                                                  const cudnnActivationDescriptor_t activationDesc,
                                                  const cudnnTensorDescriptor_t normMeanVarDesc,
                                                  size_t *sizeInBytes,
                                                  int groupCnt); /* Placeholder for future work, should be set to 1 for now */

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetNormalizationBackwardWorkspaceSize(cudnnHandle_t handle,
                                           cudnnNormMode_t mode,
                                           cudnnNormOps_t normOps,
                                           cudnnNormAlgo_t algo,
                                           const cudnnTensorDescriptor_t xDesc,
                                           const cudnnTensorDescriptor_t yDesc,
                                           const cudnnTensorDescriptor_t dyDesc,
                                           const cudnnTensorDescriptor_t dzDesc,
                                           const cudnnTensorDescriptor_t dxDesc,
                                           const cudnnTensorDescriptor_t dNormScaleBiasDesc,
                                           const cudnnActivationDescriptor_t activationDesc,
                                           const cudnnTensorDescriptor_t normMeanVarDesc,
                                           size_t *sizeInBytes,
                                           int groupCnt); /* Placeholder for future work, should be set to 1 for now */

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetNormalizationTrainingReserveSpaceSize(cudnnHandle_t handle,
                                              cudnnNormMode_t mode,
                                              cudnnNormOps_t normOps,
                                              cudnnNormAlgo_t algo,
                                              const cudnnActivationDescriptor_t activationDesc,
                                              const cudnnTensorDescriptor_t xDesc,
                                              size_t *sizeInBytes,
                                              int groupCnt); /* Placeholder for future work, should be set to 1 for now */
/* Computes y = relu(Norm(x) + z). Also accumulates moving averages of mean and inverse variances */
CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnNormalizationForwardTraining(cudnnHandle_t handle,
                                  cudnnNormMode_t mode,
                                  cudnnNormOps_t normOps,
                                  cudnnNormAlgo_t algo,
                                  const void *alpha, /* alpha[0] = result blend factor */
                                  const void *beta,  /* beta[0] = dest layer blend factor */
                                  const cudnnTensorDescriptor_t xDesc,
                                  const void *xData,
                                  const cudnnTensorDescriptor_t normScaleBiasDesc,
                                  const void *normScale,
                                  const void *normBias,
                                  double exponentialAverageFactor,
                                  const cudnnTensorDescriptor_t normMeanVarDesc,
                                  void *resultRunningMean,
                                  void *resultRunningVariance,
                                  /* Has to be >= 0. Should be the same in forward and backward functions. */
                                  double epsilon,
                                  /* Optionally save intermediate results from the forward pass here
                                     - can be reused to speed up backward pass. NULL if unused */
                                  void *resultSaveMean,
                                  void *resultSaveInvVariance,
                                  cudnnActivationDescriptor_t activationDesc,
                                  const cudnnTensorDescriptor_t zDesc,
                                  const void *zData,
                                  const cudnnTensorDescriptor_t yDesc,
                                  void *yData,
                                  void *workspace,
                                  size_t workSpaceSizeInBytes,
                                  void *reserveSpace,
                                  size_t reserveSpaceSizeInBytes,
                                  int groupCnt); /* Placeholder for future work, should be set to 1 for now */

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnNormalizationBackward(cudnnHandle_t handle,
                           cudnnNormMode_t mode,
                           cudnnNormOps_t normOps,
                           cudnnNormAlgo_t algo,
                           const void *alphaDataDiff,
                           const void *betaDataDiff,
                           const void *alphaParamDiff,
                           const void *betaParamDiff,
                           const cudnnTensorDescriptor_t xDesc,
                           const void *xData,
                           const cudnnTensorDescriptor_t yDesc,
                           const void *yData,
                           const cudnnTensorDescriptor_t dyDesc,
                           const void *dyData,
                           const cudnnTensorDescriptor_t dzDesc,
                           void *dzData,
                           const cudnnTensorDescriptor_t dxDesc,
                           void *dxData,
                           /* Shared tensor desc for the 4 tensors below */
                           const cudnnTensorDescriptor_t dNormScaleBiasDesc,
                           const void *normScaleData,
                           const void *normBiasData, /* needed if there is activation */
                           void *dNormScaleData,
                           void *dNormBiasData,
                           double epsilon, /* Same epsilon as forward pass */
                           const cudnnTensorDescriptor_t normMeanVarDesc,
                           /* Optionally cached intermediate results from forward pass */
                           const void *savedMean,
                           const void *savedInvVariance,
                           cudnnActivationDescriptor_t activationDesc,
                           void *workSpace,
                           size_t workSpaceSizeInBytes,
                           void *reserveSpace,
                           size_t reserveSpaceSizeInBytes,
                           int groupCnt); /* Placeholder for future work, should be set to 1 for now */

cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfGridGeneratorBackward(cudnnHandle_t handle,
                                    const cudnnSpatialTransformerDescriptor_t stDesc,
                                    const void *dgrid, void *dtheta);

cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfSamplerBackward(cudnnHandle_t handle,
                              cudnnSpatialTransformerDescriptor_t stDesc,
                              const void *alpha, const cudnnTensorDescriptor_t xDesc, const void *x,
                              const void *beta, const cudnnTensorDescriptor_t dxDesc, void *dx,
                              const void *alphaDgrid,
                              const cudnnTensorDescriptor_t dyDesc, const void *dy,
                              const void *grid,
                              const void *betaDgrid, void *dgrid);

cudnnStatus_t CUDNNWINAPI
cudnnDropoutBackward(cudnnHandle_t handle,
                     const cudnnDropoutDescriptor_t dropoutDesc,
                     const cudnnTensorDescriptor_t dydesc, const void *dy,
                     const cudnnTensorDescriptor_t dxdesc, void *dx,
                     void *reserveSpace, size_t reserveSpaceSizeInBytes);

#if defined(__cplusplus)
}
#endif

#endif /* CUDNN_OPS_H_ */