diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d3e583b5..aa90ede39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -257,9 +257,24 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) if(NOT MSVC) - set(CMAKE_CXX_FLAGS "-Wno-deprecated-declarations ${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wshadow -Wnon-virtual-dtor -Woverloaded-virtual") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion -Wdouble-promotion") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat=2 -Wimplicit-fallthrough") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") + + # Clang-specific suppressions + if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + # CUDA toolkit headers trigger variadic macro warnings + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-variadic-macro-arguments-omitted") + # Clang's -Wconversion also enables -Wsign-conversion (GCC's does not for C++) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-conversion") + endif() else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /WX") endif() ############################################################################################ diff --git a/include/NvInferRuntime.h b/include/NvInferRuntime.h index cf875bf1d..c444760e0 100644 --- a/include/NvInferRuntime.h +++ b/include/NvInferRuntime.h @@ -360,7 +360,8 @@ class DimsExprs //! struct DynamicPluginTensorDesc { - //! Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of any runtime dimension. + //! Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of any runtime + //! dimension. PluginTensorDesc desc; //!
Lower bounds on tensor’s dimensions @@ -433,7 +434,8 @@ class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext //! return output; //! virtual DimsExprs getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0; + int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + = 0; //! //! \brief Limit on number of format combinations accepted. @@ -473,7 +475,8 @@ class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext //! Warning: TensorRT will stop asking for formats once it finds kFORMAT_COMBINATION_LIMIT on combinations. //! virtual bool supportsFormatCombination( - int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0; + int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + = 0; //! //! \brief Configure the plugin. @@ -513,7 +516,8 @@ class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext //! \param nbOutputs Number of output tensors. //! virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, - DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0; + DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept + = 0; //! //! \brief Find the workspace size required by the layer. @@ -525,7 +529,8 @@ class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext //! \return The workspace size. //! virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, - int32_t nbOutputs) const noexcept = 0; + int32_t nbOutputs) const noexcept + = 0; //! //! \brief Execute the layer. @@ -540,7 +545,8 @@ class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext //! \return 0 for success, else non-zero (which will cause engine termination). //! 
virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0; + void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept + = 0; protected: //! @@ -557,8 +563,11 @@ class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext virtual ~IPluginV2DynamicExt() noexcept {} -private: +protected: // Following are obsolete base class methods, and must not be implemented or used. + // Kept as protected (not private) so derived classes can use `using` declarations + // to suppress -Woverloaded-virtual warnings when defining new methods with the + // same names but different signatures. //! //! \brief Set plugin configuration @@ -916,7 +925,8 @@ class IPluginV3OneBuild : public IPluginCapability //! \return 0 for success, else non-zero (which will cause engine termination, if invoked by TensorRT). //! virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, - DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0; + DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept + = 0; //! //! \brief Provide the data types of the plugin outputs if the input tensors have the data types provided. @@ -935,7 +945,8 @@ class IPluginV3OneBuild : public IPluginCapability //! \warning DataType:kBOOL and DataType::kUINT8 are not supported. //! virtual int32_t getOutputDataTypes( - DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept = 0; + DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept + = 0; //! //! \brief Provide expressions for computing dimensions of the output tensors from dimensions of the input tensors. @@ -959,7 +970,8 @@ class IPluginV3OneBuild : public IPluginCapability //! through the error recorder. //! 
virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs, - int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0; + int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept + = 0; //! //! \brief Return true if plugin supports the format and datatype for the input/output indexed by pos. @@ -996,7 +1008,8 @@ class IPluginV3OneBuild : public IPluginCapability //! \see getFormatCombinationLimit //! virtual bool supportsFormatCombination( - int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0; + int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + = 0; //! //! \brief Get the number of outputs from the plugin. @@ -1014,8 +1027,8 @@ class IPluginV3OneBuild : public IPluginCapability //! //! \return The workspace size. //! - virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept + virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -1051,7 +1064,7 @@ class IPluginV3OneBuild : public IPluginCapability //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported //! through the error recorder. //! - virtual int32_t getValidTactics(int32_t* tactics, int32_t nbTactics) noexcept + virtual int32_t getValidTactics(int32_t* /*tactics*/, int32_t /*nbTactics*/) noexcept { return 0; } @@ -1118,7 +1131,7 @@ class IPluginV3OneRuntime : public IPluginCapability //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported //! through the error recorder. //! 
- virtual int32_t setTactic(int32_t tactic) noexcept + virtual int32_t setTactic(int32_t /*tactic*/) noexcept { return 0; } @@ -1142,7 +1155,8 @@ class IPluginV3OneRuntime : public IPluginCapability //! \param nbOutputs Number of output tensors. //! virtual int32_t onShapeChange( - PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0; + PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept + = 0; //! //! \brief Execute the layer. @@ -1158,7 +1172,8 @@ class IPluginV3OneRuntime : public IPluginCapability //! through the error recorder. //! virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0; + void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept + = 0; //! //! \brief Clone the plugin, attach the cloned plugin object to a execution context and grant the cloned plugin @@ -1234,7 +1249,7 @@ class IPluginV3OneBuild : public v_1_0::IPluginV3OneBuild //! +->|Copy +--> t** --->|Plugin1 +--> t2 //! +-----+ +--------+ //! - virtual int32_t getAliasedInput(int32_t outputIndex) noexcept + virtual int32_t getAliasedInput(int32_t /*outputIndex*/) noexcept { return -1; } @@ -1680,7 +1695,8 @@ class IGpuAllocator : public IVersionedInterface //! \deprecated Deprecated in TensorRT 10.0. Superseded by allocateAsync //! TRT_DEPRECATED virtual void* allocate( - uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0; + uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept + = 0; ~IGpuAllocator() override = default; IGpuAllocator() = default; @@ -1854,7 +1870,6 @@ class IGpuAllocator : public IVersionedInterface //! using IGpuAllocator = v_1_0::IGpuAllocator; - //! //! \class IRuntime //! 
@@ -2179,7 +2194,6 @@ class IRuntime : public INoCopy return mImpl->getEngineHostCodeAllowed(); } - protected: apiv::VRuntime* mImpl; }; @@ -2629,7 +2643,8 @@ class IRefitter : public INoCopy //! The minimum and maximum specify the permitted range that is supported at runtime, while the optimum value //! is used for the kernel selection. This should be the "typical" value that is expected to occur at runtime. //! -//! \see IOptimizationProfile::setDimensions(), IOptimizationProfile::setShapeValuesV2(), IOptimizationProfile::setShapeValues() +//! \see IOptimizationProfile::setDimensions(), IOptimizationProfile::setShapeValuesV2(), +//! IOptimizationProfile::setShapeValues() //! enum class OptProfileSelector : int32_t { @@ -3121,7 +3136,6 @@ constexpr inline int32_t EnumMax() noexcept return 3; } - //! \class IRuntimeConfig //! //! \brief A class for runtime configuration. This class is used during execution context creation. @@ -3153,7 +3167,6 @@ class IRuntimeConfig : public INoCopy return mImpl->getExecutionContextAllocationStrategy(); } - protected: apiv::VRuntimeConfig* mImpl; }; // class IRuntimeConfig @@ -4209,7 +4222,7 @@ class IOutputAllocator : public IVersionedInterface //! \deprecated Deprecated in TensorRT 10.0. Superseded by reallocateOutputAsync with cudaStream_t argument //! TRT_DEPRECATED virtual void* reallocateOutput( - char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept + char const* /*tensorName*/, void* /*currentMemory*/, uint64_t /*size*/, uint64_t /*alignment*/) noexcept { return nullptr; } @@ -5534,7 +5547,8 @@ class IGpuAsyncAllocator : public IGpuAllocator //! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads. //! void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, - cudaStream_t /*stream*/) noexcept override = 0; + cudaStream_t /*stream*/) noexcept override + = 0; //! //! 
\brief A thread-safe callback implemented by the application to handle stream-ordered asynchronous @@ -5652,7 +5666,8 @@ class IPluginCreatorV3One : public IPluginCreatorInterface //! engine deserialization), TensorRT will delete any objects it creates. //! virtual IPluginV3* createPlugin( - AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept = 0; + AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + = 0; //! //! \brief Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in the diff --git a/include/NvInferRuntimePlugin.h b/include/NvInferRuntimePlugin.h index e374a6942..d166fb228 100644 --- a/include/NvInferRuntimePlugin.h +++ b/include/NvInferRuntimePlugin.h @@ -420,8 +420,8 @@ class TRT_DEPRECATED IPluginV2 //! //! \param pluginNamespace The namespace for the plugin object. //! - //! \warning The string pluginNamespace will be NULL-terminated and have a length of 1024 bytes or less including the - //! NULL terminator. + //! \warning The string pluginNamespace will be NULL-terminated and have a length of 1024 bytes or less including + //! the NULL terminator. //! //! \usage //! - Allowed context for the API call @@ -446,15 +446,15 @@ class TRT_DEPRECATED IPluginV2 // @cond SuppressDoxyWarnings IPluginV2() = default; virtual ~IPluginV2() noexcept = default; -// @endcond + // @endcond protected: -// @cond SuppressDoxyWarnings + // @cond SuppressDoxyWarnings IPluginV2(IPluginV2 const&) = default; IPluginV2(IPluginV2&&) = default; IPluginV2& operator=(IPluginV2 const&) & = default; IPluginV2& operator=(IPluginV2&&) & = default; -// @endcond + // @endcond }; //! @@ -672,7 +672,7 @@ class TRT_DEPRECATED IPluginV2Ext : public IPluginV2 IPluginV2Ext(IPluginV2Ext&&) = default; IPluginV2Ext& operator=(IPluginV2Ext const&) & = default; IPluginV2Ext& operator=(IPluginV2Ext&&) & = default; -// @endcond + // @endcond //! //! 
\brief Return the API version with which this plugin was built. The @@ -787,15 +787,15 @@ class TRT_DEPRECATED IPluginV2IOExt : public IPluginV2Ext // @cond SuppressDoxyWarnings IPluginV2IOExt() = default; ~IPluginV2IOExt() override = default; -// @endcond + // @endcond protected: -// @cond SuppressDoxyWarnings + // @cond SuppressDoxyWarnings IPluginV2IOExt(IPluginV2IOExt const&) = default; IPluginV2IOExt(IPluginV2IOExt&&) = default; IPluginV2IOExt& operator=(IPluginV2IOExt const&) & = default; IPluginV2IOExt& operator=(IPluginV2IOExt&&) & = default; -// @endcond + // @endcond //! //! \brief Return the API version with which this plugin was built. The upper byte is reserved by TensorRT and is @@ -814,8 +814,10 @@ class TRT_DEPRECATED IPluginV2IOExt : public IPluginV2Ext | (static_cast<uint32_t>(NV_TENSORRT_VERSION) & 0xFFFFFFU)); } -private: +protected: // Following are obsolete base class methods, and must not be implemented or used. + // Kept as protected (not private) so derived classes can use `using` declarations + // to suppress -Woverloaded-virtual warnings. //! //! \brief Set plugin configuration.
diff --git a/plugin/batchTilePlugin/batchTilePlugin.cpp b/plugin/batchTilePlugin/batchTilePlugin.cpp index 0e5e32f2b..f83d7fabc 100644 --- a/plugin/batchTilePlugin/batchTilePlugin.cpp +++ b/plugin/batchTilePlugin/batchTilePlugin.cpp @@ -84,7 +84,7 @@ size_t BatchTilePlugin::getWorkspaceSize(int32_t) const noexcept } DataType BatchTilePlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -137,19 +137,20 @@ size_t BatchTilePlugin::getSerializationSize() const noexcept } bool BatchTilePlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } -bool BatchTilePlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool BatchTilePlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } -void BatchTilePlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void BatchTilePlugin::configurePlugin(Dims const* inputDims, int32_t /*nbInputs*/, Dims const* outputDims, + int32_t nbOutputs, DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t /*maxBatchSize*/) noexcept { try { @@ -239,7 +240,7 @@ PluginFieldCollection const* BatchTilePluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* BatchTilePluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* 
BatchTilePluginCreator::createPlugin(char const* name, PluginFieldCollection const* /*fc*/) noexcept { try { diff --git a/plugin/batchedNMSPlugin/batchedNMSPlugin.cpp b/plugin/batchedNMSPlugin/batchedNMSPlugin.cpp index e453950f9..ee2069fce 100644 --- a/plugin/batchedNMSPlugin/batchedNMSPlugin.cpp +++ b/plugin/batchedNMSPlugin/batchedNMSPlugin.cpp @@ -148,9 +148,9 @@ Dims BatchedNMSPlugin::getOutputDimensions(int32_t index, Dims const* inputs, in PLUGIN_ASSERT(inputs[0].nbDims == 3); PLUGIN_ASSERT(inputs[1].nbDims == 2 || (inputs[1].nbDims == 3 && inputs[1].d[2] == 1)); // mBoxesSize: number of box coordinates for one sample - mBoxesSize = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2]; + mBoxesSize = static_cast<int32_t>(inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2]); // mScoresSize: number of scores for one sample - mScoresSize = inputs[1].d[0] * inputs[1].d[1]; + mScoresSize = static_cast<int32_t>(inputs[1].d[0] * inputs[1].d[1]); // num_detections if (index == 0) { @@ -237,13 +237,13 @@ size_t BatchedNMSPlugin::getWorkspaceSize(int32_t maxBatchSize) const noexcept mNumPriors, param.topK, mPrecision, mPrecision); } -size_t BatchedNMSDynamicPlugin::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t BatchedNMSDynamicPlugin::getWorkspaceSize(PluginTensorDesc const* inputs, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { - int32_t batchSize = inputs[0].dims.d[0]; - int32_t boxesSize = inputs[0].dims.d[1] * inputs[0].dims.d[2] * inputs[0].dims.d[3]; - int32_t scoreSize = inputs[1].dims.d[1] * inputs[1].dims.d[2]; - int32_t numPriors = inputs[0].dims.d[1]; + int32_t batchSize = static_cast<int32_t>(inputs[0].dims.d[0]); + int32_t boxesSize = static_cast<int32_t>(inputs[0].dims.d[1] * inputs[0].dims.d[2] * inputs[0].dims.d[3]); + int32_t scoreSize = static_cast<int32_t>(inputs[1].dims.d[1] * inputs[1].dims.d[2]); + int32_t numPriors = 
static_cast<int32_t>(inputs[0].dims.d[1]); return detectionInferenceWorkspaceSize(param.shareLocation, batchSize, boxesSize, scoreSize, param.numClasses, numPriors, param.topK, mPrecision, mPrecision); } @@ -299,10 +299,11 @@ int32_t BatchedNMSDynamicPlugin::enqueue(PluginTensorDesc const* inputDesc, Plug void* nmsedScores = outputs[2]; void* nmsedClasses = outputs[3]; - pluginStatus_t status = nmsInference(stream, inputDesc[0].dims.d[0], mBoxesSize, mScoresSize, - param.shareLocation, param.backgroundLabelId, mNumPriors, param.numClasses, param.topK, param.keepTopK, - param.scoreThreshold, param.iouThreshold, mPrecision, locData, mPrecision, confData, keepCount, nmsedBoxes, - nmsedScores, nmsedClasses, workspace, param.isNormalized, false, mClipBoxes, mScoreBits, mCaffeSemantics); + pluginStatus_t status = nmsInference(stream, static_cast<int32_t>(inputDesc[0].dims.d[0]), mBoxesSize, + mScoresSize, param.shareLocation, param.backgroundLabelId, mNumPriors, param.numClasses, param.topK, + param.keepTopK, param.scoreThreshold, param.iouThreshold, mPrecision, locData, mPrecision, confData, + keepCount, nmsedBoxes, nmsedScores, nmsedClasses, workspace, param.isNormalized, false, mClipBoxes, + mScoreBits, mCaffeSemantics); return status; } catch (std::exception const& e) @@ -352,9 +353,9 @@ void BatchedNMSDynamicPlugin::serialize(void* buffer) const noexcept PLUGIN_ASSERT(d == a + getSerializationSize()); } -void BatchedNMSPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, nvinfer1::PluginFormat format, int32_t maxBatchSize) noexcept +void BatchedNMSPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* inputIsBroadcast, + bool const* outputIsBroadcast, nvinfer1::PluginFormat 
/*format*/, int32_t /*maxBatchSize*/) noexcept { try { @@ -365,10 +366,10 @@ void BatchedNMSPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, PLUGIN_ASSERT(std::none_of(inputIsBroadcast, inputIsBroadcast + nbInputs, [](bool b) { return b; })); PLUGIN_ASSERT(std::none_of(outputIsBroadcast, outputIsBroadcast + nbInputs, [](bool b) { return b; })); - mBoxesSize = inputDims[0].d[0] * inputDims[0].d[1] * inputDims[0].d[2]; - mScoresSize = inputDims[1].d[0] * inputDims[1].d[1]; + mBoxesSize = static_cast<int32_t>(inputDims[0].d[0] * inputDims[0].d[1] * inputDims[0].d[2]); + mScoresSize = static_cast<int32_t>(inputDims[1].d[0] * inputDims[1].d[1]); // num_boxes - mNumPriors = inputDims[0].d[0]; + mNumPriors = static_cast<int32_t>(inputDims[0].d[0]); const int32_t numLocClasses = param.shareLocation ? 1 : param.numClasses; // Third dimension of boxes must be either 1 or num_classes PLUGIN_ASSERT(inputDims[0].d[1] == numLocClasses); @@ -381,8 +382,8 @@ void BatchedNMSPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, } } -void BatchedNMSDynamicPlugin::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void BatchedNMSDynamicPlugin::configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t nbOutputs) noexcept { try { @@ -401,10 +402,10 @@ void BatchedNMSDynamicPlugin::configurePlugin( // Constant shape: [batch_size, num_boxes, num_classes] or [batch_size, num_boxes, num_classes, 1] PLUGIN_ASSERT(in[1].desc.dims.nbDims == 3 || (in[1].desc.dims.nbDims == 4 && in[1].desc.dims.d[3] == 1)); - mBoxesSize = in[0].desc.dims.d[1] * in[0].desc.dims.d[2] * in[0].desc.dims.d[3]; - mScoresSize = in[1].desc.dims.d[1] * in[1].desc.dims.d[2]; + mBoxesSize = static_cast<int32_t>(in[0].desc.dims.d[1] * in[0].desc.dims.d[2] * in[0].desc.dims.d[3]); + mScoresSize = static_cast<int32_t>(in[1].desc.dims.d[1] * in[1].desc.dims.d[2]); // num_boxes - mNumPriors = 
in[0].desc.dims.d[1]; + mNumPriors = static_cast<int32_t>(in[0].desc.dims.d[1]); mPrecision = in[0].desc.type; } @@ -564,7 +565,7 @@ char const* BatchedNMSDynamicPlugin::getPluginNamespace() const noexcept } nvinfer1::DataType BatchedNMSPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { if (index == 0) { @@ -574,7 +575,7 @@ nvinfer1::DataType BatchedNMSPlugin::getOutputDataType( } nvinfer1::DataType BatchedNMSDynamicPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { if (index == 0) { @@ -614,12 +615,12 @@ void BatchedNMSDynamicPlugin::setCaffeSemantics(bool caffeSemantics) noexcept } bool BatchedNMSPlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } -bool BatchedNMSPlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool BatchedNMSPlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } @@ -638,7 +639,7 @@ BatchedNMSBasePluginCreator::BatchedNMSBasePluginCreator() mPluginAttributes.emplace_back(PluginField("clipBoxes", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("scoreBits", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("caffeSemantics", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast<int32_t>(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -662,7 +663,7 @@ PluginFieldCollection const* BatchedNMSBasePluginCreator::getFieldNames() noexce return &mFC; } -IPluginV2Ext* 
BatchedNMSPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* BatchedNMSPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -757,7 +758,7 @@ IPluginV2Ext* BatchedNMSPluginCreator::createPlugin(char const* name, PluginFiel } IPluginV2DynamicExt* BatchedNMSDynamicPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -852,7 +853,7 @@ IPluginV2DynamicExt* BatchedNMSDynamicPluginCreator::createPlugin( } IPluginV2Ext* BatchedNMSPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { @@ -873,7 +874,7 @@ IPluginV2Ext* BatchedNMSPluginCreator::deserializePlugin( } IPluginV2DynamicExt* BatchedNMSDynamicPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/batchedNMSPlugin/batchedNMSPlugin.h b/plugin/batchedNMSPlugin/batchedNMSPlugin.h index 5ecebc625..8ea4c0112 100644 --- a/plugin/batchedNMSPlugin/batchedNMSPlugin.h +++ b/plugin/batchedNMSPlugin/batchedNMSPlugin.h @@ -82,6 +82,11 @@ class TRT_DEPRECATED BatchedNMSPlugin : public IPluginV2Ext class TRT_DEPRECATED BatchedNMSDynamicPlugin : public IPluginV2DynamicExt { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + BatchedNMSDynamicPlugin(NMSParameters param); BatchedNMSDynamicPlugin(void const* data, size_t length); ~BatchedNMSDynamicPlugin() override = default; diff --git a/plugin/bertQKVToContextPlugin/mhaRunner.h b/plugin/bertQKVToContextPlugin/mhaRunner.h index 
1cbcb4dc9..e0c685ac7 100644 --- a/plugin/bertQKVToContextPlugin/mhaRunner.h +++ b/plugin/bertQKVToContextPlugin/mhaRunner.h @@ -68,7 +68,7 @@ class MHARunner mB = B; mS = S; mHeadSize = headSize; - mRsqrtHeadSize = 1.F / std::sqrt(headSize); + mRsqrtHeadSize = 1.0f / std::sqrt(static_cast<float>(headSize)); mLdQKV = 3 * B * mNumHeads * mHeadSize; mStrideQKV = 3 * mHeadSize; diff --git a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPlugin.cpp b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPlugin.cpp index a69983fe4..2157f17e2 100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPlugin.cpp +++ b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPlugin.cpp @@ -69,7 +69,7 @@ QKVToContextInterleavedPlugin::QKVToContextInterleavedPlugin(std::string const& QKVToContextInterleavedPlugin::~QKVToContextInterleavedPlugin() {} -IPluginV3* QKVToContextInterleavedPlugin::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* QKVToContextInterleavedPlugin::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -114,7 +114,7 @@ IPluginV3* QKVToContextInterleavedPlugin::clone() noexcept } int32_t QKVToContextInterleavedPlugin::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -172,26 +172,26 @@ bool QKVToContextInterleavedPlugin::supportsFormatCombination( return false; } -int32_t QKVToContextInterleavedPlugin::onShapeChange( - PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept +int32_t QKVToContextInterleavedPlugin::onShapeChange(PluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -int32_t 
QKVToContextInterleavedPlugin::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +int32_t QKVToContextInterleavedPlugin::configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -size_t QKVToContextInterleavedPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t QKVToContextInterleavedPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } int32_t QKVToContextInterleavedPlugin::getOutputDataTypes( - DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept + DataType* outputTypes, int32_t nbOutputs, DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -236,9 +236,9 @@ int32_t QKVToContextInterleavedPlugin::enqueue(PluginTensorDesc const* inputDesc { PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const total = inputDesc[0].dims.d[2]; - int32_t const B = inputDesc[1].dims.d[0] - 1; - int32_t const maxS = inputDesc[2].dims.d[0]; + int32_t const total = static_cast<int32_t>(inputDesc[0].dims.d[2]); + int32_t const B = static_cast<int32_t>(inputDesc[1].dims.d[0] - 1); + int32_t const maxS = static_cast<int32_t>(inputDesc[2].dims.d[0]); int32_t S = 384; if (maxS <= 128) { @@ -267,9 +267,9 @@ int32_t QKVToContextInterleavedPlugin::enqueue(PluginTensorDesc const* inputDesc float scaleQkv = mUseExplicitInt8 ? mQkvScale : inputDesc[0].scale; float scaleCtx = mUseExplicitInt8 ? 
mCtxScale : outputDesc[0].scale; - float scaleBmm1 = scaleQkv * scaleQkv * 0.125; // 1 / sqrt(64) + float scaleBmm1 = scaleQkv * scaleQkv * 0.125f; // 1 / sqrt(64) float scaleBmm2 = mDqProbs * scaleQkv / scaleCtx; - float scaleSoftmax = 1.F / mDqProbs; + float scaleSoftmax = 1.0f / mDqProbs; params.scale_bmm1 = reinterpret_cast<uint32_t const&>(scaleBmm1); params.scale_bmm2 = reinterpret_cast<uint32_t const&>(scaleBmm2); @@ -279,8 +279,8 @@ int32_t QKVToContextInterleavedPlugin::enqueue(PluginTensorDesc const* inputDesc params.o_stride_in_bytes = total; params.use_int8_scale_max = mUseInt8ScaleMax; - params.enable_i2f_trick - = -double(1 << 22) * double(scaleBmm2) <= -128.F && double(1 << 22) * double(scaleBmm2) >= 127.F; + params.enable_i2f_trick = -static_cast<double>(1 << 22) * static_cast<double>(scaleBmm2) <= -128.0 + && static_cast<double>(1 << 22) * static_cast<double>(scaleBmm2) >= 127.0; try { @@ -310,7 +310,7 @@ PluginFieldCollection const* QKVToContextInterleavedPlugin::getFieldsToSerialize mDataToSerialize.emplace_back("dq_probs", &mDqProbs, PluginFieldType::kFLOAT32, 1); } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast<int32_t>(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; @@ -331,7 +331,7 @@ QKVToContextInterleavedPluginCreator::QKVToContextInterleavedPluginCreator() mPluginAttributes.emplace_back(PluginField("input_qkv_scale", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("output_ctx_scale", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast<int32_t>(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.cpp b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.cpp index 64df35a0e..1b606ee55 100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.cpp +++ 
b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.cpp @@ -104,7 +104,7 @@ nvinfer1::IPluginV2DynamicExt* QKVToContextInterleavedPluginLegacy::clone() cons } DimsExprs QKVToContextInterleavedPluginLegacy::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t outputIndex, DimsExprs const* inputs, int32_t /*nbInputs*/, IExprBuilder& exprBuilder) noexcept { // Input SHAPE is 1x(3*N*H)xTotalx1 (NCHW) // Output SHAPE is 1x(N*H)xTotalx1 @@ -148,20 +148,20 @@ bool QKVToContextInterleavedPluginLegacy::supportsFormatCombination( return false; } -void QKVToContextInterleavedPluginLegacy::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void QKVToContextInterleavedPluginLegacy::configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { } -size_t QKVToContextInterleavedPluginLegacy::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t QKVToContextInterleavedPluginLegacy::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } // IPluginV2Ext Methods DataType QKVToContextInterleavedPluginLegacy::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index == 0); return DataType::kINT8; @@ -233,9 +233,9 @@ int32_t QKVToContextInterleavedPluginLegacy::enqueue(PluginTensorDesc const* inp { PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const total = inputDesc[0].dims.d[2]; - 
int32_t const B = inputDesc[1].dims.d[0] - 1; - int32_t const maxS = inputDesc[2].dims.d[0]; + int32_t const total = static_cast(inputDesc[0].dims.d[2]); + int32_t const B = static_cast(inputDesc[1].dims.d[0] - 1); + int32_t const maxS = static_cast(inputDesc[2].dims.d[0]); int32_t S = 384; if (maxS <= 128) { @@ -264,9 +264,9 @@ int32_t QKVToContextInterleavedPluginLegacy::enqueue(PluginTensorDesc const* inp float scaleQkv = mUseExplicitInt8 ? mQkvScale : inputDesc[0].scale; float scaleCtx = mUseExplicitInt8 ? mCtxScale : outputDesc[0].scale; - float scaleBmm1 = scaleQkv * scaleQkv * 0.125; // 1 / sqrt(64) + float scaleBmm1 = scaleQkv * scaleQkv * 0.125f; // 1 / sqrt(64) float scaleBmm2 = mDqProbs * scaleQkv / scaleCtx; - float scaleSoftmax = 1.F / mDqProbs; + float scaleSoftmax = 1.0f / mDqProbs; params.scale_bmm1 = reinterpret_cast(scaleBmm1); params.scale_bmm2 = reinterpret_cast(scaleBmm2); @@ -276,8 +276,8 @@ int32_t QKVToContextInterleavedPluginLegacy::enqueue(PluginTensorDesc const* inp params.o_stride_in_bytes = total; params.use_int8_scale_max = mUseInt8ScaleMax; - params.enable_i2f_trick - = -double(1 << 22) * double(scaleBmm2) <= -128.F && double(1 << 22) * double(scaleBmm2) >= 127.F; + params.enable_i2f_trick = -static_cast(1 << 22) * static_cast(scaleBmm2) <= -128.0 + && static_cast(1 << 22) * static_cast(scaleBmm2) >= 127.0; try { @@ -302,7 +302,7 @@ QKVToContextInterleavedPluginLegacyCreator::QKVToContextInterleavedPluginLegacyC mPluginAttributes.emplace_back(PluginField("input_qkv_scale", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("output_ctx_scale", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.h b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.h index 4858f6998..48a098d64 
100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.h +++ b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.h @@ -43,6 +43,11 @@ static constexpr int32_t kSM_BLACKWELL_120 = 120; class QKVToContextInterleavedPluginLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + QKVToContextInterleavedPluginLegacy(std::string const& name, int32_t hiddenSize, int32_t numHeads, float dqProbs, bool useInt8ScaleMax, bool useExplicitInt8, float qkvScale, float ctxScale); diff --git a/plugin/bertQKVToContextPlugin/qkvToContextPlugin.cpp b/plugin/bertQKVToContextPlugin/qkvToContextPlugin.cpp index ea98d1dd0..5fbae21ed 100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextPlugin.cpp +++ b/plugin/bertQKVToContextPlugin/qkvToContextPlugin.cpp @@ -100,7 +100,6 @@ QKVToContextPluginDynamic::QKVToContextPluginDynamic(const std::string name, con BERT_DEBUG_MSG("MHA Runner Deser Done"); } - IPluginCapability* QKVToContextPluginDynamic::getCapabilityInterface(PluginCapabilityType type) noexcept { try @@ -167,7 +166,7 @@ IPluginV3* QKVToContextPluginDynamic::clone() noexcept } int32_t QKVToContextPluginDynamic::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -200,7 +199,7 @@ bool QKVToContextPluginDynamic::supportsFormatCombination( PLUGIN_ASSERT(nbInputs == 1 + mHasImask); auto const* in = inOut; auto const* out = inOut + nbInputs; - int32_t packedSize = getMHAMaskPackedSize(mSM, mType, in->desc.dims.d[SDIM]); + int32_t packedSize = getMHAMaskPackedSize(mSM, mType, 
static_cast(in->desc.dims.d[SDIM])); // we only support int8 IO in fused mha runner, and we only support fused mha runner on Xavier, Turing and Ampere if (mType == DataType::kINT8) @@ -326,8 +325,8 @@ int32_t QKVToContextPluginDynamic::onShapeChange( createMHARunner(); // mS and mB that are set by configurePlugin() may be stale - mS = inDesc.dims.d[SDIM]; - mB = inDesc.dims.d[BDIM]; + mS = static_cast(inDesc.dims.d[SDIM]); + mB = static_cast(inDesc.dims.d[BDIM]); PLUGIN_ASSERT(mS); PLUGIN_ASSERT(mB); if (fusedDispatcher.get() && fusedDispatcher->isValid(mHeadSize, mS)) @@ -375,13 +374,13 @@ int32_t QKVToContextPluginDynamic::configurePlugin( createMHARunner(); - const int32_t S = inDesc.dims.d[SDIM]; - const int32_t B = inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]; + const int32_t S = static_cast(inDesc.dims.d[SDIM]); + const int32_t B = static_cast(inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]); if (S <= 0) { // in dynamic shape build stage, we setup with max sequence that cannot fused - const int32_t Smin = in->min.d[SDIM]; - const int32_t Smax = in->max.d[SDIM]; + const int32_t Smin = static_cast(in->min.d[SDIM]); + const int32_t Smax = static_cast(in->max.d[SDIM]); if (fusedDispatcher.get()) { @@ -438,7 +437,7 @@ size_t QKVToContextPluginDynamic::getWorkspaceSize(DynamicPluginTensorDesc const // IPluginV2Ext Methods int32_t QKVToContextPluginDynamic::getOutputDataTypes( - DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept + DataType* outputTypes, int32_t /*nbOutputs*/, DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { try { @@ -496,7 +495,6 @@ char const* QKVToContextPluginDynamic::getPluginName() const noexcept return kQKV_TO_CONTEXT_PLUGIN_NAME; } - void QKVToContextPluginDynamic::setPluginNamespace(char const* libNamespace) noexcept { mNamespace = libNamespace; @@ -517,7 +515,8 @@ int32_t QKVToContextPluginDynamic::enqueue(PluginTensorDesc const* 
inputDesc, Pl try { void const* const maskPtr = mHasImask ? inputs[1] : nullptr; - if (mHasImask && fusedDispatcher.get() && fusedDispatcher->isValid(mHeadSize, inputDesc->dims.d[SDIM])) + if (mHasImask && fusedDispatcher.get() + && fusedDispatcher->isValid(mHeadSize, static_cast(inputDesc->dims.d[SDIM]))) { fusedDispatcher->run( inputDesc[0], outputDesc[0], inputs[0], maskPtr, outputs[0], workspace, stream, mCublasHandle); @@ -556,7 +555,7 @@ PluginFieldCollection const* QKVToContextPluginDynamic::getFieldsToSerialize() n mRunnerStateBuffer.resize(unfusedDispatcher->getSerializationSize()); unfusedDispatcher->serialize(mRunnerStateBuffer.data()); mDataToSerialize.emplace_back("runnerStateBuffer", (void const*) mRunnerStateBuffer.data(), - PluginFieldType::kUNKNOWN, mRunnerStateBuffer.size()); + PluginFieldType::kUNKNOWN, static_cast(mRunnerStateBuffer.size())); } else { @@ -570,7 +569,7 @@ PluginFieldCollection const* QKVToContextPluginDynamic::getFieldsToSerialize() n mDataToSerialize.emplace_back("dq_probs", &mDqProbs, PluginFieldType::kFLOAT32, 1); } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; @@ -584,7 +583,7 @@ QKVToContextPluginDynamicCreator::QKVToContextPluginDynamicCreator() mPluginAttributes.emplace_back(PluginField("has_mask", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("dq_probs", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -743,7 +742,6 @@ IPluginV3* QKVToContextPluginDynamicCreator::createPlugin( return nullptr; } - void QKVToContextPluginDynamicCreator::setPluginNamespace(char const* libNamespace) noexcept { mNamespace = libNamespace; @@ -754,7 +752,6 @@ char const* QKVToContextPluginDynamicCreator::getPluginNamespace() const 
noexcep return mNamespace.c_str(); } - ///// QKVToContextVarSeqlenPlugin (CustomQKVToContextPluginDynamic v5) //// QKVToContextVarSeqlenPlugin::~QKVToContextVarSeqlenPlugin() {} @@ -879,7 +876,6 @@ void QKVToContextVarSeqlenPlugin::createMHARunner() } } - IPluginV3* QKVToContextVarSeqlenPlugin::clone() noexcept { BERT_DEBUG_MSG("QKV Clone"); @@ -910,7 +906,7 @@ IPluginV3* QKVToContextVarSeqlenPlugin::clone() noexcept } int32_t QKVToContextVarSeqlenPlugin::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -1098,8 +1094,8 @@ int32_t QKVToContextVarSeqlenPlugin::configurePlugin( PLUGIN_ASSERT(maskDesc.dims.d[0] == inDesc.dims.d[BDIM]); } - const int32_t S = inDesc.dims.d[SDIM] <= 0 ? in->max.d[SDIM] : inDesc.dims.d[SDIM]; - const int32_t B = inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]; + const int32_t S = static_cast(inDesc.dims.d[SDIM] <= 0 ? in->max.d[SDIM] : inDesc.dims.d[SDIM]); + const int32_t B = static_cast(inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]); if (S != mS || B != mB) { @@ -1131,12 +1127,13 @@ int32_t QKVToContextVarSeqlenPlugin::configurePlugin( size_t QKVToContextVarSeqlenPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t /* nbInputs */, DynamicPluginTensorDesc const* /* outputs */, int32_t /* nbOutputs */) const noexcept { - size_t paddingWorkpaceSize = mPatcher ? mPatcher->getWorkspaceSize(inputs[0].desc.dims.d[0], mNumHeads) : 0; + size_t paddingWorkpaceSize + = mPatcher ? 
mPatcher->getWorkspaceSize(static_cast(inputs[0].desc.dims.d[0]), mNumHeads) : 0; return mDispatcher->getWorkspaceSize() + paddingWorkpaceSize; } int32_t QKVToContextVarSeqlenPlugin::getOutputDataTypes( - DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept + DataType* outputTypes, int32_t /*nbOutputs*/, DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { try { @@ -1194,7 +1191,6 @@ char const* QKVToContextVarSeqlenPlugin::getPluginName() const noexcept return kQKV_TO_CONTEXT_PLUGIN_NAME; } - void QKVToContextVarSeqlenPlugin::setPluginNamespace(char const* libNamespace) noexcept { mNamespace = libNamespace; @@ -1213,8 +1209,8 @@ int32_t QKVToContextVarSeqlenPlugin::enqueue(nvinfer1::PluginTensorDesc const* i if (mUseVarSeqlen) { - const int32_t B = inputDesc[2].dims.d[0] - 1; - const int32_t maxS = inputDesc[3].dims.d[0]; + const int32_t B = static_cast(inputDesc[2].dims.d[0] - 1); + const int32_t maxS = static_cast(inputDesc[3].dims.d[0]); PLUGIN_ASSERT((maxS <= 512) && "No implementation for variable sequence length multi-head attention plugin with sequence > 512."); @@ -1249,7 +1245,8 @@ int32_t QKVToContextVarSeqlenPlugin::enqueue(nvinfer1::PluginTensorDesc const* i } auto runV2Kernel = [this, &S, &B, &workspace, &inputDesc, &outputDesc, &stream, &inputs, &outputs]( - MHARunner* dispatcher, QkvPaddingRunner* patcher, int32_t padSize) { + MHARunner* dispatcher, QkvPaddingRunner* patcher, int32_t padSize) + { PLUGIN_ASSERT(dispatcher); // Validate that we can padding to the dispatch required head size also there is kernel exist for this // sequence length. 
@@ -1264,7 +1261,7 @@ int32_t QKVToContextVarSeqlenPlugin::enqueue(nvinfer1::PluginTensorDesc const* i { PLUGIN_ASSERT(patcher); PLUGIN_ASSERT(padSize <= patcher->getMaxPaddingHeadSize()); - auto sumSeqLen = inputDesc[0].dims.d[0]; + int32_t sumSeqLen = static_cast(inputDesc[0].dims.d[0]); auto paddingWorkspace = patcher->get16BytesAlignedPointer(workspace, dispatcher->getWorkspaceSize()); auto ret = mPatcher->pad(inputs[0], paddingWorkspace, sumSeqLen, mNumHeads, mHeadSize, padSize, stream); if (ret != cudaSuccess) @@ -1337,14 +1334,14 @@ PluginFieldCollection const* QKVToContextVarSeqlenPlugin::getFieldsToSerialize() mRunnerStateBuffer.resize(mDispatcher->getSerializationSize()); mDispatcher->serialize(mRunnerStateBuffer.data()); mDataToSerialize.emplace_back("runnerStateBuffer", (void const*) mRunnerStateBuffer.data(), - PluginFieldType::kUNKNOWN, mRunnerStateBuffer.size()); + PluginFieldType::kUNKNOWN, static_cast(mRunnerStateBuffer.size())); if (mDqProbs >= 0) { mDataToSerialize.emplace_back("dq_probs", &mDqProbs, PluginFieldType::kFLOAT32, 1); } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; @@ -1361,7 +1358,7 @@ QKVToContextVarSeqlenPluginCreator::QKVToContextVarSeqlenPluginCreator() mPluginAttributes.emplace_back(PluginField("var_seqlen", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("use_int8_scale_max", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.cpp b/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.cpp index 9fe6a27c8..e0c1fc76c 100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.cpp +++ b/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.cpp @@ 
-162,7 +162,7 @@ bool QKVToContextPluginDynamicLegacy::supportsFormatCombination( PLUGIN_ASSERT(nbInputs == 1 + mHasImask); auto const* in = inOut; auto const* out = inOut + nbInputs; - int32_t packedSize = getMHAMaskPackedSize(mSM, mType, in->dims.d[SDIM]); + int32_t packedSize = getMHAMaskPackedSize(mSM, mType, static_cast(in->dims.d[SDIM])); // we only support int8 IO in fused mha runner, and we only support fused mha runner on Xavier, Turing and Ampere if (mType == DataType::kINT8) @@ -282,13 +282,13 @@ void QKVToContextPluginDynamicLegacy::configurePlugin( createMHARunner(); - int32_t const S = inDesc.dims.d[SDIM]; - int32_t const B = inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]; + int32_t const S = static_cast(inDesc.dims.d[SDIM]); + int32_t const B = static_cast(inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]); if (S <= 0) { // in dynamic shape build stage, we setup with max sequence that cannot fused - int32_t const Smin = in->min.d[SDIM]; - int32_t const Smax = in->max.d[SDIM]; + int32_t const Smin = static_cast(in->min.d[SDIM]); + int32_t const Smax = static_cast(in->max.d[SDIM]); if (fusedDispatcher.get()) { @@ -347,7 +347,7 @@ DataType QKVToContextPluginDynamicLegacy::getOutputDataType( } void QKVToContextPluginDynamicLegacy::attachToContext( - cudnnContext* cudnn, cublasContext* cublas, nvinfer1::IGpuAllocator* allocator) noexcept + cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, nvinfer1::IGpuAllocator* allocator) noexcept { try { @@ -442,7 +442,8 @@ int32_t QKVToContextPluginDynamicLegacy::enqueue(PluginTensorDesc const* inputDe try { void const* const maskPtr = mHasImask ? 
inputs[1] : nullptr; - if (mHasImask && fusedDispatcher.get() && fusedDispatcher->isValid(mHeadSize, inputDesc->dims.d[SDIM])) + if (mHasImask && fusedDispatcher.get() + && fusedDispatcher->isValid(mHeadSize, static_cast(inputDesc->dims.d[SDIM]))) { fusedDispatcher->run( inputDesc[0], outputDesc[0], inputs[0], maskPtr, outputs[0], workspace, stream, mCublas); @@ -471,7 +472,7 @@ QKVToContextPluginDynamicLegacyCreator::QKVToContextPluginDynamicLegacyCreator() mPluginAttributes.emplace_back(PluginField("has_mask", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("dq_probs", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -819,8 +820,8 @@ void QKVToContextVarSeqlenPluginLegacy::configurePlugin( PLUGIN_ASSERT(maskDesc.dims.d[0] == inDesc.dims.d[BDIM]); } - int32_t const S = inDesc.dims.d[SDIM] <= 0 ? in->max.d[SDIM] : inDesc.dims.d[SDIM]; - int32_t const B = inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]; + int32_t const S = static_cast(inDesc.dims.d[SDIM] <= 0 ? in->max.d[SDIM] : inDesc.dims.d[SDIM]); + int32_t const B = static_cast(inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]); if (S != mS || B != mB) { @@ -844,7 +845,8 @@ void QKVToContextVarSeqlenPluginLegacy::configurePlugin( size_t QKVToContextVarSeqlenPluginLegacy::getWorkspaceSize(PluginTensorDesc const* inputs, int32_t /* nbInputs */, PluginTensorDesc const* /* outputs */, int32_t /* nbOutputs */) const noexcept { - size_t paddingWorkpaceSize = mPatcher ? mPatcher->getWorkspaceSize(inputs[0].dims.d[0], mNumHeads) : 0; + size_t paddingWorkpaceSize + = mPatcher ? 
mPatcher->getWorkspaceSize(static_cast(inputs[0].dims.d[0]), mNumHeads) : 0; return mDispatcher->getWorkspaceSize() + paddingWorkpaceSize; } @@ -859,7 +861,7 @@ DataType QKVToContextVarSeqlenPluginLegacy::getOutputDataType( } void QKVToContextVarSeqlenPluginLegacy::attachToContext( - cudnnContext* cudnn, cublasContext* cublas, nvinfer1::IGpuAllocator* allocator) noexcept + cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, nvinfer1::IGpuAllocator* allocator) noexcept { try { @@ -944,8 +946,8 @@ int32_t QKVToContextVarSeqlenPluginLegacy::enqueue(nvinfer1::PluginTensorDesc co if (mUseVarSeqlen) { - int32_t const B = inputDesc[2].dims.d[0] - 1; - int32_t const maxS = inputDesc[3].dims.d[0]; + int32_t const B = static_cast(inputDesc[2].dims.d[0] - 1); + int32_t const maxS = static_cast(inputDesc[3].dims.d[0]); PLUGIN_ASSERT((maxS <= 512) && "No implementation for variable sequence length multi-head attention plugin with sequence > 512."); @@ -980,7 +982,8 @@ int32_t QKVToContextVarSeqlenPluginLegacy::enqueue(nvinfer1::PluginTensorDesc co } auto runV2Kernel = [this, &S, &B, &workspace, &inputDesc, &outputDesc, &stream, &inputs, &outputs]( - MHARunner* dispatcher, QkvPaddingRunner* patcher, int32_t padSize) { + MHARunner* dispatcher, QkvPaddingRunner* patcher, int32_t padSize) + { PLUGIN_ASSERT(dispatcher); // Validate that we can padding to the dispatch required head size also there is kernel exist for this // sequence length. 
@@ -995,7 +998,7 @@ int32_t QKVToContextVarSeqlenPluginLegacy::enqueue(nvinfer1::PluginTensorDesc co { PLUGIN_ASSERT(patcher); PLUGIN_ASSERT(padSize <= patcher->getMaxPaddingHeadSize()); - auto sumSeqLen = inputDesc[0].dims.d[0]; + int32_t sumSeqLen = static_cast(inputDesc[0].dims.d[0]); auto paddingWorkspace = patcher->get16BytesAlignedPointer(workspace, dispatcher->getWorkspaceSize()); auto ret = mPatcher->pad(inputs[0], paddingWorkspace, sumSeqLen, mNumHeads, mHeadSize, padSize, stream); if (ret != cudaSuccess) @@ -1063,7 +1066,7 @@ QKVToContextVarSeqlenPluginLegacyCreator::QKVToContextVarSeqlenPluginLegacyCreat mPluginAttributes.emplace_back(PluginField("var_seqlen", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("use_int8_scale_max", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.h b/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.h index c8fa0d8f0..09bc59c01 100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.h +++ b/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.h @@ -55,6 +55,11 @@ int32_t computeMaskedScaledSoftmax(cudaStream_t stream, int32_t const ld, int32_ class QKVToContextPluginDynamicLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + QKVToContextPluginDynamicLegacy(std::string const name, nvinfer1::DataType const type, int32_t const hiddenSize, int32_t const numHeads, float const dqProbs, bool hasImask = false); @@ -120,11 +125,6 @@ class QKVToContextPluginDynamicLegacy : public nvinfer1::IPluginV2DynamicExt nvinfer1::pluginInternal::cublasHandle_t mCublas{}; // 
the wrapper pointer is shared among all plugins attached to the same context. std::shared_ptr mCublasWrapper; - - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; }; class QKVToContextPluginDynamicLegacyCreator : public nvinfer1::IPluginCreator @@ -156,6 +156,11 @@ class QKVToContextPluginDynamicLegacyCreator : public nvinfer1::IPluginCreator class QKVToContextVarSeqlenPluginLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + QKVToContextVarSeqlenPluginLegacy(std::string const name, nvinfer1::DataType const type, int32_t const hiddenSize, int32_t const numHeads, float const dqProbs, bool hasImask = false, bool varSeqlen = false, bool const useInt8ScaleMax = true); @@ -225,11 +230,6 @@ class QKVToContextVarSeqlenPluginLegacy : public nvinfer1::IPluginV2DynamicExt nvinfer1::pluginInternal::cublasHandle_t mCublas{}; // the wrapper pointer is shared among all plugins attached to the same context. 
std::shared_ptr mCublasWrapper; - - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; }; class QKVToContextVarSeqlenPluginLegacyCreator : public nvinfer1::IPluginCreator diff --git a/plugin/clipPlugin/clipPlugin.cpp b/plugin/clipPlugin/clipPlugin.cpp index 727f8fb7d..d4f440438 100644 --- a/plugin/clipPlugin/clipPlugin.cpp +++ b/plugin/clipPlugin/clipPlugin.cpp @@ -88,7 +88,8 @@ int32_t ClipPlugin::enqueue( { void* output = outputs[0]; int32_t status = pluginStatus_t::STATUS_FAILURE; - status = clipInference(stream, mInputVolume * batchSize, mClipMin, mClipMax, inputs[0], output, mDataType); + status = clipInference( + stream, static_cast(mInputVolume * batchSize), mClipMin, mClipMax, inputs[0], output, mDataType); if (status != pluginStatus_t::STATUS_SUCCESS) { @@ -127,8 +128,8 @@ void ClipPlugin::serialize(void* buffer) const noexcept PLUGIN_ASSERT(d == a + getSerializationSize()); } -void ClipPlugin::configureWithFormat(Dims const* inputs, int32_t nbInputs, Dims const* outputs, int32_t nbOutputs, - DataType type, PluginFormat format, int32_t) noexcept +void ClipPlugin::configureWithFormat(Dims const* inputs, int32_t /*nbInputs*/, Dims const* /*outputs*/, + int32_t nbOutputs, DataType type, PluginFormat format, int32_t) noexcept { PLUGIN_ASSERT(nbOutputs == 1); PLUGIN_API_CHECK_ENUM_RANGE(DataType, type); @@ -138,7 +139,7 @@ void ClipPlugin::configureWithFormat(Dims const* inputs, int32_t nbInputs, Dims size_t volume = 1; for (int32_t i = 0; i < inputs->nbDims; i++) { - volume *= inputs->d[i]; + volume *= static_cast(inputs->d[i]); } mInputVolume = volume; } @@ -191,7 +192,7 @@ ClipPluginCreator::ClipPluginCreator() mPluginAttributes.emplace_back(PluginField("clipMin", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("clipMax", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = 
static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -217,7 +218,7 @@ IPluginV2* ClipPluginCreator::createPlugin(char const* name, PluginFieldCollecti gLogWarning << "ClipPlugin is deprecated since TensorRT 9.0. Use INetworkDefinition::addActivation() to add an " "IActivationLayer with ActivationType::kCLIP." << std::endl; - float clipMin = 0.0, clipMax = 0.0; + float clipMin = 0.0f, clipMax = 0.0f; PluginField const* fields = fc->fields; plugin::validateRequiredAttributesExist({"clipMin", "clipMax"}, fc); diff --git a/plugin/common/bboxUtils.h b/plugin/common/bboxUtils.h index 6419611dd..defdd4b15 100644 --- a/plugin/common/bboxUtils.h +++ b/plugin/common/bboxUtils.h @@ -26,11 +26,11 @@ template struct Bbox { T xmin, ymin, xmax, ymax; - Bbox(T xmin, T ymin, T xmax, T ymax) - : xmin(xmin) - , ymin(ymin) - , xmax(xmax) - , ymax(ymax) + Bbox(T xmin_, T ymin_, T xmax_, T ymax_) + : xmin(xmin_) + , ymin(ymin_) + , xmax(xmax_) + , ymax(ymax_) { } Bbox() = default; @@ -43,11 +43,11 @@ struct BboxInfo int32_t label; int32_t bbox_idx; bool kept; - BboxInfo(T conf_score, int32_t label, int32_t bbox_idx, bool kept) - : conf_score(conf_score) - , label(label) - , bbox_idx(bbox_idx) - , kept(kept) + BboxInfo(T conf_score_, int32_t label_, int32_t bbox_idx_, bool kept_) + : conf_score(conf_score_) + , label(label_) + , bbox_idx(bbox_idx_) + , kept(kept_) { } BboxInfo() = default; diff --git a/plugin/common/bertCommon.h b/plugin/common/bertCommon.h index 06df9fbc7..ecd96884c 100644 --- a/plugin/common/bertCommon.h +++ b/plugin/common/bertCommon.h @@ -372,12 +372,12 @@ struct WeightsWithOwnership : public nvinfer1::Weights WeightsWithOwnership(WeightsWithOwnership const&&) = delete; WeightsWithOwnership operator=(WeightsWithOwnership const&&) = delete; - void convertAndCopy(nvinfer1::Weights const& src, nvinfer1::DataType type) + void convertAndCopy(nvinfer1::Weights const& src, nvinfer1::DataType type_) { - this->type = type; + this->type = type_; 
this->count = src.count; - if (type == nvinfer1::DataType::kFLOAT) + if (type_ == nvinfer1::DataType::kFLOAT) { auto destBuf = new float[src.count]; this->values = destBuf; @@ -401,7 +401,7 @@ struct WeightsWithOwnership : public nvinfer1::Weights } } } - else if (type == nvinfer1::DataType::kHALF) + else if (type_ == nvinfer1::DataType::kHALF) { auto destBuf = new half[src.count]; this->values = destBuf; @@ -431,11 +431,11 @@ struct WeightsWithOwnership : public nvinfer1::Weights } } - void convertAndCopy(char const*& srcBuf, size_t count, nvinfer1::DataType type) noexcept + void convertAndCopy(char const*& srcBuf, size_t count_, nvinfer1::DataType type_) noexcept { - this->type = type; - this->count = count; - auto const nbBytes = getWeightsSize(*this, type); + this->type = type_; + this->count = count_; + auto const nbBytes = getWeightsSize(*this, type_); auto destBuf = new char[nbBytes]; this->values = destBuf; diff --git a/plugin/common/checkMacrosPlugin.h b/plugin/common/checkMacrosPlugin.h index ae9ed9808..095d5e71f 100644 --- a/plugin/common/checkMacrosPlugin.h +++ b/plugin/common/checkMacrosPlugin.h @@ -61,34 +61,35 @@ class CublasError : public TRTException } // namespace nvinfer1 +#undef PLUGIN_CHECK_CUDNN #define PLUGIN_CHECK_CUDNN(call) \ do \ { \ - cudnnStatus_t status = call; \ - if (status != CUDNN_STATUS_SUCCESS) \ + cudnnStatus_t status_check_cudnn_ = call; \ + if (status_check_cudnn_ != CUDNN_STATUS_SUCCESS) \ { \ - return status; \ + return status_check_cudnn_; \ } \ } while (0) #define PLUGIN_CUBLASASSERT(status_) \ { \ - auto s_ = status_; \ - if (s_ != nvinfer1::pluginInternal::CUBLAS_STATUS_SUCCESS) \ + auto s_cublas_ = status_; \ + if (s_cublas_ != nvinfer1::pluginInternal::CUBLAS_STATUS_SUCCESS) \ { \ - nvinfer1::plugin::throwCublasError(__FILE__, FN_NAME, __LINE__, s_); \ + nvinfer1::plugin::throwCublasError(__FILE__, FN_NAME, __LINE__, s_cublas_); \ } \ } #define PLUGIN_CUDNNASSERT(status_) \ { \ - auto s_ = status_; \ - if (s_ != 
CUDNN_STATUS_SUCCESS) \ + auto s_cudnn_ = status_; \ + if (s_cudnn_ != CUDNN_STATUS_SUCCESS) \ { \ nvinfer1::pluginInternal::CudnnWrapper& wrapper \ = nvinfer1::pluginInternal::getCudnnWrapper(/* plugin caller name */ nullptr); \ - const char* msg = wrapper.cudnnGetErrorString(s_); \ - nvinfer1::plugin::throwCudnnError(__FILE__, FN_NAME, __LINE__, s_, msg); \ + const char* msg_cudnn_ = wrapper.cudnnGetErrorString(s_cudnn_); \ + nvinfer1::plugin::throwCudnnError(__FILE__, FN_NAME, __LINE__, s_cudnn_, msg_cudnn_); \ } \ } diff --git a/plugin/common/cudaDriverWrapper.cpp b/plugin/common/cudaDriverWrapper.cpp index f81635a06..42badfcb8 100644 --- a/plugin/common/cudaDriverWrapper.cpp +++ b/plugin/common/cudaDriverWrapper.cpp @@ -44,8 +44,9 @@ CUDADriverWrapper::CUDADriverWrapper() handle = dllOpen(CUDA_LIB_NAME); PLUGIN_ASSERT(handle != nullptr); - auto load_sym = [](void* handle, char const* name) { - void* ret = dllGetSym(handle, name); + auto load_sym = [](void* libHandle, char const* name) + { + void* ret = dllGetSym(libHandle, name); PLUGIN_ASSERT(ret != nullptr); return ret; }; diff --git a/plugin/common/cudnnWrapper.cpp b/plugin/common/cudnnWrapper.cpp index a16270f9c..6ee15d6d6 100644 --- a/plugin/common/cudnnWrapper.cpp +++ b/plugin/common/cudnnWrapper.cpp @@ -44,7 +44,8 @@ namespace nvinfer1::pluginInternal CudnnWrapper::CudnnWrapper(bool initHandle, char const* callerPluginName) : mLibrary(tryLoadingCudnn(callerPluginName)) { - auto load_sym = [](void* handle, char const* name) { + auto load_sym = [](void* handle, char const* name) + { void* ret = dllGetSym(handle, name); std::string loadError = "Fail to load symbol " + std::string(name) + " from the cudnn library."; PLUGIN_VALIDATE(ret != nullptr, loadError.c_str()); @@ -87,6 +88,7 @@ CudnnWrapper::~CudnnWrapper() void* CudnnWrapper::tryLoadingCudnn(char const* callerPluginName) { + (void) callerPluginName; #if CUDART_VERSION >= 12070 && CUDNN_MAJOR == 8 static constexpr int32_t kSM_BLACKWELL_100 = 100; 
diff --git a/plugin/common/kernels/proposalKernel.cu b/plugin/common/kernels/proposalKernel.cu index 52d97ea50..5dff84ce5 100644 --- a/plugin/common/kernels/proposalKernel.cu +++ b/plugin/common/kernels/proposalKernel.cu @@ -32,6 +32,7 @@ namespace nvinfer1 { namespace plugin { +#undef PLUGIN_CHECK_CUDA #define PLUGIN_CHECK_CUDA(call) \ do \ { \ @@ -59,23 +60,23 @@ typedef pluginStatus_t frcnnStatus_t; #define DEBUG_RPN_ENABLE 0 -#define FRCNN_ASSERT_PARAM(exp) \ - do \ - { \ - if (!(exp)) \ - { \ - DEBUG_FPRINTF(stderr, "Bad param - " #exp ", %s:%d\n", __FILE__, __LINE__); \ - return STATUS_BAD_PARAM; \ - } \ +#define FRCNN_ASSERT_PARAM(exp) \ + do \ + { \ + if (!(exp)) \ + { \ + DEBUG_FPRINTF(stderr, "Bad param - " #exp ", %s:%d\n", __FILE__, __LINE__); \ + return STATUS_BAD_PARAM; \ + } \ } while (0) -#define DEBUG_FPRINTF(...) \ - do \ - { \ - if (DEBUG_RPN_ENABLE) \ - { \ - fprintf(__VA_ARGS__); \ - } \ +#define DEBUG_FPRINTF(...) \ + do \ + { \ + if (DEBUG_RPN_ENABLE) \ + { \ + fprintf(__VA_ARGS__); \ + } \ } while (0) #define CUDA_MEM_ALIGN 256 @@ -83,46 +84,30 @@ typedef pluginStatus_t frcnnStatus_t; unsigned int hash(const void* array_, size_t size); int8_t* alignPtr(int8_t* ptr, uintptr_t to); __global__ void setOffset(int stride, int size, int* output); -frcnnStatus_t nms(cudaStream_t stream, - const int N, - const int R, - const int preNmsTop, - const int nmsMaxOut, - const float iouThreshold, - const DType_t t_fgScores, - const DLayout_t l_fgScores, - void* fgScores, - const DType_t t_proposals, - const DLayout_t l_proposals, - const void* proposals, - void* workspace, - const DType_t t_rois, - void* rois); +frcnnStatus_t nms(cudaStream_t stream, const int N, const int R, const int preNmsTop, const int nmsMaxOut, + const float iouThreshold, const DType_t t_fgScores, const DLayout_t l_fgScores, void* fgScores, + const DType_t t_proposals, const DLayout_t l_proposals, const void* proposals, void* workspace, + const DType_t t_rois, void* rois); int8_t* 
nextWorkspacePtr(int8_t* ptr, uintptr_t previousWorkspaceSize); - template __device__ __host__ inline float IoU(const Bbox& a, const Bbox& b) { TFloat left = max(a.x1, b.x1), right = min(a.x2, b.x2); TFloat top = max(a.y1, b.y1), bottom = min(a.y2, b.y2); - TFloat width = max((TFloat)(right - left + (TFloat) 1.0), (TFloat) 0.0); - TFloat height = max((TFloat)(bottom - top + (TFloat) 1.0), (TFloat) 0.0); + TFloat width = max((TFloat) (right - left + (TFloat) 1.0), (TFloat) 0.0); + TFloat height = max((TFloat) (bottom - top + (TFloat) 1.0), (TFloat) 0.0); TFloat interS = width * height; TFloat Sa = (a.x2 - a.x1 + (TFloat) 1) * (a.y2 - a.y1 + (TFloat) 1); TFloat Sb = (b.x2 - b.x1 + (TFloat) 1) * (b.y2 - b.y1 + (TFloat) 1); return (float) interS / (float) (Sa + Sb - interS); } - // NMS KERNEL FOR SMALL BATCH SIZE {{{ template __global__ __launch_bounds__(DIM) void nmsKernel1(const int propSize, - Bbox const* __restrict__ preNmsProposals, - T_ROIS* __restrict__ afterNmsProposals, - const int preNmsTopN, - const float nmsThres, - const int afterNmsTopN) + Bbox const* __restrict__ preNmsProposals, T_ROIS* __restrict__ afterNmsProposals, const int preNmsTopN, + const float nmsThres, const int afterNmsTopN) { __shared__ bool kept_boxes[TSIZE * DIM]; int kept = 0; @@ -185,8 +170,7 @@ __global__ __launch_bounds__(DIM) void nmsKernel1(const int propSize, do { ref_box_idx++; - } - while (!kept_boxes[ref_box_idx - batch_offset] && ref_box_idx < max_box_idx); + } while (!kept_boxes[ref_box_idx - batch_offset] && ref_box_idx < max_box_idx); kept++; } @@ -195,12 +179,8 @@ __global__ __launch_bounds__(DIM) void nmsKernel1(const int propSize, // NMS KERNEL FOR LARGE BATCH SIZE {{{ template -__global__ __launch_bounds__(DIM) void nmsKernel2(const int propSize, - Bbox const* __restrict__ proposals, - T_ROIS* __restrict__ filtered, - const int preNmsTopN, - const float nmsThres, - const int afterNmsTopN) +__global__ __launch_bounds__(DIM) void nmsKernel2(const int propSize, Bbox const* 
__restrict__ proposals, + T_ROIS* __restrict__ filtered, const int preNmsTopN, const float nmsThres, const int afterNmsTopN) { Bbox const* cProposals = proposals + blockIdx.x * propSize; Bbox t[TSIZE]; @@ -265,8 +245,7 @@ __global__ __launch_bounds__(DIM) void nmsKernel2(const int propSize, for (int k = 0; k < TSIZE; k++) { - if (index < k * DIM + threadIdx.x - && IoU(test, t[k]) > nmsThres) + if (index < k * DIM + threadIdx.x && IoU(test, t[k]) > nmsThres) { del |= (uint64_t) 1 << k; } @@ -279,34 +258,22 @@ __global__ __launch_bounds__(DIM) void nmsKernel2(const int propSize, // NMS LAUNCH {{{ template -frcnnStatus_t nmsLaunch(cudaStream_t stream, - const int batch, - const int propSize, - void* proposals, - void* filtered, - const int preNmsTopN, - const float nmsThres, - const int afterNmsTopN) +frcnnStatus_t nmsLaunch(cudaStream_t stream, const int batch, const int propSize, void* proposals, void* filtered, + const int preNmsTopN, const float nmsThres, const int afterNmsTopN) { const int blockSize = 1024; #define P1(tsize) nmsKernel1 #define P2(tsize) nmsKernel2 - void (*kernel[64])(int, Bbox const*, T_ROIS*, int, float, int) = - { - P1(1), P1(2), P1(3), P1(4), P1(5), P1(6), P1(7), P1(8), P1(9), P1(10), P1(11), P1(12), P2(13), P2(14), P2(15), P2(16), - P2(17), P2(18), P2(19), P2(20), P2(21), P2(22), P2(23), P2(24), P2(25), P2(26), P2(27), P2(28), P2(29), P2(30), P2(31), P2(32), - P2(33), P2(34), P2(35), P2(36), P2(37), P2(38), P2(39), P2(40), P2(41), P2(42), P2(43), P2(44), P2(45), P2(46), P2(47), P2(48), - P2(49), P2(50), P2(51), P2(52), P2(53), P2(54), P2(55), P2(56), P2(57), P2(58), P2(59), P2(60), P2(61), P2(62), P2(63), P2(64) - }; + void (*kernel[64])(int, Bbox const*, T_ROIS*, int, float, int) + = {P1(1), P1(2), P1(3), P1(4), P1(5), P1(6), P1(7), P1(8), P1(9), P1(10), P1(11), P1(12), P2(13), P2(14), + P2(15), P2(16), P2(17), P2(18), P2(19), P2(20), P2(21), P2(22), P2(23), P2(24), P2(25), P2(26), P2(27), + P2(28), P2(29), P2(30), P2(31), P2(32), P2(33), 
P2(34), P2(35), P2(36), P2(37), P2(38), P2(39), P2(40), + P2(41), P2(42), P2(43), P2(44), P2(45), P2(46), P2(47), P2(48), P2(49), P2(50), P2(51), P2(52), P2(53), + P2(54), P2(55), P2(56), P2(57), P2(58), P2(59), P2(60), P2(61), P2(62), P2(63), P2(64)}; FRCNN_ASSERT_PARAM(preNmsTopN < 64 * blockSize); - CSC(cudaMemsetAsync(filtered, 0, batch * afterNmsTopN * 4 * sizeof(T_ROIS), stream), - STATUS_FAILURE); - kernel[(preNmsTopN + blockSize - 1) / blockSize - 1] <<< batch, blockSize, 0, stream>>>(propSize, - (Bbox*) proposals, - (T_ROIS*) filtered, - preNmsTopN, - nmsThres, - afterNmsTopN); + CSC(cudaMemsetAsync(filtered, 0, batch * afterNmsTopN * 4 * sizeof(T_ROIS), stream), STATUS_FAILURE); + kernel[(preNmsTopN + blockSize - 1) / blockSize - 1]<<>>( + propSize, (Bbox*) proposals, (T_ROIS*) filtered, preNmsTopN, nmsThres, afterNmsTopN); CSC(cudaGetLastError(), STATUS_FAILURE); return STATUS_SUCCESS; } @@ -354,14 +321,7 @@ frcnnStatus_t nmsGpu(cudaStream_t stream, const int N, const int R, const int pr DEBUG_PRINTF("&&&& [NMS] POST CUB\n"); DEBUG_PRINTF("&&&& [NMS] PROPOSALS %u\n", hash(proposalsOut, N * R * 4 * sizeof(float))); DEBUG_PRINTF("&&&& [NMS] SCORES %u\n", hash(scoresOut, N * R * sizeof(float))); - error = nmsLaunch(stream, - N, - R, - proposalsOut, - rois, - preNmsTop, - iouThreshold, - nmsMaxOut); + error = nmsLaunch(stream, N, R, proposalsOut, rois, preNmsTop, iouThreshold, nmsMaxOut); DEBUG_PRINTF("&&&& [NMS] POST LAUNCH\n"); DEBUG_PRINTF("&&&& [NMS] SCORES %u\n", hash(rois, N * nmsMaxOut * 4 * sizeof(float))); @@ -375,15 +335,15 @@ frcnnStatus_t nmsGpu(cudaStream_t stream, const int N, const int R, const int pr // }}} typedef frcnnStatus_t (*nmsFun)(cudaStream_t, - const int, // N - const int, // R - const int, // preNmsTop - const int, // nmsMaxOut - const float, // iouThreshold - void*, // fgScores - const void*, // proposals, - void*, // workspace, - void*); // rois + const int, // N + const int, // R + const int, // preNmsTop + const int, // 
nmsMaxOut + const float, // iouThreshold + void*, // fgScores + const void*, // proposals, + void*, // workspace, + void*); // rois struct nmsLaunchConfig { @@ -394,12 +354,8 @@ struct nmsLaunchConfig DType_t t_rois; nmsFun function; - nmsLaunchConfig(DType_t t_fgScores, - DLayout_t l_fgScores, - DType_t t_proposals, - DLayout_t l_proposals, - DType_t t_rois, - nmsFun function) + nmsLaunchConfig(DType_t t_fgScores, DLayout_t l_fgScores, DType_t t_proposals, DLayout_t l_proposals, + DType_t t_rois, nmsFun function) : t_fgScores(t_fgScores) , l_fgScores(l_fgScores) , t_proposals(t_proposals) @@ -423,26 +379,23 @@ struct nmsLaunchConfig bool operator==(nmsLaunchConfig const& other) const { return (t_fgScores == other.t_fgScores) && (l_fgScores == other.l_fgScores) - && (t_proposals == other.t_proposals) && (l_proposals == other.l_proposals) - && (t_rois == other.t_rois); + && (t_proposals == other.t_proposals) && (l_proposals == other.l_proposals) && (t_rois == other.t_rois); } }; static std::vector nmsLCVec; #define FLOAT32 nvinfer1::DataType::kFLOAT -__global__ void _inverse_transform_gpu(const float* RPN_prob, const float* RPN_regr, int N, - int INPUT_H, int INPUT_W, int RPN_H, int RPN_W, float RPN_STD_SCALING, int RPN_STRIDE, - float* ANCHOR_SIZES, int anc_size_num, float* ANCHOR_RATIOS, int anc_ratio_num, float bbox_min_size, - float* fg_scores, float* proposal_out) +__global__ void _inverse_transform_gpu(const float* RPN_prob, const float* RPN_regr, int N, int INPUT_H, int INPUT_W, + int RPN_H, int RPN_W, float RPN_STD_SCALING, int RPN_STRIDE, float* ANCHOR_SIZES, int anc_size_num, + float* ANCHOR_RATIOS, int anc_ratio_num, float bbox_min_size, float* fg_scores, float* proposal_out) { int nthreads = N * RPN_H * RPN_W * anc_size_num * anc_ratio_num; int num_ancs = anc_size_num * anc_ratio_num; - for (int out_idx = threadIdx.x + blockDim.x * blockIdx.x; out_idx < nthreads; - out_idx += blockDim.x * gridDim.x) + for (int out_idx = threadIdx.x + blockDim.x * 
blockIdx.x; out_idx < nthreads; out_idx += blockDim.x * gridDim.x) { - //input RPN_regr: (N, A4, H, W), thread: (N, A, H, W) + // input RPN_regr: (N, A4, H, W), thread: (N, A, H, W) int idx = out_idx; int w = idx % RPN_W; idx /= RPN_W; @@ -481,7 +434,7 @@ __global__ void _inverse_transform_gpu(const float* RPN_prob, const float* RPN_r ty = (ty >= 0.0f) ? ty : 0.0f; tw = (tw >= 0.0f) ? tw : 0.0f; th = (th >= 0.0f) ? th : 0.0f; - //clip to max + // clip to max tx = (tx <= INPUT_W - 1.0f) ? tx : (INPUT_W - 1.0f); ty = (ty <= INPUT_H - 1.0f) ? ty : (INPUT_H - 1.0f); tw = (tw <= INPUT_W - 1.0f) ? tw : (INPUT_W - 1.0f); @@ -503,28 +456,21 @@ __global__ void _inverse_transform_gpu(const float* RPN_prob, const float* RPN_r } } - - -cudaError_t _inverse_transform_wrapper(const float* RPN_prob, const float* RPN_regr, int N, int INPUT_H, - int INPUT_W, int RPN_H, int RPN_W, float RPN_STD_SCALING, int RPN_STRIDE, float* ANCHOR_SIZES, - int anc_size_num, float* ANCHOR_RATIOS, int anc_ratio_num, float bbox_min_size, float* fg_scores, - float* proposal_out, cudaStream_t stream) +cudaError_t _inverse_transform_wrapper(const float* RPN_prob, const float* RPN_regr, int N, int INPUT_H, int INPUT_W, + int RPN_H, int RPN_W, float RPN_STD_SCALING, int RPN_STRIDE, float* ANCHOR_SIZES, int anc_size_num, + float* ANCHOR_RATIOS, int anc_ratio_num, float bbox_min_size, float* fg_scores, float* proposal_out, + cudaStream_t stream) { const int block_size = 1024; - const int grid_size = (N * anc_size_num * anc_ratio_num * RPN_H * RPN_W + block_size - 1) / - (block_size); - _inverse_transform_gpu <<< grid_size, block_size, 0, stream>>> (RPN_prob, RPN_regr, N, INPUT_H, - INPUT_W, RPN_H, RPN_W, RPN_STD_SCALING, RPN_STRIDE, ANCHOR_SIZES, anc_size_num, ANCHOR_RATIOS, - anc_ratio_num, bbox_min_size, fg_scores, proposal_out); + const int grid_size = (N * anc_size_num * anc_ratio_num * RPN_H * RPN_W + block_size - 1) / (block_size); + _inverse_transform_gpu<<>>(RPN_prob, RPN_regr, N, INPUT_H, INPUT_W, 
RPN_H, RPN_W, + RPN_STD_SCALING, RPN_STRIDE, ANCHOR_SIZES, anc_size_num, ANCHOR_RATIOS, anc_ratio_num, bbox_min_size, fg_scores, + proposal_out); return cudaGetLastError(); } -size_t _proposalsForwardNMSWorkspaceSize(int N, - int A, - int H, - int W, - int nmsMaxOut) +size_t _proposalsForwardNMSWorkspaceSize(int N, int A, int H, int W, int nmsMaxOut) { return N * A * H * W * 5 * 5 * sizeof(float) + (1 << 22); } @@ -534,13 +480,11 @@ size_t _proposalsForwardBboxWorkspaceSize(int N, int A, int H, int W) return N * A * H * W * 4 * sizeof(float); } - size_t _proposalForwardFgScoresWorkspaceSize(int N, int A, int H, int W) { return N * A * H * W * sizeof(float); } - size_t anchors_buf_size(int anc_size_num, int anc_ratio_num) { return (anc_size_num + anc_ratio_num) * sizeof(float); @@ -548,12 +492,7 @@ size_t anchors_buf_size(int anc_size_num, int anc_ratio_num) size_t calculateTotalWorkspaceSize(size_t* workspaces, int count); -size_t _get_workspace_size(int N, - int anc_size_num, - int anc_ratio_num, - int H, - int W, - int nmsMaxOut) +size_t _get_workspace_size(int N, int anc_size_num, int anc_ratio_num, int H, int W, int nmsMaxOut) { size_t wss[4]; int A = anc_size_num * anc_ratio_num; @@ -564,26 +503,19 @@ size_t _get_workspace_size(int N, return calculateTotalWorkspaceSize(wss, 4); } - - template -frcnnStatus_t extractFgScores_gpu(cudaStream_t stream, - int N, - int A, - int H, - int W, - const void* scores, - void* fgScores) +frcnnStatus_t extractFgScores_gpu(cudaStream_t stream, int N, int A, int H, int W, const void* scores, void* fgScores) { - //TODO custom kernel for this + // TODO custom kernel for this size_t size = A * H * W * sizeof(T); for (int n = 0; n < N; n++) { size_t offset_ld = n * A * H * W; size_t offset_st = n * A * H * W; - CSC(cudaMemcpyAsync(((T*) fgScores) + offset_st, ((T*) scores) + offset_ld, size, - cudaMemcpyDeviceToDevice, stream), STATUS_FAILURE); + CSC(cudaMemcpyAsync( + ((T*) fgScores) + offset_st, ((T*) scores) + offset_ld, size, 
cudaMemcpyDeviceToDevice, stream), + STATUS_FAILURE); } return STATUS_SUCCESS; @@ -600,10 +532,9 @@ cudaError_t _copy_anchors_to_gpu(cudaStream_t stream, float* ANCHOR_SIZES, int a return cudaSuccess; } - __global__ void _normalize_rois_kernel(float* roi_after_nms, int nthreads, int width, int height) { - for(int i = threadIdx.x + blockDim.x * blockIdx.x; i < nthreads; i += blockDim.x * gridDim.x) + for (int i = threadIdx.x + blockDim.x * blockIdx.x; i < nthreads; i += blockDim.x * gridDim.x) { float x1 = roi_after_nms[i * 4]; float y1 = roi_after_nms[i * 4 + 1]; @@ -616,53 +547,33 @@ __global__ void _normalize_rois_kernel(float* roi_after_nms, int nthreads, int w } } - - -cudaError_t _normalize_rois(float* roi_after_nms, int n, int max_box_num, int input_width, - int input_height, cudaStream_t stream) +cudaError_t _normalize_rois( + float* roi_after_nms, int n, int max_box_num, int input_width, int input_height, cudaStream_t stream) { const int block_size = 1024; const int grid_size = (n * max_box_num + block_size - 1) / block_size; - _normalize_rois_kernel <<< grid_size, block_size, 0, stream>>>(roi_after_nms, n * max_box_num, - input_width, input_height); + _normalize_rois_kernel<<>>( + roi_after_nms, n * max_box_num, input_width, input_height); return cudaGetLastError(); } - -int proposalInference_gpu( - cudaStream_t stream, - const void* rpn_prob, - const void* rpn_regr, - int batch_size, - int input_height, - int input_width, - int rpn_height, - int rpn_width, - int MAX_BOX_NUM, - int RPN_PRE_NMS_TOP_N, - float* ANCHOR_SIZES, - int anc_size_num, - float* ANCHOR_RATIOS, - int anc_ratio_num, - float rpn_std_scaling, - int rpn_stride, - float bbox_min_size, - float nms_iou_threshold, - void * workspace, - void* output) +int proposalInference_gpu(cudaStream_t stream, const void* rpn_prob, const void* rpn_regr, int batch_size, + int input_height, int input_width, int rpn_height, int rpn_width, int MAX_BOX_NUM, int RPN_PRE_NMS_TOP_N, + float* ANCHOR_SIZES, int 
anc_size_num, float* ANCHOR_RATIOS, int anc_ratio_num, float rpn_std_scaling, + int rpn_stride, float bbox_min_size, float nms_iou_threshold, void* workspace, void* output) { - size_t nmsWorkspaceSize = _proposalsForwardNMSWorkspaceSize(batch_size, anc_size_num * anc_ratio_num, - rpn_height, rpn_width, MAX_BOX_NUM); + size_t nmsWorkspaceSize = _proposalsForwardNMSWorkspaceSize( + batch_size, anc_size_num * anc_ratio_num, rpn_height, rpn_width, MAX_BOX_NUM); void* nmsWorkspace = workspace; - size_t proposalsSize = _proposalsForwardBboxWorkspaceSize(batch_size, anc_size_num * anc_ratio_num, - rpn_height, rpn_width); + size_t proposalsSize + = _proposalsForwardBboxWorkspaceSize(batch_size, anc_size_num * anc_ratio_num, rpn_height, rpn_width); const DType_t t_proposals = nvinfer1::DataType::kFLOAT; const DLayout_t l_proposals = NC4HW; void* proposals = nextWorkspacePtr((int8_t*) nmsWorkspace, nmsWorkspaceSize); void* fg_scores = nextWorkspacePtr((int8_t*) proposals, proposalsSize); - size_t fg_scores_size = _proposalForwardFgScoresWorkspaceSize(batch_size, - anc_size_num * anc_ratio_num, rpn_height, rpn_width); + size_t fg_scores_size + = _proposalForwardFgScoresWorkspaceSize(batch_size, anc_size_num * anc_ratio_num, rpn_height, rpn_width); void* anchor_size_buf = nextWorkspacePtr((int8_t*) fg_scores, fg_scores_size); void* anchor_ratio_buf = static_cast(static_cast(anchor_size_buf) + anc_size_num); frcnnStatus_t status; diff --git a/plugin/common/plugin.cpp b/plugin/common/plugin.cpp index 6ff56323e..4929611c1 100644 --- a/plugin/common/plugin.cpp +++ b/plugin/common/plugin.cpp @@ -54,7 +54,8 @@ class PerContextPluginHandleSingletonCreator std::shared_ptr result = mObservers[executionContextIdentifier].lock(); if (result == nullptr) { - auto deleter = [this, executionContextIdentifier](T* obj) { + auto deleter = [this, executionContextIdentifier](T* obj) + { if (obj == nullptr) { return; @@ -67,7 +68,7 @@ class PerContextPluginHandleSingletonCreator // To avoid 
deadlock, it's critical to release the lock here held by lk first, // before destroying observedObjHolder. Hence observedObjHolder must be declared // before lk. - std::lock_guard lk{mMutex}; + std::lock_guard lk_{mMutex}; // Must check observer again because another thread may create new instance for // this ctx just before we lock mMutex. We can't infer that the observer is // stale from the fact that obj is destroyed, because shared_ptr ref-count diff --git a/plugin/common/reducedMathPlugin.cpp b/plugin/common/reducedMathPlugin.cpp index bf2c90dd1..d8fee8c3b 100644 --- a/plugin/common/reducedMathPlugin.cpp +++ b/plugin/common/reducedMathPlugin.cpp @@ -32,7 +32,7 @@ int32_t clz(int32_t x) return 32; } -#define CUDNN_IS_POW_2(x) (0 == ((x) & ((x) -1))) +#define CUDNN_IS_POW_2(x) (0 == ((x) & ((x) - 1))) int32_t find_log_2(int32_t x, bool round_up = false) { @@ -72,8 +72,8 @@ void findDivisor(int32_t denom, uint32_t& mul_coeff, uint32_t& shift_coeff) // Once we've picked Y, then X [our mul_coeff value] is simply Y/D, rounding up, // and we save shift_coeff as whatever further shift we have to do beyond // what the umulhi() implies. 
- uint32_t p = 31 + find_log_2(denom, true); - uint32_t m = ((1ULL << p) + (uint32_t) denom - 1) / (uint32_t) denom; + uint32_t p = static_cast(31 + find_log_2(denom, true)); + uint32_t m = static_cast(((1ULL << p) + static_cast(denom) - 1) / static_cast(denom)); mul_coeff = m; shift_coeff = p - 32; } diff --git a/plugin/coordConvACPlugin/coordConvACPlugin.cpp b/plugin/coordConvACPlugin/coordConvACPlugin.cpp index 704014faf..fe762dbe4 100644 --- a/plugin/coordConvACPlugin/coordConvACPlugin.cpp +++ b/plugin/coordConvACPlugin/coordConvACPlugin.cpp @@ -33,14 +33,14 @@ int32_t const kNUM_COORDCONV_CHANNELS = 2; CoordConvACPlugin::CoordConvACPlugin() {} CoordConvACPlugin::CoordConvACPlugin( - nvinfer1::DataType iType, int32_t iC, int32_t iH, int32_t iW, int32_t oC, int32_t oH, int32_t oW) - : iType(iType) - , iC(iC) - , iH(iH) - , iW(iW) - , oC(oC) - , oH(oH) - , oW(oW) + nvinfer1::DataType iType_, int32_t iC_, int32_t iH_, int32_t iW_, int32_t oC_, int32_t oH_, int32_t oW_) + : iType(iType_) + , iC(iC_) + , iH(iH_) + , iW(iW_) + , oC(oC_) + , oH(oH_) + , oW(oW_) { } @@ -90,7 +90,7 @@ Dims CoordConvACPlugin::getOutputDimensions(int32_t index, Dims const* inputs, i return dimsOutput; } -size_t CoordConvACPlugin::getWorkspaceSize(int32_t maxBatchSize) const noexcept +size_t CoordConvACPlugin::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } @@ -115,19 +115,19 @@ void CoordConvACPlugin::serialize(void* buffer) const noexcept } void CoordConvACPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, nvinfer1::PluginFormat format, int32_t maxBatchSize) noexcept + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, nvinfer1::PluginFormat /*format*/, int32_t /*maxBatchSize*/) noexcept { 
PLUGIN_ASSERT(nbInputs == 1); PLUGIN_ASSERT(nbOutputs == 1); - iC = inputDims->d[0]; - iH = inputDims->d[1]; - iW = inputDims->d[2]; + iC = static_cast(inputDims->d[0]); + iH = static_cast(inputDims->d[1]); + iW = static_cast(inputDims->d[2]); - oC = outputDims->d[0]; - oH = outputDims->d[1]; - oW = outputDims->d[2]; + oC = static_cast(outputDims->d[0]); + oH = static_cast(outputDims->d[1]); + oW = static_cast(outputDims->d[2]); iType = inputTypes[0]; } @@ -178,24 +178,24 @@ char const* CoordConvACPlugin::getPluginNamespace() const noexcept } nvinfer1::DataType CoordConvACPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { return inputTypes[0]; } bool CoordConvACPlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } -bool CoordConvACPlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool CoordConvACPlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } void CoordConvACPlugin::attachToContext( - cudnnContext* cudnn, cublasContext* cublas, nvinfer1::IGpuAllocator* allocator) noexcept + cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, nvinfer1::IGpuAllocator* /*allocator*/) noexcept { } @@ -217,7 +217,7 @@ PluginFieldCollection const* CoordConvACPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* CoordConvACPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* CoordConvACPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* /*fc*/) noexcept { try { @@ -234,7 +234,7 @@ IPluginV2Ext* CoordConvACPluginCreator::createPlugin(char const* name, PluginFie } IPluginV2Ext* 
CoordConvACPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/cropAndResizePlugin/cropAndResizePlugin.cpp b/plugin/cropAndResizePlugin/cropAndResizePlugin.cpp index 5fb6b205d..a382004e5 100644 --- a/plugin/cropAndResizePlugin/cropAndResizePlugin.cpp +++ b/plugin/cropAndResizePlugin/cropAndResizePlugin.cpp @@ -66,7 +66,7 @@ int32_t CropAndResizeDynamicPlugin::getNbOutputs() const noexcept } int32_t CropAndResizeDynamicPlugin::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -104,7 +104,7 @@ int32_t CropAndResizeDynamicPlugin::enqueue(PluginTensorDesc const* inputDesc, P // Our plugin outputs only one tensor void* output = outputs[0]; - int32_t batchSize = inputDesc[0].dims.d[0]; + int32_t batchSize = static_cast(inputDesc[0].dims.d[0]); int32_t status = cropAndResizeInference(stream, mDepth * mInputHeight * mInputWidth * batchSize, inputs[0], inputs[1], batchSize, mInputHeight, mInputWidth, mNumBoxes, mCropHeight, mCropWidth, mDepth, output); return status; @@ -116,8 +116,8 @@ int32_t CropAndResizeDynamicPlugin::enqueue(PluginTensorDesc const* inputDesc, P return STATUS_FAILURE; } -size_t CropAndResizeDynamicPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t CropAndResizeDynamicPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -168,7 +168,7 @@ PluginFieldCollection const* 
CropAndResizeDynamicPlugin::getFieldsToSerialize() mDataToSerialize.emplace_back(PluginField("crop_width", &mCropWidth, PluginFieldType::kINT32, 1)); mDataToSerialize.emplace_back(PluginField("crop_height", &mCropHeight, PluginFieldType::kINT32, 1)); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -200,7 +200,7 @@ int32_t CropAndResizeDynamicPlugin::getOutputDataTypes( } int32_t CropAndResizeDynamicPlugin::onShapeChange( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) noexcept + PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* /*outputs*/, int32_t nbOutputs) noexcept { try { @@ -210,10 +210,10 @@ int32_t CropAndResizeDynamicPlugin::onShapeChange( // Re-validate dimensions and update internal state if needed // Here we can update mDepth, mInputHeight, mInputWidth, mNumBoxes if they change - mDepth = inputs[0].dims.d[1]; - mInputHeight = inputs[0].dims.d[2]; - mInputWidth = inputs[0].dims.d[3]; - mNumBoxes = inputs[1].dims.d[1]; + mDepth = static_cast(inputs[0].dims.d[1]); + mInputHeight = static_cast(inputs[0].dims.d[2]); + mInputWidth = static_cast(inputs[0].dims.d[3]); + mNumBoxes = static_cast(inputs[1].dims.d[1]); return STATUS_SUCCESS; } @@ -224,7 +224,7 @@ int32_t CropAndResizeDynamicPlugin::onShapeChange( return STATUS_FAILURE; } -IPluginV3* CropAndResizeDynamicPlugin::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* CropAndResizeDynamicPlugin::attachToContext(IPluginResourceContext* /*context*/) noexcept { try { @@ -238,7 +238,7 @@ IPluginV3* CropAndResizeDynamicPlugin::attachToContext(IPluginResourceContext* c } bool CropAndResizeDynamicPlugin::supportsFormatCombination( - int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + int32_t pos, DynamicPluginTensorDesc 
const* inOut, int32_t nbInputs, int32_t /*nbOutputs*/) noexcept { try { @@ -281,18 +281,18 @@ void CropAndResizeDynamicPlugin::setPluginNamespace(char const* libNamespace) no } } -int32_t CropAndResizeDynamicPlugin::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +int32_t CropAndResizeDynamicPlugin::configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t nbOutputs) noexcept { try { // Validate input/output counts and update internal state based on input dimensions PLUGIN_VALIDATE(nbInputs == 2); PLUGIN_VALIDATE(nbOutputs == 1); - mDepth = in[0].desc.dims.d[1]; - mInputHeight = in[0].desc.dims.d[2]; - mInputWidth = in[0].desc.dims.d[3]; - mNumBoxes = in[1].desc.dims.d[1]; + mDepth = static_cast(in[0].desc.dims.d[1]); + mInputHeight = static_cast(in[0].desc.dims.d[2]); + mInputWidth = static_cast(in[0].desc.dims.d[3]); + mNumBoxes = static_cast(in[1].desc.dims.d[1]); } catch (std::exception const& e) { @@ -312,7 +312,7 @@ CropAndResizeDynamicPluginCreator::CropAndResizeDynamicPluginCreator() mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("crop_width", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("crop_height", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -332,7 +332,7 @@ PluginFieldCollection const* CropAndResizeDynamicPluginCreator::getFieldNames() } IPluginV3* CropAndResizeDynamicPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* /*name*/, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.cpp b/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.cpp index 
ce3a0b702..2f334ee15 100644 --- a/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.cpp +++ b/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.cpp @@ -46,7 +46,7 @@ CropAndResizeDynamicPluginLegacy::CropAndResizeDynamicPluginLegacy(int32_t cropW { } -CropAndResizePlugin::CropAndResizePlugin(void const* serialBuf, size_t serialSize) +CropAndResizePlugin::CropAndResizePlugin(void const* serialBuf, size_t /*serialSize*/) { auto const* d = toPointer(serialBuf); auto const* a = d; @@ -59,7 +59,7 @@ CropAndResizePlugin::CropAndResizePlugin(void const* serialBuf, size_t serialSiz PLUGIN_ASSERT(d == a + sizeof(int32_t) * 6); } -CropAndResizeDynamicPluginLegacy::CropAndResizeDynamicPluginLegacy(void const* serialBuf, size_t serialSize) +CropAndResizeDynamicPluginLegacy::CropAndResizeDynamicPluginLegacy(void const* serialBuf, size_t /*serialSize*/) { auto const* d = reinterpret_cast(serialBuf); auto const* a = d; @@ -136,10 +136,10 @@ Dims CropAndResizePlugin::getOutputDimensions(int32_t index, Dims const* inputs, PLUGIN_VALIDATE(nbInputDims == 2); PLUGIN_VALIDATE(inputs != nullptr); PLUGIN_VALIDATE(inputs->nbDims == 3); - int32_t channels = inputs->d[0]; + int32_t channels = static_cast(inputs->d[0]); int32_t height = mCropHeight; int32_t width = mCropWidth; - int32_t roiBatch = inputs[1].d[0]; + int32_t roiBatch = static_cast(inputs[1].d[0]); return Dims4(roiBatch, channels, height, width); } catch (std::exception const& e) @@ -220,7 +220,7 @@ int32_t CropAndResizeDynamicPluginLegacy::enqueue(PluginTensorDesc const* inputD void* output = outputs[0]; // Launch CUDA kernel wrapper and save its return value - int32_t batchSize = inputDesc[0].dims.d[0]; + int32_t batchSize = static_cast(inputDesc[0].dims.d[0]); int32_t status = cropAndResizeInference(stream, mDepth * mInputHeight * mInputWidth * batchSize, inputs[0], inputs[1], batchSize, mInputHeight, mInputWidth, mNumBoxes, mCropHeight, mCropWidth, mDepth, output); return status; @@ -280,7 +280,7 @@ bool 
CropAndResizePlugin::supportsFormat(DataType type, PluginFormat format) con } bool CropAndResizeDynamicPluginLegacy::supportsFormatCombination( - int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t /*nbOutputs*/) noexcept { try { @@ -317,8 +317,8 @@ size_t CropAndResizePlugin::getWorkspaceSize(int32_t /*maxBatchSize*/) const noe return 0; } -size_t CropAndResizeDynamicPluginLegacy::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t CropAndResizeDynamicPluginLegacy::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -405,7 +405,7 @@ char const* CropAndResizeDynamicPluginLegacy::getPluginNamespace() const noexcep // Return the DataType of the plugin output at the requested index. DataType CropAndResizePlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -421,7 +421,7 @@ DataType CropAndResizePlugin::getOutputDataType( } DataType CropAndResizeDynamicPluginLegacy::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -438,20 +438,20 @@ DataType CropAndResizeDynamicPluginLegacy::getOutputDataType( // Return true if output tensor is broadcast across a batch. 
bool CropAndResizePlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool CropAndResizePlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool CropAndResizePlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } -void CropAndResizePlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void CropAndResizePlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat floatFormat, int32_t /*maxBatchSize*/) noexcept { try { @@ -459,10 +459,10 @@ void CropAndResizePlugin::configurePlugin(Dims const* inputDims, int32_t nbInput && floatFormat == PluginFormat::kLINEAR); PLUGIN_VALIDATE(nbInputs == 2); PLUGIN_VALIDATE(nbOutputs == 1); - mDepth = inputDims[0].d[0]; - mInputHeight = inputDims[0].d[1]; - mInputWidth = inputDims[0].d[2]; - mNumBoxes = inputDims[1].d[0]; + mDepth = static_cast(inputDims[0].d[0]); + mInputHeight = static_cast(inputDims[0].d[1]); + mInputWidth = static_cast(inputDims[0].d[2]); + mNumBoxes = static_cast(inputDims[1].d[0]); } catch (std::exception const& e) { @@ -470,17 +470,17 @@ void CropAndResizePlugin::configurePlugin(Dims const* inputDims, int32_t nbInput } } -void CropAndResizeDynamicPluginLegacy::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, 
DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void CropAndResizeDynamicPluginLegacy::configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t nbOutputs) noexcept { try { PLUGIN_VALIDATE(nbInputs == 2); PLUGIN_VALIDATE(nbOutputs == 1); - mDepth = in[0].desc.dims.d[1]; - mInputHeight = in[0].desc.dims.d[2]; - mInputWidth = in[0].desc.dims.d[3]; - mNumBoxes = in[1].desc.dims.d[1]; + mDepth = static_cast(in[0].desc.dims.d[1]); + mInputHeight = static_cast(in[0].desc.dims.d[2]); + mInputWidth = static_cast(in[0].desc.dims.d[3]); + mNumBoxes = static_cast(in[1].desc.dims.d[1]); } catch (std::exception const& e) { @@ -489,7 +489,7 @@ void CropAndResizeDynamicPluginLegacy::configurePlugin( } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. void CropAndResizePlugin::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -502,7 +502,7 @@ CropAndResizeBasePluginCreator::CropAndResizeBasePluginCreator() mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("crop_width", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("crop_height", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.h b/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.h index 015edb04e..295a99185 100644 --- a/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.h +++ b/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.h @@ -134,6 +134,11 @@ class TRT_DEPRECATED CropAndResizePlugin : public IPluginV2Ext class TRT_DEPRECATED 
CropAndResizeDynamicPluginLegacy : public IPluginV2DynamicExt { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + //! //! \brief Constructs CropAndResizeDynamicPluginLegacy with specified crop dimensions. //! diff --git a/plugin/decodeBbox3DPlugin/decodeBbox3D.cpp b/plugin/decodeBbox3DPlugin/decodeBbox3D.cpp index 67e78a399..1207bc352 100644 --- a/plugin/decodeBbox3DPlugin/decodeBbox3D.cpp +++ b/plugin/decodeBbox3DPlugin/decodeBbox3D.cpp @@ -126,7 +126,7 @@ nvinfer1::IPluginV2DynamicExt* DecodeBbox3DPlugin::clone() const noexcept } nvinfer1::DimsExprs DecodeBbox3DPlugin::getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, - int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept + int32_t /*nbInputs*/, nvinfer1::IExprBuilder& exprBuilder) noexcept { try { @@ -199,14 +199,14 @@ bool DecodeBbox3DPlugin::supportsFormatCombination( return false; } -void DecodeBbox3DPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void DecodeBbox3DPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t /*nbInputs*/, + nvinfer1::DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { try { PLUGIN_VALIDATE(in != nullptr); - mFeatureH = in[0].desc.dims.d[1]; - mFeatureW = in[0].desc.dims.d[2]; + mFeatureH = static_cast(in[0].desc.dims.d[1]); + mFeatureW = static_cast(in[0].desc.dims.d[2]); } catch (std::exception const& e) { @@ -214,8 +214,8 @@ void DecodeBbox3DPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const } } -size_t DecodeBbox3DPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t 
DecodeBbox3DPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { size_t mAnchorsSize = mNumClasses * 2 * 4 * sizeof(float); size_t mAnchorBottomHeightSize = mNumClasses * sizeof(float); @@ -233,7 +233,7 @@ int32_t DecodeBbox3DPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr && workspace != nullptr); - int32_t batchSize = inputDesc[0].dims.d[0]; + int32_t batchSize = static_cast(inputDesc[0].dims.d[0]); // Inputs auto const* clsInput = static_cast(inputs[0]); @@ -269,7 +269,7 @@ int32_t DecodeBbox3DPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, } nvinfer1::DataType DecodeBbox3DPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { try { @@ -379,7 +379,7 @@ DecodeBbox3DPluginCreator::DecodeBbox3DPluginCreator() mPluginAttributes.emplace_back(PluginField("num_dir_bins", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("score_thresh", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/decodeBbox3DPlugin/decodeBbox3D.h b/plugin/decodeBbox3DPlugin/decodeBbox3D.h index 1f7fac66f..84e09f5ac 100644 --- a/plugin/decodeBbox3DPlugin/decodeBbox3D.h +++ b/plugin/decodeBbox3DPlugin/decodeBbox3D.h @@ -31,6 +31,11 @@ namespace plugin class DecodeBbox3DPlugin : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using 
nvinfer1::IPluginV2DynamicExt::enqueue; + DecodeBbox3DPlugin() = delete; DecodeBbox3DPlugin(float xMin, float xMax, float yMin, float yMax, float zMin, float zMax, int32_t numDirBins, float dirOffset, float dirLimitOffset, std::vector const& anchorBottomHeight, diff --git a/plugin/detectionLayerPlugin/detectionLayerPlugin.cpp b/plugin/detectionLayerPlugin/detectionLayerPlugin.cpp index 1ce3dfb68..0ccbbe520 100644 --- a/plugin/detectionLayerPlugin/detectionLayerPlugin.cpp +++ b/plugin/detectionLayerPlugin/detectionLayerPlugin.cpp @@ -36,7 +36,7 @@ DetectionLayerPluginCreator::DetectionLayerPluginCreator() mPluginAttributes.emplace_back(PluginField("score_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("iou_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -316,7 +316,7 @@ int32_t DetectionLayer::enqueue( } DataType DetectionLayer::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only DataType::kFLOAT is acceptable by the plugin layer. return DataType::kFLOAT; @@ -324,28 +324,29 @@ DataType DetectionLayer::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool DetectionLayer::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. 
-bool DetectionLayer::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool DetectionLayer::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. -void DetectionLayer::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void DetectionLayer::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t /*nbOutputs*/, DataType const* inputTypes, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t maxBatchSize) noexcept { try { checkValidInputs(inputDims, nbInputs); PLUGIN_VALIDATE(inputDims[0].d[0] == inputDims[1].d[0] && inputDims[1].d[0] == inputDims[2].d[0]); - mAnchorsCnt = inputDims[2].d[0]; + mAnchorsCnt = static_cast(inputDims[2].d[0]); mType = inputTypes[0]; mMaxBatchSize = maxBatchSize; } @@ -357,7 +358,7 @@ void DetectionLayer::configurePlugin(Dims const* inputDims, int32_t nbInputs, Di // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void DetectionLayer::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp b/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp index 959980fec..c08a834df 100644 --- a/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp @@ -126,7 +126,7 @@ PluginFieldCollection const* DisentangledAttentionPlugin::getFieldsToSerialize() mDataToSerialize.emplace_back("span", &mSpan, PluginFieldType::kINT32, 1); mDataToSerialize.emplace_back("factor", &mFactor, PluginFieldType::kFLOAT32, 1); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; @@ -141,8 +141,8 @@ PluginFieldCollection const* DisentangledAttentionPlugin::getFieldsToSerialize() // IPluginV3OneBuild methods int32_t DisentangledAttentionPlugin::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, - IExprBuilder& exprBuilder) noexcept + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, + IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -233,15 +233,18 @@ bool DisentangledAttentionPlugin::supportsFormatCombination( // IPluginV3OneRuntime methods template -void DisentangledAttentionPlugin::enqueueType(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, +void DisentangledAttentionPlugin::enqueueType(PluginTensorDesc const* inputDesc, PluginTensorDesc const* /*outputDesc*/, void const* const* inputs, void* const* outputs, cudaStream_t stream, TDataType factor) { Dims 
dims0 = inputDesc[0].dims; Dims dims1 = inputDesc[1].dims; Dims dims2 = inputDesc[2].dims; - dim3 dimData0(dims0.d[0], dims0.d[1], dims0.d[2]); - dim3 dimData1(dims1.d[0], dims1.d[1], dims1.d[2]); - dim3 dimData2(dims2.d[0], dims2.d[1], dims2.d[2]); + dim3 dimData0(static_cast(dims0.d[0]), static_cast(dims0.d[1]), + static_cast(dims0.d[2])); + dim3 dimData1(static_cast(dims1.d[0]), static_cast(dims1.d[1]), + static_cast(dims1.d[2])); + dim3 dimData2(static_cast(dims2.d[0]), static_cast(dims2.d[1]), + static_cast(dims2.d[2])); dim3 dimResult(dimData0); dim3 blockOptimized(kDISENTANGLED_TILESIZE, kDISENTANGLED_BLOCKDIMY); @@ -283,8 +286,8 @@ int32_t DisentangledAttentionPlugin::enqueue(PluginTensorDesc const* inputDesc, } } -size_t DisentangledAttentionPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t DisentangledAttentionPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -318,7 +321,7 @@ int32_t DisentangledAttentionPlugin::onShapeChange( return STATUS_FAILURE; } -IPluginV3* DisentangledAttentionPlugin::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* DisentangledAttentionPlugin::attachToContext(IPluginResourceContext* /*context*/) noexcept { try { @@ -339,7 +342,7 @@ DisentangledAttentionPluginCreator::DisentangledAttentionPluginCreator() mPluginAttributes.emplace_back(PluginField("span", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("factor", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -359,7 +362,7 @@ PluginFieldCollection const* DisentangledAttentionPluginCreator::getFieldNames() } IPluginV3* 
DisentangledAttentionPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* /*name*/, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.cpp b/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.cpp index e96f11aa9..a07bb88d8 100644 --- a/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.cpp +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.cpp @@ -72,8 +72,8 @@ char const* DisentangledAttentionPluginLegacy::getPluginVersion() const noexcept } // IPluginV2DynamicExt Methods -nvinfer1::DimsExprs DisentangledAttentionPluginLegacy::getOutputDimensions( - int32_t index, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +nvinfer1::DimsExprs DisentangledAttentionPluginLegacy::getOutputDimensions(int32_t index, + nvinfer1::DimsExprs const* inputs, int32_t /*nbInputs*/, nvinfer1::IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -90,15 +90,18 @@ nvinfer1::DimsExprs DisentangledAttentionPluginLegacy::getOutputDimensions( template void DisentangledAttentionPluginLegacy::enqueueType(nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, cudaStream_t stream, - TDataType factor) + nvinfer1::PluginTensorDesc const* /*outputDesc*/, void const* const* inputs, void* const* outputs, + cudaStream_t stream, TDataType factor) { nvinfer1::Dims dims0 = inputDesc[0].dims; nvinfer1::Dims dims1 = inputDesc[1].dims; nvinfer1::Dims dims2 = inputDesc[2].dims; - dim3 dimData0(dims0.d[0], dims0.d[1], dims0.d[2]); - dim3 dimData1(dims1.d[0], dims1.d[1], dims1.d[2]); - dim3 dimData2(dims2.d[0], dims2.d[1], dims2.d[2]); + dim3 dimData0(static_cast(dims0.d[0]), static_cast(dims0.d[1]), + static_cast(dims0.d[2])); + dim3 
dimData1(static_cast(dims1.d[0]), static_cast(dims1.d[1]), + static_cast(dims1.d[2])); + dim3 dimData2(static_cast(dims2.d[0]), static_cast(dims2.d[1]), + static_cast(dims2.d[2])); dim3 dimResult(dimData0); dim3 blockOptimized(kDISENTANGLED_TILESIZE, kDISENTANGLED_BLOCKDIMY); @@ -247,8 +250,8 @@ nvinfer1::DataType DisentangledAttentionPluginLegacy::getOutputDataType( return nvinfer1::DataType{}; } -size_t DisentangledAttentionPluginLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t DisentangledAttentionPluginLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -279,7 +282,7 @@ DisentangledAttentionPluginCreatorLegacy::DisentangledAttentionPluginCreatorLega mPluginAttributes.emplace_back(PluginField("span", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("factor", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.h b/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.h index d3eecacfa..882144500 100644 --- a/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.h +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.h @@ -44,6 +44,11 @@ namespace plugin class DisentangledAttentionPluginLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + DisentangledAttentionPluginLegacy(); 
DisentangledAttentionPluginLegacy(int32_t span, float factor); @@ -103,11 +108,6 @@ class DisentangledAttentionPluginLegacy : public nvinfer1::IPluginV2DynamicExt // attributes int32_t mSpan; float mFactor; - - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; }; class DisentangledAttentionPluginCreatorLegacy : public nvinfer1::IPluginCreator diff --git a/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp b/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp index 71836943c..869a790cd 100644 --- a/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp +++ b/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp @@ -131,7 +131,7 @@ char const* EfficientNMSPlugin::getPluginNamespace() const noexcept } nvinfer1::DataType EfficientNMSPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { if (mParam.outputONNXIndices) { @@ -164,7 +164,7 @@ IPluginV2DynamicExt* EfficientNMSPlugin::clone() const noexcept } DimsExprs EfficientNMSPlugin::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t outputIndex, DimsExprs const* inputs, int32_t /*nbInputs*/, IExprBuilder& exprBuilder) noexcept { try { @@ -280,8 +280,8 @@ bool EfficientNMSPlugin::supportsFormatCombination( && (inOut[0].type == inOut[pos].type); } -void EfficientNMSPlugin::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void EfficientNMSPlugin::configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t nbOutputs) noexcept { try { @@ -305,8 +305,8 @@ void EfficientNMSPlugin::configurePlugin( // Shape of scores input should be // [batch_size, num_boxes, num_classes] or 
[batch_size, num_boxes, num_classes, 1] PLUGIN_ASSERT(in[1].desc.dims.nbDims == 3 || (in[1].desc.dims.nbDims == 4 && in[1].desc.dims.d[3] == 1)); - mParam.numScoreElements = in[1].desc.dims.d[1] * in[1].desc.dims.d[2]; - mParam.numClasses = in[1].desc.dims.d[2]; + mParam.numScoreElements = static_cast(in[1].desc.dims.d[1] * in[1].desc.dims.d[2]); + mParam.numClasses = static_cast(in[1].desc.dims.d[2]); // When pad per class is set, the total output boxes size may need to be reduced. // This operation is also done in getOutputDimension(), but for dynamic shapes, the @@ -326,16 +326,17 @@ void EfficientNMSPlugin::configurePlugin( { PLUGIN_ASSERT(in[0].desc.dims.d[2] == 4); mParam.shareLocation = true; - mParam.numBoxElements = in[0].desc.dims.d[1] * in[0].desc.dims.d[2]; + mParam.numBoxElements = static_cast(in[0].desc.dims.d[1] * in[0].desc.dims.d[2]); } else { mParam.shareLocation = (in[0].desc.dims.d[2] == 1); PLUGIN_ASSERT(in[0].desc.dims.d[2] == mParam.numClasses || mParam.shareLocation); PLUGIN_ASSERT(in[0].desc.dims.d[3] == 4); - mParam.numBoxElements = in[0].desc.dims.d[1] * in[0].desc.dims.d[2] * in[0].desc.dims.d[3]; + mParam.numBoxElements + = static_cast(in[0].desc.dims.d[1] * in[0].desc.dims.d[2] * in[0].desc.dims.d[3]); } - mParam.numAnchors = in[0].desc.dims.d[1]; + mParam.numAnchors = static_cast(in[0].desc.dims.d[1]); if (nbInputs == 2) { @@ -358,12 +359,12 @@ void EfficientNMSPlugin::configurePlugin( } } -size_t EfficientNMSPlugin::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t EfficientNMSPlugin::getWorkspaceSize(PluginTensorDesc const* inputs, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { - int32_t batchSize = inputs[1].dims.d[0]; - int32_t numScoreElements = inputs[1].dims.d[1] * inputs[1].dims.d[2]; - int32_t numClasses = inputs[1].dims.d[2]; + int32_t batchSize = 
static_cast(inputs[1].dims.d[0]); + int32_t numScoreElements = static_cast(inputs[1].dims.d[1] * inputs[1].dims.d[2]); + int32_t numClasses = static_cast(inputs[1].dims.d[2]); return EfficientNMSWorkspaceSize(batchSize, numScoreElements, numClasses, mParam.datatype); } @@ -374,7 +375,7 @@ int32_t EfficientNMSPlugin::enqueue(PluginTensorDesc const* inputDesc, PluginTen { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr && workspace != nullptr); - mParam.batchSize = inputDesc[0].dims.d[0]; + mParam.batchSize = static_cast(inputDesc[0].dims.d[0]); if (mParam.outputONNXIndices) { @@ -421,7 +422,7 @@ EfficientNMSPluginCreator::EfficientNMSPluginCreator() mPluginAttributes.emplace_back(PluginField("score_activation", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("class_agnostic", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("box_coding", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -440,7 +441,8 @@ PluginFieldCollection const* EfficientNMSPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2DynamicExt* EfficientNMSPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2DynamicExt* EfficientNMSPluginCreator::createPlugin( + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -512,7 +514,7 @@ IPluginV2DynamicExt* EfficientNMSPluginCreator::createPlugin(char const* name, P } IPluginV2DynamicExt* EfficientNMSPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { @@ -539,7 +541,7 @@ EfficientNMSONNXPluginCreator::EfficientNMSONNXPluginCreator() mPluginAttributes.emplace_back(PluginField("iou_threshold", nullptr, 
PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("max_output_boxes_per_class", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("center_point_box", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -559,7 +561,7 @@ PluginFieldCollection const* EfficientNMSONNXPluginCreator::getFieldNames() noex } IPluginV2DynamicExt* EfficientNMSONNXPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -608,7 +610,7 @@ IPluginV2DynamicExt* EfficientNMSONNXPluginCreator::createPlugin( } IPluginV2DynamicExt* EfficientNMSONNXPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/efficientNMSPlugin/efficientNMSPlugin.h b/plugin/efficientNMSPlugin/efficientNMSPlugin.h index c7248d91f..9ce2061b6 100644 --- a/plugin/efficientNMSPlugin/efficientNMSPlugin.h +++ b/plugin/efficientNMSPlugin/efficientNMSPlugin.h @@ -30,6 +30,11 @@ namespace plugin class EfficientNMSPlugin : public IPluginV2DynamicExt { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + explicit EfficientNMSPlugin(EfficientNMSParameters param); EfficientNMSPlugin(void const* data, size_t length); ~EfficientNMSPlugin() override = default; diff --git a/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.cpp b/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.cpp index 3aef2fe6c..02fad8d89 100644 --- a/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.cpp +++ 
b/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.cpp @@ -77,7 +77,7 @@ EfficientNMSExplicitTFTRTPluginCreator::EfficientNMSExplicitTFTRTPluginCreator() mPluginAttributes.emplace_back(PluginField("score_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("pad_per_class", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("clip_boxes", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -97,7 +97,7 @@ const PluginFieldCollection* EfficientNMSExplicitTFTRTPluginCreator::getFieldNam } IPluginV2DynamicExt* EfficientNMSExplicitTFTRTPluginCreator::createPlugin( - const char* name, const PluginFieldCollection* fc) noexcept + const char* /*name*/, const PluginFieldCollection* fc) noexcept { try { @@ -149,7 +149,7 @@ IPluginV2DynamicExt* EfficientNMSExplicitTFTRTPluginCreator::createPlugin( } IPluginV2DynamicExt* EfficientNMSExplicitTFTRTPluginCreator::deserializePlugin( - const char* name, const void* serialData, size_t serialLength) noexcept + const char* /*name*/, const void* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.cpp b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.cpp index af75d75dc..d361f52c5 100644 --- a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.cpp +++ b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.cpp @@ -107,7 +107,7 @@ const char* EfficientNMSImplicitTFTRTPlugin::getPluginNamespace() const noexcept } Dims EfficientNMSImplicitTFTRTPlugin::getOutputDimensions( - int32_t outputIndex, const Dims* inputs, int32_t nbInputs) noexcept + int32_t outputIndex, const Dims* inputs, int32_t /*nbInputs*/) noexcept { try { @@ -118,7 +118,7 @@ Dims EfficientNMSImplicitTFTRTPlugin::getOutputDimensions( 
PLUGIN_ASSERT(inputs[1].nbDims == 2); if (mParam.padOutputBoxesPerClass && mParam.numOutputBoxesPerClass > 0) { - const int32_t numClasses = inputs[1].d[1]; + const int32_t numClasses = static_cast(inputs[1].d[1]); if (mParam.numOutputBoxesPerClass * numClasses < mParam.numOutputBoxes) { mParam.numOutputBoxes = mParam.numOutputBoxesPerClass * numClasses; @@ -189,13 +189,13 @@ int32_t EfficientNMSImplicitTFTRTPlugin::enqueue(int32_t batchSize, void const* return -1; } -bool EfficientNMSImplicitTFTRTPlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool EfficientNMSImplicitTFTRTPlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } DataType EfficientNMSImplicitTFTRTPlugin::getOutputDataType( - int32_t index, const DataType* inputTypes, int32_t nbInputs) const noexcept + int32_t index, const DataType* inputTypes, int32_t /*nbInputs*/) const noexcept { // num_detections and detection_classes use integer outputs if (index == 0 || index == 3) @@ -222,7 +222,7 @@ IPluginV2IOExt* EfficientNMSImplicitTFTRTPlugin::clone() const noexcept } bool EfficientNMSImplicitTFTRTPlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } @@ -255,7 +255,7 @@ bool EfficientNMSImplicitTFTRTPlugin::supportsFormatCombination( } void EfficientNMSImplicitTFTRTPlugin::configurePlugin( - const PluginTensorDesc* in, int32_t nbInputs, const PluginTensorDesc* out, int32_t nbOutputs) noexcept + const PluginTensorDesc* in, int32_t nbInputs, const PluginTensorDesc* /*out*/, int32_t nbOutputs) noexcept { try { @@ -268,8 +268,8 @@ void EfficientNMSImplicitTFTRTPlugin::configurePlugin( // [batch_size, num_boxes, num_classes] or [batch_size, num_boxes, // num_classes, 1] PLUGIN_ASSERT(in[1].dims.nbDims == 2 || (in[1].dims.nbDims == 3 && in[1].dims.d[2] == 1)); - 
mParam.numScoreElements = in[1].dims.d[0] * in[1].dims.d[1]; - mParam.numClasses = in[1].dims.d[1]; + mParam.numScoreElements = static_cast(in[1].dims.d[0] * in[1].dims.d[1]); + mParam.numClasses = static_cast(in[1].dims.d[1]); // Shape of boxes input should be // [batch_size, num_boxes, 4] or [batch_size, num_boxes, 1, 4] or [batch_size, @@ -279,16 +279,16 @@ void EfficientNMSImplicitTFTRTPlugin::configurePlugin( { PLUGIN_ASSERT(in[0].dims.d[1] == 4); mParam.shareLocation = true; - mParam.numBoxElements = in[0].dims.d[0] * in[0].dims.d[1]; + mParam.numBoxElements = static_cast(in[0].dims.d[0] * in[0].dims.d[1]); } else { mParam.shareLocation = (in[0].dims.d[1] == 1); PLUGIN_ASSERT(in[0].dims.d[1] == mParam.numClasses || mParam.shareLocation); PLUGIN_ASSERT(in[0].dims.d[2] == 4); - mParam.numBoxElements = in[0].dims.d[0] * in[0].dims.d[1] * in[0].dims.d[2]; + mParam.numBoxElements = static_cast(in[0].dims.d[0] * in[0].dims.d[1] * in[0].dims.d[2]); } - mParam.numAnchors = in[0].dims.d[0]; + mParam.numAnchors = static_cast(in[0].dims.d[0]); if (nbInputs == 2) { @@ -311,7 +311,7 @@ EfficientNMSImplicitTFTRTPluginCreator::EfficientNMSImplicitTFTRTPluginCreator() mPluginAttributes.emplace_back(PluginField("score_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("pad_per_class", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("clip_boxes", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -331,7 +331,7 @@ const PluginFieldCollection* EfficientNMSImplicitTFTRTPluginCreator::getFieldNam } IPluginV2IOExt* EfficientNMSImplicitTFTRTPluginCreator::createPlugin( - const char* name, const PluginFieldCollection* fc) noexcept + const char* /*name*/, const PluginFieldCollection* fc) noexcept { try { @@ -383,7 +383,7 @@ IPluginV2IOExt* 
EfficientNMSImplicitTFTRTPluginCreator::createPlugin( } IPluginV2IOExt* EfficientNMSImplicitTFTRTPluginCreator::deserializePlugin( - const char* name, const void* serialData, size_t serialLength) noexcept + const char* /*name*/, const void* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h index 3ca88f8dc..8b4a4fa5c 100644 --- a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h +++ b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h @@ -40,6 +40,8 @@ using EfficientNMSImplicitTFTRTOutputsDataType = void**; class EfficientNMSImplicitTFTRTPlugin : public nvinfer1::IPluginV2IOExt { public: + using nvinfer1::IPluginV2IOExt::configurePlugin; + explicit EfficientNMSImplicitTFTRTPlugin(EfficientNMSParameters param); EfficientNMSImplicitTFTRTPlugin(const void* data, size_t length); ~EfficientNMSImplicitTFTRTPlugin() override = default; diff --git a/plugin/embLayerNormPlugin/embLayerNormPlugin.cpp b/plugin/embLayerNormPlugin/embLayerNormPlugin.cpp index 0df771fb5..a42b48280 100644 --- a/plugin/embLayerNormPlugin/embLayerNormPlugin.cpp +++ b/plugin/embLayerNormPlugin/embLayerNormPlugin.cpp @@ -42,19 +42,19 @@ EmbLayerNormPluginDynamic::EmbLayerNormPluginDynamic(std::string const& name, Da DataType const mhaType, Weights const& beta, Weights const& gamma, Weights const& wordEmb, Weights const& posEmb, Weights const& tokEmb, bool const useFullMask) : mLayerName(name) - , mLd(beta.count) + , mLd(static_cast(beta.count)) , mType(type) , mMhaType(mhaType) { // Assuming Weights.count is the number of elements and not bytes PLUGIN_VALIDATE(beta.count == gamma.count); PLUGIN_VALIDATE(mLd > 0U); - PLUGIN_VALIDATE(wordEmb.count % mLd == 0); - PLUGIN_VALIDATE(posEmb.count % mLd == 0); - PLUGIN_VALIDATE(tokEmb.count % mLd == 0); - mWordVocabSize = wordEmb.count / mLd; - mPosVocabSize = posEmb.count / mLd; - 
mTokVocabSize = tokEmb.count / mLd; + PLUGIN_VALIDATE(static_cast(wordEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(posEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(tokEmb.count) % mLd == 0); + mWordVocabSize = static_cast(wordEmb.count) / mLd; + mPosVocabSize = static_cast(posEmb.count) / mLd; + mTokVocabSize = static_cast(tokEmb.count) / mLd; mSM = getSmVersion(); mOutputFp16 = mType == DataType::kHALF ? 1 : 0; mUseFullMask = static_cast(useFullMask); @@ -175,7 +175,7 @@ PluginFieldCollection const* EmbLayerNormPluginDynamic::getFieldsToSerialize() n mDataToSerialize.emplace_back("bert_embeddings_position_embeddings", static_cast(mPosEmb.values), PluginFieldType::kFLOAT32, mPosEmb.count); } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -191,9 +191,9 @@ int32_t EmbLayerNormPluginDynamic::onShapeChange( PLUGIN_ASSERT(nbInputs == 3); PLUGIN_ASSERT(inputs[0].dims.nbDims == 2); - int32_t const S = inputs[0].dims.d[SDIM]; - mS = S; - int32_t const B = inputs[0].dims.d[BDIM]; + int32_t const S = static_cast(inputs[0].dims.d[SDIM]); + mS = static_cast(S); + int32_t const B = static_cast(inputs[0].dims.d[BDIM]); TRT_UNUSED B; PLUGIN_ASSERT(mS == static_cast(inputs[1].dims.d[SDIM])); PLUGIN_ASSERT(B == inputs[1].dims.d[BDIM]); @@ -242,7 +242,7 @@ int32_t EmbLayerNormPluginDynamic::onShapeChange( return pluginStatus_t::STATUS_FAILURE; } -IPluginV3* EmbLayerNormPluginDynamic::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* EmbLayerNormPluginDynamic::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -254,8 +254,8 @@ int32_t EmbLayerNormPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, Pl { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batchSize = inputDesc->dims.d[BDIM]; - int32_t const S = 
inputDesc->dims.d[SDIM]; + int32_t const batchSize = static_cast(inputDesc->dims.d[BDIM]); + int32_t const S = static_cast(inputDesc->dims.d[SDIM]); int32_t status = STATUS_FAILURE; // Our plugin outputs only one tensor @@ -272,7 +272,8 @@ int32_t EmbLayerNormPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, Pl auto const tokEmb = static_cast(mTokEmbDev.get()); auto const posEmb = static_cast(mPosEmbDev.get()); status = embSkipLayerNorm(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); if (status != cudaSuccess) { @@ -286,7 +287,8 @@ int32_t EmbLayerNormPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, Pl auto const tokEmb = static_cast(mTokEmbDev.get()); auto const posEmb = static_cast(mPosEmbDev.get()); status = embSkipLayerNorm(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, beta, - gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); if (status != cudaSuccess) { @@ -317,7 +319,9 @@ int32_t EmbLayerNormPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, Pl } uint32_t* inputMaskX = static_cast(outputs[1]); - status = convertMask(S, batchSize, warps_m, warps_n, warps_k, inputMask, inputMaskX, stream); + status = convertMask(static_cast(S), static_cast(batchSize), + static_cast(warps_m), static_cast(warps_n), static_cast(warps_k), + inputMask, inputMaskX, stream); } else { @@ -386,7 +390,7 @@ bool EmbLayerNormPluginDynamic::supportsFormatCombination( } int32_t EmbLayerNormPluginDynamic::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* 
outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -406,7 +410,7 @@ int32_t EmbLayerNormPluginDynamic::getOutputShapes(DimsExprs const* inputs, int3 outputs[0].nbDims = 5; outputs[0].d[0] = inputs[0].d[0]; outputs[0].d[1] = inputs[0].d[1]; - outputs[0].d[2] = exprBuilder.constant(mLd); + outputs[0].d[2] = exprBuilder.constant(static_cast(mLd)); outputs[0].d[3] = exprBuilder.constant(1); outputs[0].d[4] = exprBuilder.constant(1); @@ -483,14 +487,14 @@ int32_t EmbLayerNormPluginDynamic::getOutputDataTypes( return pluginStatus_t::STATUS_FAILURE; } -int32_t EmbLayerNormPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +int32_t EmbLayerNormPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -size_t EmbLayerNormPluginDynamic::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t EmbLayerNormPluginDynamic::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -548,7 +552,7 @@ EmbLayerNormPluginDynamicCreator::EmbLayerNormPluginDynamicCreator() mPluginAttributes.emplace_back(PluginField("output_fp16")); mPluginAttributes.emplace_back(PluginField("full_mask")); mPluginAttributes.emplace_back(PluginField("mha_type_id")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -568,7 +572,7 @@ PluginFieldCollection const* EmbLayerNormPluginDynamicCreator::getFieldNames() n } IPluginV3* EmbLayerNormPluginDynamicCreator::createPlugin( - char const* name, 
PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.cpp b/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.cpp index 62cb36447..f029f7745 100644 --- a/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.cpp +++ b/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.cpp @@ -42,7 +42,7 @@ EmbLayerNormPluginDynamicLegacy::EmbLayerNormPluginDynamicLegacy(std::string con DataType const mhaType, Weights const& beta, Weights const& gamma, Weights const& wordEmb, Weights const& posEmb, Weights const& tokEmb, bool const useFullMask) : mLayerName(name) - , mLd(beta.count) + , mLd(static_cast(beta.count)) , mType(type) , mUseFullMask(useFullMask) , mMhaType(mhaType) @@ -50,12 +50,12 @@ EmbLayerNormPluginDynamicLegacy::EmbLayerNormPluginDynamicLegacy(std::string con // Assuming Weights.count is the number of elements and not bytes PLUGIN_VALIDATE(beta.count == gamma.count); PLUGIN_VALIDATE(mLd > 0U); - PLUGIN_VALIDATE(wordEmb.count % mLd == 0); - PLUGIN_VALIDATE(posEmb.count % mLd == 0); - PLUGIN_VALIDATE(tokEmb.count % mLd == 0); - mWordVocabSize = wordEmb.count / mLd; - mPosVocabSize = posEmb.count / mLd; - mTokVocabSize = tokEmb.count / mLd; + PLUGIN_VALIDATE(static_cast(wordEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(posEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(tokEmb.count) % mLd == 0); + mWordVocabSize = static_cast(wordEmb.count) / mLd; + mPosVocabSize = static_cast(posEmb.count) / mLd; + mTokVocabSize = static_cast(tokEmb.count) / mLd; mSM = getSmVersion(); // mS is set during configure @@ -150,7 +150,7 @@ DimsExprs EmbLayerNormPluginDynamicLegacy::getOutputDimensions( ret.nbDims = 5; ret.d[0] = inputs[0].d[0]; ret.d[1] = inputs[0].d[1]; - ret.d[2] = exprBuilder.constant(mLd); + ret.d[2] = exprBuilder.constant(static_cast(mLd)); ret.d[3] = exprBuilder.constant(1); ret.d[4] = 
exprBuilder.constant(1); return ret; @@ -253,9 +253,9 @@ void EmbLayerNormPluginDynamicLegacy::configurePlugin(DynamicPluginTensorDesc co PLUGIN_ASSERT(nbInputs == 3); PLUGIN_ASSERT(inputs[0].desc.dims.nbDims == 2); - int32_t const S = inputs[0].desc.dims.d[SDIM]; - mS = S; - int32_t const B = inputs[0].desc.dims.d[BDIM]; + int32_t const S = static_cast(inputs[0].desc.dims.d[SDIM]); + mS = static_cast(S); + int32_t const B = static_cast(inputs[0].desc.dims.d[BDIM]); TRT_UNUSED B; PLUGIN_ASSERT(mS == static_cast(inputs[1].desc.dims.d[SDIM])); PLUGIN_ASSERT(B == inputs[1].desc.dims.d[BDIM]); @@ -297,8 +297,8 @@ void EmbLayerNormPluginDynamicLegacy::configurePlugin(DynamicPluginTensorDesc co PLUGIN_ASSERT(outputs[1].desc.type == DataType::kINT32); } -size_t EmbLayerNormPluginDynamicLegacy::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t EmbLayerNormPluginDynamicLegacy::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -311,8 +311,8 @@ int32_t EmbLayerNormPluginDynamicLegacy::enqueue(PluginTensorDesc const* inputDe { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batchSize = inputDesc->dims.d[BDIM]; - int32_t const S = inputDesc->dims.d[SDIM]; + int32_t const batchSize = static_cast(inputDesc->dims.d[BDIM]); + int32_t const S = static_cast(inputDesc->dims.d[SDIM]); int32_t status = STATUS_FAILURE; // Our plugin outputs only one tensor @@ -329,7 +329,8 @@ int32_t EmbLayerNormPluginDynamicLegacy::enqueue(PluginTensorDesc const* inputDe auto const tokEmb = static_cast(mTokEmbDev.get()); auto const posEmb = static_cast(mPosEmbDev.get()); status = embSkipLayerNorm(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + beta, 
gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); if (status != cudaSuccess) { @@ -343,7 +344,8 @@ int32_t EmbLayerNormPluginDynamicLegacy::enqueue(PluginTensorDesc const* inputDe auto const tokEmb = static_cast(mTokEmbDev.get()); auto const posEmb = static_cast(mPosEmbDev.get()); status = embSkipLayerNorm(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, beta, - gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); if (status != cudaSuccess) { @@ -374,7 +376,9 @@ int32_t EmbLayerNormPluginDynamicLegacy::enqueue(PluginTensorDesc const* inputDe } uint32_t* inputMaskX = static_cast(outputs[1]); - status = convertMask(S, batchSize, warps_m, warps_n, warps_k, inputMask, inputMaskX, stream); + status = convertMask(static_cast(S), static_cast(batchSize), + static_cast(warps_m), static_cast(warps_n), static_cast(warps_k), + inputMask, inputMaskX, stream); } else { @@ -393,7 +397,7 @@ int32_t EmbLayerNormPluginDynamicLegacy::enqueue(PluginTensorDesc const* inputDe // IPluginV2Ext Methods DataType EmbLayerNormPluginDynamicLegacy::getOutputDataType( - int32_t index, DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index == 0 || index == 1); @@ -509,7 +513,7 @@ EmbLayerNormPluginDynamicLegacyCreator::EmbLayerNormPluginDynamicLegacyCreator() mPluginAttributes.emplace_back(PluginField("output_fp16")); mPluginAttributes.emplace_back(PluginField("full_mask")); mPluginAttributes.emplace_back(PluginField("mha_type_id")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.h b/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.h index 
936098ef1..ca8760364 100644 --- a/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.h +++ b/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.h @@ -48,6 +48,11 @@ cudaError_t convertMask(uint32_t const S, uint32_t const B, uint32_t const warps class EmbLayerNormPluginDynamicLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + EmbLayerNormPluginDynamicLegacy(std::string const& name, nvinfer1::DataType const type, nvinfer1::DataType const mhaType, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma, nvinfer1::Weights const& word_emb, nvinfer1::Weights const& pos_emb, nvinfer1::Weights const& tok_emb, @@ -111,11 +116,6 @@ class EmbLayerNormPluginDynamicLegacy : public nvinfer1::IPluginV2DynamicExt bool mUseFullMask; nvinfer1::DataType mMhaType; int32_t mSM; - - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; }; class EmbLayerNormPluginDynamicLegacyCreator : public nvinfer1::IPluginCreator diff --git a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.cpp b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.cpp index 8f472a265..bd096cdc5 100644 --- a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.cpp +++ b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.cpp @@ -58,7 +58,7 @@ void checkConfigurationInputs( PLUGIN_ASSERT(inputs[2].type == DataType::kINT32); } -bool initializeFields(char const* name, PluginFieldCollection const* fc, Weights& beta, Weights& gamma, +bool initializeFields(char const* /*name*/, PluginFieldCollection const* fc, Weights& beta, Weights& gamma, Weights& word_emb, Weights& pos_emb, Weights& tok_emb) { bool output_fp16 = false; @@ -132,19 +132,19 @@ 
EmbLayerNormVarSeqlenPluginBase::EmbLayerNormVarSeqlenPluginBase(std::string con Weights const& beta, Weights const& gamma, Weights const& wordEmb, Weights const& posEmb, Weights const& tokEmb, DataType maskType) : mLayerName(name) - , mLd(beta.count) + , mLd(static_cast(beta.count)) , mType(type) , mMaskType(maskType) { // Assuming Weights.count is the number of elements and not bytes PLUGIN_VALIDATE(beta.count == gamma.count); PLUGIN_VALIDATE(mLd > 0U); - PLUGIN_VALIDATE(wordEmb.count % mLd == 0); - PLUGIN_VALIDATE(posEmb.count % mLd == 0); - PLUGIN_VALIDATE(tokEmb.count % mLd == 0); - mWordVocabSize = wordEmb.count / mLd; - mPosVocabSize = posEmb.count / mLd; - mTokVocabSize = tokEmb.count / mLd; + PLUGIN_VALIDATE(static_cast(wordEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(posEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(tokEmb.count) % mLd == 0); + mWordVocabSize = static_cast(wordEmb.count) / mLd; + mPosVocabSize = static_cast(posEmb.count) / mLd; + mTokVocabSize = static_cast(tokEmb.count) / mLd; mBeta.convertAndCopy(beta, nvinfer1::DataType::kFLOAT); mGamma.convertAndCopy(gamma, nvinfer1::DataType::kFLOAT); @@ -302,7 +302,7 @@ PluginFieldCollection const* EmbLayerNormVarSeqlenPluginBase::getFieldsToSeriali mDataToSerialize.emplace_back("bert_embeddings_position_embeddings", static_cast(mPosEmb.values), PluginFieldType::kFLOAT32, mPosEmb.count); } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -357,7 +357,7 @@ int32_t EmbLayerNormVarSeqlenPluginMTron::onShapeChange( return pluginStatus_t::STATUS_FAILURE; } -IPluginV3* EmbLayerNormVarSeqlenPluginBase::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* EmbLayerNormVarSeqlenPluginBase::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -370,9 +370,9 @@ int32_t 
EmbLayerNormVarSeqlenPluginHFace::enqueue(PluginTensorDesc const* inputD { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batchSize = inputDesc[2].dims.d[0] - 1; + int32_t const batchSize = static_cast(inputDesc[2].dims.d[0]) - 1; // read out the maximum sequence length from the dummy input - int32_t const maxSeqlen = inputDesc[3].dims.d[0]; + int32_t const maxSeqlen = static_cast(inputDesc[3].dims.d[0]); // There are four versions of the kernel which are optimized for sequence lengths 384, 256, 192 and 128. // Find the closest sequence length bigger than the max seq length in this batch. @@ -405,7 +405,8 @@ int32_t EmbLayerNormVarSeqlenPluginHFace::enqueue(PluginTensorDesc const* inputD auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormHFace(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); } if (mType == DataType::kHALF) { @@ -415,7 +416,8 @@ int32_t EmbLayerNormVarSeqlenPluginHFace::enqueue(PluginTensorDesc const* inputD auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormHFace(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); } else { @@ -442,9 +444,9 @@ int32_t EmbLayerNormVarSeqlenPluginMTron::enqueue(PluginTensorDesc const* inputD { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batchSize = inputDesc[2].dims.d[0] - 1; + int32_t const batchSize = static_cast(inputDesc[2].dims.d[0]) - 1; // read out the maximum sequence length from the dummy input - int32_t const maxSeqlen = 
inputDesc[3].dims.d[0]; + int32_t const maxSeqlen = static_cast(inputDesc[3].dims.d[0]); // There are four versions of the kernel which are optimized for sequence lengths 384, 256, 192 and 128. // Find the closest sequence length bigger than the max seq length in this batch. @@ -478,7 +480,8 @@ int32_t EmbLayerNormVarSeqlenPluginMTron::enqueue(PluginTensorDesc const* inputD auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormMTron(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output, skip); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output, skip); } if (mType == DataType::kHALF) { @@ -489,7 +492,8 @@ int32_t EmbLayerNormVarSeqlenPluginMTron::enqueue(PluginTensorDesc const* inputD auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormMTron(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output, skip); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output, skip); } else { @@ -566,7 +570,7 @@ bool EmbLayerNormVarSeqlenPluginBase::supportsFormatCombination( } int32_t EmbLayerNormVarSeqlenPluginHFace::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -587,7 +591,7 @@ int32_t EmbLayerNormVarSeqlenPluginHFace::getOutputShapes(DimsExprs const* input // output 0 : embedded input outputs[0].nbDims = 4; outputs[0].d[0] = inputs[0].d[0]; - outputs[0].d[1] = exprBuilder.constant(mLd); + outputs[0].d[1] = exprBuilder.constant(static_cast(mLd)); outputs[0].d[2] = 
exprBuilder.constant(1); outputs[0].d[3] = exprBuilder.constant(1); @@ -604,7 +608,7 @@ int32_t EmbLayerNormVarSeqlenPluginHFace::getOutputShapes(DimsExprs const* input } int32_t EmbLayerNormVarSeqlenPluginMTron::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -623,14 +627,14 @@ int32_t EmbLayerNormVarSeqlenPluginMTron::getOutputShapes(DimsExprs const* input // Output 0 : embedded input outputs[0].nbDims = 4; outputs[0].d[0] = inputs[0].d[0]; - outputs[0].d[1] = exprBuilder.constant(mLd); + outputs[0].d[1] = exprBuilder.constant(static_cast(mLd)); outputs[0].d[2] = exprBuilder.constant(1); outputs[0].d[3] = exprBuilder.constant(1); // Output 1 : maskIdx outputs[1].nbDims = 4; outputs[1].d[0] = inputs[0].d[0]; - outputs[1].d[1] = exprBuilder.constant(mLd); + outputs[1].d[1] = exprBuilder.constant(static_cast(mLd)); outputs[1].d[2] = exprBuilder.constant(1); outputs[1].d[3] = exprBuilder.constant(1); @@ -644,7 +648,7 @@ int32_t EmbLayerNormVarSeqlenPluginMTron::getOutputShapes(DimsExprs const* input } int32_t EmbLayerNormVarSeqlenPluginBase::getOutputDataTypes( - DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept + DataType* outputTypes, int32_t /*nbOutputs*/, DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -660,14 +664,14 @@ int32_t EmbLayerNormVarSeqlenPluginBase::getOutputDataTypes( return pluginStatus_t::STATUS_FAILURE; } -int32_t EmbLayerNormVarSeqlenPluginBase::configurePlugin(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +int32_t EmbLayerNormVarSeqlenPluginBase::configurePlugin(DynamicPluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, 
DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -size_t EmbLayerNormVarSeqlenPluginBase::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t EmbLayerNormVarSeqlenPluginBase::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -716,14 +720,19 @@ EmbLayerNormVarSeqlenPluginBaseCreator::EmbLayerNormVarSeqlenPluginBaseCreator() mPluginAttributes.emplace_back(PluginField("output_fp16", nullptr, PluginFieldType::kINT32, 1)); // the length of beta, gamma, word_emb, pos_emb, and tok_emb will only be known at the time of plugin creation // so we set it to 0 here - mPluginAttributes.emplace_back(PluginField("bert_embeddings_layernorm_beta", nullptr, PluginFieldType::kFLOAT32, 0)); - mPluginAttributes.emplace_back(PluginField("bert_embeddings_layernorm_gamma", nullptr, PluginFieldType::kFLOAT32, 0)); + mPluginAttributes.emplace_back( + PluginField("bert_embeddings_layernorm_beta", nullptr, PluginFieldType::kFLOAT32, 0)); + mPluginAttributes.emplace_back( + PluginField("bert_embeddings_layernorm_gamma", nullptr, PluginFieldType::kFLOAT32, 0)); // the embeddings datatype is determined by the output_fp16 attribute known at runtime // so we set it to kUNKNOWN here - mPluginAttributes.emplace_back(PluginField("bert_embeddings_word_embeddings", nullptr, PluginFieldType::kUNKNOWN, 0)); - mPluginAttributes.emplace_back(PluginField("bert_embeddings_token_type_embeddings", nullptr, PluginFieldType::kUNKNOWN, 0)); - mPluginAttributes.emplace_back(PluginField("bert_embeddings_position_embeddings", nullptr, PluginFieldType::kUNKNOWN, 0)); - mFC.nbFields = mPluginAttributes.size(); + mPluginAttributes.emplace_back( + PluginField("bert_embeddings_word_embeddings", nullptr, 
PluginFieldType::kUNKNOWN, 0)); + mPluginAttributes.emplace_back( + PluginField("bert_embeddings_token_type_embeddings", nullptr, PluginFieldType::kUNKNOWN, 0)); + mPluginAttributes.emplace_back( + PluginField("bert_embeddings_position_embeddings", nullptr, PluginFieldType::kUNKNOWN, 0)); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -748,7 +757,7 @@ PluginFieldCollection const* EmbLayerNormVarSeqlenPluginBaseCreator::getFieldNam } IPluginV3* EmbLayerNormVarSeqlenPluginHFaceCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { @@ -779,7 +788,7 @@ IPluginV3* EmbLayerNormVarSeqlenPluginHFaceCreator::createPlugin( } IPluginV3* EmbLayerNormVarSeqlenPluginMTronCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.cpp b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.cpp index 5f103c058..a0ec91472 100644 --- a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.cpp +++ b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.cpp @@ -42,19 +42,19 @@ EmbLayerNormVarSeqlenPluginLegacyBase::EmbLayerNormVarSeqlenPluginLegacyBase(std Weights const& beta, Weights const& gamma, Weights const& wordEmb, Weights const& posEmb, Weights const& tokEmb, DataType maskType) : mLayerName(name) - , mLd(beta.count) + , mLd(static_cast(beta.count)) , mType(type) , mMaskType(maskType) { // Assuming Weights.count is the number of elements and not bytes PLUGIN_VALIDATE(beta.count == gamma.count); PLUGIN_VALIDATE(mLd > 0U); - PLUGIN_VALIDATE(wordEmb.count % mLd == 0); - PLUGIN_VALIDATE(posEmb.count % mLd == 0); - PLUGIN_VALIDATE(tokEmb.count % mLd == 0); - 
mWordVocabSize = wordEmb.count / mLd; - mPosVocabSize = posEmb.count / mLd; - mTokVocabSize = tokEmb.count / mLd; + PLUGIN_VALIDATE(static_cast(wordEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(posEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(tokEmb.count) % mLd == 0); + mWordVocabSize = static_cast(wordEmb.count) / mLd; + mPosVocabSize = static_cast(posEmb.count) / mLd; + mTokVocabSize = static_cast(tokEmb.count) / mLd; mBeta.convertAndCopy(beta, nvinfer1::DataType::kFLOAT); mGamma.convertAndCopy(gamma, nvinfer1::DataType::kFLOAT); @@ -190,7 +190,7 @@ DimsExprs EmbLayerNormVarSeqlenPluginLegacyHFace::getOutputDimensions( DimsExprs ret; ret.nbDims = 4; ret.d[0] = inputs[0].d[0]; - ret.d[1] = exprBuilder.constant(mLd); + ret.d[1] = exprBuilder.constant(static_cast(mLd)); ret.d[2] = exprBuilder.constant(1); ret.d[3] = exprBuilder.constant(1); return ret; @@ -219,7 +219,7 @@ DimsExprs EmbLayerNormVarSeqlenPluginLegacyMTron::getOutputDimensions( DimsExprs ret; ret.nbDims = 4; ret.d[0] = inputs[0].d[0]; - ret.d[1] = exprBuilder.constant(mLd); + ret.d[1] = exprBuilder.constant(static_cast(mLd)); ret.d[2] = exprBuilder.constant(1); ret.d[3] = exprBuilder.constant(1); return ret; @@ -319,8 +319,8 @@ void EmbLayerNormVarSeqlenPluginLegacyMTron::configurePlugin(DynamicPluginTensor PLUGIN_ASSERT(outputs[1].desc.type == mMaskType); } -size_t EmbLayerNormVarSeqlenPluginLegacyBase::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t EmbLayerNormVarSeqlenPluginLegacyBase::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -333,9 +333,9 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyHFace::enqueue(PluginTensorDesc const* { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batchSize = inputDesc[2].dims.d[0] - 1; + 
int32_t const batchSize = static_cast(inputDesc[2].dims.d[0]) - 1; // read out the maximum sequence length from the dummy input - int32_t const maxSeqlen = inputDesc[3].dims.d[0]; + int32_t const maxSeqlen = static_cast(inputDesc[3].dims.d[0]); // There are four versions of the kernel which are optimized for sequence lengths 384, 256, 192 and 128. // Find the closest sequence length bigger than the max seq length in this batch. @@ -368,7 +368,8 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyHFace::enqueue(PluginTensorDesc const* auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormHFace(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); } if (mType == DataType::kHALF) { @@ -378,7 +379,8 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyHFace::enqueue(PluginTensorDesc const* auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormHFace(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); } else { @@ -405,9 +407,9 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyMTron::enqueue(PluginTensorDesc const* { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batchSize = inputDesc[2].dims.d[0] - 1; + int32_t const batchSize = static_cast(inputDesc[2].dims.d[0]) - 1; // read out the maximum sequence length from the dummy input - int32_t const maxSeqlen = inputDesc[3].dims.d[0]; + int32_t const maxSeqlen = static_cast(inputDesc[3].dims.d[0]); // There are four versions of the kernel which are optimized for sequence lengths 384, 256, 192 and 128. 
// Find the closest sequence length bigger than the max seq length in this batch. @@ -441,7 +443,8 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyMTron::enqueue(PluginTensorDesc const* auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormMTron(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output, skip); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output, skip); } if (mType == DataType::kHALF) { @@ -452,7 +455,8 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyMTron::enqueue(PluginTensorDesc const* auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormMTron(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output, skip); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output, skip); } else { @@ -473,7 +477,7 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyMTron::enqueue(PluginTensorDesc const* // IPluginV2Ext Methods DataType EmbLayerNormVarSeqlenPluginLegacyBase::getOutputDataType( - int32_t index, DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index == 0 || index == 1); PLUGIN_ASSERT(mType == DataType::kHALF || mType == DataType::kFLOAT); @@ -609,7 +613,7 @@ EmbLayerNormVarSeqlenPluginLegacyBaseCreator::EmbLayerNormVarSeqlenPluginLegacyB mPluginAttributes.emplace_back(PluginField("bert_embeddings_token_type_embeddings")); mPluginAttributes.emplace_back(PluginField("bert_embeddings_position_embeddings")); mPluginAttributes.emplace_back(PluginField("output_fp16")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ 
-633,7 +637,7 @@ PluginFieldCollection const* EmbLayerNormVarSeqlenPluginLegacyBaseCreator::getFi return &mFC; } -bool initializeFields(char const* name, PluginFieldCollection const* fc, Weights& beta, Weights& gamma, +bool initializeFields(char const* /*name*/, PluginFieldCollection const* fc, Weights& beta, Weights& gamma, Weights& word_emb, Weights& pos_emb, Weights& tok_emb) { bool output_fp16 = false; diff --git a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.h b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.h index 67b03a25d..c4cb03569 100644 --- a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.h +++ b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.h @@ -46,6 +46,11 @@ int32_t embSkipLayerNormMTron(cudaStream_t stream, int32_t ld, int32_t B, int32_ class EmbLayerNormVarSeqlenPluginLegacyBase : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + EmbLayerNormVarSeqlenPluginLegacyBase(std::string const& name, DataType type, Weights const& beta, Weights const& gamma, Weights const& word_emb, Weights const& pos_emb, Weights const& tok_emb, DataType maskType); @@ -100,6 +105,11 @@ class EmbLayerNormVarSeqlenPluginLegacyBase : public nvinfer1::IPluginV2DynamicE class EmbLayerNormVarSeqlenPluginLegacyHFace : public EmbLayerNormVarSeqlenPluginLegacyBase { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + EmbLayerNormVarSeqlenPluginLegacyHFace(std::string const& name, nvinfer1::DataType const type, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma, nvinfer1::Weights const& word_emb, nvinfer1::Weights const& 
pos_emb, nvinfer1::Weights const& tok_emb); @@ -129,6 +139,11 @@ class EmbLayerNormVarSeqlenPluginLegacyHFace : public EmbLayerNormVarSeqlenPlugi class EmbLayerNormVarSeqlenPluginLegacyMTron : public EmbLayerNormVarSeqlenPluginLegacyBase { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + EmbLayerNormVarSeqlenPluginLegacyMTron(std::string const& name, nvinfer1::DataType const type, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma, nvinfer1::Weights const& word_emb, nvinfer1::Weights const& pos_emb, nvinfer1::Weights const& tok_emb); diff --git a/plugin/fcPlugin/fcPlugin.cpp b/plugin/fcPlugin/fcPlugin.cpp index 6f0d02257..f7dc9dad7 100644 --- a/plugin/fcPlugin/fcPlugin.cpp +++ b/plugin/fcPlugin/fcPlugin.cpp @@ -50,9 +50,9 @@ static void printPerfStructure(customMatmulPerf_t const& perf, int32_t const m, AlgoProps p; p.populate(perf.algo); // Calculate GFLOPS - double timeAvg - = perf.time * 1e-3; // Convert to seconds. It has been divided by kNB_KERNEL_REPEATS in customMatmulRun(). - double gflop = (2 * static_cast(m * n) * k) * 1e-9; // Real + double timeAvg = static_cast(perf.time) + * 1e-3; // Convert to seconds. It has been divided by kNB_KERNEL_REPEATS in customMatmulRun(). 
+ double gflop = (2 * static_cast(static_cast(m * n) * k)) * 1e-9; // Real gLogVerbose << "Algo=" << p.algoId << " Tile=" << p.tile << " (" << matmulTileName[p.tile] << ") K=" << p.numSplitsK << " Red.Sch.=" << p.reductionScheme << " Swiz=" << p.swizzle << " Cust=" << p.customOption @@ -117,7 +117,7 @@ static cublasStatus_t customMatmulRun(cublasLtHandle_t ltHandle, // to get the c } // For the moment only add successful findings perfResults.algo = algo; - perfResults.time = time / kNB_KERNEL_REPEATS; // Average time + perfResults.time = time / static_cast(kNB_KERNEL_REPEATS); // Average time perfResults.workspaceSize = heurResult.workspaceSize; perfResults.wavesCount = heurResult.wavesCount; } @@ -179,11 +179,14 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera // Create matrix descriptors. We are good with the details here so no need to // set any extra attributes + PLUGIN_CUBLASASSERT( + cublasLtWrapper.cublasLtMatrixLayoutCreate(&Adesc, Atype, static_cast(transa == CUBLAS_OP_N ? m : k), + static_cast(transa == CUBLAS_OP_N ? k : m), static_cast(lda))); + PLUGIN_CUBLASASSERT( + cublasLtWrapper.cublasLtMatrixLayoutCreate(&Bdesc, Btype, static_cast(transb == CUBLAS_OP_N ? k : n), + static_cast(transb == CUBLAS_OP_N ? n : k), static_cast(ldb))); PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatrixLayoutCreate( - &Adesc, Atype, transa == CUBLAS_OP_N ? m : k, transa == CUBLAS_OP_N ? k : m, lda)); - PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatrixLayoutCreate( - &Bdesc, Btype, transb == CUBLAS_OP_N ? k : n, transb == CUBLAS_OP_N ? 
n : k, ldb)); - PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatrixLayoutCreate(&Cdesc, Ctype, m, n, ldc)); + &Cdesc, Ctype, static_cast(m), static_cast(n), static_cast(ldc))); // Request the 4 first AlgoId available for SGEMM ( computeType = scaleType = // Atype = Btype = Ctype = Dtype = CUDA_R_32F) @@ -209,7 +212,7 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera continue; } - uint64_t numericImpl = -1; + uint64_t numericImpl = static_cast(-1); PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatmulAlgoCapGetAttribute( &algo, CUBLASLT_ALGO_CAP_NUMERICAL_IMPL_FLAGS, &numericImpl, sizeof(numericImpl), nullptr)); if (Ctype == CUDA_R_32F && numericImpl == CUBLASLT_NUMERICAL_IMPL_FLAGS_HMMA) @@ -221,7 +224,7 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera // Query the tiles enums supported by that algo PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatmulAlgoCapGetAttribute( &algo, CUBLASLT_ALGO_CAP_TILE_IDS, nullptr, 0, &sizeWritten)); - int32_t nbTiles = int32_t(sizeWritten / sizeof(int32_t)); + int32_t nbTiles = static_cast(sizeWritten / sizeof(int32_t)); int32_t* tileA = new int32_t[nbTiles == 0 ? 
1 : nbTiles]; if (nbTiles == 0) { @@ -259,12 +262,12 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatmulAlgoConfigSetAttribute( &algo, CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION, &customOption, sizeof(customOption))); // Loop over the CTAs swizzling support - for (int32_t k = 0; k <= swizzlingMax; k++) + for (int32_t k_ = 0; k_ <= swizzlingMax; k_++) { int32_t splitkTrial = 0; if (splitkSupport) { - splitkTrial += sizeof(splitKSequenceA) / sizeof(splitKSequenceA[0]); + splitkTrial += static_cast(sizeof(splitKSequenceA) / sizeof(splitKSequenceA[0])); } // Loop over the splitK value over a fixed sequence splitKSequenceA in // addition to the case where splitK is not enabled @@ -278,7 +281,7 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatmulAlgoConfigSetAttribute( &algo, CUBLASLT_ALGO_CONFIG_SPLITK_NUM, &splitK_val, sizeof(splitK_val))); PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatmulAlgoConfigSetAttribute( - &algo, CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING, &k, sizeof(k))); + &algo, CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING, &k_, sizeof(k_))); PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatmulAlgoConfigSetAttribute( &algo, CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME, &redScheme, sizeof(int32_t))); @@ -290,8 +293,8 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera sizeof(splitKSequenceA[l - 1]))); // Going over all the reduction scheme for (redScheme = 1; redScheme < static_cast(CUBLASLT_REDUCTION_SCHEME_MASK) - && (algoCount < kNB_ALGO_COMBINATIONS); - redScheme = redScheme << 1) + && (algoCount < kNB_ALGO_COMBINATIONS); + redScheme = redScheme << 1) { if (redScheme & redMask) { @@ -308,7 +311,7 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera algoCount++; } } // end if - } // end for + } // end for } else { // Non-splitK case @@ -327,9 +330,9 @@ void 
nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera } } } // end l - } // end k - } // end customOption - } // end tileIdx + } // end k + } // end customOption + } // end tileIdx delete[] tileA; } // end idx @@ -360,7 +363,7 @@ FCPluginDynamic::FCPluginDynamic(std::string const name, DataType const type, in : mLayerName(name) , mType(type) , mOutDim(outDim) - , mNumParams(W.count) + , mNumParams(static_cast(W.count)) , mNmax(0) , mK(0) , mWdev(nullptr) @@ -398,7 +401,7 @@ IPluginV2DynamicExt* FCPluginDynamic::clone() const noexcept { gLogVerbose << "FCPluginDynamic clone\n"; - auto* p = new FCPluginDynamic(mLayerName, mType, mOutDim, mW); + auto* p = new FCPluginDynamic(mLayerName, mType, static_cast(mOutDim), mW); memcpy(p->mAlgo.data, mAlgo.data, sizeof(mAlgo.data)); p->setPluginNamespace(mNamespace.c_str()); @@ -411,8 +414,8 @@ IPluginV2DynamicExt* FCPluginDynamic::clone() const noexcept return nullptr; } -void FCPluginDynamic::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator) noexcept +void FCPluginDynamic::attachToContext(cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, + nvinfer1::IGpuAllocator* /*gpuAllocator*/) noexcept { mLtContext.attach(); } @@ -478,15 +481,15 @@ void FCPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* inputs, int auto const& inDims0 = inputs[0].desc.dims; PLUGIN_VALIDATE(inDims0.nbDims == 5); - mK = inDims0.d[HDIM]; // hiddensize + mK = static_cast(inDims0.d[HDIM]); // hiddensize // PLUGIN_ASSERT(hiddenSize * mOutDim == mNumParams); PLUGIN_VALIDATE(inDims0.d[3] == 1); PLUGIN_VALIDATE(inDims0.d[4] == 1); // m and k are mOutDim // n is B*S - int32_t const S = inputs->max.d[SDIM]; - int32_t const B = inputs->max.d[BDIM]; + int32_t const S = static_cast(inputs->max.d[SDIM]); + int32_t const B = static_cast(inputs->max.d[BDIM]); mNmax = S * B; @@ -495,12 +498,12 @@ void 
FCPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* inputs, int if (mType == DataType::kFLOAT) { - Gemm g(mOutDim, mNmax, mK, false, false); + Gemm g(static_cast(mOutDim), mNmax, mK, false, false); mLtContext.create(g, kMAX_WORKSPACE_BYTES); } else if (mType == DataType::kHALF) { - Gemm g(mOutDim, mNmax, mK, false, false); + Gemm g(static_cast(mOutDim), mNmax, mK, false, false); mLtContext.create(g, kMAX_WORKSPACE_BYTES); } else @@ -524,11 +527,13 @@ void FCPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* inputs, int } if (mType == DataType::kFLOAT) { - mAlgo = gemmSearch(mOutDim, mNmax, mK, kMAX_WORKSPACE_BYTES, actualWorkspace, mSharedStream); + mAlgo = gemmSearch( + static_cast(mOutDim), mNmax, mK, kMAX_WORKSPACE_BYTES, actualWorkspace, mSharedStream); } else if (mType == DataType::kHALF) { - mAlgo = gemmSearch(mOutDim, mNmax, mK, kMAX_WORKSPACE_BYTES, actualWorkspace, mSharedStream); + mAlgo = gemmSearch( + static_cast(mOutDim), mNmax, mK, kMAX_WORKSPACE_BYTES, actualWorkspace, mSharedStream); } } @@ -556,8 +561,8 @@ void FCPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* inputs, int } } -size_t FCPluginDynamic::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t FCPluginDynamic::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return kMAX_WORKSPACE_BYTES; } @@ -572,8 +577,8 @@ int32_t FCPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, PluginTensor size_t const workspaceSize = getWorkspaceSize(inputDesc, 1, outputDesc, 1); - int32_t const S = inputDesc->dims.d[SDIM]; - int32_t const B = inputDesc->dims.d[BDIM]; + int32_t const S = static_cast(inputDesc->dims.d[SDIM]); + int32_t const B = static_cast(inputDesc->dims.d[BDIM]); int32_t const n = S * B; PLUGIN_VALIDATE(n >= 0); mLtContext.setN(static_cast(n)); @@ -583,7 
+588,7 @@ int32_t FCPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, PluginTensor auto const* const input = static_cast(inputs[0]); auto* output = static_cast(outputs[0]); - Gemm g(mOutDim, n, mK, false, false); + Gemm g(static_cast(mOutDim), n, mK, false, false); if (mWdev == nullptr) { return STATUS_FAILURE; @@ -599,7 +604,7 @@ int32_t FCPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, PluginTensor auto const* const input = static_cast(inputs[0]); auto* output = static_cast(outputs[0]); - Gemm g(mOutDim, n, mK, false, false); + Gemm g(static_cast(mOutDim), n, mK, false, false); if (mWdev == nullptr) { return STATUS_FAILURE; @@ -668,8 +673,8 @@ void FCPluginDynamic::terminate() noexcept size_t FCPluginDynamic::getSerializationSize() const noexcept { size_t wordSize = getElementSize(mType); - return wordSize * mNumParams + sizeof(mType) + sizeof(mOutDim) + sizeof(mNumParams) + sizeof(mAlgo) + sizeof(mNmax) - + sizeof(mK); + return wordSize * static_cast(mNumParams) + sizeof(mType) + sizeof(mOutDim) + sizeof(mNumParams) + + sizeof(mAlgo) + sizeof(mNmax) + sizeof(mK); } void FCPluginDynamic::serialize(void* buffer) const noexcept @@ -683,7 +688,7 @@ void FCPluginDynamic::serialize(void* buffer) const noexcept size_t wordSize = getElementSize(mType); char* d = static_cast(buffer); - serFromDev(d, static_cast(mWdev.get()), mNumParams * wordSize); + serFromDev(d, static_cast(mWdev.get()), static_cast(mNumParams) * wordSize); } void FCPluginDynamic::destroy() noexcept @@ -722,7 +727,7 @@ FCPluginDynamicCreator::FCPluginDynamicCreator() mPluginAttributes.emplace_back(PluginField("type_id", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("W", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/fcPlugin/fcPlugin.h b/plugin/fcPlugin/fcPlugin.h index 61f613860..a41919f65 100644 
--- a/plugin/fcPlugin/fcPlugin.h +++ b/plugin/fcPlugin/fcPlugin.h @@ -60,7 +60,7 @@ class SharedStream : public IPluginResource { free(); } - catch (std::exception const& e) + catch (std::exception const& /*e*/) { return -1; } @@ -74,7 +74,7 @@ class SharedStream : public IPluginResource { cloned = std::make_unique(/* init */ true); } - catch (std::exception const& e) + catch (std::exception const& /*e*/) { return nullptr; } @@ -323,7 +323,7 @@ struct LtContext } template - void create(Gemm& g, size_t workspaceSize) + void create(Gemm& g, size_t /*workspaceSize*/) { typeA = Gemm::Types::cudaTypeI; typeB = Gemm::Types::cudaTypeI; @@ -522,6 +522,11 @@ nvinfer1::pluginInternal::cublasLtMatmulAlgo_t gemmSearch( class FCPluginDynamic : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + FCPluginDynamic( std::string const name, nvinfer1::DataType const type, int32_t const outDim, nvinfer1::Weights const& W); diff --git a/plugin/flattenConcat/flattenConcat.cpp b/plugin/flattenConcat/flattenConcat.cpp index 512df0db6..ce258112b 100644 --- a/plugin/flattenConcat/flattenConcat.cpp +++ b/plugin/flattenConcat/flattenConcat.cpp @@ -112,7 +112,7 @@ Dims FlattenConcat::getOutputDimensions(int32_t index, Dims const* inputs, int32 { PLUGIN_ASSERT(inputs[i].d[2] == inputs[0].d[2]); } - flattenInput = inputs[i].d[0] * inputs[i].d[1] * inputs[i].d[2]; + flattenInput = static_cast(inputs[i].d[0] * inputs[i].d[1] * inputs[i].d[2]); outputConcatAxis += flattenInput; } @@ -133,13 +133,13 @@ int32_t FlattenConcat::initialize() noexcept void FlattenConcat::terminate() noexcept {} -size_t FlattenConcat::getWorkspaceSize(int32_t) const noexcept +size_t FlattenConcat::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } -int32_t FlattenConcat::enqueue( - 
int32_t batchSize, void const* const* inputs, void* const* outputs, void*, cudaStream_t stream) noexcept +int32_t FlattenConcat::enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t /*stream*/) noexcept { try { @@ -182,8 +182,8 @@ int32_t FlattenConcat::enqueue( size_t FlattenConcat::getSerializationSize() const noexcept { - return sizeof(bool) + sizeof(int32_t) * (3 + mNumInputs) + sizeof(nvinfer1::Dims) - + (sizeof(decltype(mCopySize)::value_type) * mNumInputs); + return sizeof(bool) + sizeof(int32_t) * static_cast(3 + mNumInputs) + sizeof(nvinfer1::Dims) + + (sizeof(decltype(mCopySize)::value_type) * static_cast(mNumInputs)); } void FlattenConcat::serialize(void* buffer) const noexcept @@ -208,7 +208,7 @@ void FlattenConcat::serialize(void* buffer) const noexcept // Attach the plugin object to an execution context and grant the plugin the access to some context resource. void FlattenConcat::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* gpuAllocator) noexcept { try { @@ -227,13 +227,13 @@ void FlattenConcat::detachFromContext() noexcept {} // Return true if output tensor is broadcast across a batch. bool FlattenConcat::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. 
-bool FlattenConcat::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool FlattenConcat::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } @@ -258,15 +258,15 @@ char const* FlattenConcat::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index DataType FlattenConcat::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { - PLUGIN_ASSERT(index < 3); return DataType::kFLOAT; } -void FlattenConcat::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void FlattenConcat::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t /*maxBatchSize*/) noexcept { try { @@ -292,7 +292,7 @@ void FlattenConcat::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dim { PLUGIN_ASSERT(inputDims[i].d[2] == inputDims[0].d[2]); } - flattenInput = inputDims[i].d[0] * inputDims[i].d[1] * inputDims[i].d[2]; + flattenInput = static_cast(inputDims[i].d[0] * inputDims[i].d[1] * inputDims[i].d[2]); mInputConcatAxis[i] = flattenInput; mOutputConcatAxis += mInputConcatAxis[i]; } @@ -300,7 +300,8 @@ void FlattenConcat::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dim mCopySize.resize(mNumInputs); for (int32_t i = 0; i < nbInputs; ++i) { - mCopySize[i] = inputDims[i].d[0] * inputDims[i].d[1] * inputDims[i].d[2] * sizeof(float); + mCopySize[i] + = static_cast(inputDims[i].d[0] * 
inputDims[i].d[1] * inputDims[i].d[2]) * sizeof(float); } } catch (std::exception const& e) @@ -350,7 +351,7 @@ FlattenConcatPluginCreator::FlattenConcatPluginCreator() mPluginAttributes.emplace_back(PluginField("axis", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("ignoreBatch", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -369,7 +370,7 @@ PluginFieldCollection const* FlattenConcatPluginCreator::getFieldNames() noexcep return &mFC; } -IPluginV2Ext* FlattenConcatPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* FlattenConcatPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -404,7 +405,7 @@ IPluginV2Ext* FlattenConcatPluginCreator::createPlugin(char const* name, PluginF } IPluginV2Ext* FlattenConcatPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/geluPlugin/geluPlugin.cpp b/plugin/geluPlugin/geluPlugin.cpp index f3ab72caa..bf8319184 100644 --- a/plugin/geluPlugin/geluPlugin.cpp +++ b/plugin/geluPlugin/geluPlugin.cpp @@ -47,8 +47,9 @@ GeluPluginDynamic::GeluPluginDynamic(const std::string name, const DataType type if (mHasBias) { void* cudaMem{nullptr}; - PLUGIN_CUASSERT(cudaMalloc(&cudaMem, getWeightsSize(bias, mType))); - PLUGIN_CUASSERT(cudaMemcpy(cudaMem, bias.values, getWeightsSize(bias, mType), cudaMemcpyHostToDevice)); + PLUGIN_CUASSERT(cudaMalloc(&cudaMem, static_cast(getWeightsSize(bias, mType)))); + PLUGIN_CUASSERT( + cudaMemcpy(cudaMem, bias.values, static_cast(getWeightsSize(bias, mType)), cudaMemcpyHostToDevice)); make_cuda_shared(mBiasDev, cudaMem); } } @@ -65,7 +66,7 @@ GeluPluginDynamic::GeluPluginDynamic(const 
std::string name, void const* data, s { PLUGIN_VALIDATE(mLd > 0); char const* d = static_cast(data); - make_cuda_shared(mBiasDev, deserToDev(d, mLd * getElementSize(mType))); + make_cuda_shared(mBiasDev, deserToDev(d, static_cast(mLd) * getElementSize(mType))); } } // IPluginV2DynamicExt Methods @@ -86,7 +87,7 @@ nvinfer1::IPluginV2DynamicExt* GeluPluginDynamic::clone() const noexcept } nvinfer1::DimsExprs GeluPluginDynamic::getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, - int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept + int32_t nbInputs, nvinfer1::IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -133,7 +134,7 @@ bool GeluPluginDynamic::supportsFormatCombination( } void GeluPluginDynamic::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept + nvinfer1::DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { gLogVerbose << "GeluPluginDynamic configurePlugin\n"; @@ -149,8 +150,8 @@ void GeluPluginDynamic::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* } } -size_t GeluPluginDynamic::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t GeluPluginDynamic::getWorkspaceSize(nvinfer1::PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -164,8 +165,8 @@ int32_t GeluPluginDynamic::enqueueTyped( if (mHasBias) { - int32_t const cols = inputVolume / mLd; - int32_t const rows = mLd; + int32_t const cols = static_cast(inputVolume / mLd); + int32_t const rows = static_cast(mLd); TDataType const* bias = static_cast(mBiasDev.get()); return computeGeluBias(output, input, bias, rows, cols, stream); } @@ -189,7 +190,7 @@ int32_t GeluPluginDynamic::enqueue(nvinfer1::PluginTensorDesc const* 
inputDesc, return STATUS_FAILURE; } - int32_t const inputVolume = volume(inputDesc[0].dims); + int32_t const inputVolume = static_cast(volume(inputDesc[0].dims)); // Our plugin outputs only one tensor. // Launch CUDA kernel wrapper and save its return value. @@ -205,7 +206,7 @@ int32_t GeluPluginDynamic::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, // IPluginV2Ext Methods nvinfer1::DataType GeluPluginDynamic::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { try { @@ -252,7 +253,7 @@ void GeluPluginDynamic::terminate() noexcept size_t GeluPluginDynamic::getSerializationSize() const noexcept { const size_t wordSize = getElementSize(mType); - const size_t biasSize = mHasBias ? mLd * wordSize : 0; + const size_t biasSize = mHasBias ? static_cast(mLd) * wordSize : 0; return sizeof(mType) + sizeof(mHasBias) + sizeof(mLd) + biasSize; } @@ -265,7 +266,7 @@ void GeluPluginDynamic::serialize(void* buffer) const noexcept { PLUGIN_ASSERT(mLd > 0); char* d = static_cast(buffer); - serFromDev(d, static_cast(mBiasDev.get()), mLd * getElementSize(mType)); + serFromDev(d, static_cast(mBiasDev.get()), static_cast(mLd) * getElementSize(mType)); } } @@ -303,7 +304,7 @@ GeluPluginDynamicCreator::GeluPluginDynamicCreator() mPluginAttributes.emplace_back(PluginField("bias", nullptr, PluginFieldType::kFLOAT32, 1)); // Fill PluginFieldCollection with PluginField arguments metadata - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/geluPlugin/geluPlugin.h b/plugin/geluPlugin/geluPlugin.h index a20630729..4514ecf2e 100644 --- a/plugin/geluPlugin/geluPlugin.h +++ b/plugin/geluPlugin/geluPlugin.h @@ -46,6 +46,11 @@ int32_t computeGeluBias( class TRT_DEPRECATED GeluPluginDynamic : public nvinfer1::IPluginV2DynamicExt { public: + 
using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + GeluPluginDynamic(const std::string name, const nvinfer1::DataType type, nvinfer1::Weights const& bias); GeluPluginDynamic(const std::string name, void const* data, size_t length); @@ -95,11 +100,6 @@ class TRT_DEPRECATED GeluPluginDynamic : public nvinfer1::IPluginV2DynamicExt bool mHasBias; bert::cuda_shared_ptr mBiasDev; size_t mLd; - - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; }; class TRT_DEPRECATED GeluPluginDynamicCreator : public nvinfer1::IPluginCreator diff --git a/plugin/generateDetectionPlugin/generateDetectionPlugin.cpp b/plugin/generateDetectionPlugin/generateDetectionPlugin.cpp index e8655dbdb..723764b78 100644 --- a/plugin/generateDetectionPlugin/generateDetectionPlugin.cpp +++ b/plugin/generateDetectionPlugin/generateDetectionPlugin.cpp @@ -42,7 +42,7 @@ GenerateDetectionPluginCreator::GenerateDetectionPluginCreator() noexcept mPluginAttributes.emplace_back(PluginField("iou_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("image_size", nullptr, PluginFieldType::kINT32, 3)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -61,7 +61,8 @@ PluginFieldCollection const* GenerateDetectionPluginCreator::getFieldNames() noe return &mFC; } -IPluginV2Ext* GenerateDetectionPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* GenerateDetectionPluginCreator::createPlugin( + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -109,7 +110,7 @@ IPluginV2Ext* GenerateDetectionPluginCreator::createPlugin(char const* name, Plu } IPluginV2Ext* 
GenerateDetectionPluginCreator::deserializePlugin( - char const* name, void const* data, size_t length) noexcept + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -165,7 +166,7 @@ int32_t GenerateDetection::initialize() noexcept mValidCnt = std::make_shared>(mMaxBatchSize); PLUGIN_CUASSERT(cudaMemcpy(mValidCnt->mPtr, static_cast(tempValidCnt.data()), - sizeof(int32_t) * mMaxBatchSize, cudaMemcpyHostToDevice)); + sizeof(int32_t) * static_cast(mMaxBatchSize), cudaMemcpyHostToDevice)); return 0; } @@ -315,11 +316,11 @@ int32_t GenerateDetection::enqueue( detections); PLUGIN_ASSERT(status == cudaSuccess); - return status; + return static_cast(status); } DataType GenerateDetection::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only DataType::kFLOAT is acceptable by the plugin layer return DataType::kFLOAT; @@ -327,33 +328,34 @@ DataType GenerateDetection::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool GenerateDetection::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool GenerateDetection::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool GenerateDetection::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
-void GenerateDetection::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void GenerateDetection::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t /*nbOutputs*/, DataType const* inputTypes, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t maxBatchSize) noexcept { check_valid_inputs(inputDims, nbInputs); PLUGIN_ASSERT(inputDims[0].d[0] == inputDims[1].d[0] && inputDims[1].d[0] == inputDims[2].d[0]); - mAnchorsCnt = inputDims[2].d[0]; + mAnchorsCnt = static_cast(inputDims[2].d[0]); mType = inputTypes[0]; mMaxBatchSize = maxBatchSize; } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void GenerateDetection::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/gridAnchorPlugin/gridAnchorPlugin.cpp b/plugin/gridAnchorPlugin/gridAnchorPlugin.cpp index 262fdc743..61a21fed5 100644 --- a/plugin/gridAnchorPlugin/gridAnchorPlugin.cpp +++ b/plugin/gridAnchorPlugin/gridAnchorPlugin.cpp @@ -64,7 +64,9 @@ GridAnchorGenerator::GridAnchorGenerator(GridAnchorParameters const* paramIn, in // Calculate the scales of SSD model for each layer for (int32_t i = 0; i < mNumLayers; i++) { - tmpScales[i] = (mParam[id].minSize + (mParam[id].maxSize - mParam[id].minSize) * id / (mNumLayers - 1)); + tmpScales[i] = (mParam[id].minSize + + (mParam[id].maxSize - mParam[id].minSize) * static_cast(id) + / static_cast(mNumLayers - 1)); } // Add another 1.0f to tmpScales to prevent going out side of the vector in calculating the scale_next. tmpScales.push_back(1.0F); // has 7 entries @@ -92,7 +94,7 @@ GridAnchorGenerator::GridAnchorGenerator(GridAnchorParameters const* paramIn, in aspect_ratios.push_back(mParam[id].aspectRatios[i]); } // Additional aspect ratio of 1.0 as described in the paper - aspect_ratios.push_back(1.0); + aspect_ratios.push_back(1.0F); // scales for (int32_t i = 0; i < mParam[id].numAspectRatios; i++) @@ -100,9 +102,11 @@ GridAnchorGenerator::GridAnchorGenerator(GridAnchorParameters const* paramIn, in scales.push_back(tmpScales[id]); } auto scale_next = (id == mNumLayers - 1) - ? 1.0 - : (mParam[id].minSize + (mParam[id].maxSize - mParam[id].minSize) * (id + 1) / (mNumLayers - 1)); - scales.push_back(std::sqrt(tmpScales[id] * scale_next)); + ? 
1.0F + : (mParam[id].minSize + + (mParam[id].maxSize - mParam[id].minSize) * static_cast(id + 1) + / static_cast(mNumLayers - 1)); + scales.push_back(static_cast(std::sqrt(tmpScales[id] * scale_next))); mNumPriors[id] = mParam[id].numAspectRatios + 1; } @@ -112,7 +116,7 @@ GridAnchorGenerator::GridAnchorGenerator(GridAnchorParameters const* paramIn, in // Calculate the width and height of the prior boxes for (int32_t i = 0; i < mNumPriors[id]; i++) { - float sqrt_AR = std::sqrt(aspect_ratios[i]); + float sqrt_AR = sqrtf(aspect_ratios[i]); tmpWidths.push_back(scales[i] * sqrt_AR); tmpHeights.push_back(scales[i] / sqrt_AR); } @@ -175,7 +179,7 @@ int32_t GridAnchorGenerator::getNbOutputs() const noexcept return mNumLayers; } -Dims GridAnchorGenerator::getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbInputDims) noexcept +Dims GridAnchorGenerator::getOutputDimensions(int32_t index, Dims const* /*inputs*/, int32_t /*nbInputDims*/) noexcept { // Particularity of the PriorBox layer: no batchSize dimension needed // 2 channels. First channel stores the mean of each prior coordinate. 
@@ -190,13 +194,13 @@ int32_t GridAnchorGenerator::initialize() noexcept void GridAnchorGenerator::terminate() noexcept {} -size_t GridAnchorGenerator::getWorkspaceSize(int32_t maxBatchSize) const noexcept +size_t GridAnchorGenerator::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } -int32_t GridAnchorGenerator::enqueue( - int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t GridAnchorGenerator::enqueue(int32_t /*batchSize*/, void const* const* /*inputs*/, void* const* outputs, + void* /*workspace*/, cudaStream_t stream) noexcept { // Generate prior boxes for each layer for (int32_t id = 0; id < mNumLayers; id++) @@ -218,10 +222,10 @@ size_t GridAnchorGenerator::getSerializationSize() const noexcept for (int32_t i = 0; i < mNumLayers; i++) { sum += 4 * sizeof(int32_t); // mNumPriors, mParam[i].{numAspectRatios, H, W} - sum += (6 + mParam[i].numAspectRatios) + sum += static_cast(6 + mParam[i].numAspectRatios) * sizeof(float); // mParam[i].{minSize, maxSize, aspectRatios, variance[4]} - sum += mDeviceWidths[i].count * sizeof(float); - sum += mDeviceHeights[i].count * sizeof(float); + sum += static_cast(mDeviceWidths[i].count) * sizeof(float); + sum += static_cast(mDeviceHeights[i].count) * sizeof(float); } return sum; } @@ -264,9 +268,9 @@ Weights GridAnchorGenerator::copyToDevice(void const* hostData, size_t count) no void GridAnchorGenerator::serializeFromDevice(char*& hostBuffer, Weights deviceWeights) const noexcept { - PLUGIN_CUASSERT( - cudaMemcpy(hostBuffer, deviceWeights.values, deviceWeights.count * sizeof(float), cudaMemcpyDeviceToHost)); - hostBuffer += deviceWeights.count * sizeof(float); + PLUGIN_CUASSERT(cudaMemcpy(hostBuffer, deviceWeights.values, + static_cast(deviceWeights.count) * sizeof(float), cudaMemcpyDeviceToHost)); + hostBuffer += static_cast(deviceWeights.count) * sizeof(float); } Weights GridAnchorGenerator::deserializeToDevice(char const*& 
hostBuffer, size_t count) noexcept @@ -304,7 +308,7 @@ char const* GridAnchorGenerator::getPluginNamespace() const noexcept #include // Return the DataType of the plugin output at the requested index DataType GridAnchorGenerator::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index < mNumLayers); return DataType::kFLOAT; @@ -312,21 +316,22 @@ DataType GridAnchorGenerator::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool GridAnchorGenerator::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool GridAnchorGenerator::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool GridAnchorGenerator::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
-void GridAnchorGenerator::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void GridAnchorGenerator::configurePlugin(Dims const* /*inputDims*/, int32_t /*nbInputs*/, Dims const* outputDims, + int32_t nbOutputs, DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(nbOutputs == mNumLayers); PLUGIN_ASSERT(outputDims[0].nbDims == 3); @@ -334,7 +339,7 @@ void GridAnchorGenerator::configurePlugin(Dims const* inputDims, int32_t nbInput // Attach the plugin object to an execution context and grant the plugin the access to some context resource. void GridAnchorGenerator::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -371,7 +376,7 @@ GridAnchorBasePluginCreator::GridAnchorBasePluginCreator() mPluginAttributes.emplace_back(PluginField("variance", nullptr, PluginFieldType::kFLOAT32, 4)); mPluginAttributes.emplace_back(PluginField("numLayers", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -390,7 +395,7 @@ PluginFieldCollection const* GridAnchorBasePluginCreator::getFieldNames() noexce return &mFC; } -IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -423,7 +428,7 @@ 
IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, Plugin else if (!strcmp(attrName, "variance")) { PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kFLOAT32); - int32_t size = fields[i].length; + int32_t size = static_cast(fields[i].length); layerVariances.reserve(size); auto const* lVar = static_cast(fields[i].data); for (int32_t j = 0; j < size; j++) @@ -435,7 +440,7 @@ IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, Plugin else if (!strcmp(attrName, "aspectRatios")) { PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kFLOAT32); - int32_t size = fields[i].length; + int32_t size = static_cast(fields[i].length); aspectRatios.reserve(size); auto const* aR = static_cast(fields[i].data); for (int32_t j = 0; j < size; j++) @@ -447,7 +452,7 @@ IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, Plugin else if (!strcmp(attrName, "featureMapShapes")) { PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kINT32); - int32_t size = fields[i].length; + int32_t size = static_cast(fields[i].length); PLUGIN_VALIDATE(!isFMapRect || (size % 2 == 0)); fMapShapes.reserve(size); int32_t const* fMap = static_cast(fields[i].data); @@ -463,7 +468,7 @@ IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, Plugin std::vector firstLayerAspectRatios; PLUGIN_VALIDATE(numLayers > 0); - int32_t const numExpectedLayers = static_cast(fMapShapes.size()) >> (isFMapRect ? 1 : 0); + int32_t const numExpectedLayers = static_cast(fMapShapes.size() >> (isFMapRect ? 
1 : 0)); PLUGIN_VALIDATE(numExpectedLayers == numLayers); int32_t numFirstLayerARs = 3; @@ -485,12 +490,12 @@ IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, Plugin if (i == 0) { boxParams[i] = {minScale, maxScale, firstLayerAspectRatios.data(), - (int32_t) firstLayerAspectRatios.size(), fMapShapes[hOffset], fMapShapes[wOffset], + static_cast(firstLayerAspectRatios.size()), fMapShapes[hOffset], fMapShapes[wOffset], {layerVariances[0], layerVariances[1], layerVariances[2], layerVariances[3]}}; } else { - boxParams[i] = {minScale, maxScale, aspectRatios.data(), (int32_t) aspectRatios.size(), + boxParams[i] = {minScale, maxScale, aspectRatios.data(), static_cast(aspectRatios.size()), fMapShapes[hOffset], fMapShapes[wOffset], {layerVariances[0], layerVariances[1], layerVariances[2], layerVariances[3]}}; } @@ -508,7 +513,7 @@ IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, Plugin } IPluginV2Ext* GridAnchorBasePluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/groupNormalizationPlugin/groupNormalizationPlugin.cpp b/plugin/groupNormalizationPlugin/groupNormalizationPlugin.cpp index bfdb42154..debd15af0 100644 --- a/plugin/groupNormalizationPlugin/groupNormalizationPlugin.cpp +++ b/plugin/groupNormalizationPlugin/groupNormalizationPlugin.cpp @@ -72,8 +72,8 @@ int32_t GroupNormalizationPlugin::getNbOutputs() const noexcept return 1; } -nvinfer1::DimsExprs GroupNormalizationPlugin::getOutputDimensions( - int32_t index, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +nvinfer1::DimsExprs GroupNormalizationPlugin::getOutputDimensions(int32_t index, nvinfer1::DimsExprs const* inputs, + int32_t nbInputs, nvinfer1::IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -90,7 +90,7 @@ nvinfer1::DimsExprs 
GroupNormalizationPlugin::getOutputDimensions( } void GroupNormalizationPlugin::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* gpuAllocator) noexcept { try { @@ -149,28 +149,28 @@ int32_t GroupNormalizationPlugin::enqueue(nvinfer1::PluginTensorDesc const* inpu float const one = 1.F; float const zero = 0.F; PLUGIN_CHECK_CUDNN(mCudnnWrapper->cudnnBatchNormalizationForwardTraining(mCudnnHandle, // handle - CUDNN_BATCHNORM_SPATIAL, // BatchNormMode_t, try also non persistent - &one, // - &zero, // - mTensorDesc, // in/out descriptor - inputs[0], // input - mTensorDesc, // in/out descriptor - outputs[0], // output - mBNTensorDesc, // - mBnScales->mPtr, // 1 - mBnBias->mPtr, // 0 - 0.0, // exponential average factor - nullptr, // resultRunningMean - nullptr, // resultRunningVar - mEpsilon, // eps - nullptr, // resultSaveMean - nullptr // resultSaveInvVar + CUDNN_BATCHNORM_SPATIAL, // BatchNormMode_t, try also non persistent + &one, // + &zero, // + mTensorDesc, // in/out descriptor + inputs[0], // input + mTensorDesc, // in/out descriptor + outputs[0], // output + mBNTensorDesc, // + mBnScales->mPtr, // 1 + mBnBias->mPtr, // 0 + static_cast(0.0F), // exponential average factor + nullptr, // resultRunningMean + nullptr, // resultRunningVar + static_cast(mEpsilon), // eps + nullptr, // resultSaveMean + nullptr // resultSaveInvVar )); // Apply an additional scale and bias on each channel. 
nvinfer1::Dims inputDims = inputDesc[0].dims; - int32_t batchSize = inputDims.d[0]; - int32_t nbChannels = inputDims.d[1]; + int32_t batchSize = static_cast(inputDims.d[0]); + int32_t nbChannels = static_cast(inputDims.d[1]); auto* output = static_cast(outputs[0]); return scaleShiftChannelsInplace(output, batchSize, nbChannels, mChannelVolume, static_cast(inputs[2]), static_cast(inputs[1]), stream); // mBetaDev, mGammaDev, @@ -250,8 +250,8 @@ void GroupNormalizationPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc PLUGIN_VALIDATE(nbOutputs == getNbOutputs()); nvinfer1::Dims inputDims = in[0].desc.dims; - int32_t const batchSize = inputDims.d[0]; - int32_t const nbChannels = inputDims.d[1]; + int32_t const batchSize = static_cast(inputDims.d[0]); + int32_t const nbChannels = static_cast(inputDims.d[1]); if (batchSize <= 0 || nbChannels <= 0) { @@ -267,7 +267,8 @@ void GroupNormalizationPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc // Allocate scale/bias tensors needed for cudnnBatchNorm. mNbScaleBias = batchSize * mNbGroups; - auto allocScaleBias = [this](std::shared_ptr>& buf, float value) { + auto allocScaleBias = [this](std::shared_ptr>& buf, float value) + { PLUGIN_VALIDATE(mNbScaleBias > 0); if (!buf || !buf->mPtr || buf->mSize != mNbScaleBias) { @@ -276,8 +277,8 @@ void GroupNormalizationPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc // Initialize values. 
std::vector const values(mNbScaleBias, value); - PLUGIN_CUASSERT( - cudaMemcpy(buf->mPtr, values.data(), sizeof(float) * mNbScaleBias, cudaMemcpyHostToDevice)); + PLUGIN_CUASSERT(cudaMemcpy(buf->mPtr, values.data(), sizeof(float) * static_cast(mNbScaleBias), + cudaMemcpyHostToDevice)); } }; allocScaleBias(mBnScales, 1.F); @@ -285,7 +286,8 @@ void GroupNormalizationPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc // Calculate size of each group int32_t groupSize = nbChannels / mNbGroups; - mChannelVolume = pluginInternal::volume(inputDims, /*start*/ 2, /*stop*/ inputDims.nbDims); + mChannelVolume + = static_cast(pluginInternal::volume(inputDims, /*start*/ 2, /*stop*/ inputDims.nbDims)); // Set tensor descriptor in a way that cudnnBatchNorm will perform Group Normalization. PLUGIN_CUDNNASSERT(mCudnnWrapper->cudnnSetTensor4dDescriptor(mTensorDesc, // descriptor @@ -306,7 +308,7 @@ void GroupNormalizationPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc } nvinfer1::DataType GroupNormalizationPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { try { @@ -321,8 +323,8 @@ nvinfer1::DataType GroupNormalizationPlugin::getOutputDataType( } } -size_t GroupNormalizationPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t GroupNormalizationPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -351,7 +353,7 @@ GroupNormalizationPluginCreator::GroupNormalizationPluginCreator() mPluginAttributes.emplace_back(PluginField("eps", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("num_groups", nullptr, 
PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -389,7 +391,7 @@ void GroupNormalizationPluginCreator::setPluginNamespace(char const* libNamespac } IPluginV2DynamicExt* GroupNormalizationPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -425,7 +427,7 @@ IPluginV2DynamicExt* GroupNormalizationPluginCreator::createPlugin( } IPluginV2DynamicExt* GroupNormalizationPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/groupNormalizationPlugin/groupNormalizationPlugin.h b/plugin/groupNormalizationPlugin/groupNormalizationPlugin.h index 29d893e00..962151ba9 100644 --- a/plugin/groupNormalizationPlugin/groupNormalizationPlugin.h +++ b/plugin/groupNormalizationPlugin/groupNormalizationPlugin.h @@ -37,6 +37,11 @@ cudaError_t scaleShiftChannelsInplace(T* inOut, int32_t const B, int32_t const C class GroupNormalizationPlugin final : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + GroupNormalizationPlugin(float epsilon, int32_t const nbGroups); GroupNormalizationPlugin(void const* data, size_t length); @@ -110,11 +115,6 @@ class GroupNormalizationPlugin final : public nvinfer1::IPluginV2DynamicExt std::shared_ptr> mBnScales{}; std::shared_ptr> mBnBias{}; size_t mNbScaleBias{}; - - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; }; class GroupNormalizationPluginCreator : 
public IPluginCreator diff --git a/plugin/instanceNormalizationPlugin/instanceNormFwd.h b/plugin/instanceNormalizationPlugin/instanceNormFwd.h index e09096b33..4a47e3fb9 100644 --- a/plugin/instanceNormalizationPlugin/instanceNormFwd.h +++ b/plugin/instanceNormalizationPlugin/instanceNormFwd.h @@ -25,6 +25,7 @@ namespace instance_norm_impl { +#undef PLUGIN_CHECK_CUDA #define PLUGIN_CHECK_CUDA(call) \ do \ { \ @@ -35,6 +36,7 @@ namespace instance_norm_impl } \ } while (0) +#undef PLUGIN_CHECK_CUDNN #define PLUGIN_CHECK_CUDNN(call) \ do \ { \ @@ -105,7 +107,7 @@ struct InstanceNormFwdContext InstanceNormFwdContext() : sm_count(0) , sm_shared_size(0) - , sm_version(0){}; + , sm_version(0) {}; int32_t sm_count; int32_t sm_shared_size; int32_t sm_version; diff --git a/plugin/instanceNormalizationPlugin/instanceNormalizationPluginLegacy.h b/plugin/instanceNormalizationPlugin/instanceNormalizationPluginLegacy.h index 2d6412d50..d3a0a2089 100644 --- a/plugin/instanceNormalizationPlugin/instanceNormalizationPluginLegacy.h +++ b/plugin/instanceNormalizationPlugin/instanceNormalizationPluginLegacy.h @@ -54,7 +54,7 @@ class InstanceNormalizationPlugin : public nvinfer1::IPluginV2DynamicExt int32_t getNbOutputs() const noexcept override; // DynamicExt plugins returns DimsExprs class instead of Dims - using nvinfer1::IPluginV2::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; DimsExprs getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept override; @@ -62,11 +62,11 @@ class InstanceNormalizationPlugin : public nvinfer1::IPluginV2DynamicExt void terminate() noexcept override; - using nvinfer1::IPluginV2::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; size_t getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override; - using 
nvinfer1::IPluginV2::enqueue; + using nvinfer1::IPluginV2DynamicExt::enqueue; int32_t enqueue(nvinfer1::PluginTensorDesc const* inputDesc, nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; @@ -98,7 +98,7 @@ class InstanceNormalizationPlugin : public nvinfer1::IPluginV2DynamicExt void detachFromContext() noexcept override; - using nvinfer1::IPluginV2Ext::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::configurePlugin; void configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept override; diff --git a/plugin/leakyReluPlugin/lReluPlugin.cpp b/plugin/leakyReluPlugin/lReluPlugin.cpp index 57517725c..193b8ac5e 100644 --- a/plugin/leakyReluPlugin/lReluPlugin.cpp +++ b/plugin/leakyReluPlugin/lReluPlugin.cpp @@ -51,13 +51,13 @@ Dims LReLU::getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbInp return inputs[0]; } -int32_t LReLU::enqueue( - int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t LReLU::enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t stream) noexcept { void const* inputData = inputs[0]; void* outputData = outputs[0]; pluginStatus_t status = lReLUInference(stream, mBatchDim * batchSize, mNegSlope, inputData, outputData); - return status; + return static_cast(status); } size_t LReLU::getSerializationSize() const noexcept @@ -82,7 +82,7 @@ void LReLU::configureWithFormat(Dims const* inputDims, int32_t /* nbInputs */, D PLUGIN_ASSERT(nbOutputs == 1); for (int32_t i = 0; i < inputDims[0].nbDims; ++i) { - mBatchDim *= inputDims[0].d[i]; + mBatchDim *= static_cast(inputDims[0].d[i]); } } @@ -138,7 +138,7 @@ LReluPluginCreator::LReluPluginCreator() mPluginAttributes.clear(); 
mPluginAttributes.emplace_back(PluginField("negSlope", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -157,7 +157,7 @@ PluginFieldCollection const* LReluPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2* LReluPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2* LReluPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -180,7 +180,8 @@ IPluginV2* LReluPluginCreator::createPlugin(char const* name, PluginFieldCollect return nullptr; } -IPluginV2* LReluPluginCreator::deserializePlugin(char const* name, void const* serialData, size_t serialLength) noexcept +IPluginV2* LReluPluginCreator::deserializePlugin( + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.cpp b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.cpp index c2a1aadd7..484a92057 100644 --- a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.cpp +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.cpp @@ -106,8 +106,8 @@ IPluginCapability* ModulatedDeformableConvPluginDynamic::getCapabilityInterface( } int32_t ModulatedDeformableConvPluginDynamic::getOutputShapes(nvinfer1::DimsExprs const* inputs, int32_t nbInputs, - nvinfer1::DimsExprs const* shapeInputs, int32_t nbShapeInputs, nvinfer1::DimsExprs* outputs, int32_t nbOutputs, - nvinfer1::IExprBuilder& exprBuilder) noexcept + nvinfer1::DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, nvinfer1::DimsExprs* outputs, + int32_t nbOutputs, nvinfer1::IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -135,7 +135,7 @@ int32_t ModulatedDeformableConvPluginDynamic::getOutputShapes(nvinfer1::DimsExpr } bool ModulatedDeformableConvPluginDynamic::supportsFormatCombination( - 
int32_t pos, nvinfer1::DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + int32_t pos, nvinfer1::DynamicPluginTensorDesc const* inOut, int32_t /*nbInputs*/, int32_t /*nbOutputs*/) noexcept { try { @@ -185,13 +185,13 @@ size_t ModulatedDeformableConvPluginDynamic::getWorkspaceSize(nvinfer1::DynamicP int32_t /* nbInputs */, nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t /* nbOutputs */) const noexcept { // Calculate workspace size needed for the im2col buffer. - int32_t const sizeOfDtype = nvinfer1::plugin::bert::getElementSize(outputs[0].desc.type); + int32_t const sizeOfDtype = static_cast(nvinfer1::plugin::bert::getElementSize(outputs[0].desc.type)); - int32_t const nInputPlane = inputs[0].desc.dims.d[1]; // Input channels - int32_t const outputHeight = outputs[0].desc.dims.d[2]; - int32_t const outputWidth = outputs[0].desc.dims.d[3]; - int32_t const kernelH = inputs[3].desc.dims.d[2]; // Weight kernel height - int32_t const kernelW = inputs[3].desc.dims.d[3]; // Weight kernel width + int32_t const nInputPlane = static_cast(inputs[0].desc.dims.d[1]); // Input channels + int32_t const outputHeight = static_cast(outputs[0].desc.dims.d[2]); + int32_t const outputWidth = static_cast(outputs[0].desc.dims.d[3]); + int32_t const kernelH = static_cast(inputs[3].desc.dims.d[2]); // Weight kernel height + int32_t const kernelW = static_cast(inputs[3].desc.dims.d[3]); // Weight kernel width // Calculate size needed for the intermediate 'columns' buffer used in im2col + GEMM approach. 
int64_t const colSize @@ -211,13 +211,13 @@ int32_t ModulatedDeformableConvPluginDynamic::enqueue(nvinfer1::PluginTensorDesc && workspace != nullptr); // Extract dimensions - int32_t const batch = inputDescs[0].dims.d[0]; - int32_t const channels = inputDescs[0].dims.d[1]; - int32_t const height = inputDescs[0].dims.d[2]; - int32_t const width = inputDescs[0].dims.d[3]; - int32_t const channelsOut = outputDescs[0].dims.d[1]; - int32_t const kernelH = inputDescs[3].dims.d[2]; // Weight kernel height - int32_t const kernelW = inputDescs[3].dims.d[3]; // Weight kernel width + int32_t const batch = static_cast(inputDescs[0].dims.d[0]); + int32_t const channels = static_cast(inputDescs[0].dims.d[1]); + int32_t const height = static_cast(inputDescs[0].dims.d[2]); + int32_t const width = static_cast(inputDescs[0].dims.d[3]); + int32_t const channelsOut = static_cast(outputDescs[0].dims.d[1]); + int32_t const kernelH = static_cast(inputDescs[3].dims.d[2]); // Weight kernel height + int32_t const kernelW = static_cast(inputDescs[3].dims.d[3]); // Weight kernel width // Get input/output pointers void const* inputTensor = inputs[0]; @@ -238,16 +238,20 @@ int32_t ModulatedDeformableConvPluginDynamic::enqueue(nvinfer1::PluginTensorDesc static_cast(weightTensor), static_cast(biasTensor), static_cast(offsetTensor), static_cast(maskTensor), static_cast(outputTensor), workspace, batch, channels, height, width, channelsOut, kernelW, - kernelH, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1], - mGroup, mDeformableGroup, im2colStep, mCublasHandle, stream); + kernelH, static_cast(mStride.d[0]), static_cast(mStride.d[1]), + static_cast(mPadding.d[0]), static_cast(mPadding.d[1]), + static_cast(mDilation.d[0]), static_cast(mDilation.d[1]), mGroup, mDeformableGroup, + im2colStep, mCublasHandle, stream); break; case nvinfer1::DataType::kHALF: ModulatedDeformConvForwardCUDAKernelLauncherHalf(static_cast(inputTensor), static_cast(weightTensor), 
static_cast(biasTensor), static_cast(offsetTensor), static_cast(maskTensor), static_cast(outputTensor), workspace, batch, channels, height, width, channelsOut, kernelW, - kernelH, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1], - mGroup, mDeformableGroup, im2colStep, mCublasHandle, stream); + kernelH, static_cast(mStride.d[0]), static_cast(mStride.d[1]), + static_cast(mPadding.d[0]), static_cast(mPadding.d[1]), + static_cast(mDilation.d[0]), static_cast(mDilation.d[1]), mGroup, mDeformableGroup, + im2colStep, mCublasHandle, stream); break; default: // Unsupported data type @@ -361,7 +365,7 @@ nvinfer1::PluginFieldCollection const* ModulatedDeformableConvPluginDynamic::get mDataToSerialize.emplace_back("group", &mGroup, PluginFieldType::kINT32, 1); mDataToSerialize.emplace_back("deformable_group", &mDeformableGroup, PluginFieldType::kINT32, 1); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -383,7 +387,7 @@ ModulatedDeformableConvPluginDynamicCreator::ModulatedDeformableConvPluginDynami mPluginAttributes.emplace_back(PluginField("group", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("deformable_group", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.cpp b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.cpp index 4fa4b7134..fd081b24a 100644 --- a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.cpp +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.cpp @@ -96,8 +96,8 @@ nvinfer1::IPluginV2DynamicExt* ModulatedDeformableConvPluginDynamicLegacy::clone return nullptr; } 
-nvinfer1::DimsExprs ModulatedDeformableConvPluginDynamicLegacy::getOutputDimensions(int32_t outputIndex, - nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +nvinfer1::DimsExprs ModulatedDeformableConvPluginDynamicLegacy::getOutputDimensions(int32_t /*outputIndex*/, + nvinfer1::DimsExprs const* inputs, int32_t /*nbInputs*/, nvinfer1::IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -118,7 +118,7 @@ nvinfer1::DimsExprs ModulatedDeformableConvPluginDynamicLegacy::getOutputDimensi } bool ModulatedDeformableConvPluginDynamicLegacy::supportsFormatCombination( - int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t /*nbInputs*/, int32_t /*nbOutputs*/) noexcept { if (pos == 0) { @@ -131,8 +131,8 @@ bool ModulatedDeformableConvPluginDynamicLegacy::supportsFormatCombination( } } -void ModulatedDeformableConvPluginDynamicLegacy::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* inputs, - int32_t nbInputs, nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +void ModulatedDeformableConvPluginDynamicLegacy::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* /*inputs*/, + int32_t nbInputs, nvinfer1::DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { try { @@ -148,19 +148,20 @@ void ModulatedDeformableConvPluginDynamicLegacy::configurePlugin(nvinfer1::Dynam } size_t ModulatedDeformableConvPluginDynamicLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, - int32_t nbInputs, nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept + int32_t /*nbInputs*/, nvinfer1::PluginTensorDesc const* outputs, int32_t /*nbOutputs*/) const noexcept { - int32_t sizeofDtype = nvinfer1::plugin::bert::getElementSize(outputs[0].type); + int32_t sizeofDtype = static_cast(nvinfer1::plugin::bert::getElementSize(outputs[0].type)); - int32_t 
nInputPlane = inputs[0].dims.d[1]; - int32_t outputHeight = outputs[0].dims.d[2]; - int32_t outputWidth = outputs[0].dims.d[3]; - int32_t kH = inputs[3].dims.d[2]; - int32_t kW = inputs[3].dims.d[3]; + int32_t nInputPlane = static_cast(inputs[0].dims.d[1]); + int32_t outputHeight = static_cast(outputs[0].dims.d[2]); + int32_t outputWidth = static_cast(outputs[0].dims.d[3]); + int32_t kH = static_cast(inputs[3].dims.d[2]); + int32_t kW = static_cast(inputs[3].dims.d[3]); - int64_t colSize = divUp(nInputPlane * kW * kH * outputHeight * outputWidth * sizeofDtype, 16) * 16; + int64_t colSize + = divUp(static_cast(nInputPlane) * kW * kH * outputHeight * outputWidth * sizeofDtype, 16) * 16; - return colSize; + return static_cast(colSize); } int32_t ModulatedDeformableConvPluginDynamicLegacy::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, @@ -172,13 +173,13 @@ int32_t ModulatedDeformableConvPluginDynamicLegacy::enqueue(nvinfer1::PluginTens PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr && workSpace != nullptr); - int32_t batch = inputDesc[0].dims.d[0]; - int32_t channels = inputDesc[0].dims.d[1]; - int32_t height = inputDesc[0].dims.d[2]; - int32_t width = inputDesc[0].dims.d[3]; - int32_t channelsOut = outputDesc[0].dims.d[1]; - int32_t kernelH = inputDesc[3].dims.d[2]; - int32_t kernelW = inputDesc[3].dims.d[3]; + int32_t batch = static_cast(inputDesc[0].dims.d[0]); + int32_t channels = static_cast(inputDesc[0].dims.d[1]); + int32_t height = static_cast(inputDesc[0].dims.d[2]); + int32_t width = static_cast(inputDesc[0].dims.d[3]); + int32_t channelsOut = static_cast(outputDesc[0].dims.d[1]); + int32_t kernelH = static_cast(inputDesc[3].dims.d[2]); + int32_t kernelW = static_cast(inputDesc[3].dims.d[3]); void const* x = inputs[0]; void const* offset = inputs[1]; @@ -192,16 +193,22 @@ int32_t ModulatedDeformableConvPluginDynamicLegacy::enqueue(nvinfer1::PluginTens switch (data_type) { case 
nvinfer1::DataType::kFLOAT: - ModulatedDeformConvForwardCUDAKernelLauncherFloat((float*) x, (float*) weight, (float*) bias, - (float*) offset, (float*) mask, (float*) output, workSpace, batch, channels, height, width, channelsOut, - kernelW, kernelH, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], - mDilation.d[1], mGroup, mDeformableGroup, im2colStep, mCublasHandle, stream); + ModulatedDeformConvForwardCUDAKernelLauncherFloat(static_cast(x), + static_cast(weight), static_cast(bias), static_cast(offset), + static_cast(mask), static_cast(output), workSpace, batch, channels, height, width, + channelsOut, kernelW, kernelH, static_cast(mStride.d[0]), static_cast(mStride.d[1]), + static_cast(mPadding.d[0]), static_cast(mPadding.d[1]), + static_cast(mDilation.d[0]), static_cast(mDilation.d[1]), mGroup, mDeformableGroup, + im2colStep, mCublasHandle, stream); break; case nvinfer1::DataType::kHALF: - ModulatedDeformConvForwardCUDAKernelLauncherHalf((half*) x, (half*) weight, (half*) bias, (half*) offset, - (half*) mask, (half*) output, workSpace, batch, channels, height, width, channelsOut, kernelW, kernelH, - mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1], mGroup, - mDeformableGroup, im2colStep, mCublasHandle, stream); + ModulatedDeformConvForwardCUDAKernelLauncherHalf(static_cast(x), + static_cast(weight), static_cast(bias), static_cast(offset), + static_cast(mask), static_cast(output), workSpace, batch, channels, height, width, + channelsOut, kernelW, kernelH, static_cast(mStride.d[0]), static_cast(mStride.d[1]), + static_cast(mPadding.d[0]), static_cast(mPadding.d[1]), + static_cast(mDilation.d[0]), static_cast(mDilation.d[1]), mGroup, mDeformableGroup, + im2colStep, mCublasHandle, stream); break; default: return 1; } @@ -215,7 +222,7 @@ int32_t ModulatedDeformableConvPluginDynamicLegacy::enqueue(nvinfer1::PluginTens } nvinfer1::DataType ModulatedDeformableConvPluginDynamicLegacy::getOutputDataType( - 
int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { return inputTypes[0]; } @@ -265,7 +272,7 @@ void ModulatedDeformableConvPluginDynamicLegacy::destroy() noexcept } void ModulatedDeformableConvPluginDynamicLegacy::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, nvinfer1::IGpuAllocator* gpuAllocator) noexcept { try { @@ -308,7 +315,7 @@ ModulatedDeformableConvPluginDynamicLegacyCreator::ModulatedDeformableConvPlugin mPluginAttributes.emplace_back(nvinfer1::PluginField("group", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back( nvinfer1::PluginField("deformable_group", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -365,8 +372,8 @@ nvinfer1::IPluginV2* ModulatedDeformableConvPluginDynamicLegacyCreator::createPl { PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); stride.nbDims = 2; - stride.d[0] = static_cast(fc->fields[i].data)[0]; - stride.d[1] = static_cast(fc->fields[i].data)[1]; + stride.d[0] = static_cast(static_cast(fc->fields[i].data)[0]); + stride.d[1] = static_cast(static_cast(fc->fields[i].data)[1]); PLUGIN_VALIDATE(stride.d[0] > 0); PLUGIN_VALIDATE(stride.d[1] > 0); } @@ -375,8 +382,8 @@ nvinfer1::IPluginV2* ModulatedDeformableConvPluginDynamicLegacyCreator::createPl { PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); padding.nbDims = 2; - padding.d[0] = static_cast(fc->fields[i].data)[0]; - padding.d[1] = static_cast(fc->fields[i].data)[1]; + padding.d[0] = static_cast(static_cast(fc->fields[i].data)[0]); + padding.d[1] = static_cast(static_cast(fc->fields[i].data)[1]); 
PLUGIN_VALIDATE(padding.d[0] >= 0); PLUGIN_VALIDATE(padding.d[1] >= 0); } @@ -385,8 +392,8 @@ nvinfer1::IPluginV2* ModulatedDeformableConvPluginDynamicLegacyCreator::createPl { PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); dilation.nbDims = 2; - dilation.d[0] = static_cast(fc->fields[i].data)[0]; - dilation.d[1] = static_cast(fc->fields[i].data)[1]; + dilation.d[0] = static_cast(static_cast(fc->fields[i].data)[0]); + dilation.d[1] = static_cast(static_cast(fc->fields[i].data)[1]); PLUGIN_VALIDATE(dilation.d[0] > 0); PLUGIN_VALIDATE(dilation.d[1] > 0); } diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h index 3526030e7..475fa3191 100644 --- a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h @@ -46,6 +46,11 @@ namespace plugin class ModulatedDeformableConvPluginDynamicLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + ModulatedDeformableConvPluginDynamicLegacy(std::string const& name, nvinfer1::Dims const stride, nvinfer1::Dims const padding, nvinfer1::Dims const dilation, int32_t const deformableGroup, int32_t const group); diff --git a/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.cpp b/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.cpp index 79a3800eb..3e9e0921f 100644 --- a/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.cpp +++ b/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.cpp @@ -39,7 +39,7 @@ MultilevelCropAndResizePluginCreator::MultilevelCropAndResizePluginCreator() noe mPluginAttributes.emplace_back(PluginField("pooled_size", nullptr, 
PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("image_size", nullptr, PluginFieldType::kINT32, 3)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -59,7 +59,7 @@ PluginFieldCollection const* MultilevelCropAndResizePluginCreator::getFieldNames } IPluginV2Ext* MultilevelCropAndResizePluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -92,7 +92,7 @@ IPluginV2Ext* MultilevelCropAndResizePluginCreator::createPlugin( } IPluginV2Ext* MultilevelCropAndResizePluginCreator::deserializePlugin( - char const* name, void const* data, size_t length) noexcept + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -113,8 +113,8 @@ MultilevelCropAndResize::MultilevelCropAndResize(int32_t pooled_size, nvinfer1:: PLUGIN_VALIDATE(imageSize.nbDims == 3); PLUGIN_VALIDATE(imageSize.d[0] > 0 && imageSize.d[1] > 0 && imageSize.d[2] > 0); // shape - mInputHeight = imageSize.d[1]; - mInputWidth = imageSize.d[2]; + mInputHeight = static_cast(imageSize.d[1]); + mInputWidth = static_cast(imageSize.d[2]); // Threshold to P3: Smaller -> P2 mThresh = (224 * 224) / (4.0F); } @@ -136,7 +136,7 @@ void MultilevelCropAndResize::destroy() noexcept delete this; } -size_t MultilevelCropAndResize::getWorkspaceSize(int32_t) const noexcept +size_t MultilevelCropAndResize::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } @@ -220,8 +220,8 @@ Dims MultilevelCropAndResize::getOutputDimensions(int32_t index, Dims const* inp return result; } -int32_t MultilevelCropAndResize::enqueue( - int32_t batch_size, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t MultilevelCropAndResize::enqueue(int32_t batch_size, void const* const* inputs, void* const* outputs, + void* /*workspace*/, 
cudaStream_t stream) noexcept { void* pooled = outputs[0]; @@ -238,8 +238,8 @@ int32_t MultilevelCropAndResize::enqueue( size_t MultilevelCropAndResize::getSerializationSize() const noexcept { - return sizeof(int32_t) * 2 + sizeof(int32_t) * 4 + sizeof(float) + sizeof(int32_t) * 2 * mFeatureMapCount - + sizeof(DataType); + return sizeof(int32_t) * 2 + sizeof(int32_t) * 4 + sizeof(float) + + sizeof(int32_t) * 2 * static_cast(mFeatureMapCount) + sizeof(DataType); } void MultilevelCropAndResize::serialize(void* buffer) const noexcept @@ -285,7 +285,7 @@ void MultilevelCropAndResize::deserialize(int8_t const* data, size_t length) // Return the DataType of the plugin output at the requested index DataType MultilevelCropAndResize::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { // Only DataType::kFLOAT is acceptable by the plugin layer // return DataType::kFLOAT; @@ -298,21 +298,21 @@ DataType MultilevelCropAndResize::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool MultilevelCropAndResize::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool MultilevelCropAndResize::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool MultilevelCropAndResize::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
-void MultilevelCropAndResize::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void MultilevelCropAndResize::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat floatFormat, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(supportsFormat(inputTypes[0], floatFormat)); check_valid_inputs(inputDims, nbInputs); @@ -325,7 +325,7 @@ void MultilevelCropAndResize::configurePlugin(Dims const* inputDims, int32_t nbI mROICount = dimToInt32(inputDims[0].d[0]); mFeatureLength = dimToInt32(inputDims[1].d[0]); - for (size_t layer = 0; layer < mFeatureMapCount; ++layer) + for (int32_t layer = 0; layer < mFeatureMapCount; ++layer) { mFeatureSpatialSize[layer] = {dimToInt32(inputDims[layer + 1].d[1]), dimToInt32(inputDims[layer + 1].d[2])}; } @@ -340,7 +340,7 @@ void MultilevelCropAndResize::configurePlugin(Dims const* inputDims, int32_t nbI // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void MultilevelCropAndResize::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/multilevelProposeROI/multilevelProposeROIPlugin.cpp b/plugin/multilevelProposeROI/multilevelProposeROIPlugin.cpp index f9dddc0a2..75e090023 100644 --- a/plugin/multilevelProposeROI/multilevelProposeROIPlugin.cpp +++ b/plugin/multilevelProposeROI/multilevelProposeROIPlugin.cpp @@ -45,7 +45,7 @@ MultilevelProposeROIPluginCreator::MultilevelProposeROIPluginCreator() noexcept mPluginAttributes.emplace_back(PluginField("iou_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("image_size", nullptr, PluginFieldType::kINT32, 3)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -65,7 +65,7 @@ PluginFieldCollection const* MultilevelProposeROIPluginCreator::getFieldNames() } IPluginV2Ext* MultilevelProposeROIPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -112,7 +112,7 @@ IPluginV2Ext* MultilevelProposeROIPluginCreator::createPlugin( } IPluginV2Ext* MultilevelProposeROIPluginCreator::deserializePlugin( - char const* name, void const* data, size_t length) noexcept + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -174,7 +174,7 @@ int32_t MultilevelProposeROI::initialize() noexcept mValidCnt = std::make_shared>(mMaxBatchSize); PLUGIN_CUASSERT(cudaMemcpy(mValidCnt->mPtr, static_cast(tempValidCnt.data()), - sizeof(int32_t) * mMaxBatchSize, cudaMemcpyHostToDevice)); + sizeof(int32_t) * static_cast(mMaxBatchSize), cudaMemcpyHostToDevice)); // Init the anchors for batch size: for (int32_t i = 0; i < mFeatureCnt; i++) @@ -182,11 
+182,11 @@ int32_t MultilevelProposeROI::initialize() noexcept int32_t i_anchors_cnt = mAnchorsCnt[i]; auto i_anchors_host = mAnchorBoxesHost[i].data(); auto i_anchors_device = std::make_shared>(i_anchors_cnt * 4 * mMaxBatchSize); - int32_t batch_offset = sizeof(float) * i_anchors_cnt * 4; + size_t batch_offset = sizeof(float) * static_cast(i_anchors_cnt) * 4; uint8_t* device_ptr = static_cast(i_anchors_device->mPtr); - for (int32_t i = 0; i < mMaxBatchSize; i++) + for (int32_t b = 0; b < mMaxBatchSize; b++) { - PLUGIN_CUASSERT(cudaMemcpy(static_cast(device_ptr + i * batch_offset), + PLUGIN_CUASSERT(cudaMemcpy(static_cast(device_ptr + b * batch_offset), static_cast(i_anchors_host), batch_offset, cudaMemcpyHostToDevice)); } mAnchorBoxesDevice.push_back(i_anchors_device); @@ -218,11 +218,13 @@ int32_t MultilevelProposeROI::initialize() noexcept } // Init the temp storage for pointer arrays of score and box: - PLUGIN_CUASSERT(cudaMalloc(&mDeviceScores, sizeof(void*) * mFeatureCnt)); - PLUGIN_CUASSERT(cudaMalloc(&mDeviceBboxes, sizeof(void*) * mFeatureCnt)); + PLUGIN_CUASSERT(cudaMalloc(&mDeviceScores, sizeof(void*) * static_cast(mFeatureCnt))); + PLUGIN_CUASSERT(cudaMalloc(&mDeviceBboxes, sizeof(void*) * static_cast(mFeatureCnt))); - PLUGIN_CUASSERT(cudaMemcpy(mDeviceScores, score_tp.data(), sizeof(void*) * mFeatureCnt, cudaMemcpyHostToDevice)); - PLUGIN_CUASSERT(cudaMemcpy(mDeviceBboxes, box_tp.data(), sizeof(void*) * mFeatureCnt, cudaMemcpyHostToDevice)); + PLUGIN_CUASSERT(cudaMemcpy( + mDeviceScores, score_tp.data(), sizeof(void*) * static_cast(mFeatureCnt), cudaMemcpyHostToDevice)); + PLUGIN_CUASSERT(cudaMemcpy( + mDeviceBboxes, box_tp.data(), sizeof(void*) * static_cast(mFeatureCnt), cudaMemcpyHostToDevice)); return 0; } @@ -274,8 +276,8 @@ char const* MultilevelProposeROI::getPluginNamespace() const noexcept size_t MultilevelProposeROI::getSerializationSize() const noexcept { - return sizeof(int32_t) * 2 + sizeof(float) * 2 + sizeof(int32_t) * (mFeatureCnt + 1) 
+ sizeof(nvinfer1::Dims) - + sizeof(DataType); + return sizeof(int32_t) * 2 + sizeof(float) * 2 + sizeof(int32_t) * static_cast(mFeatureCnt + 1) + + sizeof(nvinfer1::Dims) + sizeof(DataType); } void MultilevelProposeROI::serialize(void* buffer) const noexcept @@ -388,9 +390,9 @@ void MultilevelProposeROI::generate_pyramid_anchors(nvinfer1::Dims const& imageS std::vector anchor_strides; for (int32_t i = min_level; i < max_level + 1; i++) { - int32_t stride = static_cast(pow(2.0, i)); + int32_t stride = static_cast(std::pow(2.0, i)); anchor_strides.push_back(stride); - anchor_scales.push_back(stride * anchor_scale); + anchor_scales.push_back(static_cast(stride) * anchor_scale); } auto& anchors = mAnchorBoxesHost; @@ -411,7 +413,9 @@ void MultilevelProposeROI::generate_pyramid_anchors(nvinfer1::Dims const& imageS float w = scale * r.first; // Using y+h/2 instead of y+h/2-1 for alignment with TLT implementation - s_anchors.insert(s_anchors.end(), {(y - h / 2), (x - w / 2), (y + h / 2), (x + w / 2)}); + s_anchors.insert(s_anchors.end(), + {(static_cast(y) - h / 2), (static_cast(x) - w / 2), + (static_cast(y) + h / 2), (static_cast(x) + w / 2)}); } anchors.push_back(s_anchors); @@ -470,12 +474,12 @@ int32_t MultilevelProposeROI::enqueue( reinterpret_cast(mDeviceBboxes), final_proposals); PLUGIN_ASSERT(status == cudaSuccess); - return status; + return static_cast(status); } // Return the DataType of the plugin output at the requested index DataType MultilevelProposeROI::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { // Only DataType::kFLOAT is acceptable by the plugin layer if ((inputTypes[0] == DataType::kFLOAT) || (inputTypes[0] == DataType::kHALF)) @@ -485,29 +489,30 @@ DataType MultilevelProposeROI::getOutputDataType( // Return true if output tensor is broadcast across a batch. 
bool MultilevelProposeROI::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool MultilevelProposeROI::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool MultilevelProposeROI::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. -void MultilevelProposeROI::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void MultilevelProposeROI::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t /*nbOutputs*/, DataType const* inputTypes, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t maxBatchSize) noexcept { check_valid_inputs(inputDims, nbInputs); mAnchorsCnt.clear(); for (int32_t i = 0; i < mFeatureCnt; i++) { - mAnchorsCnt.push_back(inputDims[2 * i].d[0]); - PLUGIN_ASSERT(mAnchorsCnt[i] == (int32_t) (mAnchorBoxesHost[i].size() / 4)); + mAnchorsCnt.push_back(static_cast(inputDims[2 * i].d[0])); + PLUGIN_ASSERT(mAnchorsCnt[i] == static_cast(mAnchorBoxesHost[i].size() / 4)); } mMaxBatchSize = maxBatchSize; @@ -517,7 +522,7 @@ void MultilevelProposeROI::configurePlugin(Dims const* inputDims, int32_t nbInpu // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void MultilevelProposeROI::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp index 80182b3d5..06f2d95fe 100644 --- a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp @@ -119,8 +119,8 @@ int32_t MultiscaleDeformableAttnPlugin::getOutputDataTypes( } int32_t MultiscaleDeformableAttnPlugin::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, - IExprBuilder& exprBuilder) noexcept + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, + IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -236,7 +236,7 @@ PluginFieldCollection const* MultiscaleDeformableAttnPlugin::getFieldsToSerializ { mDataToSerialize.clear(); // This plugin has no fields to serialize - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -248,8 +248,8 @@ PluginFieldCollection const* MultiscaleDeformableAttnPlugin::getFieldsToSerializ } // IPluginV3OneRuntime methods -size_t MultiscaleDeformableAttnPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t MultiscaleDeformableAttnPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { // No workspace needed for 
this plugin return 0; @@ -296,7 +296,7 @@ int32_t MultiscaleDeformableAttnPlugin::onShapeChange( return STATUS_FAILURE; } -IPluginV3* MultiscaleDeformableAttnPlugin::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* MultiscaleDeformableAttnPlugin::attachToContext(IPluginResourceContext* /*context*/) noexcept { try { @@ -310,21 +310,22 @@ IPluginV3* MultiscaleDeformableAttnPlugin::attachToContext(IPluginResourceContex return nullptr; } -int32_t MultiscaleDeformableAttnPlugin::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t MultiscaleDeformableAttnPlugin::enqueue(PluginTensorDesc const* inputDesc, + PluginTensorDesc const* /*outputDesc*/, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t stream) noexcept { try { PLUGIN_VALIDATE( inputDesc != nullptr && inputs != nullptr && outputs != nullptr, "Null pointers found in enqueue"); - int32_t const batch = inputDesc[0].dims.d[0]; - int32_t spatialSize = inputDesc[0].dims.d[1]; - int32_t numHeads = inputDesc[0].dims.d[2]; - int32_t channels = inputDesc[0].dims.d[3]; - int32_t numLevels = inputDesc[1].dims.d[0]; - int32_t numQuery = inputDesc[3].dims.d[1]; - int32_t numPoint = inputDesc[3].dims.d[4]; + int32_t const batch = static_cast(inputDesc[0].dims.d[0]); + int32_t spatialSize = static_cast(inputDesc[0].dims.d[1]); + int32_t numHeads = static_cast(inputDesc[0].dims.d[2]); + int32_t channels = static_cast(inputDesc[0].dims.d[3]); + int32_t numLevels = static_cast(inputDesc[1].dims.d[0]); + int32_t numQuery = static_cast(inputDesc[3].dims.d[1]); + int32_t numPoint = static_cast(inputDesc[3].dims.d[4]); int32_t rc = 0; if (inputDesc[0].type == DataType::kFLOAT) @@ -369,7 +370,7 @@ int32_t MultiscaleDeformableAttnPlugin::enqueue(PluginTensorDesc const* inputDes 
MultiscaleDeformableAttnPluginCreator::MultiscaleDeformableAttnPluginCreator() { mPluginAttributes.clear(); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -389,7 +390,7 @@ PluginFieldCollection const* MultiscaleDeformableAttnPluginCreator::getFieldName } IPluginV3* MultiscaleDeformableAttnPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* /*name*/, PluginFieldCollection const* /*fc*/, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.cpp b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.cpp index cb5206fa2..201fa0303 100644 --- a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.cpp +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.cpp @@ -39,7 +39,7 @@ static char const* DMHA_NAME{"MultiscaleDeformableAttnPlugin_TRT"}; MultiscaleDeformableAttnPluginLegacy::MultiscaleDeformableAttnPluginLegacy() {} -MultiscaleDeformableAttnPluginLegacy::MultiscaleDeformableAttnPluginLegacy(void const* data, size_t length) {} +MultiscaleDeformableAttnPluginLegacy::MultiscaleDeformableAttnPluginLegacy(void const* /*data*/, size_t /*length*/) {} nvinfer1::IPluginV2DynamicExt* MultiscaleDeformableAttnPluginLegacy::clone() const noexcept { @@ -56,8 +56,8 @@ nvinfer1::IPluginV2DynamicExt* MultiscaleDeformableAttnPluginLegacy::clone() con return nullptr; } -nvinfer1::DimsExprs MultiscaleDeformableAttnPluginLegacy::getOutputDimensions(int32_t outputIndex, - nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +nvinfer1::DimsExprs MultiscaleDeformableAttnPluginLegacy::getOutputDimensions(int32_t /*outputIndex*/, + nvinfer1::DimsExprs const* inputs, int32_t /*nbInputs*/, nvinfer1::IExprBuilder& /*exprBuilder*/) 
noexcept { nvinfer1::DimsExprs ret; ret.nbDims = 4; @@ -88,7 +88,7 @@ bool MultiscaleDeformableAttnPluginLegacy::supportsFormatCombination( } void MultiscaleDeformableAttnPluginLegacy::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* inputs, - int32_t nbInputs, nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept + int32_t /*nbInputs*/, nvinfer1::DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { // Check for valid input dimensions PLUGIN_ASSERT(inputs[0].desc.dims.nbDims == 4); @@ -113,8 +113,8 @@ void MultiscaleDeformableAttnPluginLegacy::configurePlugin(nvinfer1::DynamicPlug PLUGIN_ASSERT(inputs[3].desc.dims.d[1] == inputs[4].desc.dims.d[1]); } -size_t MultiscaleDeformableAttnPluginLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, - int32_t nbInputs, nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t MultiscaleDeformableAttnPluginLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -125,13 +125,13 @@ int32_t MultiscaleDeformableAttnPluginLegacy::enqueue(nvinfer1::PluginTensorDesc { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batch = inputDesc[0].dims.d[0]; - int32_t spatial_size = inputDesc[0].dims.d[1]; - int32_t num_heads = inputDesc[0].dims.d[2]; - int32_t channels = inputDesc[0].dims.d[3]; - int32_t num_levels = inputDesc[1].dims.d[0]; - int32_t num_query = inputDesc[3].dims.d[1]; - int32_t num_point = inputDesc[3].dims.d[4]; + int32_t const batch = static_cast(inputDesc[0].dims.d[0]); + int32_t spatial_size = static_cast(inputDesc[0].dims.d[1]); + int32_t num_heads = static_cast(inputDesc[0].dims.d[2]); + int32_t channels = static_cast(inputDesc[0].dims.d[3]); + int32_t num_levels = static_cast(inputDesc[1].dims.d[0]); + int32_t num_query = 
static_cast(inputDesc[3].dims.d[1]); + int32_t num_point = static_cast(inputDesc[3].dims.d[4]); int32_t rc = 0; if (inputDesc[0].type == nvinfer1::DataType::kFLOAT) { @@ -161,8 +161,8 @@ int32_t MultiscaleDeformableAttnPluginLegacy::enqueue(nvinfer1::PluginTensorDesc return rc; } -void MultiscaleDeformableAttnPluginLegacy::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator) noexcept +void MultiscaleDeformableAttnPluginLegacy::attachToContext(cudnnContext* /*cudnnContext*/, + cublasContext* /*cublasContext*/, nvinfer1::IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -170,7 +170,7 @@ void MultiscaleDeformableAttnPluginLegacy::detachFromContext() noexcept {} // IPluginV2Ext Methods nvinfer1::DataType MultiscaleDeformableAttnPluginLegacy::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { return inputTypes[0]; } @@ -203,7 +203,7 @@ size_t MultiscaleDeformableAttnPluginLegacy::getSerializationSize() const noexce return 0; } -void MultiscaleDeformableAttnPluginLegacy::serialize(void* buffer) const noexcept {} +void MultiscaleDeformableAttnPluginLegacy::serialize(void* /*buffer*/) const noexcept {} void MultiscaleDeformableAttnPluginLegacy::destroy() noexcept { @@ -224,7 +224,7 @@ char const* MultiscaleDeformableAttnPluginLegacy::getPluginNamespace() const noe MultiscaleDeformableAttnPluginCreatorLegacy::MultiscaleDeformableAttnPluginCreatorLegacy() { mPluginAttributes.clear(); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -244,7 +244,7 @@ nvinfer1::PluginFieldCollection const* MultiscaleDeformableAttnPluginCreatorLega } IPluginV2* MultiscaleDeformableAttnPluginCreatorLegacy::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char 
const* /*name*/, PluginFieldCollection const* /*fc*/) noexcept { try { @@ -259,7 +259,7 @@ IPluginV2* MultiscaleDeformableAttnPluginCreatorLegacy::createPlugin( } IPluginV2* MultiscaleDeformableAttnPluginCreatorLegacy::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h index 18da1b789..ec19aca8f 100644 --- a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h @@ -42,6 +42,11 @@ namespace plugin class MultiscaleDeformableAttnPluginLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + // Constructors/destructors MultiscaleDeformableAttnPluginLegacy(); MultiscaleDeformableAttnPluginLegacy(void const* data, size_t length); @@ -80,16 +85,6 @@ class MultiscaleDeformableAttnPluginLegacy : public nvinfer1::IPluginV2DynamicEx private: std::string mNamespace; - -#if NV_TENSORRT_MAJOR < 8 - using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch; - using nvinfer1::IPluginV2DynamicExt::configurePlugin; - using nvinfer1::IPluginV2DynamicExt::enqueue; - using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; - using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; - using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch; - using nvinfer1::IPluginV2DynamicExt::supportsFormat; -#endif }; // Legacy creator class diff --git a/plugin/nmsPlugin/nmsPlugin.cpp b/plugin/nmsPlugin/nmsPlugin.cpp index 6e49d3aed..820f47191 100644 --- 
a/plugin/nmsPlugin/nmsPlugin.cpp +++ b/plugin/nmsPlugin/nmsPlugin.cpp @@ -52,22 +52,22 @@ DetectionOutputDynamic::DetectionOutputDynamic(DetectionOutputParameters params) { } -DetectionOutput::DetectionOutput(DetectionOutputParameters params, int32_t C1, int32_t C2, int32_t numPriors) +DetectionOutput::DetectionOutput(DetectionOutputParameters params, int32_t C1_, int32_t C2_, int32_t numPriors_) : param(params) - , C1(C1) - , C2(C2) - , numPriors(numPriors) + , C1(C1_) + , C2(C2_) + , numPriors(numPriors_) , mType(DataType::kFLOAT) , mScoreBits(16) { } DetectionOutputDynamic::DetectionOutputDynamic( - DetectionOutputParameters params, int32_t C1, int32_t C2, int32_t numPriors) + DetectionOutputParameters params, int32_t C1_, int32_t C2_, int32_t numPriors_) : param(params) - , C1(C1) - , C2(C2) - , numPriors(numPriors) + , C1(C1_) + , C2(C2_) + , numPriors(numPriors_) , mType(DataType::kFLOAT) , mScoreBits(16) { @@ -139,7 +139,7 @@ void DetectionOutput::terminate() noexcept {} void DetectionOutputDynamic::terminate() noexcept {} // Returns output dimensions at given index -Dims DetectionOutput::getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbInputDims) noexcept +Dims DetectionOutput::getOutputDimensions(int32_t index, Dims const* /*inputs*/, int32_t nbInputDims) noexcept { PLUGIN_ASSERT(nbInputDims == 3); PLUGIN_ASSERT(index == 0 || index == 1); @@ -169,17 +169,18 @@ DimsExprs DetectionOutputDynamic::getOutputDimensions( if (inputs[C1_idx].d[0]->isConstant() && inputs[C1_idx].d[1]->isConstant() && inputs[C1_idx].d[2]->isConstant() && inputs[C1_idx].d[3]->isConstant()) { - C1 = exprBuilder - .operation(DimensionOperation::kPROD, - *exprBuilder.operation(DimensionOperation::kPROD, *inputs[C1_idx].d[1], *inputs[C1_idx].d[2]), - *inputs[C1_idx].d[3]) - ->getConstantValue(); + C1 = static_cast(exprBuilder + .operation(DimensionOperation::kPROD, + *exprBuilder.operation(DimensionOperation::kPROD, *inputs[C1_idx].d[1], *inputs[C1_idx].d[2]), + 
*inputs[C1_idx].d[3]) + ->getConstantValue()); } if (inputs[C2_idx].d[0]->isConstant() && inputs[C2_idx].d[1]->isConstant() && inputs[C2_idx].d[2]->isConstant()) { - C2 = exprBuilder.operation(DimensionOperation::kPROD, *inputs[C2_idx].d[1], *inputs[C2_idx].d[2]) - ->getConstantValue(); + C2 = static_cast( + exprBuilder.operation(DimensionOperation::kPROD, *inputs[C2_idx].d[1], *inputs[C2_idx].d[2]) + ->getConstantValue()); } // Output dimensions // index 0 : Dimensions 1x param.keepTopK x 7 @@ -212,11 +213,11 @@ size_t DetectionOutput::getWorkspaceSize(int32_t maxBatchSize) const noexcept param.shareLocation, maxBatchSize, C1, C2, param.numClasses, numPriors, param.topK, mType, mType); } -size_t DetectionOutputDynamic::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t DetectionOutputDynamic::getWorkspaceSize(PluginTensorDesc const* inputs, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { - return detectionInferenceWorkspaceSize( - param.shareLocation, inputs[0].dims.d[0], C1, C2, param.numClasses, numPriors, param.topK, mType, mType); + return detectionInferenceWorkspaceSize(param.shareLocation, static_cast(inputs[0].dims.d[0]), C1, C2, + param.numClasses, numPriors, param.topK, mType, mType); } // Plugin layer implementation @@ -253,10 +254,11 @@ int32_t DetectionOutputDynamic::enqueue(PluginTensorDesc const* inputDesc, Plugi void* topDetections = outputs[0]; void* keepCount = outputs[1]; - pluginStatus_t status = detectionInference(stream, inputDesc[0].dims.d[0], C1, C2, param.shareLocation, - param.varianceEncodedInTarget, param.backgroundLabelId, numPriors, param.numClasses, param.topK, param.keepTopK, - param.confidenceThreshold, param.nmsThreshold, param.codeType, mType, locData, priorData, mType, confData, - keepCount, topDetections, workspace, param.isNormalized, param.confSigmoid, mScoreBits, false); + 
pluginStatus_t status = detectionInference(stream, static_cast(inputDesc[0].dims.d[0]), C1, C2, + param.shareLocation, param.varianceEncodedInTarget, param.backgroundLabelId, numPriors, param.numClasses, + param.topK, param.keepTopK, param.confidenceThreshold, param.nmsThreshold, param.codeType, mType, locData, + priorData, mType, confData, keepCount, topDetections, workspace, param.isNormalized, param.confSigmoid, + mScoreBits, false); return status; } @@ -305,7 +307,7 @@ bool DetectionOutput::supportsFormat(DataType type, PluginFormat format) const n } bool DetectionOutputDynamic::supportsFormatCombination( - int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t /*nbOutputs*/) noexcept { // 3 inputs, 2 outputs, so 5 input/output in total PLUGIN_ASSERT(0 <= pos && pos < 5); @@ -438,7 +440,7 @@ char const* DetectionOutputDynamic::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index. DataType DetectionOutput::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { // Two outputs PLUGIN_ASSERT(index == 0 || index == 1); @@ -454,7 +456,7 @@ DataType DetectionOutput::getOutputDataType( } DataType DetectionOutputDynamic::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { // Two outputs PLUGIN_ASSERT(index == 0 || index == 1); @@ -471,13 +473,13 @@ DataType DetectionOutputDynamic::getOutputDataType( // Return true if output tensor is broadcast across a batch. 
bool DetectionOutput::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool DetectionOutput::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool DetectionOutput::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } @@ -491,8 +493,8 @@ bool DetectionOutput::canBroadcastInputAcrossBatch(int32_t inputIndex) const noe // format: format NCHW, NHWC etc // maxbatchSize: maximum batch size for the plugin layer void DetectionOutput::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(nbInputs == 3); PLUGIN_ASSERT(nbOutputs == 2); @@ -511,11 +513,11 @@ void DetectionOutput::configurePlugin(Dims const* inputDims, int32_t nbInputs, D // Configure C1, C2 and numPriors // Input ordering C1, C2, numPriors - C1 = inputDims[param.inputOrder[0]].d[0]; - C2 = inputDims[param.inputOrder[1]].d[0]; + C1 = static_cast(inputDims[param.inputOrder[0]].d[0]); + C2 = static_cast(inputDims[param.inputOrder[1]].d[0]); int32_t const nbBoxCoordinates = 4; - numPriors = inputDims[param.inputOrder[2]].d[1] / nbBoxCoordinates; + numPriors = static_cast(inputDims[param.inputOrder[2]].d[1] / nbBoxCoordinates); int32_t const numLocClasses = param.shareLocation ? 
1 : param.numClasses; // Verify C1 @@ -548,11 +550,11 @@ void DetectionOutputDynamic::configurePlugin( // Configure C1, C2 and numPriors // Input ordering C1, C2, numPriors - C1 = in[param.inputOrder[0]].desc.dims.d[1]; - C2 = in[param.inputOrder[1]].desc.dims.d[1]; + C1 = static_cast(in[param.inputOrder[0]].desc.dims.d[1]); + C2 = static_cast(in[param.inputOrder[1]].desc.dims.d[1]); int32_t const nbBoxCoordinates = 4; - numPriors = in[param.inputOrder[2]].desc.dims.d[2] / nbBoxCoordinates; + numPriors = static_cast(in[param.inputOrder[2]].desc.dims.d[2] / nbBoxCoordinates); int32_t const numLocClasses = param.shareLocation ? 1 : param.numClasses; // Verify C1 @@ -567,7 +569,7 @@ void DetectionOutputDynamic::configurePlugin( // Attach the plugin object to an execution context and grant the plugin the access to some context resource. void DetectionOutput::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -593,7 +595,7 @@ NMSBasePluginCreator::NMSBasePluginCreator() mPluginAttributes.emplace_back(PluginField("codeType", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("scoreBits", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("isBatchAgnostic", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -626,7 +628,7 @@ PluginFieldCollection const* NMSBasePluginCreator::getFieldNames() noexcept } // Creates the NMS plugin -IPluginV2Ext* NMSPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* NMSPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -734,7 +736,8 @@ IPluginV2Ext* 
NMSPluginCreator::createPlugin(char const* name, PluginFieldCollec return nullptr; } -IPluginV2DynamicExt* NMSDynamicPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2DynamicExt* NMSDynamicPluginCreator::createPlugin( + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -838,7 +841,7 @@ IPluginV2DynamicExt* NMSDynamicPluginCreator::createPlugin(char const* name, Plu } IPluginV2Ext* NMSPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { @@ -859,7 +862,7 @@ IPluginV2Ext* NMSPluginCreator::deserializePlugin( } IPluginV2DynamicExt* NMSDynamicPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/nmsPlugin/nmsPlugin.h b/plugin/nmsPlugin/nmsPlugin.h index bdce5f2d7..6b31f3922 100644 --- a/plugin/nmsPlugin/nmsPlugin.h +++ b/plugin/nmsPlugin/nmsPlugin.h @@ -99,6 +99,11 @@ class TRT_DEPRECATED DetectionOutput : public IPluginV2Ext class TRT_DEPRECATED DetectionOutputDynamic : public IPluginV2DynamicExt { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + DetectionOutputDynamic(DetectionOutputParameters param); DetectionOutputDynamic(DetectionOutputParameters param, int32_t C1, int32_t C2, int32_t numPriors); DetectionOutputDynamic(void const* data, size_t length); diff --git a/plugin/normalizePlugin/normalizePlugin.cpp b/plugin/normalizePlugin/normalizePlugin.cpp index 0bec31dcc..c0cd25f1a 100644 --- a/plugin/normalizePlugin/normalizePlugin.cpp +++ b/plugin/normalizePlugin/normalizePlugin.cpp @@ -31,10 +31,10 @@ char const* const kNORMALIZE_PLUGIN_VERSION{"1"}; char 
const* const kNORMALIZE_PLUGIN_NAME{"Normalize_TRT"}; } // namespace -Normalize::Normalize(Weights const* weights, int32_t nbWeights, bool acrossSpatial, bool channelShared, float eps) - : acrossSpatial(acrossSpatial) - , channelShared(channelShared) - , eps(eps) +Normalize::Normalize(Weights const* weights, int32_t nbWeights, bool acrossSpatial_, bool channelShared_, float eps_) + : acrossSpatial(acrossSpatial_) + , channelShared(channelShared_) + , eps(eps_) { gLogWarning << "NormalizePlugin is deprecated since TensorRT 9.0. Use INetworkDefinition::addNormalization() to " "add an INormalizationLayer." @@ -46,15 +46,15 @@ Normalize::Normalize(Weights const* weights, int32_t nbWeights, bool acrossSpati mScalarScale = static_cast(weights[0].values)[0]; } -Normalize::Normalize(Weights const* weights, int32_t nbWeights, float scalarScale, bool acrossSpatial, - bool channelShared, float eps, int32_t C, int32_t H, int32_t W) +Normalize::Normalize(Weights const* weights, int32_t nbWeights, float scalarScale, bool acrossSpatial_, + bool channelShared_, float eps_, int32_t C_, int32_t H_, int32_t W_) : mScalarScale(scalarScale) - , acrossSpatial(acrossSpatial) - , channelShared(channelShared) - , eps(eps) - , C(C) - , H(H) - , W(W) + , acrossSpatial(acrossSpatial_) + , channelShared(channelShared_) + , eps(eps_) + , C(C_) + , H(H_) + , W(W_) { mNbWeights = nbWeights; PLUGIN_VALIDATE(nbWeights == 1); @@ -101,7 +101,7 @@ int32_t Normalize::initialize() noexcept void Normalize::terminate() noexcept {} -size_t Normalize::getWorkspaceSize(int32_t maxBatchSize) const noexcept +size_t Normalize::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return normalizePluginWorkspaceSize(acrossSpatial, C, H, W); } @@ -192,7 +192,7 @@ char const* Normalize::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index DataType Normalize::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const 
noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index == 0); return DataType::kFLOAT; @@ -200,26 +200,26 @@ DataType Normalize::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool Normalize::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool Normalize::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool Normalize::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. void Normalize::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept + DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat floatFormat, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(*inputTypes == DataType::kFLOAT && floatFormat == PluginFormat::kLINEAR); - C = inputDims[0].d[0]; - H = inputDims[0].d[1]; - W = inputDims[0].d[2]; + C = static_cast(inputDims[0].d[0]); + H = static_cast(inputDims[0].d[1]); + W = static_cast(inputDims[0].d[2]); if (channelShared) { PLUGIN_ASSERT(mWeights.count == 1); @@ -237,7 +237,8 @@ void Normalize::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims co } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
-void Normalize::attachToContext(cudnnContext* cudnn, cublasContext* cublas, IGpuAllocator* gpuAllocator) noexcept +void Normalize::attachToContext( + cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, IGpuAllocator* gpuAllocator) noexcept { try { @@ -299,7 +300,7 @@ NormalizePluginCreator::NormalizePluginCreator() mPluginAttributes.emplace_back(PluginField("nbWeights", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("eps", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -318,7 +319,7 @@ PluginFieldCollection const* NormalizePluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* NormalizePluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* NormalizePluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -378,7 +379,7 @@ IPluginV2Ext* NormalizePluginCreator::createPlugin(char const* name, PluginField } IPluginV2Ext* NormalizePluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/nvFasterRCNN/nvFasterRCNNPlugin.cpp b/plugin/nvFasterRCNN/nvFasterRCNNPlugin.cpp index 0ccd27e8b..ff9ccd6dc 100644 --- a/plugin/nvFasterRCNN/nvFasterRCNNPlugin.cpp +++ b/plugin/nvFasterRCNN/nvFasterRCNNPlugin.cpp @@ -27,8 +27,8 @@ char const* const kRPROI_PLUGIN_VERSION{"1"}; char const* const kRPROI_PLUGIN_NAME{"RPROI_TRT"}; } // namespace -RPROIPlugin::RPROIPlugin(RPROIParams params, float const* anchorsRatios, float const* anchorsScales) - : params(params) +RPROIPlugin::RPROIPlugin(RPROIParams params_, float const* anchorsRatios, float const* anchorsScales) + : params(params_) { /* * It only supports the scenario where params.featureStride == 
params.minBoxSize @@ -48,18 +48,18 @@ RPROIPlugin::RPROIPlugin(RPROIParams params, float const* anchorsRatios, float c } // Constructor for cloning one plugin instance to another -RPROIPlugin::RPROIPlugin(RPROIParams params, float const* anchorsRatios, float const* anchorsScales, int32_t A, - int32_t C, int32_t H, int32_t W, float const* _anchorsDev, size_t deviceSmemSize, DataType inFeatureType, - DataType outFeatureType, DLayout_t inFeatureLayout) - : deviceSmemSize(deviceSmemSize) - , params(params) - , A(A) - , C(C) - , H(H) - , W(W) - , inFeatureType(inFeatureType) - , outFeatureType(outFeatureType) - , inFeatureLayout(inFeatureLayout) +RPROIPlugin::RPROIPlugin(RPROIParams params_, float const* anchorsRatios, float const* anchorsScales, int32_t A_, + int32_t C_, int32_t H_, int32_t W_, float const* _anchorsDev, size_t deviceSmemSize_, DataType inFeatureType_, + DataType outFeatureType_, DLayout_t inFeatureLayout_) + : deviceSmemSize(deviceSmemSize_) + , params(params_) + , A(A_) + , C(C_) + , H(H_) + , W(W_) + , inFeatureType(inFeatureType_) + , outFeatureType(outFeatureType_) + , inFeatureLayout(inFeatureLayout_) { PLUGIN_VALIDATE(params.anchorsRatioCount > 0 && params.anchorsScaleCount > 0); anchorsRatiosHost = copyToHost(anchorsRatios, params.anchorsRatioCount); @@ -235,7 +235,7 @@ float* RPROIPlugin::copyToHost(void const* srcHostData, int32_t count) noexcept int32_t RPROIPlugin::copyFromHost(char* dstHostBuffer, void const* source, int32_t count) const noexcept { PLUGIN_CHECK(cudaMemcpy(dstHostBuffer, source, count * sizeof(float), cudaMemcpyHostToHost)); - return count * sizeof(float); + return static_cast(count * sizeof(float)); } bool RPROIPlugin::supportsFormatCombination( @@ -313,7 +313,7 @@ char const* RPROIPlugin::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index. 
DataType RPROIPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Two outputs PLUGIN_ASSERT(index == 0 || index == 1); @@ -322,13 +322,13 @@ DataType RPROIPlugin::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool RPROIPlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool RPROIPlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool RPROIPlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } @@ -353,9 +353,9 @@ void RPROIPlugin::configurePlugin( PLUGIN_ASSERT(nbOutput == PluginNbOutputs); A = params.anchorsRatioCount * params.anchorsScaleCount; - C = in[2].dims.d[0]; - H = in[2].dims.d[1]; - W = in[2].dims.d[2]; + C = static_cast(in[2].dims.d[0]); + H = static_cast(in[2].dims.d[1]); + W = static_cast(in[2].dims.d[2]); inFeatureType = in[2].type; outFeatureType = out[1].type; inFeatureLayout = convertTensorFormat(in[2].format); @@ -372,7 +372,7 @@ void RPROIPlugin::configurePlugin( // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void RPROIPlugin::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -398,7 +398,7 @@ RPROIPluginCreator::RPROIPluginCreator() mPluginAttributes.emplace_back(PluginField("anchorsRatios", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("anchorsScales", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -422,7 +422,7 @@ PluginFieldCollection const* RPROIPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* RPROIPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* RPROIPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -520,7 +520,7 @@ IPluginV2Ext* RPROIPluginCreator::createPlugin(char const* name, PluginFieldColl } IPluginV2Ext* RPROIPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/nvFasterRCNN/nvFasterRCNNPlugin.h b/plugin/nvFasterRCNN/nvFasterRCNNPlugin.h index ec9b931b5..665c06513 100644 --- a/plugin/nvFasterRCNN/nvFasterRCNNPlugin.h +++ b/plugin/nvFasterRCNN/nvFasterRCNNPlugin.h @@ -29,6 +29,8 @@ namespace plugin class RPROIPlugin : public IPluginV2IOExt { public: + using IPluginV2IOExt::configurePlugin; + RPROIPlugin(RPROIParams params, float const* anchorsRatios, float const* anchorsScales); RPROIPlugin(RPROIParams params, float const* anchorsRatios, float const* anchorsScales, int32_t A, int32_t C, diff --git a/plugin/pillarScatterPlugin/pillarScatter.cpp b/plugin/pillarScatterPlugin/pillarScatter.cpp index 4e623010d..66c7a0cba 100644 --- 
a/plugin/pillarScatterPlugin/pillarScatter.cpp +++ b/plugin/pillarScatterPlugin/pillarScatter.cpp @@ -31,7 +31,7 @@ PillarScatterPlugin::PillarScatterPlugin(size_t h, size_t w) { } -PillarScatterPlugin::PillarScatterPlugin(void const* data, size_t length) +PillarScatterPlugin::PillarScatterPlugin(void const* data, size_t /*length*/) { auto const* d = toPointer(data); feature_y_size_ = readFromBuffer(d); @@ -54,7 +54,7 @@ nvinfer1::IPluginV2DynamicExt* PillarScatterPlugin::clone() const noexcept } nvinfer1::DimsExprs PillarScatterPlugin::getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, - int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept + int32_t /*nbInputs*/, nvinfer1::IExprBuilder& exprBuilder) noexcept { PLUGIN_ASSERT(outputIndex == 0); nvinfer1::DimsExprs output; @@ -62,8 +62,8 @@ nvinfer1::DimsExprs PillarScatterPlugin::getOutputDimensions(int32_t outputIndex output.nbDims = 4; output.d[0] = batch_size; output.d[1] = inputs[0].d[2]; - output.d[2] = exprBuilder.constant(feature_y_size_); - output.d[3] = exprBuilder.constant(feature_x_size_); + output.d[2] = exprBuilder.constant(static_cast(feature_y_size_)); + output.d[3] = exprBuilder.constant(static_cast(feature_x_size_)); return output; } @@ -93,14 +93,14 @@ bool PillarScatterPlugin::supportsFormatCombination( return false; } -void PillarScatterPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void PillarScatterPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, + nvinfer1::DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { return; } -size_t PillarScatterPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t PillarScatterPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc 
const* /*inputs*/, int32_t /*nbInputs*/, + nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -112,17 +112,17 @@ int32_t PillarScatterPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDesc { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t batchSize = inputDesc[0].dims.d[0]; - int32_t maxPillarNum = inputDesc[0].dims.d[1]; - int32_t numFeatures = inputDesc[0].dims.d[2]; + int32_t batchSize = static_cast(inputDesc[0].dims.d[0]); + int32_t maxPillarNum = static_cast(inputDesc[0].dims.d[1]); + int32_t numFeatures = static_cast(inputDesc[0].dims.d[2]); nvinfer1::DataType inputType = inputDesc[0].type; auto coords_data = static_cast(inputs[1]); auto params_data = static_cast(inputs[2]); - uint32_t featureY = feature_y_size_; - uint32_t featureX = feature_x_size_; + uint32_t featureY = static_cast(feature_y_size_); + uint32_t featureX = static_cast(feature_x_size_); int32_t status = -1; @@ -155,7 +155,7 @@ int32_t PillarScatterPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDesc } nvinfer1::DataType PillarScatterPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { return inputTypes[0]; } @@ -213,7 +213,7 @@ PillarScatterPluginCreator::PillarScatterPluginCreator() { mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("dense_shape", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -232,7 +232,7 @@ PluginFieldCollection const* PillarScatterPluginCreator::getFieldNames() noexcep return &mFC; } -IPluginV2* PillarScatterPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2* PillarScatterPluginCreator::createPlugin(char 
const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -262,7 +262,7 @@ IPluginV2* PillarScatterPluginCreator::createPlugin(char const* name, PluginFiel } IPluginV2* PillarScatterPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/pillarScatterPlugin/pillarScatter.h b/plugin/pillarScatterPlugin/pillarScatter.h index 95c789de3..bc4e78af9 100644 --- a/plugin/pillarScatterPlugin/pillarScatter.h +++ b/plugin/pillarScatterPlugin/pillarScatter.h @@ -32,6 +32,11 @@ namespace plugin class PillarScatterPlugin : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + PillarScatterPlugin() = delete; PillarScatterPlugin(void const* data, size_t length); PillarScatterPlugin(size_t h, size_t w); diff --git a/plugin/priorBoxPlugin/priorBoxPlugin.cpp b/plugin/priorBoxPlugin/priorBoxPlugin.cpp index 6b715faad..a1b8427b0 100644 --- a/plugin/priorBoxPlugin/priorBoxPlugin.cpp +++ b/plugin/priorBoxPlugin/priorBoxPlugin.cpp @@ -39,7 +39,8 @@ PriorBox::PriorBox(PriorBoxParameters param, int32_t H, int32_t W) , mW(W) { // Each object should manage its copy of param. 
- auto copyParamData = [](float*& dstPtr, std::vector& dstVec, float const* src, int32_t size) { + auto copyParamData = [](float*& dstPtr, std::vector& dstVec, float const* src, int32_t size) + { PLUGIN_VALIDATE(size >= 0); PLUGIN_VALIDATE(src != nullptr); @@ -56,7 +57,8 @@ PriorBox::PriorBox(PriorBoxParameters param, int32_t H, int32_t W) void PriorBox::setupDeviceMemory() noexcept { - auto copyToDevice = [](void const* hostData, int32_t count) -> Weights { + auto copyToDevice = [](void const* hostData, int32_t count) -> Weights + { PLUGIN_VALIDATE(count >= 0); void* deviceData = nullptr; PLUGIN_CUASSERT(cudaMalloc(&deviceData, count * sizeof(float))); @@ -84,7 +86,7 @@ void PriorBox::setupDeviceMemory() noexcept // Prevent duplicated aspect ratios from input for (size_t j = 0; j < tmpAR.size(); ++j) { - if (std::fabs(aspectRatio - tmpAR[j]) < 1e-6) + if (std::fabs(aspectRatio - tmpAR[j]) < 1e-6F) { alreadyExist = true; break; @@ -105,11 +107,11 @@ void PriorBox::setupDeviceMemory() noexcept // https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/c_api/classnvinfer1_1_1_weights.html // mAspectRatiosGPU.count is different to mParam.numAspectRatios. 
// - mAspectRatiosGPU = copyToDevice(&tmpAR[0], tmpAR.size()); + mAspectRatiosGPU = copyToDevice(&tmpAR[0], static_cast(tmpAR.size())); // Number of prior boxes per grid cell on the feature map // tmpAR already included an aspect ratio of 1.0 - mNumPriors = tmpAR.size() * mParam.numMinSize; + mNumPriors = static_cast(tmpAR.size()) * mParam.numMinSize; // // If we have maxSizes, as long as all the maxSizes meets assertion requirement, we add one bounding box per maxSize @@ -144,7 +146,8 @@ void PriorBox::deserialize(uint8_t const* data, size_t length) auto const* d{data}; mParam = read(d); - auto readArray = [&d](int32_t size, std::vector& dstVec, float*& dstPtr) { + auto readArray = [&d](int32_t size, std::vector& dstVec, float*& dstPtr) + { PLUGIN_VALIDATE(size >= 0); dstVec.resize(size); for (int32_t i = 0; i < size; i++) @@ -179,8 +182,8 @@ Dims PriorBox::getOutputDimensions(int32_t index, Dims const* inputs, int32_t nb // Only one output from the plugin layer PLUGIN_VALIDATE(index == 0); // Particularity of the PriorBox layer: no batchSize dimension needed - mH = inputs[0].d[1]; - mW = inputs[0].d[2]; + mH = static_cast(inputs[0].d[1]); + mW = static_cast(inputs[0].d[2]); // workaround for TRT // The first channel is for prior box coordinates. // The second channel is for prior box scaling factors, which is simply a copy of the variance provided. 
@@ -201,8 +204,9 @@ int32_t PriorBox::enqueue(int32_t /*batchSize*/, void const* const* /*inputs*/, void* /*workspace*/, cudaStream_t stream) noexcept { void* outputData = outputs[0]; - pluginStatus_t status = priorBoxInference(stream, mParam, mH, mW, mNumPriors, mAspectRatiosGPU.count, - mMinSizeGPU.values, mMaxSizeGPU.values, mAspectRatiosGPU.values, outputData); + pluginStatus_t status + = priorBoxInference(stream, mParam, mH, mW, mNumPriors, static_cast(mAspectRatiosGPU.count), + mMinSizeGPU.values, mMaxSizeGPU.values, mAspectRatiosGPU.values, outputData); return status; } @@ -221,7 +225,8 @@ void PriorBox::serialize(void* buffer) const noexcept uint8_t* a = d; write(d, mParam); - auto writeArray = [&d](int32_t const size, float const* srcPtr, std::vector const& srcVec) { + auto writeArray = [&d](int32_t const size, float const* srcPtr, std::vector const& srcVec) + { // srcVec is only used here to check that the size and srcPtr are correct. PLUGIN_VALIDATE(srcVec.data() == srcPtr); PLUGIN_VALIDATE(srcVec.size() == static_cast(size)); @@ -331,18 +336,18 @@ void PriorBox::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims con PLUGIN_VALIDATE(inputDims[0].nbDims == 3); PLUGIN_VALIDATE(inputDims[1].nbDims == 3); PLUGIN_VALIDATE(outputDims[0].nbDims == 3); - mH = inputDims[0].d[1]; - mW = inputDims[0].d[2]; + mH = static_cast(inputDims[0].d[1]); + mW = static_cast(inputDims[0].d[2]); // Prepare for the inference function. 
 if (mParam.imgH == 0 || mParam.imgW == 0) { - mParam.imgH = inputDims[1].d[1]; - mParam.imgW = inputDims[1].d[2]; + mParam.imgH = static_cast<int32_t>(inputDims[1].d[1]); + mParam.imgW = static_cast<int32_t>(inputDims[1].d[2]); } if (mParam.stepH == 0 || mParam.stepW == 0) { - mParam.stepH = static_cast<float>(mParam.imgH) / mH; - mParam.stepW = static_cast<float>(mParam.imgW) / mW; + mParam.stepH = static_cast<float>(mParam.imgH) / static_cast<float>(mH); + mParam.stepW = static_cast<float>(mParam.imgW) / static_cast<float>(mW); } } @@ -370,7 +375,7 @@ PriorBoxPluginCreator::PriorBoxPluginCreator() mPluginAttributes.emplace_back(PluginField("stepW", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("offset", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast<int32_t>(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/proposalLayerPlugin/proposalLayerPlugin.cpp b/plugin/proposalLayerPlugin/proposalLayerPlugin.cpp index 083445331..5bd76713a 100644 --- a/plugin/proposalLayerPlugin/proposalLayerPlugin.cpp +++ b/plugin/proposalLayerPlugin/proposalLayerPlugin.cpp @@ -41,7 +41,7 @@ ProposalLayerPluginCreator::ProposalLayerPluginCreator() mPluginAttributes.emplace_back(PluginField("iou_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("image_size", nullptr, PluginFieldType::kINT32, 3)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast<int32_t>(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -60,7 +60,7 @@ PluginFieldCollection const* ProposalLayerPluginCreator::getFieldNames() noexcep return &mFC; } -IPluginV2Ext* ProposalLayerPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* ProposalLayerPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -101,7 +101,8 @@ IPluginV2Ext* ProposalLayerPluginCreator::createPlugin(char const* 
name, PluginF return nullptr; } -IPluginV2Ext* ProposalLayerPluginCreator::deserializePlugin(char const* name, void const* data, size_t length) noexcept +IPluginV2Ext* ProposalLayerPluginCreator::deserializePlugin( + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -132,7 +133,7 @@ ProposalLayer::ProposalLayer( mParam.backgroundLabelId = -1; mParam.numClasses = 1; mParam.keepTopK = mKeepTopK; - mParam.scoreThreshold = 0.0; + mParam.scoreThreshold = 0.0F; mParam.iouThreshold = mIOUThreshold; mType = DataType::kFLOAT; @@ -157,7 +158,7 @@ int32_t ProposalLayer::initialize() noexcept // Init the anchors for batch size: mAnchorBoxesDevice = std::make_shared>(mAnchorsCnt * 4 * mMaxBatchSize); - int32_t batch_offset = sizeof(float) * mAnchorsCnt * 4; + int32_t batch_offset = static_cast(sizeof(float)) * mAnchorsCnt * 4; uint8_t* device_ptr = static_cast(mAnchorBoxesDevice->mPtr); for (int32_t i = 0; i < mMaxBatchSize; i++) { @@ -256,7 +257,7 @@ void ProposalLayer::deserialize(int8_t const* data, size_t length) mParam.backgroundLabelId = -1; mParam.numClasses = 1; mParam.keepTopK = mKeepTopK; - mParam.scoreThreshold = 0.0; + mParam.scoreThreshold = 0.0F; mParam.iouThreshold = mIOUThreshold; mType = DataType::kFLOAT; @@ -301,8 +302,8 @@ void ProposalLayer::generate_pyramid_anchors(nvinfer1::Dims const& imageDims) auto const& strides = MaskRCNNConfig::BACKBONE_STRIDES; auto anchor_stride = MaskRCNNConfig::RPN_ANCHOR_STRIDE; - float const cy = imageDims.d[1] - 1; - float const cx = imageDims.d[2] - 1; + float const cy = static_cast(imageDims.d[1] - 1); + float const cx = static_cast(imageDims.d[2] - 1); auto& anchors = mAnchorBoxesHost; PLUGIN_VALIDATE(anchors.empty()); @@ -311,18 +312,19 @@ void ProposalLayer::generate_pyramid_anchors(nvinfer1::Dims const& imageDims) for (size_t s = 0; s < scales.size(); ++s) { float scale = scales[s]; - int32_t stride = strides[s]; + int32_t stride = static_cast(strides[s]); for (int32_t y = 0; y < imageDims.d[1]; 
y += anchor_stride * stride) for (int32_t x = 0; x < imageDims.d[2]; x += anchor_stride * stride) for (float r : ratios) { - float sqrt_r = sqrt(r); + float sqrt_r = sqrtf(r); float h = scale / sqrt_r; float w = scale * sqrt_r; anchors.insert(anchors.end(), - {(y - h / 2) / cy, (x - w / 2) / cx, (y + h / 2 - 1) / cy, (x + w / 2 - 1) / cx}); + {(static_cast(y) - h / 2) / cy, (static_cast(x) - w / 2) / cx, + (static_cast(y) + h / 2 - 1) / cy, (static_cast(x) + w / 2 - 1) / cx}); } } @@ -352,7 +354,7 @@ int32_t ProposalLayer::enqueue( // Return the DataType of the plugin output at the requested index DataType ProposalLayer::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only DataType::kFLOAT is acceptable by the plugin layer return DataType::kFLOAT; @@ -360,33 +362,34 @@ DataType ProposalLayer::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool ProposalLayer::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool ProposalLayer::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool ProposalLayer::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
-void ProposalLayer::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void ProposalLayer::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t /*nbOutputs*/, DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t maxBatchSize) noexcept { check_valid_inputs(inputDims, nbInputs); PLUGIN_ASSERT(inputDims[0].d[0] == inputDims[1].d[0]); - mAnchorsCnt = inputDims[0].d[0]; - PLUGIN_ASSERT(mAnchorsCnt == (int32_t) (mAnchorBoxesHost.size() / 4)); + mAnchorsCnt = static_cast(inputDims[0].d[0]); + PLUGIN_ASSERT(mAnchorsCnt == static_cast(mAnchorBoxesHost.size() / 4)); mMaxBatchSize = maxBatchSize; } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void ProposalLayer::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/proposalPlugin/proposalPlugin.cpp b/plugin/proposalPlugin/proposalPlugin.cpp index da528ce3e..31ab8de32 100644 --- a/plugin/proposalPlugin/proposalPlugin.cpp +++ b/plugin/proposalPlugin/proposalPlugin.cpp @@ -248,7 +248,7 @@ Dims ProposalPlugin::getOutputDimensions(int32_t index, Dims const* inputs, int3 PLUGIN_VALIDATE(nbInputDims == 2); PLUGIN_VALIDATE(inputs->nbDims == 3); PLUGIN_VALIDATE(inputs[1].nbDims == 3); - int32_t channels = mMaxBoxNum; + int32_t channels = static_cast(mMaxBoxNum); int32_t height = 4; int32_t width = 1; return Dims3(channels, height, width); @@ -273,7 +273,7 @@ DimsExprs ProposalDynamicPlugin::getOutputDimensions( DimsExprs outDim; outDim.nbDims = 4; outDim.d[0] = inputs[0].d[0]; - outDim.d[1] = exprBuilder.constant(mMaxBoxNum); + outDim.d[1] = exprBuilder.constant(static_cast(mMaxBoxNum)); outDim.d[2] = exprBuilder.constant(4); outDim.d[3] = exprBuilder.constant(1); return outDim; @@ -297,14 +297,17 @@ int32_t ProposalDynamicPlugin::initialize() noexcept size_t ProposalPlugin::getWorkspaceSize(int32_t maxBatchSize) const noexcept { - return _get_workspace_size(maxBatchSize, mAnchorSizeNum, mAnchorRatioNum, mRpnHeight, mRpnWidth, mMaxBoxNum); + return _get_workspace_size(maxBatchSize, static_cast(mAnchorSizeNum), + static_cast(mAnchorRatioNum), static_cast(mRpnHeight), static_cast(mRpnWidth), + static_cast(mMaxBoxNum)); } -size_t ProposalDynamicPlugin::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ProposalDynamicPlugin::getWorkspaceSize(PluginTensorDesc const* inputs, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { - int32_t 
batchSize = inputs[0].dims.d[0]; - return _get_workspace_size(batchSize, mAnchorSizeNum, mAnchorRatioNum, mRpnHeight, mRpnWidth, mMaxBoxNum); + int32_t batchSize = static_cast(inputs[0].dims.d[0]); + return _get_workspace_size(batchSize, static_cast(mAnchorSizeNum), static_cast(mAnchorRatioNum), + static_cast(mRpnHeight), static_cast(mRpnWidth), static_cast(mMaxBoxNum)); } int32_t ProposalPlugin::enqueue( @@ -315,9 +318,11 @@ int32_t ProposalPlugin::enqueue( int32_t status = STATUS_FAILURE; // Our plugin outputs only one tensor void* output = outputs[0]; - status = proposalInference_gpu(stream, inputs[0], inputs[1], batchSize, mInputHeight, mInputWidth, mRpnHeight, - mRpnWidth, mMaxBoxNum, mPreNmsTopN, &mAnchorSizes[0], mAnchorSizeNum, &mAnchorRatios[0], mAnchorRatioNum, - mRpnStdScaling, mRpnStride, mBboxMinSize, mNmsIouThreshold, workspace, output); + status = proposalInference_gpu(stream, inputs[0], inputs[1], batchSize, static_cast(mInputHeight), + static_cast(mInputWidth), static_cast(mRpnHeight), static_cast(mRpnWidth), + static_cast(mMaxBoxNum), static_cast(mPreNmsTopN), &mAnchorSizes[0], + static_cast(mAnchorSizeNum), &mAnchorRatios[0], static_cast(mAnchorRatioNum), + mRpnStdScaling, static_cast(mRpnStride), mBboxMinSize, mNmsIouThreshold, workspace, output); return status; } catch (std::exception const& e) @@ -337,10 +342,12 @@ int32_t ProposalDynamicPlugin::enqueue(PluginTensorDesc const* inputDesc, Plugin int32_t status = STATUS_FAILURE; // Our plugin outputs only one tensor void* output = outputs[0]; - int32_t batchSize = inputDesc[0].dims.d[0]; - status = proposalInference_gpu(stream, inputs[0], inputs[1], batchSize, mInputHeight, mInputWidth, mRpnHeight, - mRpnWidth, mMaxBoxNum, mPreNmsTopN, &mAnchorSizes[0], mAnchorSizeNum, &mAnchorRatios[0], mAnchorRatioNum, - mRpnStdScaling, mRpnStride, mBboxMinSize, mNmsIouThreshold, workspace, output); + int32_t batchSize = static_cast(inputDesc[0].dims.d[0]); + status = proposalInference_gpu(stream, inputs[0], 
inputs[1], batchSize, static_cast(mInputHeight), + static_cast(mInputWidth), static_cast(mRpnHeight), static_cast(mRpnWidth), + static_cast(mMaxBoxNum), static_cast(mPreNmsTopN), &mAnchorSizes[0], + static_cast(mAnchorSizeNum), &mAnchorRatios[0], static_cast(mAnchorRatioNum), + mRpnStdScaling, static_cast(mRpnStride), mBboxMinSize, mNmsIouThreshold, workspace, output); return status; } catch (std::exception const& e) @@ -429,7 +436,7 @@ bool ProposalPlugin::supportsFormat(DataType type, PluginFormat format) const no } bool ProposalDynamicPlugin::supportsFormatCombination( - int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t /*nbOutputs*/) noexcept { try { @@ -477,9 +484,11 @@ IPluginV2Ext* ProposalPlugin::clone() const noexcept { try { - IPluginV2Ext* plugin = new ProposalPlugin(mInputHeight, mInputWidth, mRpnHeight, mRpnWidth, mRpnStdScaling, - mRpnStride, mBboxMinSize, mNmsIouThreshold, mPreNmsTopN, mMaxBoxNum, &mAnchorSizes[0], mAnchorSizeNum, - &mAnchorRatios[0], mAnchorRatioNum); + IPluginV2Ext* plugin = new ProposalPlugin(static_cast(mInputHeight), static_cast(mInputWidth), + static_cast(mRpnHeight), static_cast(mRpnWidth), mRpnStdScaling, + static_cast(mRpnStride), mBboxMinSize, mNmsIouThreshold, static_cast(mPreNmsTopN), + static_cast(mMaxBoxNum), &mAnchorSizes[0], static_cast(mAnchorSizeNum), &mAnchorRatios[0], + static_cast(mAnchorRatioNum)); plugin->setPluginNamespace(mNamespace.c_str()); return plugin; } @@ -494,9 +503,11 @@ IPluginV2DynamicExt* ProposalDynamicPlugin::clone() const noexcept { try { - auto* plugin = new ProposalDynamicPlugin(mInputHeight, mInputWidth, mRpnHeight, mRpnWidth, mRpnStdScaling, - mRpnStride, mBboxMinSize, mNmsIouThreshold, mPreNmsTopN, mMaxBoxNum, &mAnchorSizes[0], mAnchorSizeNum, - &mAnchorRatios[0], mAnchorRatioNum); + auto* plugin = new ProposalDynamicPlugin(static_cast(mInputHeight), static_cast(mInputWidth), + 
static_cast(mRpnHeight), static_cast(mRpnWidth), mRpnStdScaling, + static_cast(mRpnStride), mBboxMinSize, mNmsIouThreshold, static_cast(mPreNmsTopN), + static_cast(mMaxBoxNum), &mAnchorSizes[0], static_cast(mAnchorSizeNum), &mAnchorRatios[0], + static_cast(mAnchorRatioNum)); plugin->setPluginNamespace(mNamespace.c_str()); return plugin; } @@ -545,7 +556,7 @@ char const* ProposalDynamicPlugin::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index. DataType ProposalPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -561,7 +572,7 @@ DataType ProposalPlugin::getOutputDataType( } DataType ProposalDynamicPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -578,28 +589,29 @@ DataType ProposalDynamicPlugin::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool ProposalPlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without // replication. 
-bool ProposalPlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool ProposalPlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } -void ProposalPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void ProposalPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t /*maxBatchSize*/) noexcept { try { PLUGIN_VALIDATE(nbInputs == 2); PLUGIN_VALIDATE(nbOutputs == 1); - mRpnHeight = inputDims->d[1]; - mRpnWidth = inputDims->d[2]; + mRpnHeight = static_cast(inputDims->d[1]); + mRpnWidth = static_cast(inputDims->d[2]); } catch (std::exception const& e) { @@ -607,15 +619,15 @@ void ProposalPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Di } } -void ProposalDynamicPlugin::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void ProposalDynamicPlugin::configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t nbOutputs) noexcept { try { PLUGIN_VALIDATE(nbInputs == 2); PLUGIN_VALIDATE(nbOutputs == 1); - mRpnHeight = in[0].desc.dims.d[2]; - mRpnWidth = in[0].desc.dims.d[3]; + mRpnHeight = static_cast(in[0].desc.dims.d[2]); + mRpnWidth = static_cast(in[0].desc.dims.d[3]); } catch (std::exception const& e) { @@ -626,7 +638,7 @@ void ProposalDynamicPlugin::configurePlugin( // Attach the plugin object to an execution context and grant the plugin the // access to some context resource. 
void ProposalPlugin::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -645,7 +657,7 @@ ProposalBasePluginCreator::ProposalBasePluginCreator() noexcept mPluginAttributes.emplace_back(PluginField("post_nms_top_n", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("anchor_sizes", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("anchor_ratios", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -674,7 +686,7 @@ PluginFieldCollection const* ProposalBasePluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* ProposalPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* ProposalPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -767,9 +779,9 @@ IPluginV2Ext* ProposalPluginCreator::createPlugin(char const* name, PluginFieldC PLUGIN_VALIDATE(roiMinSize >= 0.0F); PLUGIN_VALIDATE(nmsIouThreshold > 0.0F); - IPluginV2Ext* plugin - = new ProposalPlugin(inputHeight, inputWidth, kRPN_STD_SCALING, rpnStride, roiMinSize, nmsIouThreshold, - preNmsTopN, postNmsTopN, &anchorSizes[0], anchorSizes.size(), &anchorRatios[0], anchorRatios.size()); + IPluginV2Ext* plugin = new ProposalPlugin(inputHeight, inputWidth, kRPN_STD_SCALING, rpnStride, roiMinSize, + nmsIouThreshold, preNmsTopN, postNmsTopN, &anchorSizes[0], static_cast(anchorSizes.size()), + &anchorRatios[0], static_cast(anchorRatios.size())); plugin->setPluginNamespace(mNamespace.c_str()); return plugin; } @@ -781,7 +793,7 @@ IPluginV2Ext* ProposalPluginCreator::createPlugin(char const* name, PluginFieldC } IPluginV2DynamicExt* 
ProposalDynamicPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -872,8 +884,8 @@ IPluginV2DynamicExt* ProposalDynamicPluginCreator::createPlugin( PLUGIN_VALIDATE(nmsIouThreshold > 0.0F); IPluginV2DynamicExt* plugin = new ProposalDynamicPlugin(inputHeight, inputWidth, kRPN_STD_SCALING, rpnStride, - roiMinSize, nmsIouThreshold, preNmsTopN, postNmsTopN, &anchorSizes[0], anchorSizes.size(), &anchorRatios[0], - anchorRatios.size()); + roiMinSize, nmsIouThreshold, preNmsTopN, postNmsTopN, &anchorSizes[0], + static_cast(anchorSizes.size()), &anchorRatios[0], static_cast(anchorRatios.size())); plugin->setPluginNamespace(mNamespace.c_str()); return plugin; } @@ -885,7 +897,7 @@ IPluginV2DynamicExt* ProposalDynamicPluginCreator::createPlugin( } IPluginV2Ext* ProposalPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { @@ -905,7 +917,7 @@ IPluginV2Ext* ProposalPluginCreator::deserializePlugin( } IPluginV2DynamicExt* ProposalDynamicPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/proposalPlugin/proposalPlugin.h b/plugin/proposalPlugin/proposalPlugin.h index 90f0f1071..fa3943bb9 100644 --- a/plugin/proposalPlugin/proposalPlugin.h +++ b/plugin/proposalPlugin/proposalPlugin.h @@ -82,7 +82,8 @@ class TRT_DEPRECATED ProposalPlugin : public IPluginV2Ext char const* getPluginNamespace() const noexcept override; - DataType getOutputDataType(int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override; + DataType getOutputDataType( + int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override; bool 
isOutputBroadcastAcrossBatch( int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept override; @@ -119,13 +120,19 @@ class TRT_DEPRECATED ProposalPlugin : public IPluginV2Ext class ProposalDynamicPlugin : public IPluginV2DynamicExt { public: - ProposalDynamicPlugin(int32_t inputHeight, int32_t inputWidth, int32_t rpnHeight, int32_t rpnWidth, float rpnStdScaling, - int32_t rpnStride, float bboxMinSize, float nmsIouThreshold, int32_t preNmsTopN, int32_t maxBoxNum, - float const* anchorSizes, int32_t ancSizeNum, float const* anchorRatios, int32_t ancRatioNum); + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; - ProposalDynamicPlugin(int32_t inputHeight, int32_t inputWidth, float rpnStdScaling, int32_t rpnStride, float bboxMinSize, - float nmsIouThreshold, int32_t preNmsTopN, int32_t maxBoxNum, float const* anchorSizes, int32_t ancSizeNum, - float const* anchorRatios, int32_t ancRatioNum); + ProposalDynamicPlugin(int32_t inputHeight, int32_t inputWidth, int32_t rpnHeight, int32_t rpnWidth, + float rpnStdScaling, int32_t rpnStride, float bboxMinSize, float nmsIouThreshold, int32_t preNmsTopN, + int32_t maxBoxNum, float const* anchorSizes, int32_t ancSizeNum, float const* anchorRatios, + int32_t ancRatioNum); + + ProposalDynamicPlugin(int32_t inputHeight, int32_t inputWidth, float rpnStdScaling, int32_t rpnStride, + float bboxMinSize, float nmsIouThreshold, int32_t preNmsTopN, int32_t maxBoxNum, float const* anchorSizes, + int32_t ancSizeNum, float const* anchorRatios, int32_t ancRatioNum); ProposalDynamicPlugin(void const* serialBuf, size_t serialSize); @@ -148,7 +155,8 @@ class ProposalDynamicPlugin : public IPluginV2DynamicExt char const* getPluginNamespace() const noexcept override; // IPluginV2Ext methods - DataType getOutputDataType(int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) 
const noexcept override; + DataType getOutputDataType( + int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override; // IPluginV2DynamicExt methods IPluginV2DynamicExt* clone() const noexcept override; diff --git a/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.cpp b/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.cpp index 598faf949..38b23d60d 100644 --- a/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.cpp +++ b/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.cpp @@ -43,7 +43,7 @@ PyramidROIAlignPluginCreator::PyramidROIAlignPluginCreator() mPluginAttributes.emplace_back(PluginField("sampling_ratio", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("legacy", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -62,7 +62,7 @@ PluginFieldCollection const* PyramidROIAlignPluginCreator::getFieldNames() noexc return &mFC; } -IPluginV2Ext* PyramidROIAlignPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* PyramidROIAlignPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -147,7 +147,7 @@ IPluginV2Ext* PyramidROIAlignPluginCreator::createPlugin(char const* name, Plugi } IPluginV2Ext* PyramidROIAlignPluginCreator::deserializePlugin( - char const* name, void const* data, size_t length) noexcept + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -280,8 +280,8 @@ Dims PyramidROIAlign::getOutputDimensions(int32_t index, Dims const* inputs, int return result; } -int32_t PyramidROIAlign::enqueue( - int32_t batch_size, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t PyramidROIAlign::enqueue(int32_t batch_size, void const* const* inputs, void* const* outputs, + void* /*workspace*/, 
cudaStream_t stream) noexcept { void* const pooled = outputs[0]; cudaError_t status; @@ -296,7 +296,9 @@ int32_t PyramidROIAlign::enqueue( mAbsCoords = false; mSamplingRatio = 1; float const firstThreshold - = (224 * 224 * 2.F / (MaskRCNNConfig::IMAGE_SHAPE.d[1] * MaskRCNNConfig::IMAGE_SHAPE.d[2])) / (4.F * 4.F); + = (224 * 224 * 2.F + / static_cast(MaskRCNNConfig::IMAGE_SHAPE.d[1] * MaskRCNNConfig::IMAGE_SHAPE.d[2])) + / (4.F * 4.F); status = roiAlign(stream, batch_size, mImageSize, mFeatureLength, mROICount, firstThreshold, mTransformCoords, mAbsCoords, mSwapCoords, mPlusOneCoords, mSamplingRatio, inputs[0], &inputs[1], mFeatureSpatialSize, pooled, mPooledSize); @@ -309,7 +311,7 @@ int32_t PyramidROIAlign::enqueue( // plugin works with normalized ROI coordinates, the FPN scale must be normalized // by the input image size. float const scale = static_cast(mFPNScale); - float const normScale = sqrtf(scale * scale / (mImageSize.y * mImageSize.x)); + float const normScale = sqrtf(scale * scale / static_cast(mImageSize.y * mImageSize.x)); // Furthermore, the roiAlign kernel expects a first threshold instead. This is // the *area* of an ROI but for one level down, i.e. at the P2->P3 transition. float const firstThreshold = normScale * normScale / 4.F; @@ -395,7 +397,7 @@ void PyramidROIAlign::deserialize(int8_t const* data, size_t length) // Return the DataType of the plugin output at the requested index DataType PyramidROIAlign::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only DataType::kFLOAT is acceptable by the plugin layer return DataType::kFLOAT; @@ -403,21 +405,21 @@ DataType PyramidROIAlign::getOutputDataType( // Return true if output tensor is broadcast across a batch. 
bool PyramidROIAlign::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool PyramidROIAlign::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool PyramidROIAlign::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. -void PyramidROIAlign::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void PyramidROIAlign::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat floatFormat, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(supportsFormat(inputTypes[0], floatFormat)); check_valid_inputs(inputDims, nbInputs); @@ -425,8 +427,8 @@ void PyramidROIAlign::configurePlugin(Dims const* inputDims, int32_t nbInputs, D PLUGIN_ASSERT(nbOutputs == 1); PLUGIN_ASSERT(nbInputs == 1 + mFeatureMapCount); - mROICount = inputDims[0].d[0]; - mFeatureLength = inputDims[1].d[0]; + mROICount = static_cast(inputDims[0].d[0]); + mFeatureLength = static_cast(inputDims[1].d[0]); for (size_t layer = 0; layer < mFeatureMapCount; ++layer) { @@ -436,7 +438,7 @@ void PyramidROIAlign::configurePlugin(Dims const* inputDims, int32_t nbInputs, D // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void PyramidROIAlign::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/regionPlugin/regionPlugin.cpp b/plugin/regionPlugin/regionPlugin.cpp index b7a96c080..a932c6503 100644 --- a/plugin/regionPlugin/regionPlugin.cpp +++ b/plugin/regionPlugin/regionPlugin.cpp @@ -70,22 +70,22 @@ struct SoftmaxTreeDeleter } // namespace -Region::Region(RegionParameters params) - : num(params.num) - , coords(params.coords) - , classes(params.classes) - , smTree(params.smTree, SoftmaxTreeDeleter()) +Region::Region(RegionParameters params_) + : num(params_.num) + , coords(params_.coords) + , classes(params_.classes) + , smTree(params_.smTree, SoftmaxTreeDeleter()) { } -Region::Region(RegionParameters params, int32_t C, int32_t H, int32_t W) - : num(params.num) - , coords(params.coords) - , classes(params.classes) - , smTree(params.smTree, SoftmaxTreeDeleter()) - , C(C) - , H(H) - , W(W) +Region::Region(RegionParameters params_, int32_t C_, int32_t H_, int32_t W_) + : num(params_.num) + , coords(params_.coords) + , classes(params_.classes) + , smTree(params_.smTree, SoftmaxTreeDeleter()) + , C(C_) + , H(H_) + , W(W_) { } @@ -237,8 +237,8 @@ Dims Region::getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbIn return inputs[0]; } -int32_t Region::enqueue( - int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t Region::enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t stream) noexcept { void const* inputData = inputs[0]; void* outputData = outputs[0]; @@ -384,7 +384,7 @@ char const* Region::getPluginVersion() const noexcept return kREGION_PLUGIN_VERSION; } -size_t Region::getWorkspaceSize(int32_t maxBatchSize) const noexcept +size_t 
Region::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } @@ -398,8 +398,8 @@ IPluginV2Ext* Region::clone() const noexcept { try { - RegionParameters params{num, coords, classes, nullptr}; - Region* plugin = new Region(params, C, H, W); + RegionParameters params_{num, coords, classes, nullptr}; + Region* plugin = new Region(params_, C, H, W); plugin->setPluginNamespace(mPluginNamespace.c_str()); plugin->setSoftmaxTree(smTree); @@ -424,7 +424,8 @@ char const* Region::getPluginNamespace() const noexcept } // Return the DataType of the plugin output at the requested index -DataType Region::getOutputDataType(int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept +DataType Region::getOutputDataType( + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index == 0); return DataType::kFLOAT; @@ -432,28 +433,28 @@ DataType Region::getOutputDataType(int32_t index, nvinfer1::DataType const* inpu // Return true if output tensor is broadcast across a batch. bool Region::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool Region::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool Region::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
-void Region::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void Region::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, int32_t nbOutputs, + DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat floatFormat, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(*inputTypes == DataType::kFLOAT && floatFormat == PluginFormat::kLINEAR); PLUGIN_ASSERT(nbInputs == 1); PLUGIN_ASSERT(nbOutputs == 1); - C = inputDims[0].d[0]; - H = inputDims[0].d[1]; - W = inputDims[0].d[2]; + C = static_cast(inputDims[0].d[0]); + H = static_cast(inputDims[0].d[1]); + W = static_cast(inputDims[0].d[2]); /* * In the below assertion, 1 stands for the objectness of the bounding box * We should also @@ -464,7 +465,7 @@ void Region::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void Region::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -479,7 +480,7 @@ RegionPluginCreator::RegionPluginCreator() mPluginAttributes.emplace_back(PluginField("classes", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("smTree", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast<int32_t>(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -498,7 +499,7 @@ PluginFieldCollection const* RegionPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* RegionPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* RegionPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -541,7 +542,7 @@ IPluginV2Ext* RegionPluginCreator::createPlugin(char const* name, PluginFieldCol } IPluginV2Ext* RegionPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/reorgPlugin/reorgPlugin.cpp b/plugin/reorgPlugin/reorgPlugin.cpp index 7eb1a0e6d..0daca0384 100644 --- a/plugin/reorgPlugin/reorgPlugin.cpp +++ b/plugin/reorgPlugin/reorgPlugin.cpp @@ -73,7 +73,7 @@ char const* Reorg::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index template DataType Reorg::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only 1 input and 1 output from the plugin layer PLUGIN_ASSERT(index == 0); @@ -85,7 +85,7 @@ DataType Reorg::getOutputDataType( // Attach 
the plugin object to an execution context and grant the plugin the access to some context resource. template void Reorg::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -95,8 +95,8 @@ void Reorg::detachFromContext() noexcept { } -ReorgDynamic::ReorgDynamic(int32_t stride) - : Reorg(stride) +ReorgDynamic::ReorgDynamic(int32_t stride_) + : Reorg(stride_) { } @@ -119,8 +119,8 @@ size_t ReorgDynamic::getSerializationSize() const noexcept return sizeof(int32_t); } -size_t ReorgDynamic::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ReorgDynamic::getWorkspaceSize(nvinfer1::PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -166,22 +166,23 @@ void ReorgDynamic::configurePlugin( PLUGIN_ASSERT(out->desc.format == PluginFormat::kLINEAR); PLUGIN_ASSERT(stride > 0); - int32_t H = in->desc.dims.d[2]; - int32_t W = in->desc.dims.d[3]; - PLUGIN_ASSERT(H % stride == 0); - PLUGIN_ASSERT(W % stride == 0); + int32_t H_ = static_cast(in->desc.dims.d[2]); + int32_t W_ = static_cast(in->desc.dims.d[3]); + PLUGIN_ASSERT(H_ % stride == 0); + PLUGIN_ASSERT(W_ % stride == 0); } -int32_t ReorgDynamic::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, nvinfer1::PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t ReorgDynamic::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, + nvinfer1::PluginTensorDesc const* /*outputDesc*/, void const* const* inputs, void* const* outputs, + void* /*workspace*/, cudaStream_t stream) noexcept { void const* inputData = inputs[0]; void* outputData = outputs[0]; - int32_t const N = 
inputDesc[0].dims.d[0]; - int32_t const C = inputDesc[0].dims.d[1]; - int32_t const H = inputDesc[0].dims.d[2]; - int32_t const W = inputDesc[0].dims.d[3]; - pluginStatus_t status = reorgInference(stream, N, C, H, W, stride, inputData, outputData); + int32_t const N = static_cast<int32_t>(inputDesc[0].dims.d[0]); + int32_t const C_ = static_cast<int32_t>(inputDesc[0].dims.d[1]); + int32_t const H_ = static_cast<int32_t>(inputDesc[0].dims.d[2]); + int32_t const W_ = static_cast<int32_t>(inputDesc[0].dims.d[3]); + pluginStatus_t status = reorgInference(stream, N, C_, H_, W_, stride, inputData, outputData); return status; } @@ -200,16 +201,16 @@ IPluginV2DynamicExt* ReorgDynamic::clone() const noexcept return nullptr; } -ReorgStatic::ReorgStatic(int32_t stride) - : Reorg(stride) +ReorgStatic::ReorgStatic(int32_t stride_) + : Reorg(stride_) { } -ReorgStatic::ReorgStatic(int32_t C, int32_t H, int32_t W, int32_t stride) - : Reorg(stride) - , C(C) - , H(H) - , W(W) +ReorgStatic::ReorgStatic(int32_t C_, int32_t H_, int32_t W_, int32_t stride_) + : Reorg(stride_) + , C(C_) + , H(H_) + , W(W_) { } @@ -229,7 +230,7 @@ char const* ReorgStatic::getPluginVersion() const noexcept return kREORG_PLUGIN_STATIC_VERSION; } -size_t ReorgStatic::getWorkspaceSize(int32_t maxBatchSize) const noexcept +size_t ReorgStatic::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } @@ -257,8 +258,8 @@ Dims ReorgStatic::getOutputDimensions(int32_t index, Dims const* inputs, int32_t return Dims3(inputs[0].d[0] * stride * stride, inputs[0].d[1] / stride, inputs[0].d[2] / stride); } -int32_t ReorgStatic::enqueue( - int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t ReorgStatic::enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t stream) noexcept { void const* inputData = inputs[0]; void* outputData = outputs[0]; @@ -288,29 +289,29 @@ IPluginV2Ext* ReorgStatic::clone() const noexcept // 
Return true if output tensor is broadcast across a batch. bool ReorgStatic::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool ReorgStatic::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool ReorgStatic::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. -void ReorgStatic::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void ReorgStatic::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat floatFormat, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(*inputTypes == DataType::kFLOAT && floatFormat == PluginFormat::kLINEAR); PLUGIN_ASSERT(nbInputs == 1); PLUGIN_ASSERT(nbOutputs == 1); PLUGIN_ASSERT(stride > 0); - C = inputDims[0].d[0]; - H = inputDims[0].d[1]; - W = inputDims[0].d[2]; + C = static_cast<int32_t>(inputDims[0].d[0]); + H = static_cast<int32_t>(inputDims[0].d[1]); + W = static_cast<int32_t>(inputDims[0].d[2]); PLUGIN_ASSERT(H % stride == 0); PLUGIN_ASSERT(W % stride == 0); } @@ -321,7 +322,7 @@ ReorgPluginCreator::ReorgPluginCreator() mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("stride", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast<int32_t>(mPluginAttributes.size()); mFC.fields = 
mPluginAttributes.data(); } @@ -352,7 +353,8 @@ PluginFieldCollection const* ReorgPluginCreator::getFieldNames() n } template -IPluginV2Ext* ReorgPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* ReorgPluginCreator::createPlugin( + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -377,7 +379,7 @@ IPluginV2Ext* ReorgPluginCreator::createPlugin(char const* name, P template IPluginV2Ext* ReorgPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/reorgPlugin/reorgPlugin.h b/plugin/reorgPlugin/reorgPlugin.h index 7cc6d8cd2..5ee3e05bf 100644 --- a/plugin/reorgPlugin/reorgPlugin.h +++ b/plugin/reorgPlugin/reorgPlugin.h @@ -92,6 +92,11 @@ class TRT_DEPRECATED ReorgStatic : public Reorg class ReorgDynamic : public Reorg { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + ReorgDynamic(int32_t stride); ReorgDynamic(void const* buffer, size_t length); diff --git a/plugin/resizeNearestPlugin/resizeNearestPlugin.cpp b/plugin/resizeNearestPlugin/resizeNearestPlugin.cpp index a918f41a9..4cd7618a3 100644 --- a/plugin/resizeNearestPlugin/resizeNearestPlugin.cpp +++ b/plugin/resizeNearestPlugin/resizeNearestPlugin.cpp @@ -38,7 +38,7 @@ ResizeNearestPluginCreator::ResizeNearestPluginCreator() mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("scale", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -57,7 +57,7 @@ PluginFieldCollection const* ResizeNearestPluginCreator::getFieldNames() noexcep return &mFC; } -IPluginV2Ext* 
ResizeNearestPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* ResizeNearestPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -81,7 +81,8 @@ IPluginV2Ext* ResizeNearestPluginCreator::createPlugin(char const* name, PluginF return nullptr; } -IPluginV2Ext* ResizeNearestPluginCreator::deserializePlugin(char const* name, void const* data, size_t length) noexcept +IPluginV2Ext* ResizeNearestPluginCreator::deserializePlugin( + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -116,7 +117,7 @@ Dims ResizeNearest::getOutputDimensions(int32_t index, Dims const* inputDims, in { if (d == input.nbDims - 2 || d == input.nbDims - 1) { - output.d[d] = int32_t(input.d[d] * mScale); + output.d[d] = static_cast<int32_t>(static_cast<float>(input.d[d]) * mScale); } else { @@ -222,19 +223,21 @@ bool ResizeNearest::supportsFormat(DataType type, PluginFormat format) const noe return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); } -int32_t ResizeNearest::enqueue( - int32_t batch_size, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t ResizeNearest::enqueue(int32_t batch_size, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t stream) noexcept { - int32_t nchan = mOutputDims.d[0]; + int32_t nchan = static_cast<int32_t>(mOutputDims.d[0]); float scale = mScale; int2 osize = {dimToInt32(mOutputDims.d[2]), dimToInt32(mOutputDims.d[1])}; - int32_t istride = mInputDims.d[2]; - int32_t ostride = mOutputDims.d[2]; - int32_t ibatchstride = mInputDims.d[1] * istride; - int32_t obatchstride = mOutputDims.d[1] * ostride; + int32_t istride = static_cast<int32_t>(mInputDims.d[2]); + int32_t ostride = static_cast<int32_t>(mOutputDims.d[2]); + int32_t ibatchstride = static_cast<int32_t>(mInputDims.d[1]) * istride; + int32_t obatchstride = static_cast<int32_t>(mOutputDims.d[1]) * ostride; dim3 block(32, 16); - dim3 grid((osize.x - 1) 
/ block.x + 1, (osize.y - 1) / block.y + 1, std::min(batch_size * nchan, 65535)); + dim3 grid(static_cast<uint32_t>((osize.x - 1) / static_cast<int32_t>(block.x) + 1), + static_cast<uint32_t>((osize.y - 1) / static_cast<int32_t>(block.y) + 1), + static_cast<uint32_t>(std::min(batch_size * nchan, 65535))); resizeNearest(grid, block, stream, batch_size * nchan, scale, osize, static_cast<float const*>(inputs[0]), istride, ibatchstride, static_cast<float*>(outputs[0]), ostride, obatchstride); @@ -244,7 +247,7 @@ int32_t ResizeNearest::enqueue( // Return the DataType of the plugin output at the requested index DataType ResizeNearest::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only 1 input and 1 output from the plugin layer PLUGIN_ASSERT(index == 0); @@ -255,21 +258,21 @@ DataType ResizeNearest::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool ResizeNearest::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool ResizeNearest::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool ResizeNearest::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
void ResizeNearest::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept + DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(nbInputs == 1); mInputDims = inputDims[0]; @@ -280,7 +283,7 @@ void ResizeNearest::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dim // Attach the plugin object to an execution context and grant the plugin the access to some context resource. void ResizeNearest::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/roiAlignPlugin/roiAlignPlugin.cpp b/plugin/roiAlignPlugin/roiAlignPlugin.cpp index b563eea1c..a0bee084b 100644 --- a/plugin/roiAlignPlugin/roiAlignPlugin.cpp +++ b/plugin/roiAlignPlugin/roiAlignPlugin.cpp @@ -42,7 +42,7 @@ ROIAlignV3PluginCreator::ROIAlignV3PluginCreator() mPluginAttributes.emplace_back(PluginField("sampling_ratio", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("spatial_scale", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast<int32_t>(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -62,7 +62,7 @@ PluginFieldCollection const* ROIAlignV3PluginCreator::getFieldNames() noexcept } IPluginV3* ROIAlignV3PluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* /*name*/, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { @@ -210,8 
+210,8 @@ int32_t ROIAlignV3::getNbOutputs() const noexcept return 1; } -int32_t ROIAlignV3::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +int32_t ROIAlignV3::configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { return 0; } @@ -256,8 +256,8 @@ int32_t ROIAlignV3::getOutputDataTypes( return 0; } -int32_t ROIAlignV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs, - int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept +int32_t ROIAlignV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* /*shapeInputs*/, + int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { PLUGIN_ASSERT(inputs != nullptr); PLUGIN_ASSERT(nbInputs == 3); @@ -281,8 +281,8 @@ int32_t ROIAlignV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, D return 0; } -int32_t ROIAlignV3::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t ROIAlignV3::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* /*outputDesc*/, + void const* const* inputs, void* const* outputs, void* /*workspace*/, cudaStream_t stream) noexcept { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); @@ -346,15 +346,15 @@ int32_t ROIAlignV3::onShapeChange( // Check batch_indices matches rois in length PLUGIN_ASSERT(rois.d[0] == batchIndices.d[0]); - mFeatureLength = in[0].dims.d[1]; - mHeight = in[0].dims.d[2]; - mWidth = in[0].dims.d[3]; + mFeatureLength = static_cast<int32_t>(in[0].dims.d[1]); + mHeight = static_cast<int32_t>(in[0].dims.d[2]); + mWidth = static_cast<int32_t>(in[0].dims.d[3]); - mROICount = in[1].dims.d[0]; + mROICount 
= static_cast<int32_t>(in[1].dims.d[0]); return 0; } -IPluginV3* ROIAlignV3::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* ROIAlignV3::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -368,13 +368,13 @@ PluginFieldCollection const* ROIAlignV3::getFieldsToSerialize() noexcept mDataToSerialize.emplace_back("output_width", &mOutputWidth, PluginFieldType::kINT32, 1); mDataToSerialize.emplace_back("sampling_ratio", &mSamplingRatio, PluginFieldType::kINT32, 1); mDataToSerialize.emplace_back("spatial_scale", &mSpatialScale, PluginFieldType::kFLOAT32, 1); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast<int32_t>(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } -size_t ROIAlignV3::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ROIAlignV3::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } diff --git a/plugin/roiAlignPlugin/roiAlignPluginLegacy.cpp b/plugin/roiAlignPlugin/roiAlignPluginLegacy.cpp index 3051137b9..56d592885 100644 --- a/plugin/roiAlignPlugin/roiAlignPluginLegacy.cpp +++ b/plugin/roiAlignPlugin/roiAlignPluginLegacy.cpp @@ -43,7 +43,7 @@ ROIAlignPluginCreator::ROIAlignPluginCreator() mPluginAttributes.emplace_back(PluginField("sampling_ratio", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("spatial_scale", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast<int32_t>(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -62,7 +62,7 @@ PluginFieldCollection const* ROIAlignPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2DynamicExt* ROIAlignPluginCreator::createPlugin(char 
const* name, PluginFieldCollection const* fc) noexcept +IPluginV2DynamicExt* ROIAlignPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -121,7 +121,7 @@ IPluginV2DynamicExt* ROIAlignPluginCreator::createPlugin(char const* name, Plugi } IPluginV2DynamicExt* ROIAlignPluginCreator::deserializePlugin( - char const* name, void const* data, size_t length) noexcept + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -159,8 +159,8 @@ void ROIAlign::destroy() noexcept delete this; } -size_t ROIAlign::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ROIAlign::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -417,9 +417,9 @@ void ROIAlign::configurePlugin( checkValidInputs(in, nbInputs); - mFeatureLength = in[0].desc.dims.d[1]; - mHeight = in[0].desc.dims.d[2]; - mWidth = in[0].desc.dims.d[3]; + mFeatureLength = static_cast<int32_t>(in[0].desc.dims.d[1]); + mHeight = static_cast<int32_t>(in[0].desc.dims.d[2]); + mWidth = static_cast<int32_t>(in[0].desc.dims.d[3]); - mROICount = in[1].desc.dims.d[0]; + mROICount = static_cast<int32_t>(in[1].desc.dims.d[0]); } diff --git a/plugin/roiAlignPlugin/roiAlignPluginLegacy.h b/plugin/roiAlignPlugin/roiAlignPluginLegacy.h index fc1cf5723..739dfefb7 100644 --- a/plugin/roiAlignPlugin/roiAlignPluginLegacy.h +++ b/plugin/roiAlignPlugin/roiAlignPluginLegacy.h @@ -33,6 +33,11 @@ namespace plugin class ROIAlign : public IPluginV2DynamicExt { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + ROIAlign(int32_t outputHeight, int32_t outputWidth, int32_t samplingRatio, int32_t mode, float spatialScale, int32_t aligned); ROIAlign(void const* data, size_t 
length); diff --git a/plugin/scatterElementsPlugin/scatterElementsPlugin.cpp b/plugin/scatterElementsPlugin/scatterElementsPlugin.cpp index fef8c0ca8..c7fb06bfd 100644 --- a/plugin/scatterElementsPlugin/scatterElementsPlugin.cpp +++ b/plugin/scatterElementsPlugin/scatterElementsPlugin.cpp @@ -93,8 +93,8 @@ char const* ScatterElementsPluginV3::getPluginVersion() const noexcept } int32_t ScatterElementsPluginV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, - IExprBuilder& exprBuilder) noexcept + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, + IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -112,7 +112,7 @@ int32_t ScatterElementsPluginV3::getOutputShapes(DimsExprs const* inputs, int32_ } int32_t ScatterElementsPluginV3::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept + void const* const* inputs, void* const* outputs, void* /*workspace*/, cudaStream_t stream) noexcept { try { @@ -153,10 +153,10 @@ PluginFieldCollection const* ScatterElementsPluginV3::getFieldsToSerialize() noe mDataToSerialize.clear(); // "reduction" field is serialized as string mDataToSerialize.emplace_back("reduction", kREDUCE_ENUM_TO_STR.at(mReduction).c_str(), PluginFieldType::kCHAR, - kREDUCE_ENUM_TO_STR.at(mReduction).size()); + static_cast<int32_t>(kREDUCE_ENUM_TO_STR.at(mReduction).size())); mDataToSerialize.emplace_back("axis", &mAxis, PluginFieldType::kINT32, 1); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast<int32_t>(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -205,14 +205,14 @@ ScatterElementsPluginV3* ScatterElementsPluginV3::clone() noexcept return nullptr; } -IPluginV3* 
ScatterElementsPluginV3::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* ScatterElementsPluginV3::attachToContext(IPluginResourceContext* /*context*/) noexcept { ScatterElementsPluginV3* obj = clone(); return obj; } -int32_t ScatterElementsPluginV3::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +int32_t ScatterElementsPluginV3::configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { try { @@ -244,8 +244,8 @@ int32_t ScatterElementsPluginV3::getOutputDataTypes( return pluginStatus_t::STATUS_FAILURE; } -size_t ScatterElementsPluginV3::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ScatterElementsPluginV3::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -284,7 +284,7 @@ ScatterElementsPluginV3Creator::ScatterElementsPluginV3Creator() gPluginAttributes.clear(); gPluginAttributes.emplace_back(PluginField("reduction")); gPluginAttributes.emplace_back(PluginField("axis")); - gFC.nbFields = gPluginAttributes.size(); + gFC.nbFields = static_cast<int32_t>(gPluginAttributes.size()); gFC.fields = gPluginAttributes.data(); } @@ -315,7 +315,7 @@ void ScatterElementsPluginV3Creator::setPluginNamespace(char const* libNamespace } IPluginV3* ScatterElementsPluginV3Creator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* /*name*/, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { std::string reductionArg; int32_t axisArg = 0; diff --git a/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.cpp b/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.cpp index 
5a0b13724..c673e656d 100644 --- a/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.cpp +++ b/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.cpp @@ -80,7 +80,7 @@ char const* ScatterElementsPluginV2::getPluginVersion() const noexcept } DimsExprs ScatterElementsPluginV2::getOutputDimensions( - int32_t index, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t index, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -99,7 +99,7 @@ DimsExprs ScatterElementsPluginV2::getOutputDimensions( } int32_t ScatterElementsPluginV2::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept + void const* const* inputs, void* const* outputs, void* /*workspace*/, cudaStream_t stream) noexcept { try { @@ -173,8 +173,8 @@ IPluginV2DynamicExt* ScatterElementsPluginV2::clone() const noexcept return plugin; } -void ScatterElementsPluginV2::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void ScatterElementsPluginV2::configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { try { @@ -200,8 +200,8 @@ DataType ScatterElementsPluginV2::getOutputDataType( return inputTypes[kDATA_TENSOR_IDX]; } -size_t ScatterElementsPluginV2::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ScatterElementsPluginV2::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -225,7 +225,7 @@ ScatterElementsPluginV2Creator::ScatterElementsPluginV2Creator() gPluginAttributes.clear(); 
gPluginAttributes.emplace_back(PluginField("reduction")); gPluginAttributes.emplace_back(PluginField("axis")); - gFC.nbFields = gPluginAttributes.size(); + gFC.nbFields = static_cast<int32_t>(gPluginAttributes.size()); gFC.fields = gPluginAttributes.data(); } @@ -255,7 +255,7 @@ void ScatterElementsPluginV2Creator::setPluginNamespace(char const* libNamespace } IPluginV2DynamicExt* ScatterElementsPluginV2Creator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { std::string reductionArg; int32_t axisArg = 0; @@ -299,7 +299,7 @@ IPluginV2DynamicExt* ScatterElementsPluginV2Creator::createPlugin( } IPluginV2DynamicExt* ScatterElementsPluginV2Creator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { ScatterElementsPluginV2* plugin = new ScatterElementsPluginV2(serialData, serialLength); plugin->setPluginNamespace(mNamespace.c_str()); diff --git a/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.h b/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.h index 2c5aa6e1a..7179a7302 100644 --- a/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.h +++ b/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.h @@ -29,6 +29,11 @@ namespace plugin class ScatterElementsPluginV2 final : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + ScatterElementsPluginV2() = delete; ScatterElementsPluginV2(ScatterElementsPluginV2 const&) = delete; ScatterElementsPluginV2(std::string const&, int32_t); diff --git a/plugin/scatterPlugin/scatterPlugin.cpp b/plugin/scatterPlugin/scatterPlugin.cpp index 100d69c57..9ddc83d9b 100644 --- 
a/plugin/scatterPlugin/scatterPlugin.cpp +++ b/plugin/scatterPlugin/scatterPlugin.cpp @@ -39,7 +39,7 @@ int32_t ScatterND::getNbOutputs() const noexcept } DimsExprs ScatterND::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t /*outputIndex*/, DimsExprs const* inputs, int32_t /*nbInputs*/, IExprBuilder& /*exprBuilder*/) noexcept { // output should have same dimensions as data tensor DimsExprs ret = inputs[dataTensorIdx]; @@ -77,8 +77,8 @@ bool ScatterND::supportsFormatCombination( return ret; } -void ScatterND::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void ScatterND::configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { } @@ -87,13 +87,13 @@ int32_t ScatterND::calculateNumSlices(Dims indexTensorDims) const noexcept { int32_t nSlices = 1; for (int32_t i = 0; i < indexTensorDims.nbDims - 1; i++) { - nSlices *= indexTensorDims.d[i]; + nSlices *= static_cast<int32_t>(indexTensorDims.d[i]); } return nSlices; } -size_t ScatterND::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ScatterND::getWorkspaceSize(PluginTensorDesc const* inputs, int32_t /*nbInputs*/, + PluginTensorDesc const* outputs, int32_t /*nbOutputs*/) const noexcept { int32_t nSlices = calculateNumSlices(inputs[indexTensorIdx].dims); // transformCoeffs + transformed indices @@ -107,7 +107,7 @@ void ScatterND::calculateTransformCoeff( for (int32_t i = indexRank - 1, nIndx = 1; i >= 0; i--) { pitches.push_back(nIndx); - nIndx *= dataTensorDims.d[i]; + nIndx *= static_cast<int32_t>(dataTensorDims.d[i]); } std::reverse(pitches.begin(), pitches.end()); // last dimension pitch is always one (assuming linear mem) @@ -120,9 +120,9 @@ int32_t 
ScatterND::calculateCopySize(Dims const& dataDims) const noexcept { int32_t copySize = 1; for (int32_t i = 0; i < dataDims.nbDims; i++) { - copySize *= dataDims.d[i]; + copySize *= static_cast<int32_t>(dataDims.d[i]); } - copySize *= sizeof(float); + copySize *= static_cast<int32_t>(sizeof(float)); return copySize; } @@ -138,7 +138,7 @@ int32_t ScatterND::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc c Dims dataDims = inputDesc[dataTensorIdx].dims; - int32_t indexRank = IndexDims.d[IndexDims.nbDims - 1]; + int32_t indexRank = static_cast<int32_t>(IndexDims.d[IndexDims.nbDims - 1]); PLUGIN_ASSERT(indexRank <= dataDims.nbDims); int32_t nSlices = calculateNumSlices(IndexDims); @@ -163,7 +163,7 @@ int32_t ScatterND::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc c for (int32_t i = indexRank; i < dataDims.nbDims; i++) { - rowSize *= dataDims.d[i]; + rowSize *= static_cast<int32_t>(dataDims.d[i]); } calculateTransformCoeff(dataDims, indexRank, transformCoeff); @@ -180,7 +180,7 @@ size_t ScatterND::getSerializationSize() const noexcept return 0; } -void ScatterND::serialize(void* buffer) const noexcept +void ScatterND::serialize(void* /*buffer*/) const noexcept { return; } @@ -198,14 +198,15 @@ char const* ScatterND::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index DataType ScatterND::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index == 0); return inputTypes[dataTensorIdx]; } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
-void ScatterND::attachToContext(cudnnContext* cudnn, cublasContext* cublas, IGpuAllocator* gpuAllocator) noexcept +void ScatterND::attachToContext( + cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, IGpuAllocator* /*gpuAllocator*/) noexcept { return; } @@ -265,7 +266,7 @@ PluginFieldCollection const* ScatterNDPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* ScatterNDPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* ScatterNDPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* /*fc*/) noexcept { try { @@ -281,7 +282,7 @@ IPluginV2Ext* ScatterNDPluginCreator::createPlugin(char const* name, PluginField } IPluginV2Ext* ScatterNDPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* /*serialData*/, size_t /*serialLength*/) noexcept { try { diff --git a/plugin/scatterPlugin/scatterPlugin.h b/plugin/scatterPlugin/scatterPlugin.h index 8c979e511..4983aa0bf 100644 --- a/plugin/scatterPlugin/scatterPlugin.h +++ b/plugin/scatterPlugin/scatterPlugin.h @@ -29,6 +29,11 @@ namespace plugin class ScatterND : public IPluginV2DynamicExt { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + ScatterND(); ~ScatterND() override = default; diff --git a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.cpp b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.cpp index 9e8a7ff42..44d15c176 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.cpp +++ b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.cpp @@ -223,7 +223,7 @@ PluginFieldCollection const* SkipLayerNormInterleavedPluginBase::getFieldsToSeri mDataToSerialize.emplace_back( "gamma", static_cast(mGamma.values), PluginFieldType::kFLOAT16, 
mGamma.count); PLUGIN_ASSERT(mGamma.type == kPARAM_TYPE); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast<int32_t>(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -265,7 +265,7 @@ int32_t SkipLayerNormInterleavedPluginBase::onShapeChange( return pluginStatus_t::STATUS_FAILURE; } -IPluginV3* SkipLayerNormInterleavedPluginBase::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* SkipLayerNormInterleavedPluginBase::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -284,8 +284,8 @@ int32_t SkipLayerNormInterleavedPluginHFace::enqueue(PluginTensorDesc const* inp auto const oDesc = outputDesc[0]; checkDescs(iDesc, sDesc, oDesc); - const int32_t ld = iDesc.dims.d[1]; - const int32_t total = iDesc.dims.d[2]; + const int32_t ld = static_cast<int32_t>(iDesc.dims.d[1]); + const int32_t total = static_cast<int32_t>(iDesc.dims.d[2]); float const dqScaleIn = iDesc.scale; float const dqScaleSkip = sDesc.scale; float const qScale = 1.F / oDesc.scale; @@ -326,8 +326,8 @@ int32_t SkipLayerNormInterleavedPluginMTron::enqueue(PluginTensorDesc const* inp checkDescs(iDesc, sDesc, oDesc); PLUGIN_VALIDATE(std::equal(iDesc.dims.d, iDesc.dims.d + iDesc.dims.nbDims, pDesc.dims.d)); - const int32_t ld = iDesc.dims.d[1]; - const int32_t total = iDesc.dims.d[2]; + const int32_t ld = static_cast<int32_t>(iDesc.dims.d[1]); + const int32_t total = static_cast<int32_t>(iDesc.dims.d[2]); float const dqScaleIn = iDesc.scale; float const dqScaleSkip = sDesc.scale; float const qScale = 1.F / oDesc.scale; @@ -395,8 +395,8 @@ bool SkipLayerNormInterleavedPluginBase::supportsFormatCombination( } int32_t SkipLayerNormInterleavedPluginBase::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, - IExprBuilder& exprBuilder) noexcept + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* 
outputs, int32_t nbOutputs, + IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -438,14 +438,14 @@ int32_t SkipLayerNormInterleavedPluginBase::getOutputDataTypes( return pluginStatus_t::STATUS_FAILURE; } -int32_t SkipLayerNormInterleavedPluginBase::configurePlugin(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +int32_t SkipLayerNormInterleavedPluginBase::configurePlugin(DynamicPluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -size_t SkipLayerNormInterleavedPluginBase::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t SkipLayerNormInterleavedPluginBase::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -493,7 +493,7 @@ SkipLayerNormInterleavedPluginBaseCreator::SkipLayerNormInterleavedPluginBaseCre mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("beta")); mPluginAttributes.emplace_back(PluginField("gamma")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -528,7 +528,7 @@ PluginFieldCollection const* SkipLayerNormInterleavedPluginBaseCreator::getField } IPluginV3* SkipLayerNormInterleavedPluginHFaceCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { @@ -548,7 +548,7 @@ IPluginV3* SkipLayerNormInterleavedPluginHFaceCreator::createPlugin( } IPluginV3* SkipLayerNormInterleavedPluginMTronCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, 
TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.cpp b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.cpp index 9c776a84b..b94294d5e 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.cpp +++ b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.cpp @@ -187,7 +187,7 @@ IPluginV2DynamicExt* SkipLayerNormInterleavedPluginMTronLegacy::clone() const no } DimsExprs SkipLayerNormInterleavedPluginBaseLegacy::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -259,8 +259,8 @@ void SkipLayerNormInterleavedPluginBaseLegacy::configurePlugin(DynamicPluginTens } } -size_t SkipLayerNormInterleavedPluginBaseLegacy::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t SkipLayerNormInterleavedPluginBaseLegacy::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -279,8 +279,8 @@ int32_t SkipLayerNormInterleavedPluginHFaceLegacy::enqueue(PluginTensorDesc cons auto const oDesc = outputDesc[0]; checkDescs(iDesc, sDesc, oDesc); - int32_t const ld = iDesc.dims.d[1]; - int32_t const total = iDesc.dims.d[2]; + int32_t const ld = static_cast(iDesc.dims.d[1]); + int32_t const total = static_cast(iDesc.dims.d[2]); float const dqScaleIn = iDesc.scale; float const dqScaleSkip = sDesc.scale; float const qScale = 1.F / oDesc.scale; @@ -321,8 +321,8 @@ int32_t SkipLayerNormInterleavedPluginMTronLegacy::enqueue(PluginTensorDesc cons checkDescs(iDesc, sDesc, oDesc); 
PLUGIN_VALIDATE(std::equal(iDesc.dims.d, iDesc.dims.d + iDesc.dims.nbDims, pDesc.dims.d)); - int32_t const ld = iDesc.dims.d[1]; - int32_t const total = iDesc.dims.d[2]; + int32_t const ld = static_cast(iDesc.dims.d[1]); + int32_t const total = static_cast(iDesc.dims.d[2]); float const dqScaleIn = iDesc.scale; float const dqScaleSkip = sDesc.scale; float const qScale = 1.F / oDesc.scale; @@ -481,7 +481,7 @@ SkipLayerNormInterleavedPluginBaseLegacyCreator::SkipLayerNormInterleavedPluginB mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("beta")); mPluginAttributes.emplace_back(PluginField("gamma")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.h b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.h index e05a15a0b..e542d7194 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.h +++ b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.h @@ -51,6 +51,11 @@ int32_t launch_large_mtron(cudaStream_t stream, int32_t const ld, int32_t const class SkipLayerNormInterleavedPluginBaseLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + SkipLayerNormInterleavedPluginBaseLegacy( std::string const& name, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma); @@ -99,6 +104,11 @@ class SkipLayerNormInterleavedPluginBaseLegacy : public nvinfer1::IPluginV2Dynam class SkipLayerNormInterleavedPluginHFaceLegacy : public SkipLayerNormInterleavedPluginBaseLegacy { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using 
nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + SkipLayerNormInterleavedPluginHFaceLegacy( std::string const& name, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma); @@ -124,6 +134,11 @@ class SkipLayerNormInterleavedPluginHFaceLegacy : public SkipLayerNormInterleave class SkipLayerNormInterleavedPluginMTronLegacy : public SkipLayerNormInterleavedPluginBaseLegacy { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + SkipLayerNormInterleavedPluginMTronLegacy( std::string const& name, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma); diff --git a/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp b/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp index ac599ce3c..e274090d8 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp +++ b/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp @@ -107,8 +107,9 @@ IPluginV3* SkipLayerNormPluginV3::clone() noexcept return nullptr; } -int32_t SkipLayerNormPluginV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs, - int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept +int32_t SkipLayerNormPluginV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t /*nbOutputs*/, + IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -168,20 +169,20 @@ bool SkipLayerNormPluginV3::supportsFormatCombination( return false; } -int32_t SkipLayerNormPluginV3::configurePlugin(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +int32_t SkipLayerNormPluginV3::configurePlugin(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, 
+ DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -size_t SkipLayerNormPluginV3::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t SkipLayerNormPluginV3::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } int32_t SkipLayerNormPluginV3::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* workspace, + nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* /*workspace*/, cudaStream_t stream) noexcept { int32_t status = -1; @@ -189,7 +190,7 @@ int32_t SkipLayerNormPluginV3::enqueue(nvinfer1::PluginTensorDesc const* inputDe { PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const inputVolume = volume(inputDesc[0].dims); + int32_t const inputVolume = static_cast(volume(inputDesc[0].dims)); DataType iType = inputDesc->type; // Our plugin outputs only one tensor @@ -259,7 +260,7 @@ int32_t SkipLayerNormPluginV3::enqueue(nvinfer1::PluginTensorDesc const* inputDe { PLUGIN_ERROR(("Unsupported type error, expected [kINT8,kHALF,kFLOAT], but received " + std::to_string(static_cast(iType))) - .c_str()); + .c_str()); } } catch (std::exception const& e) @@ -335,7 +336,7 @@ PluginFieldCollection const* SkipLayerNormPluginV3::getFieldsToSerialize() noexc } } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; @@ -391,7 +392,7 @@ int32_t SkipLayerNormPluginV3::onShapeChange( PLUGIN_VALIDATE(std::equal(inDims0.d, inDims0.d + 
inDims0.nbDims, inDims1.d)); PLUGIN_VALIDATE(inDims0.nbDims == 5); - mLd = inDims0.d[HDIM]; // hiddensize + mLd = static_cast(inDims0.d[HDIM]); // hiddensize PLUGIN_VALIDATE(mLd != 0); PLUGIN_VALIDATE(inDims0.d[3] == 1); PLUGIN_VALIDATE(inDims0.d[4] == 1); @@ -408,7 +409,7 @@ int32_t SkipLayerNormPluginV3::onShapeChange( return pluginStatus_t::STATUS_FAILURE; } -IPluginV3* SkipLayerNormPluginV3::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* SkipLayerNormPluginV3::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -447,7 +448,7 @@ SkipLayerNormPluginV3Creator::SkipLayerNormPluginV3Creator() mPluginAttributes.emplace_back(PluginField("beta")); mPluginAttributes.emplace_back(PluginField("gamma")); mPluginAttributes.emplace_back(PluginField("bias")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -467,7 +468,7 @@ PluginFieldCollection const* SkipLayerNormPluginV3Creator::getFieldNames() noexc } IPluginV3* SkipLayerNormPluginV3Creator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { @@ -555,7 +556,7 @@ SkipLayerNormVarSeqlenPluginV3::SkipLayerNormVarSeqlenPluginV3( : mLayerName(name) , mGammaDev(nullptr) , mBetaDev(nullptr) - , mLd(beta.count) + , mLd(static_cast(beta.count)) , mType(type) , mBiasDev(nullptr) { @@ -617,8 +618,8 @@ IPluginV3* SkipLayerNormVarSeqlenPluginV3::clone() noexcept } int32_t SkipLayerNormVarSeqlenPluginV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, - IExprBuilder& exprBuilder) noexcept + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, + IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -682,20 
+683,20 @@ bool SkipLayerNormVarSeqlenPluginV3::supportsFormatCombination( return false; } -int32_t SkipLayerNormVarSeqlenPluginV3::configurePlugin(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +int32_t SkipLayerNormVarSeqlenPluginV3::configurePlugin(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -size_t SkipLayerNormVarSeqlenPluginV3::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t SkipLayerNormVarSeqlenPluginV3::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } int32_t SkipLayerNormVarSeqlenPluginV3::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* workspace, + nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* /*workspace*/, cudaStream_t stream) noexcept { int32_t status = -1; @@ -703,7 +704,7 @@ int32_t SkipLayerNormVarSeqlenPluginV3::enqueue(nvinfer1::PluginTensorDesc const { PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const inputVolume = volume(inputDesc[0].dims); + int32_t const inputVolume = static_cast(volume(inputDesc[0].dims)); PLUGIN_VALIDATE(inputVolume % mLd == 0 && "inconsistent dimensions"); DataType iType = inputDesc->type; @@ -774,7 +775,7 @@ int32_t SkipLayerNormVarSeqlenPluginV3::enqueue(nvinfer1::PluginTensorDesc const { PLUGIN_VALIDATE(("Unsupported type error, expected [kINT8,kHALF,kFLOAT], but received " + std::to_string(static_cast(iType))) - .c_str()); + 
.c_str()); } } catch (std::exception const& e) @@ -849,7 +850,7 @@ PluginFieldCollection const* SkipLayerNormVarSeqlenPluginV3::getFieldsToSerializ } } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; @@ -910,7 +911,7 @@ int32_t SkipLayerNormVarSeqlenPluginV3::onShapeChange( return pluginStatus_t::STATUS_FAILURE; } -IPluginV3* SkipLayerNormVarSeqlenPluginV3::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* SkipLayerNormVarSeqlenPluginV3::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -948,7 +949,7 @@ SkipLayerNormVarSeqlenPluginV3Creator::SkipLayerNormVarSeqlenPluginV3Creator() mPluginAttributes.emplace_back(PluginField("beta")); mPluginAttributes.emplace_back(PluginField("gamma")); mPluginAttributes.emplace_back(PluginField("bias")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -968,7 +969,7 @@ PluginFieldCollection const* SkipLayerNormVarSeqlenPluginV3Creator::getFieldName } IPluginV3* SkipLayerNormVarSeqlenPluginV3Creator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.cpp b/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.cpp index 2a679ac10..3dfd47274 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.cpp +++ b/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.cpp @@ -113,7 +113,7 @@ IPluginV2DynamicExt* SkipLayerNormPluginDynamic::clone() const noexcept { BERT_DEBUG_MSG("SkipLayerNormPluginDynamic clone"); - auto* p = new SkipLayerNormPluginDynamic(mLayerName, mType, mLd, mBeta, mGamma, mBias); + auto* p = new 
SkipLayerNormPluginDynamic(mLayerName, mType, static_cast(mLd), mBeta, mGamma, mBias); p->initialize(); p->setPluginNamespace(mNamespace.c_str()); return p; @@ -126,7 +126,7 @@ IPluginV2DynamicExt* SkipLayerNormPluginDynamic::clone() const noexcept } DimsExprs SkipLayerNormPluginDynamic::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -231,8 +231,8 @@ void SkipLayerNormPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* } } -size_t SkipLayerNormPluginDynamic::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t SkipLayerNormPluginDynamic::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -245,7 +245,7 @@ int32_t SkipLayerNormPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, P { PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const inputVolume = volume(inputDesc[0].dims); + int32_t const inputVolume = static_cast(volume(inputDesc[0].dims)); DataType iType = inputDesc->type; // Our plugin outputs only one tensor @@ -315,7 +315,7 @@ int32_t SkipLayerNormPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, P { PLUGIN_ERROR(("Unsupported type error, expected [kINT8,kHALF,kFLOAT], but received " + std::to_string(static_cast(iType))) - .c_str()); + .c_str()); } } catch (std::exception const& e) @@ -442,7 +442,7 @@ SkipLayerNormPluginDynamicCreator::SkipLayerNormPluginDynamicCreator() mPluginAttributes.emplace_back(PluginField("beta")); mPluginAttributes.emplace_back(PluginField("gamma")); mPluginAttributes.emplace_back(PluginField("bias")); - mFC.nbFields = mPluginAttributes.size(); + 
mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -657,7 +657,7 @@ IPluginV2DynamicExt* SkipLayerNormVarSeqlenPlugin::clone() const noexcept } DimsExprs SkipLayerNormVarSeqlenPlugin::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -758,8 +758,8 @@ void SkipLayerNormVarSeqlenPlugin::configurePlugin(DynamicPluginTensorDesc const } } -size_t SkipLayerNormVarSeqlenPlugin::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t SkipLayerNormVarSeqlenPlugin::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -772,7 +772,7 @@ int32_t SkipLayerNormVarSeqlenPlugin::enqueue(PluginTensorDesc const* inputDesc, { PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const inputVolume = volume(inputDesc[0].dims); + int32_t const inputVolume = static_cast(volume(inputDesc[0].dims)); PLUGIN_VALIDATE(inputVolume % mLd == 0 && "inconsistent dimensions"); DataType iType = inputDesc->type; @@ -843,7 +843,7 @@ int32_t SkipLayerNormVarSeqlenPlugin::enqueue(PluginTensorDesc const* inputDesc, { PLUGIN_VALIDATE(("Unsupported type error, expected [kINT8,kHALF,kFLOAT], but received " + std::to_string(static_cast(iType))) - .c_str()); + .c_str()); } } catch (std::exception const& e) @@ -954,7 +954,7 @@ SkipLayerNormVarSeqlenPluginCreator::SkipLayerNormVarSeqlenPluginCreator() mPluginAttributes.emplace_back(PluginField("beta")); mPluginAttributes.emplace_back(PluginField("gamma")); mPluginAttributes.emplace_back(PluginField("bias")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = 
static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.h b/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.h index abd4ff81e..156373e22 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.h +++ b/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.h @@ -46,6 +46,11 @@ int32_t computeSkipLayerNorm(cudaStream_t stream, int32_t const ld, int32_t cons class SkipLayerNormPluginDynamic : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + SkipLayerNormPluginDynamic(const std::string name, const nvinfer1::DataType type, int32_t const ld, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma, nvinfer1::Weights const& bias); @@ -101,11 +106,6 @@ class SkipLayerNormPluginDynamic : public nvinfer1::IPluginV2DynamicExt bert::WeightsWithOwnership mBias; size_t mParamWordsize{}; - - using IPluginV2::enqueue; - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2Ext::configurePlugin; }; class SkipLayerNormPluginDynamicCreator : public nvinfer1::IPluginCreator @@ -137,6 +137,11 @@ class SkipLayerNormPluginDynamicCreator : public nvinfer1::IPluginCreator class SkipLayerNormVarSeqlenPlugin : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + SkipLayerNormVarSeqlenPlugin(const std::string name, const nvinfer1::DataType type, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma, nvinfer1::Weights const& bias); @@ -192,11 +197,6 @@ class SkipLayerNormVarSeqlenPlugin : public 
nvinfer1::IPluginV2DynamicExt bert::WeightsWithOwnership mBias; size_t mParamWordsize{}; - - using IPluginV2::enqueue; - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2Ext::configurePlugin; }; class SkipLayerNormVarSeqlenPluginCreator : public nvinfer1::IPluginCreator diff --git a/plugin/specialSlicePlugin/specialSlicePlugin.cpp b/plugin/specialSlicePlugin/specialSlicePlugin.cpp index 56748c4ba..32a49e4d0 100644 --- a/plugin/specialSlicePlugin/specialSlicePlugin.cpp +++ b/plugin/specialSlicePlugin/specialSlicePlugin.cpp @@ -31,7 +31,7 @@ char const* const kSPECIALSLICE_PLUGIN_NAME{"SpecialSlice_TRT"}; SpecialSlicePluginCreator::SpecialSlicePluginCreator() { - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -50,7 +50,8 @@ PluginFieldCollection const* SpecialSlicePluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* SpecialSlicePluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* SpecialSlicePluginCreator::createPlugin( + char const* /*name*/, PluginFieldCollection const* /*fc*/) noexcept { try { @@ -67,7 +68,8 @@ IPluginV2Ext* SpecialSlicePluginCreator::createPlugin(char const* name, PluginFi return nullptr; } -IPluginV2Ext* SpecialSlicePluginCreator::deserializePlugin(char const* name, void const* data, size_t length) noexcept +IPluginV2Ext* SpecialSlicePluginCreator::deserializePlugin( + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -84,7 +86,7 @@ IPluginV2Ext* SpecialSlicePluginCreator::deserializePlugin(char const* name, voi return nullptr; } -size_t SpecialSlice::getWorkspaceSize(int32_t) const noexcept +size_t SpecialSlice::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } @@ -185,8 +187,8 @@ Dims SpecialSlice::getOutputDimensions(int32_t index, Dims const* inputDims, int return output; } -int32_t 
SpecialSlice::enqueue( - int32_t batch_size, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t SpecialSlice::enqueue(int32_t batch_size, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t stream) noexcept { specialSlice(stream, batch_size, mBboxesCnt, inputs[0], outputs[0]); @@ -196,7 +198,7 @@ int32_t SpecialSlice::enqueue( // Return the DataType of the plugin output at the requested index DataType SpecialSlice::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only 1 input and 1 output from the plugin layer PLUGIN_ASSERT(index == 0); @@ -207,32 +209,33 @@ DataType SpecialSlice::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool SpecialSlice::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool SpecialSlice::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool SpecialSlice::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
-void SpecialSlice::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void SpecialSlice::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(nbInputs == 1); PLUGIN_ASSERT(nbOutputs == 1); - mBboxesCnt = inputDims[0].d[0]; + mBboxesCnt = static_cast(inputDims[0].d[0]); } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. void SpecialSlice::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/splitPlugin/split.h b/plugin/splitPlugin/split.h index 4553b28aa..ba4ac438d 100644 --- a/plugin/splitPlugin/split.h +++ b/plugin/splitPlugin/split.h @@ -46,11 +46,6 @@ class TRT_DEPRECATED SplitPlugin final : public nvinfer1::IPluginV2DynamicExt int32_t _x_stride, _y_stride, _z_stride; std::shared_ptr deviceVectors; - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; - protected: void deserialize(void const* serialData, size_t serialLength) noexcept { @@ -68,6 +63,11 @@ class TRT_DEPRECATED SplitPlugin final : public nvinfer1::IPluginV2DynamicExt } public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using 
nvinfer1::IPluginV2DynamicExt::enqueue; + SplitPlugin(int32_t axis, int32_t* const& output_lengths, int32_t noutput) : _axis(axis) , _output_lengths(std::vector(output_lengths, output_lengths + noutput)) @@ -132,7 +132,7 @@ class TRT_DEPRECATED SplitPlugin final : public nvinfer1::IPluginV2DynamicExt } int32_t getNbOutputs() const noexcept override { - return _output_lengths.size(); + return static_cast(_output_lengths.size()); } void attachToContext( cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, nvinfer1::IGpuAllocator* /*allocator*/) noexcept override diff --git a/plugin/vc/checkMacrosPlugin.h b/plugin/vc/checkMacrosPlugin.h index e35cc64f3..fec821768 100644 --- a/plugin/vc/checkMacrosPlugin.h +++ b/plugin/vc/checkMacrosPlugin.h @@ -49,7 +49,7 @@ class LogStream : public std::ostream return mLogStreamMutex; } LogStream() - : std::ostream(&buffer){}; + : std::ostream(&buffer) {}; }; // Use mutex to protect multi-stream write to buffer @@ -88,7 +88,7 @@ extern LogStream gLogInfo; extern LogStream gLogVerbose; void reportValidationFailure(char const* msg, char const* file, int32_t line); -void reportAssertion(char const* msg, char const* file, int32_t line); +[[noreturn]] void reportAssertion(char const* msg, char const* file, int32_t line); void logError(char const* msg, char const* file, char const* fn, int32_t line); [[noreturn]] void throwCudaError( @@ -173,31 +173,33 @@ inline void caughtError(std::exception const& e) #define PLUGIN_API_CHECK_ENUM_RANGE_RETVAL(Type, val, retval) \ PLUGIN_API_CHECK_RETVAL(int32_t(val) >= 0 && int32_t(val) < EnumMax(), retval) +#undef PLUGIN_CHECK_CUDA #define PLUGIN_CHECK_CUDA(call) \ do \ { \ - cudaError_t status = call; \ - if (status != cudaSuccess) \ + cudaError_t status_check_cuda_ = call; \ + if (status_check_cuda_ != cudaSuccess) \ { \ - return status; \ + return status_check_cuda_; \ } \ } while (0) #define PLUGIN_CUASSERT(status_) \ do \ { \ - auto s_ = status_; \ - if (s_ != cudaSuccess) \ + auto s_cuassert_ = 
status_; \ + if (s_cuassert_ != cudaSuccess) \ { \ - char const* msg = cudaGetErrorString(s_); \ - nvinfer1::plugin::throwCudaError(__FILE__, FN_NAME, __LINE__, s_, msg); \ + char const* msg_cuassert_ = cudaGetErrorString(s_cuassert_); \ + nvinfer1::plugin::throwCudaError(__FILE__, FN_NAME, __LINE__, s_cuassert_, msg_cuassert_); \ } \ } while (0) // On MSVC, nested macros don't expand correctly without some help, so use TRT_EXPAND to help it out. #define TRT_EXPAND(x) x #define GET_MACRO(_1, _2, NAME, ...) NAME -#define PLUGIN_VALIDATE(...) TRT_EXPAND(GET_MACRO(__VA_ARGS__, PLUGIN_VALIDATE_MSG, PLUGIN_VALIDATE_DEFAULT, )(__VA_ARGS__)) +#define PLUGIN_VALIDATE(...) \ + TRT_EXPAND(GET_MACRO(__VA_ARGS__, PLUGIN_VALIDATE_MSG, PLUGIN_VALIDATE_DEFAULT, )(__VA_ARGS__)) // Logs failed condition and throws a PluginError. // PLUGIN_ASSERT will eventually perform this function, at which point PLUGIN_VALIDATE @@ -247,8 +249,8 @@ inline void caughtError(std::exception const& e) #define PLUGIN_CUERROR(status_) \ do \ { \ - auto s_ = status_; \ - if (s_ != 0) \ + auto s_cuerror_ = status_; \ + if (s_cuerror_ != 0) \ nvinfer1::plugin::logError(#status_ " failure.", __FILE__, FN_NAME, __LINE__); \ } while (0) diff --git a/plugin/voxelGeneratorPlugin/voxelGenerator.cpp b/plugin/voxelGeneratorPlugin/voxelGenerator.cpp index 0a25687e6..68c3e2f52 100644 --- a/plugin/voxelGeneratorPlugin/voxelGenerator.cpp +++ b/plugin/voxelGeneratorPlugin/voxelGenerator.cpp @@ -39,12 +39,12 @@ size_t constexpr kSERIALIZATION_SIZE{9 * sizeof(float) + 7 * sizeof(int32_t)}; int32_t npRound(float x) { // half way round to nearest-even - int32_t x2 = lround(x * 2.0F); - if (x != static_cast(x) && x2 == x * 2.0F) + int32_t x2 = static_cast(lroundf(x * 2.0F)); + if (x != static_cast(static_cast(x)) && static_cast(x2) == x * 2.0F) { - return lround(x / 2.0F + 0.5F) * 2; + return static_cast(lroundf(x / 2.0F + 0.5F)) * 2; } - return lround(x); + return static_cast(lroundf(x)); } 
VoxelGeneratorPlugin::VoxelGeneratorPlugin(int32_t maxVoxels, int32_t maxPoints, int32_t voxelFeatures, float xMin, @@ -90,7 +90,7 @@ VoxelGeneratorPlugin::VoxelGeneratorPlugin(void const* data, size_t length) { PLUGIN_ASSERT(data != nullptr); uint8_t const* d = reinterpret_cast(data); - auto const *a = d; + auto const* a = d; mPillarNum = readFromBuffer(d); mPointNum = readFromBuffer(d); mFeatureNum = readFromBuffer(d); @@ -128,7 +128,7 @@ nvinfer1::IPluginV2DynamicExt* VoxelGeneratorPlugin::clone() const noexcept } nvinfer1::DimsExprs VoxelGeneratorPlugin::getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, - int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept + int32_t /*nbInputs*/, nvinfer1::IExprBuilder& exprBuilder) noexcept { try { @@ -213,7 +213,7 @@ void VoxelGeneratorPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc con PLUGIN_VALIDATE(nbInputs == 2); PLUGIN_VALIDATE(nbOutputs == 3); - mPointFeatureNum = in[0].desc.dims.d[2]; + mPointFeatureNum = static_cast(in[0].desc.dims.d[2]); mGridXSize = npRound((mMaxXRange - mMinXRange) / mPillarXSize); mGridYSize = npRound((mMaxYRange - mMinYRange) / mPillarYSize); mGridZSize = npRound((mMaxZRange - mMinZRange) / mPillarZSize); @@ -224,14 +224,15 @@ void VoxelGeneratorPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc con } } -size_t VoxelGeneratorPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t VoxelGeneratorPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t /*nbInputs*/, + nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { try { - int32_t batchSize = inputs[0].dims.d[0]; + int32_t batchSize = static_cast(inputs[0].dims.d[0]); size_t maskSize = batchSize * mGridZSize * mGridYSize * mGridXSize * sizeof(uint32_t); - size_t voxelsSize = batchSize * mGridZSize * mGridYSize * mGridXSize * 
mPointNum * mPointFeatureNum * sizeof(float); + size_t voxelsSize + = batchSize * mGridZSize * mGridYSize * mGridXSize * mPointNum * mPointFeatureNum * sizeof(float); // the actual max pillar num cannot be determined, use upper bound size_t voxelFeaturesSize = voxelsSize; size_t voxelNumPointsSize = maskSize; @@ -257,8 +258,8 @@ int32_t VoxelGeneratorPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDes { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr && workspace != nullptr); - int32_t batchSize = inputDesc[0].dims.d[0]; - int32_t maxNumPoints = inputDesc[0].dims.d[1]; + int32_t batchSize = static_cast(inputDesc[0].dims.d[0]); + int32_t maxNumPoints = static_cast(inputDesc[0].dims.d[1]); // TRT-input float* pointCloud = const_cast((float const*) inputs[0]); uint32_t* pointNumPtr = const_cast((uint32_t const*) inputs[1]); @@ -285,9 +286,10 @@ int32_t VoxelGeneratorPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDes nextWorkspacePtr(reinterpret_cast(voxelFeatures), voxelFeaturesSize)); // Initialize workspace memory PLUGIN_CUASSERT(cudaMemsetAsync(mask, 0, totalWorkspace, stream)); - uint32_t pillarFeaturesDataSize = batchSize * mPillarNum * mPointNum * mFeatureNum * sizeof(float); - uint32_t coordsDataSize = batchSize * mPillarNum * 4 * sizeof(uint32_t); - uint32_t paramsDataSize = batchSize * sizeof(uint32_t); + size_t pillarFeaturesDataSize + = static_cast(batchSize) * mPillarNum * mPointNum * mFeatureNum * sizeof(float); + size_t coordsDataSize = static_cast(batchSize) * mPillarNum * 4 * sizeof(uint32_t); + size_t paramsDataSize = static_cast(batchSize) * sizeof(uint32_t); PLUGIN_CUASSERT(cudaMemsetAsync(pillarFeaturesData, 0, pillarFeaturesDataSize, stream)); PLUGIN_CUASSERT(cudaMemsetAsync(coordsData, 0, coordsDataSize, stream)); PLUGIN_CUASSERT(cudaMemsetAsync(paramsData, 0, paramsDataSize, stream)); @@ -300,8 +302,8 @@ int32_t VoxelGeneratorPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDes 
generateBaseFeatures_launch(batchSize, mask, voxels, mGridYSize, mGridXSize, paramsData, mPillarNum, mPointNum, mPointFeatureNum, voxelFeatures, voxelNumPoints, coordsData, stream); generateFeatures_launch(batchSize, densePillarNum, voxelFeatures, voxelNumPoints, coordsData, paramsData, - mPillarXSize, mPillarYSize, mPillarZSize, mMinXRange, mMinYRange, mMinZRange, mFeatureNum, mPointNum, mPillarNum, - mPointFeatureNum, pillarFeaturesData, stream); + mPillarXSize, mPillarYSize, mPillarZSize, mMinXRange, mMinYRange, mMinZRange, mFeatureNum, mPointNum, + mPillarNum, mPointFeatureNum, pillarFeaturesData, stream); return 0; } catch (std::exception const& e) @@ -312,7 +314,7 @@ int32_t VoxelGeneratorPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDes } nvinfer1::DataType VoxelGeneratorPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { try { @@ -362,7 +364,7 @@ void VoxelGeneratorPlugin::serialize(void* buffer) const noexcept PLUGIN_ASSERT(buffer != nullptr); uint8_t* d = reinterpret_cast(buffer); - auto *a = d; + auto* a = d; writeToBuffer(d, mPillarNum); writeToBuffer(d, mPointNum); writeToBuffer(d, mFeatureNum); @@ -413,7 +415,7 @@ VoxelGeneratorPluginCreator::VoxelGeneratorPluginCreator() mPluginAttributes.emplace_back(PluginField("point_cloud_range", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("voxel_feature_num", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("voxel_size", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -432,7 +434,7 @@ PluginFieldCollection const* VoxelGeneratorPluginCreator::getFieldNames() noexce return &mFC; } -IPluginV2* VoxelGeneratorPluginCreator::createPlugin(char const* 
name, PluginFieldCollection const* fc) noexcept +IPluginV2* VoxelGeneratorPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -493,7 +495,7 @@ IPluginV2* VoxelGeneratorPluginCreator::createPlugin(char const* name, PluginFie } IPluginV2* VoxelGeneratorPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/voxelGeneratorPlugin/voxelGenerator.h b/plugin/voxelGeneratorPlugin/voxelGenerator.h index c6c401467..44fda7b07 100644 --- a/plugin/voxelGeneratorPlugin/voxelGenerator.h +++ b/plugin/voxelGeneratorPlugin/voxelGenerator.h @@ -34,12 +34,17 @@ namespace plugin class VoxelGeneratorPlugin : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + VoxelGeneratorPlugin() = delete; - VoxelGeneratorPlugin(int32_t maxVoxels, int32_t maxPoints, int32_t voxelFeatures, float xMin, float xMax, float yMin, - float yMax, float zMin, float zMax, float pillarX, float pillarY, float pillarZ); - VoxelGeneratorPlugin(int32_t maxVoxels, int32_t maxPoints, int32_t voxelFeatures, float xMin, float xMax, float yMin, - float yMax, float zMin, float zMax, float pillarX, float pillarY, float pillarZ, int32_t pointFeatures, - int32_t gridX, int32_t gridY, int32_t gridZ); + VoxelGeneratorPlugin(int32_t maxVoxels, int32_t maxPoints, int32_t voxelFeatures, float xMin, float xMax, + float yMin, float yMax, float zMin, float zMax, float pillarX, float pillarY, float pillarZ); + VoxelGeneratorPlugin(int32_t maxVoxels, int32_t maxPoints, int32_t voxelFeatures, float xMin, float xMax, + float yMin, float yMax, float zMin, float zMax, float pillarX, float pillarY, float pillarZ, + 
int32_t pointFeatures, int32_t gridX, int32_t gridY, int32_t gridZ); VoxelGeneratorPlugin(void const* data, size_t length); // IPluginV2DynamicExt Methods nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; diff --git a/samples/common/BatchStream.h b/samples/common/BatchStream.h index d12596e2c..7bae344e6 100644 --- a/samples/common/BatchStream.h +++ b/samples/common/BatchStream.h @@ -26,6 +26,7 @@ class IBatchStream { public: + virtual ~IBatchStream() = default; virtual void reset(int firstBatch) = 0; virtual bool next() = 0; virtual void skip(int skipCount) = 0; @@ -173,7 +174,7 @@ class BatchStream : public IBatchStream mDims.d[3] = d[3]; // Width ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 && mDims.d[3] > 0); - mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3]; + mImageSize = static_cast(mDims.d[1] * mDims.d[2] * mDims.d[3]); mBatch.resize(mBatchSize * mImageSize, 0); mLabels.resize(mBatchSize, 0); mFileBatch.resize(mDims.d[0] * mImageSize, 0); @@ -193,7 +194,7 @@ class BatchStream : public IBatchStream , mListFile(listFile) , mDataDir(directories) { - mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3]; + mImageSize = static_cast(mDims.d[1] * mDims.d[2] * mDims.d[3]); mBatch.resize(mBatchSize * mImageSize, 0); mLabels.resize(mBatchSize, 0); mFileBatch.resize(mDims.d[0] * mImageSize, 0); @@ -205,7 +206,7 @@ class BatchStream : public IBatchStream { mBatchCount = 0; mFileCount = 0; - mFileBatchPos = mDims.d[0]; + mFileBatchPos = static_cast(mDims.d[0]); skip(firstBatch); } @@ -217,7 +218,7 @@ class BatchStream : public IBatchStream return false; } - for (int64_t csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize) + for (int64_t csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += static_cast(csize)) { ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]); if (mFileBatchPos == mDims.d[0] && !update()) @@ -238,9 +239,9 @@ class BatchStream : public IBatchStream // 
Skips the batches void skip(int skipCount) override { - if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 && mFileBatchPos == mDims.d[0]) + if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 && mFileBatchPos == static_cast(mDims.d[0])) { - mFileCount += skipCount * mBatchSize / mDims.d[0]; + mFileCount += static_cast(skipCount * mBatchSize / mDims.d[0]); return; } @@ -269,7 +270,7 @@ class BatchStream : public IBatchStream int getBatchSize() const override { - return mBatchSize; + return static_cast(mBatchSize); } nvinfer1::Dims getDims() const override @@ -338,14 +339,14 @@ class BatchStream : public IBatchStream } std::vector data(samplesCommon::volume(mDims)); - const float scale = 2.0 / 255.0; - const float bias = 1.0; - long int volChl = mDims.d[2] * mDims.d[3]; + const float scale = 2.0f / 255.0f; + const float bias = 1.0f; + long int volChl = static_cast(mDims.d[2] * mDims.d[3]); // Normalize input data - for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3]; i < mBatchSize; ++i) + for (int i = 0, volImg = static_cast(mDims.d[1] * mDims.d[2] * mDims.d[3]); i < mBatchSize; ++i) { - for (int c = 0; c < mDims.d[1]; ++c) + for (int64_t c = 0; c < mDims.d[1]; ++c) { for (int j = 0; j < volChl; ++j) { diff --git a/samples/common/ErrorRecorder.h b/samples/common/ErrorRecorder.h index 91ddb176f..d96723d37 100644 --- a/samples/common/ErrorRecorder.h +++ b/samples/common/ErrorRecorder.h @@ -48,15 +48,17 @@ class SampleErrorRecorder : public IErrorRecorder ~SampleErrorRecorder() noexcept override {} int32_t getNbErrors() const noexcept final { - return mErrorStack.size(); + return static_cast(mErrorStack.size()); } ErrorCode getErrorCode(int32_t errorIdx) const noexcept final { - return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT : (*this)[errorIdx].first; + return invalidIndexCheck(errorIdx) ? 
ErrorCode::kINVALID_ARGUMENT + : (*this)[static_cast(errorIdx)].first; }; IErrorRecorder::ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept final { - return invalidIndexCheck(errorIdx) ? "errorIdx out of range." : (*this)[errorIdx].second.c_str(); + return invalidIndexCheck(errorIdx) ? "errorIdx out of range." + : (*this)[static_cast(errorIdx)].second.c_str(); } // This class can never overflow since we have dynamic resize via std::vector usage. bool hasOverflowed() const noexcept final @@ -122,7 +124,7 @@ class SampleErrorRecorder : public IErrorRecorder { // By converting signed to unsigned, we only need a single check since // negative numbers turn into large positive greater than the size. - size_t sIndex = index; + size_t sIndex = static_cast(index); return sIndex >= mErrorStack.size(); } // Mutex to hold when locking mErrorStack. @@ -134,5 +136,5 @@ class SampleErrorRecorder : public IErrorRecorder // The error stack that holds the errors recorded by TensorRT. errorStack mErrorStack; -}; // class SampleErrorRecorder +}; // class SampleErrorRecorder #endif // ERROR_RECORDER_H diff --git a/samples/common/common.h b/samples/common/common.h index bf53058ca..6af6454b3 100644 --- a/samples/common/common.h +++ b/samples/common/common.h @@ -105,15 +105,15 @@ using namespace nvinfer1; #undef CHECK #define CHECK(status) CHECK_WITH_STREAM(status, std::cerr) -constexpr long double operator"" _GiB(long double val) +constexpr long double operator""_GiB(long double val) { return val * (1 << 30); } -constexpr long double operator"" _MiB(long double val) +constexpr long double operator""_MiB(long double val) { return val * (1 << 20); } -constexpr long double operator"" _KiB(long double val) +constexpr long double operator""_KiB(long double val) { return val * (1 << 10); } @@ -162,11 +162,11 @@ struct SimpleProfiler : public nvinfer1::IProfiler out << "========== " << value.mName << " profile ==========" << std::endl; float totalTime = 0; std::string layerNameStr = 
"TensorRT layer name"; - int maxLayerNameLength = std::max(static_cast(layerNameStr.size()), 70); + int32_t maxLayerNameLength = std::max(static_cast(layerNameStr.size()), 70); for (const auto& elem : value.mProfile) { totalTime += elem.second.time; - maxLayerNameLength = std::max(maxLayerNameLength, static_cast(elem.first.size())); + maxLayerNameLength = std::max(maxLayerNameLength, static_cast(elem.first.size())); } auto old_settings = out.flags(); @@ -264,7 +264,7 @@ class TypedHostMemory : public HostMemory }; ~TypedHostMemory() noexcept override { - delete[](ElemType*) mData; + delete[] (ElemType*) mData; } ElemType* raw() noexcept { @@ -293,7 +293,8 @@ inline bool isDebug() return std::getenv("TENSORRT_DEBUG") != nullptr; } -static auto StreamDeleter = [](cudaStream_t* pStream) { +static auto StreamDeleter = [](cudaStream_t* pStream) +{ if (pStream) { static_cast(cudaStreamDestroy(*pStream)); @@ -535,28 +536,28 @@ inline size_t getNbBytes(nvinfer1::DataType t, int64_t vol) noexcept { switch (t) { - case nvinfer1::DataType::kINT64: return 8 * vol; + case nvinfer1::DataType::kINT64: return static_cast(8 * vol); case nvinfer1::DataType::kINT32: - case nvinfer1::DataType::kFLOAT: return 4 * vol; + case nvinfer1::DataType::kFLOAT: return static_cast(4 * vol); case nvinfer1::DataType::kBF16: - case nvinfer1::DataType::kHALF: return 2 * vol; + case nvinfer1::DataType::kHALF: return static_cast(2 * vol); case nvinfer1::DataType::kBOOL: case nvinfer1::DataType::kUINT8: - case nvinfer1::DataType::kINT8: return vol; + case nvinfer1::DataType::kINT8: return static_cast(vol); case nvinfer1::DataType::kFP8: #if CUDA_VERSION < 11060 ASSERT(false && "FP8 is not supported"); #else - return vol; + return static_cast(vol); #endif case nvinfer1::DataType::kE8M0: #if CUDA_VERSION < 12080 ASSERT(false && "E8M0 is not supported"); #else - return vol; + return static_cast(vol); #endif // CUDA_VERSION < 12080 case nvinfer1::DataType::kINT4: - case nvinfer1::DataType::kFP4: return 
(vol + 1) / 2; + case nvinfer1::DataType::kFP4: return static_cast((vol + 1) / 2); } ASSERT(false && "Unknown element type"); } @@ -758,25 +759,25 @@ inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm, std::vec for (int x = int(bbox.x1); x < int(bbox.x2); ++x) { // bbox top border - ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255; - ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0; - ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0; + ppm.buffer[static_cast((round(bbox.y1) * ppm.w + x) * 3)] = 255; + ppm.buffer[static_cast((round(bbox.y1) * ppm.w + x) * 3 + 1)] = 0; + ppm.buffer[static_cast((round(bbox.y1) * ppm.w + x) * 3 + 2)] = 0; // bbox bottom border - ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255; - ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0; - ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0; + ppm.buffer[static_cast((round(bbox.y2) * ppm.w + x) * 3)] = 255; + ppm.buffer[static_cast((round(bbox.y2) * ppm.w + x) * 3 + 1)] = 0; + ppm.buffer[static_cast((round(bbox.y2) * ppm.w + x) * 3 + 2)] = 0; } for (int y = int(bbox.y1); y < int(bbox.y2); ++y) { // bbox left border - ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255; - ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0; - ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0; + ppm.buffer[static_cast((y * ppm.w + round(bbox.x1)) * 3)] = 255; + ppm.buffer[static_cast((y * ppm.w + round(bbox.x1)) * 3 + 1)] = 0; + ppm.buffer[static_cast((y * ppm.w + round(bbox.x1)) * 3 + 2)] = 0; // bbox right border - ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255; - ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0; - ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0; + ppm.buffer[static_cast((y * ppm.w + round(bbox.x2)) * 3)] = 255; + ppm.buffer[static_cast((y * ppm.w + round(bbox.x2)) * 3 + 1)] = 0; + ppm.buffer[static_cast((y * ppm.w + round(bbox.x2)) * 3 + 2)] = 0; } } @@ -786,6 +787,7 @@ inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm, 
std::vec class TimerBase { public: + virtual ~TimerBase() = default; virtual void start() {} virtual void stop() {} float microseconds() const noexcept @@ -879,17 +881,17 @@ inline std::vector splitString(std::string str, char delimiter = ', inline int getC(nvinfer1::Dims const& d) { - return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; + return d.nbDims >= 3 ? static_cast(d.d[d.nbDims - 3]) : 1; } inline int getH(const nvinfer1::Dims& d) { - return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; + return d.nbDims >= 2 ? static_cast(d.d[d.nbDims - 2]) : 1; } inline int getW(const nvinfer1::Dims& d) { - return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; + return d.nbDims >= 1 ? static_cast(d.d[d.nbDims - 1]) : 1; } //! Platform-agnostic wrapper around dynamic libraries. @@ -1052,9 +1054,8 @@ inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) constexpr size_t kMAX_FILENAME_LENGTH = 150; // Leave some margin due to Windows path length limitation constexpr size_t kELLIPSIS_LENGTH = 3; // Length of "..." - auto processChar = [&kALLOWED](char c) { - return std::isalnum(static_cast(c)) || kALLOWED.find(c) != std::string_view::npos ? c : '_'; - }; + auto processChar = [&kALLOWED](char c) + { return std::isalnum(static_cast(c)) || kALLOWED.find(c) != std::string_view::npos ? 
c : '_'; }; std::string res; if (s.length() <= kMAX_FILENAME_LENGTH) diff --git a/samples/common/debugTensorWriter.cpp b/samples/common/debugTensorWriter.cpp index f6207c07c..c68a3c7c2 100644 --- a/samples/common/debugTensorWriter.cpp +++ b/samples/common/debugTensorWriter.cpp @@ -127,14 +127,14 @@ class DataIterator { // For Int4x2, each byte contains two 4-bit integers Int4x2 packed(mData[mIndex / 2]); - return packed.element(mIndex % 2); + return packed.element(static_cast(mIndex % 2)); } #if CUDA_VERSION >= 12070 else if constexpr (std::is_same_v) { // For Fp4x2, each byte contains two 4-bit floating point numbers Fp4x2 packed(mData[mIndex / 2]); - return packed.element(mIndex % 2); + return packed.element(static_cast(mIndex % 2)); } #endif else @@ -175,7 +175,7 @@ class DataIterator private: uint8_t const* mData; - int64_t mVolume; + [[maybe_unused]] int64_t mVolume; int64_t mIndex; }; @@ -203,7 +203,7 @@ class DataRange private: void const* mData; - int64_t mVolume; + [[maybe_unused]] int64_t mVolume; }; template @@ -250,7 +250,8 @@ void printTensorElements(T const* data, int64_t volume, std::ofstream& f) ? 
kPRINT_ELEMENTS_COUNT / 2 : std::max(static_cast(0), volume - kPRINT_ELEMENTS_COUNT / 2); - auto printElement = [&f](auto value) { + auto printElement = [&f](auto value) + { if constexpr (isFloatingPoint) { f << static_cast(value); @@ -303,12 +304,13 @@ void processTensorSummary(void const* addr_host, int64_t volume, std::ofstream& float val = static_cast(value); minVal = std::min(minVal, val); maxVal = std::max(maxVal, val); - sum += val; + sum += static_cast(val); } - float avgVal = sum / volume; + float avgVal = static_cast(sum / static_cast(volume)); // nan and inf turn into string in json - auto valueToStr = [](float val) -> std::string { + auto valueToStr = [](float val) -> std::string + { std::stringstream ss; if (!std::isfinite(val)) { @@ -338,7 +340,7 @@ void processTensorSummary(void const* addr_host, int64_t volume, std::ofstream& maxVal = std::max(maxVal, val); sum += val; } - double avgVal = static_cast(sum) / volume; + double avgVal = static_cast(sum) / static_cast(volume); f << " \"min\": " << minVal << "," << std::endl; f << " \"max\": " << maxVal << "," << std::endl; @@ -359,7 +361,7 @@ std::string getCurrentTimeString() template void writeTensorStringRecursive(T const* data, nvinfer1::Dims const& shape, int32_t currentDim, int64_t offset, - int64_t stride, std::ofstream& f, bool isFirstElement = true, int32_t indent = 0, int32_t maxWidth = 0) + int64_t stride, std::ofstream& f, bool /*isFirstElement*/ = true, int32_t indent = 0, int32_t maxWidth = 0) { bool isLastDim = currentDim == shape.nbDims - 1; if (isLastDim) @@ -533,7 +535,7 @@ std::string writeStringFile(void const* addr_host, nvinfer1::DataType type, nvin std::string escapeJsonString(std::string_view str) { std::string result; - result.reserve(str.length()); + result.reserve(str.size()); for (char c : str) { switch (c) @@ -751,13 +753,13 @@ bool writeNumpyFile(void const* addr_host, std::string_view dtype, nvinfer1::Dim // Pad header to 16 bytes alignment std::string headerStr = 
header.str(); - int32_t headerLen = 10 + headerStr.length(); + int32_t headerLen = 10 + static_cast(headerStr.length()); int32_t padding = 16 - ((headerLen + 1) % 16); headerStr.append(padding, ' '); headerStr += '\n'; // Write header length and header - uint16_t headerSize = headerStr.length(); + uint16_t headerSize = static_cast(headerStr.length()); f.write(reinterpret_cast(&headerSize), sizeof(uint16_t)); f.write(headerStr.c_str(), headerSize); @@ -778,7 +780,8 @@ std::string writeNumpy(nvinfer1::DataType type, void const* addr_host, int64_t v std::vector floatBuffer; std::vector int8Buffer; - auto convertToFloat = [&](std::vector const& buffer) { + auto convertToFloat = [&](std::vector const& buffer) + { sample::gLogWarning << "Converting " << getDataTypeString(type) << " to float for numpy dump of tensor '" << name << "'." << std::endl; dtype = " const& buffer) { + auto convertToInt8 = [&](std::vector const& buffer) + { sample::gLogWarning << "Converting " << getDataTypeString(type) << " to int8 for numpy dump of tensor '" << name << "'." 
<< std::endl; dtype = "& options, const TRTOption& op { if (matches(opt, options[i])) { - return i; + return static_cast(i); } } return -1; @@ -149,13 +149,13 @@ TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector 2) { - return TRTParsedArgs{"Short arg contains more than 1 character at index " + std::to_string(i)}; + return TRTParsedArgs{"Short arg contains more than 1 character at index " + std::to_string(i), {}, {}}; } opt.shortName = argStr[1]; } @@ -201,18 +201,19 @@ TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector(idx)].valueRequired) { if (!value.empty()) { - parsedArgs.values[idx].second.push_back(value); - parsedArgs.values[idx].first = parsedArgs.values[idx].second.size(); + parsedArgs.values[static_cast(idx)].second.push_back(value); + parsedArgs.values[static_cast(idx)].first + = static_cast(parsedArgs.values[static_cast(idx)].second.size()); continue; } if (i + 1 >= argc) { - return TRTParsedArgs{"Last argument requires value, but none given"}; + return TRTParsedArgs{"Last argument requires value, but none given", {}, {}}; } const std::string nextArg(argv[i + 1]); @@ -222,14 +223,15 @@ TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector(idx)].second.push_back(nextArg); i += 1; // Next argument already consumed - parsedArgs.values[idx].first = parsedArgs.values[idx].second.size(); + parsedArgs.values[static_cast(idx)].first + = static_cast(parsedArgs.values[static_cast(idx)].second.size()); } else { - parsedArgs.values[idx].first += 1; + parsedArgs.values[static_cast(idx)].first += 1; } } return parsedArgs; @@ -240,7 +242,7 @@ TRTParsedArgs getOptions(int argc, const char* const* argv, const std::vector uint16 float2half(T value) { return float2half_impl( - value, bool_type < std::numeric_limits::is_iec559 && sizeof(typename bits::type) == sizeof(T) > ()); + value, bool_type::is_iec559 && sizeof(typename bits::type) == sizeof(T)>()); } /// Convert integer to half-precision 
floating point. @@ -1237,7 +1242,7 @@ template T half2float(uint16 value) { return half2float_impl( - value, T(), bool_type < std::numeric_limits::is_iec559 && sizeof(typename bits::type) == sizeof(T) > ()); + value, T(), bool_type::is_iec559 && sizeof(typename bits::type) == sizeof(T)>()); } /// Convert half-precision floating point to integer. @@ -1553,7 +1558,7 @@ class half private: /// Rounding mode to use - static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE); + static const std::float_round_style round_style = (std::float_round_style) (HALF_ROUND_STYLE); /// Constructor. /// \param bits binary representation to set half to @@ -2609,7 +2614,7 @@ struct binary_specialized /// \tparam T destination type /// \tparam U source type /// \tparam R rounding mode to use -template +template struct half_caster { }; diff --git a/samples/common/sampleDevice.cpp b/samples/common/sampleDevice.cpp index 5eedfc1e2..e0ad088d7 100644 --- a/samples/common/sampleDevice.cpp +++ b/samples/common/sampleDevice.cpp @@ -101,8 +101,8 @@ void setCudaDevice(int32_t device, std::ostream& os) int32_t memoryClockRate = 0; CHECK(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, device)); CHECK(cudaDeviceGetAttribute(&memoryClockRate, cudaDevAttrMemoryClockRate, device)); - os << "Application Compute Clock Rate: " << clockRate / 1000000.0F << " GHz" << std::endl; - os << "Application Memory Clock Rate: " << memoryClockRate / 1000000.0F << " GHz" << std::endl; + os << "Application Compute Clock Rate: " << static_cast(clockRate) / 1000000.0F << " GHz" << std::endl; + os << "Application Memory Clock Rate: " << static_cast(memoryClockRate) / 1000000.0F << " GHz" << std::endl; os << std::endl; os << "Note: The application clock rates do not reflect the actual clock rates that the GPU is " << "currently running at." 
<< std::endl; diff --git a/samples/common/sampleDevice.h b/samples/common/sampleDevice.h index e784b9f99..767f0141c 100644 --- a/samples/common/sampleDevice.h +++ b/samples/common/sampleDevice.h @@ -502,12 +502,12 @@ class UnifiedMirroredBuffer : public IMirroredBuffer return mBuffer.get(); } - void hostToDevice(TrtCudaStream& stream) override + void hostToDevice(TrtCudaStream& /*stream*/) override { // Does nothing since we are using unified memory. } - void deviceToHost(TrtCudaStream& stream) override + void deviceToHost(TrtCudaStream& /*stream*/) override { // Does nothing since we are using unified memory. } @@ -539,7 +539,7 @@ class OutputAllocator : public nvinfer1::IOutputAllocator ~OutputAllocator() override = default; void* reallocateOutput( - char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept override + char const* /*tensorName*/, void* /*currentMemory*/, uint64_t size, uint64_t alignment) noexcept override { // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr // even for empty tensors, so allocate a dummy byte. @@ -559,7 +559,7 @@ class OutputAllocator : public nvinfer1::IOutputAllocator return reallocateOutput(tensorName, currentMemory, size, alignment); } - void notifyShape(char const* tensorName, nvinfer1::Dims const& dims) noexcept override + void notifyShape(char const* /*tensorName*/, nvinfer1::Dims const& dims) noexcept override { mFinalDims = dims; } @@ -589,7 +589,6 @@ int32_t getCudaDriverVersion(); //! Get the CUDA version of the current CUDA runtime. int32_t getCudaRuntimeVersion(); - } // namespace sample #endif // TRT_SAMPLE_DEVICE_H diff --git a/samples/common/sampleEngines.cpp b/samples/common/sampleEngines.cpp index 1a6b87322..4c6b516f2 100644 --- a/samples/common/sampleEngines.cpp +++ b/samples/common/sampleEngines.cpp @@ -223,7 +223,6 @@ void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, std:: } } - //! //! 
\brief Generate a network definition for a given model //! @@ -637,7 +636,7 @@ void setDecomposables(INetworkDefinition& network, DecomposableAttentions const& void markDebugTensors(INetworkDefinition& network, StringSet const& debugTensors) { - for (int64_t inputIndex = 0; inputIndex < network.getNbInputs(); ++inputIndex) + for (int32_t inputIndex = 0; inputIndex < network.getNbInputs(); ++inputIndex) { auto* t = network.getInput(inputIndex); auto const tensorName = t->getName(); @@ -646,10 +645,10 @@ void markDebugTensors(INetworkDefinition& network, StringSet const& debugTensors network.markDebug(*t); } } - for (int64_t layerIndex = 0; layerIndex < network.getNbLayers(); ++layerIndex) + for (int32_t layerIndex = 0; layerIndex < network.getNbLayers(); ++layerIndex) { auto* layer = network.getLayer(layerIndex); - for (int64_t outputIndex = 0; outputIndex < layer->getNbOutputs(); ++outputIndex) + for (int32_t outputIndex = 0; outputIndex < layer->getNbOutputs(); ++outputIndex) { auto* t = layer->getOutput(outputIndex); auto const tensorName = t->getName(); @@ -662,9 +661,8 @@ void markDebugTensors(INetworkDefinition& network, StringSet const& debugTensors } void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build) { - auto const roundToBytes = [](double const size, bool fromMB = true) { - return static_cast(size * (fromMB ? 1.0_MiB : 1.0_KiB)); - }; + auto const roundToBytes = [](double const size, bool fromMB = true) + { return static_cast(static_cast(size) * static_cast(fromMB ? 
1.0_MiB : 1.0_KiB)); }; if (build.workspace >= 0) { config.setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, roundToBytes(build.workspace)); @@ -703,7 +701,8 @@ void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build) void setPreviewFeatures(IBuilderConfig& config, BuildOptions const& build) { - auto const setFlag = [&](PreviewFeature feat) { + auto const setFlag = [&](PreviewFeature feat) + { int32_t featVal = static_cast(feat); if (build.previewFeatures.find(featVal) != build.previewFeatures.end()) { @@ -805,10 +804,10 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, if (isDynamicInput) { hasDynamicShapes = true; - for (size_t i = 0; i < build.optProfiles.size(); i++) + for (size_t pi = 0; pi < build.optProfiles.size(); pi++) { - auto const& optShapes = build.optProfiles[i]; - auto profile = profiles[i]; + auto const& optShapes = build.optProfiles[pi]; + auto profile = profiles[pi]; auto const tensorName = input->getName(); auto shape = findPlausible(optShapes, tensorName); ShapeRange shapes{}; @@ -1008,7 +1007,7 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, } if (!pluginPaths.empty()) { - config.setPluginsToSerialize(pluginPaths.data(), pluginPaths.size()); + config.setPluginsToSerialize(pluginPaths.data(), static_cast(pluginPaths.size())); } if (build.excludeLeanRuntime) { @@ -1072,13 +1071,14 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, auto int8IO = std::count_if(build.inputFormats.begin(), build.inputFormats.end(), isInt8) + std::count_if(build.outputFormats.begin(), build.outputFormats.end(), isInt8); - auto hasQDQLayers = [](INetworkDefinition& network) { + auto hasQDQLayers = [](INetworkDefinition& net) + { // Determine if our network has QDQ layers. 
- auto const nbLayers = network.getNbLayers(); + auto const nbLayers = net.getNbLayers(); for (int32_t i = 0; i < nbLayers; i++) { - auto const& layer = network.getLayer(i); - if (layer->getType() == LayerType::kQUANTIZE || layer->getType() == LayerType::kDEQUANTIZE) + auto const& layer_ = net.getLayer(i); + if (layer_->getType() == LayerType::kQUANTIZE || layer_->getType() == LayerType::kDEQUANTIZE) { return true; } @@ -1266,7 +1266,6 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, config.setHardwareCompatibilityLevel(build.hardwareCompatibilityLevel); - config.setRuntimePlatform(build.runtimePlatform); if (build.maxAuxStreams != defaultMaxAuxStreams) @@ -1345,7 +1344,8 @@ bool networkToSerializedEngine( reader.read(reinterpret_cast(streamEngine.data()), engineSize); SMP_RETVAL_IF_FALSE((!reader.fail()), "Error when reading engine file", false, err); reader.close(); - sample::gLogInfo << "Created engine with size: " << (engineSize / 1.0_MiB) << " MiB" << std::endl; + sample::gLogInfo << "Created engine with size: " << static_cast(engineSize / 1.0_MiB) << " MiB" + << std::endl; env.engine.setBlob(std::move(streamEngine)); } else @@ -1359,8 +1359,8 @@ bool networkToSerializedEngine( { std::unique_ptr kernelTextPtr(kernelText); env.kernelText.setBlob(kernelTextPtr); - sample::gLogInfo << "Created kernel CPP with size: " << (kernelText->size() / 1.0_MiB) << " MiB" - << std::endl; + sample::gLogInfo << "Created kernel CPP with size: " + << static_cast(kernelText->size() / 1.0_MiB) << " MiB" << std::endl; } else { @@ -1372,7 +1372,8 @@ bool networkToSerializedEngine( serializedEngine = builder.buildSerializedNetwork(*env.network, *config); } SMP_RETVAL_IF_FALSE(serializedEngine != nullptr, "Engine could not be created from network", false, err); - sample::gLogInfo << "Created engine with size: " << (serializedEngine->size() / 1.0_MiB) << " MiB" << std::endl; + sample::gLogInfo << "Created engine with size: " << 
static_cast(serializedEngine->size() / 1.0_MiB) + << " MiB" << std::endl; if (build.safe && build.consistency) { @@ -1399,14 +1400,14 @@ bool networkToSerializedEngine( { if (build.timingCacheMode == TimingCacheMode::kGLOBAL) { - auto timingCache = config->getTimingCache(); - samplesCommon::updateTimingCacheFile(gLogger.getTRTLogger(), build.timingCacheFile, timingCache, builder); + auto timingCacheUpdated = config->getTimingCache(); + samplesCommon::updateTimingCacheFile( + gLogger.getTRTLogger(), build.timingCacheFile, timingCacheUpdated, builder); } } return true; } - //! //! \brief Parse a given model, create a network and an engine. //! @@ -1476,13 +1477,15 @@ std::pair, std::vector> getLayerWeightsRol std::vector weightsRoles(nbAll); refitter.getAll(nbAll, layerNames.data(), weightsRoles.data()); std::vector layerNameStrs(nbAll); - std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), [](char const* name) { - if (name == nullptr) + std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), + [](char const* name) { - return std::string{}; - } - return std::string{name}; - }); + if (name == nullptr) + { + return std::string{}; + } + return std::string{name}; + }); return {layerNameStrs, weightsRoles}; } @@ -1496,13 +1499,15 @@ std::pair, std::vector> getMissingLayerWei refitter.getMissing(nbMissing, layerNames.data(), weightsRoles.data()); // Convert null names in `layerNames` to empty strings: std::vector layerNameStrs(nbMissing); - std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), [](char const* name) { - if (name == nullptr) + std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), + [](char const* name) { - return std::string{}; - } - return std::string{name}; - }); + if (name == nullptr) + { + return std::string{}; + } + return std::string{name}; + }); return {std::move(layerNameStrs), std::move(weightsRoles)}; } } // namespace @@ -1521,10 +1526,10 @@ bool 
loadAsyncStreamingEngineToBuildEnv(std::string const& filepath, BuildEnviro return true; } - bool loadEngineToBuildEnv(std::string const& filepath, BuildEnvironment& env, std::ostream& err, SystemOptions const& sys, bool const enableConsistency) { + static_cast(sys); auto const tBegin = std::chrono::high_resolution_clock::now(); std::ifstream engineFile(filepath, std::ios::binary); SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, err << "Error opening engine file: " << filepath); @@ -1538,7 +1543,7 @@ bool loadEngineToBuildEnv(std::string const& filepath, BuildEnvironment& env, st auto const tEnd = std::chrono::high_resolution_clock::now(); float const loadTime = std::chrono::duration(tEnd - tBegin).count(); sample::gLogInfo << "Engine loaded in " << loadTime << " sec." << std::endl; - sample::gLogInfo << "Loaded engine with size: " << (fsize / 1.0_MiB) << " MiB" << std::endl; + sample::gLogInfo << "Loaded engine with size: " << static_cast(fsize / 1.0_MiB) << " MiB" << std::endl; if (enableConsistency) { @@ -1593,9 +1598,9 @@ bool printPlanVersion(BuildEnvironment& env, std::ostream& err) case 0U: { // Blob index to store the plan version may depend on the serialization version. - sample::gLogInfo << "Plan was created with TensorRT version " << static_cast(blob[24]) - << "." << static_cast(blob[25]) << "." << static_cast(blob[26]) - << "." << static_cast(blob[27]) << std::endl; + sample::gLogInfo << "Plan was created with TensorRT version " << static_cast(blob[24]) << "." + << static_cast(blob[25]) << "." << static_cast(blob[26]) << "." 
+ << static_cast(blob[27]) << std::endl; return true; } } @@ -1889,11 +1894,11 @@ bool timeRefit(INetworkDefinition const& network, nvinfer1::ICudaEngine& engine, std::inserter(layerRoleSet, layerRoleSet.begin()), [](std::string const& layerName, WeightsRole const role) { return std::make_pair(layerName, role); }); - auto const isRefittable = [&layerRoleSet](char const* layerName, WeightsRole const role) { - return layerRoleSet.find(std::make_pair(layerName, role)) != layerRoleSet.end(); - }; + auto const isRefittable = [&layerRoleSet](char const* layerName, WeightsRole const role) + { return layerRoleSet.find(std::make_pair(layerName, role)) != layerRoleSet.end(); }; - auto const setWeights = [&] { + auto const setWeights = [&] + { for (int32_t i = 0; i < nbLayers; i++) { auto const layer = network.getLayer(i); @@ -1913,16 +1918,17 @@ bool timeRefit(INetworkDefinition const& network, nvinfer1::ICudaEngine& engine, return true; }; - auto const reportMissingWeights = [&] { + auto const reportMissingWeights = [&] + { auto const& missingPair = getMissingLayerWeightsRolePair(*refitter); - auto const& layerNames = missingPair.first; - auto const& weightsRoles = missingPair.second; - for (size_t i = 0; i < layerNames.size(); ++i) + auto const& missingLayerNames = missingPair.first; + auto const& missingWeightsRoles = missingPair.second; + for (size_t i = 0; i < missingLayerNames.size(); ++i) { - sample::gLogError << "Missing (" << layerNames[i] << ", " << weightsRoles[i] << ") for refitting." - << std::endl; + sample::gLogError << "Missing (" << missingLayerNames[i] << ", " << missingWeightsRoles[i] + << ") for refitting." 
<< std::endl; } - return layerNames.empty(); + return missingLayerNames.empty(); }; // Skip weights validation since we are confident that the new weights are similar to the weights used to build @@ -2023,7 +2029,7 @@ std::unique_ptr createConsiste if (auto const createFn = reinterpret_cast(dlsym(kCONSISTENCY_CHECKER_LIBRARY.get(), symbolName))) { - if (nvinfer2::safe::consistency::IConsistencyChecker * checker{nullptr}; + if (nvinfer2::safe::consistency::IConsistencyChecker* checker{nullptr}; ErrorCode::kSUCCESS == createFn(checker, recorder, serializedEngine, engineSize, pluginBuildLibPath)) { return std::unique_ptr{checker}; @@ -2049,6 +2055,9 @@ bool checkSafeEngine( void const* serializedEngine, int64_t const engineSize, std::vector const& pluginBuildLibPath) { #if !ENABLE_UNIFIED_BUILDER + static_cast(serializedEngine); + static_cast(engineSize); + static_cast(pluginBuildLibPath); return false; #else if (!hasConsistencyChecker()) diff --git a/samples/common/sampleEngines.h b/samples/common/sampleEngines.h index 3adae4a5b..da096f66a 100644 --- a/samples/common/sampleEngines.h +++ b/samples/common/sampleEngines.h @@ -71,19 +71,19 @@ class LazilyDeserializedEngine //! //! \brief Constructor of LazilyDeserializedEngine. //! 
- LazilyDeserializedEngine(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir, - nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath) - : mIsSafe(isSafe) - , mVersionCompatible(versionCompatible) - , mDLACore(DLACore) - , mTempdir(tempdir) - , mTempfileControls(tempfileControls) - , mLeanDLLPath(leanDLLPath) + LazilyDeserializedEngine(bool isSafe_, bool versionCompatible_, int32_t DLACore_, std::string const& tempdir_, + nvinfer1::TempfileControlFlags tempfileControls_, std::string const& leanDLLPath_) + : mIsSafe(isSafe_) + , mVersionCompatible(versionCompatible_) + , mDLACore(DLACore_) + , mTempdir(tempdir_) + , mTempfileControls(tempfileControls_) + , mLeanDLLPath(leanDLLPath_) { // Only one of these is relevant for any given trtexec call. // Enabled using --asyncFileReader flag. mAsyncFileReader = std::make_unique(); - // Enabled using --load flag. + // Enabled using --load flag. mFileReader = std::make_unique(); } @@ -216,7 +216,6 @@ class LazilyDeserializedEngine return *mAsyncFileReader; } - //! //! \brief Get if safe mode is enabled. //! @@ -238,7 +237,6 @@ class LazilyDeserializedEngine std::unique_ptr mFileReader; std::unique_ptr mAsyncFileReader; - // Directly use the host memory of a serialized engine instead of duplicating the engine in CPU memory. 
std::unique_ptr mEngineBlobHostMemory; @@ -272,12 +270,12 @@ struct BuildEnvironment BuildEnvironment() = delete; BuildEnvironment(BuildEnvironment const& other) = delete; BuildEnvironment(BuildEnvironment&& other) = delete; - BuildEnvironment(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir, - nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath = "", - std::string const& cmdline = "") - : engine(isSafe, versionCompatible, DLACore, tempdir, tempfileControls, leanDLLPath) - , kernelText(false, false, -1, "", tempfileControls, "") - , cmdline(cmdline) + BuildEnvironment(bool isSafe_, bool versionCompatible_, int32_t DLACore_, std::string const& tempdir_, + nvinfer1::TempfileControlFlags tempfileControls_, std::string const& leanDLLPath_ = "", + std::string const& cmdline_ = "") + : engine(isSafe_, versionCompatible_, DLACore_, tempdir_, tempfileControls_, leanDLLPath_) + , kernelText(false, false, -1, "", tempfileControls_, "") + , cmdline(cmdline_) { } @@ -380,7 +378,6 @@ bool timeRefit(const nvinfer1::INetworkDefinition& network, nvinfer1::ICudaEngin void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, std::vector const& inputFormats, std::vector const& outputFormats, std::string const& calibrationFile); - //! \brief Check if safe runtime is loaded. 
[[nodiscard]] bool hasSafeRuntime(); diff --git a/samples/common/sampleInference.cpp b/samples/common/sampleInference.cpp index e50b59add..7f68fc7dd 100644 --- a/samples/common/sampleInference.cpp +++ b/samples/common/sampleInference.cpp @@ -113,7 +113,8 @@ bool initNvinferSafe() { #if !TRT_STATIC static LibraryPtr libnvinfersafePtr{}; - auto fetchPtrs = [](samplesCommon::DynamicLibrary* l) { + auto fetchPtrs = [](samplesCommon::DynamicLibrary* l) + { if (gUseRuntime == RuntimeMode::kSAFE) { pcreateTRTGraphInternal = l->symbolAddress outputTensors; - outputTensors.reserve(endBindingIndex); + outputTensors.reserve(endBindingIndex_); - for (int32_t b = 0; b < endBindingIndex; b++) + for (int32_t b = 0; b < endBindingIndex_; b++) { TensorInfo tensorInfo; tensorInfo.bindingIndex = b; getTensorInfo(tensorInfo); - tensorInfo.updateVolume(batch); + tensorInfo.updateVolume(batch_); if (tensorInfo.isInput) { fillOneBinding(tensorInfo); @@ -375,9 +376,8 @@ bool allocateContextMemory(InferenceEnvironmentStd& iEnv, InferenceOptions const auto const& ec = iEnv.contexts.at(i); if (inference.memoryAllocationStrategy == MemoryAllocationStrategy::kSTATIC) { - sample::gLogInfo << "Created execution context with device memory size: " << - (engine->getDeviceMemorySize() / 1.0_MiB) - << " MiB" << std::endl; + sample::gLogInfo << "Created execution context with device memory size: " + << static_cast(engine->getDeviceMemorySize() / 1.0_MiB) << " MiB" << std::endl; } else { @@ -402,9 +402,9 @@ bool allocateContextMemory(InferenceEnvironmentStd& iEnv, InferenceOptions const iEnv.deviceMemory.at(i) = TrtDeviceBuffer(sizeToAlloc); ec->setDeviceMemoryV2(iEnv.deviceMemory.at(i).get(), iEnv.deviceMemory.at(i).getSize()); sample::gLogInfo << "Maximum device memory size across all profiles: " - << (engine->getDeviceMemorySizeV2() / 1.0_MiB) << " MiB" << std::endl; + << static_cast(engine->getDeviceMemorySizeV2() / 1.0_MiB) << " MiB" << std::endl; sample::gLogInfo << "Only allocated device memory 
enough for " << allocReason << ": " - << (sizeToAlloc / 1.0_MiB) << " MiB" << std::endl; + << static_cast(sizeToAlloc / 1.0_MiB) << " MiB" << std::endl; } } return true; @@ -430,7 +430,6 @@ void contractInt64ToInt32(std::vector& shapeData) } // namespace - bool setUpInference(InferenceEnvironmentBase& iEnv, InferenceOptions const& inference, SystemOptions const& system) { #if ENABLE_UNIFIED_BUILDER @@ -550,7 +549,8 @@ bool setUpSafeInference(InferenceEnvironmentSafe& iEnv, InferenceOptions const& } #endif -bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& inference, SystemOptions const& system) +bool setUpStdInference( + InferenceEnvironmentStd& iEnv, InferenceOptions const& inference, SystemOptions const& /*system*/) { int32_t device{}; CHECK(cudaGetDevice(&device)); @@ -570,7 +570,6 @@ bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& in // Release serialized blob to save memory space. iEnv.engine.releaseBlob(); - // Setup weight streaming if enabled if (engine->getStreamableWeightsSize() > 0) { @@ -581,7 +580,8 @@ bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& in double const percent = budget.percent; ASSERT(percent < 100.0); auto const max = engine->getStreamableWeightsSize(); - wsBudget = (max >= 0) ? (percent / 100) * (max) : WeightStreamingBudget::kDISABLE; + wsBudget = (max >= 0) ? static_cast((percent / 100.0) * static_cast(max)) + : WeightStreamingBudget::kDISABLE; } if (wsBudget == WeightStreamingBudget::kDISABLE) @@ -642,8 +642,9 @@ bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& in IExecutionContext* ec{nullptr}; //! \return the `ExecutionContextAllocationStrategy` to use for the given allocation strategy, \p s. 
- auto getExecutionContextAllocationStrategy = [](MemoryAllocationStrategy s) { - return s == MemoryAllocationStrategy::kSTATIC + auto getExecutionContextAllocationStrategy = [](MemoryAllocationStrategy strategy) + { + return strategy == MemoryAllocationStrategy::kSTATIC // Let TRT pre-allocate and manage the memory. ? ExecutionContextAllocationStrategy::kSTATIC // Allocate based on the current profile or runtime shapes. @@ -658,8 +659,8 @@ bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& in } ec->setNvtxVerbosity(inference.nvtxVerbosity); - int32_t const persistentCacheLimit - = samplesCommon::getMaxPersistentCacheSize() * inference.persistentCacheRatio; + int32_t const persistentCacheLimit = static_cast( + static_cast(samplesCommon::getMaxPersistentCacheSize()) * inference.persistentCacheRatio); sample::gLogInfo << "Setting persistentCacheLimit to " << persistentCacheLimit << " bytes." << std::endl; ec->setPersistentCacheLimit(persistentCacheLimit); @@ -832,7 +833,6 @@ bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& in bool fillBindingsSuccess = FillStdBindings( engine, context, inference.inputs, iEnv.bindings, 1, endBindingIndex, inference.optProfileIndex)(); - return fillBindingsSuccess; } @@ -1426,7 +1426,7 @@ bool inferenceLoop(std::vector>& iStreams, TimePo } if (durationMs < warmupMs) // Warming up { - if (durationMs) // Skip complete iterations + if (durationMs != 0.0F) // Skip complete iterations { ++skip; } @@ -1463,7 +1463,8 @@ void inferenceExecution(InferenceOptions const& inference, InferenceEnvironmentB if (iEnv.safe) { //! 
Function to make one iteration: - auto makeIteration = [&](int32_t s) -> std::unique_ptr { + auto makeIteration = [&](int32_t s) -> std::unique_ptr + { int32_t const streamId{threadIdx * streamsPerThread + s}; auto iteration = std::make_unique(streamId, inference, *static_cast(iEnv).mClonedGraphs[streamId], @@ -1506,7 +1507,8 @@ void inferenceExecution(InferenceOptions const& inference, InferenceEnvironmentB #endif //! Function to make one iteration: - auto makeIteration = [&](int32_t s) -> std::unique_ptr { + auto makeIteration = [&](int32_t s) -> std::unique_ptr + { int32_t const streamId{threadIdx * streamsPerThread + s}; auto iteration = std::make_unique(streamId, inference, *static_cast(iEnv).getContext(streamId), @@ -1601,7 +1603,6 @@ bool runInference(InferenceOptions const& inference, InferenceEnvironmentBase& i auto cmpTrace = [](InferenceTrace const& a, InferenceTrace const& b) { return a.h2dStart < b.h2dStart; }; std::sort(trace.begin(), trace.end(), cmpTrace); - return !iEnv.error; } @@ -1620,9 +1621,8 @@ bool runMultiTasksInference(std::vectoriOptions, *(tEnv->iEnv), sync, /*threadIdx*/ 0, /*streamsPerThread*/ 1, tEnv->device, tEnv->trace, - tEnv->rOptions)); + threads.emplace_back(makeThread(tEnv->iOptions, *(tEnv->iEnv), sync, /*threadIdx*/ 0, /*streamsPerThread*/ 1, + tEnv->device, tEnv->trace, tEnv->rOptions)); } for (auto& th : threads) { @@ -1650,11 +1650,12 @@ size_t reportGpuMemory() size_t total{0}; size_t newlyAllocated{0}; CHECK(cudaMemGetInfo(&free, &total)); - sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB"; + sample::gLogInfo << "Free GPU memory = " << static_cast(free / 1024.0_MiB) << " GiB"; if (prevFree != 0) { newlyAllocated = (prevFree - free); - sample::gLogInfo << ", newly allocated GPU memory = " << newlyAllocated / 1024.0_MiB << " GiB"; + sample::gLogInfo << ", newly allocated GPU memory = " << static_cast(newlyAllocated / 1024.0_MiB) + << " GiB"; } sample::gLogInfo << ", total GPU memory = " << total / 
1024.0_MiB << " GiB" << std::endl; prevFree = free; @@ -1671,7 +1672,8 @@ bool timeDeserialize(InferenceEnvironmentBase& iEnv, SystemOptions const& sys) SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError); - auto timeDeserializeFn = [&]() -> float { + auto timeDeserializeFn = [&]() -> float + { bool deserializeOK{false}; engine.reset(nullptr); auto startClock = std::chrono::high_resolution_clock::now(); @@ -1736,7 +1738,9 @@ bool timeDeserialize(InferenceEnvironmentBase& iEnv, SystemOptions const& sys) sample::gLogInfo << "Total deserialization time = " << totalTime << " milliseconds in " << kNB_ITERS << " iterations, average time = " << averageTime << " milliseconds, first time = " << first << " milliseconds." << std::endl; - sample::gLogInfo << "Deserialization Bandwidth = " << 1E-6 * totalEngineSizeGpu / totalTime << " GB/s" << std::endl; + sample::gLogInfo << "Deserialization Bandwidth = " + << 1E-6 * static_cast(totalEngineSizeGpu) / static_cast(totalTime) << " GB/s" + << std::endl; // If the first deserialization is more than tolerance slower than // the average deserialization, return true, which means an error occurred. @@ -1937,7 +1941,8 @@ void BindingsBase::addBinding( } //! 
Make a UnifiedMirroredBuffer if useManaged or Discrete othereise: - auto makeBuffer = [](bool useManaged) -> std::shared_ptr { + auto makeBuffer = [](bool useManaged) -> std::shared_ptr + { if (useManaged) { return std::make_shared(); @@ -2021,7 +2026,7 @@ void BindingsBase::transferOutputToHost(TrtCudaStream& stream) } void BindingsStd::dumpBindingValues(nvinfer1::IExecutionContext const& context, int32_t binding, std::ostream& os, - std::string const& separator /*= " "*/, int32_t batch /*= 1*/) const + std::string const& separator /*= " "*/, int32_t /*batch*/ /*= 1*/) const { auto const tensorName = context.getEngine().getIOTensorName(binding); Dims dims = context.getTensorShape(tensorName); diff --git a/samples/common/sampleInference.h b/samples/common/sampleInference.h index ba2bb6338..a86ddf1f0 100644 --- a/samples/common/sampleInference.h +++ b/samples/common/sampleInference.h @@ -343,7 +343,7 @@ class BindingsBase std::unordered_map getBindings() const { - auto all = [](Binding const& b) { return true; }; + auto all = [](Binding const& /*b*/) { return true; }; return getBindings(all); } @@ -379,7 +379,7 @@ class BindingsStd : public BindingsBase void dumpBindings(nvinfer1::IExecutionContext const& context, std::ostream& os) const { - auto all = [](Binding const& b) { return true; }; + auto all = [](Binding const& /*b*/) { return true; }; dumpBindings(context, all, os); } @@ -436,7 +436,7 @@ class BindingsSafe : public BindingsBase void dumpBindings(ITRTGraph const& graph, std::ostream& os) const { - auto all = [](Binding const& b) { return true; }; + auto all = [](Binding const& /*b*/) { return true; }; dumpBindings(graph, all, os); } @@ -472,8 +472,7 @@ class BindingsSafe : public BindingsBase struct TaskInferenceEnvironment { TaskInferenceEnvironment(std::string engineFile, InferenceOptions const& inference, - ReportingOptions const& reporting, int32_t deviceId = 0, - int32_t DLACore = -1, int32_t bs = batchNotProvided); + ReportingOptions const& 
reporting, int32_t deviceId = 0, int32_t DLACore = -1, int32_t bs = batchNotProvided); InferenceOptions iOptions{}; ReportingOptions rOptions{}; int32_t device{defaultDevice}; diff --git a/samples/common/sampleOptions.cpp b/samples/common/sampleOptions.cpp index 6ef03e5e0..18556550a 100644 --- a/samples/common/sampleOptions.cpp +++ b/samples/common/sampleOptions.cpp @@ -71,7 +71,7 @@ int64_t getUnitMultiplier(std::string const& option) char lastChar = option.at(option.size() - 1); if (!std::isdigit(lastChar)) { - char unit = std::toupper(lastChar); + char unit = static_cast(std::toupper(lastChar)); auto found = kUNIT_MULTIPLIERS.find(unit); if (found == kUNIT_MULTIPLIERS.end()) { @@ -103,13 +103,13 @@ int32_t stringToValue(const std::string& option) template <> int64_t stringToValue(const std::string& option) { - return std::stoi(option); + return static_cast(std::stoi(option)); } template <> size_t stringToValue(const std::string& option) { - return std::stoi(option) * getUnitMultiplier(option); + return static_cast(std::stoi(option) * getUnitMultiplier(option)); } template <> @@ -121,11 +121,11 @@ float stringToValue(const std::string& option) template <> double stringToValue(const std::string& option) { - return std::stod(option) * getUnitMultiplier(option); + return std::stod(option) * static_cast(getUnitMultiplier(option)); } template <> -bool stringToValue(const std::string& option) +bool stringToValue(const std::string& /*option*/) { return true; } @@ -223,11 +223,7 @@ template <> SparsityFlag stringToValue(std::string const& option) { std::unordered_map const table{ - {"disable", SparsityFlag::kDISABLE}, {"enable", SparsityFlag::kENABLE}, - { - "force", SparsityFlag::kFORCE - } - }; + {"disable", SparsityFlag::kDISABLE}, {"enable", SparsityFlag::kENABLE}, {"force", SparsityFlag::kFORCE}}; auto search = table.find(option); if (search == table.end()) { @@ -287,7 +283,6 @@ samplesSafeCommon::SafetyPluginLibraryArgument stringToValue std::pair 
splitNameAndValue(const std::string& s) { @@ -346,7 +341,7 @@ template std::string joinValuesToString(std::vector const& list, std::string const& sep) { std::ostringstream os; - for (int32_t i = 0, n = list.size(); i < n; ++i) + for (int32_t i = 0, n = static_cast(list.size()); i < n; ++i) { os << list[i]; if (i != n - 1) @@ -718,16 +713,17 @@ bool getOptimizationProfiles( size_t profileIndex{}; auto getShapes - = [](BuildOptions::ShapeProfile& shapes, std::string const& list, nvinfer1::OptProfileSelector selector) { - std::vector shapeList{splitToStringVec(list, ',')}; - for (auto const& s : shapeList) - { - auto nameDimsPair = splitNameAndValue>(s); - auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); - auto dims = nameDimsPair.second; - insertShapesBuild(shapes, selector, tensorName, dims); - } - }; + = [](BuildOptions::ShapeProfile& shapes, std::string const& list, nvinfer1::OptProfileSelector selector) + { + std::vector shapeList{splitToStringVec(list, ',')}; + for (auto const& s : shapeList) + { + auto nameDimsPair = splitNameAndValue>(s); + auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); + auto dims = nameDimsPair.second; + insertShapesBuild(shapes, selector, tensorName, dims); + } + }; while (getAndDelOptionWithPosition(arguments, argument, profileIndex, pos)) { @@ -964,7 +960,8 @@ std::ostream& printPreviewFlags(std::ostream& os, BuildOptions const& options) return os; } - auto const addFlag = [&](PreviewFeature feat) { + auto const addFlag = [&](PreviewFeature feat) + { int32_t featVal = static_cast(feat); if (options.previewFeatures.find(featVal) != options.previewFeatures.end()) { @@ -1100,7 +1097,8 @@ void getTempfileControls(Arguments& arguments, char const* argument, TempfileCon void BuildOptions::parse(Arguments& arguments) { getAndDelOption(arguments, "--cpuOnly", cpuOnly); - auto getFormats = [&arguments](std::vector& formatsVector, const char* argument) { + auto getFormats = [&arguments](std::vector& 
formatsVector, const char* argument) + { std::string list; getAndDelOption(arguments, argument, list); std::vector formats{splitToStringVec(list, ',')}; @@ -1191,27 +1189,27 @@ void BuildOptions::parse(Arguments& arguments) if (memPoolName == "workspace") { // use unit in MB. - workspace = memPoolSize / 1.0_MiB; + workspace = static_cast(static_cast(memPoolSize) / 1.0_MiB); } else if (memPoolName == "dlaSRAM") { // use unit in MB. - dlaSRAM = memPoolSize / 1.0_MiB; + dlaSRAM = static_cast(static_cast(memPoolSize) / 1.0_MiB); } else if (memPoolName == "dlaLocalDRAM") { // use unit in MB. - dlaLocalDRAM = memPoolSize / 1.0_MiB; + dlaLocalDRAM = static_cast(static_cast(memPoolSize) / 1.0_MiB); } else if (memPoolName == "dlaGlobalDRAM") { // use unit in MB. - dlaGlobalDRAM = memPoolSize / 1.0_MiB; + dlaGlobalDRAM = static_cast(static_cast(memPoolSize) / 1.0_MiB); } else if (memPoolName == "tacticSharedMem") { // use unit in KB. - tacticSharedMem = memPoolSize / 1.0_KiB; + tacticSharedMem = static_cast(static_cast(memPoolSize) / 1.0_KiB); } else if (!memPoolName.empty()) { @@ -1477,23 +1475,22 @@ void BuildOptions::parse(Arguments& arguments) { source = nvinfer1::TacticSource::kCUBLAS_LT; } + else if (t == "CUDNN") + { + source = nvinfer1::TacticSource::kCUDNN; + } + else if (t == "EDGE_MASK_CONVOLUTIONS") + { + source = nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS; + } + else if (t == "JIT_CONVOLUTIONS") + { + source = nvinfer1::TacticSource::kJIT_CONVOLUTIONS; + } else - if (t == "CUDNN") - { - source = nvinfer1::TacticSource::kCUDNN; - } - else if (t == "EDGE_MASK_CONVOLUTIONS") - { - source = nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS; - } - else if (t == "JIT_CONVOLUTIONS") - { - source = nvinfer1::TacticSource::kJIT_CONVOLUTIONS; - } - else - { - throw std::invalid_argument(std::string("Unknown tactic source: ") + t); - } + { + throw std::invalid_argument(std::string("Unknown tactic source: ") + t); + } uint32_t sourceBit = 1U << static_cast(source); @@ 
-1908,20 +1905,24 @@ void AllOptions::parse(Arguments& arguments) if (build.buildDLAStandalone) { build.skipInference = true; - auto checkSafeDLAFormats = [](std::vector const& fmt, bool isInput) { - return !fmt.empty() && std::all_of(fmt.begin(), fmt.end(), [&](IOFormat const& pair) { - auto const& [dataType, tensorFormats] = pair; - using TF = nvinfer1::TensorFormat; - using nvinfer1::DataType; - - bool const isDLA_LINEAR{tensorFormats == 1U << static_cast(TF::kDLA_LINEAR)}; - bool const isHWC4{tensorFormats == 1U << static_cast(TF::kCHW4) - || tensorFormats == 1U << static_cast(TF::kDLA_HWC4)}; - bool const isCHW32{tensorFormats == 1U << static_cast(TF::kCHW32)}; - bool const isCHW16{tensorFormats == 1U << static_cast(TF::kCHW16)}; - return (dataType == DataType::kINT8 && (isDLA_LINEAR || (isInput && isHWC4) || isCHW32)) - || (dataType == DataType::kHALF && (isDLA_LINEAR || (isInput && isHWC4) || isCHW16)); - }); + auto checkSafeDLAFormats = [](std::vector const& fmt, bool isInput) + { + return !fmt.empty() + && std::all_of(fmt.begin(), fmt.end(), + [&](IOFormat const& pair) + { + auto const& [dataType, tensorFormats] = pair; + using TF = nvinfer1::TensorFormat; + using nvinfer1::DataType; + + bool const isDLA_LINEAR{tensorFormats == 1U << static_cast(TF::kDLA_LINEAR)}; + bool const isHWC4{tensorFormats == 1U << static_cast(TF::kCHW4) + || tensorFormats == 1U << static_cast(TF::kDLA_HWC4)}; + bool const isCHW32{tensorFormats == 1U << static_cast(TF::kCHW32)}; + bool const isCHW16{tensorFormats == 1U << static_cast(TF::kCHW16)}; + return (dataType == DataType::kINT8 && (isDLA_LINEAR || (isInput && isHWC4) || isCHW32)) + || (dataType == DataType::kHALF && (isDLA_LINEAR || (isInput && isHWC4) || isCHW16)); + }); }; if (!checkSafeDLAFormats(build.inputFormats, true) || !checkSafeDLAFormats(build.outputFormats, false)) { @@ -1954,7 +1955,8 @@ void TaskInferenceOptions::parse(Arguments& arguments) void SafeBuilderOptions::parse(Arguments& arguments) { - auto 
getFormats = [&arguments](std::vector& formatsVector, const char* argument) { + auto getFormats = [&arguments](std::vector& formatsVector, const char* argument) + { std::string list; getAndDelOption(arguments, argument, list); std::vector formats{splitToStringVec(list, ',')}; @@ -2212,7 +2214,6 @@ std::ostream& operator<<(std::ostream& os, nvinfer1::DeviceType devType) return os; } - std::ostream& operator<<(std::ostream& os, nvinfer1::RuntimePlatform platform) { switch (platform) @@ -2345,16 +2346,17 @@ std::ostream& operator<<(std::ostream& os, const BuildOptions& options) "Mark Unfused Tensors As Debug Tensors: " << boolToEnabled(options.markUnfusedTensorsAsDebugTensors) << std::endl; // clang-format on - auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector formats) { + auto printIOFormats = [](std::ostream& oss, const char* direction, const std::vector formats) + { if (formats.empty()) { - os << direction << "s format: fp32:CHW" << std::endl; + oss << direction << "s format: fp32:CHW" << std::endl; } else { for (const auto& f : formats) { - os << direction << ": " << f << std::endl; + oss << direction << ": " << f << std::endl; } } }; @@ -2363,7 +2365,7 @@ std::ostream& operator<<(std::ostream& os, const BuildOptions& options) printIOFormats(os, "Output(s)", options.outputFormats); for (size_t i = 0; i < options.optProfiles.size(); i++) { - printShapes(os, "build", options.optProfiles[i], i); + printShapes(os, "build", options.optProfiles[i], static_cast(i)); } printShapes(os, "calibration", options.shapesCalib, -1); @@ -2503,17 +2505,17 @@ std::ostream& operator<<(std::ostream& os, const AllOptions& options) std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) { - auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector formats) + auto printIOFormats = [](std::ostream& oss, const char* direction, const std::vector formats) { if (formats.empty()) { - os << direction << "s format: 
fp32:CHW" << std::endl; + oss << direction << "s format: fp32:CHW" << std::endl; } else { for (const auto& f : formats) { - os << direction << ": " << f << std::endl; + oss << direction << ": " << f << std::endl; } } }; diff --git a/samples/common/sampleReporting.cpp b/samples/common/sampleReporting.cpp index febb08699..2ea70008c 100644 --- a/samples/common/sampleReporting.cpp +++ b/samples/common/sampleReporting.cpp @@ -51,7 +51,7 @@ template float findPercentile(float percentile, std::vector const& timings, T const& toFloat) { int32_t const all = static_cast(timings.size()); - int32_t const exclude = static_cast((1 - percentile / 100) * all); + int32_t const exclude = static_cast((1.0F - percentile / 100.0F) * static_cast(all)); if (timings.empty()) { return std::numeric_limits::infinity(); @@ -74,7 +74,7 @@ float findMedian(std::vector const& timings, T const& toFloat) return std::numeric_limits::infinity(); } - int32_t const m = timings.size() / 2; + int32_t const m = static_cast(timings.size() / 2); if (timings.size() % 2) { return toFloat(timings[m]); @@ -99,11 +99,13 @@ float findCoeffOfVariance(std::vector const& timings, T const& to return std::numeric_limits::infinity(); } - auto const metricAccumulator = [toFloat, mean](float acc, InferenceTime const& a) { + auto const metricAccumulator = [toFloat, mean](float acc, InferenceTime const& a) + { float const diff = toFloat(a) - mean; return acc + diff * diff; }; - float const variance = std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) / timings.size(); + float const variance + = std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) / static_cast(timings.size()); return std::sqrt(variance) / mean * 100.F; } @@ -137,7 +139,7 @@ inline std::string dimsToString(Dims const& shape) void printProlog(int32_t warmups, int32_t timings, float warmupMs, float benchTimeMs, std::ostream& os) { os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms" << 
std::endl; - os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000 << " s" << std::endl; + os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000.0F << " s" << std::endl; } void printTiming(std::vector const& timings, int32_t runsPerAvg, std::ostream& os) @@ -167,8 +169,8 @@ void printTiming(std::vector const& timings, int32_t runsPerAvg, if (++count == runsPerAvg) { // clang-format off - os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg - << " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (enqueue " << sum.enq / runsPerAvg + os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / static_cast(runsPerAvg) + << " ms - Host latency: " << sum.latency() / static_cast(runsPerAvg) << " ms (enqueue " << sum.enq / static_cast(runsPerAvg) << " ms)" << std::endl; // clang-format on count = 0; @@ -219,7 +221,8 @@ PerformanceResult getPerformanceResult(std::vector const& timings PerformanceResult result; result.min = metricGetter(newTimings.front()); result.max = metricGetter(newTimings.back()); - result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0F, metricAccumulator) / newTimings.size(); + result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0F, metricAccumulator) + / static_cast(newTimings.size()); result.median = findMedian(newTimings, metricGetter); for (auto percentile : percentiles) { @@ -232,7 +235,7 @@ PerformanceResult getPerformanceResult(std::vector const& timings void printEpilog(std::vector const& timings, float walltimeMs, std::vector const& percentiles, int32_t batchSize, int32_t infStreams, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose) { - float const throughput = batchSize * timings.size() / walltimeMs * 1000; + float const throughput = static_cast(batchSize) * static_cast(timings.size()) / walltimeMs * 1000; auto const getLatency = [](InferenceTime const& t) { return 
t.latency(); }; auto const latencyResult = getPerformanceResult(timings, getLatency, percentiles); @@ -249,11 +252,12 @@ void printEpilog(std::vector const& timings, float walltimeMs, st auto const getD2h = [](InferenceTime const& t) { return t.d2h; }; auto const d2hResult = getPerformanceResult(timings, getD2h, percentiles); - auto const toPerfString = [&](const PerformanceResult& r) { + auto const toPerfString = [&](const PerformanceResult& r) + { std::stringstream s; s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean << " ms, " << "median = " << r.median << " ms"; - for (int32_t i = 0, n = percentiles.size(); i < n; ++i) + for (int32_t i = 0, n = static_cast(percentiles.size()); i < n; ++i) { s << ", percentile(" << percentiles[i] << "%) = " << r.percentiles[i] << " ms"; } @@ -269,7 +273,8 @@ void printEpilog(std::vector const& timings, float walltimeMs, st osInfo << "GPU Compute Time: " << toPerfString(gpuComputeResult) << std::endl; osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl; osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl; - osInfo << "Total GPU Compute Time: " << gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl; + osInfo << "Total GPU Compute Time: " << gpuComputeResult.mean * static_cast(timings.size()) / 1000 << " s" + << std::endl; // Report warnings if the throughput is bound by other factors than GPU Compute Time. 
constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F}; @@ -328,11 +333,12 @@ void printPerformanceReport(std::vector const& trace, ReportingO float const warmupMs = infOpts.warmup; auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) { return a.computeStart >= warmupMs; }; auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup); - int32_t const warmups = noWarmup - trace.begin(); + int32_t const warmups = static_cast(noWarmup - trace.begin()); float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart; // treat inference with explicit batch as a single query and report the throughput batchSize = batchSize ? batchSize : 1; - printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize, warmupMs, benchTime, osInfo); + printProlog( + warmups * batchSize, static_cast(trace.size() - warmups) * batchSize, warmupMs, benchTime, osInfo); std::vector timings(trace.size() - warmups); std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming); @@ -425,17 +431,17 @@ void Profiler::print(std::ostream& os) const noexcept continue; } // clang-format off - os << std::setw(timeLength) << std::fixed << std::setprecision(2) << getTotalTime(p) - << std::setw(avgLength) << std::fixed << std::setprecision(4) << getAvgTime(p) - << std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime(p) - << std::setw(percentageLength) << std::fixed << std::setprecision(1) << getTotalTime(p) / totalTimeMs * 100 + os << std::setw(static_cast(timeLength)) << std::fixed << std::setprecision(2) << getTotalTime(p) + << std::setw(static_cast(avgLength)) << std::fixed << std::setprecision(4) << getAvgTime(p) + << std::setw(static_cast(medLength)) << std::fixed << std::setprecision(4) << getMedianTime(p) + << std::setw(static_cast(percentageLength)) << std::fixed << std::setprecision(1) << getTotalTime(p) / totalTimeMs * 100 << " " << p.name << std::endl; } { - os << std::setw(timeLength) << std::fixed << std::setprecision(2) - << 
totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount - << std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime() - << std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 + os << std::setw(static_cast(timeLength)) << std::fixed << std::setprecision(2) + << totalTimeMs << std::setw(static_cast(avgLength)) << std::fixed << std::setprecision(4) << totalTimeMs / static_cast(mUpdatesCount) + << std::setw(static_cast(medLength)) << std::fixed << std::setprecision(4) << getMedianTime() + << std::setw(static_cast(percentageLength)) << std::fixed << std::setprecision(1) << 100.0 << " Total" << std::endl; // clang-format on } diff --git a/samples/common/sampleReporting.h b/samples/common/sampleReporting.h index 013273a8d..4bba90849 100644 --- a/samples/common/sampleReporting.h +++ b/samples/common/sampleReporting.h @@ -210,9 +210,8 @@ class Profiler : public nvinfer1::IProfiler private: float getTotalTime() const noexcept { - auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) { - return accumulator + std::accumulate(lp.timeMs.begin(), lp.timeMs.end(), 0.F, std::plus()); - }; + auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) + { return accumulator + std::accumulate(lp.timeMs.begin(), lp.timeMs.end(), 0.F, std::plus()); }; return std::accumulate(mLayers.begin(), mLayers.end(), 0.0F, plusLayerTime); } @@ -261,7 +260,7 @@ class Profiler : public nvinfer1::IProfiler float getAvgTime(LayerProfile const& p) const noexcept { - return getTotalTime(p) / p.timeMs.size(); + return getTotalTime(p) / static_cast(p.timeMs.size()); } std::vector mLayers; diff --git a/samples/common/sampleUtils.cpp b/samples/common/sampleUtils.cpp index 1ffcc546b..a7aa13e1d 100644 --- a/samples/common/sampleUtils.cpp +++ b/samples/common/sampleUtils.cpp @@ -166,7 +166,8 @@ void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vec TensorToLayer 
constO2L; TensorToLayer shuffleI2L; LayerToTensor shuffleL2O; - auto collectMappingInfo = [&](int32_t const idx) { + auto collectMappingInfo = [&](int32_t const idx) + { ILayer* l = network.getLayer(idx); switch (l->getType()) { @@ -210,7 +211,8 @@ void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vec auto isTranspose = [](nvinfer1::Permutation const& perm) -> bool { return (perm.order[0] == 1 && perm.order[1] == 0); }; auto is2D = [](nvinfer1::Dims const& dims) -> bool { return dims.nbDims == 2; }; - auto isIdenticalReshape = [](nvinfer1::Dims const& dims) -> bool { + auto isIdenticalReshape = [](nvinfer1::Dims const& dims) -> bool + { for (int32_t i = 0; i < dims.nbDims; ++i) { if (dims.d[i] != i || dims.d[i] != -1) @@ -220,7 +222,8 @@ void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vec } return true; }; - auto tensorReachedViaTranspose = [&](nvinfer1::ITensor* t, bool& needTranspose) -> ITensor* { + auto tensorReachedViaTranspose = [&](nvinfer1::ITensor* t, bool& needTranspose) -> ITensor* + { while (shuffleI2L.find(t) != shuffleI2L.end()) { nvinfer1::IShuffleLayer* s = static_cast(shuffleI2L.at(t)); @@ -276,12 +279,13 @@ void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vec } // 3. Finally, sparsify the weights - auto sparsifyConstantWeights = [&sparseWeights](nvinfer1::IConstantLayer* layer, bool const needTranspose) { + auto sparsifyConstantWeights = [&sparseWeights](nvinfer1::IConstantLayer* layer, bool const needTranspose) + { Dims dims = layer->getOutput(0)->getDimensions(); ASSERT(dims.nbDims == 2); int32_t const idxN = needTranspose ? 
1 : 0; - int32_t const n = dims.d[idxN]; - int32_t const k = dims.d[1 - idxN]; + int32_t const n = static_cast(dims.d[idxN]); + int32_t const k = static_cast(dims.d[1 - idxN]); sparseWeights.emplace_back(); std::vector& spw = sparseWeights.back(); Weights w = layer->getWeights(); @@ -293,12 +297,12 @@ void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vec { if (dtype == nvinfer1::DataType::kFLOAT) { - spw.resize(w.count * sizeof(float)); + spw.resize(static_cast(w.count) * sizeof(float)); transpose2DWeights(spw.data(), w.values, k, n); } else if (dtype == nvinfer1::DataType::kHALF) { - spw.resize(w.count * sizeof(half_float::half)); + spw.resize(static_cast(w.count) * sizeof(half_float::half)); transpose2DWeights(spw.data(), w.values, k, n); } @@ -353,8 +357,9 @@ void sparsify(nvinfer1::INetworkDefinition& network, std::vector(layer); auto const& dims = conv.getKernelSizeNd(); ASSERT(dims.nbDims == 2 || dims.nbDims == 3); - auto const k = conv.getNbOutputMaps(); - auto const trs = std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies()); + auto const k = static_cast(conv.getNbOutputMaps()); + auto const trs = static_cast( + std::accumulate(dims.d, dims.d + dims.nbDims, static_cast(1), std::multiplies())); sparseWeights.emplace_back(); setSparseWeights(conv, k, trs, sparseWeights.back()); } @@ -421,16 +426,16 @@ int32_t dataOffsetFromDims(int64_t v, Dims const& dims, Dims const& strides, int int32_t dataOffset = 0; for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) { - int32_t dimVal = v % dims.d[dimIndex]; + int32_t dimVal = static_cast(v % dims.d[dimIndex]); if (dimIndex == vectorDim) { - dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv; + dataOffset += static_cast((dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv); } else { - dataOffset += dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv); + dataOffset += static_cast(dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 
1 : spv)); } - v /= dims.d[dimIndex]; + v /= static_cast(dims.d[dimIndex]); ASSERT(v >= 0); } @@ -507,14 +512,14 @@ template void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector& sparseWeights) { auto const c = count / (k * trs); - sparseWeights.resize(count * sizeof(T)); + sparseWeights.resize(static_cast(count) * sizeof(T)); auto* sparseValues = reinterpret_cast(sparseWeights.data()); constexpr int32_t window = 4; constexpr int32_t nonzeros = 2; - int32_t const crs = c * trs; - auto const getIndex = [=](int32_t ki, int32_t ci, int32_t rsi) { return ki * crs + ci * trs + rsi; }; + int64_t const crs = c * trs; + auto const getIndex = [=](int64_t ki, int64_t ci, int64_t rsi) { return ki * crs + ci * trs + rsi; }; for (int64_t ki = 0; ki < k; ++ki) { diff --git a/samples/sampleCharRNN/sampleCharRNN.cpp b/samples/sampleCharRNN/sampleCharRNN.cpp index 0a59712f4..39fe46709 100644 --- a/samples/sampleCharRNN/sampleCharRNN.cpp +++ b/samples/sampleCharRNN/sampleCharRNN.cpp @@ -639,12 +639,12 @@ nvinfer1::ILayer* SampleCharRNNLoop::addLSTMLayers(std::unique_ptr nvinfer1::Weights { - const char* data = static_cast(weights.values); + const char* data_ = static_cast(weights.values); int64_t shift = samplesCommon::volume(start); - const int bufferSize = samplesCommon::getNbBytes(weights.type, shift); + const int bufferSize = static_cast(samplesCommon::getNbBytes(weights.type, shift)); int64_t count = samplesCommon::volume(size); ASSERT(shift + count <= weights.count); - return nvinfer1::Weights{weights.type, data + bufferSize, count}; + return nvinfer1::Weights{weights.type, data_ + bufferSize, count}; }; for (int i = 0; i < mParams.layerCount; ++i) { @@ -667,7 +667,7 @@ nvinfer1::ILayer* SampleCharRNNLoop::addLSTMLayers(std::unique_ptr tensors) -> nvinfer1::ITensor* { - nvinfer1::IConcatenationLayer* concat = network->addConcatenation(tensors.data(), tensors.size()); + nvinfer1::IConcatenationLayer* concat = 
network->addConcatenation(tensors.data(), static_cast(tensors.size())); concat->setAxis(0); return concat->getOutput(0); }; @@ -776,7 +776,7 @@ bool SampleCharRNNBase::infer() // Select a random seed string. srand(unsigned(time(nullptr))); - int sentenceIndex = rand() % mParams.inputSentences.size(); + int sentenceIndex = static_cast(rand() % mParams.inputSentences.size()); std::string inputSentence = mParams.inputSentences[sentenceIndex]; std::string expected = mParams.outputSentences[sentenceIndex]; std::string genstr; diff --git a/samples/sampleDynamicReshape/sampleDynamicReshape.cpp b/samples/sampleDynamicReshape/sampleDynamicReshape.cpp index d5c3f221f..5504002ad 100644 --- a/samples/sampleDynamicReshape/sampleDynamicReshape.cpp +++ b/samples/sampleDynamicReshape/sampleDynamicReshape.cpp @@ -150,7 +150,7 @@ bool SampleDynamicReshape::build() //! \return false if error in build preprocessor engine. //! bool SampleDynamicReshape::buildPreprocessorEngine( - nvinfer1::IBuilder& builder, nvinfer1::IRuntime& runtime, cudaStream_t profileStream) + nvinfer1::IBuilder& builder, nvinfer1::IRuntime& runtime, cudaStream_t /*profileStream*/) { // Create the preprocessor engine using a network that supports full dimensions (createNetworkV2). 
auto preprocessorNetwork = std::unique_ptr( @@ -437,7 +437,7 @@ Dims SampleDynamicReshape::loadPGMFile(const std::string& fileName) mInput.hostBuffer.resize(inputDims); float* hostDataBuffer = static_cast(mInput.hostBuffer.data()); std::transform(fileData.begin(), fileData.end(), hostDataBuffer, - [](uint8_t x) { return 1.0 - static_cast(x / 255.0); }); + [](uint8_t x) { return 1.0f - static_cast(x) / 255.0f; }); return inputDims; } @@ -459,7 +459,7 @@ bool SampleDynamicReshape::validateOutput(int digit) ++curIndex; } - int predictedDigit = std::max_element(prob.begin(), prob.end()) - prob.begin(); + int predictedDigit = static_cast(std::max_element(prob.begin(), prob.end()) - prob.begin()); return digit == predictedDigit; } diff --git a/samples/sampleINT8API/sampleINT8API.cpp b/samples/sampleINT8API/sampleINT8API.cpp index 9a6f9e17f..c644396d9 100644 --- a/samples/sampleINT8API/sampleINT8API.cpp +++ b/samples/sampleINT8API/sampleINT8API.cpp @@ -241,7 +241,6 @@ void SampleINT8API::setLayerPrecision(nvinfer1::INetworkDefinition const& networ std::string tensorName = layer->getOutput(j)->getName(); if (mParams.verbose) { - std::string tensorName = layer->getOutput(j)->getName(); sample::gLogInfo << "Tensor: " << tensorName << ". OutputType: INT8" << std::endl; } // set output type of execution tensors and not shape tensors. 
@@ -391,7 +390,7 @@ bool SampleINT8API::setDynamicRange(nvinfer1::INetworkDefinition& network) max = std::max(max, std::abs(val)); } - if (!lyr->getOutput(j)->setDynamicRange(-max, max)) + if (!lyr->getOutput(j)->setDynamicRange(static_cast(-max), static_cast(max))) { return false; } @@ -466,7 +465,7 @@ bool SampleINT8API::verifyOutput(samplesCommon::BufferManager const& buffers) co { // copy output host buffer data for further processing float const* probPtr = static_cast(buffers.getHostBuffer(mInOut.at("output"))); - std::vector output(probPtr, probPtr + mOutputDims.d[1]); + std::vector output(probPtr, probPtr + static_cast(mOutputDims.d[1])); auto inds = samplesCommon::argMagnitudeSort(output.cbegin(), output.cend()); diff --git a/samples/sampleIOFormats/sampleIOFormats.cpp b/samples/sampleIOFormats/sampleIOFormats.cpp index 6039bee33..213c2599c 100644 --- a/samples/sampleIOFormats/sampleIOFormats.cpp +++ b/samples/sampleIOFormats/sampleIOFormats.cpp @@ -74,32 +74,32 @@ class BufferDesc public: BufferDesc() = default; - BufferDesc(nvinfer1::Dims dims, int32_t dataWidth, TensorFormat format) + BufferDesc(nvinfer1::Dims dims_, int32_t dataWidth_, TensorFormat format) { - this->dataWidth = dataWidth; + this->dataWidth = dataWidth_; if (format == TensorFormat::kLINEAR) { - this->dims[0] = dims.d[0]; - this->dims[1] = dims.d[1]; - this->dims[2] = dims.d[2]; - this->dims[3] = dims.d[3]; + this->dims[0] = static_cast(dims_.d[0]); + this->dims[1] = static_cast(dims_.d[1]); + this->dims[2] = static_cast(dims_.d[2]); + this->dims[3] = static_cast(dims_.d[3]); this->dims[4] = 1; } else if (format == TensorFormat::kCHW32) { - this->dims[0] = dims.d[0]; - this->dims[1] = divUp(dims.d[1], 32); - this->dims[2] = dims.d[2]; - this->dims[3] = dims.d[3]; + this->dims[0] = static_cast(dims_.d[0]); + this->dims[1] = divUp(static_cast(dims_.d[1]), 32); + this->dims[2] = static_cast(dims_.d[2]); + this->dims[3] = static_cast(dims_.d[3]); this->dims[4] = 32; this->scalarPerVector = 
32; } else if (format == TensorFormat::kHWC) { - this->dims[0] = dims.d[0]; - this->dims[1] = dims.d[2]; - this->dims[2] = dims.d[3]; - this->dims[3] = dims.d[1]; + this->dims[0] = static_cast(dims_.d[0]); + this->dims[1] = static_cast(dims_.d[2]); + this->dims[2] = static_cast(dims_.d[3]); + this->dims[3] = static_cast(dims_.d[1]); this->dims[4] = 1; this->channelPivot = true; } @@ -144,23 +144,23 @@ class SampleBuffer dims.d[3] = 1; } - SampleBuffer(nvinfer1::Dims dims, int32_t dataWidth, TensorFormat format, bool isInput) - : dims(dims) - , dataWidth(dataWidth) - , format(format) - , isInput(isInput) + SampleBuffer(nvinfer1::Dims dims_, int32_t dataWidth_, TensorFormat format_, bool isInput_) + : dims(dims_) + , dataWidth(dataWidth_) + , format(format_) + , isInput(isInput_) { // Output buffer is unsqueezed to 4D in order to reuse the BufferDesc class - if (isInput == false) + if (isInput_ == false) { - dims.d[2] = dims.d[0]; - dims.d[3] = dims.d[1]; - dims.d[0] = 1; - dims.d[1] = 1; + dims_.d[2] = dims_.d[0]; + dims_.d[3] = dims_.d[1]; + dims_.d[0] = 1; + dims_.d[1] = 1; } - desc = BufferDesc(dims, dataWidth, format); + desc = BufferDesc(dims_, dataWidth_, format_); if (nullptr == buffer) { @@ -330,7 +330,7 @@ bool SampleIOFormats::verify(TypeSpec const& spec) //! //! \return true if the engine was created successfully and false otherwise //! -bool SampleIOFormats::build(int32_t dataWidth) +bool SampleIOFormats::build(int32_t /*dataWidth*/) { auto builder = std::unique_ptr(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); if (!builder) @@ -436,7 +436,7 @@ bool SampleIOFormats::build(int32_t dataWidth) //! \param builder Pointer to the engine builder //! 
bool SampleIOFormats::constructNetwork(std::unique_ptr& builder, - std::unique_ptr& network, std::unique_ptr& config, + std::unique_ptr& /*network*/, std::unique_ptr& config, std::unique_ptr& parser) { auto parsed = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), @@ -548,7 +548,7 @@ void printHelpInfo() //! \brief Used to run the engine build and inference/reference functions //! template -bool process(SampleIOFormats& sample, sample::Logger::TestAtom const& sampleTest, SampleBuffer& inputBuf, +bool process(SampleIOFormats& sample, sample::Logger::TestAtom const& /*sampleTest*/, SampleBuffer& inputBuf, SampleBuffer& outputBuf, TypeSpec& spec) { sample::gLogInfo << "Building and running a GPU inference engine with specified I/O formats." << std::endl; @@ -562,8 +562,8 @@ bool process(SampleIOFormats& sample, sample::Logger::TestAtom const& sampleTest return false; } - inputBuf = SampleBuffer(sample.mInputDims, sizeof(T), sample.mTensorFormat, true); - outputBuf = SampleBuffer(sample.mOutputDims, sizeof(T), TensorFormat::kLINEAR, false); + inputBuf = SampleBuffer(sample.mInputDims, static_cast(sizeof(T)), sample.mTensorFormat, true); + outputBuf = SampleBuffer(sample.mOutputDims, static_cast(sizeof(T)), TensorFormat::kLINEAR, false); if (!sample.infer(inputBuf, outputBuf)) { diff --git a/samples/sampleNamedDimensions/sampleNamedDimensions.cpp b/samples/sampleNamedDimensions/sampleNamedDimensions.cpp index c1da71a68..b0a6983a4 100644 --- a/samples/sampleNamedDimensions/sampleNamedDimensions.cpp +++ b/samples/sampleNamedDimensions/sampleNamedDimensions.cpp @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -223,8 +224,8 @@ bool SampleNamedDimensions::build() //! //! \brief Uses ONNX parser to create the ONNX Network and marks the output layers //! 
-bool SampleNamedDimensions::constructNetwork(std::unique_ptr& builder, - std::unique_ptr& network, std::unique_ptr& config, +bool SampleNamedDimensions::constructNetwork(std::unique_ptr& /*builder*/, + std::unique_ptr& /*network*/, std::unique_ptr& /*config*/, std::unique_ptr& parser) { auto parsed = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), @@ -314,15 +315,15 @@ bool SampleNamedDimensions::infer() bool SampleNamedDimensions::processInput(samplesCommon::BufferManager const& buffers) { int32_t const input0H = mNamedDimension; - int32_t const input0W = mInputDims[0].d[1]; + int32_t const input0W = static_cast(mInputDims[0].d[1]); int32_t const input1H = mNamedDimension; - int32_t const input1W = mInputDims[1].d[1]; + int32_t const input1W = static_cast(mInputDims[1].d[1]); // Generate random input mInput0.resize(input0H * input0W); mInput1.resize(input1H * input1W); std::default_random_engine generator(static_cast(time(nullptr))); - std::uniform_real_distribution unif_real_distr(-10., 10.); + std::uniform_real_distribution unif_real_distr(-10.0f, 10.0f); sample::gLogInfo << "Input0:\n"; for (int32_t i = 0; i < input0H * input0W; i++) @@ -357,7 +358,7 @@ bool SampleNamedDimensions::processInput(samplesCommon::BufferManager const& buf bool SampleNamedDimensions::verifyOutput(samplesCommon::BufferManager const& buffers) { int32_t const outputH = 2 * mNamedDimension; - int32_t const outputW = mOutputDims[0].d[1]; + int32_t const outputW = static_cast(mOutputDims[0].d[1]); int32_t const outputSize = outputH * outputW; auto* output = static_cast(buffers.getHostBuffer(mParams.outputTensorNames[0])); @@ -374,7 +375,7 @@ bool SampleNamedDimensions::verifyOutput(samplesCommon::BufferManager const& buf for (int32_t i = 0; i < outputH * outputW; i++) { auto const reference_value = i > outputSize / 2 ? 
mInput1[i - outputSize / 2] : mInput0[i]; - if (fabs(output[i] - reference_value) > std::numeric_limits::epsilon()) + if (std::abs(output[i] - reference_value) > std::numeric_limits::epsilon()) { return false; } diff --git a/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp b/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp index 3edf8519d..1c7b7b419 100644 --- a/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp +++ b/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp @@ -80,7 +80,7 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV { mDataToSerialize.clear(); mDataToSerialize.emplace_back(PluginField("rowOrder", &mRowOrder, PluginFieldType::kINT32, 1)); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); } @@ -137,14 +137,14 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV return 2; } - int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, - int32_t nbOutputs) noexcept override + int32_t configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, DynamicPluginTensorDesc const* /*out*/, + int32_t /*nbOutputs*/) noexcept override { return 0; } bool supportsFormatCombination( - int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override + int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t /*nbInputs*/, int32_t /*nbOutputs*/) noexcept override { bool typeOk{false}; if (pos == 0) @@ -165,15 +165,15 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV } int32_t getOutputDataTypes( - DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept override + DataType* outputTypes, int32_t /*nbOutputs*/, DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept override { 
outputTypes[0] = DataType::kINT32; outputTypes[1] = DataType::kINT64; return 0; } - int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs, - int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept override + int32_t getOutputShapes(DimsExprs const* inputs, int32_t /*nbInputs*/, DimsExprs const* /*shapeInputs*/, + int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t /*nbOutputs*/, IExprBuilder& exprBuilder) noexcept override { // The input tensor must be 2-D if (inputs[0].nbDims != 2) @@ -207,12 +207,12 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV } // IPluginV3OneRuntime methods - int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, void const* const* inputs, + int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* /*outputDesc*/, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override { - int32_t const R = inputDesc[0].dims.d[0]; - int32_t const C = inputDesc[0].dims.d[1]; + int32_t const R = static_cast(inputDesc[0].dims.d[0]); + int32_t const C = static_cast(inputDesc[0].dims.d[1]); auto type = inputDesc[0].type; @@ -249,12 +249,12 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV } int32_t onShapeChange( - PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept override + PluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, PluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept override { return 0; } - IPluginV3* attachToContext(IPluginResourceContext* context) noexcept override + IPluginV3* attachToContext(IPluginResourceContext* /*context*/) noexcept override { return clone(); } @@ -264,8 +264,8 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV return &mFCToSerialize; } - size_t 
getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override + size_t getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept override { return sizeof(int64_t); } @@ -283,7 +283,7 @@ class NonZeroPluginCreator : public nvinfer1::IPluginCreatorV3One { mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("rowOrder", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -302,7 +302,7 @@ class NonZeroPluginCreator : public nvinfer1::IPluginCreatorV3One return &mFC; } - IPluginV3* createPlugin(char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept override + IPluginV3* createPlugin(char const* /*name*/, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept override { try { @@ -483,8 +483,8 @@ bool SampleNonZeroPlugin::build() //! //! \param builder Pointer to the engine builder //! 
-bool SampleNonZeroPlugin::constructNetwork(std::unique_ptr& builder, - std::unique_ptr& network, std::unique_ptr& config) +bool SampleNonZeroPlugin::constructNetwork(std::unique_ptr& /*builder*/, + std::unique_ptr& network, std::unique_ptr& /*config*/) { std::default_random_engine generator(mSeed); std::uniform_int_distribution distr(10, 25); @@ -510,7 +510,7 @@ bool SampleNonZeroPlugin::constructNetwork(std::unique_ptr& ASSERT(plugin != nullptr && "NonZeroPlugin construction failed"); std::vector inputsVec{in}; - auto pluginNonZeroLayer = network->addPluginV3(inputsVec.data(), inputsVec.size(), nullptr, 0, *plugin); + auto pluginNonZeroLayer = network->addPluginV3(inputsVec.data(), static_cast(inputsVec.size()), nullptr, 0, *plugin); ASSERT(pluginNonZeroLayer != nullptr); ASSERT(pluginNonZeroLayer->getOutput(0) != nullptr); ASSERT(pluginNonZeroLayer->getOutput(1) != nullptr); @@ -595,8 +595,8 @@ bool SampleNonZeroPlugin::infer() //! bool SampleNonZeroPlugin::processInput(samplesCommon::BufferManager const& buffers) { - int32_t const inputH = mInputDims.d[0]; - int32_t const inputW = mInputDims.d[1]; + int32_t const inputH = static_cast(mInputDims.d[0]); + int32_t const inputW = static_cast(mInputDims.d[1]); std::vector fileData(inputH * inputW); @@ -609,7 +609,7 @@ bool SampleNonZeroPlugin::processInput(samplesCommon::BufferManager const& buffe float* hostDataBuffer = static_cast(buffers.getHostBuffer(mParams.inputTensorNames[0])); for (int32_t i = 0; i < inputH * inputW; ++i) { - auto const raw = 1.0 - float(fileData[i] / 255.0); + auto const raw = 1.0f - static_cast(fileData[i]) / 255.0f; hostDataBuffer[i] = raw; } @@ -640,7 +640,7 @@ bool SampleNonZeroPlugin::verifyOutput(samplesCommon::BufferManager const& buffe { float* input = static_cast(buffers.getHostBuffer(mParams.inputTensorNames[0])); int32_t* output = static_cast(buffers.getHostBuffer(mParams.outputTensorNames[0])); - int64_t count = *static_cast(buffers.getHostBuffer(mParams.outputTensorNames[1])); 
+ int32_t count = static_cast(*static_cast(buffers.getHostBuffer(mParams.outputTensorNames[1]))); std::vector covered(mInputDims.d[0] * mInputDims.d[1], false); diff --git a/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp b/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp index 33bb5f57d..2654fa67e 100644 --- a/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp +++ b/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp @@ -192,7 +192,7 @@ bool SampleOnnxMNIST::build() //! \param builder Pointer to the engine builder //! bool SampleOnnxMNIST::constructNetwork(std::unique_ptr& builder, - std::unique_ptr& network, std::unique_ptr& config, + std::unique_ptr& /*network*/, std::unique_ptr& config, std::unique_ptr& parser, std::unique_ptr& timingCache) { auto parsed = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), @@ -269,8 +269,8 @@ bool SampleOnnxMNIST::infer() //! bool SampleOnnxMNIST::processInput(const samplesCommon::BufferManager& buffers) { - const int inputH = mInputDims.d[2]; - const int inputW = mInputDims.d[3]; + const int inputH = static_cast(mInputDims.d[2]); + const int inputW = static_cast(mInputDims.d[3]); // Read a random digit file srand(unsigned(time(nullptr))); @@ -290,7 +290,7 @@ bool SampleOnnxMNIST::processInput(const samplesCommon::BufferManager& buffers) float* hostDataBuffer = static_cast(buffers.getHostBuffer(mParams.inputTensorNames[0])); for (int i = 0; i < inputH * inputW; i++) { - hostDataBuffer[i] = 1.0 - float(fileData[i] / 255.0); + hostDataBuffer[i] = 1.0f - static_cast(fileData[i]) / 255.0f; } return true; @@ -303,7 +303,7 @@ bool SampleOnnxMNIST::processInput(const samplesCommon::BufferManager& buffers) //! 
bool SampleOnnxMNIST::verifyOutput(const samplesCommon::BufferManager& buffers) { - const int outputSize = mOutputDims.d[1]; + const int outputSize = static_cast(mOutputDims.d[1]); float* output = static_cast(buffers.getHostBuffer(mParams.outputTensorNames[0])); float val{0.0F}; int idx{0}; @@ -312,7 +312,7 @@ bool SampleOnnxMNIST::verifyOutput(const samplesCommon::BufferManager& buffers) float sum{0.0F}; for (int i = 0; i < outputSize; i++) { - output[i] = exp(output[i]); + output[i] = static_cast(exp(output[i])); sum += output[i]; } diff --git a/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp b/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp index 2acf88bcd..d3f154b2f 100644 --- a/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp +++ b/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp @@ -48,8 +48,8 @@ const std::string gSampleName = "TensorRT.sample_onnx_mnist_coord_conv_ac"; // Normalization constants from Pytorch transform.Normalize(). // They are needed to preprocess the data: // https://discuss.pytorch.org/t/understanding-transform-normalize/21730 -const float PYTORCH_NORMALIZE_MEAN = 0.1307; -const float PYTORCH_NORMALIZE_STD = 0.3081; +const float PYTORCH_NORMALIZE_MEAN = 0.1307f; +const float PYTORCH_NORMALIZE_STD = 0.3081f; //! \brief The SampleOnnxMnistCoordConvAC class implements the ONNX MNIST sample //! @@ -210,7 +210,7 @@ bool SampleOnnxMnistCoordConvAC::build() //! \param builder Pointer to the engine builder //! bool SampleOnnxMnistCoordConvAC::constructNetwork(std::unique_ptr& builder, - std::unique_ptr& network, std::unique_ptr& config, + std::unique_ptr& /*network*/, std::unique_ptr& config, std::unique_ptr& parser) { auto parsed = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), @@ -281,8 +281,8 @@ bool SampleOnnxMnistCoordConvAC::infer() //! 
bool SampleOnnxMnistCoordConvAC::processInput(const samplesCommon::BufferManager& buffers) { - const int inputH = mInputDims.d[2]; - const int inputW = mInputDims.d[3]; + const int inputH = static_cast(mInputDims.d[2]); + const int inputW = static_cast(mInputDims.d[3]); // Read a random digit file srand(unsigned(time(nullptr))); @@ -301,7 +301,7 @@ bool SampleOnnxMnistCoordConvAC::processInput(const samplesCommon::BufferManager float* hostDataBuffer = static_cast(buffers.getHostBuffer(mParams.inputTensorNames[0])); for (int i = 0; i < inputH * inputW; i++) { - hostDataBuffer[i] = ((1.0 - float(fileData[i] / 255.0)) - PYTORCH_NORMALIZE_MEAN) / PYTORCH_NORMALIZE_STD; + hostDataBuffer[i] = ((1.0f - static_cast(fileData[i]) / 255.0f) - PYTORCH_NORMALIZE_MEAN) / PYTORCH_NORMALIZE_STD; } return true; @@ -314,7 +314,7 @@ bool SampleOnnxMnistCoordConvAC::processInput(const samplesCommon::BufferManager //! bool SampleOnnxMnistCoordConvAC::verifyOutput(const samplesCommon::BufferManager& buffers) { - const int outputSize = mOutputDims.d[1]; + const int outputSize = static_cast(mOutputDims.d[1]); float* output = static_cast(buffers.getHostBuffer(mParams.outputTensorNames[0])); float val{0.0F}; int idx{0}; @@ -323,7 +323,7 @@ bool SampleOnnxMnistCoordConvAC::verifyOutput(const samplesCommon::BufferManager float sum{0.0F}; for (int i = 0; i < outputSize; i++) { - output[i] = exp(output[i]); + output[i] = static_cast(exp(output[i])); sum += output[i]; } diff --git a/samples/sampleProgressMonitor/sampleProgressMonitor.cpp b/samples/sampleProgressMonitor/sampleProgressMonitor.cpp index 5500e3467..c116c186c 100644 --- a/samples/sampleProgressMonitor/sampleProgressMonitor.cpp +++ b/samples/sampleProgressMonitor/sampleProgressMonitor.cpp @@ -155,7 +155,7 @@ class ConsoleProgressMonitor : public IProgressMonitor std::ostringstream bar; bar << '['; int32_t const completedChars - = static_cast(kPROGRESS_INNER_WIDTH * steps / static_cast(nbSteps)); + = 
static_cast(static_cast(kPROGRESS_INNER_WIDTH) * static_cast(steps) / static_cast(nbSteps)); for (int32_t i = 0; i < completedChars; ++i) { bar << '='; @@ -328,8 +328,8 @@ bool SampleProgressMonitor::build(IProgressMonitor* monitor) bool SampleProgressMonitor::processInput( samplesCommon::BufferManager const& buffers, std::string const& inputTensorName, int32_t inputFileIdx) const { - int32_t const inputH = mInputDims.d[2]; - int32_t const inputW = mInputDims.d[3]; + int32_t const inputH = static_cast(mInputDims.d[2]); + int32_t const inputW = static_cast(mInputDims.d[3]); // Read a random digit file. srand(unsigned(time(nullptr))); @@ -364,7 +364,7 @@ bool SampleProgressMonitor::verifyOutput( float* prob = static_cast(buffers.getHostBuffer(outputTensorName)); int32_t constexpr kDIGITS = 10; - std::for_each(prob, prob + kDIGITS, [](float& n) { n = exp(n); }); + std::for_each(prob, prob + kDIGITS, [](float& n) { n = static_cast(exp(n)); }); float const sum = std::accumulate(prob, prob + kDIGITS, 0.F); @@ -374,7 +374,7 @@ bool SampleProgressMonitor::verifyOutput( float const val = *max_ele; - int32_t const idx = max_ele - prob; + int32_t const idx = static_cast(max_ele - prob); // Print histogram of the output probability distribution. sample::gLogInfo << "Output:\n"; @@ -399,7 +399,7 @@ bool SampleProgressMonitor::verifyOutput( //! \param builder Pointer to the engine builder. //! bool SampleProgressMonitor::constructNetwork(std::unique_ptr& builder, - std::unique_ptr& network, std::unique_ptr& config, + std::unique_ptr& /*network*/, std::unique_ptr& config, std::unique_ptr& parser) { auto parsed = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), @@ -432,7 +432,7 @@ bool SampleProgressMonitor::infer() } // Pick a random digit to try to infer. - srand(time(NULL)); + srand(static_cast(time(NULL))); int32_t const digit = rand() % 10; // Read the input data into the managed buffers. 
diff --git a/third_party/ieee/half.h b/third_party/ieee/half.h index c4df4b670..f7c3d1d9d 100644 --- a/third_party/ieee/half.h +++ b/third_party/ieee/half.h @@ -19,17 +19,18 @@ // // Copyright (c) 2012-2017 Christian Rau // -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: // -// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +// Software. // -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // Version 1.12.0 @@ -39,168 +40,173 @@ #ifndef HALF_HALF_HPP #define HALF_HALF_HPP +// Third-party header - suppress compiler warnings +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC system_header +#endif + /// Combined gcc version number. -#define HALF_GNUC_VERSION (__GNUC__*100+__GNUC_MINOR__) - -//check C++11 language features -#if defined(__clang__) //clang - #if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif +#define HALF_GNUC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +// check C++11 language features +#if defined(__clang__) // clang +#if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) +#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 +#endif +#if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) +#define HALF_ENABLE_CPP11_CONSTEXPR 1 +#endif +#if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) 
+#define HALF_ENABLE_CPP11_NOEXCEPT 1 +#endif +#if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) +#define HALF_ENABLE_CPP11_USER_LITERALS 1 +#endif +#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) +#define HALF_ENABLE_CPP11_LONG_LONG 1 +#endif /*#elif defined(__INTEL_COMPILER) //Intel C++ - #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) ???????? - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) ???????? - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) ???????? - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG) ???????? - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif*/ -#elif defined(__GNUC__) //gcc - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L - #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif - #endif -#elif defined(_MSC_VER) //Visual C++ - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if 
_MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif - #define HALF_POP_WARNINGS 1 - #pragma warning(push) - #pragma warning(disable : 4099 4127 4146) //struct vs class, constant in if, negative unsigned -#endif - -//check C++11 library features + #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) ???????? + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) ???????? + #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) ???????? + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG) ???????? + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif*/ +#elif defined(__GNUC__) // gcc +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) +#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 +#endif +#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) +#define HALF_ENABLE_CPP11_CONSTEXPR 1 +#endif +#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) +#define HALF_ENABLE_CPP11_NOEXCEPT 1 +#endif +#if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) +#define HALF_ENABLE_CPP11_USER_LITERALS 1 +#endif +#if !defined(HALF_ENABLE_CPP11_LONG_LONG) +#define HALF_ENABLE_CPP11_LONG_LONG 1 +#endif +#endif +#elif defined(_MSC_VER) // Visual C++ +#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) +#define HALF_ENABLE_CPP11_CONSTEXPR 1 +#endif +#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) +#define HALF_ENABLE_CPP11_NOEXCEPT 1 +#endif +#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) +#define HALF_ENABLE_CPP11_USER_LITERALS 1 
+#endif +#if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) +#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 +#endif +#if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) +#define HALF_ENABLE_CPP11_LONG_LONG 1 +#endif +#define HALF_POP_WARNINGS 1 +#pragma warning(push) +#pragma warning(disable : 4099 4127 4146) // struct vs class, constant in if, negative unsigned +#endif + +// check C++11 library features #include -#if defined(_LIBCPP_VERSION) //libc++ - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 - #ifndef HALF_ENABLE_CPP11_TYPE_TRAITS - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - #ifndef HALF_ENABLE_CPP11_CSTDINT - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #ifndef HALF_ENABLE_CPP11_CMATH - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #ifndef HALF_ENABLE_CPP11_HASH - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #endif -#elif defined(__GLIBCXX__) //libstdc++ - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 - #ifdef __clang__ - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #else - #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #endif - #endif -#elif defined(_CPPLIB_VER) //Dinkumware/Visual C++ - #if _CPPLIB_VER >= 520 - #ifndef HALF_ENABLE_CPP11_TYPE_TRAITS - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - 
#endif - #ifndef HALF_ENABLE_CPP11_CSTDINT - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #ifndef HALF_ENABLE_CPP11_HASH - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #endif - #if _CPPLIB_VER >= 610 - #ifndef HALF_ENABLE_CPP11_CMATH - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #endif +#if defined(_LIBCPP_VERSION) // libc++ +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 +#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS +#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 +#endif +#ifndef HALF_ENABLE_CPP11_CSTDINT +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#ifndef HALF_ENABLE_CPP11_CMATH +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#ifndef HALF_ENABLE_CPP11_HASH +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#endif +#elif defined(__GLIBCXX__) // libstdc++ +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 +#ifdef __clang__ +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) +#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 +#endif +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#else +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#endif +#endif +#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++ +#if _CPPLIB_VER >= 520 +#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS +#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 +#endif +#ifndef HALF_ENABLE_CPP11_CSTDINT +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#ifndef HALF_ENABLE_CPP11_HASH +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#endif +#if _CPPLIB_VER >= 
610 +#ifndef HALF_ENABLE_CPP11_CMATH +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#endif #endif #undef HALF_GNUC_VERSION -//support constexpr +// support constexpr #if HALF_ENABLE_CPP11_CONSTEXPR - #define HALF_CONSTEXPR constexpr - #define HALF_CONSTEXPR_CONST constexpr +#define HALF_CONSTEXPR constexpr +#define HALF_CONSTEXPR_CONST constexpr #else - #define HALF_CONSTEXPR - #define HALF_CONSTEXPR_CONST const +#define HALF_CONSTEXPR +#define HALF_CONSTEXPR_CONST const #endif -//support noexcept +// support noexcept #if HALF_ENABLE_CPP11_NOEXCEPT - #define HALF_NOEXCEPT noexcept - #define HALF_NOTHROW noexcept +#define HALF_NOEXCEPT noexcept +#define HALF_NOTHROW noexcept #else - #define HALF_NOEXCEPT - #define HALF_NOTHROW throw() +#define HALF_NOEXCEPT +#define HALF_NOTHROW throw() #endif #include -#include -#include #include #include #include +#include +#include #if HALF_ENABLE_CPP11_TYPE_TRAITS - #include +#include #endif #if HALF_ENABLE_CPP11_CSTDINT #include @@ -228,2867 +234,4088 @@ /// `std::numeric_limits::round_style` to synchronize the rounding mode with that of the underlying /// single-precision implementation. For TRT-1275, changing it to 1 (to nearest) #ifndef HALF_ROUND_STYLE - #define HALF_ROUND_STYLE 1 // = std::round_to_nearest +#define HALF_ROUND_STYLE 1 // = std::round_to_nearest #endif /// Tie-breaking behaviour for round to nearest. -/// This specifies if ties in round to nearest should be resolved by rounding to the nearest even value. By default this is -/// defined to `0` resulting in the faster but slightly more biased behaviour of rounding away from zero in half-way cases (and -/// thus equal to the round() function), but can be redefined to `1` (before including half.hpp) if more IEEE-conformant -/// behaviour is needed. +/// This specifies if ties in round to nearest should be resolved by rounding to the nearest even value. 
By default this +/// is defined to `0` resulting in the faster but slightly more biased behaviour of rounding away from zero in half-way +/// cases (and thus equal to the round() function), but can be redefined to `1` (before including half.hpp) if more +/// IEEE-conformant behaviour is needed. #ifndef HALF_ROUND_TIES_TO_EVEN - #define HALF_ROUND_TIES_TO_EVEN 0 // ties away from zero +#define HALF_ROUND_TIES_TO_EVEN 0 // ties away from zero #endif /// Value signaling overflow. -/// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to a positive value signaling the overflow of an -/// operation, in particular it just evaluates to positive infinity. -#define HUGE_VALH std::numeric_limits::infinity() +/// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to a positive value signaling the overflow +/// of an operation, in particular it just evaluates to positive infinity. +#define HUGE_VALH std::numeric_limits::infinity() /// Fast half-precision fma function. -/// This symbol is only defined if the fma() function generally executes as fast as, or faster than, a separate -/// half-precision multiplication followed by an addition. Due to the internal single-precision implementation of all +/// This symbol is only defined if the fma() function generally executes as fast as, or faster than, a separate +/// half-precision multiplication followed by an addition. Due to the internal single-precision implementation of all /// arithmetic operations, this is in fact always the case. 
-#define FP_FAST_FMAH 1 +#define FP_FAST_FMAH 1 #ifndef FP_ILOGB0 - #define FP_ILOGB0 INT_MIN +#define FP_ILOGB0 INT_MIN #endif #ifndef FP_ILOGBNAN - #define FP_ILOGBNAN INT_MAX +#define FP_ILOGBNAN INT_MAX #endif #ifndef FP_SUBNORMAL - #define FP_SUBNORMAL 0 +#define FP_SUBNORMAL 0 #endif #ifndef FP_ZERO - #define FP_ZERO 1 +#define FP_ZERO 1 #endif #ifndef FP_NAN - #define FP_NAN 2 +#define FP_NAN 2 #endif #ifndef FP_INFINITE - #define FP_INFINITE 3 +#define FP_INFINITE 3 #endif #ifndef FP_NORMAL - #define FP_NORMAL 4 +#define FP_NORMAL 4 #endif - /// Main namespace for half precision functionality. /// This namespace contains all the functionality provided by the library. namespace half_float { - class half; - -#if HALF_ENABLE_CPP11_USER_LITERALS - /// Library-defined half-precision literals. - /// Import this namespace to enable half-precision floating point literals: - /// ~~~~{.cpp} - /// using namespace half_float::literal; - /// half_float::half = 4.2_h; - /// ~~~~ - namespace literal - { - half operator "" _h(long double); - } -#endif - - /// \internal - /// \brief Implementation details. - namespace detail - { - #if HALF_ENABLE_CPP11_TYPE_TRAITS - /// Conditional type. - template struct conditional : std::conditional {}; - - /// Helper for tag dispatching. - template struct bool_type : std::integral_constant {}; - using std::true_type; - using std::false_type; - - /// Type traits for floating point types. - template struct is_float : std::is_floating_point {}; - #else - /// Conditional type. - template struct conditional { typedef T type; }; - template struct conditional { typedef F type; }; - - /// Helper for tag dispatching. - template struct bool_type {}; - typedef bool_type true_type; - typedef bool_type false_type; - - /// Type traits for floating point types. 
- template struct is_float : false_type {}; - template struct is_float : is_float {}; - template struct is_float : is_float {}; - template struct is_float : is_float {}; - template<> struct is_float : true_type {}; - template<> struct is_float : true_type {}; - template<> struct is_float : true_type {}; - #endif - - /// Type traits for floating point bits. - template struct bits { typedef unsigned char type; }; - template struct bits : bits {}; - template struct bits : bits {}; - template struct bits : bits {}; - - #if HALF_ENABLE_CPP11_CSTDINT - /// Unsigned integer of (at least) 16 bits width. - typedef std::uint_least16_t uint16; - - /// Unsigned integer of (at least) 32 bits width. - template<> struct bits { typedef std::uint_least32_t type; }; - - /// Unsigned integer of (at least) 64 bits width. - template<> struct bits { typedef std::uint_least64_t type; }; - #else - /// Unsigned integer of (at least) 16 bits width. - typedef unsigned short uint16; - - /// Unsigned integer of (at least) 32 bits width. - template<> struct bits : conditional::digits>=32,unsigned int,unsigned long> {}; - - #if HALF_ENABLE_CPP11_LONG_LONG - /// Unsigned integer of (at least) 64 bits width. - template<> struct bits : conditional::digits>=64,unsigned long,unsigned long long> {}; - #else - /// Unsigned integer of (at least) 64 bits width. - template<> struct bits { typedef unsigned long type; }; - #endif - #endif - - /// Tag type for binary construction. - struct binary_t {}; - - /// Tag for binary construction. - HALF_CONSTEXPR_CONST binary_t binary = binary_t(); - - /// Temporary half-precision expression. - /// This class represents a half-precision expression which just stores a single-precision value internally. - struct expr - { - /// Conversion constructor. - /// \param f single-precision value to convert - explicit HALF_CONSTEXPR expr(float f) HALF_NOEXCEPT : value_(f) {} - - /// Conversion to single-precision. 
- /// \return single precision value representing expression value - HALF_CONSTEXPR operator float() const HALF_NOEXCEPT { return value_; } - - private: - /// Internal expression value stored in single-precision. - float value_; - }; - - /// SFINAE helper for generic half-precision functions. - /// This class template has to be specialized for each valid combination of argument types to provide a corresponding - /// `type` member equivalent to \a T. - /// \tparam T type to return - template struct enable {}; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - - /// Return type for specialized generic 2-argument half-precision functions. - /// This class template has to be specialized for each valid combination of argument types to provide a corresponding - /// `type` member denoting the appropriate return type. - /// \tparam T first argument type - /// \tparam U first argument type - template struct result : enable {}; - template<> struct result { typedef half type; }; - - /// \name Classification helpers - /// \{ - - /// Check for infinity. 
- /// \tparam T argument type (builtin floating point type) - /// \param arg value to query - /// \retval true if infinity - /// \retval false else - template bool builtin_isinf(T arg) - { - #if HALF_ENABLE_CPP11_CMATH - return std::isinf(arg); - #elif defined(_MSC_VER) - return !::_finite(static_cast(arg)) && !::_isnan(static_cast(arg)); - #else - return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); - #endif - } - - /// Check for NaN. - /// \tparam T argument type (builtin floating point type) - /// \param arg value to query - /// \retval true if not a number - /// \retval false else - template bool builtin_isnan(T arg) - { - #if HALF_ENABLE_CPP11_CMATH - return std::isnan(arg); - #elif defined(_MSC_VER) - return ::_isnan(static_cast(arg)) != 0; - #else - return arg != arg; - #endif - } - - /// Check sign. - /// \tparam T argument type (builtin floating point type) - /// \param arg value to query - /// \retval true if signbit set - /// \retval false else - template bool builtin_signbit(T arg) - { - #if HALF_ENABLE_CPP11_CMATH - return std::signbit(arg); - #else - return arg < T() || (arg == T() && T(1)/arg < T()); - #endif - } - - /// \} - /// \name Conversion - /// \{ - - /// Convert IEEE single-precision to half-precision. - /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \param value single-precision value - /// \return binary representation of half-precision value - template uint16 float2half_impl(float value, true_type) - { - typedef bits::type uint32; - uint32 bits;// = *reinterpret_cast(&value); //violating strict aliasing! 
- std::memcpy(&bits, &value, sizeof(float)); -/* uint16 hbits = (bits>>16) & 0x8000; - bits &= 0x7FFFFFFF; - int exp = bits >> 23; - if(exp == 255) - return hbits | 0x7C00 | (0x3FF&-static_cast((bits&0x7FFFFF)!=0)); - if(exp > 142) - { - if(R == std::round_toward_infinity) - return hbits | 0x7C00 - (hbits>>15); - if(R == std::round_toward_neg_infinity) - return hbits | 0x7BFF + (hbits>>15); - return hbits | 0x7BFF + (R!=std::round_toward_zero); - } - int g, s; - if(exp > 112) - { - g = (bits>>12) & 1; - s = (bits&0xFFF) != 0; - hbits |= ((exp-112)<<10) | ((bits>>13)&0x3FF); - } - else if(exp > 101) - { - int i = 125 - exp; - bits = (bits&0x7FFFFF) | 0x800000; - g = (bits>>i) & 1; - s = (bits&((1L<> (i+1); - } - else - { - g = 0; - s = bits != 0; - } - if(R == std::round_to_nearest) - #if HALF_ROUND_TIES_TO_EVEN - hbits += g & (s|hbits); - #else - hbits += g; - #endif - else if(R == std::round_toward_infinity) - hbits += ~(hbits>>15) & (s|g); - else if(R == std::round_toward_neg_infinity) - hbits += (hbits>>15) & (g|s); -*/ static uint16 const base_table[512] = { - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, - 0x0200, 
0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, - 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, - 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, - 0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00 }; - static const unsigned char shift_table[512] = { - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 }; - uint16 hbits = base_table[bits>>23] + static_cast((bits&0x7FFFFF)>>shift_table[bits>>23]); - if(R == std::round_to_nearest) - hbits += (((bits&0x7FFFFF)>>(shift_table[bits>>23]-1))|(((bits>>23)&0xFF)==102)) & ((hbits&0x7C00)!=0x7C00) - #if HALF_ROUND_TIES_TO_EVEN - & (((((static_cast(1)<<(shift_table[bits>>23]-1))-1)&bits)!=0)|hbits) - #endif - ; - else if(R == std::round_toward_zero) - hbits -= ((hbits&0x7FFF)==0x7C00) & ~shift_table[bits>>23]; - else if(R == std::round_toward_infinity) - hbits += ((((bits&0x7FFFFF&((static_cast(1)<<(shift_table[bits>>23]))-1))!=0)|(((bits>>23)<=102)& - 
((bits>>23)!=0)))&(hbits<0x7C00)) - ((hbits==0xFC00)&((bits>>23)!=511)); - else if(R == std::round_toward_neg_infinity) - hbits += ((((bits&0x7FFFFF&((static_cast(1)<<(shift_table[bits>>23]))-1))!=0)|(((bits>>23)<=358)& - ((bits>>23)!=256)))&(hbits<0xFC00)&(hbits>>15)) - ((hbits==0x7C00)&((bits>>23)!=255)); - return hbits; - } - - /// Convert IEEE double-precision to half-precision. - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \param value double-precision value - /// \return binary representation of half-precision value - template uint16 float2half_impl(double value, true_type) - { - typedef bits::type uint32; - typedef bits::type uint64; - uint64 bits;// = *reinterpret_cast(&value); //violating strict aliasing! - std::memcpy(&bits, &value, sizeof(double)); - uint32 hi = bits >> 32, lo = bits & 0xFFFFFFFF; - uint16 hbits = (hi>>16) & 0x8000; - hi &= 0x7FFFFFFF; - int exp = hi >> 20; - if(exp == 2047) - return hbits | 0x7C00 | (0x3FF&-static_cast((bits&0xFFFFFFFFFFFFF)!=0)); - if(exp > 1038) - { - if(R == std::round_toward_infinity) - return hbits | (0x7C00 - (hbits>>15)); - if(R == std::round_toward_neg_infinity) - return hbits | (0x7BFF + (hbits>>15)); - return hbits | (0x7BFF + (R!=std::round_toward_zero)); - } - int g, s = lo != 0; - if(exp > 1008) - { - g = (hi>>9) & 1; - s |= (hi&0x1FF) != 0; - hbits |= ((exp-1008)<<10) | ((hi>>10)&0x3FF); - } - else if(exp > 997) - { - int i = 1018 - exp; - hi = (hi&0xFFFFF) | 0x100000; - g = (hi>>i) & 1; - s |= (hi&((1L<> (i+1); - } - else - { - g = 0; - s |= hi != 0; - } - if(R == std::round_to_nearest) - #if HALF_ROUND_TIES_TO_EVEN - hbits += g & (s|hbits); - #else - hbits += g; - #endif - else if(R == std::round_toward_infinity) - hbits += ~(hbits>>15) & (s|g); - else if(R == std::round_toward_neg_infinity) - hbits += (hbits>>15) & (g|s); - return hbits; - } - - /// Convert non-IEEE floating point to half-precision. 
- /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam T source type (builtin floating point type) - /// \param value floating point value - /// \return binary representation of half-precision value - template uint16 float2half_impl(T value, false_type) - { - uint16 hbits = static_cast(builtin_signbit(value)) << 15; - if(value == T()) - return hbits; - if(builtin_isnan(value)) - return hbits | 0x7FFF; - if(builtin_isinf(value)) - return hbits | 0x7C00; - int exp; - std::frexp(value, &exp); - if(exp > 16) - { - if(R == std::round_toward_infinity) - return hbits | (0x7C00 - (hbits>>15)); - if(R == std::round_toward_neg_infinity) - return hbits | (0x7BFF + (hbits>>15)); - return hbits | (0x7BFF + (R!=std::round_toward_zero)); - } - if(exp < -13) - value = std::ldexp(value, 24); - else - { - value = std::ldexp(value, 11-exp); - hbits |= ((exp+13)<<10); - } - T ival, frac = std::modf(value, &ival); - hbits += static_cast(std::abs(static_cast(ival))); - if(R == std::round_to_nearest) - { - frac = std::abs(frac); - #if HALF_ROUND_TIES_TO_EVEN - hbits += (frac>T(0.5)) | ((frac==T(0.5))&hbits); - #else - hbits += frac >= T(0.5); - #endif - } - else if(R == std::round_toward_infinity) - hbits += frac > T(); - else if(R == std::round_toward_neg_infinity) - hbits += frac < T(); - return hbits; - } - - /// Convert floating point to half-precision. - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam T source type (builtin floating point type) - /// \param value floating point value - /// \return binary representation of half-precision value - template uint16 float2half(T value) - { - return float2half_impl(value, bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); - } - - /// Convert integer to half-precision floating point. 
- /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam S `true` if value negative, `false` else - /// \tparam T type to convert (builtin integer type) - /// \param value non-negative integral value - /// \return binary representation of half-precision value - template uint16 int2half_impl(T value) - { - #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_integral::value, "int to half conversion only supports builtin integer types"); - #endif - if(S) - value = -value; - uint16 bits = S << 15; - if(value > 0xFFFF) - { - if(R == std::round_toward_infinity) - bits |= 0x7C00 - S; - else if(R == std::round_toward_neg_infinity) - bits |= 0x7BFF + S; - else - bits |= 0x7BFF + (R!=std::round_toward_zero); - } - else if(value) - { - unsigned int m = value, exp = 24; - for(; m<0x400; m<<=1,--exp) ; - for(; m>0x7FF; m>>=1,++exp) ; - bits |= (exp<<10) + m; - if(exp > 24) - { - if(R == std::round_to_nearest) - bits += (value>>(exp-25)) & 1 - #if HALF_ROUND_TIES_TO_EVEN - & (((((1<<(exp-25))-1)&value)!=0)|bits) - #endif - ; - else if(R == std::round_toward_infinity) - bits += ((value&((1<<(exp-24))-1))!=0) & !S; - else if(R == std::round_toward_neg_infinity) - bits += ((value&((1<<(exp-24))-1))!=0) & S; - } - } - return bits; - } - - /// Convert integer to half-precision floating point. - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam T type to convert (builtin integer type) - /// \param value integral value - /// \return binary representation of half-precision value - template uint16 int2half(T value) - { - return (value<0) ? int2half_impl(value) : int2half_impl(value); - } - - /// Convert half-precision to IEEE single-precision. - /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). 
- /// \param value binary representation of half-precision value - /// \return single-precision value - inline float half2float_impl(uint16 value, float, true_type) - { - typedef bits::type uint32; -/* uint32 bits = static_cast(value&0x8000) << 16; - int abs = value & 0x7FFF; - if(abs) - { - bits |= 0x38000000 << static_cast(abs>=0x7C00); - for(; abs<0x400; abs<<=1,bits-=0x800000) ; - bits += static_cast(abs) << 13; - } -*/ static const uint32 mantissa_table[2048] = { - 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, - 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, - 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, - 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, - 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, - 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, - 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, - 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 
0x36FA0000, 0x36FC0000, 0x36FE0000, - 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, - 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, - 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, - 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, - 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, - 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, - 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, - 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, - 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, - 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, - 0x37900000, 
0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, - 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, - 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, - 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, - 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, - 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, - 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, - 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, - 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, - 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, - 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 
0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, - 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, - 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, - 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, - 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, - 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, - 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, - 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, - 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, - 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, - 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 
0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, - 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, - 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, - 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, - 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, - 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, - 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, - 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, - 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, - 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, - 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, 
- 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, - 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, - 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, - 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, - 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, - 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, - 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, - 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, - 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, - 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, - 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 
0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, - 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, - 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, - 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, - 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, - 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, - 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, - 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, - 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, - 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, - 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 
0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, - 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, - 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, - 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, - 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, - 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, - 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, - 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, - 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, - 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, - 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 
0x381FC000, 0x381FE000, - 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, - 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, - 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, - 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, - 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, - 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, - 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, - 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, - 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, - 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, - 0x38340000, 0x38342000, 
0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, - 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, - 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, - 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, - 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, - 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, - 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, - 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, - 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, - 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, - 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 
0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, - 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, - 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, - 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, - 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, - 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, - 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, - 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, - 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, - 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, - 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 
0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, - 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, - 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, - 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, - 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, - 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, - 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, - 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, - 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, - 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, - 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, - 
0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, - 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, - 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, - 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, - 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, - 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, - 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 }; - static const uint32 exponent_table[64] = { - 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, - 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, - 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, - 0x88000000, 
0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 }; - static const unsigned short offset_table[64] = { - 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, - 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 }; - uint32 bits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10]; -// return *reinterpret_cast(&bits); //violating strict aliasing! - float out; - std::memcpy(&out, &bits, sizeof(float)); - return out; - } - - /// Convert half-precision to IEEE double-precision. - /// \param value binary representation of half-precision value - /// \return double-precision value - inline double half2float_impl(uint16 value, double, true_type) - { - typedef bits::type uint32; - typedef bits::type uint64; - uint32 hi = static_cast(value&0x8000) << 16; - int abs = value & 0x7FFF; - if(abs) - { - hi |= 0x3F000000 << static_cast(abs>=0x7C00); - for(; abs<0x400; abs<<=1,hi-=0x100000) ; - hi += static_cast(abs) << 10; - } - uint64 bits = static_cast(hi) << 32; -// return *reinterpret_cast(&bits); //violating strict aliasing! - double out; - std::memcpy(&out, &bits, sizeof(double)); - return out; - } - - /// Convert half-precision to non-IEEE floating point. - /// \tparam T type to convert to (builtin integer type) - /// \param value binary representation of half-precision value - /// \return floating point value - template T half2float_impl(uint16 value, T, ...) - { - T out; - int abs = value & 0x7FFF; - if(abs > 0x7C00) - out = std::numeric_limits::has_quiet_NaN ? 
std::numeric_limits::quiet_NaN() : T(); - else if(abs == 0x7C00) - out = std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : std::numeric_limits::max(); - else if(abs > 0x3FF) - out = std::ldexp(static_cast((abs&0x3FF)|0x400), (abs>>10)-25); - else - out = std::ldexp(static_cast(abs), -24); - return (value&0x8000) ? -out : out; - } - - /// Convert half-precision to floating point. - /// \tparam T type to convert to (builtin integer type) - /// \param value binary representation of half-precision value - /// \return floating point value - template T half2float(uint16 value) - { - return half2float_impl(value, T(), bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); - } - - /// Convert half-precision floating point to integer. - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam E `true` for round to even, `false` for round away from zero - /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits) - /// \param value binary representation of half-precision value - /// \return integral value - template T half2int_impl(uint16 value) - { - #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_integral::value, "half to int conversion only supports builtin integer types"); - #endif - unsigned int e = value & 0x7FFF; - if(e >= 0x7C00) - return (value&0x8000) ? 
std::numeric_limits::min() : std::numeric_limits::max(); - if(e < 0x3800) - { - if(R == std::round_toward_infinity) - return T(~(value>>15)&(e!=0)); - if(R == std::round_toward_neg_infinity) - return -T(value>0x8000); - return T(); - } - unsigned int m = (value&0x3FF) | 0x400; - e >>= 10; - if(e < 25) - { - if(R == std::round_to_nearest) - m += (1<<(24-e)) - (~(m>>(25-e))&E); - else if(R == std::round_toward_infinity) - m += ((value>>15)-1) & ((1<<(25-e))-1U); - else if(R == std::round_toward_neg_infinity) - m += -(value>>15) & ((1<<(25-e))-1U); - m >>= 25 - e; - } - else - m <<= e - 25; - return (value&0x8000) ? -static_cast(m) : static_cast(m); - } - - /// Convert half-precision floating point to integer. - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits) - /// \param value binary representation of half-precision value - /// \return integral value - template T half2int(uint16 value) { return half2int_impl(value); } - - /// Convert half-precision floating point to integer using round-to-nearest-away-from-zero. - /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits) - /// \param value binary representation of half-precision value - /// \return integral value - template T half2int_up(uint16 value) { return half2int_impl(value); } - - /// Round half-precision number to nearest integer value. 
- /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam E `true` for round to even, `false` for round away from zero - /// \param value binary representation of half-precision value - /// \return half-precision bits for nearest integral value - template uint16 round_half_impl(uint16 value) - { - unsigned int e = value & 0x7FFF; - uint16 result = value; - if(e < 0x3C00) - { - result &= 0x8000; - if(R == std::round_to_nearest) - result |= 0x3C00U & -(e>=(0x3800+E)); - else if(R == std::round_toward_infinity) - result |= 0x3C00U & -(~(value>>15)&(e!=0)); - else if(R == std::round_toward_neg_infinity) - result |= 0x3C00U & -(value>0x8000); - } - else if(e < 0x6400) - { - e = 25 - (e>>10); - unsigned int mask = (1<>e)&E); - else if(R == std::round_toward_infinity) - result += mask & ((value>>15)-1); - else if(R == std::round_toward_neg_infinity) - result += mask & -(value>>15); - result &= ~mask; - } - return result; - } - - /// Round half-precision number to nearest integer value. - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \param value binary representation of half-precision value - /// \return half-precision bits for nearest integral value - template uint16 round_half(uint16 value) { return round_half_impl(value); } - - /// Round half-precision number to nearest integer value using round-to-nearest-away-from-zero. - /// \param value binary representation of half-precision value - /// \return half-precision bits for nearest integral value - inline uint16 round_half_up(uint16 value) { return round_half_impl(value); } - /// \} - - struct functions; - template struct unary_specialized; - template struct binary_specialized; - template struct half_caster; - } - - /// Half-precision floating point type. - /// This class implements an IEEE-conformant half-precision floating point type with the usual arithmetic operators and - /// conversions. 
It is implicitly convertible to single-precision floating point, which makes artihmetic expressions and - /// functions with mixed-type operands to be of the most precise operand type. Additionally all arithmetic operations - /// (and many mathematical functions) are carried out in single-precision internally. All conversions from single- to - /// half-precision are done using the library's default rounding mode, but temporary results inside chained arithmetic - /// expressions are kept in single-precision as long as possible (while of course still maintaining a strong half-precision type). - /// - /// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and - /// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which - /// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the - /// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not neccessarily have to be of - /// exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will most - /// probably not ivolve any additional "magic" or padding beyond the simple binary representation of the underlying 16-bit - /// IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 bits if - /// your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the case on - /// nearly any reasonable platform. - /// - /// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable - /// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation. 
- #if defined(__clang__) - /* this is a WAR, after nvcc's process, - * `friend class std::numeric_limits;` in the following code, becomes - * `friend class numeric_limits;`, namespsace `std` is removed, which results compilation error in clang. - * tested on nvcc V10.0.95, and clang 5.0.300080 in ndk 16b - */ - using std::numeric_limits; - using std::hash; - #endif - class half - { - friend struct detail::functions; - friend struct detail::unary_specialized; - friend struct detail::binary_specialized; - template friend struct detail::half_caster; - friend class std::numeric_limits; - #if HALF_ENABLE_CPP11_HASH - friend struct std::hash; - #endif - #if HALF_ENABLE_CPP11_USER_LITERALS - friend half literal::operator "" _h(long double); - #endif - - public: - /// Default constructor. - /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics - /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics. - HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} - - /// Copy constructor. - /// \tparam T type of concrete half expression - /// \param rhs half expression to copy from - half(detail::expr rhs) : data_(detail::float2half(static_cast(rhs))) {} - - /// Conversion constructor. - /// \param rhs float to convert - explicit half(float rhs) : data_(detail::float2half(rhs)) {} - - /// Conversion to single-precision. - /// \return single precision value representing expression value - operator float() const { return detail::half2float(data_); } - - /// Assignment operator. - /// \tparam T type of concrete half expression - /// \param rhs half expression to copy from - /// \return reference to this half - half& operator=(detail::expr rhs) { return *this = static_cast(rhs); } - - /// Arithmetic assignment. 
- /// \tparam T type of concrete half expression - /// \param rhs half expression to add - /// \return reference to this half - template typename detail::enable::type operator+=(T rhs) { return *this += static_cast(rhs); } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to subtract - /// \return reference to this half - template typename detail::enable::type operator-=(T rhs) { return *this -= static_cast(rhs); } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to multiply with - /// \return reference to this half - template typename detail::enable::type operator*=(T rhs) { return *this *= static_cast(rhs); } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to divide by - /// \return reference to this half - template typename detail::enable::type operator/=(T rhs) { return *this /= static_cast(rhs); } - - /// Assignment operator. - /// \param rhs single-precision value to copy from - /// \return reference to this half - half& operator=(float rhs) { data_ = detail::float2half(rhs); return *this; } - - /// Arithmetic assignment. - /// \param rhs single-precision value to add - /// \return reference to this half - half& operator+=(float rhs) { data_ = detail::float2half(detail::half2float(data_)+rhs); return *this; } - - /// Arithmetic assignment. - /// \param rhs single-precision value to subtract - /// \return reference to this half - half& operator-=(float rhs) { data_ = detail::float2half(detail::half2float(data_)-rhs); return *this; } - - /// Arithmetic assignment. - /// \param rhs single-precision value to multiply with - /// \return reference to this half - half& operator*=(float rhs) { data_ = detail::float2half(detail::half2float(data_)*rhs); return *this; } - - /// Arithmetic assignment. 
- /// \param rhs single-precision value to divide by - /// \return reference to this half - half& operator/=(float rhs) { data_ = detail::float2half(detail::half2float(data_)/rhs); return *this; } - - /// Prefix increment. - /// \return incremented half value - half& operator++() { return *this += 1.0f; } - - /// Prefix decrement. - /// \return decremented half value - half& operator--() { return *this -= 1.0f; } - - /// Postfix increment. - /// \return non-incremented half value - half operator++(int) { half out(*this); ++*this; return out; } - - /// Postfix decrement. - /// \return non-decremented half value - half operator--(int) { half out(*this); --*this; return out; } - - private: - /// Rounding mode to use - static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE); - - /// Constructor. - /// \param bits binary representation to set half to - HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) HALF_NOEXCEPT : data_(bits) {} - - /// Internal binary representation - detail::uint16 data_; - }; +class half; #if HALF_ENABLE_CPP11_USER_LITERALS - namespace literal - { - /// Half literal. - /// While this returns an actual half-precision value, half literals can unfortunately not be constant expressions due - /// to rather involved conversions. - /// \param value literal value - /// \return half with given value (if representable) - inline half operator "" _h(long double value) { return half(detail::binary, detail::float2half(value)); } - } -#endif - - namespace detail - { - /// Wrapper implementing unspecialized half-precision functions. - struct functions - { - /// Addition implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision sum stored in single-precision - static expr plus(float x, float y) { return expr(x+y); } - - /// Subtraction implementation. 
- /// \param x first operand - /// \param y second operand - /// \return Half-precision difference stored in single-precision - static expr minus(float x, float y) { return expr(x-y); } - - /// Multiplication implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision product stored in single-precision - static expr multiplies(float x, float y) { return expr(x*y); } - - /// Division implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision quotient stored in single-precision - static expr divides(float x, float y) { return expr(x/y); } - - /// Output implementation. - /// \param out stream to write to - /// \param arg value to write - /// \return reference to stream - template static std::basic_ostream& write(std::basic_ostream &out, float arg) { return out << arg; } - - /// Input implementation. - /// \param in stream to read from - /// \param arg half to read into - /// \return reference to stream - template static std::basic_istream& read(std::basic_istream &in, half &arg) - { - float f; - if(in >> f) - arg = f; - return in; - } - - /// Modulo implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision division remainder stored in single-precision - static expr fmod(float x, float y) { return expr(std::fmod(x, y)); } - - /// Remainder implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision division remainder stored in single-precision - static expr remainder(float x, float y) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::remainder(x, y)); - #else - if(builtin_isnan(x) || builtin_isnan(y)) - return expr(std::numeric_limits::quiet_NaN()); - float ax = std::fabs(x), ay = std::fabs(y); - if(ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) - return expr(std::numeric_limits::quiet_NaN()); - if(ay >= 65536.0f) - return expr(x); - if(ax == ay) - return expr(builtin_signbit(x) ? 
-0.0f : 0.0f); - ax = std::fmod(ax, ay+ay); - float y2 = 0.5f * ay; - if(ax > y2) - { - ax -= ay; - if(ax >= y2) - ax -= ay; - } - return expr(builtin_signbit(x) ? -ax : ax); - #endif - } - - /// Remainder implementation. - /// \param x first operand - /// \param y second operand - /// \param quo address to store quotient bits at - /// \return Half-precision division remainder stored in single-precision - static expr remquo(float x, float y, int *quo) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::remquo(x, y, quo)); - #else - if(builtin_isnan(x) || builtin_isnan(y)) - return expr(std::numeric_limits::quiet_NaN()); - bool sign = builtin_signbit(x), qsign = static_cast(sign^builtin_signbit(y)); - float ax = std::fabs(x), ay = std::fabs(y); - if(ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) - return expr(std::numeric_limits::quiet_NaN()); - if(ay >= 65536.0f) - return expr(x); - if(ax == ay) - return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f); - ax = std::fmod(ax, 8.0f*ay); - int cquo = 0; - if(ax >= 4.0f * ay) - { - ax -= 4.0f * ay; - cquo += 4; - } - if(ax >= 2.0f * ay) - { - ax -= 2.0f * ay; - cquo += 2; - } - float y2 = 0.5f * ay; - if(ax > y2) - { - ax -= ay; - ++cquo; - if(ax >= y2) - { - ax -= ay; - ++cquo; - } - } - return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax); - #endif - } - - /// Positive difference implementation. - /// \param x first operand - /// \param y second operand - /// \return Positive difference stored in single-precision - static expr fdim(float x, float y) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::fdim(x, y)); - #else - return expr((x<=y) ? 0.0f : (x-y)); - #endif - } - - /// Fused multiply-add implementation. 
- /// \param x first operand - /// \param y second operand - /// \param z third operand - /// \return \a x * \a y + \a z stored in single-precision - static expr fma(float x, float y, float z) - { - #if HALF_ENABLE_CPP11_CMATH && defined(FP_FAST_FMAF) - return expr(std::fma(x, y, z)); - #else - return expr(x*y+z); - #endif - } - - /// Get NaN. - /// \return Half-precision quiet NaN - static half nanh() { return half(binary, 0x7FFF); } - - /// Exponential implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr exp(float arg) { return expr(std::exp(arg)); } - - /// Exponential implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr expm1(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::expm1(arg)); - #else - return expr(static_cast(std::exp(static_cast(arg))-1.0)); - #endif - } - - /// Binary exponential implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr exp2(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::exp2(arg)); - #else - return expr(static_cast(std::exp(arg*0.69314718055994530941723212145818))); - #endif - } - - /// Logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log(float arg) { return expr(std::log(arg)); } - - /// Common logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log10(float arg) { return expr(std::log10(arg)); } - - /// Logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log1p(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::log1p(arg)); - #else - return expr(static_cast(std::log(1.0+arg))); - #endif - } - - /// Binary logarithm implementation. 
- /// \param arg function argument - /// \return function value stored in single-preicision - static expr log2(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::log2(arg)); - #else - return expr(static_cast(std::log(static_cast(arg))*1.4426950408889634073599246810019)); - #endif - } - - /// Square root implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr sqrt(float arg) { return expr(std::sqrt(arg)); } - - /// Cubic root implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr cbrt(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::cbrt(arg)); - #else - if(builtin_isnan(arg) || builtin_isinf(arg)) - return expr(arg); - return expr(builtin_signbit(arg) ? -static_cast(std::pow(-static_cast(arg), 1.0/3.0)) : - static_cast(std::pow(static_cast(arg), 1.0/3.0))); - #endif - } - - /// Hypotenuse implementation. - /// \param x first argument - /// \param y second argument - /// \return function value stored in single-preicision - static expr hypot(float x, float y) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::hypot(x, y)); - #else - return expr((builtin_isinf(x) || builtin_isinf(y)) ? std::numeric_limits::infinity() : - static_cast(std::sqrt(static_cast(x)*x+static_cast(y)*y))); - #endif - } - - /// Power implementation. - /// \param base value to exponentiate - /// \param exp power to expontiate to - /// \return function value stored in single-preicision - static expr pow(float base, float exp) { return expr(std::pow(base, exp)); } - - /// Sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr sin(float arg) { return expr(std::sin(arg)); } - - /// Cosine implementation. 
- /// \param arg function argument - /// \return function value stored in single-preicision - static expr cos(float arg) { return expr(std::cos(arg)); } - - /// Tan implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr tan(float arg) { return expr(std::tan(arg)); } - - /// Arc sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr asin(float arg) { return expr(std::asin(arg)); } - - /// Arc cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr acos(float arg) { return expr(std::acos(arg)); } - - /// Arc tangent implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr atan(float arg) { return expr(std::atan(arg)); } - - /// Arc tangent implementation. - /// \param x first argument - /// \param y second argument - /// \return function value stored in single-preicision - static expr atan2(float x, float y) { return expr(std::atan2(x, y)); } - - /// Hyperbolic sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr sinh(float arg) { return expr(std::sinh(arg)); } - - /// Hyperbolic cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr cosh(float arg) { return expr(std::cosh(arg)); } - - /// Hyperbolic tangent implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr tanh(float arg) { return expr(std::tanh(arg)); } - - /// Hyperbolic area sine implementation. 
- /// \param arg function argument - /// \return function value stored in single-preicision - static expr asinh(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::asinh(arg)); - #else - return expr((arg==-std::numeric_limits::infinity()) ? arg : static_cast(std::log(arg+std::sqrt(arg*arg+1.0)))); - #endif - } - - /// Hyperbolic area cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr acosh(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::acosh(arg)); - #else - return expr((arg<-1.0f) ? std::numeric_limits::quiet_NaN() : static_cast(std::log(arg+std::sqrt(arg*arg-1.0)))); - #endif - } - - /// Hyperbolic area tangent implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr atanh(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::atanh(arg)); - #else - return expr(static_cast(0.5*std::log((1.0+arg)/(1.0-arg)))); - #endif - } - - /// Error function implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr erf(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::erf(arg)); - #else - return expr(static_cast(erf(static_cast(arg)))); - #endif - } - - /// Complementary implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr erfc(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::erfc(arg)); - #else - return expr(static_cast(1.0-erf(static_cast(arg)))); - #endif - } - - /// Gamma logarithm implementation. 
- /// \param arg function argument - /// \return function value stored in single-preicision - static expr lgamma(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::lgamma(arg)); - #else - if(builtin_isinf(arg)) - return expr(std::numeric_limits::infinity()); - if(arg < 0.0f) - { - float i, f = std::modf(-arg, &i); - if(f == 0.0f) - return expr(std::numeric_limits::infinity()); - return expr(static_cast(1.1447298858494001741434273513531- - std::log(std::abs(std::sin(3.1415926535897932384626433832795*f)))-lgamma(1.0-arg))); - } - return expr(static_cast(lgamma(static_cast(arg)))); - #endif - } - - /// Gamma implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr tgamma(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::tgamma(arg)); - #else - if(arg == 0.0f) - return builtin_signbit(arg) ? expr(-std::numeric_limits::infinity()) : expr(std::numeric_limits::infinity()); - if(arg < 0.0f) - { - float i, f = std::modf(-arg, &i); - if(f == 0.0f) - return expr(std::numeric_limits::quiet_NaN()); - double value = 3.1415926535897932384626433832795 / (std::sin(3.1415926535897932384626433832795*f)*std::exp(lgamma(1.0-arg))); - return expr(static_cast((std::fmod(i, 2.0f)==0.0f) ? -value : value)); - } - if(builtin_isinf(arg)) - return expr(arg); - return expr(static_cast(std::exp(lgamma(static_cast(arg))))); - #endif - } - - /// Floor implementation. - /// \param arg value to round - /// \return rounded value - static half floor(half arg) { return half(binary, round_half(arg.data_)); } - - /// Ceiling implementation. - /// \param arg value to round - /// \return rounded value - static half ceil(half arg) { return half(binary, round_half(arg.data_)); } - - /// Truncation implementation. - /// \param arg value to round - /// \return rounded value - static half trunc(half arg) { return half(binary, round_half(arg.data_)); } - - /// Nearest integer implementation. 
- /// \param arg value to round - /// \return rounded value - static half round(half arg) { return half(binary, round_half_up(arg.data_)); } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long lround(half arg) { return detail::half2int_up(arg.data_); } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static half rint(half arg) { return half(binary, round_half(arg.data_)); } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long lrint(half arg) { return detail::half2int(arg.data_); } - - #if HALF_ENABLE_CPP11_LONG_LONG - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long long llround(half arg) { return detail::half2int_up(arg.data_); } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long long llrint(half arg) { return detail::half2int(arg.data_); } - #endif - - /// Decompression implementation. - /// \param arg number to decompress - /// \param exp address to store exponent at - /// \return normalized significant - static half frexp(half arg, int *exp) - { - int m = arg.data_ & 0x7FFF, e = -14; - if(m >= 0x7C00 || !m) - return *exp = 0, arg; - for(; m<0x400; m<<=1,--e) ; - return *exp = e+(m>>10), half(binary, (arg.data_&0x8000)|0x3800|(m&0x3FF)); - } - - /// Decompression implementation. 
- /// \param arg number to decompress - /// \param iptr address to store integer part at - /// \return fractional part - static half modf(half arg, half *iptr) - { - unsigned int e = arg.data_ & 0x7FFF; - if(e >= 0x6400) - return *iptr = arg, half(binary, arg.data_&(0x8000U|-(e>0x7C00))); - if(e < 0x3C00) - return iptr->data_ = arg.data_ & 0x8000, arg; - e >>= 10; - unsigned int mask = (1<<(25-e)) - 1, m = arg.data_ & mask; - iptr->data_ = arg.data_ & ~mask; - if(!m) - return half(binary, arg.data_&0x8000); - for(; m<0x400; m<<=1,--e) ; - return half(binary, static_cast((arg.data_&0x8000)|(e<<10)|(m&0x3FF))); - } - - /// Scaling implementation. - /// \param arg number to scale - /// \param exp power of two to scale by - /// \return scaled number - static half scalbln(half arg, long exp) - { - unsigned int m = arg.data_ & 0x7FFF; - if(m >= 0x7C00 || !m) - return arg; - for(; m<0x400; m<<=1,--exp) ; - exp += m >> 10; - uint16 value = arg.data_ & 0x8000; - if(exp > 30) - { - if(half::round_style == std::round_toward_zero) - value |= 0x7BFF; - else if(half::round_style == std::round_toward_infinity) - value |= 0x7C00 - (value>>15); - else if(half::round_style == std::round_toward_neg_infinity) - value |= 0x7BFF + (value>>15); - else - value |= 0x7C00; - } - else if(exp > 0) - value |= (exp<<10) | (m&0x3FF); - else if(exp > -11) - { - m = (m&0x3FF) | 0x400; - if(half::round_style == std::round_to_nearest) - { - m += 1 << -exp; - #if HALF_ROUND_TIES_TO_EVEN - m -= (m>>(1-exp)) & 1; - #endif - } - else if(half::round_style == std::round_toward_infinity) - m += ((value>>15)-1) & ((1<<(1-exp))-1U); - else if(half::round_style == std::round_toward_neg_infinity) - m += -(value>>15) & ((1<<(1-exp))-1U); - value |= m >> (1-exp); - } - else if(half::round_style == std::round_toward_infinity) - value -= (value>>15) - 1; - else if(half::round_style == std::round_toward_neg_infinity) - value += value >> 15; - return half(binary, value); - } - - /// Exponent implementation. 
- /// \param arg number to query - /// \return floating point exponent - static int ilogb(half arg) - { - int abs = arg.data_ & 0x7FFF; - if(!abs) - return FP_ILOGB0; - if(abs < 0x7C00) - { - int exp = (abs>>10) - 15; - if(abs < 0x400) - for(; abs<0x200; abs<<=1,--exp) ; - return exp; - } - if(abs > 0x7C00) - return FP_ILOGBNAN; - return INT_MAX; - } - - /// Exponent implementation. - /// \param arg number to query - /// \return floating point exponent - static half logb(half arg) - { - int abs = arg.data_ & 0x7FFF; - if(!abs) - return half(binary, 0xFC00); - if(abs < 0x7C00) - { - int exp = (abs>>10) - 15; - if(abs < 0x400) - for(; abs<0x200; abs<<=1,--exp) ; - uint16 bits = (exp<0) << 15; - if(exp) - { - unsigned int m = std::abs(exp) << 6, e = 18; - for(; m<0x400; m<<=1,--e) ; - bits |= (e<<10) + m; - } - return half(binary, bits); - } - if(abs > 0x7C00) - return arg; - return half(binary, 0x7C00); - } - - /// Enumeration implementation. - /// \param from number to increase/decrease - /// \param to direction to enumerate into - /// \return next representable number - static half nextafter(half from, half to) - { - uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF; - if(fabs > 0x7C00) - return from; - if(tabs > 0x7C00 || from.data_ == to.data_ || !(fabs|tabs)) - return to; - if(!fabs) - return half(binary, (to.data_&0x8000)+1); - bool lt = ((fabs==from.data_) ? static_cast(fabs) : -static_cast(fabs)) < - ((tabs==to.data_) ? static_cast(tabs) : -static_cast(tabs)); - return half(binary, from.data_+(((from.data_>>15)^static_cast(lt))<<1)-1); - } - - /// Enumeration implementation. 
- /// \param from number to increase/decrease - /// \param to direction to enumerate into - /// \return next representable number - static half nexttoward(half from, long double to) - { - if(isnan(from)) - return from; - auto lfrom = static_cast(from); - if(builtin_isnan(to) || lfrom == to) - return half(static_cast(to)); - if(!(from.data_&0x7FFF)) - return half(binary, (static_cast(builtin_signbit(to))<<15)+1); - return half(binary, from.data_+(((from.data_>>15)^static_cast(lfrom0x3FF) ? ((abs>=0x7C00) ? ((abs>0x7C00) ? FP_NAN : FP_INFINITE) : FP_NORMAL) :FP_SUBNORMAL) : FP_ZERO; - } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if finite number - /// \retval false else - static bool isfinite(half arg) { return (arg.data_&0x7C00) != 0x7C00; } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if infinite number - /// \retval false else - static bool isinf(half arg) { return (arg.data_&0x7FFF) == 0x7C00; } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if not a number - /// \retval false else - static bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if normal number - /// \retval false else - static bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); } - - /// Sign bit implementation. - /// \param arg value to check - /// \retval true if signed - /// \retval false if unsigned - static bool signbit(half arg) { return (arg.data_&0x8000) != 0; } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if operands equal - /// \retval false else - static bool isequal(half x, half y) { return (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF)) && !isnan(x); } - - /// Comparison implementation. 
- /// \param x first operand - /// \param y second operand - /// \retval true if operands not equal - /// \retval false else - static bool isnotequal(half x, half y) { return (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF)) || isnan(x); } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x > \a y - /// \retval false else - static bool isgreater(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) > ((yabs==y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x >= \a y - /// \retval false else - static bool isgreaterequal(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) >= ((yabs==y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x < \a y - /// \retval false else - static bool isless(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) < ((yabs==y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x <= \a y - /// \retval false else - static bool islessequal(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) <= ((yabs==y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. 
- /// \param x first operand - /// \param y second operand - /// \retval true if either \a x > \a y nor \a x < \a y - /// \retval false else - static bool islessgreater(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - if(xabs > 0x7C00 || yabs > 0x7C00) - return false; - int a = (xabs==x.data_) ? xabs : -xabs, b = (yabs==y.data_) ? yabs : -yabs; - return a < b || a > b; - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if operand unordered - /// \retval false else - static bool isunordered(half x, half y) { return isnan(x) || isnan(y); } - - private: - static double erf(double arg) - { - if(builtin_isinf(arg)) - return (arg<0.0) ? -1.0 : 1.0; - double x2 = arg * arg, ax2 = 0.147 * x2, value = std::sqrt(1.0-std::exp(-x2*(1.2732395447351626861510701069801+ax2)/(1.0+ax2))); - return builtin_signbit(arg) ? -value : value; - } - - static double lgamma(double arg) - { - double v = 1.0; - for(; arg<8.0; ++arg) v *= arg; - double w = 1.0 / (arg*arg); - return (((((((-0.02955065359477124183006535947712*w+0.00641025641025641025641025641026)*w+ - -0.00191752691752691752691752691753)*w+8.4175084175084175084175084175084e-4)*w+ - -5.952380952380952380952380952381e-4)*w+7.9365079365079365079365079365079e-4)*w+ - -0.00277777777777777777777777777778)*w+0.08333333333333333333333333333333)/arg + - 0.91893853320467274178032973640562 - std::log(v) - arg + (arg-0.5) * std::log(arg); - } - }; - - /// Wrapper for unary half-precision functions needing specialization for individual argument types. - /// \tparam T argument type - template struct unary_specialized - { - /// Negation implementation. - /// \param arg value to negate - /// \return negated value - static HALF_CONSTEXPR half negate(half arg) { return half(binary, arg.data_^0x8000); } - - /// Absolute value implementation. 
- /// \param arg function argument - /// \return absolute value - static half fabs(half arg) { return half(binary, arg.data_&0x7FFF); } - }; - template<> struct unary_specialized - { - static HALF_CONSTEXPR expr negate(float arg) { return expr(-arg); } - static expr fabs(float arg) { return expr(std::fabs(arg)); } - }; - - /// Wrapper for binary half-precision functions needing specialization for individual argument types. - /// \tparam T first argument type - /// \tparam U first argument type - template struct binary_specialized - { - /// Minimum implementation. - /// \param x first operand - /// \param y second operand - /// \return minimum value - static expr fmin(float x, float y) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::fmin(x, y)); - #else - if(builtin_isnan(x)) - return expr(y); - if(builtin_isnan(y)) - return expr(x); - return expr(std::min(x, y)); - #endif - } - - /// Maximum implementation. - /// \param x first operand - /// \param y second operand - /// \return maximum value - static expr fmax(float x, float y) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::fmax(x, y)); - #else - if(builtin_isnan(x)) - return expr(y); - if(builtin_isnan(y)) - return expr(x); - return expr(std::max(x, y)); - #endif - } - }; - template<> struct binary_specialized - { - static half fmin(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - if(xabs > 0x7C00) - return y; - if(yabs > 0x7C00) - return x; - return (((xabs==x.data_) ? xabs : -xabs) > ((yabs==y.data_) ? yabs : -yabs)) ? y : x; - } - static half fmax(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - if(xabs > 0x7C00) - return y; - if(yabs > 0x7C00) - return x; - return (((xabs==x.data_) ? xabs : -xabs) < ((yabs==y.data_) ? yabs : -yabs)) ? y : x; - } - }; - - /// Helper class for half casts. 
- /// This class template has to be specialized for all valid cast argument to define an appropriate static `cast` member - /// function and a corresponding `type` member denoting its return type. - /// \tparam T destination type - /// \tparam U source type - /// \tparam R rounding mode to use - template struct half_caster {}; - template struct half_caster - { - #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, "half_cast from non-arithmetic type unsupported"); - #endif - - static half cast(U arg) { return cast_impl(arg, is_float()); }; - - private: - static half cast_impl(U arg, true_type) { return half(binary, float2half(arg)); } - static half cast_impl(U arg, false_type) { return half(binary, int2half(arg)); } - }; - template struct half_caster - { - #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); - #endif - - static T cast(half arg) { return cast_impl(arg, is_float()); } - - private: - static T cast_impl(half arg, true_type) { return half2float(arg.data_); } - static T cast_impl(half arg, false_type) { return half2int(arg.data_); } - }; - template struct half_caster - { - #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); - #endif - - static T cast(expr arg) { return cast_impl(arg, is_float()); } - - private: - static T cast_impl(float arg, true_type) { return static_cast(arg); } - static T cast_impl(half arg, false_type) { return half2int(arg.data_); } - }; - template struct half_caster - { - static half cast(half arg) { return arg; } - }; - template struct half_caster : half_caster {}; - - /// \name Comparison operators - /// \{ - - /// Comparison for equality. 
- /// \param x first operand - /// \param y second operand - /// \retval true if operands equal - /// \retval false else - template typename enable::type operator==(T x, U y) { return functions::isequal(x, y); } - - /// Comparison for inequality. - /// \param x first operand - /// \param y second operand - /// \retval true if operands not equal - /// \retval false else - template typename enable::type operator!=(T x, U y) { return functions::isnotequal(x, y); } - - /// Comparison for less than. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x less than \a y - /// \retval false else - template typename enable::type operator<(T x, U y) { return functions::isless(x, y); } - - /// Comparison for greater than. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater than \a y - /// \retval false else - template typename enable::type operator>(T x, U y) { return functions::isgreater(x, y); } - - /// Comparison for less equal. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x less equal \a y - /// \retval false else - template typename enable::type operator<=(T x, U y) { return functions::islessequal(x, y); } - - /// Comparison for greater equal. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater equal \a y - /// \retval false else - template typename enable::type operator>=(T x, U y) { return functions::isgreaterequal(x, y); } - - /// \} - /// \name Arithmetic operators - /// \{ - - /// Add halfs. - /// \param x left operand - /// \param y right operand - /// \return sum of half expressions - template typename enable::type operator+(T x, U y) { return functions::plus(x, y); } - - /// Subtract halfs. - /// \param x left operand - /// \param y right operand - /// \return difference of half expressions - template typename enable::type operator-(T x, U y) { return functions::minus(x, y); } - - /// Multiply halfs. 
- /// \param x left operand - /// \param y right operand - /// \return product of half expressions - template typename enable::type operator*(T x, U y) { return functions::multiplies(x, y); } - - /// Divide halfs. - /// \param x left operand - /// \param y right operand - /// \return quotient of half expressions - template typename enable::type operator/(T x, U y) { return functions::divides(x, y); } - - /// Identity. - /// \param arg operand - /// \return uncahnged operand - template HALF_CONSTEXPR typename enable::type operator+(T arg) { return arg; } - - /// Negation. - /// \param arg operand - /// \return negated operand - template HALF_CONSTEXPR typename enable::type operator-(T arg) { return unary_specialized::negate(arg); } - - /// \} - /// \name Input and output - /// \{ - - /// Output operator. - /// \param out output stream to write into - /// \param arg half expression to write - /// \return reference to output stream - template typename enable&,T>::type - operator<<(std::basic_ostream &out, T arg) { return functions::write(out, arg); } - - /// Input operator. - /// \param in input stream to read from - /// \param arg half to read into - /// \return reference to input stream - template std::basic_istream& - operator>>(std::basic_istream &in, half &arg) { return functions::read(in, arg); } - - /// \} - /// \name Basic mathematical operations - /// \{ - - /// Absolute value. - /// \param arg operand - /// \return absolute value of \a arg -// template typename enable::type abs(T arg) { return unary_specialized::fabs(arg); } - inline half abs(half arg) { return unary_specialized::fabs(arg); } - inline expr abs(expr arg) { return unary_specialized::fabs(arg); } - - /// Absolute value. 
- /// \param arg operand - /// \return absolute value of \a arg -// template typename enable::type fabs(T arg) { return unary_specialized::fabs(arg); } - inline half fabs(half arg) { return unary_specialized::fabs(arg); } - inline expr fabs(expr arg) { return unary_specialized::fabs(arg); } - - /// Remainder of division. - /// \param x first operand - /// \param y second operand - /// \return remainder of floating point division. -// template typename enable::type fmod(T x, U y) { return functions::fmod(x, y); } - inline expr fmod(half x, half y) { return functions::fmod(x, y); } - inline expr fmod(half x, expr y) { return functions::fmod(x, y); } - inline expr fmod(expr x, half y) { return functions::fmod(x, y); } - inline expr fmod(expr x, expr y) { return functions::fmod(x, y); } - - /// Remainder of division. - /// \param x first operand - /// \param y second operand - /// \return remainder of floating point division. -// template typename enable::type remainder(T x, U y) { return functions::remainder(x, y); } - inline expr remainder(half x, half y) { return functions::remainder(x, y); } - inline expr remainder(half x, expr y) { return functions::remainder(x, y); } - inline expr remainder(expr x, half y) { return functions::remainder(x, y); } - inline expr remainder(expr x, expr y) { return functions::remainder(x, y); } - - /// Remainder of division. - /// \param x first operand - /// \param y second operand - /// \param quo address to store some bits of quotient at - /// \return remainder of floating point division. 
-// template typename enable::type remquo(T x, U y, int *quo) { return functions::remquo(x, y, quo); } - inline expr remquo(half x, half y, int *quo) { return functions::remquo(x, y, quo); } - inline expr remquo(half x, expr y, int *quo) { return functions::remquo(x, y, quo); } - inline expr remquo(expr x, half y, int *quo) { return functions::remquo(x, y, quo); } - inline expr remquo(expr x, expr y, int *quo) { return functions::remquo(x, y, quo); } - - /// Fused multiply add. - /// \param x first operand - /// \param y second operand - /// \param z third operand - /// \return ( \a x * \a y ) + \a z rounded as one operation. -// template typename enable::type fma(T x, U y, V z) { return functions::fma(x, y, z); } - inline expr fma(half x, half y, half z) { return functions::fma(x, y, z); } - inline expr fma(half x, half y, expr z) { return functions::fma(x, y, z); } - inline expr fma(half x, expr y, half z) { return functions::fma(x, y, z); } - inline expr fma(half x, expr y, expr z) { return functions::fma(x, y, z); } - inline expr fma(expr x, half y, half z) { return functions::fma(x, y, z); } - inline expr fma(expr x, half y, expr z) { return functions::fma(x, y, z); } - inline expr fma(expr x, expr y, half z) { return functions::fma(x, y, z); } - inline expr fma(expr x, expr y, expr z) { return functions::fma(x, y, z); } - - /// Maximum of half expressions. - /// \param x first operand - /// \param y second operand - /// \return maximum of operands -// template typename result::type fmax(T x, U y) { return binary_specialized::fmax(x, y); } - inline half fmax(half x, half y) { return binary_specialized::fmax(x, y); } - inline expr fmax(half x, expr y) { return binary_specialized::fmax(x, y); } - inline expr fmax(expr x, half y) { return binary_specialized::fmax(x, y); } - inline expr fmax(expr x, expr y) { return binary_specialized::fmax(x, y); } - - /// Minimum of half expressions. 
- /// \param x first operand - /// \param y second operand - /// \return minimum of operands -// template typename result::type fmin(T x, U y) { return binary_specialized::fmin(x, y); } - inline half fmin(half x, half y) { return binary_specialized::fmin(x, y); } - inline expr fmin(half x, expr y) { return binary_specialized::fmin(x, y); } - inline expr fmin(expr x, half y) { return binary_specialized::fmin(x, y); } - inline expr fmin(expr x, expr y) { return binary_specialized::fmin(x, y); } - - /// Positive difference. - /// \param x first operand - /// \param y second operand - /// \return \a x - \a y or 0 if difference negative -// template typename enable::type fdim(T x, U y) { return functions::fdim(x, y); } - inline expr fdim(half x, half y) { return functions::fdim(x, y); } - inline expr fdim(half x, expr y) { return functions::fdim(x, y); } - inline expr fdim(expr x, half y) { return functions::fdim(x, y); } - inline expr fdim(expr x, expr y) { return functions::fdim(x, y); } - - /// Get NaN value. - /// \return quiet NaN - inline half nanh(const char*) { return functions::nanh(); } - - /// \} - /// \name Exponential functions - /// \{ - - /// Exponential function. - /// \param arg function argument - /// \return e raised to \a arg -// template typename enable::type exp(T arg) { return functions::exp(arg); } - inline expr exp(half arg) { return functions::exp(arg); } - inline expr exp(expr arg) { return functions::exp(arg); } - - /// Exponential minus one. - /// \param arg function argument - /// \return e raised to \a arg subtracted by 1 -// template typename enable::type expm1(T arg) { return functions::expm1(arg); } - inline expr expm1(half arg) { return functions::expm1(arg); } - inline expr expm1(expr arg) { return functions::expm1(arg); } - - /// Binary exponential. 
- /// \param arg function argument - /// \return 2 raised to \a arg -// template typename enable::type exp2(T arg) { return functions::exp2(arg); } - inline expr exp2(half arg) { return functions::exp2(arg); } - inline expr exp2(expr arg) { return functions::exp2(arg); } - - /// Natural logorithm. - /// \param arg function argument - /// \return logarithm of \a arg to base e -// template typename enable::type log(T arg) { return functions::log(arg); } - inline expr log(half arg) { return functions::log(arg); } - inline expr log(expr arg) { return functions::log(arg); } - - /// Common logorithm. - /// \param arg function argument - /// \return logarithm of \a arg to base 10 -// template typename enable::type log10(T arg) { return functions::log10(arg); } - inline expr log10(half arg) { return functions::log10(arg); } - inline expr log10(expr arg) { return functions::log10(arg); } - - /// Natural logorithm. - /// \param arg function argument - /// \return logarithm of \a arg plus 1 to base e -// template typename enable::type log1p(T arg) { return functions::log1p(arg); } - inline expr log1p(half arg) { return functions::log1p(arg); } - inline expr log1p(expr arg) { return functions::log1p(arg); } - - /// Binary logorithm. - /// \param arg function argument - /// \return logarithm of \a arg to base 2 -// template typename enable::type log2(T arg) { return functions::log2(arg); } - inline expr log2(half arg) { return functions::log2(arg); } - inline expr log2(expr arg) { return functions::log2(arg); } - - /// \} - /// \name Power functions - /// \{ - - /// Square root. - /// \param arg function argument - /// \return square root of \a arg -// template typename enable::type sqrt(T arg) { return functions::sqrt(arg); } - inline expr sqrt(half arg) { return functions::sqrt(arg); } - inline expr sqrt(expr arg) { return functions::sqrt(arg); } - - /// Cubic root. 
- /// \param arg function argument - /// \return cubic root of \a arg -// template typename enable::type cbrt(T arg) { return functions::cbrt(arg); } - inline expr cbrt(half arg) { return functions::cbrt(arg); } - inline expr cbrt(expr arg) { return functions::cbrt(arg); } - - /// Hypotenuse function. - /// \param x first argument - /// \param y second argument - /// \return square root of sum of squares without internal over- or underflows -// template typename enable::type hypot(T x, U y) { return functions::hypot(x, y); } - inline expr hypot(half x, half y) { return functions::hypot(x, y); } - inline expr hypot(half x, expr y) { return functions::hypot(x, y); } - inline expr hypot(expr x, half y) { return functions::hypot(x, y); } - inline expr hypot(expr x, expr y) { return functions::hypot(x, y); } - - /// Power function. - /// \param base first argument - /// \param exp second argument - /// \return \a base raised to \a exp -// template typename enable::type pow(T base, U exp) { return functions::pow(base, exp); } - inline expr pow(half base, half exp) { return functions::pow(base, exp); } - inline expr pow(half base, expr exp) { return functions::pow(base, exp); } - inline expr pow(expr base, half exp) { return functions::pow(base, exp); } - inline expr pow(expr base, expr exp) { return functions::pow(base, exp); } - - /// \} - /// \name Trigonometric functions - /// \{ - - /// Sine function. - /// \param arg function argument - /// \return sine value of \a arg -// template typename enable::type sin(T arg) { return functions::sin(arg); } - inline expr sin(half arg) { return functions::sin(arg); } - inline expr sin(expr arg) { return functions::sin(arg); } - - /// Cosine function. 
- /// \param arg function argument - /// \return cosine value of \a arg -// template typename enable::type cos(T arg) { return functions::cos(arg); } - inline expr cos(half arg) { return functions::cos(arg); } - inline expr cos(expr arg) { return functions::cos(arg); } - - /// Tangent function. - /// \param arg function argument - /// \return tangent value of \a arg -// template typename enable::type tan(T arg) { return functions::tan(arg); } - inline expr tan(half arg) { return functions::tan(arg); } - inline expr tan(expr arg) { return functions::tan(arg); } - - /// Arc sine. - /// \param arg function argument - /// \return arc sine value of \a arg -// template typename enable::type asin(T arg) { return functions::asin(arg); } - inline expr asin(half arg) { return functions::asin(arg); } - inline expr asin(expr arg) { return functions::asin(arg); } - - /// Arc cosine function. - /// \param arg function argument - /// \return arc cosine value of \a arg -// template typename enable::type acos(T arg) { return functions::acos(arg); } - inline expr acos(half arg) { return functions::acos(arg); } - inline expr acos(expr arg) { return functions::acos(arg); } - - /// Arc tangent function. - /// \param arg function argument - /// \return arc tangent value of \a arg -// template typename enable::type atan(T arg) { return functions::atan(arg); } - inline expr atan(half arg) { return functions::atan(arg); } - inline expr atan(expr arg) { return functions::atan(arg); } - - /// Arc tangent function. 
- /// \param x first argument - /// \param y second argument - /// \return arc tangent value -// template typename enable::type atan2(T x, U y) { return functions::atan2(x, y); } - inline expr atan2(half x, half y) { return functions::atan2(x, y); } - inline expr atan2(half x, expr y) { return functions::atan2(x, y); } - inline expr atan2(expr x, half y) { return functions::atan2(x, y); } - inline expr atan2(expr x, expr y) { return functions::atan2(x, y); } - - /// \} - /// \name Hyperbolic functions - /// \{ - - /// Hyperbolic sine. - /// \param arg function argument - /// \return hyperbolic sine value of \a arg -// template typename enable::type sinh(T arg) { return functions::sinh(arg); } - inline expr sinh(half arg) { return functions::sinh(arg); } - inline expr sinh(expr arg) { return functions::sinh(arg); } - - /// Hyperbolic cosine. - /// \param arg function argument - /// \return hyperbolic cosine value of \a arg -// template typename enable::type cosh(T arg) { return functions::cosh(arg); } - inline expr cosh(half arg) { return functions::cosh(arg); } - inline expr cosh(expr arg) { return functions::cosh(arg); } - - /// Hyperbolic tangent. - /// \param arg function argument - /// \return hyperbolic tangent value of \a arg -// template typename enable::type tanh(T arg) { return functions::tanh(arg); } - inline expr tanh(half arg) { return functions::tanh(arg); } - inline expr tanh(expr arg) { return functions::tanh(arg); } - - /// Hyperbolic area sine. - /// \param arg function argument - /// \return area sine value of \a arg -// template typename enable::type asinh(T arg) { return functions::asinh(arg); } - inline expr asinh(half arg) { return functions::asinh(arg); } - inline expr asinh(expr arg) { return functions::asinh(arg); } - - /// Hyperbolic area cosine. 
- /// \param arg function argument - /// \return area cosine value of \a arg -// template typename enable::type acosh(T arg) { return functions::acosh(arg); } - inline expr acosh(half arg) { return functions::acosh(arg); } - inline expr acosh(expr arg) { return functions::acosh(arg); } - - /// Hyperbolic area tangent. - /// \param arg function argument - /// \return area tangent value of \a arg -// template typename enable::type atanh(T arg) { return functions::atanh(arg); } - inline expr atanh(half arg) { return functions::atanh(arg); } - inline expr atanh(expr arg) { return functions::atanh(arg); } - - /// \} - /// \name Error and gamma functions - /// \{ - - /// Error function. - /// \param arg function argument - /// \return error function value of \a arg -// template typename enable::type erf(T arg) { return functions::erf(arg); } - inline expr erf(half arg) { return functions::erf(arg); } - inline expr erf(expr arg) { return functions::erf(arg); } - - /// Complementary error function. - /// \param arg function argument - /// \return 1 minus error function value of \a arg -// template typename enable::type erfc(T arg) { return functions::erfc(arg); } - inline expr erfc(half arg) { return functions::erfc(arg); } - inline expr erfc(expr arg) { return functions::erfc(arg); } - - /// Natural logarithm of gamma function. - /// \param arg function argument - /// \return natural logarith of gamma function for \a arg -// template typename enable::type lgamma(T arg) { return functions::lgamma(arg); } - inline expr lgamma(half arg) { return functions::lgamma(arg); } - inline expr lgamma(expr arg) { return functions::lgamma(arg); } - - /// Gamma function. 
- /// \param arg function argument - /// \return gamma function value of \a arg -// template typename enable::type tgamma(T arg) { return functions::tgamma(arg); } - inline expr tgamma(half arg) { return functions::tgamma(arg); } - inline expr tgamma(expr arg) { return functions::tgamma(arg); } - - /// \} - /// \name Rounding - /// \{ - - /// Nearest integer not less than half value. - /// \param arg half to round - /// \return nearest integer not less than \a arg -// template typename enable::type ceil(T arg) { return functions::ceil(arg); } - inline half ceil(half arg) { return functions::ceil(arg); } - inline half ceil(expr arg) { return functions::ceil(arg); } - - /// Nearest integer not greater than half value. - /// \param arg half to round - /// \return nearest integer not greater than \a arg -// template typename enable::type floor(T arg) { return functions::floor(arg); } - inline half floor(half arg) { return functions::floor(arg); } - inline half floor(expr arg) { return functions::floor(arg); } - - /// Nearest integer not greater in magnitude than half value. - /// \param arg half to round - /// \return nearest integer not greater in magnitude than \a arg -// template typename enable::type trunc(T arg) { return functions::trunc(arg); } - inline half trunc(half arg) { return functions::trunc(arg); } - inline half trunc(expr arg) { return functions::trunc(arg); } - - /// Nearest integer. - /// \param arg half to round - /// \return nearest integer, rounded away from zero in half-way cases -// template typename enable::type round(T arg) { return functions::round(arg); } - inline half round(half arg) { return functions::round(arg); } - inline half round(expr arg) { return functions::round(arg); } +/// Library-defined half-precision literals. 
+/// Import this namespace to enable half-precision floating point literals: +/// ~~~~{.cpp} +/// using namespace half_float::literal; +/// half_float::half = 4.2_h; +/// ~~~~ +namespace literal +{ +half operator"" _h(long double); +} +#endif - /// Nearest integer. - /// \param arg half to round - /// \return nearest integer, rounded away from zero in half-way cases -// template typename enable::type lround(T arg) { return functions::lround(arg); } - inline long lround(half arg) { return functions::lround(arg); } - inline long lround(expr arg) { return functions::lround(arg); } +/// \internal +/// \brief Implementation details. +namespace detail +{ +#if HALF_ENABLE_CPP11_TYPE_TRAITS +/// Conditional type. +template +struct conditional : std::conditional +{ +}; - /// Nearest integer using half's internal rounding mode. - /// \param arg half expression to round - /// \return nearest integer using default rounding mode -// template typename enable::type nearbyint(T arg) { return functions::nearbyint(arg); } - inline half nearbyint(half arg) { return functions::rint(arg); } - inline half nearbyint(expr arg) { return functions::rint(arg); } +/// Helper for tag dispatching. +template +struct bool_type : std::integral_constant +{ +}; +using std::true_type; +using std::false_type; - /// Nearest integer using half's internal rounding mode. - /// \param arg half expression to round - /// \return nearest integer using default rounding mode -// template typename enable::type rint(T arg) { return functions::rint(arg); } - inline half rint(half arg) { return functions::rint(arg); } - inline half rint(expr arg) { return functions::rint(arg); } +/// Type traits for floating point types. +template +struct is_float : std::is_floating_point +{ +}; +#else +/// Conditional type. +template +struct conditional +{ + typedef T type; +}; +template +struct conditional +{ + typedef F type; +}; - /// Nearest integer using half's internal rounding mode. 
- /// \param arg half expression to round - /// \return nearest integer using default rounding mode -// template typename enable::type lrint(T arg) { return functions::lrint(arg); } - inline long lrint(half arg) { return functions::lrint(arg); } - inline long lrint(expr arg) { return functions::lrint(arg); } - #if HALF_ENABLE_CPP11_LONG_LONG - /// Nearest integer. - /// \param arg half to round - /// \return nearest integer, rounded away from zero in half-way cases -// template typename enable::type llround(T arg) { return functions::llround(arg); } - inline long long llround(half arg) { return functions::llround(arg); } - inline long long llround(expr arg) { return functions::llround(arg); } +/// Helper for tag dispatching. +template +struct bool_type +{ +}; +typedef bool_type true_type; +typedef bool_type false_type; - /// Nearest integer using half's internal rounding mode. - /// \param arg half expression to round - /// \return nearest integer using default rounding mode -// template typename enable::type llrint(T arg) { return functions::llrint(arg); } - inline long long llrint(half arg) { return functions::llrint(arg); } - inline long long llrint(expr arg) { return functions::llrint(arg); } - #endif - - /// \} - /// \name Floating point manipulation - /// \{ - - /// Decompress floating point number. - /// \param arg number to decompress - /// \param exp address to store exponent at - /// \return significant in range [0.5, 1) -// template typename enable::type frexp(T arg, int *exp) { return functions::frexp(arg, exp); } - inline half frexp(half arg, int *exp) { return functions::frexp(arg, exp); } - inline half frexp(expr arg, int *exp) { return functions::frexp(arg, exp); } - - /// Multiply by power of two. 
- /// \param arg number to modify - /// \param exp power of two to multiply with - /// \return \a arg multplied by 2 raised to \a exp -// template typename enable::type ldexp(T arg, int exp) { return functions::scalbln(arg, exp); } - inline half ldexp(half arg, int exp) { return functions::scalbln(arg, exp); } - inline half ldexp(expr arg, int exp) { return functions::scalbln(arg, exp); } - - /// Extract integer and fractional parts. - /// \param arg number to decompress - /// \param iptr address to store integer part at - /// \return fractional part -// template typename enable::type modf(T arg, half *iptr) { return functions::modf(arg, iptr); } - inline half modf(half arg, half *iptr) { return functions::modf(arg, iptr); } - inline half modf(expr arg, half *iptr) { return functions::modf(arg, iptr); } - - /// Multiply by power of two. - /// \param arg number to modify - /// \param exp power of two to multiply with - /// \return \a arg multplied by 2 raised to \a exp -// template typename enable::type scalbn(T arg, int exp) { return functions::scalbln(arg, exp); } - inline half scalbn(half arg, int exp) { return functions::scalbln(arg, exp); } - inline half scalbn(expr arg, int exp) { return functions::scalbln(arg, exp); } - - /// Multiply by power of two. - /// \param arg number to modify - /// \param exp power of two to multiply with - /// \return \a arg multplied by 2 raised to \a exp -// template typename enable::type scalbln(T arg, long exp) { return functions::scalbln(arg, exp); } - inline half scalbln(half arg, long exp) { return functions::scalbln(arg, exp); } - inline half scalbln(expr arg, long exp) { return functions::scalbln(arg, exp); } - - /// Extract exponent. 
- /// \param arg number to query - /// \return floating point exponent - /// \retval FP_ILOGB0 for zero - /// \retval FP_ILOGBNAN for NaN - /// \retval MAX_INT for infinity -// template typename enable::type ilogb(T arg) { return functions::ilogb(arg); } - inline int ilogb(half arg) { return functions::ilogb(arg); } - inline int ilogb(expr arg) { return functions::ilogb(arg); } +/// Type traits for floating point types. +template +struct is_float : false_type +{ +}; +template +struct is_float : is_float +{ +}; +template +struct is_float : is_float +{ +}; +template +struct is_float : is_float +{ +}; +template <> +struct is_float : true_type +{ +}; +template <> +struct is_float : true_type +{ +}; +template <> +struct is_float : true_type +{ +}; +#endif - /// Extract exponent. - /// \param arg number to query - /// \return floating point exponent -// template typename enable::type logb(T arg) { return functions::logb(arg); } - inline half logb(half arg) { return functions::logb(arg); } - inline half logb(expr arg) { return functions::logb(arg); } - - /// Next representable value. - /// \param from value to compute next representable value for - /// \param to direction towards which to compute next value - /// \return next representable value after \a from in direction towards \a to -// template typename enable::type nextafter(T from, U to) { return functions::nextafter(from, to); } - inline half nextafter(half from, half to) { return functions::nextafter(from, to); } - inline half nextafter(half from, expr to) { return functions::nextafter(from, to); } - inline half nextafter(expr from, half to) { return functions::nextafter(from, to); } - inline half nextafter(expr from, expr to) { return functions::nextafter(from, to); } - - /// Next representable value. 
- /// \param from value to compute next representable value for - /// \param to direction towards which to compute next value - /// \return next representable value after \a from in direction towards \a to -// template typename enable::type nexttoward(T from, long double to) { return functions::nexttoward(from, to); } - inline half nexttoward(half from, long double to) { return functions::nexttoward(from, to); } - inline half nexttoward(expr from, long double to) { return functions::nexttoward(from, to); } - - /// Take sign. - /// \param x value to change sign for - /// \param y value to take sign from - /// \return value equal to \a x in magnitude and to \a y in sign -// template typename enable::type copysign(T x, U y) { return functions::copysign(x, y); } - inline half copysign(half x, half y) { return functions::copysign(x, y); } - inline half copysign(half x, expr y) { return functions::copysign(x, y); } - inline half copysign(expr x, half y) { return functions::copysign(x, y); } - inline half copysign(expr x, expr y) { return functions::copysign(x, y); } - - /// \} - /// \name Floating point classification - /// \{ - - - /// Classify floating point value. - /// \param arg number to classify - /// \retval FP_ZERO for positive and negative zero - /// \retval FP_SUBNORMAL for subnormal numbers - /// \retval FP_INFINITY for positive and negative infinity - /// \retval FP_NAN for NaNs - /// \retval FP_NORMAL for all other (normal) values -// template typename enable::type fpclassify(T arg) { return functions::fpclassify(arg); } - inline int fpclassify(half arg) { return functions::fpclassify(arg); } - inline int fpclassify(expr arg) { return functions::fpclassify(arg); } +/// Type traits for floating point bits. +template +struct bits +{ + typedef unsigned char type; +}; +template +struct bits : bits +{ +}; +template +struct bits : bits +{ +}; +template +struct bits : bits +{ +}; - /// Check if finite number. 
- /// \param arg number to check - /// \retval true if neither infinity nor NaN - /// \retval false else -// template typename enable::type isfinite(T arg) { return functions::isfinite(arg); } - inline bool isfinite(half arg) { return functions::isfinite(arg); } - inline bool isfinite(expr arg) { return functions::isfinite(arg); } +#if HALF_ENABLE_CPP11_CSTDINT +/// Unsigned integer of (at least) 16 bits width. +typedef std::uint_least16_t uint16; - /// Check for infinity. - /// \param arg number to check - /// \retval true for positive or negative infinity - /// \retval false else -// template typename enable::type isinf(T arg) { return functions::isinf(arg); } - inline bool isinf(half arg) { return functions::isinf(arg); } - inline bool isinf(expr arg) { return functions::isinf(arg); } +/// Unsigned integer of (at least) 32 bits width. +template <> +struct bits +{ + typedef std::uint_least32_t type; +}; - /// Check for NaN. - /// \param arg number to check - /// \retval true for NaNs - /// \retval false else -// template typename enable::type isnan(T arg) { return functions::isnan(arg); } - inline bool isnan(half arg) { return functions::isnan(arg); } - inline bool isnan(expr arg) { return functions::isnan(arg); } +/// Unsigned integer of (at least) 64 bits width. +template <> +struct bits +{ + typedef std::uint_least64_t type; +}; +#else +/// Unsigned integer of (at least) 16 bits width. +typedef unsigned short uint16; - /// Check if normal number. - /// \param arg number to check - /// \retval true if normal number - /// \retval false if either subnormal, zero, infinity or NaN -// template typename enable::type isnormal(T arg) { return functions::isnormal(arg); } - inline bool isnormal(half arg) { return functions::isnormal(arg); } - inline bool isnormal(expr arg) { return functions::isnormal(arg); } +/// Unsigned integer of (at least) 32 bits width. +template <> +struct bits : conditional::digits >= 32, unsigned int, unsigned long> +{ +}; - /// Check sign. 
- /// \param arg number to check - /// \retval true for negative number - /// \retval false for positive number -// template typename enable::type signbit(T arg) { return functions::signbit(arg); } - inline bool signbit(half arg) { return functions::signbit(arg); } - inline bool signbit(expr arg) { return functions::signbit(arg); } - - /// \} - /// \name Comparison - /// \{ - - /// Comparison for greater than. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater than \a y - /// \retval false else -// template typename enable::type isgreater(T x, U y) { return functions::isgreater(x, y); } - inline bool isgreater(half x, half y) { return functions::isgreater(x, y); } - inline bool isgreater(half x, expr y) { return functions::isgreater(x, y); } - inline bool isgreater(expr x, half y) { return functions::isgreater(x, y); } - inline bool isgreater(expr x, expr y) { return functions::isgreater(x, y); } - - /// Comparison for greater equal. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater equal \a y - /// \retval false else -// template typename enable::type isgreaterequal(T x, U y) { return functions::isgreaterequal(x, y); } - inline bool isgreaterequal(half x, half y) { return functions::isgreaterequal(x, y); } - inline bool isgreaterequal(half x, expr y) { return functions::isgreaterequal(x, y); } - inline bool isgreaterequal(expr x, half y) { return functions::isgreaterequal(x, y); } - inline bool isgreaterequal(expr x, expr y) { return functions::isgreaterequal(x, y); } - - /// Comparison for less than. 
- /// \param x first operand - /// \param y second operand - /// \retval true if \a x less than \a y - /// \retval false else -// template typename enable::type isless(T x, U y) { return functions::isless(x, y); } - inline bool isless(half x, half y) { return functions::isless(x, y); } - inline bool isless(half x, expr y) { return functions::isless(x, y); } - inline bool isless(expr x, half y) { return functions::isless(x, y); } - inline bool isless(expr x, expr y) { return functions::isless(x, y); } - - /// Comparison for less equal. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x less equal \a y - /// \retval false else -// template typename enable::type islessequal(T x, U y) { return functions::islessequal(x, y); } - inline bool islessequal(half x, half y) { return functions::islessequal(x, y); } - inline bool islessequal(half x, expr y) { return functions::islessequal(x, y); } - inline bool islessequal(expr x, half y) { return functions::islessequal(x, y); } - inline bool islessequal(expr x, expr y) { return functions::islessequal(x, y); } - - /// Comarison for less or greater. - /// \param x first operand - /// \param y second operand - /// \retval true if either less or greater - /// \retval false else -// template typename enable::type islessgreater(T x, U y) { return functions::islessgreater(x, y); } - inline bool islessgreater(half x, half y) { return functions::islessgreater(x, y); } - inline bool islessgreater(half x, expr y) { return functions::islessgreater(x, y); } - inline bool islessgreater(expr x, half y) { return functions::islessgreater(x, y); } - inline bool islessgreater(expr x, expr y) { return functions::islessgreater(x, y); } - - /// Check if unordered. 
- /// \param x first operand - /// \param y second operand - /// \retval true if unordered (one or two NaN operands) - /// \retval false else -// template typename enable::type isunordered(T x, U y) { return functions::isunordered(x, y); } - inline bool isunordered(half x, half y) { return functions::isunordered(x, y); } - inline bool isunordered(half x, expr y) { return functions::isunordered(x, y); } - inline bool isunordered(expr x, half y) { return functions::isunordered(x, y); } - inline bool isunordered(expr x, expr y) { return functions::isunordered(x, y); } - - /// \name Casting - /// \{ - - /// Cast to or from half-precision floating point number. - /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted - /// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. - /// It uses the default rounding mode. - /// - /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types - /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler - /// error and casting between [half](\ref half_float::half)s is just a no-op. - /// \tparam T destination type (half or built-in arithmetic type) - /// \tparam U source type (half or built-in arithmetic type) - /// \param arg value to cast - /// \return \a arg converted to destination type - template T half_cast(U arg) { return half_caster::cast(arg); } - - /// Cast to or from half-precision floating point number. - /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted - /// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. 
- /// - /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types - /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler - /// error and casting between [half](\ref half_float::half)s is just a no-op. - /// \tparam T destination type (half or built-in arithmetic type) - /// \tparam R rounding mode to use. - /// \tparam U source type (half or built-in arithmetic type) - /// \param arg value to cast - /// \return \a arg converted to destination type - template T half_cast(U arg) { return half_caster::cast(arg); } - /// \} - } - - using detail::operator==; - using detail::operator!=; - using detail::operator<; - using detail::operator>; - using detail::operator<=; - using detail::operator>=; - using detail::operator+; - using detail::operator-; - using detail::operator*; - using detail::operator/; - using detail::operator<<; - using detail::operator>>; - - using detail::abs; - using detail::fabs; - using detail::fmod; - using detail::remainder; - using detail::remquo; - using detail::fma; - using detail::fmax; - using detail::fmin; - using detail::fdim; - using detail::nanh; - using detail::exp; - using detail::expm1; - using detail::exp2; - using detail::log; - using detail::log10; - using detail::log1p; - using detail::log2; - using detail::sqrt; - using detail::cbrt; - using detail::hypot; - using detail::pow; - using detail::sin; - using detail::cos; - using detail::tan; - using detail::asin; - using detail::acos; - using detail::atan; - using detail::atan2; - using detail::sinh; - using detail::cosh; - using detail::tanh; - using detail::asinh; - using detail::acosh; - using detail::atanh; - using detail::erf; - using detail::erfc; - using detail::lgamma; - using detail::tgamma; - using detail::ceil; - using detail::floor; - using detail::trunc; - using detail::round; - using detail::lround; - using detail::nearbyint; - using detail::rint; - 
using detail::lrint; #if HALF_ENABLE_CPP11_LONG_LONG - using detail::llround; - using detail::llrint; -#endif - using detail::frexp; - using detail::ldexp; - using detail::modf; - using detail::scalbn; - using detail::scalbln; - using detail::ilogb; - using detail::logb; - using detail::nextafter; - using detail::nexttoward; - using detail::copysign; - using detail::fpclassify; - using detail::isfinite; - using detail::isinf; - using detail::isnan; - using detail::isnormal; - using detail::signbit; - using detail::isgreater; - using detail::isgreaterequal; - using detail::isless; - using detail::islessequal; - using detail::islessgreater; - using detail::isunordered; - - using detail::half_cast; -} - - -/// Extensions to the C++ standard library. -namespace std +/// Unsigned integer of (at least) 64 bits width. +template <> +struct bits : conditional::digits >= 64, unsigned long, unsigned long long> { - /// Numeric limits for half-precision floats. - /// Because of the underlying single-precision implementation of many operations, it inherits some properties from - /// `std::numeric_limits`. - template<> class numeric_limits : public numeric_limits - { - public: - /// Supports signed values. - static HALF_CONSTEXPR_CONST bool is_signed = true; - - /// Is not exact. - static HALF_CONSTEXPR_CONST bool is_exact = false; - - /// Doesn't provide modulo arithmetic. - static HALF_CONSTEXPR_CONST bool is_modulo = false; - - /// IEEE conformant. - static HALF_CONSTEXPR_CONST bool is_iec559 = true; - - /// Supports infinity. - static HALF_CONSTEXPR_CONST bool has_infinity = true; +}; +#else +/// Unsigned integer of (at least) 64 bits width. +template <> +struct bits +{ + typedef unsigned long type; +}; +#endif +#endif - /// Supports quiet NaNs. - static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true; +/// Tag type for binary construction. +struct binary_t +{ +}; - /// Supports subnormal values. 
- static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present; +/// Tag for binary construction. +HALF_CONSTEXPR_CONST binary_t binary = binary_t(); - /// Rounding mode. - /// Due to the mix of internal single-precision computations (using the rounding mode of the underlying - /// single-precision implementation) with the rounding mode of the single-to-half conversions, the actual rounding - /// mode might be `std::round_indeterminate` if the default half-precision rounding mode doesn't match the - /// single-precision rounding mode. - static HALF_CONSTEXPR_CONST float_round_style round_style = (std::numeric_limits::round_style== - half_float::half::round_style) ? half_float::half::round_style : round_indeterminate; +/// Temporary half-precision expression. +/// This class represents a half-precision expression which just stores a single-precision value internally. +struct expr +{ + /// Conversion constructor. + /// \param f single-precision value to convert + explicit HALF_CONSTEXPR expr(float f) HALF_NOEXCEPT : value_(f) {} + + /// Conversion to single-precision. + /// \return single precision value representing expression value + HALF_CONSTEXPR operator float() const HALF_NOEXCEPT + { + return value_; + } + +private: + /// Internal expression value stored in single-precision. + float value_; +}; + +/// SFINAE helper for generic half-precision functions. +/// This class template has to be specialized for each valid combination of argument types to provide a corresponding +/// `type` member equivalent to \a T. 
+/// \tparam T type to return +template +struct enable +{ +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; + +/// Return type for specialized generic 2-argument half-precision functions. +/// This class template has to be specialized for each valid combination of argument types to provide a corresponding +/// `type` member denoting the appropriate return type. +/// \tparam T first argument type +/// \tparam U first argument type +template +struct result : enable +{ +}; +template <> +struct result +{ + typedef half type; +}; + +/// \name Classification helpers +/// \{ + +/// Check for infinity. +/// \tparam T argument type (builtin floating point type) +/// \param arg value to query +/// \retval true if infinity +/// \retval false else +template +bool builtin_isinf(T arg) +{ +#if HALF_ENABLE_CPP11_CMATH + return std::isinf(arg); +#elif defined(_MSC_VER) + return !::_finite(static_cast(arg)) && !::_isnan(static_cast(arg)); +#else + return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); +#endif +} - /// Significant digits. - static HALF_CONSTEXPR_CONST int digits = 11; +/// Check for NaN. 
+/// \tparam T argument type (builtin floating point type) +/// \param arg value to query +/// \retval true if not a number +/// \retval false else +template +bool builtin_isnan(T arg) +{ +#if HALF_ENABLE_CPP11_CMATH + return std::isnan(arg); +#elif defined(_MSC_VER) + return ::_isnan(static_cast(arg)) != 0; +#else + return arg != arg; +#endif +} - /// Significant decimal digits. - static HALF_CONSTEXPR_CONST int digits10 = 3; +/// Check sign. +/// \tparam T argument type (builtin floating point type) +/// \param arg value to query +/// \retval true if signbit set +/// \retval false else +template +bool builtin_signbit(T arg) +{ +#if HALF_ENABLE_CPP11_CMATH + return std::signbit(arg); +#else + return arg < T() || (arg == T() && T(1) / arg < T()); +#endif +} - /// Required decimal digits to represent all possible values. - static HALF_CONSTEXPR_CONST int max_digits10 = 5; +/// \} +/// \name Conversion +/// \{ + +/// Convert IEEE single-precision to half-precision. +/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \param value single-precision value +/// \return binary representation of half-precision value +template +uint16 float2half_impl(float value, true_type) +{ + typedef bits::type uint32; + uint32 bits; // = *reinterpret_cast(&value); //violating strict aliasing! 
+ std::memcpy(&bits, &value, sizeof(float)); + /* uint16 hbits = (bits>>16) & 0x8000; + bits &= 0x7FFFFFFF; + int exp = bits >> 23; + if(exp == 255) + return hbits | 0x7C00 | (0x3FF&-static_cast((bits&0x7FFFFF)!=0)); + if(exp > 142) + { + if(R == std::round_toward_infinity) + return hbits | 0x7C00 - (hbits>>15); + if(R == std::round_toward_neg_infinity) + return hbits | 0x7BFF + (hbits>>15); + return hbits | 0x7BFF + (R!=std::round_toward_zero); + } + int g, s; + if(exp > 112) + { + g = (bits>>12) & 1; + s = (bits&0xFFF) != 0; + hbits |= ((exp-112)<<10) | ((bits>>13)&0x3FF); + } + else if(exp > 101) + { + int i = 125 - exp; + bits = (bits&0x7FFFFF) | 0x800000; + g = (bits>>i) & 1; + s = (bits&((1L<> (i+1); + } + else + { + g = 0; + s = bits != 0; + } + if(R == std::round_to_nearest) + #if HALF_ROUND_TIES_TO_EVEN + hbits += g & (s|hbits); + #else + hbits += g; + #endif + else if(R == std::round_toward_infinity) + hbits += ~(hbits>>15) & (s|g); + else if(R == std::round_toward_neg_infinity) + hbits += (hbits>>15) & (g|s); + */ + static uint16 const base_table[512] = {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, + 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 
0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, + 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, + 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
0x8000, 0x8000, + 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, + 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 0xC000, 0xC400, 0xC800, + 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00}; + static const unsigned char shift_table[512] = {24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13}; + uint16 hbits = base_table[bits >> 23] + static_cast((bits & 0x7FFFFF) >> shift_table[bits >> 23]); + if (R == std::round_to_nearest) + hbits += (((bits & 0x7FFFFF) >> (shift_table[bits >> 23] - 1)) | (((bits >> 23) & 0xFF) == 102)) + & ((hbits & 0x7C00) != 0x7C00) +#if HALF_ROUND_TIES_TO_EVEN + & (((((static_cast(1) << (shift_table[bits >> 23] - 1)) - 1) & bits) != 0) | hbits) +#endif + ; + else if (R == std::round_toward_zero) + hbits -= ((hbits & 0x7FFF) == 0x7C00) & ~shift_table[bits >> 23]; + else if (R == std::round_toward_infinity) + hbits += ((((bits & 0x7FFFFF & ((static_cast(1) << 
(shift_table[bits >> 23])) - 1)) != 0) + | (((bits >> 23) <= 102) & ((bits >> 23) != 0))) + & (hbits < 0x7C00)) + - ((hbits == 0xFC00) & ((bits >> 23) != 511)); + else if (R == std::round_toward_neg_infinity) + hbits += ((((bits & 0x7FFFFF & ((static_cast(1) << (shift_table[bits >> 23])) - 1)) != 0) + | (((bits >> 23) <= 358) & ((bits >> 23) != 256))) + & (hbits < 0xFC00) & (hbits >> 15)) + - ((hbits == 0x7C00) & ((bits >> 23) != 255)); + return hbits; +} - /// Number base. - static HALF_CONSTEXPR_CONST int radix = 2; +/// Convert IEEE double-precision to half-precision. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \param value double-precision value +/// \return binary representation of half-precision value +template +uint16 float2half_impl(double value, true_type) +{ + typedef bits::type uint32; + typedef bits::type uint64; + uint64 bits; // = *reinterpret_cast(&value); //violating strict aliasing! + std::memcpy(&bits, &value, sizeof(double)); + uint32 hi = bits >> 32, lo = bits & 0xFFFFFFFF; + uint16 hbits = (hi >> 16) & 0x8000; + hi &= 0x7FFFFFFF; + int exp = hi >> 20; + if (exp == 2047) + return hbits | 0x7C00 | (0x3FF & -static_cast((bits & 0xFFFFFFFFFFFFF) != 0)); + if (exp > 1038) + { + if (R == std::round_toward_infinity) + return hbits | (0x7C00 - (hbits >> 15)); + if (R == std::round_toward_neg_infinity) + return hbits | (0x7BFF + (hbits >> 15)); + return hbits | (0x7BFF + (R != std::round_toward_zero)); + } + int g, s = lo != 0; + if (exp > 1008) + { + g = (hi >> 9) & 1; + s |= (hi & 0x1FF) != 0; + hbits |= ((exp - 1008) << 10) | ((hi >> 10) & 0x3FF); + } + else if (exp > 997) + { + int i = 1018 - exp; + hi = (hi & 0xFFFFF) | 0x100000; + g = (hi >> i) & 1; + s |= (hi & ((1L << i) - 1)) != 0; + hbits |= hi >> (i + 1); + } + else + { + g = 0; + s |= hi != 0; + } + if (R == std::round_to_nearest) +#if HALF_ROUND_TIES_TO_EVEN + hbits += g & (s | hbits); +#else + hbits += g; +#endif + else if (R == 
std::round_toward_infinity) + hbits += ~(hbits >> 15) & (s | g); + else if (R == std::round_toward_neg_infinity) + hbits += (hbits >> 15) & (g | s); + return hbits; +} - /// One more than smallest exponent. - static HALF_CONSTEXPR_CONST int min_exponent = -13; +/// Convert non-IEEE floating point to half-precision. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam T source type (builtin floating point type) +/// \param value floating point value +/// \return binary representation of half-precision value +template +uint16 float2half_impl(T value, false_type) +{ + uint16 hbits = static_cast(builtin_signbit(value)) << 15; + if (value == T()) + return hbits; + if (builtin_isnan(value)) + return hbits | 0x7FFF; + if (builtin_isinf(value)) + return hbits | 0x7C00; + int exp; + std::frexp(value, &exp); + if (exp > 16) + { + if (R == std::round_toward_infinity) + return hbits | (0x7C00 - (hbits >> 15)); + if (R == std::round_toward_neg_infinity) + return hbits | (0x7BFF + (hbits >> 15)); + return hbits | (0x7BFF + (R != std::round_toward_zero)); + } + if (exp < -13) + value = std::ldexp(value, 24); + else + { + value = std::ldexp(value, 11 - exp); + hbits |= ((exp + 13) << 10); + } + T ival, frac = std::modf(value, &ival); + hbits += static_cast(std::abs(static_cast(ival))); + if (R == std::round_to_nearest) + { + frac = std::abs(frac); +#if HALF_ROUND_TIES_TO_EVEN + hbits += (frac > T(0.5)) | ((frac == T(0.5)) & hbits); +#else + hbits += frac >= T(0.5); +#endif + } + else if (R == std::round_toward_infinity) + hbits += frac > T(); + else if (R == std::round_toward_neg_infinity) + hbits += frac < T(); + return hbits; +} - /// Smallest normalized representable power of 10. - static HALF_CONSTEXPR_CONST int min_exponent10 = -4; +/// Convert floating point to half-precision. 
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam T source type (builtin floating point type) +/// \param value floating point value +/// \return binary representation of half-precision value +template +uint16 float2half(T value) +{ + return float2half_impl( + value, bool_type::is_iec559 && sizeof(typename bits::type) == sizeof(T)>()); +} - /// One more than largest exponent - static HALF_CONSTEXPR_CONST int max_exponent = 16; +/// Convert integer to half-precision floating point. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam S `true` if value negative, `false` else +/// \tparam T type to convert (builtin integer type) +/// \param value non-negative integral value +/// \return binary representation of half-precision value +template +uint16 int2half_impl(T value) +{ +#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_integral::value, "int to half conversion only supports builtin integer types"); +#endif + if (S) + value = -value; + uint16 bits = S << 15; + if (value > 0xFFFF) + { + if (R == std::round_toward_infinity) + bits |= 0x7C00 - S; + else if (R == std::round_toward_neg_infinity) + bits |= 0x7BFF + S; + else + bits |= 0x7BFF + (R != std::round_toward_zero); + } + else if (value) + { + unsigned int m = value, exp = 24; + for (; m < 0x400; m <<= 1, --exp) + ; + for (; m > 0x7FF; m >>= 1, ++exp) + ; + bits |= (exp << 10) + m; + if (exp > 24) + { + if (R == std::round_to_nearest) + bits += (value >> (exp - 25)) & 1 +#if HALF_ROUND_TIES_TO_EVEN + & (((((1 << (exp - 25)) - 1) & value) != 0) | bits) +#endif + ; + else if (R == std::round_toward_infinity) + bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & !S; + else if (R == std::round_toward_neg_infinity) + bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & S; + } + } + return bits; +} - /// Largest finitely representable power of 10. 
- static HALF_CONSTEXPR_CONST int max_exponent10 = 4; +/// Convert integer to half-precision floating point. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam T type to convert (builtin integer type) +/// \param value integral value +/// \return binary representation of half-precision value +template +uint16 int2half(T value) +{ + return (value < 0) ? int2half_impl(value) : int2half_impl(value); +} - /// Smallest positive normal value. - static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0400); } +/// Convert half-precision to IEEE single-precision. +/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). +/// \param value binary representation of half-precision value +/// \return single-precision value +inline float half2float_impl(uint16 value, float, true_type) +{ + typedef bits::type uint32; + /* uint32 bits = static_cast(value&0x8000) << 16; + int abs = value & 0x7FFF; + if(abs) + { + bits |= 0x38000000 << static_cast(abs>=0x7C00); + for(; abs<0x400; abs<<=1,bits-=0x800000) ; + bits += static_cast(abs) << 13; + } + */ + static const uint32 mantissa_table[2048] = {0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, + 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, + 0x35700000, 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, + 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, 0x36000000, + 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, + 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, 0x36400000, 0x36440000, 0x36480000, + 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, + 0x36700000, 
0x36740000, 0x36780000, 0x367C0000, 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, + 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, + 0x369C0000, 0x369E0000, 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, + 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, + 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, + 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 0x36E00000, 0x36E20000, + 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, + 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000, + 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, + 0x370D0000, 0x370E0000, 0x370F0000, 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, + 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, + 0x371F0000, 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, + 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, 0x37300000, + 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, + 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, 0x37400000, 0x37410000, 0x37420000, + 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, + 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, + 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, + 0x375E0000, 0x375F0000, 
0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, + 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, + 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, + 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 0x37800000, 0x37808000, + 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, + 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000, + 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, + 0x378E8000, 0x378F0000, 0x378F8000, 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, + 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, + 0x37978000, 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, + 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, 0x37A00000, + 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, + 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, 0x37A80000, 0x37A88000, 0x37A90000, + 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, + 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, + 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, + 0x37B70000, 0x37B78000, 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, + 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, + 0x37C00000, 0x37C08000, 0x37C10000, 
0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, + 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 0x37C80000, 0x37C88000, + 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, + 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, + 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, + 0x37D68000, 0x37D70000, 0x37D78000, 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, + 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, + 0x37DF8000, 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, + 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, 0x37E80000, + 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, + 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, 0x37F00000, 0x37F08000, 0x37F10000, + 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, + 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, + 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, + 0x37FF0000, 0x37FF8000, 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, + 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, + 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, + 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, 0x38080000, 0x38084000, + 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 
0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, + 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, + 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, + 0x380F4000, 0x380F8000, 0x380FC000, 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, + 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, + 0x3813C000, 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, + 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, 0x38180000, + 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, + 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, 0x381C0000, 0x381C4000, 0x381C8000, + 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, + 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, + 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, + 0x38238000, 0x3823C000, 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, + 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, + 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, + 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 0x382C0000, 0x382C4000, + 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, + 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, 0x38300000, 0x38304000, 0x38308000, 0x3830C000, + 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 
0x38324000, 0x38328000, 0x3832C000, 0x38330000, + 0x38334000, 0x38338000, 0x3833C000, 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, + 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, + 0x3837C000, 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, + 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, 0x383C0000, + 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, + 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, 0x38400000, 0x38404000, 0x38408000, + 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, + 0x38430000, 0x38434000, 0x38438000, 0x3843C000, 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, + 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, + 0x38478000, 0x3847C000, 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, + 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, + 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, + 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 0x38500000, 0x38504000, + 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, + 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, 0x38540000, 0x38544000, 0x38548000, 0x3854C000, + 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, + 0x38574000, 0x38578000, 0x3857C000, 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, + 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 
0x385B0000, 0x385B4000, 0x385B8000, + 0x385BC000, 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, + 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, 0x38600000, + 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, + 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, 0x38640000, 0x38644000, 0x38648000, + 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, + 0x38670000, 0x38674000, 0x38678000, 0x3867C000, 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, + 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, + 0x386B8000, 0x386BC000, 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, + 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, + 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, + 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, 0x38740000, 0x38744000, + 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, + 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, 0x38780000, 0x38784000, 0x38788000, 0x3878C000, + 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, + 0x387B4000, 0x387B8000, 0x387BC000, 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, + 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, + 0x387FC000, 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, + 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 
0x3801E000, 0x38020000, + 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, + 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, 0x38040000, 0x38042000, 0x38044000, + 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, + 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, + 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, + 0x3807C000, 0x3807E000, 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, + 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, + 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, + 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 0x380C0000, 0x380C2000, + 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, + 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, + 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, + 0x380FA000, 0x380FC000, 0x380FE000, 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, + 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, + 0x3811E000, 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, + 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, 0x38140000, + 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, + 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, 0x38160000, 0x38162000, 
0x38164000, + 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, + 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, + 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, + 0x3819C000, 0x3819E000, 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, + 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, + 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, + 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 0x381E0000, 0x381E2000, + 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, + 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, 0x38200000, 0x38202000, 0x38204000, 0x38206000, + 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, + 0x3821A000, 0x3821C000, 0x3821E000, 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, + 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, + 0x3823E000, 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, + 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, 0x38260000, + 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, + 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, 0x38280000, 0x38282000, 0x38284000, + 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, + 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, + 
0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, + 0x382BC000, 0x382BE000, 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, + 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, + 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, + 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 0x38300000, 0x38302000, + 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, + 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, 0x38320000, 0x38322000, 0x38324000, 0x38326000, + 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, + 0x3833A000, 0x3833C000, 0x3833E000, 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, + 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, + 0x3835E000, 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, + 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, 0x38380000, + 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, + 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, 0x383A0000, 0x383A2000, 0x383A4000, + 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, + 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, + 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, + 0x383DC000, 0x383DE000, 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, + 0x383EE000, 
0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, + 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, + 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, 0x38420000, 0x38422000, + 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, + 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, 0x38440000, 0x38442000, 0x38444000, 0x38446000, + 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, + 0x3845A000, 0x3845C000, 0x3845E000, 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, + 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, + 0x3847E000, 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, + 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, 0x384A0000, + 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, + 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, 0x384C0000, 0x384C2000, 0x384C4000, + 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, + 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, + 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, + 0x384FC000, 0x384FE000, 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, + 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, + 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, + 0x38532000, 0x38534000, 
0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 0x38540000, 0x38542000, + 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, + 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, 0x38560000, 0x38562000, 0x38564000, 0x38566000, + 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, + 0x3857A000, 0x3857C000, 0x3857E000, 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, + 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, + 0x3859E000, 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, + 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, 0x385C0000, + 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, + 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, 0x385E0000, 0x385E2000, 0x385E4000, + 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, + 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, + 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, + 0x3861C000, 0x3861E000, 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, + 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, + 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, + 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 0x38660000, 0x38662000, + 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, + 0x38676000, 0x38678000, 0x3867A000, 
0x3867C000, 0x3867E000, 0x38680000, 0x38682000, 0x38684000, 0x38686000, + 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, + 0x3869A000, 0x3869C000, 0x3869E000, 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, + 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, + 0x386BE000, 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, + 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, 0x386E0000, + 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, + 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, 0x38700000, 0x38702000, 0x38704000, + 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, + 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, + 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, + 0x3873C000, 0x3873E000, 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, + 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, + 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, + 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 0x38780000, 0x38782000, + 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, + 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, + 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, + 0x387BA000, 0x387BC000, 0x387BE000, 0x387C0000, 
0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, + 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, + 0x387DE000, 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, + 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000}; + static const uint32 exponent_table[64] = {0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, + 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, + 0x07800000, 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, + 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, 0x80000000, + 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, + 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, 0x88000000, 0x88800000, 0x89000000, + 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, + 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000}; + static const unsigned short offset_table[64] = {0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 1024, 1024, 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024}; + uint32 bits = mantissa_table[offset_table[value >> 10] + (value & 0x3FF)] + exponent_table[value >> 10]; + // return *reinterpret_cast(&bits); //violating strict aliasing! + float out; + std::memcpy(&out, &bits, sizeof(float)); + return out; +} - /// Smallest finite value. 
- static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0xFBFF); } +/// Convert half-precision to IEEE double-precision. +/// \param value binary representation of half-precision value +/// \return double-precision value +inline double half2float_impl(uint16 value, double, true_type) +{ + typedef bits::type uint32; + typedef bits::type uint64; + uint32 hi = static_cast(value & 0x8000) << 16; + int abs = value & 0x7FFF; + if (abs) + { + hi |= 0x3F000000 << static_cast(abs >= 0x7C00); + for (; abs < 0x400; abs <<= 1, hi -= 0x100000) + ; + hi += static_cast(abs) << 10; + } + uint64 bits = static_cast(hi) << 32; + // return *reinterpret_cast(&bits); //violating strict aliasing! + double out; + std::memcpy(&out, &bits, sizeof(double)); + return out; +} - /// Largest finite value. - static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7BFF); } +/// Convert half-precision to non-IEEE floating point. +/// \tparam T type to convert to (builtin integer type) +/// \param value binary representation of half-precision value +/// \return floating point value +template +T half2float_impl(uint16 value, T, ...) +{ + T out; + int abs = value & 0x7FFF; + if (abs > 0x7C00) + out = std::numeric_limits::has_quiet_NaN ? std::numeric_limits::quiet_NaN() : T(); + else if (abs == 0x7C00) + out = std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : std::numeric_limits::max(); + else if (abs > 0x3FF) + out = std::ldexp(static_cast((abs & 0x3FF) | 0x400), (abs >> 10) - 25); + else + out = std::ldexp(static_cast(abs), -24); + return (value & 0x8000) ? -out : out; +} - /// Difference between one and next representable value. - static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x1400); } +/// Convert half-precision to floating point. 
+/// \tparam T type to convert to (builtin integer type) +/// \param value binary representation of half-precision value +/// \return floating point value +template +T half2float(uint16 value) +{ + return half2float_impl( + value, T(), bool_type::is_iec559 && sizeof(typename bits::type) == sizeof(T)>()); +} - /// Maximum rounding error. - static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW - { return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); } +/// Convert half-precision floating point to integer. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam E `true` for round to even, `false` for round away from zero +/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign +/// bits) +/// \param value binary representation of half-precision value +/// \return integral value +template +T half2int_impl(uint16 value) +{ +#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_integral::value, "half to int conversion only supports builtin integer types"); +#endif + unsigned int e = value & 0x7FFF; + if (e >= 0x7C00) + return (value & 0x8000) ? std::numeric_limits::min() : std::numeric_limits::max(); + if (e < 0x3800) + { + if (R == std::round_toward_infinity) + return T(~(value >> 15) & (e != 0)); + if (R == std::round_toward_neg_infinity) + return -T(value > 0x8000); + return T(); + } + unsigned int m = (value & 0x3FF) | 0x400; + e >>= 10; + if (e < 25) + { + if (R == std::round_to_nearest) + m += (1 << (24 - e)) - (~(m >> (25 - e)) & E); + else if (R == std::round_toward_infinity) + m += ((value >> 15) - 1) & ((1 << (25 - e)) - 1U); + else if (R == std::round_toward_neg_infinity) + m += -(value >> 15) & ((1 << (25 - e)) - 1U); + m >>= 25 - e; + } + else + m <<= e - 25; + return (value & 0x8000) ? -static_cast(m) : static_cast(m); +} - /// Positive infinity. 
- static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7C00); } +/// Convert half-precision floating point to integer. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign +/// bits) +/// \param value binary representation of half-precision value +/// \return integral value +template +T half2int(uint16 value) +{ + return half2int_impl(value); +} - /// Quiet NaN. - static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7FFF); } +/// Convert half-precision floating point to integer using round-to-nearest-away-from-zero. +/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign +/// bits) +/// \param value binary representation of half-precision value +/// \return integral value +template +T half2int_up(uint16 value) +{ + return half2int_impl(value); +} - /// Signalling NaN. - static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7DFF); } +/// Round half-precision number to nearest integer value. 
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam E `true` for round to even, `false` for round away from zero +/// \param value binary representation of half-precision value +/// \return half-precision bits for nearest integral value +template +uint16 round_half_impl(uint16 value) +{ + unsigned int e = value & 0x7FFF; + uint16 result = value; + if (e < 0x3C00) + { + result &= 0x8000; + if (R == std::round_to_nearest) + result |= 0x3C00U & -(e >= (0x3800 + E)); + else if (R == std::round_toward_infinity) + result |= 0x3C00U & -(~(value >> 15) & (e != 0)); + else if (R == std::round_toward_neg_infinity) + result |= 0x3C00U & -(value > 0x8000); + } + else if (e < 0x6400) + { + e = 25 - (e >> 10); + unsigned int mask = (1 << e) - 1; + if (R == std::round_to_nearest) + result += (1 << (e - 1)) - (~(result >> e) & E); + else if (R == std::round_toward_infinity) + result += mask & ((value >> 15) - 1); + else if (R == std::round_toward_neg_infinity) + result += mask & -(value >> 15); + result &= ~mask; + } + return result; +} - /// Smallest positive subnormal value. - static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0001); } - }; +/// Round half-precision number to nearest integer value. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \param value binary representation of half-precision value +/// \return half-precision bits for nearest integral value +template +uint16 round_half(uint16 value) +{ + return round_half_impl(value); +} +/// Round half-precision number to nearest integer value using round-to-nearest-away-from-zero. 
+/// \param value binary representation of half-precision value +/// \return half-precision bits for nearest integral value +inline uint16 round_half_up(uint16 value) +{ + return round_half_impl(value); +} +/// \} + +struct functions; +template +struct unary_specialized; +template +struct binary_specialized; +template +struct half_caster; +} // namespace detail + +/// Half-precision floating point type. +/// This class implements an IEEE-conformant half-precision floating point type with the usual arithmetic operators and +/// conversions. It is implicitly convertible to single-precision floating point, which makes artihmetic expressions and +/// functions with mixed-type operands to be of the most precise operand type. Additionally all arithmetic operations +/// (and many mathematical functions) are carried out in single-precision internally. All conversions from single- to +/// half-precision are done using the library's default rounding mode, but temporary results inside chained arithmetic +/// expressions are kept in single-precision as long as possible (while of course still maintaining a strong +/// half-precision type). +/// +/// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and +/// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which +/// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the +/// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not neccessarily have to be +/// of exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will +/// most probably not ivolve any additional "magic" or padding beyond the simple binary representation of the underlying +/// 16-bit IEEE number, even if not strictly guaranteed by the standard. 
But even then it only has an actual size of 16 +/// bits if your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the +/// case on nearly any reasonable platform. +/// +/// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable +/// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation. +#if defined(__clang__) +/* this is a WAR, after nvcc's process, + * `friend class std::numeric_limits;` in the following code, becomes + * `friend class numeric_limits;`, namespsace `std` is removed, which results compilation error in clang. + * tested on nvcc V10.0.95, and clang 5.0.300080 in ndk 16b + */ +using std::numeric_limits; +using std::hash; +#endif +class half +{ + friend struct detail::functions; + friend struct detail::unary_specialized; + friend struct detail::binary_specialized; + template + friend struct detail::half_caster; + friend class std::numeric_limits; #if HALF_ENABLE_CPP11_HASH - /// Hash function for half-precision floats. - /// This is only defined if C++11 `std::hash` is supported and enabled. - template<> struct hash //: unary_function - { - /// Type of function argument. - typedef half_float::half argument_type; + friend struct std::hash; +#endif +#if HALF_ENABLE_CPP11_USER_LITERALS + friend half literal::operator"" _h(long double); +#endif - /// Function return type. - typedef size_t result_type; +public: + /// Default constructor. + /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics + /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics. + HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} + + /// Copy constructor. 
+ /// \tparam T type of concrete half expression + /// \param rhs half expression to copy from + half(detail::expr rhs) + : data_(detail::float2half(static_cast(rhs))) + { + } + + /// Conversion constructor. + /// \param rhs float to convert + explicit half(float rhs) + : data_(detail::float2half(rhs)) + { + } + + /// Conversion to single-precision. + /// \return single precision value representing expression value + operator float() const + { + return detail::half2float(data_); + } + + /// Assignment operator. + /// \tparam T type of concrete half expression + /// \param rhs half expression to copy from + /// \return reference to this half + half& operator=(detail::expr rhs) + { + return *this = static_cast(rhs); + } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to add + /// \return reference to this half + template + typename detail::enable::type operator+=(T rhs) + { + return *this += static_cast(rhs); + } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to subtract + /// \return reference to this half + template + typename detail::enable::type operator-=(T rhs) + { + return *this -= static_cast(rhs); + } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to multiply with + /// \return reference to this half + template + typename detail::enable::type operator*=(T rhs) + { + return *this *= static_cast(rhs); + } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to divide by + /// \return reference to this half + template + typename detail::enable::type operator/=(T rhs) + { + return *this /= static_cast(rhs); + } + + /// Assignment operator. 
+ /// \param rhs single-precision value to copy from + /// \return reference to this half + half& operator=(float rhs) + { + data_ = detail::float2half(rhs); + return *this; + } + + /// Arithmetic assignment. + /// \param rhs single-precision value to add + /// \return reference to this half + half& operator+=(float rhs) + { + data_ = detail::float2half(detail::half2float(data_) + rhs); + return *this; + } + + /// Arithmetic assignment. + /// \param rhs single-precision value to subtract + /// \return reference to this half + half& operator-=(float rhs) + { + data_ = detail::float2half(detail::half2float(data_) - rhs); + return *this; + } + + /// Arithmetic assignment. + /// \param rhs single-precision value to multiply with + /// \return reference to this half + half& operator*=(float rhs) + { + data_ = detail::float2half(detail::half2float(data_) * rhs); + return *this; + } + + /// Arithmetic assignment. + /// \param rhs single-precision value to divide by + /// \return reference to this half + half& operator/=(float rhs) + { + data_ = detail::float2half(detail::half2float(data_) / rhs); + return *this; + } + + /// Prefix increment. + /// \return incremented half value + half& operator++() + { + return *this += 1.0f; + } + + /// Prefix decrement. + /// \return decremented half value + half& operator--() + { + return *this -= 1.0f; + } + + /// Postfix increment. + /// \return non-incremented half value + half operator++(int) + { + half out(*this); + ++*this; + return out; + } + + /// Postfix decrement. + /// \return non-decremented half value + half operator--(int) + { + half out(*this); + --*this; + return out; + } + +private: + /// Rounding mode to use + static const std::float_round_style round_style = (std::float_round_style) (HALF_ROUND_STYLE); + + /// Constructor. 
+ /// \param bits binary representation to set half to + HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) HALF_NOEXCEPT : data_(bits) {} + + /// Internal binary representation + detail::uint16 data_; +}; - /// Compute hash function. - /// \param arg half to hash - /// \return hash value - result_type operator()(argument_type arg) const - { return hash()(static_cast(arg.data_)&-(arg.data_!=0x8000)); } - }; -#endif +#if HALF_ENABLE_CPP11_USER_LITERALS +namespace literal +{ +/// Half literal. +/// While this returns an actual half-precision value, half literals can unfortunately not be constant expressions due +/// to rather involved conversions. +/// \param value literal value +/// \return half with given value (if representable) +inline half operator"" _h(long double value) +{ + return half(detail::binary, detail::float2half(value)); } +} // namespace literal +#endif +namespace detail +{ +/// Wrapper implementing unspecialized half-precision functions. +struct functions +{ + /// Addition implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision sum stored in single-precision + static expr plus(float x, float y) + { + return expr(x + y); + } + + /// Subtraction implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision difference stored in single-precision + static expr minus(float x, float y) + { + return expr(x - y); + } + + /// Multiplication implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision product stored in single-precision + static expr multiplies(float x, float y) + { + return expr(x * y); + } + + /// Division implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision quotient stored in single-precision + static expr divides(float x, float y) + { + return expr(x / y); + } + + /// Output implementation. 
+ /// \param out stream to write to + /// \param arg value to write + /// \return reference to stream + template + static std::basic_ostream& write(std::basic_ostream& out, float arg) + { + return out << arg; + } + + /// Input implementation. + /// \param in stream to read from + /// \param arg half to read into + /// \return reference to stream + template + static std::basic_istream& read(std::basic_istream& in, half& arg) + { + float f; + if (in >> f) + arg = f; + return in; + } + + /// Modulo implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision division remainder stored in single-precision + static expr fmod(float x, float y) + { + return expr(std::fmod(x, y)); + } + + /// Remainder implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision division remainder stored in single-precision + static expr remainder(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::remainder(x, y)); +#else + if (builtin_isnan(x) || builtin_isnan(y)) + return expr(std::numeric_limits::quiet_NaN()); + float ax = std::fabs(x), ay = std::fabs(y); + if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) + return expr(std::numeric_limits::quiet_NaN()); + if (ay >= 65536.0f) + return expr(x); + if (ax == ay) + return expr(builtin_signbit(x) ? -0.0f : 0.0f); + ax = std::fmod(ax, ay + ay); + float y2 = 0.5f * ay; + if (ax > y2) + { + ax -= ay; + if (ax >= y2) + ax -= ay; + } + return expr(builtin_signbit(x) ? -ax : ax); +#endif + } + + /// Remainder implementation. 
+ /// \param x first operand + /// \param y second operand + /// \param quo address to store quotient bits at + /// \return Half-precision division remainder stored in single-precision + static expr remquo(float x, float y, int* quo) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::remquo(x, y, quo)); +#else + if (builtin_isnan(x) || builtin_isnan(y)) + return expr(std::numeric_limits::quiet_NaN()); + bool sign = builtin_signbit(x), qsign = static_cast(sign ^ builtin_signbit(y)); + float ax = std::fabs(x), ay = std::fabs(y); + if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) + return expr(std::numeric_limits::quiet_NaN()); + if (ay >= 65536.0f) + return expr(x); + if (ax == ay) + return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f); + ax = std::fmod(ax, 8.0f * ay); + int cquo = 0; + if (ax >= 4.0f * ay) + { + ax -= 4.0f * ay; + cquo += 4; + } + if (ax >= 2.0f * ay) + { + ax -= 2.0f * ay; + cquo += 2; + } + float y2 = 0.5f * ay; + if (ax > y2) + { + ax -= ay; + ++cquo; + if (ax >= y2) + { + ax -= ay; + ++cquo; + } + } + return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax); +#endif + } + + /// Positive difference implementation. + /// \param x first operand + /// \param y second operand + /// \return Positive difference stored in single-precision + static expr fdim(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::fdim(x, y)); +#else + return expr((x <= y) ? 0.0f : (x - y)); +#endif + } + + /// Fused multiply-add implementation. + /// \param x first operand + /// \param y second operand + /// \param z third operand + /// \return \a x * \a y + \a z stored in single-precision + static expr fma(float x, float y, float z) + { +#if HALF_ENABLE_CPP11_CMATH && defined(FP_FAST_FMAF) + return expr(std::fma(x, y, z)); +#else + return expr(x * y + z); +#endif + } + + /// Get NaN. + /// \return Half-precision quiet NaN + static half nanh() + { + return half(binary, 0x7FFF); + } + + /// Exponential implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr exp(float arg) + { + return expr(std::exp(arg)); + } + + /// Exponential implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr expm1(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::expm1(arg)); +#else + return expr(static_cast(std::exp(static_cast(arg)) - 1.0)); +#endif + } + + /// Binary exponential implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr exp2(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::exp2(arg)); +#else + return expr(static_cast(std::exp(arg * 0.69314718055994530941723212145818))); +#endif + } + + /// Logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr log(float arg) + { + return expr(std::log(arg)); + } + + /// Common logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr log10(float arg) + { + return expr(std::log10(arg)); + } + + /// Logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr log1p(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::log1p(arg)); +#else + return expr(static_cast(std::log(1.0 + arg))); +#endif + } + + /// Binary logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr log2(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::log2(arg)); +#else + return expr(static_cast(std::log(static_cast(arg)) * 1.4426950408889634073599246810019)); +#endif + } + + /// Square root implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr sqrt(float arg) + { + return expr(std::sqrt(arg)); + } + + /// Cubic root implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr cbrt(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::cbrt(arg)); +#else + if (builtin_isnan(arg) || builtin_isinf(arg)) + return expr(arg); + return expr(builtin_signbit(arg) ? -static_cast(std::pow(-static_cast(arg), 1.0 / 3.0)) + : static_cast(std::pow(static_cast(arg), 1.0 / 3.0))); +#endif + } + + /// Hypotenuse implementation. + /// \param x first argument + /// \param y second argument + /// \return function value stored in single-preicision + static expr hypot(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::hypot(x, y)); +#else + return expr((builtin_isinf(x) || builtin_isinf(y)) + ? std::numeric_limits::infinity() + : static_cast(std::sqrt(static_cast(x) * x + static_cast(y) * y))); +#endif + } + + /// Power implementation. + /// \param base value to exponentiate + /// \param exp power to expontiate to + /// \return function value stored in single-preicision + static expr pow(float base, float exp) + { + return expr(std::pow(base, exp)); + } + + /// Sine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr sin(float arg) + { + return expr(std::sin(arg)); + } + + /// Cosine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr cos(float arg) + { + return expr(std::cos(arg)); + } + + /// Tan implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr tan(float arg) + { + return expr(std::tan(arg)); + } + + /// Arc sine implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr asin(float arg) + { + return expr(std::asin(arg)); + } + + /// Arc cosine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr acos(float arg) + { + return expr(std::acos(arg)); + } + + /// Arc tangent implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr atan(float arg) + { + return expr(std::atan(arg)); + } + + /// Arc tangent implementation. + /// \param x first argument + /// \param y second argument + /// \return function value stored in single-preicision + static expr atan2(float x, float y) + { + return expr(std::atan2(x, y)); + } + + /// Hyperbolic sine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr sinh(float arg) + { + return expr(std::sinh(arg)); + } + + /// Hyperbolic cosine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr cosh(float arg) + { + return expr(std::cosh(arg)); + } + + /// Hyperbolic tangent implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr tanh(float arg) + { + return expr(std::tanh(arg)); + } + + /// Hyperbolic area sine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr asinh(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::asinh(arg)); +#else + return expr((arg == -std::numeric_limits::infinity()) + ? arg + : static_cast(std::log(arg + std::sqrt(arg * arg + 1.0)))); +#endif + } + + /// Hyperbolic area cosine implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr acosh(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::acosh(arg)); +#else + return expr((arg < -1.0f) ? std::numeric_limits::quiet_NaN() + : static_cast(std::log(arg + std::sqrt(arg * arg - 1.0)))); +#endif + } + + /// Hyperbolic area tangent implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr atanh(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::atanh(arg)); +#else + return expr(static_cast(0.5 * std::log((1.0 + arg) / (1.0 - arg)))); +#endif + } + + /// Error function implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr erf(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::erf(arg)); +#else + return expr(static_cast(erf(static_cast(arg)))); +#endif + } + + /// Complementary implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr erfc(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::erfc(arg)); +#else + return expr(static_cast(1.0 - erf(static_cast(arg)))); +#endif + } + + /// Gamma logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr lgamma(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::lgamma(arg)); +#else + if (builtin_isinf(arg)) + return expr(std::numeric_limits::infinity()); + if (arg < 0.0f) + { + float i, f = std::modf(-arg, &i); + if (f == 0.0f) + return expr(std::numeric_limits::infinity()); + return expr(static_cast(1.1447298858494001741434273513531 + - std::log(std::abs(std::sin(3.1415926535897932384626433832795 * f))) - lgamma(1.0 - arg))); + } + return expr(static_cast(lgamma(static_cast(arg)))); +#endif + } + + /// Gamma implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr tgamma(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::tgamma(arg)); +#else + if (arg == 0.0f) + return builtin_signbit(arg) ? expr(-std::numeric_limits::infinity()) + : expr(std::numeric_limits::infinity()); + if (arg < 0.0f) + { + float i, f = std::modf(-arg, &i); + if (f == 0.0f) + return expr(std::numeric_limits::quiet_NaN()); + double value = 3.1415926535897932384626433832795 + / (std::sin(3.1415926535897932384626433832795 * f) * std::exp(lgamma(1.0 - arg))); + return expr(static_cast((std::fmod(i, 2.0f) == 0.0f) ? -value : value)); + } + if (builtin_isinf(arg)) + return expr(arg); + return expr(static_cast(std::exp(lgamma(static_cast(arg))))); +#endif + } + + /// Floor implementation. + /// \param arg value to round + /// \return rounded value + static half floor(half arg) + { + return half(binary, round_half(arg.data_)); + } + + /// Ceiling implementation. + /// \param arg value to round + /// \return rounded value + static half ceil(half arg) + { + return half(binary, round_half(arg.data_)); + } + + /// Truncation implementation. + /// \param arg value to round + /// \return rounded value + static half trunc(half arg) + { + return half(binary, round_half(arg.data_)); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static half round(half arg) + { + return half(binary, round_half_up(arg.data_)); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static long lround(half arg) + { + return detail::half2int_up(arg.data_); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static half rint(half arg) + { + return half(binary, round_half(arg.data_)); + } + + /// Nearest integer implementation. 
+ /// \param arg value to round + /// \return rounded value + static long lrint(half arg) + { + return detail::half2int(arg.data_); + } + +#if HALF_ENABLE_CPP11_LONG_LONG + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static long long llround(half arg) + { + return detail::half2int_up(arg.data_); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static long long llrint(half arg) + { + return detail::half2int(arg.data_); + } +#endif + + /// Decompression implementation. + /// \param arg number to decompress + /// \param exp address to store exponent at + /// \return normalized significant + static half frexp(half arg, int* exp) + { + int m = arg.data_ & 0x7FFF, e = -14; + if (m >= 0x7C00 || !m) + return *exp = 0, arg; + for (; m < 0x400; m <<= 1, --e) + ; + return *exp = e + (m >> 10), half(binary, (arg.data_ & 0x8000) | 0x3800 | (m & 0x3FF)); + } + + /// Decompression implementation. + /// \param arg number to decompress + /// \param iptr address to store integer part at + /// \return fractional part + static half modf(half arg, half* iptr) + { + unsigned int e = arg.data_ & 0x7FFF; + if (e >= 0x6400) + return *iptr = arg, half(binary, arg.data_ & (0x8000U | -(e > 0x7C00))); + if (e < 0x3C00) + return iptr->data_ = arg.data_ & 0x8000, arg; + e >>= 10; + unsigned int mask = (1 << (25 - e)) - 1, m = arg.data_ & mask; + iptr->data_ = arg.data_ & ~mask; + if (!m) + return half(binary, arg.data_ & 0x8000); + for (; m < 0x400; m <<= 1, --e) + ; + return half(binary, static_cast((arg.data_ & 0x8000) | (e << 10) | (m & 0x3FF))); + } + + /// Scaling implementation. 
+ /// \param arg number to scale + /// \param exp power of two to scale by + /// \return scaled number + static half scalbln(half arg, long exp) + { + unsigned int m = arg.data_ & 0x7FFF; + if (m >= 0x7C00 || !m) + return arg; + for (; m < 0x400; m <<= 1, --exp) + ; + exp += m >> 10; + uint16 value = arg.data_ & 0x8000; + if (exp > 30) + { + if (half::round_style == std::round_toward_zero) + value |= 0x7BFF; + else if (half::round_style == std::round_toward_infinity) + value |= 0x7C00 - (value >> 15); + else if (half::round_style == std::round_toward_neg_infinity) + value |= 0x7BFF + (value >> 15); + else + value |= 0x7C00; + } + else if (exp > 0) + value |= (exp << 10) | (m & 0x3FF); + else if (exp > -11) + { + m = (m & 0x3FF) | 0x400; + if (half::round_style == std::round_to_nearest) + { + m += 1 << -exp; +#if HALF_ROUND_TIES_TO_EVEN + m -= (m >> (1 - exp)) & 1; +#endif + } + else if (half::round_style == std::round_toward_infinity) + m += ((value >> 15) - 1) & ((1 << (1 - exp)) - 1U); + else if (half::round_style == std::round_toward_neg_infinity) + m += -(value >> 15) & ((1 << (1 - exp)) - 1U); + value |= m >> (1 - exp); + } + else if (half::round_style == std::round_toward_infinity) + value -= (value >> 15) - 1; + else if (half::round_style == std::round_toward_neg_infinity) + value += value >> 15; + return half(binary, value); + } + + /// Exponent implementation. + /// \param arg number to query + /// \return floating point exponent + static int ilogb(half arg) + { + int abs = arg.data_ & 0x7FFF; + if (!abs) + return FP_ILOGB0; + if (abs < 0x7C00) + { + int exp = (abs >> 10) - 15; + if (abs < 0x400) + for (; abs < 0x200; abs <<= 1, --exp) + ; + return exp; + } + if (abs > 0x7C00) + return FP_ILOGBNAN; + return INT_MAX; + } + + /// Exponent implementation. 
+ /// \param arg number to query + /// \return floating point exponent + static half logb(half arg) + { + int abs = arg.data_ & 0x7FFF; + if (!abs) + return half(binary, 0xFC00); + if (abs < 0x7C00) + { + int exp = (abs >> 10) - 15; + if (abs < 0x400) + for (; abs < 0x200; abs <<= 1, --exp) + ; + uint16 bits = (exp < 0) << 15; + if (exp) + { + unsigned int m = std::abs(exp) << 6, e = 18; + for (; m < 0x400; m <<= 1, --e) + ; + bits |= (e << 10) + m; + } + return half(binary, bits); + } + if (abs > 0x7C00) + return arg; + return half(binary, 0x7C00); + } + + /// Enumeration implementation. + /// \param from number to increase/decrease + /// \param to direction to enumerate into + /// \return next representable number + static half nextafter(half from, half to) + { + uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF; + if (fabs > 0x7C00) + return from; + if (tabs > 0x7C00 || from.data_ == to.data_ || !(fabs | tabs)) + return to; + if (!fabs) + return half(binary, (to.data_ & 0x8000) + 1); + bool lt = ((fabs == from.data_) ? static_cast(fabs) : -static_cast(fabs)) + < ((tabs == to.data_) ? static_cast(tabs) : -static_cast(tabs)); + return half(binary, from.data_ + (((from.data_ >> 15) ^ static_cast(lt)) << 1) - 1); + } + + /// Enumeration implementation. 
+ /// \param from number to increase/decrease + /// \param to direction to enumerate into + /// \return next representable number + static half nexttoward(half from, long double to) + { + if (isnan(from)) + return from; + auto lfrom = static_cast(from); + if (builtin_isnan(to) || lfrom == to) + return half(static_cast(to)); + if (!(from.data_ & 0x7FFF)) + return half(binary, (static_cast(builtin_signbit(to)) << 15) + 1); + return half(binary, from.data_ + (((from.data_ >> 15) ^ static_cast(lfrom < to)) << 1) - 1); + } + + /// Sign implementation + /// \param x first operand + /// \param y second operand + /// \return composed value + static half copysign(half x, half y) + { + return half(binary, x.data_ ^ ((x.data_ ^ y.data_) & 0x8000)); + } + + /// Classification implementation. + /// \param arg value to classify + /// \retval true if infinite number + /// \retval false else + static int fpclassify(half arg) + { + unsigned int abs = arg.data_ & 0x7FFF; + return abs + ? ((abs > 0x3FF) ? ((abs >= 0x7C00) ? ((abs > 0x7C00) ? FP_NAN : FP_INFINITE) : FP_NORMAL) : FP_SUBNORMAL) + : FP_ZERO; + } + + /// Classification implementation. + /// \param arg value to classify + /// \retval true if finite number + /// \retval false else + static bool isfinite(half arg) + { + return (arg.data_ & 0x7C00) != 0x7C00; + } + + /// Classification implementation. + /// \param arg value to classify + /// \retval true if infinite number + /// \retval false else + static bool isinf(half arg) + { + return (arg.data_ & 0x7FFF) == 0x7C00; + } + + /// Classification implementation. + /// \param arg value to classify + /// \retval true if not a number + /// \retval false else + static bool isnan(half arg) + { + return (arg.data_ & 0x7FFF) > 0x7C00; + } + + /// Classification implementation. 
+ /// \param arg value to classify + /// \retval true if normal number + /// \retval false else + static bool isnormal(half arg) + { + return ((arg.data_ & 0x7C00) != 0) & ((arg.data_ & 0x7C00) != 0x7C00); + } + + /// Sign bit implementation. + /// \param arg value to check + /// \retval true if signed + /// \retval false if unsigned + static bool signbit(half arg) + { + return (arg.data_ & 0x8000) != 0; + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if operands equal + /// \retval false else + static bool isequal(half x, half y) + { + return (x.data_ == y.data_ || !((x.data_ | y.data_) & 0x7FFF)) && !isnan(x); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if operands not equal + /// \retval false else + static bool isnotequal(half x, half y) + { + return (x.data_ != y.data_ && ((x.data_ | y.data_) & 0x7FFF)) || isnan(x); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x > \a y + /// \retval false else + static bool isgreater(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + return xabs <= 0x7C00 && yabs <= 0x7C00 + && (((xabs == x.data_) ? xabs : -xabs) > ((yabs == y.data_) ? yabs : -yabs)); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x >= \a y + /// \retval false else + static bool isgreaterequal(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + return xabs <= 0x7C00 && yabs <= 0x7C00 + && (((xabs == x.data_) ? xabs : -xabs) >= ((yabs == y.data_) ? yabs : -yabs)); + } + + /// Comparison implementation. 
+ /// \param x first operand + /// \param y second operand + /// \retval true if \a x < \a y + /// \retval false else + static bool isless(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + return xabs <= 0x7C00 && yabs <= 0x7C00 + && (((xabs == x.data_) ? xabs : -xabs) < ((yabs == y.data_) ? yabs : -yabs)); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x <= \a y + /// \retval false else + static bool islessequal(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + return xabs <= 0x7C00 && yabs <= 0x7C00 + && (((xabs == x.data_) ? xabs : -xabs) <= ((yabs == y.data_) ? yabs : -yabs)); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if either \a x > \a y nor \a x < \a y + /// \retval false else + static bool islessgreater(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + if (xabs > 0x7C00 || yabs > 0x7C00) + return false; + int a = (xabs == x.data_) ? xabs : -xabs, b = (yabs == y.data_) ? yabs : -yabs; + return a < b || a > b; + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if operand unordered + /// \retval false else + static bool isunordered(half x, half y) + { + return isnan(x) || isnan(y); + } + +private: + static double erf(double arg) + { + if (builtin_isinf(arg)) + return (arg < 0.0) ? -1.0 : 1.0; + double x2 = arg * arg, ax2 = 0.147 * x2, + value = std::sqrt(1.0 - std::exp(-x2 * (1.2732395447351626861510701069801 + ax2) / (1.0 + ax2))); + return builtin_signbit(arg) ? 
-value : value; + } + + static double lgamma(double arg) + { + double v = 1.0; + for (; arg < 8.0; ++arg) + v *= arg; + double w = 1.0 / (arg * arg); + return (((((((-0.02955065359477124183006535947712 * w + 0.00641025641025641025641025641026) * w + + -0.00191752691752691752691752691753) + * w + + 8.4175084175084175084175084175084e-4) + * w + + -5.952380952380952380952380952381e-4) + * w + + 7.9365079365079365079365079365079e-4) + * w + + -0.00277777777777777777777777777778) + * w + + 0.08333333333333333333333333333333) + / arg + + 0.91893853320467274178032973640562 - std::log(v) - arg + (arg - 0.5) * std::log(arg); + } +}; + +/// Wrapper for unary half-precision functions needing specialization for individual argument types. +/// \tparam T argument type +template +struct unary_specialized +{ + /// Negation implementation. + /// \param arg value to negate + /// \return negated value + static HALF_CONSTEXPR half negate(half arg) + { + return half(binary, arg.data_ ^ 0x8000); + } + + /// Absolute value implementation. + /// \param arg function argument + /// \return absolute value + static half fabs(half arg) + { + return half(binary, arg.data_ & 0x7FFF); + } +}; +template <> +struct unary_specialized +{ + static HALF_CONSTEXPR expr negate(float arg) + { + return expr(-arg); + } + static expr fabs(float arg) + { + return expr(std::fabs(arg)); + } +}; + +/// Wrapper for binary half-precision functions needing specialization for individual argument types. +/// \tparam T first argument type +/// \tparam U first argument type +template +struct binary_specialized +{ + /// Minimum implementation. + /// \param x first operand + /// \param y second operand + /// \return minimum value + static expr fmin(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::fmin(x, y)); +#else + if (builtin_isnan(x)) + return expr(y); + if (builtin_isnan(y)) + return expr(x); + return expr(std::min(x, y)); +#endif + } + + /// Maximum implementation. 
+ /// \param x first operand + /// \param y second operand + /// \return maximum value + static expr fmax(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::fmax(x, y)); +#else + if (builtin_isnan(x)) + return expr(y); + if (builtin_isnan(y)) + return expr(x); + return expr(std::max(x, y)); +#endif + } +}; +template <> +struct binary_specialized +{ + static half fmin(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + if (xabs > 0x7C00) + return y; + if (yabs > 0x7C00) + return x; + return (((xabs == x.data_) ? xabs : -xabs) > ((yabs == y.data_) ? yabs : -yabs)) ? y : x; + } + static half fmax(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + if (xabs > 0x7C00) + return y; + if (yabs > 0x7C00) + return x; + return (((xabs == x.data_) ? xabs : -xabs) < ((yabs == y.data_) ? yabs : -yabs)) ? y : x; + } +}; + +/// Helper class for half casts. +/// This class template has to be specialized for all valid cast argument to define an appropriate static `cast` member +/// function and a corresponding `type` member denoting its return type. 
+/// \tparam T destination type +/// \tparam U source type +/// \tparam R rounding mode to use +template +struct half_caster +{ +}; +template +struct half_caster +{ +#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_arithmetic::value, "half_cast from non-arithmetic type unsupported"); +#endif + + static half cast(U arg) + { + return cast_impl(arg, is_float()); + }; + +private: + static half cast_impl(U arg, true_type) + { + return half(binary, float2half(arg)); + } + static half cast_impl(U arg, false_type) + { + return half(binary, int2half(arg)); + } +}; +template +struct half_caster +{ +#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); +#endif + + static T cast(half arg) + { + return cast_impl(arg, is_float()); + } + +private: + static T cast_impl(half arg, true_type) + { + return half2float(arg.data_); + } + static T cast_impl(half arg, false_type) + { + return half2int(arg.data_); + } +}; +template +struct half_caster +{ +#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); +#endif + + static T cast(expr arg) + { + return cast_impl(arg, is_float()); + } + +private: + static T cast_impl(float arg, true_type) + { + return static_cast(arg); + } + static T cast_impl(half arg, false_type) + { + return half2int(arg.data_); + } +}; +template +struct half_caster +{ + static half cast(half arg) + { + return arg; + } +}; +template +struct half_caster : half_caster +{ +}; + +/// \name Comparison operators +/// \{ + +/// Comparison for equality. +/// \param x first operand +/// \param y second operand +/// \retval true if operands equal +/// \retval false else +template +typename enable::type operator==(T x, U y) +{ + return functions::isequal(x, y); +} + +/// Comparison for inequality. 
+/// \param x first operand +/// \param y second operand +/// \retval true if operands not equal +/// \retval false else +template +typename enable::type operator!=(T x, U y) +{ + return functions::isnotequal(x, y); +} + +/// Comparison for less than. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x less than \a y +/// \retval false else +template +typename enable::type operator<(T x, U y) +{ + return functions::isless(x, y); +} + +/// Comparison for greater than. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x greater than \a y +/// \retval false else +template +typename enable::type operator>(T x, U y) +{ + return functions::isgreater(x, y); +} + +/// Comparison for less equal. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x less equal \a y +/// \retval false else +template +typename enable::type operator<=(T x, U y) +{ + return functions::islessequal(x, y); +} + +/// Comparison for greater equal. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x greater equal \a y +/// \retval false else +template +typename enable::type operator>=(T x, U y) +{ + return functions::isgreaterequal(x, y); +} + +/// \} +/// \name Arithmetic operators +/// \{ + +/// Add halfs. +/// \param x left operand +/// \param y right operand +/// \return sum of half expressions +template +typename enable::type operator+(T x, U y) +{ + return functions::plus(x, y); +} + +/// Subtract halfs. +/// \param x left operand +/// \param y right operand +/// \return difference of half expressions +template +typename enable::type operator-(T x, U y) +{ + return functions::minus(x, y); +} + +/// Multiply halfs. +/// \param x left operand +/// \param y right operand +/// \return product of half expressions +template +typename enable::type operator*(T x, U y) +{ + return functions::multiplies(x, y); +} + +/// Divide halfs. 
+/// \param x left operand +/// \param y right operand +/// \return quotient of half expressions +template +typename enable::type operator/(T x, U y) +{ + return functions::divides(x, y); +} + +/// Identity. +/// \param arg operand +/// \return uncahnged operand +template +HALF_CONSTEXPR typename enable::type operator+(T arg) +{ + return arg; +} + +/// Negation. +/// \param arg operand +/// \return negated operand +template +HALF_CONSTEXPR typename enable::type operator-(T arg) +{ + return unary_specialized::negate(arg); +} + +/// \} +/// \name Input and output +/// \{ + +/// Output operator. +/// \param out output stream to write into +/// \param arg half expression to write +/// \return reference to output stream +template +typename enable&, T>::type operator<<(std::basic_ostream& out, T arg) +{ + return functions::write(out, arg); +} + +/// Input operator. +/// \param in input stream to read from +/// \param arg half to read into +/// \return reference to input stream +template +std::basic_istream& operator>>(std::basic_istream& in, half& arg) +{ + return functions::read(in, arg); +} + +/// \} +/// \name Basic mathematical operations +/// \{ + +/// Absolute value. +/// \param arg operand +/// \return absolute value of \a arg +// template typename enable::type abs(T arg) { return unary_specialized::fabs(arg); } +inline half abs(half arg) +{ + return unary_specialized::fabs(arg); +} +inline expr abs(expr arg) +{ + return unary_specialized::fabs(arg); +} + +/// Absolute value. +/// \param arg operand +/// \return absolute value of \a arg +// template typename enable::type fabs(T arg) { return unary_specialized::fabs(arg); } +inline half fabs(half arg) +{ + return unary_specialized::fabs(arg); +} +inline expr fabs(expr arg) +{ + return unary_specialized::fabs(arg); +} + +/// Remainder of division. +/// \param x first operand +/// \param y second operand +/// \return remainder of floating point division. 
+// template typename enable::type fmod(T x, U y) { return functions::fmod(x, y); } +inline expr fmod(half x, half y) +{ + return functions::fmod(x, y); +} +inline expr fmod(half x, expr y) +{ + return functions::fmod(x, y); +} +inline expr fmod(expr x, half y) +{ + return functions::fmod(x, y); +} +inline expr fmod(expr x, expr y) +{ + return functions::fmod(x, y); +} + +/// Remainder of division. +/// \param x first operand +/// \param y second operand +/// \return remainder of floating point division. +// template typename enable::type remainder(T x, U y) { return +//functions::remainder(x, y); } +inline expr remainder(half x, half y) +{ + return functions::remainder(x, y); +} +inline expr remainder(half x, expr y) +{ + return functions::remainder(x, y); +} +inline expr remainder(expr x, half y) +{ + return functions::remainder(x, y); +} +inline expr remainder(expr x, expr y) +{ + return functions::remainder(x, y); +} + +/// Remainder of division. +/// \param x first operand +/// \param y second operand +/// \param quo address to store some bits of quotient at +/// \return remainder of floating point division. +// template typename enable::type remquo(T x, U y, int *quo) { return +//functions::remquo(x, y, quo); } +inline expr remquo(half x, half y, int* quo) +{ + return functions::remquo(x, y, quo); +} +inline expr remquo(half x, expr y, int* quo) +{ + return functions::remquo(x, y, quo); +} +inline expr remquo(expr x, half y, int* quo) +{ + return functions::remquo(x, y, quo); +} +inline expr remquo(expr x, expr y, int* quo) +{ + return functions::remquo(x, y, quo); +} + +/// Fused multiply add. +/// \param x first operand +/// \param y second operand +/// \param z third operand +/// \return ( \a x * \a y ) + \a z rounded as one operation. 
+// template typename enable::type fma(T x, U y, V z) { return +//functions::fma(x, y, z); } +inline expr fma(half x, half y, half z) +{ + return functions::fma(x, y, z); +} +inline expr fma(half x, half y, expr z) +{ + return functions::fma(x, y, z); +} +inline expr fma(half x, expr y, half z) +{ + return functions::fma(x, y, z); +} +inline expr fma(half x, expr y, expr z) +{ + return functions::fma(x, y, z); +} +inline expr fma(expr x, half y, half z) +{ + return functions::fma(x, y, z); +} +inline expr fma(expr x, half y, expr z) +{ + return functions::fma(x, y, z); +} +inline expr fma(expr x, expr y, half z) +{ + return functions::fma(x, y, z); +} +inline expr fma(expr x, expr y, expr z) +{ + return functions::fma(x, y, z); +} + +/// Maximum of half expressions. +/// \param x first operand +/// \param y second operand +/// \return maximum of operands +// template typename result::type fmax(T x, U y) { return +//binary_specialized::fmax(x, y); } +inline half fmax(half x, half y) +{ + return binary_specialized::fmax(x, y); +} +inline expr fmax(half x, expr y) +{ + return binary_specialized::fmax(x, y); +} +inline expr fmax(expr x, half y) +{ + return binary_specialized::fmax(x, y); +} +inline expr fmax(expr x, expr y) +{ + return binary_specialized::fmax(x, y); +} + +/// Minimum of half expressions. +/// \param x first operand +/// \param y second operand +/// \return minimum of operands +// template typename result::type fmin(T x, U y) { return +//binary_specialized::fmin(x, y); } +inline half fmin(half x, half y) +{ + return binary_specialized::fmin(x, y); +} +inline expr fmin(half x, expr y) +{ + return binary_specialized::fmin(x, y); +} +inline expr fmin(expr x, half y) +{ + return binary_specialized::fmin(x, y); +} +inline expr fmin(expr x, expr y) +{ + return binary_specialized::fmin(x, y); +} + +/// Positive difference. 
+/// \param x first operand +/// \param y second operand +/// \return \a x - \a y or 0 if difference negative +// template typename enable::type fdim(T x, U y) { return functions::fdim(x, y); } +inline expr fdim(half x, half y) +{ + return functions::fdim(x, y); +} +inline expr fdim(half x, expr y) +{ + return functions::fdim(x, y); +} +inline expr fdim(expr x, half y) +{ + return functions::fdim(x, y); +} +inline expr fdim(expr x, expr y) +{ + return functions::fdim(x, y); +} + +/// Get NaN value. +/// \return quiet NaN +inline half nanh(const char*) +{ + return functions::nanh(); +} + +/// \} +/// \name Exponential functions +/// \{ + +/// Exponential function. +/// \param arg function argument +/// \return e raised to \a arg +// template typename enable::type exp(T arg) { return functions::exp(arg); } +inline expr exp(half arg) +{ + return functions::exp(arg); +} +inline expr exp(expr arg) +{ + return functions::exp(arg); +} + +/// Exponential minus one. +/// \param arg function argument +/// \return e raised to \a arg subtracted by 1 +// template typename enable::type expm1(T arg) { return functions::expm1(arg); } +inline expr expm1(half arg) +{ + return functions::expm1(arg); +} +inline expr expm1(expr arg) +{ + return functions::expm1(arg); +} + +/// Binary exponential. +/// \param arg function argument +/// \return 2 raised to \a arg +// template typename enable::type exp2(T arg) { return functions::exp2(arg); } +inline expr exp2(half arg) +{ + return functions::exp2(arg); +} +inline expr exp2(expr arg) +{ + return functions::exp2(arg); +} + +/// Natural logorithm. +/// \param arg function argument +/// \return logarithm of \a arg to base e +// template typename enable::type log(T arg) { return functions::log(arg); } +inline expr log(half arg) +{ + return functions::log(arg); +} +inline expr log(expr arg) +{ + return functions::log(arg); +} + +/// Common logorithm. 
+/// \param arg function argument +/// \return logarithm of \a arg to base 10 +// template typename enable::type log10(T arg) { return functions::log10(arg); } +inline expr log10(half arg) +{ + return functions::log10(arg); +} +inline expr log10(expr arg) +{ + return functions::log10(arg); +} + +/// Natural logorithm. +/// \param arg function argument +/// \return logarithm of \a arg plus 1 to base e +// template typename enable::type log1p(T arg) { return functions::log1p(arg); } +inline expr log1p(half arg) +{ + return functions::log1p(arg); +} +inline expr log1p(expr arg) +{ + return functions::log1p(arg); +} + +/// Binary logorithm. +/// \param arg function argument +/// \return logarithm of \a arg to base 2 +// template typename enable::type log2(T arg) { return functions::log2(arg); } +inline expr log2(half arg) +{ + return functions::log2(arg); +} +inline expr log2(expr arg) +{ + return functions::log2(arg); +} + +/// \} +/// \name Power functions +/// \{ + +/// Square root. +/// \param arg function argument +/// \return square root of \a arg +// template typename enable::type sqrt(T arg) { return functions::sqrt(arg); } +inline expr sqrt(half arg) +{ + return functions::sqrt(arg); +} +inline expr sqrt(expr arg) +{ + return functions::sqrt(arg); +} + +/// Cubic root. +/// \param arg function argument +/// \return cubic root of \a arg +// template typename enable::type cbrt(T arg) { return functions::cbrt(arg); } +inline expr cbrt(half arg) +{ + return functions::cbrt(arg); +} +inline expr cbrt(expr arg) +{ + return functions::cbrt(arg); +} + +/// Hypotenuse function. 
+/// \param x first argument +/// \param y second argument +/// \return square root of sum of squares without internal over- or underflows +// template typename enable::type hypot(T x, U y) { return functions::hypot(x, y); +//} +inline expr hypot(half x, half y) +{ + return functions::hypot(x, y); +} +inline expr hypot(half x, expr y) +{ + return functions::hypot(x, y); +} +inline expr hypot(expr x, half y) +{ + return functions::hypot(x, y); +} +inline expr hypot(expr x, expr y) +{ + return functions::hypot(x, y); +} + +/// Power function. +/// \param base first argument +/// \param exp second argument +/// \return \a base raised to \a exp +// template typename enable::type pow(T base, U exp) { return functions::pow(base, +//exp); } +inline expr pow(half base, half exp) +{ + return functions::pow(base, exp); +} +inline expr pow(half base, expr exp) +{ + return functions::pow(base, exp); +} +inline expr pow(expr base, half exp) +{ + return functions::pow(base, exp); +} +inline expr pow(expr base, expr exp) +{ + return functions::pow(base, exp); +} + +/// \} +/// \name Trigonometric functions +/// \{ + +/// Sine function. +/// \param arg function argument +/// \return sine value of \a arg +// template typename enable::type sin(T arg) { return functions::sin(arg); } +inline expr sin(half arg) +{ + return functions::sin(arg); +} +inline expr sin(expr arg) +{ + return functions::sin(arg); +} + +/// Cosine function. +/// \param arg function argument +/// \return cosine value of \a arg +// template typename enable::type cos(T arg) { return functions::cos(arg); } +inline expr cos(half arg) +{ + return functions::cos(arg); +} +inline expr cos(expr arg) +{ + return functions::cos(arg); +} + +/// Tangent function. 
+/// \param arg function argument +/// \return tangent value of \a arg +// template typename enable::type tan(T arg) { return functions::tan(arg); } +inline expr tan(half arg) +{ + return functions::tan(arg); +} +inline expr tan(expr arg) +{ + return functions::tan(arg); +} + +/// Arc sine. +/// \param arg function argument +/// \return arc sine value of \a arg +// template typename enable::type asin(T arg) { return functions::asin(arg); } +inline expr asin(half arg) +{ + return functions::asin(arg); +} +inline expr asin(expr arg) +{ + return functions::asin(arg); +} + +/// Arc cosine function. +/// \param arg function argument +/// \return arc cosine value of \a arg +// template typename enable::type acos(T arg) { return functions::acos(arg); } +inline expr acos(half arg) +{ + return functions::acos(arg); +} +inline expr acos(expr arg) +{ + return functions::acos(arg); +} + +/// Arc tangent function. +/// \param arg function argument +/// \return arc tangent value of \a arg +// template typename enable::type atan(T arg) { return functions::atan(arg); } +inline expr atan(half arg) +{ + return functions::atan(arg); +} +inline expr atan(expr arg) +{ + return functions::atan(arg); +} + +/// Arc tangent function. +/// \param x first argument +/// \param y second argument +/// \return arc tangent value +// template typename enable::type atan2(T x, U y) { return functions::atan2(x, y); +//} +inline expr atan2(half x, half y) +{ + return functions::atan2(x, y); +} +inline expr atan2(half x, expr y) +{ + return functions::atan2(x, y); +} +inline expr atan2(expr x, half y) +{ + return functions::atan2(x, y); +} +inline expr atan2(expr x, expr y) +{ + return functions::atan2(x, y); +} + +/// \} +/// \name Hyperbolic functions +/// \{ + +/// Hyperbolic sine. 
+/// \param arg function argument +/// \return hyperbolic sine value of \a arg +// template typename enable::type sinh(T arg) { return functions::sinh(arg); } +inline expr sinh(half arg) +{ + return functions::sinh(arg); +} +inline expr sinh(expr arg) +{ + return functions::sinh(arg); +} + +/// Hyperbolic cosine. +/// \param arg function argument +/// \return hyperbolic cosine value of \a arg +// template typename enable::type cosh(T arg) { return functions::cosh(arg); } +inline expr cosh(half arg) +{ + return functions::cosh(arg); +} +inline expr cosh(expr arg) +{ + return functions::cosh(arg); +} + +/// Hyperbolic tangent. +/// \param arg function argument +/// \return hyperbolic tangent value of \a arg +// template typename enable::type tanh(T arg) { return functions::tanh(arg); } +inline expr tanh(half arg) +{ + return functions::tanh(arg); +} +inline expr tanh(expr arg) +{ + return functions::tanh(arg); +} + +/// Hyperbolic area sine. +/// \param arg function argument +/// \return area sine value of \a arg +// template typename enable::type asinh(T arg) { return functions::asinh(arg); } +inline expr asinh(half arg) +{ + return functions::asinh(arg); +} +inline expr asinh(expr arg) +{ + return functions::asinh(arg); +} + +/// Hyperbolic area cosine. +/// \param arg function argument +/// \return area cosine value of \a arg +// template typename enable::type acosh(T arg) { return functions::acosh(arg); } +inline expr acosh(half arg) +{ + return functions::acosh(arg); +} +inline expr acosh(expr arg) +{ + return functions::acosh(arg); +} + +/// Hyperbolic area tangent. +/// \param arg function argument +/// \return area tangent value of \a arg +// template typename enable::type atanh(T arg) { return functions::atanh(arg); } +inline expr atanh(half arg) +{ + return functions::atanh(arg); +} +inline expr atanh(expr arg) +{ + return functions::atanh(arg); +} + +/// \} +/// \name Error and gamma functions +/// \{ + +/// Error function. 
+/// \param arg function argument +/// \return error function value of \a arg +// template typename enable::type erf(T arg) { return functions::erf(arg); } +inline expr erf(half arg) +{ + return functions::erf(arg); +} +inline expr erf(expr arg) +{ + return functions::erf(arg); +} + +/// Complementary error function. +/// \param arg function argument +/// \return 1 minus error function value of \a arg +// template typename enable::type erfc(T arg) { return functions::erfc(arg); } +inline expr erfc(half arg) +{ + return functions::erfc(arg); +} +inline expr erfc(expr arg) +{ + return functions::erfc(arg); +} + +/// Natural logarithm of gamma function. +/// \param arg function argument +/// \return natural logarith of gamma function for \a arg +// template typename enable::type lgamma(T arg) { return functions::lgamma(arg); } +inline expr lgamma(half arg) +{ + return functions::lgamma(arg); +} +inline expr lgamma(expr arg) +{ + return functions::lgamma(arg); +} + +/// Gamma function. +/// \param arg function argument +/// \return gamma function value of \a arg +// template typename enable::type tgamma(T arg) { return functions::tgamma(arg); } +inline expr tgamma(half arg) +{ + return functions::tgamma(arg); +} +inline expr tgamma(expr arg) +{ + return functions::tgamma(arg); +} + +/// \} +/// \name Rounding +/// \{ + +/// Nearest integer not less than half value. +/// \param arg half to round +/// \return nearest integer not less than \a arg +// template typename enable::type ceil(T arg) { return functions::ceil(arg); } +inline half ceil(half arg) +{ + return functions::ceil(arg); +} +inline half ceil(expr arg) +{ + return functions::ceil(arg); +} + +/// Nearest integer not greater than half value. 
+/// \param arg half to round +/// \return nearest integer not greater than \a arg +// template typename enable::type floor(T arg) { return functions::floor(arg); } +inline half floor(half arg) +{ + return functions::floor(arg); +} +inline half floor(expr arg) +{ + return functions::floor(arg); +} + +/// Nearest integer not greater in magnitude than half value. +/// \param arg half to round +/// \return nearest integer not greater in magnitude than \a arg +// template typename enable::type trunc(T arg) { return functions::trunc(arg); } +inline half trunc(half arg) +{ + return functions::trunc(arg); +} +inline half trunc(expr arg) +{ + return functions::trunc(arg); +} + +/// Nearest integer. +/// \param arg half to round +/// \return nearest integer, rounded away from zero in half-way cases +// template typename enable::type round(T arg) { return functions::round(arg); } +inline half round(half arg) +{ + return functions::round(arg); +} +inline half round(expr arg) +{ + return functions::round(arg); +} + +/// Nearest integer. +/// \param arg half to round +/// \return nearest integer, rounded away from zero in half-way cases +// template typename enable::type lround(T arg) { return functions::lround(arg); } +inline long lround(half arg) +{ + return functions::lround(arg); +} +inline long lround(expr arg) +{ + return functions::lround(arg); +} + +/// Nearest integer using half's internal rounding mode. +/// \param arg half expression to round +/// \return nearest integer using default rounding mode +// template typename enable::type nearbyint(T arg) { return functions::nearbyint(arg); } +inline half nearbyint(half arg) +{ + return functions::rint(arg); +} +inline half nearbyint(expr arg) +{ + return functions::rint(arg); +} + +/// Nearest integer using half's internal rounding mode. 
+/// \param arg half expression to round +/// \return nearest integer using default rounding mode +// template typename enable::type rint(T arg) { return functions::rint(arg); } +inline half rint(half arg) +{ + return functions::rint(arg); +} +inline half rint(expr arg) +{ + return functions::rint(arg); +} + +/// Nearest integer using half's internal rounding mode. +/// \param arg half expression to round +/// \return nearest integer using default rounding mode +// template typename enable::type lrint(T arg) { return functions::lrint(arg); } +inline long lrint(half arg) +{ + return functions::lrint(arg); +} +inline long lrint(expr arg) +{ + return functions::lrint(arg); +} +#if HALF_ENABLE_CPP11_LONG_LONG +/// Nearest integer. +/// \param arg half to round +/// \return nearest integer, rounded away from zero in half-way cases +// template typename enable::type llround(T arg) { return functions::llround(arg); } +inline long long llround(half arg) +{ + return functions::llround(arg); +} +inline long long llround(expr arg) +{ + return functions::llround(arg); +} + +/// Nearest integer using half's internal rounding mode. +/// \param arg half expression to round +/// \return nearest integer using default rounding mode +// template typename enable::type llrint(T arg) { return functions::llrint(arg); } +inline long long llrint(half arg) +{ + return functions::llrint(arg); +} +inline long long llrint(expr arg) +{ + return functions::llrint(arg); +} +#endif + +/// \} +/// \name Floating point manipulation +/// \{ + +/// Decompress floating point number. +/// \param arg number to decompress +/// \param exp address to store exponent at +/// \return significant in range [0.5, 1) +// template typename enable::type frexp(T arg, int *exp) { return functions::frexp(arg, exp); } +inline half frexp(half arg, int* exp) +{ + return functions::frexp(arg, exp); +} +inline half frexp(expr arg, int* exp) +{ + return functions::frexp(arg, exp); +} + +/// Multiply by power of two. 
+/// \param arg number to modify +/// \param exp power of two to multiply with +/// \return \a arg multplied by 2 raised to \a exp +// template typename enable::type ldexp(T arg, int exp) { return functions::scalbln(arg, exp); +//} +inline half ldexp(half arg, int exp) +{ + return functions::scalbln(arg, exp); +} +inline half ldexp(expr arg, int exp) +{ + return functions::scalbln(arg, exp); +} + +/// Extract integer and fractional parts. +/// \param arg number to decompress +/// \param iptr address to store integer part at +/// \return fractional part +// template typename enable::type modf(T arg, half *iptr) { return functions::modf(arg, iptr); +//} +inline half modf(half arg, half* iptr) +{ + return functions::modf(arg, iptr); +} +inline half modf(expr arg, half* iptr) +{ + return functions::modf(arg, iptr); +} + +/// Multiply by power of two. +/// \param arg number to modify +/// \param exp power of two to multiply with +/// \return \a arg multplied by 2 raised to \a exp +// template typename enable::type scalbn(T arg, int exp) { return functions::scalbln(arg, exp); +//} +inline half scalbn(half arg, int exp) +{ + return functions::scalbln(arg, exp); +} +inline half scalbn(expr arg, int exp) +{ + return functions::scalbln(arg, exp); +} + +/// Multiply by power of two. +/// \param arg number to modify +/// \param exp power of two to multiply with +/// \return \a arg multplied by 2 raised to \a exp +// template typename enable::type scalbln(T arg, long exp) { return functions::scalbln(arg, +//exp); } +inline half scalbln(half arg, long exp) +{ + return functions::scalbln(arg, exp); +} +inline half scalbln(expr arg, long exp) +{ + return functions::scalbln(arg, exp); +} + +/// Extract exponent. 
+/// \param arg number to query +/// \return floating point exponent +/// \retval FP_ILOGB0 for zero +/// \retval FP_ILOGBNAN for NaN +/// \retval MAX_INT for infinity +// template typename enable::type ilogb(T arg) { return functions::ilogb(arg); } +inline int ilogb(half arg) +{ + return functions::ilogb(arg); +} +inline int ilogb(expr arg) +{ + return functions::ilogb(arg); +} + +/// Extract exponent. +/// \param arg number to query +/// \return floating point exponent +// template typename enable::type logb(T arg) { return functions::logb(arg); } +inline half logb(half arg) +{ + return functions::logb(arg); +} +inline half logb(expr arg) +{ + return functions::logb(arg); +} + +/// Next representable value. +/// \param from value to compute next representable value for +/// \param to direction towards which to compute next value +/// \return next representable value after \a from in direction towards \a to +// template typename enable::type nextafter(T from, U to) { return +//functions::nextafter(from, to); } +inline half nextafter(half from, half to) +{ + return functions::nextafter(from, to); +} +inline half nextafter(half from, expr to) +{ + return functions::nextafter(from, to); +} +inline half nextafter(expr from, half to) +{ + return functions::nextafter(from, to); +} +inline half nextafter(expr from, expr to) +{ + return functions::nextafter(from, to); +} + +/// Next representable value. +/// \param from value to compute next representable value for +/// \param to direction towards which to compute next value +/// \return next representable value after \a from in direction towards \a to +// template typename enable::type nexttoward(T from, long double to) { return +//functions::nexttoward(from, to); } +inline half nexttoward(half from, long double to) +{ + return functions::nexttoward(from, to); +} +inline half nexttoward(expr from, long double to) +{ + return functions::nexttoward(from, to); +} + +/// Take sign. 
+/// \param x value to change sign for +/// \param y value to take sign from +/// \return value equal to \a x in magnitude and to \a y in sign +// template typename enable::type copysign(T x, U y) { return +//functions::copysign(x, y); } +inline half copysign(half x, half y) +{ + return functions::copysign(x, y); +} +inline half copysign(half x, expr y) +{ + return functions::copysign(x, y); +} +inline half copysign(expr x, half y) +{ + return functions::copysign(x, y); +} +inline half copysign(expr x, expr y) +{ + return functions::copysign(x, y); +} + +/// \} +/// \name Floating point classification +/// \{ + +/// Classify floating point value. +/// \param arg number to classify +/// \retval FP_ZERO for positive and negative zero +/// \retval FP_SUBNORMAL for subnormal numbers +/// \retval FP_INFINITY for positive and negative infinity +/// \retval FP_NAN for NaNs +/// \retval FP_NORMAL for all other (normal) values +// template typename enable::type fpclassify(T arg) { return functions::fpclassify(arg); } +inline int fpclassify(half arg) +{ + return functions::fpclassify(arg); +} +inline int fpclassify(expr arg) +{ + return functions::fpclassify(arg); +} + +/// Check if finite number. +/// \param arg number to check +/// \retval true if neither infinity nor NaN +/// \retval false else +// template typename enable::type isfinite(T arg) { return functions::isfinite(arg); } +inline bool isfinite(half arg) +{ + return functions::isfinite(arg); +} +inline bool isfinite(expr arg) +{ + return functions::isfinite(arg); +} + +/// Check for infinity. +/// \param arg number to check +/// \retval true for positive or negative infinity +/// \retval false else +// template typename enable::type isinf(T arg) { return functions::isinf(arg); } +inline bool isinf(half arg) +{ + return functions::isinf(arg); +} +inline bool isinf(expr arg) +{ + return functions::isinf(arg); +} + +/// Check for NaN. 
+/// \param arg number to check +/// \retval true for NaNs +/// \retval false else +// template typename enable::type isnan(T arg) { return functions::isnan(arg); } +inline bool isnan(half arg) +{ + return functions::isnan(arg); +} +inline bool isnan(expr arg) +{ + return functions::isnan(arg); +} + +/// Check if normal number. +/// \param arg number to check +/// \retval true if normal number +/// \retval false if either subnormal, zero, infinity or NaN +// template typename enable::type isnormal(T arg) { return functions::isnormal(arg); } +inline bool isnormal(half arg) +{ + return functions::isnormal(arg); +} +inline bool isnormal(expr arg) +{ + return functions::isnormal(arg); +} + +/// Check sign. +/// \param arg number to check +/// \retval true for negative number +/// \retval false for positive number +// template typename enable::type signbit(T arg) { return functions::signbit(arg); } +inline bool signbit(half arg) +{ + return functions::signbit(arg); +} +inline bool signbit(expr arg) +{ + return functions::signbit(arg); +} + +/// \} +/// \name Comparison +/// \{ + +/// Comparison for greater than. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x greater than \a y +/// \retval false else +// template typename enable::type isgreater(T x, U y) { return +//functions::isgreater(x, y); } +inline bool isgreater(half x, half y) +{ + return functions::isgreater(x, y); +} +inline bool isgreater(half x, expr y) +{ + return functions::isgreater(x, y); +} +inline bool isgreater(expr x, half y) +{ + return functions::isgreater(x, y); +} +inline bool isgreater(expr x, expr y) +{ + return functions::isgreater(x, y); +} + +/// Comparison for greater equal. 
+/// \param x first operand +/// \param y second operand +/// \retval true if \a x greater equal \a y +/// \retval false else +// template typename enable::type isgreaterequal(T x, U y) { return +//functions::isgreaterequal(x, y); } +inline bool isgreaterequal(half x, half y) +{ + return functions::isgreaterequal(x, y); +} +inline bool isgreaterequal(half x, expr y) +{ + return functions::isgreaterequal(x, y); +} +inline bool isgreaterequal(expr x, half y) +{ + return functions::isgreaterequal(x, y); +} +inline bool isgreaterequal(expr x, expr y) +{ + return functions::isgreaterequal(x, y); +} + +/// Comparison for less than. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x less than \a y +/// \retval false else +// template typename enable::type isless(T x, U y) { return functions::isless(x, +//y); } +inline bool isless(half x, half y) +{ + return functions::isless(x, y); +} +inline bool isless(half x, expr y) +{ + return functions::isless(x, y); +} +inline bool isless(expr x, half y) +{ + return functions::isless(x, y); +} +inline bool isless(expr x, expr y) +{ + return functions::isless(x, y); +} + +/// Comparison for less equal. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x less equal \a y +/// \retval false else +// template typename enable::type islessequal(T x, U y) { return +//functions::islessequal(x, y); } +inline bool islessequal(half x, half y) +{ + return functions::islessequal(x, y); +} +inline bool islessequal(half x, expr y) +{ + return functions::islessequal(x, y); +} +inline bool islessequal(expr x, half y) +{ + return functions::islessequal(x, y); +} +inline bool islessequal(expr x, expr y) +{ + return functions::islessequal(x, y); +} + +/// Comarison for less or greater. 
+/// \param x first operand +/// \param y second operand +/// \retval true if either less or greater +/// \retval false else +// template typename enable::type islessgreater(T x, U y) { return +//functions::islessgreater(x, y); } +inline bool islessgreater(half x, half y) +{ + return functions::islessgreater(x, y); +} +inline bool islessgreater(half x, expr y) +{ + return functions::islessgreater(x, y); +} +inline bool islessgreater(expr x, half y) +{ + return functions::islessgreater(x, y); +} +inline bool islessgreater(expr x, expr y) +{ + return functions::islessgreater(x, y); +} + +/// Check if unordered. +/// \param x first operand +/// \param y second operand +/// \retval true if unordered (one or two NaN operands) +/// \retval false else +// template typename enable::type isunordered(T x, U y) { return +//functions::isunordered(x, y); } +inline bool isunordered(half x, half y) +{ + return functions::isunordered(x, y); +} +inline bool isunordered(half x, expr y) +{ + return functions::isunordered(x, y); +} +inline bool isunordered(expr x, half y) +{ + return functions::isunordered(x, y); +} +inline bool isunordered(expr x, expr y) +{ + return functions::isunordered(x, y); +} + +/// \name Casting +/// \{ + +/// Cast to or from half-precision floating point number. +/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted +/// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. +/// It uses the default rounding mode. +/// +/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types +/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler +/// error and casting between [half](\ref half_float::half)s is just a no-op. 
+/// \tparam T destination type (half or built-in arithmetic type)
+/// \tparam U source type (half or built-in arithmetic type)
+/// \param arg value to cast
+/// \return \a arg converted to destination type
+template <typename T, typename U>
+T half_cast(U arg)
+{
+    return half_caster<T, U>::cast(arg);
+}
+
+/// Cast to or from half-precision floating point number.
+/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted
+/// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do.
+///
+/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types
+/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler
+/// error and casting between [half](\ref half_float::half)s is just a no-op.
+/// \tparam T destination type (half or built-in arithmetic type)
+/// \tparam R rounding mode to use.
+/// \tparam U source type (half or built-in arithmetic type)
+/// \param arg value to cast
+/// \return \a arg converted to destination type
+template <typename T, std::float_round_style R, typename U>
+T half_cast(U arg)
+{
+    return half_caster<T, U, R>::cast(arg);
+}
+/// \}
+} // namespace detail
+
+using detail::operator==;
+using detail::operator!=;
+using detail::operator<;
+using detail::operator>;
+using detail::operator<=;
+using detail::operator>=;
+using detail::operator+;
+using detail::operator-;
+using detail::operator*;
+using detail::operator/;
+using detail::operator<<;
+using detail::operator>>;
+
+using detail::abs;
+using detail::fabs;
+using detail::fmod;
+using detail::remainder;
+using detail::remquo;
+using detail::fma;
+using detail::fmax;
+using detail::fmin;
+using detail::fdim;
+using detail::nanh;
+using detail::exp;
+using detail::expm1;
+using detail::exp2;
+using detail::log;
+using detail::log10;
+using detail::log1p;
+using detail::log2;
+using detail::sqrt;
+using detail::cbrt;
+using detail::hypot;
+using detail::pow;
+using detail::sin;
+using detail::cos;
+using detail::tan;
+using detail::asin;
+using detail::acos;
+using detail::atan;
+using detail::atan2;
+using detail::sinh;
+using detail::cosh;
+using detail::tanh;
+using detail::asinh;
+using detail::acosh;
+using detail::atanh;
+using detail::erf;
+using detail::erfc;
+using detail::lgamma;
+using detail::tgamma;
+using detail::ceil;
+using detail::floor;
+using detail::trunc;
+using detail::round;
+using detail::lround;
+using detail::nearbyint;
+using detail::rint;
+using detail::lrint;
+#if HALF_ENABLE_CPP11_LONG_LONG
+using detail::llround;
+using detail::llrint;
+#endif
+using detail::frexp;
+using detail::ldexp;
+using detail::modf;
+using detail::scalbn;
+using detail::scalbln;
+using detail::ilogb;
+using detail::logb;
+using detail::nextafter;
+using detail::nexttoward;
+using detail::copysign;
+using detail::fpclassify;
+using detail::isfinite;
+using detail::isinf;
+using detail::isnan;
+using detail::isnormal;
+using detail::signbit;
+using detail::isgreater;
+using detail::isgreaterequal;
+using detail::isless;
+using detail::islessequal;
+using detail::islessgreater;
+using detail::isunordered;
+
+using detail::half_cast;
+} // namespace half_float
+
+/// Extensions to the C++ standard library.
+namespace std
+{
+/// Numeric limits for half-precision floats.
+/// Because of the underlying single-precision implementation of many operations, it inherits some properties from
+/// `std::numeric_limits<float>`.
+template <>
+class numeric_limits<half_float::half> : public numeric_limits<float>
+{
+public:
+    /// Supports signed values.
+    static HALF_CONSTEXPR_CONST bool is_signed = true;
+
+    /// Is not exact.
+    static HALF_CONSTEXPR_CONST bool is_exact = false;
+
+    /// Doesn't provide modulo arithmetic.
+    static HALF_CONSTEXPR_CONST bool is_modulo = false;
+
+    /// IEEE conformant.
+    static HALF_CONSTEXPR_CONST bool is_iec559 = true;
+
+    /// Supports infinity.
+    static HALF_CONSTEXPR_CONST bool has_infinity = true;
+
+    /// Supports quiet NaNs.
+    static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true;
+
+    /// Supports subnormal values.
+    static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present;
+
+    /// Rounding mode.
+    /// Due to the mix of internal single-precision computations (using the rounding mode of the underlying
+    /// single-precision implementation) with the rounding mode of the single-to-half conversions, the actual rounding
+    /// mode might be `std::round_indeterminate` if the default half-precision rounding mode doesn't match the
+    /// single-precision rounding mode.
+    static HALF_CONSTEXPR_CONST float_round_style round_style
+        = (std::numeric_limits<float>::round_style == half_float::half::round_style) ? half_float::half::round_style
+                                                                                     : round_indeterminate;
+
+    /// Significant digits.
+    static HALF_CONSTEXPR_CONST int digits = 11;
+
+    /// Significant decimal digits.
+    static HALF_CONSTEXPR_CONST int digits10 = 3;
+
+    /// Required decimal digits to represent all possible values.
+    static HALF_CONSTEXPR_CONST int max_digits10 = 5;
+
+    /// Number base.
+    static HALF_CONSTEXPR_CONST int radix = 2;
+
+    /// One more than smallest exponent.
+    static HALF_CONSTEXPR_CONST int min_exponent = -13;
+
+    /// Smallest normalized representable power of 10.
+    static HALF_CONSTEXPR_CONST int min_exponent10 = -4;
+
+    /// One more than largest exponent
+    static HALF_CONSTEXPR_CONST int max_exponent = 16;
+
+    /// Largest finitely representable power of 10.
+    static HALF_CONSTEXPR_CONST int max_exponent10 = 4;
+
+    /// Smallest positive normal value.
+    static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x0400);
+    }
+
+    /// Smallest finite value.
+    static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0xFBFF);
+    }
+
+    /// Largest finite value.
+    static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x7BFF);
+    }
+
+    /// Difference between one and next representable value.
+    static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x1400);
+    }
+
+    /// Maximum rounding error.
+    static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, (round_style == std::round_to_nearest) ? 0x3800 : 0x3C00);
+    }
+
+    /// Positive infinity.
+    static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x7C00);
+    }
+
+    /// Quiet NaN.
+    static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x7FFF);
+    }
+
+    /// Signalling NaN.
+    static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x7DFF);
+    }
+
+    /// Smallest positive subnormal value.
+    static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW
+    {
+        return half_float::half(half_float::detail::binary, 0x0001);
+    }
+};
+
+#if HALF_ENABLE_CPP11_HASH
+/// Hash function for half-precision floats.
+/// This is only defined if C++11 `std::hash` is supported and enabled.
+template <>
+struct hash<half_float::half> //: unary_function<half_float::half, size_t>
+{
+    /// Type of function argument.
+    typedef half_float::half argument_type;
+
+    /// Function return type.
+    typedef size_t result_type;
+
+    /// Compute hash function.
+    /// \param arg half to hash
+    /// \return hash value
+    result_type operator()(argument_type arg) const
+    {
+        return hash<half_float::detail::uint16>()(static_cast<unsigned>(arg.data_) & -(arg.data_ != 0x8000));
+    }
+};
+#endif
+} // namespace std
 #undef HALF_CONSTEXPR
 #undef HALF_CONSTEXPR_CONST
 #undef HALF_NOEXCEPT
 #undef HALF_NOTHROW
 #ifdef HALF_POP_WARNINGS
-    #pragma warning(pop)
-    #undef HALF_POP_WARNINGS
+#pragma warning(pop)
+#undef HALF_POP_WARNINGS
 #endif
 #endif