From 56ce27c7ab307102fe1feddcf65285a4799416a4 Mon Sep 17 00:00:00 2001 From: Max Buckley Date: Sun, 22 Mar 2026 19:27:01 +0100 Subject: [PATCH 1/3] Enable strict C++ compiler warnings with -Werror Enable -Wall -Wextra -Wpedantic -Wshadow -Wnon-virtual-dtor -Woverloaded-virtual -Wconversion -Wdouble-promotion -Wformat=2 -Wimplicit-fallthrough -Werror for GCC/Clang, and /W4 /WX for MSVC. Fix all ~5,500 warnings across 114 files: - Comment out unused parameters in plugin interface implementations - Add static_cast for narrowing conversions (int64_t/size_t to int32_t) - Rename constructor params to avoid shadowing member variables - Add using declarations in plugin classes to prevent hiding base class virtual methods (IPluginV2DynamicExt/IPluginV2IOExt) - Change sealed base class methods from private to protected to support using declarations in derived classes - Add virtual destructor to TimerBase - Add [[noreturn]] to reportAssertion to fix implicit-fallthrough - Add #pragma GCC system_header to third-party half.h headers - Add #undef before macro redefinitions across plugin headers - Use f suffix on float literals to prevent double promotion Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Max Buckley --- CMakeLists.txt | 9 +- include/NvInferRuntime.h | 69 +- include/NvInferRuntimePlugin.h | 22 +- plugin/batchTilePlugin/batchTilePlugin.cpp | 15 +- plugin/batchedNMSPlugin/batchedNMSPlugin.cpp | 65 +- plugin/batchedNMSPlugin/batchedNMSPlugin.h | 5 + plugin/bertQKVToContextPlugin/mhaRunner.h | 2 +- .../qkvToContextInt8InterleavedPlugin.cpp | 36 +- ...kvToContextInt8InterleavedPluginLegacy.cpp | 28 +- .../qkvToContextInt8InterleavedPluginLegacy.h | 5 + .../qkvToContextPlugin.cpp | 59 +- .../qkvToContextPluginLegacy.cpp | 37 +- .../qkvToContextPluginLegacy.h | 20 +- plugin/clipPlugin/clipPlugin.cpp | 13 +- plugin/common/bboxUtils.h | 20 +- plugin/common/bertCommon.h | 16 +- plugin/common/checkMacrosPlugin.h | 21 +- plugin/common/cudaDriverWrapper.cpp | 5 +- 
plugin/common/cudnnWrapper.cpp | 4 +- plugin/common/kernels/proposalKernel.cu | 265 +- plugin/common/plugin.cpp | 5 +- plugin/common/reducedMathPlugin.cpp | 6 +- .../coordConvACPlugin/coordConvACPlugin.cpp | 46 +- .../cropAndResizePlugin.cpp | 40 +- .../cropAndResizePluginLegacy.cpp | 54 +- .../cropAndResizePluginLegacy.h | 5 + plugin/decodeBbox3DPlugin/decodeBbox3D.cpp | 20 +- plugin/decodeBbox3DPlugin/decodeBbox3D.h | 5 + .../detectionLayerPlugin.cpp | 19 +- .../disentangledAttentionPlugin.cpp | 27 +- .../disentangledAttentionPluginLegacy.cpp | 23 +- .../disentangledAttentionPluginLegacy.h | 10 +- .../efficientNMSPlugin/efficientNMSPlugin.cpp | 44 +- .../efficientNMSPlugin/efficientNMSPlugin.h | 5 + .../tftrt/efficientNMSExplicitTFTRTPlugin.cpp | 6 +- .../tftrt/efficientNMSImplicitTFTRTPlugin.cpp | 28 +- .../tftrt/efficientNMSImplicitTFTRTPlugin.h | 2 + .../embLayerNormPlugin/embLayerNormPlugin.cpp | 54 +- .../embLayerNormPluginLegacy.cpp | 44 +- .../embLayerNormPluginLegacy.h | 10 +- .../embLayerNormVarSeqlenPlugin.cpp | 81 +- .../embLayerNormVarSeqlenPluginLegacy.cpp | 48 +- .../embLayerNormVarSeqlenPluginLegacy.h | 15 + plugin/fcPlugin/fcPlugin.cpp | 85 +- plugin/fcPlugin/fcPlugin.h | 11 +- plugin/flattenConcat/flattenConcat.cpp | 39 +- plugin/geluPlugin/geluPlugin.cpp | 29 +- plugin/geluPlugin/geluPlugin.h | 10 +- .../generateDetectionPlugin.cpp | 28 +- plugin/gridAnchorPlugin/gridAnchorPlugin.cpp | 69 +- .../groupNormalizationPlugin.cpp | 68 +- .../groupNormalizationPlugin.h | 10 +- .../instanceNormFwd.h | 4 +- .../instanceNormalizationPluginLegacy.h | 8 +- plugin/leakyReluPlugin/lReluPlugin.cpp | 15 +- .../modulatedDeformConvPlugin.cpp | 48 +- .../modulatedDeformConvPluginLegacy.cpp | 83 +- .../modulatedDeformConvPluginLegacy.h | 5 + .../multilevelCropAndResizePlugin.cpp | 36 +- .../multilevelProposeROIPlugin.cpp | 57 +- .../multiscaleDeformableAttnPlugin.cpp | 35 +- .../multiscaleDeformableAttnPluginLegacy.cpp | 40 +- 
.../multiscaleDeformableAttnPluginLegacy.h | 15 +- plugin/nmsPlugin/nmsPlugin.cpp | 89 +- plugin/nmsPlugin/nmsPlugin.h | 5 + plugin/normalizePlugin/normalizePlugin.cpp | 51 +- plugin/nvFasterRCNN/nvFasterRCNNPlugin.cpp | 50 +- plugin/nvFasterRCNN/nvFasterRCNNPlugin.h | 2 + plugin/pillarScatterPlugin/pillarScatter.cpp | 34 +- plugin/pillarScatterPlugin/pillarScatter.h | 5 + plugin/priorBoxPlugin/priorBoxPlugin.cpp | 41 +- .../proposalLayerPlugin.cpp | 43 +- plugin/proposalPlugin/proposalPlugin.cpp | 102 +- plugin/proposalPlugin/proposalPlugin.h | 24 +- .../pyramidROIAlignPlugin.cpp | 34 +- plugin/regionPlugin/regionPlugin.cpp | 63 +- plugin/reorgPlugin/reorgPlugin.cpp | 78 +- plugin/reorgPlugin/reorgPlugin.h | 5 + .../resizeNearestPlugin.cpp | 39 +- plugin/roiAlignPlugin/roiAlignPlugin.cpp | 32 +- .../roiAlignPlugin/roiAlignPluginLegacy.cpp | 18 +- plugin/roiAlignPlugin/roiAlignPluginLegacy.h | 5 + .../scatterElementsPlugin.cpp | 24 +- .../scatterElementsPluginLegacy.cpp | 18 +- .../scatterElementsPluginLegacy.h | 5 + plugin/scatterPlugin/scatterPlugin.cpp | 33 +- plugin/scatterPlugin/scatterPlugin.h | 5 + .../skipLayerNormInt8InterleavedPlugin.cpp | 30 +- ...ipLayerNormInt8InterleavedPluginLegacy.cpp | 16 +- ...skipLayerNormInt8InterleavedPluginLegacy.h | 15 + .../skipLayerNormPlugin.cpp | 57 +- .../skipLayerNormPluginLegacy.cpp | 26 +- .../skipLayerNormPluginLegacy.h | 20 +- .../specialSlicePlugin/specialSlicePlugin.cpp | 31 +- plugin/splitPlugin/split.h | 12 +- plugin/vc/checkMacrosPlugin.h | 26 +- .../voxelGeneratorPlugin/voxelGenerator.cpp | 48 +- plugin/voxelGeneratorPlugin/voxelGenerator.h | 15 +- samples/common/ErrorRecorder.h | 4 +- samples/common/common.h | 21 +- samples/common/debugTensorWriter.cpp | 34 +- samples/common/getOptions.cpp | 18 +- samples/common/half.h | 13 +- samples/common/sampleDevice.cpp | 4 +- samples/common/sampleDevice.h | 9 +- samples/common/sampleEngines.cpp | 115 +- samples/common/sampleEngines.h | 33 +- 
samples/common/sampleInference.cpp | 69 +- samples/common/sampleInference.h | 9 +- samples/common/sampleOptions.cpp | 138 +- samples/common/sampleReporting.cpp | 50 +- samples/common/sampleReporting.h | 7 +- samples/common/sampleUtils.cpp | 39 +- third_party/ieee/half.h | 7099 ++++++++++------- 114 files changed, 6093 insertions(+), 4678 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 1d3e583b5..6d2e8346f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -257,9 +257,16 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) if(NOT MSVC) - set(CMAKE_CXX_FLAGS "-Wno-deprecated-declarations ${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wpedantic") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wshadow -Wnon-virtual-dtor -Woverloaded-virtual") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wconversion -Wdouble-promotion") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat=2 -Wimplicit-fallthrough") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror") else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /WX") endif() ############################################################################################ diff --git a/include/NvInferRuntime.h b/include/NvInferRuntime.h index cf875bf1d..c444760e0 100644 --- a/include/NvInferRuntime.h +++ b/include/NvInferRuntime.h @@ -360,7 +360,8 @@ class DimsExprs //! struct DynamicPluginTensorDesc { - //! Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of any runtime dimension. + //! Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of any runtime + //! dimension. PluginTensorDesc desc; //! 
Lower bounds on tensor’s dimensions @@ -433,7 +434,8 @@ class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext //! return output; //! virtual DimsExprs getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept = 0; + int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + = 0; //! //! \brief Limit on number of format combinations accepted. @@ -473,7 +475,8 @@ class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext //! Warning: TensorRT will stop asking for formats once it finds kFORMAT_COMBINATION_LIMIT on combinations. //! virtual bool supportsFormatCombination( - int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0; + int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + = 0; //! //! \brief Configure the plugin. @@ -513,7 +516,8 @@ class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext //! \param nbOutputs Number of output tensors. //! virtual void configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, - DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0; + DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept + = 0; //! //! \brief Find the workspace size required by the layer. @@ -525,7 +529,8 @@ class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext //! \return The workspace size. //! virtual size_t getWorkspaceSize(PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, - int32_t nbOutputs) const noexcept = 0; + int32_t nbOutputs) const noexcept + = 0; //! //! \brief Execute the layer. @@ -540,7 +545,8 @@ class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext //! \return 0 for success, else non-zero (which will cause engine termination). //! 
virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0; + void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept + = 0; protected: //! @@ -557,8 +563,11 @@ class TRT_DEPRECATED IPluginV2DynamicExt : public nvinfer1::IPluginV2Ext virtual ~IPluginV2DynamicExt() noexcept {} -private: +protected: // Following are obsolete base class methods, and must not be implemented or used. + // Kept as protected (not private) so derived classes can use `using` declarations + // to suppress -Woverloaded-virtual warnings when defining new methods with the + // same names but different signatures. //! //! \brief Set plugin configuration @@ -916,7 +925,8 @@ class IPluginV3OneBuild : public IPluginCapability //! \return 0 for success, else non-zero (which will cause engine termination, if invoked by TensorRT). //! virtual int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, - DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0; + DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept + = 0; //! //! \brief Provide the data types of the plugin outputs if the input tensors have the data types provided. @@ -935,7 +945,8 @@ class IPluginV3OneBuild : public IPluginCapability //! \warning DataType:kBOOL and DataType::kUINT8 are not supported. //! virtual int32_t getOutputDataTypes( - DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept = 0; + DataType* outputTypes, int32_t nbOutputs, const DataType* inputTypes, int32_t nbInputs) const noexcept + = 0; //! //! \brief Provide expressions for computing dimensions of the output tensors from dimensions of the input tensors. @@ -959,7 +970,8 @@ class IPluginV3OneBuild : public IPluginCapability //! through the error recorder. //! 
virtual int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs, - int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept = 0; + int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept + = 0; //! //! \brief Return true if plugin supports the format and datatype for the input/output indexed by pos. @@ -996,7 +1008,8 @@ class IPluginV3OneBuild : public IPluginCapability //! \see getFormatCombinationLimit //! virtual bool supportsFormatCombination( - int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept = 0; + int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + = 0; //! //! \brief Get the number of outputs from the plugin. @@ -1014,8 +1027,8 @@ class IPluginV3OneBuild : public IPluginCapability //! //! \return The workspace size. //! - virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept + virtual size_t getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -1051,7 +1064,7 @@ class IPluginV3OneBuild : public IPluginCapability //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported //! through the error recorder. //! - virtual int32_t getValidTactics(int32_t* tactics, int32_t nbTactics) noexcept + virtual int32_t getValidTactics(int32_t* /*tactics*/, int32_t /*nbTactics*/) noexcept { return 0; } @@ -1118,7 +1131,7 @@ class IPluginV3OneRuntime : public IPluginCapability //! \return 0 for success, else non-zero (which will cause engine termination). The returned code will be reported //! through the error recorder. //! 
- virtual int32_t setTactic(int32_t tactic) noexcept + virtual int32_t setTactic(int32_t /*tactic*/) noexcept { return 0; } @@ -1142,7 +1155,8 @@ class IPluginV3OneRuntime : public IPluginCapability //! \param nbOutputs Number of output tensors. //! virtual int32_t onShapeChange( - PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept = 0; + PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept + = 0; //! //! \brief Execute the layer. @@ -1158,7 +1172,8 @@ class IPluginV3OneRuntime : public IPluginCapability //! through the error recorder. //! virtual int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept = 0; + void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept + = 0; //! //! \brief Clone the plugin, attach the cloned plugin object to a execution context and grant the cloned plugin @@ -1234,7 +1249,7 @@ class IPluginV3OneBuild : public v_1_0::IPluginV3OneBuild //! +->|Copy +--> t** --->|Plugin1 +--> t2 //! +-----+ +--------+ //! - virtual int32_t getAliasedInput(int32_t outputIndex) noexcept + virtual int32_t getAliasedInput(int32_t /*outputIndex*/) noexcept { return -1; } @@ -1680,7 +1695,8 @@ class IGpuAllocator : public IVersionedInterface //! \deprecated Deprecated in TensorRT 10.0. Superseded by allocateAsync //! TRT_DEPRECATED virtual void* allocate( - uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept = 0; + uint64_t const size, uint64_t const alignment, AllocatorFlags const flags) noexcept + = 0; ~IGpuAllocator() override = default; IGpuAllocator() = default; @@ -1854,7 +1870,6 @@ class IGpuAllocator : public IVersionedInterface //! using IGpuAllocator = v_1_0::IGpuAllocator; - //! //! \class IRuntime //! 
@@ -2179,7 +2194,6 @@ class IRuntime : public INoCopy return mImpl->getEngineHostCodeAllowed(); } - protected: apiv::VRuntime* mImpl; }; @@ -2629,7 +2643,8 @@ class IRefitter : public INoCopy //! The minimum and maximum specify the permitted range that is supported at runtime, while the optimum value //! is used for the kernel selection. This should be the "typical" value that is expected to occur at runtime. //! -//! \see IOptimizationProfile::setDimensions(), IOptimizationProfile::setShapeValuesV2(), IOptimizationProfile::setShapeValues() +//! \see IOptimizationProfile::setDimensions(), IOptimizationProfile::setShapeValuesV2(), +//! IOptimizationProfile::setShapeValues() //! enum class OptProfileSelector : int32_t { @@ -3121,7 +3136,6 @@ constexpr inline int32_t EnumMax() noexcept return 3; } - //! \class IRuntimeConfig //! //! \brief A class for runtime configuration. This class is used during execution context creation. @@ -3153,7 +3167,6 @@ class IRuntimeConfig : public INoCopy return mImpl->getExecutionContextAllocationStrategy(); } - protected: apiv::VRuntimeConfig* mImpl; }; // class IRuntimeConfig @@ -4209,7 +4222,7 @@ class IOutputAllocator : public IVersionedInterface //! \deprecated Deprecated in TensorRT 10.0. Superseded by reallocateOutputAsync with cudaStream_t argument //! TRT_DEPRECATED virtual void* reallocateOutput( - char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept + char const* /*tensorName*/, void* /*currentMemory*/, uint64_t /*size*/, uint64_t /*alignment*/) noexcept { return nullptr; } @@ -5534,7 +5547,8 @@ class IGpuAsyncAllocator : public IGpuAllocator //! - Thread-safe: Yes, this method is required to be thread-safe and may be called from multiple threads. //! void* allocateAsync(uint64_t const size, uint64_t const alignment, AllocatorFlags const flags, - cudaStream_t /*stream*/) noexcept override = 0; + cudaStream_t /*stream*/) noexcept override + = 0; //! //! 
\brief A thread-safe callback implemented by the application to handle stream-ordered asynchronous @@ -5652,7 +5666,8 @@ class IPluginCreatorV3One : public IPluginCreatorInterface //! engine deserialization), TensorRT will delete any objects it creates. //! virtual IPluginV3* createPlugin( - AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept = 0; + AsciiChar const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + = 0; //! //! \brief Return a list of fields that need to be passed to createPlugin() when creating a plugin for use in the diff --git a/include/NvInferRuntimePlugin.h b/include/NvInferRuntimePlugin.h index e374a6942..d166fb228 100644 --- a/include/NvInferRuntimePlugin.h +++ b/include/NvInferRuntimePlugin.h @@ -420,8 +420,8 @@ class TRT_DEPRECATED IPluginV2 //! //! \param pluginNamespace The namespace for the plugin object. //! - //! \warning The string pluginNamespace will be NULL-terminated and have a length of 1024 bytes or less including the - //! NULL terminator. + //! \warning The string pluginNamespace will be NULL-terminated and have a length of 1024 bytes or less including + //! the NULL terminator. //! //! \usage //! - Allowed context for the API call @@ -446,15 +446,15 @@ class TRT_DEPRECATED IPluginV2 // @cond SuppressDoxyWarnings IPluginV2() = default; virtual ~IPluginV2() noexcept = default; -// @endcond + // @endcond protected: -// @cond SuppressDoxyWarnings + // @cond SuppressDoxyWarnings IPluginV2(IPluginV2 const&) = default; IPluginV2(IPluginV2&&) = default; IPluginV2& operator=(IPluginV2 const&) & = default; IPluginV2& operator=(IPluginV2&&) & = default; -// @endcond + // @endcond }; //! @@ -672,7 +672,7 @@ class TRT_DEPRECATED IPluginV2Ext : public IPluginV2 IPluginV2Ext(IPluginV2Ext&&) = default; IPluginV2Ext& operator=(IPluginV2Ext const&) & = default; IPluginV2Ext& operator=(IPluginV2Ext&&) & = default; -// @endcond + // @endcond //! //! 
\brief Return the API version with which this plugin was built. The @@ -787,15 +787,15 @@ class TRT_DEPRECATED IPluginV2IOExt : public IPluginV2Ext // @cond SuppressDoxyWarnings IPluginV2IOExt() = default; ~IPluginV2IOExt() override = default; -// @endcond + // @endcond protected: -// @cond SuppressDoxyWarnings + // @cond SuppressDoxyWarnings IPluginV2IOExt(IPluginV2IOExt const&) = default; IPluginV2IOExt(IPluginV2IOExt&&) = default; IPluginV2IOExt& operator=(IPluginV2IOExt const&) & = default; IPluginV2IOExt& operator=(IPluginV2IOExt&&) & = default; -// @endcond + // @endcond //! //! \brief Return the API version with which this plugin was built. The upper byte is reserved by TensorRT and is @@ -814,8 +814,10 @@ class TRT_DEPRECATED IPluginV2IOExt : public IPluginV2Ext | (static_cast(NV_TENSORRT_VERSION) & 0xFFFFFFU)); } -private: +protected: // Following are obsolete base class methods, and must not be implemented or used. + // Kept as protected (not private) so derived classes can use `using` declarations + // to suppress -Woverloaded-virtual warnings. //! //! \brief Set plugin configuration. 
diff --git a/plugin/batchTilePlugin/batchTilePlugin.cpp b/plugin/batchTilePlugin/batchTilePlugin.cpp index 0e5e32f2b..f83d7fabc 100644 --- a/plugin/batchTilePlugin/batchTilePlugin.cpp +++ b/plugin/batchTilePlugin/batchTilePlugin.cpp @@ -84,7 +84,7 @@ size_t BatchTilePlugin::getWorkspaceSize(int32_t) const noexcept } DataType BatchTilePlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -137,19 +137,20 @@ size_t BatchTilePlugin::getSerializationSize() const noexcept } bool BatchTilePlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } -bool BatchTilePlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool BatchTilePlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } -void BatchTilePlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void BatchTilePlugin::configurePlugin(Dims const* inputDims, int32_t /*nbInputs*/, Dims const* outputDims, + int32_t nbOutputs, DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t /*maxBatchSize*/) noexcept { try { @@ -239,7 +240,7 @@ PluginFieldCollection const* BatchTilePluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* BatchTilePluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* 
BatchTilePluginCreator::createPlugin(char const* name, PluginFieldCollection const* /*fc*/) noexcept { try { diff --git a/plugin/batchedNMSPlugin/batchedNMSPlugin.cpp b/plugin/batchedNMSPlugin/batchedNMSPlugin.cpp index e453950f9..ee2069fce 100644 --- a/plugin/batchedNMSPlugin/batchedNMSPlugin.cpp +++ b/plugin/batchedNMSPlugin/batchedNMSPlugin.cpp @@ -148,9 +148,9 @@ Dims BatchedNMSPlugin::getOutputDimensions(int32_t index, Dims const* inputs, in PLUGIN_ASSERT(inputs[0].nbDims == 3); PLUGIN_ASSERT(inputs[1].nbDims == 2 || (inputs[1].nbDims == 3 && inputs[1].d[2] == 1)); // mBoxesSize: number of box coordinates for one sample - mBoxesSize = inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2]; + mBoxesSize = static_cast(inputs[0].d[0] * inputs[0].d[1] * inputs[0].d[2]); // mScoresSize: number of scores for one sample - mScoresSize = inputs[1].d[0] * inputs[1].d[1]; + mScoresSize = static_cast(inputs[1].d[0] * inputs[1].d[1]); // num_detections if (index == 0) { @@ -237,13 +237,13 @@ size_t BatchedNMSPlugin::getWorkspaceSize(int32_t maxBatchSize) const noexcept mNumPriors, param.topK, mPrecision, mPrecision); } -size_t BatchedNMSDynamicPlugin::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t BatchedNMSDynamicPlugin::getWorkspaceSize(PluginTensorDesc const* inputs, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { - int32_t batchSize = inputs[0].dims.d[0]; - int32_t boxesSize = inputs[0].dims.d[1] * inputs[0].dims.d[2] * inputs[0].dims.d[3]; - int32_t scoreSize = inputs[1].dims.d[1] * inputs[1].dims.d[2]; - int32_t numPriors = inputs[0].dims.d[1]; + int32_t batchSize = static_cast(inputs[0].dims.d[0]); + int32_t boxesSize = static_cast(inputs[0].dims.d[1] * inputs[0].dims.d[2] * inputs[0].dims.d[3]); + int32_t scoreSize = static_cast(inputs[1].dims.d[1] * inputs[1].dims.d[2]); + int32_t numPriors = 
static_cast(inputs[0].dims.d[1]); return detectionInferenceWorkspaceSize(param.shareLocation, batchSize, boxesSize, scoreSize, param.numClasses, numPriors, param.topK, mPrecision, mPrecision); } @@ -299,10 +299,11 @@ int32_t BatchedNMSDynamicPlugin::enqueue(PluginTensorDesc const* inputDesc, Plug void* nmsedScores = outputs[2]; void* nmsedClasses = outputs[3]; - pluginStatus_t status = nmsInference(stream, inputDesc[0].dims.d[0], mBoxesSize, mScoresSize, - param.shareLocation, param.backgroundLabelId, mNumPriors, param.numClasses, param.topK, param.keepTopK, - param.scoreThreshold, param.iouThreshold, mPrecision, locData, mPrecision, confData, keepCount, nmsedBoxes, - nmsedScores, nmsedClasses, workspace, param.isNormalized, false, mClipBoxes, mScoreBits, mCaffeSemantics); + pluginStatus_t status = nmsInference(stream, static_cast(inputDesc[0].dims.d[0]), mBoxesSize, + mScoresSize, param.shareLocation, param.backgroundLabelId, mNumPriors, param.numClasses, param.topK, + param.keepTopK, param.scoreThreshold, param.iouThreshold, mPrecision, locData, mPrecision, confData, + keepCount, nmsedBoxes, nmsedScores, nmsedClasses, workspace, param.isNormalized, false, mClipBoxes, + mScoreBits, mCaffeSemantics); return status; } catch (std::exception const& e) @@ -352,9 +353,9 @@ void BatchedNMSDynamicPlugin::serialize(void* buffer) const noexcept PLUGIN_ASSERT(d == a + getSerializationSize()); } -void BatchedNMSPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, nvinfer1::PluginFormat format, int32_t maxBatchSize) noexcept +void BatchedNMSPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* inputIsBroadcast, + bool const* outputIsBroadcast, nvinfer1::PluginFormat 
/*format*/, int32_t /*maxBatchSize*/) noexcept { try { @@ -365,10 +366,10 @@ void BatchedNMSPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, PLUGIN_ASSERT(std::none_of(inputIsBroadcast, inputIsBroadcast + nbInputs, [](bool b) { return b; })); PLUGIN_ASSERT(std::none_of(outputIsBroadcast, outputIsBroadcast + nbInputs, [](bool b) { return b; })); - mBoxesSize = inputDims[0].d[0] * inputDims[0].d[1] * inputDims[0].d[2]; - mScoresSize = inputDims[1].d[0] * inputDims[1].d[1]; + mBoxesSize = static_cast(inputDims[0].d[0] * inputDims[0].d[1] * inputDims[0].d[2]); + mScoresSize = static_cast(inputDims[1].d[0] * inputDims[1].d[1]); // num_boxes - mNumPriors = inputDims[0].d[0]; + mNumPriors = static_cast(inputDims[0].d[0]); const int32_t numLocClasses = param.shareLocation ? 1 : param.numClasses; // Third dimension of boxes must be either 1 or num_classes PLUGIN_ASSERT(inputDims[0].d[1] == numLocClasses); @@ -381,8 +382,8 @@ void BatchedNMSPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, } } -void BatchedNMSDynamicPlugin::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void BatchedNMSDynamicPlugin::configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t nbOutputs) noexcept { try { @@ -401,10 +402,10 @@ void BatchedNMSDynamicPlugin::configurePlugin( // Constant shape: [batch_size, num_boxes, num_classes] or [batch_size, num_boxes, num_classes, 1] PLUGIN_ASSERT(in[1].desc.dims.nbDims == 3 || (in[1].desc.dims.nbDims == 4 && in[1].desc.dims.d[3] == 1)); - mBoxesSize = in[0].desc.dims.d[1] * in[0].desc.dims.d[2] * in[0].desc.dims.d[3]; - mScoresSize = in[1].desc.dims.d[1] * in[1].desc.dims.d[2]; + mBoxesSize = static_cast(in[0].desc.dims.d[1] * in[0].desc.dims.d[2] * in[0].desc.dims.d[3]); + mScoresSize = static_cast(in[1].desc.dims.d[1] * in[1].desc.dims.d[2]); // num_boxes - mNumPriors = 
in[0].desc.dims.d[1]; + mNumPriors = static_cast(in[0].desc.dims.d[1]); mPrecision = in[0].desc.type; } @@ -564,7 +565,7 @@ char const* BatchedNMSDynamicPlugin::getPluginNamespace() const noexcept } nvinfer1::DataType BatchedNMSPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { if (index == 0) { @@ -574,7 +575,7 @@ nvinfer1::DataType BatchedNMSPlugin::getOutputDataType( } nvinfer1::DataType BatchedNMSDynamicPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { if (index == 0) { @@ -614,12 +615,12 @@ void BatchedNMSDynamicPlugin::setCaffeSemantics(bool caffeSemantics) noexcept } bool BatchedNMSPlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } -bool BatchedNMSPlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool BatchedNMSPlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } @@ -638,7 +639,7 @@ BatchedNMSBasePluginCreator::BatchedNMSBasePluginCreator() mPluginAttributes.emplace_back(PluginField("clipBoxes", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("scoreBits", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("caffeSemantics", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -662,7 +663,7 @@ PluginFieldCollection const* BatchedNMSBasePluginCreator::getFieldNames() noexce return &mFC; } -IPluginV2Ext* 
BatchedNMSPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* BatchedNMSPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -757,7 +758,7 @@ IPluginV2Ext* BatchedNMSPluginCreator::createPlugin(char const* name, PluginFiel } IPluginV2DynamicExt* BatchedNMSDynamicPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -852,7 +853,7 @@ IPluginV2DynamicExt* BatchedNMSDynamicPluginCreator::createPlugin( } IPluginV2Ext* BatchedNMSPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { @@ -873,7 +874,7 @@ IPluginV2Ext* BatchedNMSPluginCreator::deserializePlugin( } IPluginV2DynamicExt* BatchedNMSDynamicPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/batchedNMSPlugin/batchedNMSPlugin.h b/plugin/batchedNMSPlugin/batchedNMSPlugin.h index 5ecebc625..8ea4c0112 100644 --- a/plugin/batchedNMSPlugin/batchedNMSPlugin.h +++ b/plugin/batchedNMSPlugin/batchedNMSPlugin.h @@ -82,6 +82,11 @@ class TRT_DEPRECATED BatchedNMSPlugin : public IPluginV2Ext class TRT_DEPRECATED BatchedNMSDynamicPlugin : public IPluginV2DynamicExt { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + BatchedNMSDynamicPlugin(NMSParameters param); BatchedNMSDynamicPlugin(void const* data, size_t length); ~BatchedNMSDynamicPlugin() override = default; diff --git a/plugin/bertQKVToContextPlugin/mhaRunner.h b/plugin/bertQKVToContextPlugin/mhaRunner.h index 
1cbcb4dc9..e0c685ac7 100644 --- a/plugin/bertQKVToContextPlugin/mhaRunner.h +++ b/plugin/bertQKVToContextPlugin/mhaRunner.h @@ -68,7 +68,7 @@ class MHARunner mB = B; mS = S; mHeadSize = headSize; - mRsqrtHeadSize = 1.F / std::sqrt(headSize); + mRsqrtHeadSize = 1.0f / std::sqrt(static_cast(headSize)); mLdQKV = 3 * B * mNumHeads * mHeadSize; mStrideQKV = 3 * mHeadSize; diff --git a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPlugin.cpp b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPlugin.cpp index a69983fe4..2157f17e2 100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPlugin.cpp +++ b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPlugin.cpp @@ -69,7 +69,7 @@ QKVToContextInterleavedPlugin::QKVToContextInterleavedPlugin(std::string const& QKVToContextInterleavedPlugin::~QKVToContextInterleavedPlugin() {} -IPluginV3* QKVToContextInterleavedPlugin::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* QKVToContextInterleavedPlugin::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -114,7 +114,7 @@ IPluginV3* QKVToContextInterleavedPlugin::clone() noexcept } int32_t QKVToContextInterleavedPlugin::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -172,26 +172,26 @@ bool QKVToContextInterleavedPlugin::supportsFormatCombination( return false; } -int32_t QKVToContextInterleavedPlugin::onShapeChange( - PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept +int32_t QKVToContextInterleavedPlugin::onShapeChange(PluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -int32_t 
QKVToContextInterleavedPlugin::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +int32_t QKVToContextInterleavedPlugin::configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -size_t QKVToContextInterleavedPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t QKVToContextInterleavedPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } int32_t QKVToContextInterleavedPlugin::getOutputDataTypes( - DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept + DataType* outputTypes, int32_t nbOutputs, DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -236,9 +236,9 @@ int32_t QKVToContextInterleavedPlugin::enqueue(PluginTensorDesc const* inputDesc { PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const total = inputDesc[0].dims.d[2]; - int32_t const B = inputDesc[1].dims.d[0] - 1; - int32_t const maxS = inputDesc[2].dims.d[0]; + int32_t const total = static_cast(inputDesc[0].dims.d[2]); + int32_t const B = static_cast(inputDesc[1].dims.d[0] - 1); + int32_t const maxS = static_cast(inputDesc[2].dims.d[0]); int32_t S = 384; if (maxS <= 128) { @@ -267,9 +267,9 @@ int32_t QKVToContextInterleavedPlugin::enqueue(PluginTensorDesc const* inputDesc float scaleQkv = mUseExplicitInt8 ? mQkvScale : inputDesc[0].scale; float scaleCtx = mUseExplicitInt8 ? 
mCtxScale : outputDesc[0].scale; - float scaleBmm1 = scaleQkv * scaleQkv * 0.125; // 1 / sqrt(64) + float scaleBmm1 = scaleQkv * scaleQkv * 0.125f; // 1 / sqrt(64) float scaleBmm2 = mDqProbs * scaleQkv / scaleCtx; - float scaleSoftmax = 1.F / mDqProbs; + float scaleSoftmax = 1.0f / mDqProbs; params.scale_bmm1 = reinterpret_cast(scaleBmm1); params.scale_bmm2 = reinterpret_cast(scaleBmm2); @@ -279,8 +279,8 @@ int32_t QKVToContextInterleavedPlugin::enqueue(PluginTensorDesc const* inputDesc params.o_stride_in_bytes = total; params.use_int8_scale_max = mUseInt8ScaleMax; - params.enable_i2f_trick - = -double(1 << 22) * double(scaleBmm2) <= -128.F && double(1 << 22) * double(scaleBmm2) >= 127.F; + params.enable_i2f_trick = -static_cast(1 << 22) * static_cast(scaleBmm2) <= -128.0 + && static_cast(1 << 22) * static_cast(scaleBmm2) >= 127.0; try { @@ -310,7 +310,7 @@ PluginFieldCollection const* QKVToContextInterleavedPlugin::getFieldsToSerialize mDataToSerialize.emplace_back("dq_probs", &mDqProbs, PluginFieldType::kFLOAT32, 1); } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; @@ -331,7 +331,7 @@ QKVToContextInterleavedPluginCreator::QKVToContextInterleavedPluginCreator() mPluginAttributes.emplace_back(PluginField("input_qkv_scale", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("output_ctx_scale", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.cpp b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.cpp index 64df35a0e..1b606ee55 100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.cpp +++ 
b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.cpp @@ -104,7 +104,7 @@ nvinfer1::IPluginV2DynamicExt* QKVToContextInterleavedPluginLegacy::clone() cons } DimsExprs QKVToContextInterleavedPluginLegacy::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t outputIndex, DimsExprs const* inputs, int32_t /*nbInputs*/, IExprBuilder& exprBuilder) noexcept { // Input SHAPE is 1x(3*N*H)xTotalx1 (NCHW) // Output SHAPE is 1x(N*H)xTotalx1 @@ -148,20 +148,20 @@ bool QKVToContextInterleavedPluginLegacy::supportsFormatCombination( return false; } -void QKVToContextInterleavedPluginLegacy::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void QKVToContextInterleavedPluginLegacy::configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { } -size_t QKVToContextInterleavedPluginLegacy::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t QKVToContextInterleavedPluginLegacy::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } // IPluginV2Ext Methods DataType QKVToContextInterleavedPluginLegacy::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index == 0); return DataType::kINT8; @@ -233,9 +233,9 @@ int32_t QKVToContextInterleavedPluginLegacy::enqueue(PluginTensorDesc const* inp { PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const total = inputDesc[0].dims.d[2]; - 
int32_t const B = inputDesc[1].dims.d[0] - 1; - int32_t const maxS = inputDesc[2].dims.d[0]; + int32_t const total = static_cast(inputDesc[0].dims.d[2]); + int32_t const B = static_cast(inputDesc[1].dims.d[0] - 1); + int32_t const maxS = static_cast(inputDesc[2].dims.d[0]); int32_t S = 384; if (maxS <= 128) { @@ -264,9 +264,9 @@ int32_t QKVToContextInterleavedPluginLegacy::enqueue(PluginTensorDesc const* inp float scaleQkv = mUseExplicitInt8 ? mQkvScale : inputDesc[0].scale; float scaleCtx = mUseExplicitInt8 ? mCtxScale : outputDesc[0].scale; - float scaleBmm1 = scaleQkv * scaleQkv * 0.125; // 1 / sqrt(64) + float scaleBmm1 = scaleQkv * scaleQkv * 0.125f; // 1 / sqrt(64) float scaleBmm2 = mDqProbs * scaleQkv / scaleCtx; - float scaleSoftmax = 1.F / mDqProbs; + float scaleSoftmax = 1.0f / mDqProbs; params.scale_bmm1 = reinterpret_cast(scaleBmm1); params.scale_bmm2 = reinterpret_cast(scaleBmm2); @@ -276,8 +276,8 @@ int32_t QKVToContextInterleavedPluginLegacy::enqueue(PluginTensorDesc const* inp params.o_stride_in_bytes = total; params.use_int8_scale_max = mUseInt8ScaleMax; - params.enable_i2f_trick - = -double(1 << 22) * double(scaleBmm2) <= -128.F && double(1 << 22) * double(scaleBmm2) >= 127.F; + params.enable_i2f_trick = -static_cast(1 << 22) * static_cast(scaleBmm2) <= -128.0 + && static_cast(1 << 22) * static_cast(scaleBmm2) >= 127.0; try { @@ -302,7 +302,7 @@ QKVToContextInterleavedPluginLegacyCreator::QKVToContextInterleavedPluginLegacyC mPluginAttributes.emplace_back(PluginField("input_qkv_scale", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("output_ctx_scale", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.h b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.h index 4858f6998..48a098d64 
100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.h +++ b/plugin/bertQKVToContextPlugin/qkvToContextInt8InterleavedPluginLegacy.h @@ -43,6 +43,11 @@ static constexpr int32_t kSM_BLACKWELL_120 = 120; class QKVToContextInterleavedPluginLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + QKVToContextInterleavedPluginLegacy(std::string const& name, int32_t hiddenSize, int32_t numHeads, float dqProbs, bool useInt8ScaleMax, bool useExplicitInt8, float qkvScale, float ctxScale); diff --git a/plugin/bertQKVToContextPlugin/qkvToContextPlugin.cpp b/plugin/bertQKVToContextPlugin/qkvToContextPlugin.cpp index ea98d1dd0..5fbae21ed 100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextPlugin.cpp +++ b/plugin/bertQKVToContextPlugin/qkvToContextPlugin.cpp @@ -100,7 +100,6 @@ QKVToContextPluginDynamic::QKVToContextPluginDynamic(const std::string name, con BERT_DEBUG_MSG("MHA Runner Deser Done"); } - IPluginCapability* QKVToContextPluginDynamic::getCapabilityInterface(PluginCapabilityType type) noexcept { try @@ -167,7 +166,7 @@ IPluginV3* QKVToContextPluginDynamic::clone() noexcept } int32_t QKVToContextPluginDynamic::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -200,7 +199,7 @@ bool QKVToContextPluginDynamic::supportsFormatCombination( PLUGIN_ASSERT(nbInputs == 1 + mHasImask); auto const* in = inOut; auto const* out = inOut + nbInputs; - int32_t packedSize = getMHAMaskPackedSize(mSM, mType, in->desc.dims.d[SDIM]); + int32_t packedSize = getMHAMaskPackedSize(mSM, mType, 
static_cast(in->desc.dims.d[SDIM])); // we only support int8 IO in fused mha runner, and we only support fused mha runner on Xavier, Turing and Ampere if (mType == DataType::kINT8) @@ -326,8 +325,8 @@ int32_t QKVToContextPluginDynamic::onShapeChange( createMHARunner(); // mS and mB that are set by configurePlugin() may be stale - mS = inDesc.dims.d[SDIM]; - mB = inDesc.dims.d[BDIM]; + mS = static_cast(inDesc.dims.d[SDIM]); + mB = static_cast(inDesc.dims.d[BDIM]); PLUGIN_ASSERT(mS); PLUGIN_ASSERT(mB); if (fusedDispatcher.get() && fusedDispatcher->isValid(mHeadSize, mS)) @@ -375,13 +374,13 @@ int32_t QKVToContextPluginDynamic::configurePlugin( createMHARunner(); - const int32_t S = inDesc.dims.d[SDIM]; - const int32_t B = inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]; + const int32_t S = static_cast(inDesc.dims.d[SDIM]); + const int32_t B = static_cast(inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]); if (S <= 0) { // in dynamic shape build stage, we setup with max sequence that cannot fused - const int32_t Smin = in->min.d[SDIM]; - const int32_t Smax = in->max.d[SDIM]; + const int32_t Smin = static_cast(in->min.d[SDIM]); + const int32_t Smax = static_cast(in->max.d[SDIM]); if (fusedDispatcher.get()) { @@ -438,7 +437,7 @@ size_t QKVToContextPluginDynamic::getWorkspaceSize(DynamicPluginTensorDesc const // IPluginV2Ext Methods int32_t QKVToContextPluginDynamic::getOutputDataTypes( - DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept + DataType* outputTypes, int32_t /*nbOutputs*/, DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { try { @@ -496,7 +495,6 @@ char const* QKVToContextPluginDynamic::getPluginName() const noexcept return kQKV_TO_CONTEXT_PLUGIN_NAME; } - void QKVToContextPluginDynamic::setPluginNamespace(char const* libNamespace) noexcept { mNamespace = libNamespace; @@ -517,7 +515,8 @@ int32_t QKVToContextPluginDynamic::enqueue(PluginTensorDesc const* 
inputDesc, Pl try { void const* const maskPtr = mHasImask ? inputs[1] : nullptr; - if (mHasImask && fusedDispatcher.get() && fusedDispatcher->isValid(mHeadSize, inputDesc->dims.d[SDIM])) + if (mHasImask && fusedDispatcher.get() + && fusedDispatcher->isValid(mHeadSize, static_cast(inputDesc->dims.d[SDIM]))) { fusedDispatcher->run( inputDesc[0], outputDesc[0], inputs[0], maskPtr, outputs[0], workspace, stream, mCublasHandle); @@ -556,7 +555,7 @@ PluginFieldCollection const* QKVToContextPluginDynamic::getFieldsToSerialize() n mRunnerStateBuffer.resize(unfusedDispatcher->getSerializationSize()); unfusedDispatcher->serialize(mRunnerStateBuffer.data()); mDataToSerialize.emplace_back("runnerStateBuffer", (void const*) mRunnerStateBuffer.data(), - PluginFieldType::kUNKNOWN, mRunnerStateBuffer.size()); + PluginFieldType::kUNKNOWN, static_cast(mRunnerStateBuffer.size())); } else { @@ -570,7 +569,7 @@ PluginFieldCollection const* QKVToContextPluginDynamic::getFieldsToSerialize() n mDataToSerialize.emplace_back("dq_probs", &mDqProbs, PluginFieldType::kFLOAT32, 1); } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; @@ -584,7 +583,7 @@ QKVToContextPluginDynamicCreator::QKVToContextPluginDynamicCreator() mPluginAttributes.emplace_back(PluginField("has_mask", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("dq_probs", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -743,7 +742,6 @@ IPluginV3* QKVToContextPluginDynamicCreator::createPlugin( return nullptr; } - void QKVToContextPluginDynamicCreator::setPluginNamespace(char const* libNamespace) noexcept { mNamespace = libNamespace; @@ -754,7 +752,6 @@ char const* QKVToContextPluginDynamicCreator::getPluginNamespace() const 
noexcep return mNamespace.c_str(); } - ///// QKVToContextVarSeqlenPlugin (CustomQKVToContextPluginDynamic v5) //// QKVToContextVarSeqlenPlugin::~QKVToContextVarSeqlenPlugin() {} @@ -879,7 +876,6 @@ void QKVToContextVarSeqlenPlugin::createMHARunner() } } - IPluginV3* QKVToContextVarSeqlenPlugin::clone() noexcept { BERT_DEBUG_MSG("QKV Clone"); @@ -910,7 +906,7 @@ IPluginV3* QKVToContextVarSeqlenPlugin::clone() noexcept } int32_t QKVToContextVarSeqlenPlugin::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -1098,8 +1094,8 @@ int32_t QKVToContextVarSeqlenPlugin::configurePlugin( PLUGIN_ASSERT(maskDesc.dims.d[0] == inDesc.dims.d[BDIM]); } - const int32_t S = inDesc.dims.d[SDIM] <= 0 ? in->max.d[SDIM] : inDesc.dims.d[SDIM]; - const int32_t B = inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]; + const int32_t S = static_cast(inDesc.dims.d[SDIM] <= 0 ? in->max.d[SDIM] : inDesc.dims.d[SDIM]); + const int32_t B = static_cast(inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]); if (S != mS || B != mB) { @@ -1131,12 +1127,13 @@ int32_t QKVToContextVarSeqlenPlugin::configurePlugin( size_t QKVToContextVarSeqlenPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t /* nbInputs */, DynamicPluginTensorDesc const* /* outputs */, int32_t /* nbOutputs */) const noexcept { - size_t paddingWorkpaceSize = mPatcher ? mPatcher->getWorkspaceSize(inputs[0].desc.dims.d[0], mNumHeads) : 0; + size_t paddingWorkpaceSize + = mPatcher ? 
mPatcher->getWorkspaceSize(static_cast(inputs[0].desc.dims.d[0]), mNumHeads) : 0; return mDispatcher->getWorkspaceSize() + paddingWorkpaceSize; } int32_t QKVToContextVarSeqlenPlugin::getOutputDataTypes( - DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept + DataType* outputTypes, int32_t /*nbOutputs*/, DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { try { @@ -1194,7 +1191,6 @@ char const* QKVToContextVarSeqlenPlugin::getPluginName() const noexcept return kQKV_TO_CONTEXT_PLUGIN_NAME; } - void QKVToContextVarSeqlenPlugin::setPluginNamespace(char const* libNamespace) noexcept { mNamespace = libNamespace; @@ -1213,8 +1209,8 @@ int32_t QKVToContextVarSeqlenPlugin::enqueue(nvinfer1::PluginTensorDesc const* i if (mUseVarSeqlen) { - const int32_t B = inputDesc[2].dims.d[0] - 1; - const int32_t maxS = inputDesc[3].dims.d[0]; + const int32_t B = static_cast(inputDesc[2].dims.d[0] - 1); + const int32_t maxS = static_cast(inputDesc[3].dims.d[0]); PLUGIN_ASSERT((maxS <= 512) && "No implementation for variable sequence length multi-head attention plugin with sequence > 512."); @@ -1249,7 +1245,8 @@ int32_t QKVToContextVarSeqlenPlugin::enqueue(nvinfer1::PluginTensorDesc const* i } auto runV2Kernel = [this, &S, &B, &workspace, &inputDesc, &outputDesc, &stream, &inputs, &outputs]( - MHARunner* dispatcher, QkvPaddingRunner* patcher, int32_t padSize) { + MHARunner* dispatcher, QkvPaddingRunner* patcher, int32_t padSize) + { PLUGIN_ASSERT(dispatcher); // Validate that we can padding to the dispatch required head size also there is kernel exist for this // sequence length. 
@@ -1264,7 +1261,7 @@ int32_t QKVToContextVarSeqlenPlugin::enqueue(nvinfer1::PluginTensorDesc const* i { PLUGIN_ASSERT(patcher); PLUGIN_ASSERT(padSize <= patcher->getMaxPaddingHeadSize()); - auto sumSeqLen = inputDesc[0].dims.d[0]; + int32_t sumSeqLen = static_cast(inputDesc[0].dims.d[0]); auto paddingWorkspace = patcher->get16BytesAlignedPointer(workspace, dispatcher->getWorkspaceSize()); auto ret = mPatcher->pad(inputs[0], paddingWorkspace, sumSeqLen, mNumHeads, mHeadSize, padSize, stream); if (ret != cudaSuccess) @@ -1337,14 +1334,14 @@ PluginFieldCollection const* QKVToContextVarSeqlenPlugin::getFieldsToSerialize() mRunnerStateBuffer.resize(mDispatcher->getSerializationSize()); mDispatcher->serialize(mRunnerStateBuffer.data()); mDataToSerialize.emplace_back("runnerStateBuffer", (void const*) mRunnerStateBuffer.data(), - PluginFieldType::kUNKNOWN, mRunnerStateBuffer.size()); + PluginFieldType::kUNKNOWN, static_cast(mRunnerStateBuffer.size())); if (mDqProbs >= 0) { mDataToSerialize.emplace_back("dq_probs", &mDqProbs, PluginFieldType::kFLOAT32, 1); } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; @@ -1361,7 +1358,7 @@ QKVToContextVarSeqlenPluginCreator::QKVToContextVarSeqlenPluginCreator() mPluginAttributes.emplace_back(PluginField("var_seqlen", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("use_int8_scale_max", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.cpp b/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.cpp index 9fe6a27c8..e0c1fc76c 100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.cpp +++ b/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.cpp @@ 
-162,7 +162,7 @@ bool QKVToContextPluginDynamicLegacy::supportsFormatCombination( PLUGIN_ASSERT(nbInputs == 1 + mHasImask); auto const* in = inOut; auto const* out = inOut + nbInputs; - int32_t packedSize = getMHAMaskPackedSize(mSM, mType, in->dims.d[SDIM]); + int32_t packedSize = getMHAMaskPackedSize(mSM, mType, static_cast(in->dims.d[SDIM])); // we only support int8 IO in fused mha runner, and we only support fused mha runner on Xavier, Turing and Ampere if (mType == DataType::kINT8) @@ -282,13 +282,13 @@ void QKVToContextPluginDynamicLegacy::configurePlugin( createMHARunner(); - int32_t const S = inDesc.dims.d[SDIM]; - int32_t const B = inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]; + int32_t const S = static_cast(inDesc.dims.d[SDIM]); + int32_t const B = static_cast(inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]); if (S <= 0) { // in dynamic shape build stage, we setup with max sequence that cannot fused - int32_t const Smin = in->min.d[SDIM]; - int32_t const Smax = in->max.d[SDIM]; + int32_t const Smin = static_cast(in->min.d[SDIM]); + int32_t const Smax = static_cast(in->max.d[SDIM]); if (fusedDispatcher.get()) { @@ -347,7 +347,7 @@ DataType QKVToContextPluginDynamicLegacy::getOutputDataType( } void QKVToContextPluginDynamicLegacy::attachToContext( - cudnnContext* cudnn, cublasContext* cublas, nvinfer1::IGpuAllocator* allocator) noexcept + cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, nvinfer1::IGpuAllocator* allocator) noexcept { try { @@ -442,7 +442,8 @@ int32_t QKVToContextPluginDynamicLegacy::enqueue(PluginTensorDesc const* inputDe try { void const* const maskPtr = mHasImask ? 
inputs[1] : nullptr; - if (mHasImask && fusedDispatcher.get() && fusedDispatcher->isValid(mHeadSize, inputDesc->dims.d[SDIM])) + if (mHasImask && fusedDispatcher.get() + && fusedDispatcher->isValid(mHeadSize, static_cast(inputDesc->dims.d[SDIM]))) { fusedDispatcher->run( inputDesc[0], outputDesc[0], inputs[0], maskPtr, outputs[0], workspace, stream, mCublas); @@ -471,7 +472,7 @@ QKVToContextPluginDynamicLegacyCreator::QKVToContextPluginDynamicLegacyCreator() mPluginAttributes.emplace_back(PluginField("has_mask", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("dq_probs", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -819,8 +820,8 @@ void QKVToContextVarSeqlenPluginLegacy::configurePlugin( PLUGIN_ASSERT(maskDesc.dims.d[0] == inDesc.dims.d[BDIM]); } - int32_t const S = inDesc.dims.d[SDIM] <= 0 ? in->max.d[SDIM] : inDesc.dims.d[SDIM]; - int32_t const B = inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]; + int32_t const S = static_cast(inDesc.dims.d[SDIM] <= 0 ? in->max.d[SDIM] : inDesc.dims.d[SDIM]); + int32_t const B = static_cast(inDesc.dims.d[BDIM] <= 0 ? in->max.d[BDIM] : inDesc.dims.d[BDIM]); if (S != mS || B != mB) { @@ -844,7 +845,8 @@ void QKVToContextVarSeqlenPluginLegacy::configurePlugin( size_t QKVToContextVarSeqlenPluginLegacy::getWorkspaceSize(PluginTensorDesc const* inputs, int32_t /* nbInputs */, PluginTensorDesc const* /* outputs */, int32_t /* nbOutputs */) const noexcept { - size_t paddingWorkpaceSize = mPatcher ? mPatcher->getWorkspaceSize(inputs[0].dims.d[0], mNumHeads) : 0; + size_t paddingWorkpaceSize + = mPatcher ? 
mPatcher->getWorkspaceSize(static_cast(inputs[0].dims.d[0]), mNumHeads) : 0; return mDispatcher->getWorkspaceSize() + paddingWorkpaceSize; } @@ -859,7 +861,7 @@ DataType QKVToContextVarSeqlenPluginLegacy::getOutputDataType( } void QKVToContextVarSeqlenPluginLegacy::attachToContext( - cudnnContext* cudnn, cublasContext* cublas, nvinfer1::IGpuAllocator* allocator) noexcept + cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, nvinfer1::IGpuAllocator* allocator) noexcept { try { @@ -944,8 +946,8 @@ int32_t QKVToContextVarSeqlenPluginLegacy::enqueue(nvinfer1::PluginTensorDesc co if (mUseVarSeqlen) { - int32_t const B = inputDesc[2].dims.d[0] - 1; - int32_t const maxS = inputDesc[3].dims.d[0]; + int32_t const B = static_cast(inputDesc[2].dims.d[0] - 1); + int32_t const maxS = static_cast(inputDesc[3].dims.d[0]); PLUGIN_ASSERT((maxS <= 512) && "No implementation for variable sequence length multi-head attention plugin with sequence > 512."); @@ -980,7 +982,8 @@ int32_t QKVToContextVarSeqlenPluginLegacy::enqueue(nvinfer1::PluginTensorDesc co } auto runV2Kernel = [this, &S, &B, &workspace, &inputDesc, &outputDesc, &stream, &inputs, &outputs]( - MHARunner* dispatcher, QkvPaddingRunner* patcher, int32_t padSize) { + MHARunner* dispatcher, QkvPaddingRunner* patcher, int32_t padSize) + { PLUGIN_ASSERT(dispatcher); // Validate that we can padding to the dispatch required head size also there is kernel exist for this // sequence length. 
@@ -995,7 +998,7 @@ int32_t QKVToContextVarSeqlenPluginLegacy::enqueue(nvinfer1::PluginTensorDesc co { PLUGIN_ASSERT(patcher); PLUGIN_ASSERT(padSize <= patcher->getMaxPaddingHeadSize()); - auto sumSeqLen = inputDesc[0].dims.d[0]; + int32_t sumSeqLen = static_cast(inputDesc[0].dims.d[0]); auto paddingWorkspace = patcher->get16BytesAlignedPointer(workspace, dispatcher->getWorkspaceSize()); auto ret = mPatcher->pad(inputs[0], paddingWorkspace, sumSeqLen, mNumHeads, mHeadSize, padSize, stream); if (ret != cudaSuccess) @@ -1063,7 +1066,7 @@ QKVToContextVarSeqlenPluginLegacyCreator::QKVToContextVarSeqlenPluginLegacyCreat mPluginAttributes.emplace_back(PluginField("var_seqlen", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("use_int8_scale_max", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.h b/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.h index c8fa0d8f0..09bc59c01 100644 --- a/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.h +++ b/plugin/bertQKVToContextPlugin/qkvToContextPluginLegacy.h @@ -55,6 +55,11 @@ int32_t computeMaskedScaledSoftmax(cudaStream_t stream, int32_t const ld, int32_ class QKVToContextPluginDynamicLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + QKVToContextPluginDynamicLegacy(std::string const name, nvinfer1::DataType const type, int32_t const hiddenSize, int32_t const numHeads, float const dqProbs, bool hasImask = false); @@ -120,11 +125,6 @@ class QKVToContextPluginDynamicLegacy : public nvinfer1::IPluginV2DynamicExt nvinfer1::pluginInternal::cublasHandle_t mCublas{}; // 
the wrapper pointer is shared among all plugins attached to the same context. std::shared_ptr mCublasWrapper; - - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; }; class QKVToContextPluginDynamicLegacyCreator : public nvinfer1::IPluginCreator @@ -156,6 +156,11 @@ class QKVToContextPluginDynamicLegacyCreator : public nvinfer1::IPluginCreator class QKVToContextVarSeqlenPluginLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + QKVToContextVarSeqlenPluginLegacy(std::string const name, nvinfer1::DataType const type, int32_t const hiddenSize, int32_t const numHeads, float const dqProbs, bool hasImask = false, bool varSeqlen = false, bool const useInt8ScaleMax = true); @@ -225,11 +230,6 @@ class QKVToContextVarSeqlenPluginLegacy : public nvinfer1::IPluginV2DynamicExt nvinfer1::pluginInternal::cublasHandle_t mCublas{}; // the wrapper pointer is shared among all plugins attached to the same context. 
std::shared_ptr mCublasWrapper; - - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; }; class QKVToContextVarSeqlenPluginLegacyCreator : public nvinfer1::IPluginCreator diff --git a/plugin/clipPlugin/clipPlugin.cpp b/plugin/clipPlugin/clipPlugin.cpp index 727f8fb7d..d4f440438 100644 --- a/plugin/clipPlugin/clipPlugin.cpp +++ b/plugin/clipPlugin/clipPlugin.cpp @@ -88,7 +88,8 @@ int32_t ClipPlugin::enqueue( { void* output = outputs[0]; int32_t status = pluginStatus_t::STATUS_FAILURE; - status = clipInference(stream, mInputVolume * batchSize, mClipMin, mClipMax, inputs[0], output, mDataType); + status = clipInference( + stream, static_cast(mInputVolume * batchSize), mClipMin, mClipMax, inputs[0], output, mDataType); if (status != pluginStatus_t::STATUS_SUCCESS) { @@ -127,8 +128,8 @@ void ClipPlugin::serialize(void* buffer) const noexcept PLUGIN_ASSERT(d == a + getSerializationSize()); } -void ClipPlugin::configureWithFormat(Dims const* inputs, int32_t nbInputs, Dims const* outputs, int32_t nbOutputs, - DataType type, PluginFormat format, int32_t) noexcept +void ClipPlugin::configureWithFormat(Dims const* inputs, int32_t /*nbInputs*/, Dims const* /*outputs*/, + int32_t nbOutputs, DataType type, PluginFormat format, int32_t) noexcept { PLUGIN_ASSERT(nbOutputs == 1); PLUGIN_API_CHECK_ENUM_RANGE(DataType, type); @@ -138,7 +139,7 @@ void ClipPlugin::configureWithFormat(Dims const* inputs, int32_t nbInputs, Dims size_t volume = 1; for (int32_t i = 0; i < inputs->nbDims; i++) { - volume *= inputs->d[i]; + volume *= static_cast(inputs->d[i]); } mInputVolume = volume; } @@ -191,7 +192,7 @@ ClipPluginCreator::ClipPluginCreator() mPluginAttributes.emplace_back(PluginField("clipMin", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("clipMax", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = 
static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -217,7 +218,7 @@ IPluginV2* ClipPluginCreator::createPlugin(char const* name, PluginFieldCollecti gLogWarning << "ClipPlugin is deprecated since TensorRT 9.0. Use INetworkDefinition::addActivation() to add an " "IActivationLayer with ActivationType::kCLIP." << std::endl; - float clipMin = 0.0, clipMax = 0.0; + float clipMin = 0.0f, clipMax = 0.0f; PluginField const* fields = fc->fields; plugin::validateRequiredAttributesExist({"clipMin", "clipMax"}, fc); diff --git a/plugin/common/bboxUtils.h b/plugin/common/bboxUtils.h index 6419611dd..defdd4b15 100644 --- a/plugin/common/bboxUtils.h +++ b/plugin/common/bboxUtils.h @@ -26,11 +26,11 @@ template struct Bbox { T xmin, ymin, xmax, ymax; - Bbox(T xmin, T ymin, T xmax, T ymax) - : xmin(xmin) - , ymin(ymin) - , xmax(xmax) - , ymax(ymax) + Bbox(T xmin_, T ymin_, T xmax_, T ymax_) + : xmin(xmin_) + , ymin(ymin_) + , xmax(xmax_) + , ymax(ymax_) { } Bbox() = default; @@ -43,11 +43,11 @@ struct BboxInfo int32_t label; int32_t bbox_idx; bool kept; - BboxInfo(T conf_score, int32_t label, int32_t bbox_idx, bool kept) - : conf_score(conf_score) - , label(label) - , bbox_idx(bbox_idx) - , kept(kept) + BboxInfo(T conf_score_, int32_t label_, int32_t bbox_idx_, bool kept_) + : conf_score(conf_score_) + , label(label_) + , bbox_idx(bbox_idx_) + , kept(kept_) { } BboxInfo() = default; diff --git a/plugin/common/bertCommon.h b/plugin/common/bertCommon.h index 06df9fbc7..ecd96884c 100644 --- a/plugin/common/bertCommon.h +++ b/plugin/common/bertCommon.h @@ -372,12 +372,12 @@ struct WeightsWithOwnership : public nvinfer1::Weights WeightsWithOwnership(WeightsWithOwnership const&&) = delete; WeightsWithOwnership operator=(WeightsWithOwnership const&&) = delete; - void convertAndCopy(nvinfer1::Weights const& src, nvinfer1::DataType type) + void convertAndCopy(nvinfer1::Weights const& src, nvinfer1::DataType type_) { - this->type = type; + this->type = type_; 
this->count = src.count; - if (type == nvinfer1::DataType::kFLOAT) + if (type_ == nvinfer1::DataType::kFLOAT) { auto destBuf = new float[src.count]; this->values = destBuf; @@ -401,7 +401,7 @@ struct WeightsWithOwnership : public nvinfer1::Weights } } } - else if (type == nvinfer1::DataType::kHALF) + else if (type_ == nvinfer1::DataType::kHALF) { auto destBuf = new half[src.count]; this->values = destBuf; @@ -431,11 +431,11 @@ struct WeightsWithOwnership : public nvinfer1::Weights } } - void convertAndCopy(char const*& srcBuf, size_t count, nvinfer1::DataType type) noexcept + void convertAndCopy(char const*& srcBuf, size_t count_, nvinfer1::DataType type_) noexcept { - this->type = type; - this->count = count; - auto const nbBytes = getWeightsSize(*this, type); + this->type = type_; + this->count = count_; + auto const nbBytes = getWeightsSize(*this, type_); auto destBuf = new char[nbBytes]; this->values = destBuf; diff --git a/plugin/common/checkMacrosPlugin.h b/plugin/common/checkMacrosPlugin.h index ae9ed9808..095d5e71f 100644 --- a/plugin/common/checkMacrosPlugin.h +++ b/plugin/common/checkMacrosPlugin.h @@ -61,34 +61,35 @@ class CublasError : public TRTException } // namespace nvinfer1 +#undef PLUGIN_CHECK_CUDNN #define PLUGIN_CHECK_CUDNN(call) \ do \ { \ - cudnnStatus_t status = call; \ - if (status != CUDNN_STATUS_SUCCESS) \ + cudnnStatus_t status_check_cudnn_ = call; \ + if (status_check_cudnn_ != CUDNN_STATUS_SUCCESS) \ { \ - return status; \ + return status_check_cudnn_; \ } \ } while (0) #define PLUGIN_CUBLASASSERT(status_) \ { \ - auto s_ = status_; \ - if (s_ != nvinfer1::pluginInternal::CUBLAS_STATUS_SUCCESS) \ + auto s_cublas_ = status_; \ + if (s_cublas_ != nvinfer1::pluginInternal::CUBLAS_STATUS_SUCCESS) \ { \ - nvinfer1::plugin::throwCublasError(__FILE__, FN_NAME, __LINE__, s_); \ + nvinfer1::plugin::throwCublasError(__FILE__, FN_NAME, __LINE__, s_cublas_); \ } \ } #define PLUGIN_CUDNNASSERT(status_) \ { \ - auto s_ = status_; \ - if (s_ != 
CUDNN_STATUS_SUCCESS) \ + auto s_cudnn_ = status_; \ + if (s_cudnn_ != CUDNN_STATUS_SUCCESS) \ { \ nvinfer1::pluginInternal::CudnnWrapper& wrapper \ = nvinfer1::pluginInternal::getCudnnWrapper(/* plugin caller name */ nullptr); \ - const char* msg = wrapper.cudnnGetErrorString(s_); \ - nvinfer1::plugin::throwCudnnError(__FILE__, FN_NAME, __LINE__, s_, msg); \ + const char* msg_cudnn_ = wrapper.cudnnGetErrorString(s_cudnn_); \ + nvinfer1::plugin::throwCudnnError(__FILE__, FN_NAME, __LINE__, s_cudnn_, msg_cudnn_); \ } \ } diff --git a/plugin/common/cudaDriverWrapper.cpp b/plugin/common/cudaDriverWrapper.cpp index f81635a06..42badfcb8 100644 --- a/plugin/common/cudaDriverWrapper.cpp +++ b/plugin/common/cudaDriverWrapper.cpp @@ -44,8 +44,9 @@ CUDADriverWrapper::CUDADriverWrapper() handle = dllOpen(CUDA_LIB_NAME); PLUGIN_ASSERT(handle != nullptr); - auto load_sym = [](void* handle, char const* name) { - void* ret = dllGetSym(handle, name); + auto load_sym = [](void* libHandle, char const* name) + { + void* ret = dllGetSym(libHandle, name); PLUGIN_ASSERT(ret != nullptr); return ret; }; diff --git a/plugin/common/cudnnWrapper.cpp b/plugin/common/cudnnWrapper.cpp index a16270f9c..6ee15d6d6 100644 --- a/plugin/common/cudnnWrapper.cpp +++ b/plugin/common/cudnnWrapper.cpp @@ -44,7 +44,8 @@ namespace nvinfer1::pluginInternal CudnnWrapper::CudnnWrapper(bool initHandle, char const* callerPluginName) : mLibrary(tryLoadingCudnn(callerPluginName)) { - auto load_sym = [](void* handle, char const* name) { + auto load_sym = [](void* handle, char const* name) + { void* ret = dllGetSym(handle, name); std::string loadError = "Fail to load symbol " + std::string(name) + " from the cudnn library."; PLUGIN_VALIDATE(ret != nullptr, loadError.c_str()); @@ -87,6 +88,7 @@ CudnnWrapper::~CudnnWrapper() void* CudnnWrapper::tryLoadingCudnn(char const* callerPluginName) { + (void) callerPluginName; #if CUDART_VERSION >= 12070 && CUDNN_MAJOR == 8 static constexpr int32_t kSM_BLACKWELL_100 = 100; 
diff --git a/plugin/common/kernels/proposalKernel.cu b/plugin/common/kernels/proposalKernel.cu index 52d97ea50..5dff84ce5 100644 --- a/plugin/common/kernels/proposalKernel.cu +++ b/plugin/common/kernels/proposalKernel.cu @@ -32,6 +32,7 @@ namespace nvinfer1 { namespace plugin { +#undef PLUGIN_CHECK_CUDA #define PLUGIN_CHECK_CUDA(call) \ do \ { \ @@ -59,23 +60,23 @@ typedef pluginStatus_t frcnnStatus_t; #define DEBUG_RPN_ENABLE 0 -#define FRCNN_ASSERT_PARAM(exp) \ - do \ - { \ - if (!(exp)) \ - { \ - DEBUG_FPRINTF(stderr, "Bad param - " #exp ", %s:%d\n", __FILE__, __LINE__); \ - return STATUS_BAD_PARAM; \ - } \ +#define FRCNN_ASSERT_PARAM(exp) \ + do \ + { \ + if (!(exp)) \ + { \ + DEBUG_FPRINTF(stderr, "Bad param - " #exp ", %s:%d\n", __FILE__, __LINE__); \ + return STATUS_BAD_PARAM; \ + } \ } while (0) -#define DEBUG_FPRINTF(...) \ - do \ - { \ - if (DEBUG_RPN_ENABLE) \ - { \ - fprintf(__VA_ARGS__); \ - } \ +#define DEBUG_FPRINTF(...) \ + do \ + { \ + if (DEBUG_RPN_ENABLE) \ + { \ + fprintf(__VA_ARGS__); \ + } \ } while (0) #define CUDA_MEM_ALIGN 256 @@ -83,46 +84,30 @@ typedef pluginStatus_t frcnnStatus_t; unsigned int hash(const void* array_, size_t size); int8_t* alignPtr(int8_t* ptr, uintptr_t to); __global__ void setOffset(int stride, int size, int* output); -frcnnStatus_t nms(cudaStream_t stream, - const int N, - const int R, - const int preNmsTop, - const int nmsMaxOut, - const float iouThreshold, - const DType_t t_fgScores, - const DLayout_t l_fgScores, - void* fgScores, - const DType_t t_proposals, - const DLayout_t l_proposals, - const void* proposals, - void* workspace, - const DType_t t_rois, - void* rois); +frcnnStatus_t nms(cudaStream_t stream, const int N, const int R, const int preNmsTop, const int nmsMaxOut, + const float iouThreshold, const DType_t t_fgScores, const DLayout_t l_fgScores, void* fgScores, + const DType_t t_proposals, const DLayout_t l_proposals, const void* proposals, void* workspace, + const DType_t t_rois, void* rois); int8_t* 
nextWorkspacePtr(int8_t* ptr, uintptr_t previousWorkspaceSize); - template __device__ __host__ inline float IoU(const Bbox& a, const Bbox& b) { TFloat left = max(a.x1, b.x1), right = min(a.x2, b.x2); TFloat top = max(a.y1, b.y1), bottom = min(a.y2, b.y2); - TFloat width = max((TFloat)(right - left + (TFloat) 1.0), (TFloat) 0.0); - TFloat height = max((TFloat)(bottom - top + (TFloat) 1.0), (TFloat) 0.0); + TFloat width = max((TFloat) (right - left + (TFloat) 1.0), (TFloat) 0.0); + TFloat height = max((TFloat) (bottom - top + (TFloat) 1.0), (TFloat) 0.0); TFloat interS = width * height; TFloat Sa = (a.x2 - a.x1 + (TFloat) 1) * (a.y2 - a.y1 + (TFloat) 1); TFloat Sb = (b.x2 - b.x1 + (TFloat) 1) * (b.y2 - b.y1 + (TFloat) 1); return (float) interS / (float) (Sa + Sb - interS); } - // NMS KERNEL FOR SMALL BATCH SIZE {{{ template __global__ __launch_bounds__(DIM) void nmsKernel1(const int propSize, - Bbox const* __restrict__ preNmsProposals, - T_ROIS* __restrict__ afterNmsProposals, - const int preNmsTopN, - const float nmsThres, - const int afterNmsTopN) + Bbox const* __restrict__ preNmsProposals, T_ROIS* __restrict__ afterNmsProposals, const int preNmsTopN, + const float nmsThres, const int afterNmsTopN) { __shared__ bool kept_boxes[TSIZE * DIM]; int kept = 0; @@ -185,8 +170,7 @@ __global__ __launch_bounds__(DIM) void nmsKernel1(const int propSize, do { ref_box_idx++; - } - while (!kept_boxes[ref_box_idx - batch_offset] && ref_box_idx < max_box_idx); + } while (!kept_boxes[ref_box_idx - batch_offset] && ref_box_idx < max_box_idx); kept++; } @@ -195,12 +179,8 @@ __global__ __launch_bounds__(DIM) void nmsKernel1(const int propSize, // NMS KERNEL FOR LARGE BATCH SIZE {{{ template -__global__ __launch_bounds__(DIM) void nmsKernel2(const int propSize, - Bbox const* __restrict__ proposals, - T_ROIS* __restrict__ filtered, - const int preNmsTopN, - const float nmsThres, - const int afterNmsTopN) +__global__ __launch_bounds__(DIM) void nmsKernel2(const int propSize, Bbox const* 
__restrict__ proposals, + T_ROIS* __restrict__ filtered, const int preNmsTopN, const float nmsThres, const int afterNmsTopN) { Bbox const* cProposals = proposals + blockIdx.x * propSize; Bbox t[TSIZE]; @@ -265,8 +245,7 @@ __global__ __launch_bounds__(DIM) void nmsKernel2(const int propSize, for (int k = 0; k < TSIZE; k++) { - if (index < k * DIM + threadIdx.x - && IoU(test, t[k]) > nmsThres) + if (index < k * DIM + threadIdx.x && IoU(test, t[k]) > nmsThres) { del |= (uint64_t) 1 << k; } @@ -279,34 +258,22 @@ __global__ __launch_bounds__(DIM) void nmsKernel2(const int propSize, // NMS LAUNCH {{{ template -frcnnStatus_t nmsLaunch(cudaStream_t stream, - const int batch, - const int propSize, - void* proposals, - void* filtered, - const int preNmsTopN, - const float nmsThres, - const int afterNmsTopN) +frcnnStatus_t nmsLaunch(cudaStream_t stream, const int batch, const int propSize, void* proposals, void* filtered, + const int preNmsTopN, const float nmsThres, const int afterNmsTopN) { const int blockSize = 1024; #define P1(tsize) nmsKernel1 #define P2(tsize) nmsKernel2 - void (*kernel[64])(int, Bbox const*, T_ROIS*, int, float, int) = - { - P1(1), P1(2), P1(3), P1(4), P1(5), P1(6), P1(7), P1(8), P1(9), P1(10), P1(11), P1(12), P2(13), P2(14), P2(15), P2(16), - P2(17), P2(18), P2(19), P2(20), P2(21), P2(22), P2(23), P2(24), P2(25), P2(26), P2(27), P2(28), P2(29), P2(30), P2(31), P2(32), - P2(33), P2(34), P2(35), P2(36), P2(37), P2(38), P2(39), P2(40), P2(41), P2(42), P2(43), P2(44), P2(45), P2(46), P2(47), P2(48), - P2(49), P2(50), P2(51), P2(52), P2(53), P2(54), P2(55), P2(56), P2(57), P2(58), P2(59), P2(60), P2(61), P2(62), P2(63), P2(64) - }; + void (*kernel[64])(int, Bbox const*, T_ROIS*, int, float, int) + = {P1(1), P1(2), P1(3), P1(4), P1(5), P1(6), P1(7), P1(8), P1(9), P1(10), P1(11), P1(12), P2(13), P2(14), + P2(15), P2(16), P2(17), P2(18), P2(19), P2(20), P2(21), P2(22), P2(23), P2(24), P2(25), P2(26), P2(27), + P2(28), P2(29), P2(30), P2(31), P2(32), P2(33), 
P2(34), P2(35), P2(36), P2(37), P2(38), P2(39), P2(40), + P2(41), P2(42), P2(43), P2(44), P2(45), P2(46), P2(47), P2(48), P2(49), P2(50), P2(51), P2(52), P2(53), + P2(54), P2(55), P2(56), P2(57), P2(58), P2(59), P2(60), P2(61), P2(62), P2(63), P2(64)}; FRCNN_ASSERT_PARAM(preNmsTopN < 64 * blockSize); - CSC(cudaMemsetAsync(filtered, 0, batch * afterNmsTopN * 4 * sizeof(T_ROIS), stream), - STATUS_FAILURE); - kernel[(preNmsTopN + blockSize - 1) / blockSize - 1] <<< batch, blockSize, 0, stream>>>(propSize, - (Bbox*) proposals, - (T_ROIS*) filtered, - preNmsTopN, - nmsThres, - afterNmsTopN); + CSC(cudaMemsetAsync(filtered, 0, batch * afterNmsTopN * 4 * sizeof(T_ROIS), stream), STATUS_FAILURE); + kernel[(preNmsTopN + blockSize - 1) / blockSize - 1]<<>>( + propSize, (Bbox*) proposals, (T_ROIS*) filtered, preNmsTopN, nmsThres, afterNmsTopN); CSC(cudaGetLastError(), STATUS_FAILURE); return STATUS_SUCCESS; } @@ -354,14 +321,7 @@ frcnnStatus_t nmsGpu(cudaStream_t stream, const int N, const int R, const int pr DEBUG_PRINTF("&&&& [NMS] POST CUB\n"); DEBUG_PRINTF("&&&& [NMS] PROPOSALS %u\n", hash(proposalsOut, N * R * 4 * sizeof(float))); DEBUG_PRINTF("&&&& [NMS] SCORES %u\n", hash(scoresOut, N * R * sizeof(float))); - error = nmsLaunch(stream, - N, - R, - proposalsOut, - rois, - preNmsTop, - iouThreshold, - nmsMaxOut); + error = nmsLaunch(stream, N, R, proposalsOut, rois, preNmsTop, iouThreshold, nmsMaxOut); DEBUG_PRINTF("&&&& [NMS] POST LAUNCH\n"); DEBUG_PRINTF("&&&& [NMS] SCORES %u\n", hash(rois, N * nmsMaxOut * 4 * sizeof(float))); @@ -375,15 +335,15 @@ frcnnStatus_t nmsGpu(cudaStream_t stream, const int N, const int R, const int pr // }}} typedef frcnnStatus_t (*nmsFun)(cudaStream_t, - const int, // N - const int, // R - const int, // preNmsTop - const int, // nmsMaxOut - const float, // iouThreshold - void*, // fgScores - const void*, // proposals, - void*, // workspace, - void*); // rois + const int, // N + const int, // R + const int, // preNmsTop + const int, // 
nmsMaxOut + const float, // iouThreshold + void*, // fgScores + const void*, // proposals, + void*, // workspace, + void*); // rois struct nmsLaunchConfig { @@ -394,12 +354,8 @@ struct nmsLaunchConfig DType_t t_rois; nmsFun function; - nmsLaunchConfig(DType_t t_fgScores, - DLayout_t l_fgScores, - DType_t t_proposals, - DLayout_t l_proposals, - DType_t t_rois, - nmsFun function) + nmsLaunchConfig(DType_t t_fgScores, DLayout_t l_fgScores, DType_t t_proposals, DLayout_t l_proposals, + DType_t t_rois, nmsFun function) : t_fgScores(t_fgScores) , l_fgScores(l_fgScores) , t_proposals(t_proposals) @@ -423,26 +379,23 @@ struct nmsLaunchConfig bool operator==(nmsLaunchConfig const& other) const { return (t_fgScores == other.t_fgScores) && (l_fgScores == other.l_fgScores) - && (t_proposals == other.t_proposals) && (l_proposals == other.l_proposals) - && (t_rois == other.t_rois); + && (t_proposals == other.t_proposals) && (l_proposals == other.l_proposals) && (t_rois == other.t_rois); } }; static std::vector nmsLCVec; #define FLOAT32 nvinfer1::DataType::kFLOAT -__global__ void _inverse_transform_gpu(const float* RPN_prob, const float* RPN_regr, int N, - int INPUT_H, int INPUT_W, int RPN_H, int RPN_W, float RPN_STD_SCALING, int RPN_STRIDE, - float* ANCHOR_SIZES, int anc_size_num, float* ANCHOR_RATIOS, int anc_ratio_num, float bbox_min_size, - float* fg_scores, float* proposal_out) +__global__ void _inverse_transform_gpu(const float* RPN_prob, const float* RPN_regr, int N, int INPUT_H, int INPUT_W, + int RPN_H, int RPN_W, float RPN_STD_SCALING, int RPN_STRIDE, float* ANCHOR_SIZES, int anc_size_num, + float* ANCHOR_RATIOS, int anc_ratio_num, float bbox_min_size, float* fg_scores, float* proposal_out) { int nthreads = N * RPN_H * RPN_W * anc_size_num * anc_ratio_num; int num_ancs = anc_size_num * anc_ratio_num; - for (int out_idx = threadIdx.x + blockDim.x * blockIdx.x; out_idx < nthreads; - out_idx += blockDim.x * gridDim.x) + for (int out_idx = threadIdx.x + blockDim.x * 
blockIdx.x; out_idx < nthreads; out_idx += blockDim.x * gridDim.x) { - //input RPN_regr: (N, A4, H, W), thread: (N, A, H, W) + // input RPN_regr: (N, A4, H, W), thread: (N, A, H, W) int idx = out_idx; int w = idx % RPN_W; idx /= RPN_W; @@ -481,7 +434,7 @@ __global__ void _inverse_transform_gpu(const float* RPN_prob, const float* RPN_r ty = (ty >= 0.0f) ? ty : 0.0f; tw = (tw >= 0.0f) ? tw : 0.0f; th = (th >= 0.0f) ? th : 0.0f; - //clip to max + // clip to max tx = (tx <= INPUT_W - 1.0f) ? tx : (INPUT_W - 1.0f); ty = (ty <= INPUT_H - 1.0f) ? ty : (INPUT_H - 1.0f); tw = (tw <= INPUT_W - 1.0f) ? tw : (INPUT_W - 1.0f); @@ -503,28 +456,21 @@ __global__ void _inverse_transform_gpu(const float* RPN_prob, const float* RPN_r } } - - -cudaError_t _inverse_transform_wrapper(const float* RPN_prob, const float* RPN_regr, int N, int INPUT_H, - int INPUT_W, int RPN_H, int RPN_W, float RPN_STD_SCALING, int RPN_STRIDE, float* ANCHOR_SIZES, - int anc_size_num, float* ANCHOR_RATIOS, int anc_ratio_num, float bbox_min_size, float* fg_scores, - float* proposal_out, cudaStream_t stream) +cudaError_t _inverse_transform_wrapper(const float* RPN_prob, const float* RPN_regr, int N, int INPUT_H, int INPUT_W, + int RPN_H, int RPN_W, float RPN_STD_SCALING, int RPN_STRIDE, float* ANCHOR_SIZES, int anc_size_num, + float* ANCHOR_RATIOS, int anc_ratio_num, float bbox_min_size, float* fg_scores, float* proposal_out, + cudaStream_t stream) { const int block_size = 1024; - const int grid_size = (N * anc_size_num * anc_ratio_num * RPN_H * RPN_W + block_size - 1) / - (block_size); - _inverse_transform_gpu <<< grid_size, block_size, 0, stream>>> (RPN_prob, RPN_regr, N, INPUT_H, - INPUT_W, RPN_H, RPN_W, RPN_STD_SCALING, RPN_STRIDE, ANCHOR_SIZES, anc_size_num, ANCHOR_RATIOS, - anc_ratio_num, bbox_min_size, fg_scores, proposal_out); + const int grid_size = (N * anc_size_num * anc_ratio_num * RPN_H * RPN_W + block_size - 1) / (block_size); + _inverse_transform_gpu<<>>(RPN_prob, RPN_regr, N, INPUT_H, INPUT_W, 
RPN_H, RPN_W, + RPN_STD_SCALING, RPN_STRIDE, ANCHOR_SIZES, anc_size_num, ANCHOR_RATIOS, anc_ratio_num, bbox_min_size, fg_scores, + proposal_out); return cudaGetLastError(); } -size_t _proposalsForwardNMSWorkspaceSize(int N, - int A, - int H, - int W, - int nmsMaxOut) +size_t _proposalsForwardNMSWorkspaceSize(int N, int A, int H, int W, int nmsMaxOut) { return N * A * H * W * 5 * 5 * sizeof(float) + (1 << 22); } @@ -534,13 +480,11 @@ size_t _proposalsForwardBboxWorkspaceSize(int N, int A, int H, int W) return N * A * H * W * 4 * sizeof(float); } - size_t _proposalForwardFgScoresWorkspaceSize(int N, int A, int H, int W) { return N * A * H * W * sizeof(float); } - size_t anchors_buf_size(int anc_size_num, int anc_ratio_num) { return (anc_size_num + anc_ratio_num) * sizeof(float); @@ -548,12 +492,7 @@ size_t anchors_buf_size(int anc_size_num, int anc_ratio_num) size_t calculateTotalWorkspaceSize(size_t* workspaces, int count); -size_t _get_workspace_size(int N, - int anc_size_num, - int anc_ratio_num, - int H, - int W, - int nmsMaxOut) +size_t _get_workspace_size(int N, int anc_size_num, int anc_ratio_num, int H, int W, int nmsMaxOut) { size_t wss[4]; int A = anc_size_num * anc_ratio_num; @@ -564,26 +503,19 @@ size_t _get_workspace_size(int N, return calculateTotalWorkspaceSize(wss, 4); } - - template -frcnnStatus_t extractFgScores_gpu(cudaStream_t stream, - int N, - int A, - int H, - int W, - const void* scores, - void* fgScores) +frcnnStatus_t extractFgScores_gpu(cudaStream_t stream, int N, int A, int H, int W, const void* scores, void* fgScores) { - //TODO custom kernel for this + // TODO custom kernel for this size_t size = A * H * W * sizeof(T); for (int n = 0; n < N; n++) { size_t offset_ld = n * A * H * W; size_t offset_st = n * A * H * W; - CSC(cudaMemcpyAsync(((T*) fgScores) + offset_st, ((T*) scores) + offset_ld, size, - cudaMemcpyDeviceToDevice, stream), STATUS_FAILURE); + CSC(cudaMemcpyAsync( + ((T*) fgScores) + offset_st, ((T*) scores) + offset_ld, size, 
cudaMemcpyDeviceToDevice, stream), + STATUS_FAILURE); } return STATUS_SUCCESS; @@ -600,10 +532,9 @@ cudaError_t _copy_anchors_to_gpu(cudaStream_t stream, float* ANCHOR_SIZES, int a return cudaSuccess; } - __global__ void _normalize_rois_kernel(float* roi_after_nms, int nthreads, int width, int height) { - for(int i = threadIdx.x + blockDim.x * blockIdx.x; i < nthreads; i += blockDim.x * gridDim.x) + for (int i = threadIdx.x + blockDim.x * blockIdx.x; i < nthreads; i += blockDim.x * gridDim.x) { float x1 = roi_after_nms[i * 4]; float y1 = roi_after_nms[i * 4 + 1]; @@ -616,53 +547,33 @@ __global__ void _normalize_rois_kernel(float* roi_after_nms, int nthreads, int w } } - - -cudaError_t _normalize_rois(float* roi_after_nms, int n, int max_box_num, int input_width, - int input_height, cudaStream_t stream) +cudaError_t _normalize_rois( + float* roi_after_nms, int n, int max_box_num, int input_width, int input_height, cudaStream_t stream) { const int block_size = 1024; const int grid_size = (n * max_box_num + block_size - 1) / block_size; - _normalize_rois_kernel <<< grid_size, block_size, 0, stream>>>(roi_after_nms, n * max_box_num, - input_width, input_height); + _normalize_rois_kernel<<>>( + roi_after_nms, n * max_box_num, input_width, input_height); return cudaGetLastError(); } - -int proposalInference_gpu( - cudaStream_t stream, - const void* rpn_prob, - const void* rpn_regr, - int batch_size, - int input_height, - int input_width, - int rpn_height, - int rpn_width, - int MAX_BOX_NUM, - int RPN_PRE_NMS_TOP_N, - float* ANCHOR_SIZES, - int anc_size_num, - float* ANCHOR_RATIOS, - int anc_ratio_num, - float rpn_std_scaling, - int rpn_stride, - float bbox_min_size, - float nms_iou_threshold, - void * workspace, - void* output) +int proposalInference_gpu(cudaStream_t stream, const void* rpn_prob, const void* rpn_regr, int batch_size, + int input_height, int input_width, int rpn_height, int rpn_width, int MAX_BOX_NUM, int RPN_PRE_NMS_TOP_N, + float* ANCHOR_SIZES, int 
anc_size_num, float* ANCHOR_RATIOS, int anc_ratio_num, float rpn_std_scaling, + int rpn_stride, float bbox_min_size, float nms_iou_threshold, void* workspace, void* output) { - size_t nmsWorkspaceSize = _proposalsForwardNMSWorkspaceSize(batch_size, anc_size_num * anc_ratio_num, - rpn_height, rpn_width, MAX_BOX_NUM); + size_t nmsWorkspaceSize = _proposalsForwardNMSWorkspaceSize( + batch_size, anc_size_num * anc_ratio_num, rpn_height, rpn_width, MAX_BOX_NUM); void* nmsWorkspace = workspace; - size_t proposalsSize = _proposalsForwardBboxWorkspaceSize(batch_size, anc_size_num * anc_ratio_num, - rpn_height, rpn_width); + size_t proposalsSize + = _proposalsForwardBboxWorkspaceSize(batch_size, anc_size_num * anc_ratio_num, rpn_height, rpn_width); const DType_t t_proposals = nvinfer1::DataType::kFLOAT; const DLayout_t l_proposals = NC4HW; void* proposals = nextWorkspacePtr((int8_t*) nmsWorkspace, nmsWorkspaceSize); void* fg_scores = nextWorkspacePtr((int8_t*) proposals, proposalsSize); - size_t fg_scores_size = _proposalForwardFgScoresWorkspaceSize(batch_size, - anc_size_num * anc_ratio_num, rpn_height, rpn_width); + size_t fg_scores_size + = _proposalForwardFgScoresWorkspaceSize(batch_size, anc_size_num * anc_ratio_num, rpn_height, rpn_width); void* anchor_size_buf = nextWorkspacePtr((int8_t*) fg_scores, fg_scores_size); void* anchor_ratio_buf = static_cast(static_cast(anchor_size_buf) + anc_size_num); frcnnStatus_t status; diff --git a/plugin/common/plugin.cpp b/plugin/common/plugin.cpp index 6ff56323e..4929611c1 100644 --- a/plugin/common/plugin.cpp +++ b/plugin/common/plugin.cpp @@ -54,7 +54,8 @@ class PerContextPluginHandleSingletonCreator std::shared_ptr result = mObservers[executionContextIdentifier].lock(); if (result == nullptr) { - auto deleter = [this, executionContextIdentifier](T* obj) { + auto deleter = [this, executionContextIdentifier](T* obj) + { if (obj == nullptr) { return; @@ -67,7 +68,7 @@ class PerContextPluginHandleSingletonCreator // To avoid 
deadlock, it's critical to release the lock here held by lk first, // before destroying observedObjHolder. Hence observedObjHolder must be declared // before lk. - std::lock_guard lk{mMutex}; + std::lock_guard lk_{mMutex}; // Must check observer again because another thread may create new instance for // this ctx just before we lock mMutex. We can't infer that the observer is // stale from the fact that obj is destroyed, because shared_ptr ref-count diff --git a/plugin/common/reducedMathPlugin.cpp b/plugin/common/reducedMathPlugin.cpp index bf2c90dd1..d8fee8c3b 100644 --- a/plugin/common/reducedMathPlugin.cpp +++ b/plugin/common/reducedMathPlugin.cpp @@ -32,7 +32,7 @@ int32_t clz(int32_t x) return 32; } -#define CUDNN_IS_POW_2(x) (0 == ((x) & ((x) -1))) +#define CUDNN_IS_POW_2(x) (0 == ((x) & ((x) - 1))) int32_t find_log_2(int32_t x, bool round_up = false) { @@ -72,8 +72,8 @@ void findDivisor(int32_t denom, uint32_t& mul_coeff, uint32_t& shift_coeff) // Once we've picked Y, then X [our mul_coeff value] is simply Y/D, rounding up, // and we save shift_coeff as whatever further shift we have to do beyond // what the umulhi() implies. 
- uint32_t p = 31 + find_log_2(denom, true); - uint32_t m = ((1ULL << p) + (uint32_t) denom - 1) / (uint32_t) denom; + uint32_t p = static_cast(31 + find_log_2(denom, true)); + uint32_t m = static_cast(((1ULL << p) + static_cast(denom) - 1) / static_cast(denom)); mul_coeff = m; shift_coeff = p - 32; } diff --git a/plugin/coordConvACPlugin/coordConvACPlugin.cpp b/plugin/coordConvACPlugin/coordConvACPlugin.cpp index 704014faf..fe762dbe4 100644 --- a/plugin/coordConvACPlugin/coordConvACPlugin.cpp +++ b/plugin/coordConvACPlugin/coordConvACPlugin.cpp @@ -33,14 +33,14 @@ int32_t const kNUM_COORDCONV_CHANNELS = 2; CoordConvACPlugin::CoordConvACPlugin() {} CoordConvACPlugin::CoordConvACPlugin( - nvinfer1::DataType iType, int32_t iC, int32_t iH, int32_t iW, int32_t oC, int32_t oH, int32_t oW) - : iType(iType) - , iC(iC) - , iH(iH) - , iW(iW) - , oC(oC) - , oH(oH) - , oW(oW) + nvinfer1::DataType iType_, int32_t iC_, int32_t iH_, int32_t iW_, int32_t oC_, int32_t oH_, int32_t oW_) + : iType(iType_) + , iC(iC_) + , iH(iH_) + , iW(iW_) + , oC(oC_) + , oH(oH_) + , oW(oW_) { } @@ -90,7 +90,7 @@ Dims CoordConvACPlugin::getOutputDimensions(int32_t index, Dims const* inputs, i return dimsOutput; } -size_t CoordConvACPlugin::getWorkspaceSize(int32_t maxBatchSize) const noexcept +size_t CoordConvACPlugin::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } @@ -115,19 +115,19 @@ void CoordConvACPlugin::serialize(void* buffer) const noexcept } void CoordConvACPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, nvinfer1::PluginFormat format, int32_t maxBatchSize) noexcept + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, nvinfer1::PluginFormat /*format*/, int32_t /*maxBatchSize*/) noexcept { 
PLUGIN_ASSERT(nbInputs == 1); PLUGIN_ASSERT(nbOutputs == 1); - iC = inputDims->d[0]; - iH = inputDims->d[1]; - iW = inputDims->d[2]; + iC = static_cast(inputDims->d[0]); + iH = static_cast(inputDims->d[1]); + iW = static_cast(inputDims->d[2]); - oC = outputDims->d[0]; - oH = outputDims->d[1]; - oW = outputDims->d[2]; + oC = static_cast(outputDims->d[0]); + oH = static_cast(outputDims->d[1]); + oW = static_cast(outputDims->d[2]); iType = inputTypes[0]; } @@ -178,24 +178,24 @@ char const* CoordConvACPlugin::getPluginNamespace() const noexcept } nvinfer1::DataType CoordConvACPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { return inputTypes[0]; } bool CoordConvACPlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } -bool CoordConvACPlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool CoordConvACPlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } void CoordConvACPlugin::attachToContext( - cudnnContext* cudnn, cublasContext* cublas, nvinfer1::IGpuAllocator* allocator) noexcept + cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, nvinfer1::IGpuAllocator* /*allocator*/) noexcept { } @@ -217,7 +217,7 @@ PluginFieldCollection const* CoordConvACPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* CoordConvACPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* CoordConvACPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* /*fc*/) noexcept { try { @@ -234,7 +234,7 @@ IPluginV2Ext* CoordConvACPluginCreator::createPlugin(char const* name, PluginFie } IPluginV2Ext* 
CoordConvACPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/cropAndResizePlugin/cropAndResizePlugin.cpp b/plugin/cropAndResizePlugin/cropAndResizePlugin.cpp index 5fb6b205d..a382004e5 100644 --- a/plugin/cropAndResizePlugin/cropAndResizePlugin.cpp +++ b/plugin/cropAndResizePlugin/cropAndResizePlugin.cpp @@ -66,7 +66,7 @@ int32_t CropAndResizeDynamicPlugin::getNbOutputs() const noexcept } int32_t CropAndResizeDynamicPlugin::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -104,7 +104,7 @@ int32_t CropAndResizeDynamicPlugin::enqueue(PluginTensorDesc const* inputDesc, P // Our plugin outputs only one tensor void* output = outputs[0]; - int32_t batchSize = inputDesc[0].dims.d[0]; + int32_t batchSize = static_cast(inputDesc[0].dims.d[0]); int32_t status = cropAndResizeInference(stream, mDepth * mInputHeight * mInputWidth * batchSize, inputs[0], inputs[1], batchSize, mInputHeight, mInputWidth, mNumBoxes, mCropHeight, mCropWidth, mDepth, output); return status; @@ -116,8 +116,8 @@ int32_t CropAndResizeDynamicPlugin::enqueue(PluginTensorDesc const* inputDesc, P return STATUS_FAILURE; } -size_t CropAndResizeDynamicPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t CropAndResizeDynamicPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -168,7 +168,7 @@ PluginFieldCollection const* 
CropAndResizeDynamicPlugin::getFieldsToSerialize() mDataToSerialize.emplace_back(PluginField("crop_width", &mCropWidth, PluginFieldType::kINT32, 1)); mDataToSerialize.emplace_back(PluginField("crop_height", &mCropHeight, PluginFieldType::kINT32, 1)); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -200,7 +200,7 @@ int32_t CropAndResizeDynamicPlugin::getOutputDataTypes( } int32_t CropAndResizeDynamicPlugin::onShapeChange( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) noexcept + PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* /*outputs*/, int32_t nbOutputs) noexcept { try { @@ -210,10 +210,10 @@ int32_t CropAndResizeDynamicPlugin::onShapeChange( // Re-validate dimensions and update internal state if needed // Here we can update mDepth, mInputHeight, mInputWidth, mNumBoxes if they change - mDepth = inputs[0].dims.d[1]; - mInputHeight = inputs[0].dims.d[2]; - mInputWidth = inputs[0].dims.d[3]; - mNumBoxes = inputs[1].dims.d[1]; + mDepth = static_cast(inputs[0].dims.d[1]); + mInputHeight = static_cast(inputs[0].dims.d[2]); + mInputWidth = static_cast(inputs[0].dims.d[3]); + mNumBoxes = static_cast(inputs[1].dims.d[1]); return STATUS_SUCCESS; } @@ -224,7 +224,7 @@ int32_t CropAndResizeDynamicPlugin::onShapeChange( return STATUS_FAILURE; } -IPluginV3* CropAndResizeDynamicPlugin::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* CropAndResizeDynamicPlugin::attachToContext(IPluginResourceContext* /*context*/) noexcept { try { @@ -238,7 +238,7 @@ IPluginV3* CropAndResizeDynamicPlugin::attachToContext(IPluginResourceContext* c } bool CropAndResizeDynamicPlugin::supportsFormatCombination( - int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + int32_t pos, DynamicPluginTensorDesc 
const* inOut, int32_t nbInputs, int32_t /*nbOutputs*/) noexcept { try { @@ -281,18 +281,18 @@ void CropAndResizeDynamicPlugin::setPluginNamespace(char const* libNamespace) no } } -int32_t CropAndResizeDynamicPlugin::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +int32_t CropAndResizeDynamicPlugin::configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t nbOutputs) noexcept { try { // Validate input/output counts and update internal state based on input dimensions PLUGIN_VALIDATE(nbInputs == 2); PLUGIN_VALIDATE(nbOutputs == 1); - mDepth = in[0].desc.dims.d[1]; - mInputHeight = in[0].desc.dims.d[2]; - mInputWidth = in[0].desc.dims.d[3]; - mNumBoxes = in[1].desc.dims.d[1]; + mDepth = static_cast(in[0].desc.dims.d[1]); + mInputHeight = static_cast(in[0].desc.dims.d[2]); + mInputWidth = static_cast(in[0].desc.dims.d[3]); + mNumBoxes = static_cast(in[1].desc.dims.d[1]); } catch (std::exception const& e) { @@ -312,7 +312,7 @@ CropAndResizeDynamicPluginCreator::CropAndResizeDynamicPluginCreator() mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("crop_width", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("crop_height", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -332,7 +332,7 @@ PluginFieldCollection const* CropAndResizeDynamicPluginCreator::getFieldNames() } IPluginV3* CropAndResizeDynamicPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* /*name*/, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.cpp b/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.cpp index 
ce3a0b702..2f334ee15 100644 --- a/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.cpp +++ b/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.cpp @@ -46,7 +46,7 @@ CropAndResizeDynamicPluginLegacy::CropAndResizeDynamicPluginLegacy(int32_t cropW { } -CropAndResizePlugin::CropAndResizePlugin(void const* serialBuf, size_t serialSize) +CropAndResizePlugin::CropAndResizePlugin(void const* serialBuf, size_t /*serialSize*/) { auto const* d = toPointer(serialBuf); auto const* a = d; @@ -59,7 +59,7 @@ CropAndResizePlugin::CropAndResizePlugin(void const* serialBuf, size_t serialSiz PLUGIN_ASSERT(d == a + sizeof(int32_t) * 6); } -CropAndResizeDynamicPluginLegacy::CropAndResizeDynamicPluginLegacy(void const* serialBuf, size_t serialSize) +CropAndResizeDynamicPluginLegacy::CropAndResizeDynamicPluginLegacy(void const* serialBuf, size_t /*serialSize*/) { auto const* d = reinterpret_cast(serialBuf); auto const* a = d; @@ -136,10 +136,10 @@ Dims CropAndResizePlugin::getOutputDimensions(int32_t index, Dims const* inputs, PLUGIN_VALIDATE(nbInputDims == 2); PLUGIN_VALIDATE(inputs != nullptr); PLUGIN_VALIDATE(inputs->nbDims == 3); - int32_t channels = inputs->d[0]; + int32_t channels = static_cast(inputs->d[0]); int32_t height = mCropHeight; int32_t width = mCropWidth; - int32_t roiBatch = inputs[1].d[0]; + int32_t roiBatch = static_cast(inputs[1].d[0]); return Dims4(roiBatch, channels, height, width); } catch (std::exception const& e) @@ -220,7 +220,7 @@ int32_t CropAndResizeDynamicPluginLegacy::enqueue(PluginTensorDesc const* inputD void* output = outputs[0]; // Launch CUDA kernel wrapper and save its return value - int32_t batchSize = inputDesc[0].dims.d[0]; + int32_t batchSize = static_cast(inputDesc[0].dims.d[0]); int32_t status = cropAndResizeInference(stream, mDepth * mInputHeight * mInputWidth * batchSize, inputs[0], inputs[1], batchSize, mInputHeight, mInputWidth, mNumBoxes, mCropHeight, mCropWidth, mDepth, output); return status; @@ -280,7 +280,7 @@ bool 
CropAndResizePlugin::supportsFormat(DataType type, PluginFormat format) con } bool CropAndResizeDynamicPluginLegacy::supportsFormatCombination( - int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t /*nbOutputs*/) noexcept { try { @@ -317,8 +317,8 @@ size_t CropAndResizePlugin::getWorkspaceSize(int32_t /*maxBatchSize*/) const noe return 0; } -size_t CropAndResizeDynamicPluginLegacy::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t CropAndResizeDynamicPluginLegacy::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -405,7 +405,7 @@ char const* CropAndResizeDynamicPluginLegacy::getPluginNamespace() const noexcep // Return the DataType of the plugin output at the requested index. DataType CropAndResizePlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -421,7 +421,7 @@ DataType CropAndResizePlugin::getOutputDataType( } DataType CropAndResizeDynamicPluginLegacy::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -438,20 +438,20 @@ DataType CropAndResizeDynamicPluginLegacy::getOutputDataType( // Return true if output tensor is broadcast across a batch. 
bool CropAndResizePlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool CropAndResizePlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool CropAndResizePlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } -void CropAndResizePlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void CropAndResizePlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat floatFormat, int32_t /*maxBatchSize*/) noexcept { try { @@ -459,10 +459,10 @@ void CropAndResizePlugin::configurePlugin(Dims const* inputDims, int32_t nbInput && floatFormat == PluginFormat::kLINEAR); PLUGIN_VALIDATE(nbInputs == 2); PLUGIN_VALIDATE(nbOutputs == 1); - mDepth = inputDims[0].d[0]; - mInputHeight = inputDims[0].d[1]; - mInputWidth = inputDims[0].d[2]; - mNumBoxes = inputDims[1].d[0]; + mDepth = static_cast(inputDims[0].d[0]); + mInputHeight = static_cast(inputDims[0].d[1]); + mInputWidth = static_cast(inputDims[0].d[2]); + mNumBoxes = static_cast(inputDims[1].d[0]); } catch (std::exception const& e) { @@ -470,17 +470,17 @@ void CropAndResizePlugin::configurePlugin(Dims const* inputDims, int32_t nbInput } } -void CropAndResizeDynamicPluginLegacy::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, 
DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void CropAndResizeDynamicPluginLegacy::configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t nbOutputs) noexcept { try { PLUGIN_VALIDATE(nbInputs == 2); PLUGIN_VALIDATE(nbOutputs == 1); - mDepth = in[0].desc.dims.d[1]; - mInputHeight = in[0].desc.dims.d[2]; - mInputWidth = in[0].desc.dims.d[3]; - mNumBoxes = in[1].desc.dims.d[1]; + mDepth = static_cast(in[0].desc.dims.d[1]); + mInputHeight = static_cast(in[0].desc.dims.d[2]); + mInputWidth = static_cast(in[0].desc.dims.d[3]); + mNumBoxes = static_cast(in[1].desc.dims.d[1]); } catch (std::exception const& e) { @@ -489,7 +489,7 @@ void CropAndResizeDynamicPluginLegacy::configurePlugin( } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. void CropAndResizePlugin::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -502,7 +502,7 @@ CropAndResizeBasePluginCreator::CropAndResizeBasePluginCreator() mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("crop_width", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("crop_height", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.h b/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.h index 015edb04e..295a99185 100644 --- a/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.h +++ b/plugin/cropAndResizePlugin/cropAndResizePluginLegacy.h @@ -134,6 +134,11 @@ class TRT_DEPRECATED CropAndResizePlugin : public IPluginV2Ext class TRT_DEPRECATED 
CropAndResizeDynamicPluginLegacy : public IPluginV2DynamicExt { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + //! //! \brief Constructs CropAndResizeDynamicPluginLegacy with specified crop dimensions. //! diff --git a/plugin/decodeBbox3DPlugin/decodeBbox3D.cpp b/plugin/decodeBbox3DPlugin/decodeBbox3D.cpp index 67e78a399..1207bc352 100644 --- a/plugin/decodeBbox3DPlugin/decodeBbox3D.cpp +++ b/plugin/decodeBbox3DPlugin/decodeBbox3D.cpp @@ -126,7 +126,7 @@ nvinfer1::IPluginV2DynamicExt* DecodeBbox3DPlugin::clone() const noexcept } nvinfer1::DimsExprs DecodeBbox3DPlugin::getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, - int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept + int32_t /*nbInputs*/, nvinfer1::IExprBuilder& exprBuilder) noexcept { try { @@ -199,14 +199,14 @@ bool DecodeBbox3DPlugin::supportsFormatCombination( return false; } -void DecodeBbox3DPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void DecodeBbox3DPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t /*nbInputs*/, + nvinfer1::DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { try { PLUGIN_VALIDATE(in != nullptr); - mFeatureH = in[0].desc.dims.d[1]; - mFeatureW = in[0].desc.dims.d[2]; + mFeatureH = static_cast(in[0].desc.dims.d[1]); + mFeatureW = static_cast(in[0].desc.dims.d[2]); } catch (std::exception const& e) { @@ -214,8 +214,8 @@ void DecodeBbox3DPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const } } -size_t DecodeBbox3DPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t 
DecodeBbox3DPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { size_t mAnchorsSize = mNumClasses * 2 * 4 * sizeof(float); size_t mAnchorBottomHeightSize = mNumClasses * sizeof(float); @@ -233,7 +233,7 @@ int32_t DecodeBbox3DPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr && workspace != nullptr); - int32_t batchSize = inputDesc[0].dims.d[0]; + int32_t batchSize = static_cast(inputDesc[0].dims.d[0]); // Inputs auto const* clsInput = static_cast(inputs[0]); @@ -269,7 +269,7 @@ int32_t DecodeBbox3DPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, } nvinfer1::DataType DecodeBbox3DPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { try { @@ -379,7 +379,7 @@ DecodeBbox3DPluginCreator::DecodeBbox3DPluginCreator() mPluginAttributes.emplace_back(PluginField("num_dir_bins", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("score_thresh", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/decodeBbox3DPlugin/decodeBbox3D.h b/plugin/decodeBbox3DPlugin/decodeBbox3D.h index 1f7fac66f..84e09f5ac 100644 --- a/plugin/decodeBbox3DPlugin/decodeBbox3D.h +++ b/plugin/decodeBbox3DPlugin/decodeBbox3D.h @@ -31,6 +31,11 @@ namespace plugin class DecodeBbox3DPlugin : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using 
nvinfer1::IPluginV2DynamicExt::enqueue; + DecodeBbox3DPlugin() = delete; DecodeBbox3DPlugin(float xMin, float xMax, float yMin, float yMax, float zMin, float zMax, int32_t numDirBins, float dirOffset, float dirLimitOffset, std::vector const& anchorBottomHeight, diff --git a/plugin/detectionLayerPlugin/detectionLayerPlugin.cpp b/plugin/detectionLayerPlugin/detectionLayerPlugin.cpp index 1ce3dfb68..0ccbbe520 100644 --- a/plugin/detectionLayerPlugin/detectionLayerPlugin.cpp +++ b/plugin/detectionLayerPlugin/detectionLayerPlugin.cpp @@ -36,7 +36,7 @@ DetectionLayerPluginCreator::DetectionLayerPluginCreator() mPluginAttributes.emplace_back(PluginField("score_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("iou_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -316,7 +316,7 @@ int32_t DetectionLayer::enqueue( } DataType DetectionLayer::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only DataType::kFLOAT is acceptable by the plugin layer. return DataType::kFLOAT; @@ -324,28 +324,29 @@ DataType DetectionLayer::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool DetectionLayer::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. 
-bool DetectionLayer::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool DetectionLayer::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. -void DetectionLayer::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void DetectionLayer::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t /*nbOutputs*/, DataType const* inputTypes, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t maxBatchSize) noexcept { try { checkValidInputs(inputDims, nbInputs); PLUGIN_VALIDATE(inputDims[0].d[0] == inputDims[1].d[0] && inputDims[1].d[0] == inputDims[2].d[0]); - mAnchorsCnt = inputDims[2].d[0]; + mAnchorsCnt = static_cast(inputDims[2].d[0]); mType = inputTypes[0]; mMaxBatchSize = maxBatchSize; } @@ -357,7 +358,7 @@ void DetectionLayer::configurePlugin(Dims const* inputDims, int32_t nbInputs, Di // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void DetectionLayer::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp b/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp index 959980fec..c08a834df 100644 --- a/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionPlugin.cpp @@ -126,7 +126,7 @@ PluginFieldCollection const* DisentangledAttentionPlugin::getFieldsToSerialize() mDataToSerialize.emplace_back("span", &mSpan, PluginFieldType::kINT32, 1); mDataToSerialize.emplace_back("factor", &mFactor, PluginFieldType::kFLOAT32, 1); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; @@ -141,8 +141,8 @@ PluginFieldCollection const* DisentangledAttentionPlugin::getFieldsToSerialize() // IPluginV3OneBuild methods int32_t DisentangledAttentionPlugin::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, - IExprBuilder& exprBuilder) noexcept + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, + IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -233,15 +233,18 @@ bool DisentangledAttentionPlugin::supportsFormatCombination( // IPluginV3OneRuntime methods template -void DisentangledAttentionPlugin::enqueueType(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, +void DisentangledAttentionPlugin::enqueueType(PluginTensorDesc const* inputDesc, PluginTensorDesc const* /*outputDesc*/, void const* const* inputs, void* const* outputs, cudaStream_t stream, TDataType factor) { Dims 
dims0 = inputDesc[0].dims; Dims dims1 = inputDesc[1].dims; Dims dims2 = inputDesc[2].dims; - dim3 dimData0(dims0.d[0], dims0.d[1], dims0.d[2]); - dim3 dimData1(dims1.d[0], dims1.d[1], dims1.d[2]); - dim3 dimData2(dims2.d[0], dims2.d[1], dims2.d[2]); + dim3 dimData0(static_cast(dims0.d[0]), static_cast(dims0.d[1]), + static_cast(dims0.d[2])); + dim3 dimData1(static_cast(dims1.d[0]), static_cast(dims1.d[1]), + static_cast(dims1.d[2])); + dim3 dimData2(static_cast(dims2.d[0]), static_cast(dims2.d[1]), + static_cast(dims2.d[2])); dim3 dimResult(dimData0); dim3 blockOptimized(kDISENTANGLED_TILESIZE, kDISENTANGLED_BLOCKDIMY); @@ -283,8 +286,8 @@ int32_t DisentangledAttentionPlugin::enqueue(PluginTensorDesc const* inputDesc, } } -size_t DisentangledAttentionPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t DisentangledAttentionPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -318,7 +321,7 @@ int32_t DisentangledAttentionPlugin::onShapeChange( return STATUS_FAILURE; } -IPluginV3* DisentangledAttentionPlugin::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* DisentangledAttentionPlugin::attachToContext(IPluginResourceContext* /*context*/) noexcept { try { @@ -339,7 +342,7 @@ DisentangledAttentionPluginCreator::DisentangledAttentionPluginCreator() mPluginAttributes.emplace_back(PluginField("span", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("factor", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -359,7 +362,7 @@ PluginFieldCollection const* DisentangledAttentionPluginCreator::getFieldNames() } IPluginV3* 
DisentangledAttentionPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* /*name*/, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.cpp b/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.cpp index e96f11aa9..a07bb88d8 100644 --- a/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.cpp +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.cpp @@ -72,8 +72,8 @@ char const* DisentangledAttentionPluginLegacy::getPluginVersion() const noexcept } // IPluginV2DynamicExt Methods -nvinfer1::DimsExprs DisentangledAttentionPluginLegacy::getOutputDimensions( - int32_t index, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +nvinfer1::DimsExprs DisentangledAttentionPluginLegacy::getOutputDimensions(int32_t index, + nvinfer1::DimsExprs const* inputs, int32_t /*nbInputs*/, nvinfer1::IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -90,15 +90,18 @@ nvinfer1::DimsExprs DisentangledAttentionPluginLegacy::getOutputDimensions( template void DisentangledAttentionPluginLegacy::enqueueType(nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, cudaStream_t stream, - TDataType factor) + nvinfer1::PluginTensorDesc const* /*outputDesc*/, void const* const* inputs, void* const* outputs, + cudaStream_t stream, TDataType factor) { nvinfer1::Dims dims0 = inputDesc[0].dims; nvinfer1::Dims dims1 = inputDesc[1].dims; nvinfer1::Dims dims2 = inputDesc[2].dims; - dim3 dimData0(dims0.d[0], dims0.d[1], dims0.d[2]); - dim3 dimData1(dims1.d[0], dims1.d[1], dims1.d[2]); - dim3 dimData2(dims2.d[0], dims2.d[1], dims2.d[2]); + dim3 dimData0(static_cast(dims0.d[0]), static_cast(dims0.d[1]), + static_cast(dims0.d[2])); + dim3 
dimData1(static_cast(dims1.d[0]), static_cast(dims1.d[1]), + static_cast(dims1.d[2])); + dim3 dimData2(static_cast(dims2.d[0]), static_cast(dims2.d[1]), + static_cast(dims2.d[2])); dim3 dimResult(dimData0); dim3 blockOptimized(kDISENTANGLED_TILESIZE, kDISENTANGLED_BLOCKDIMY); @@ -247,8 +250,8 @@ nvinfer1::DataType DisentangledAttentionPluginLegacy::getOutputDataType( return nvinfer1::DataType{}; } -size_t DisentangledAttentionPluginLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t DisentangledAttentionPluginLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -279,7 +282,7 @@ DisentangledAttentionPluginCreatorLegacy::DisentangledAttentionPluginCreatorLega mPluginAttributes.emplace_back(PluginField("span", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("factor", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.h b/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.h index d3eecacfa..882144500 100644 --- a/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.h +++ b/plugin/disentangledAttentionPlugin/disentangledAttentionPluginLegacy.h @@ -44,6 +44,11 @@ namespace plugin class DisentangledAttentionPluginLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + DisentangledAttentionPluginLegacy(); 
DisentangledAttentionPluginLegacy(int32_t span, float factor); @@ -103,11 +108,6 @@ class DisentangledAttentionPluginLegacy : public nvinfer1::IPluginV2DynamicExt // attributes int32_t mSpan; float mFactor; - - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; }; class DisentangledAttentionPluginCreatorLegacy : public nvinfer1::IPluginCreator diff --git a/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp b/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp index 71836943c..869a790cd 100644 --- a/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp +++ b/plugin/efficientNMSPlugin/efficientNMSPlugin.cpp @@ -131,7 +131,7 @@ char const* EfficientNMSPlugin::getPluginNamespace() const noexcept } nvinfer1::DataType EfficientNMSPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { if (mParam.outputONNXIndices) { @@ -164,7 +164,7 @@ IPluginV2DynamicExt* EfficientNMSPlugin::clone() const noexcept } DimsExprs EfficientNMSPlugin::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t outputIndex, DimsExprs const* inputs, int32_t /*nbInputs*/, IExprBuilder& exprBuilder) noexcept { try { @@ -280,8 +280,8 @@ bool EfficientNMSPlugin::supportsFormatCombination( && (inOut[0].type == inOut[pos].type); } -void EfficientNMSPlugin::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void EfficientNMSPlugin::configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t nbOutputs) noexcept { try { @@ -305,8 +305,8 @@ void EfficientNMSPlugin::configurePlugin( // Shape of scores input should be // [batch_size, num_boxes, num_classes] or 
[batch_size, num_boxes, num_classes, 1] PLUGIN_ASSERT(in[1].desc.dims.nbDims == 3 || (in[1].desc.dims.nbDims == 4 && in[1].desc.dims.d[3] == 1)); - mParam.numScoreElements = in[1].desc.dims.d[1] * in[1].desc.dims.d[2]; - mParam.numClasses = in[1].desc.dims.d[2]; + mParam.numScoreElements = static_cast(in[1].desc.dims.d[1] * in[1].desc.dims.d[2]); + mParam.numClasses = static_cast(in[1].desc.dims.d[2]); // When pad per class is set, the total output boxes size may need to be reduced. // This operation is also done in getOutputDimension(), but for dynamic shapes, the @@ -326,16 +326,17 @@ void EfficientNMSPlugin::configurePlugin( { PLUGIN_ASSERT(in[0].desc.dims.d[2] == 4); mParam.shareLocation = true; - mParam.numBoxElements = in[0].desc.dims.d[1] * in[0].desc.dims.d[2]; + mParam.numBoxElements = static_cast(in[0].desc.dims.d[1] * in[0].desc.dims.d[2]); } else { mParam.shareLocation = (in[0].desc.dims.d[2] == 1); PLUGIN_ASSERT(in[0].desc.dims.d[2] == mParam.numClasses || mParam.shareLocation); PLUGIN_ASSERT(in[0].desc.dims.d[3] == 4); - mParam.numBoxElements = in[0].desc.dims.d[1] * in[0].desc.dims.d[2] * in[0].desc.dims.d[3]; + mParam.numBoxElements + = static_cast(in[0].desc.dims.d[1] * in[0].desc.dims.d[2] * in[0].desc.dims.d[3]); } - mParam.numAnchors = in[0].desc.dims.d[1]; + mParam.numAnchors = static_cast(in[0].desc.dims.d[1]); if (nbInputs == 2) { @@ -358,12 +359,12 @@ void EfficientNMSPlugin::configurePlugin( } } -size_t EfficientNMSPlugin::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t EfficientNMSPlugin::getWorkspaceSize(PluginTensorDesc const* inputs, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { - int32_t batchSize = inputs[1].dims.d[0]; - int32_t numScoreElements = inputs[1].dims.d[1] * inputs[1].dims.d[2]; - int32_t numClasses = inputs[1].dims.d[2]; + int32_t batchSize = 
static_cast(inputs[1].dims.d[0]); + int32_t numScoreElements = static_cast(inputs[1].dims.d[1] * inputs[1].dims.d[2]); + int32_t numClasses = static_cast(inputs[1].dims.d[2]); return EfficientNMSWorkspaceSize(batchSize, numScoreElements, numClasses, mParam.datatype); } @@ -374,7 +375,7 @@ int32_t EfficientNMSPlugin::enqueue(PluginTensorDesc const* inputDesc, PluginTen { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr && workspace != nullptr); - mParam.batchSize = inputDesc[0].dims.d[0]; + mParam.batchSize = static_cast(inputDesc[0].dims.d[0]); if (mParam.outputONNXIndices) { @@ -421,7 +422,7 @@ EfficientNMSPluginCreator::EfficientNMSPluginCreator() mPluginAttributes.emplace_back(PluginField("score_activation", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("class_agnostic", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("box_coding", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -440,7 +441,8 @@ PluginFieldCollection const* EfficientNMSPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2DynamicExt* EfficientNMSPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2DynamicExt* EfficientNMSPluginCreator::createPlugin( + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -512,7 +514,7 @@ IPluginV2DynamicExt* EfficientNMSPluginCreator::createPlugin(char const* name, P } IPluginV2DynamicExt* EfficientNMSPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { @@ -539,7 +541,7 @@ EfficientNMSONNXPluginCreator::EfficientNMSONNXPluginCreator() mPluginAttributes.emplace_back(PluginField("iou_threshold", nullptr, 
PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("max_output_boxes_per_class", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("center_point_box", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -559,7 +561,7 @@ PluginFieldCollection const* EfficientNMSONNXPluginCreator::getFieldNames() noex } IPluginV2DynamicExt* EfficientNMSONNXPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -608,7 +610,7 @@ IPluginV2DynamicExt* EfficientNMSONNXPluginCreator::createPlugin( } IPluginV2DynamicExt* EfficientNMSONNXPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/efficientNMSPlugin/efficientNMSPlugin.h b/plugin/efficientNMSPlugin/efficientNMSPlugin.h index c7248d91f..9ce2061b6 100644 --- a/plugin/efficientNMSPlugin/efficientNMSPlugin.h +++ b/plugin/efficientNMSPlugin/efficientNMSPlugin.h @@ -30,6 +30,11 @@ namespace plugin class EfficientNMSPlugin : public IPluginV2DynamicExt { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + explicit EfficientNMSPlugin(EfficientNMSParameters param); EfficientNMSPlugin(void const* data, size_t length); ~EfficientNMSPlugin() override = default; diff --git a/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.cpp b/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.cpp index 3aef2fe6c..02fad8d89 100644 --- a/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.cpp +++ 
b/plugin/efficientNMSPlugin/tftrt/efficientNMSExplicitTFTRTPlugin.cpp @@ -77,7 +77,7 @@ EfficientNMSExplicitTFTRTPluginCreator::EfficientNMSExplicitTFTRTPluginCreator() mPluginAttributes.emplace_back(PluginField("score_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("pad_per_class", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("clip_boxes", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -97,7 +97,7 @@ const PluginFieldCollection* EfficientNMSExplicitTFTRTPluginCreator::getFieldNam } IPluginV2DynamicExt* EfficientNMSExplicitTFTRTPluginCreator::createPlugin( - const char* name, const PluginFieldCollection* fc) noexcept + const char* /*name*/, const PluginFieldCollection* fc) noexcept { try { @@ -149,7 +149,7 @@ IPluginV2DynamicExt* EfficientNMSExplicitTFTRTPluginCreator::createPlugin( } IPluginV2DynamicExt* EfficientNMSExplicitTFTRTPluginCreator::deserializePlugin( - const char* name, const void* serialData, size_t serialLength) noexcept + const char* /*name*/, const void* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.cpp b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.cpp index af75d75dc..d361f52c5 100644 --- a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.cpp +++ b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.cpp @@ -107,7 +107,7 @@ const char* EfficientNMSImplicitTFTRTPlugin::getPluginNamespace() const noexcept } Dims EfficientNMSImplicitTFTRTPlugin::getOutputDimensions( - int32_t outputIndex, const Dims* inputs, int32_t nbInputs) noexcept + int32_t outputIndex, const Dims* inputs, int32_t /*nbInputs*/) noexcept { try { @@ -118,7 +118,7 @@ Dims EfficientNMSImplicitTFTRTPlugin::getOutputDimensions( 
PLUGIN_ASSERT(inputs[1].nbDims == 2); if (mParam.padOutputBoxesPerClass && mParam.numOutputBoxesPerClass > 0) { - const int32_t numClasses = inputs[1].d[1]; + const int32_t numClasses = static_cast<int32_t>(inputs[1].d[1]); if (mParam.numOutputBoxesPerClass * numClasses < mParam.numOutputBoxes) { mParam.numOutputBoxes = mParam.numOutputBoxesPerClass * numClasses; @@ -189,13 +189,13 @@ int32_t EfficientNMSImplicitTFTRTPlugin::enqueue(int32_t batchSize, void const* return -1; } -bool EfficientNMSImplicitTFTRTPlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool EfficientNMSImplicitTFTRTPlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } DataType EfficientNMSImplicitTFTRTPlugin::getOutputDataType( - int32_t index, const DataType* inputTypes, int32_t nbInputs) const noexcept + int32_t index, const DataType* inputTypes, int32_t /*nbInputs*/) const noexcept { // num_detections and detection_classes use integer outputs if (index == 0 || index == 3) @@ -222,7 +222,7 @@ IPluginV2IOExt* EfficientNMSImplicitTFTRTPlugin::clone() const noexcept } bool EfficientNMSImplicitTFTRTPlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } @@ -255,7 +255,7 @@ bool EfficientNMSImplicitTFTRTPlugin::supportsFormatCombination( } void EfficientNMSImplicitTFTRTPlugin::configurePlugin( - const PluginTensorDesc* in, int32_t nbInputs, const PluginTensorDesc* out, int32_t nbOutputs) noexcept + const PluginTensorDesc* in, int32_t nbInputs, const PluginTensorDesc* /*out*/, int32_t nbOutputs) noexcept { try { @@ -268,8 +268,8 @@ void EfficientNMSImplicitTFTRTPlugin::configurePlugin( // [batch_size, num_boxes, num_classes] or [batch_size, num_boxes, // num_classes, 1] PLUGIN_ASSERT(in[1].dims.nbDims == 2 || (in[1].dims.nbDims == 3 && in[1].dims.d[2] == 1)); -
mParam.numScoreElements = in[1].dims.d[0] * in[1].dims.d[1]; - mParam.numClasses = in[1].dims.d[1]; + mParam.numScoreElements = static_cast<int32_t>(in[1].dims.d[0] * in[1].dims.d[1]); + mParam.numClasses = static_cast<int32_t>(in[1].dims.d[1]); // Shape of boxes input should be // [batch_size, num_boxes, 4] or [batch_size, num_boxes, 1, 4] or [batch_size, @@ -279,16 +279,16 @@ { PLUGIN_ASSERT(in[0].dims.d[1] == 4); mParam.shareLocation = true; - mParam.numBoxElements = in[0].dims.d[0] * in[0].dims.d[1]; + mParam.numBoxElements = static_cast<int32_t>(in[0].dims.d[0] * in[0].dims.d[1]); } else { mParam.shareLocation = (in[0].dims.d[1] == 1); PLUGIN_ASSERT(in[0].dims.d[1] == mParam.numClasses || mParam.shareLocation); PLUGIN_ASSERT(in[0].dims.d[2] == 4); - mParam.numBoxElements = in[0].dims.d[0] * in[0].dims.d[1] * in[0].dims.d[2]; + mParam.numBoxElements = static_cast<int32_t>(in[0].dims.d[0] * in[0].dims.d[1] * in[0].dims.d[2]); } - mParam.numAnchors = in[0].dims.d[0]; + mParam.numAnchors = static_cast<int32_t>(in[0].dims.d[0]); if (nbInputs == 2) { @@ -311,7 +311,7 @@ EfficientNMSImplicitTFTRTPluginCreator::EfficientNMSImplicitTFTRTPluginCreator() mPluginAttributes.emplace_back(PluginField("score_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("pad_per_class", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("clip_boxes", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast<int32_t>(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -331,7 +331,7 @@ const PluginFieldCollection* EfficientNMSImplicitTFTRTPluginCreator::getFieldNam } IPluginV2IOExt* EfficientNMSImplicitTFTRTPluginCreator::createPlugin( - const char* name, const PluginFieldCollection* fc) noexcept + const char* /*name*/, const PluginFieldCollection* fc) noexcept { try { @@ -383,7 +383,7 @@ IPluginV2IOExt*
EfficientNMSImplicitTFTRTPluginCreator::createPlugin( } IPluginV2IOExt* EfficientNMSImplicitTFTRTPluginCreator::deserializePlugin( - const char* name, const void* serialData, size_t serialLength) noexcept + const char* /*name*/, const void* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h index 3ca88f8dc..8b4a4fa5c 100644 --- a/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h +++ b/plugin/efficientNMSPlugin/tftrt/efficientNMSImplicitTFTRTPlugin.h @@ -40,6 +40,8 @@ using EfficientNMSImplicitTFTRTOutputsDataType = void**; class EfficientNMSImplicitTFTRTPlugin : public nvinfer1::IPluginV2IOExt { public: + using nvinfer1::IPluginV2IOExt::configurePlugin; + explicit EfficientNMSImplicitTFTRTPlugin(EfficientNMSParameters param); EfficientNMSImplicitTFTRTPlugin(const void* data, size_t length); ~EfficientNMSImplicitTFTRTPlugin() override = default; diff --git a/plugin/embLayerNormPlugin/embLayerNormPlugin.cpp b/plugin/embLayerNormPlugin/embLayerNormPlugin.cpp index 0df771fb5..a42b48280 100644 --- a/plugin/embLayerNormPlugin/embLayerNormPlugin.cpp +++ b/plugin/embLayerNormPlugin/embLayerNormPlugin.cpp @@ -42,19 +42,19 @@ EmbLayerNormPluginDynamic::EmbLayerNormPluginDynamic(std::string const& name, Da DataType const mhaType, Weights const& beta, Weights const& gamma, Weights const& wordEmb, Weights const& posEmb, Weights const& tokEmb, bool const useFullMask) : mLayerName(name) - , mLd(beta.count) + , mLd(static_cast(beta.count)) , mType(type) , mMhaType(mhaType) { // Assuming Weights.count is the number of elements and not bytes PLUGIN_VALIDATE(beta.count == gamma.count); PLUGIN_VALIDATE(mLd > 0U); - PLUGIN_VALIDATE(wordEmb.count % mLd == 0); - PLUGIN_VALIDATE(posEmb.count % mLd == 0); - PLUGIN_VALIDATE(tokEmb.count % mLd == 0); - mWordVocabSize = wordEmb.count / mLd; - mPosVocabSize = posEmb.count / mLd; - 
mTokVocabSize = tokEmb.count / mLd; + PLUGIN_VALIDATE(static_cast(wordEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(posEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(tokEmb.count) % mLd == 0); + mWordVocabSize = static_cast(wordEmb.count) / mLd; + mPosVocabSize = static_cast(posEmb.count) / mLd; + mTokVocabSize = static_cast(tokEmb.count) / mLd; mSM = getSmVersion(); mOutputFp16 = mType == DataType::kHALF ? 1 : 0; mUseFullMask = static_cast(useFullMask); @@ -175,7 +175,7 @@ PluginFieldCollection const* EmbLayerNormPluginDynamic::getFieldsToSerialize() n mDataToSerialize.emplace_back("bert_embeddings_position_embeddings", static_cast(mPosEmb.values), PluginFieldType::kFLOAT32, mPosEmb.count); } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -191,9 +191,9 @@ int32_t EmbLayerNormPluginDynamic::onShapeChange( PLUGIN_ASSERT(nbInputs == 3); PLUGIN_ASSERT(inputs[0].dims.nbDims == 2); - int32_t const S = inputs[0].dims.d[SDIM]; - mS = S; - int32_t const B = inputs[0].dims.d[BDIM]; + int32_t const S = static_cast(inputs[0].dims.d[SDIM]); + mS = static_cast(S); + int32_t const B = static_cast(inputs[0].dims.d[BDIM]); TRT_UNUSED B; PLUGIN_ASSERT(mS == static_cast(inputs[1].dims.d[SDIM])); PLUGIN_ASSERT(B == inputs[1].dims.d[BDIM]); @@ -242,7 +242,7 @@ int32_t EmbLayerNormPluginDynamic::onShapeChange( return pluginStatus_t::STATUS_FAILURE; } -IPluginV3* EmbLayerNormPluginDynamic::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* EmbLayerNormPluginDynamic::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -254,8 +254,8 @@ int32_t EmbLayerNormPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, Pl { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batchSize = inputDesc->dims.d[BDIM]; - int32_t const S = 
inputDesc->dims.d[SDIM]; + int32_t const batchSize = static_cast(inputDesc->dims.d[BDIM]); + int32_t const S = static_cast(inputDesc->dims.d[SDIM]); int32_t status = STATUS_FAILURE; // Our plugin outputs only one tensor @@ -272,7 +272,8 @@ int32_t EmbLayerNormPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, Pl auto const tokEmb = static_cast(mTokEmbDev.get()); auto const posEmb = static_cast(mPosEmbDev.get()); status = embSkipLayerNorm(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); if (status != cudaSuccess) { @@ -286,7 +287,8 @@ int32_t EmbLayerNormPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, Pl auto const tokEmb = static_cast(mTokEmbDev.get()); auto const posEmb = static_cast(mPosEmbDev.get()); status = embSkipLayerNorm(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, beta, - gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); if (status != cudaSuccess) { @@ -317,7 +319,9 @@ int32_t EmbLayerNormPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, Pl } uint32_t* inputMaskX = static_cast(outputs[1]); - status = convertMask(S, batchSize, warps_m, warps_n, warps_k, inputMask, inputMaskX, stream); + status = convertMask(static_cast(S), static_cast(batchSize), + static_cast(warps_m), static_cast(warps_n), static_cast(warps_k), + inputMask, inputMaskX, stream); } else { @@ -386,7 +390,7 @@ bool EmbLayerNormPluginDynamic::supportsFormatCombination( } int32_t EmbLayerNormPluginDynamic::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* 
outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -406,7 +410,7 @@ int32_t EmbLayerNormPluginDynamic::getOutputShapes(DimsExprs const* inputs, int3 outputs[0].nbDims = 5; outputs[0].d[0] = inputs[0].d[0]; outputs[0].d[1] = inputs[0].d[1]; - outputs[0].d[2] = exprBuilder.constant(mLd); + outputs[0].d[2] = exprBuilder.constant(static_cast(mLd)); outputs[0].d[3] = exprBuilder.constant(1); outputs[0].d[4] = exprBuilder.constant(1); @@ -483,14 +487,14 @@ int32_t EmbLayerNormPluginDynamic::getOutputDataTypes( return pluginStatus_t::STATUS_FAILURE; } -int32_t EmbLayerNormPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +int32_t EmbLayerNormPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -size_t EmbLayerNormPluginDynamic::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t EmbLayerNormPluginDynamic::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -548,7 +552,7 @@ EmbLayerNormPluginDynamicCreator::EmbLayerNormPluginDynamicCreator() mPluginAttributes.emplace_back(PluginField("output_fp16")); mPluginAttributes.emplace_back(PluginField("full_mask")); mPluginAttributes.emplace_back(PluginField("mha_type_id")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -568,7 +572,7 @@ PluginFieldCollection const* EmbLayerNormPluginDynamicCreator::getFieldNames() n } IPluginV3* EmbLayerNormPluginDynamicCreator::createPlugin( - char const* name, 
PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.cpp b/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.cpp index 62cb36447..f029f7745 100644 --- a/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.cpp +++ b/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.cpp @@ -42,7 +42,7 @@ EmbLayerNormPluginDynamicLegacy::EmbLayerNormPluginDynamicLegacy(std::string con DataType const mhaType, Weights const& beta, Weights const& gamma, Weights const& wordEmb, Weights const& posEmb, Weights const& tokEmb, bool const useFullMask) : mLayerName(name) - , mLd(beta.count) + , mLd(static_cast(beta.count)) , mType(type) , mUseFullMask(useFullMask) , mMhaType(mhaType) @@ -50,12 +50,12 @@ EmbLayerNormPluginDynamicLegacy::EmbLayerNormPluginDynamicLegacy(std::string con // Assuming Weights.count is the number of elements and not bytes PLUGIN_VALIDATE(beta.count == gamma.count); PLUGIN_VALIDATE(mLd > 0U); - PLUGIN_VALIDATE(wordEmb.count % mLd == 0); - PLUGIN_VALIDATE(posEmb.count % mLd == 0); - PLUGIN_VALIDATE(tokEmb.count % mLd == 0); - mWordVocabSize = wordEmb.count / mLd; - mPosVocabSize = posEmb.count / mLd; - mTokVocabSize = tokEmb.count / mLd; + PLUGIN_VALIDATE(static_cast(wordEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(posEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(tokEmb.count) % mLd == 0); + mWordVocabSize = static_cast(wordEmb.count) / mLd; + mPosVocabSize = static_cast(posEmb.count) / mLd; + mTokVocabSize = static_cast(tokEmb.count) / mLd; mSM = getSmVersion(); // mS is set during configure @@ -150,7 +150,7 @@ DimsExprs EmbLayerNormPluginDynamicLegacy::getOutputDimensions( ret.nbDims = 5; ret.d[0] = inputs[0].d[0]; ret.d[1] = inputs[0].d[1]; - ret.d[2] = exprBuilder.constant(mLd); + ret.d[2] = exprBuilder.constant(static_cast(mLd)); ret.d[3] = exprBuilder.constant(1); ret.d[4] = 
exprBuilder.constant(1); return ret; @@ -253,9 +253,9 @@ void EmbLayerNormPluginDynamicLegacy::configurePlugin(DynamicPluginTensorDesc co PLUGIN_ASSERT(nbInputs == 3); PLUGIN_ASSERT(inputs[0].desc.dims.nbDims == 2); - int32_t const S = inputs[0].desc.dims.d[SDIM]; - mS = S; - int32_t const B = inputs[0].desc.dims.d[BDIM]; + int32_t const S = static_cast(inputs[0].desc.dims.d[SDIM]); + mS = static_cast(S); + int32_t const B = static_cast(inputs[0].desc.dims.d[BDIM]); TRT_UNUSED B; PLUGIN_ASSERT(mS == static_cast(inputs[1].desc.dims.d[SDIM])); PLUGIN_ASSERT(B == inputs[1].desc.dims.d[BDIM]); @@ -297,8 +297,8 @@ void EmbLayerNormPluginDynamicLegacy::configurePlugin(DynamicPluginTensorDesc co PLUGIN_ASSERT(outputs[1].desc.type == DataType::kINT32); } -size_t EmbLayerNormPluginDynamicLegacy::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t EmbLayerNormPluginDynamicLegacy::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -311,8 +311,8 @@ int32_t EmbLayerNormPluginDynamicLegacy::enqueue(PluginTensorDesc const* inputDe { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batchSize = inputDesc->dims.d[BDIM]; - int32_t const S = inputDesc->dims.d[SDIM]; + int32_t const batchSize = static_cast(inputDesc->dims.d[BDIM]); + int32_t const S = static_cast(inputDesc->dims.d[SDIM]); int32_t status = STATUS_FAILURE; // Our plugin outputs only one tensor @@ -329,7 +329,8 @@ int32_t EmbLayerNormPluginDynamicLegacy::enqueue(PluginTensorDesc const* inputDe auto const tokEmb = static_cast(mTokEmbDev.get()); auto const posEmb = static_cast(mPosEmbDev.get()); status = embSkipLayerNorm(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + beta, 
gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); if (status != cudaSuccess) { @@ -343,7 +344,8 @@ int32_t EmbLayerNormPluginDynamicLegacy::enqueue(PluginTensorDesc const* inputDe auto const tokEmb = static_cast(mTokEmbDev.get()); auto const posEmb = static_cast(mPosEmbDev.get()); status = embSkipLayerNorm(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, beta, - gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); if (status != cudaSuccess) { @@ -374,7 +376,9 @@ int32_t EmbLayerNormPluginDynamicLegacy::enqueue(PluginTensorDesc const* inputDe } uint32_t* inputMaskX = static_cast(outputs[1]); - status = convertMask(S, batchSize, warps_m, warps_n, warps_k, inputMask, inputMaskX, stream); + status = convertMask(static_cast(S), static_cast(batchSize), + static_cast(warps_m), static_cast(warps_n), static_cast(warps_k), + inputMask, inputMaskX, stream); } else { @@ -393,7 +397,7 @@ int32_t EmbLayerNormPluginDynamicLegacy::enqueue(PluginTensorDesc const* inputDe // IPluginV2Ext Methods DataType EmbLayerNormPluginDynamicLegacy::getOutputDataType( - int32_t index, DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index == 0 || index == 1); @@ -509,7 +513,7 @@ EmbLayerNormPluginDynamicLegacyCreator::EmbLayerNormPluginDynamicLegacyCreator() mPluginAttributes.emplace_back(PluginField("output_fp16")); mPluginAttributes.emplace_back(PluginField("full_mask")); mPluginAttributes.emplace_back(PluginField("mha_type_id")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.h b/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.h index 
936098ef1..ca8760364 100644 --- a/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.h +++ b/plugin/embLayerNormPlugin/embLayerNormPluginLegacy.h @@ -48,6 +48,11 @@ cudaError_t convertMask(uint32_t const S, uint32_t const B, uint32_t const warps class EmbLayerNormPluginDynamicLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + EmbLayerNormPluginDynamicLegacy(std::string const& name, nvinfer1::DataType const type, nvinfer1::DataType const mhaType, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma, nvinfer1::Weights const& word_emb, nvinfer1::Weights const& pos_emb, nvinfer1::Weights const& tok_emb, @@ -111,11 +116,6 @@ class EmbLayerNormPluginDynamicLegacy : public nvinfer1::IPluginV2DynamicExt bool mUseFullMask; nvinfer1::DataType mMhaType; int32_t mSM; - - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; }; class EmbLayerNormPluginDynamicLegacyCreator : public nvinfer1::IPluginCreator diff --git a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.cpp b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.cpp index 8f472a265..bd096cdc5 100644 --- a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.cpp +++ b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPlugin.cpp @@ -58,7 +58,7 @@ void checkConfigurationInputs( PLUGIN_ASSERT(inputs[2].type == DataType::kINT32); } -bool initializeFields(char const* name, PluginFieldCollection const* fc, Weights& beta, Weights& gamma, +bool initializeFields(char const* /*name*/, PluginFieldCollection const* fc, Weights& beta, Weights& gamma, Weights& word_emb, Weights& pos_emb, Weights& tok_emb) { bool output_fp16 = false; @@ -132,19 +132,19 @@ 
EmbLayerNormVarSeqlenPluginBase::EmbLayerNormVarSeqlenPluginBase(std::string con Weights const& beta, Weights const& gamma, Weights const& wordEmb, Weights const& posEmb, Weights const& tokEmb, DataType maskType) : mLayerName(name) - , mLd(beta.count) + , mLd(static_cast(beta.count)) , mType(type) , mMaskType(maskType) { // Assuming Weights.count is the number of elements and not bytes PLUGIN_VALIDATE(beta.count == gamma.count); PLUGIN_VALIDATE(mLd > 0U); - PLUGIN_VALIDATE(wordEmb.count % mLd == 0); - PLUGIN_VALIDATE(posEmb.count % mLd == 0); - PLUGIN_VALIDATE(tokEmb.count % mLd == 0); - mWordVocabSize = wordEmb.count / mLd; - mPosVocabSize = posEmb.count / mLd; - mTokVocabSize = tokEmb.count / mLd; + PLUGIN_VALIDATE(static_cast(wordEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(posEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(tokEmb.count) % mLd == 0); + mWordVocabSize = static_cast(wordEmb.count) / mLd; + mPosVocabSize = static_cast(posEmb.count) / mLd; + mTokVocabSize = static_cast(tokEmb.count) / mLd; mBeta.convertAndCopy(beta, nvinfer1::DataType::kFLOAT); mGamma.convertAndCopy(gamma, nvinfer1::DataType::kFLOAT); @@ -302,7 +302,7 @@ PluginFieldCollection const* EmbLayerNormVarSeqlenPluginBase::getFieldsToSeriali mDataToSerialize.emplace_back("bert_embeddings_position_embeddings", static_cast(mPosEmb.values), PluginFieldType::kFLOAT32, mPosEmb.count); } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -357,7 +357,7 @@ int32_t EmbLayerNormVarSeqlenPluginMTron::onShapeChange( return pluginStatus_t::STATUS_FAILURE; } -IPluginV3* EmbLayerNormVarSeqlenPluginBase::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* EmbLayerNormVarSeqlenPluginBase::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -370,9 +370,9 @@ int32_t 
EmbLayerNormVarSeqlenPluginHFace::enqueue(PluginTensorDesc const* inputD { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batchSize = inputDesc[2].dims.d[0] - 1; + int32_t const batchSize = static_cast(inputDesc[2].dims.d[0]) - 1; // read out the maximum sequence length from the dummy input - int32_t const maxSeqlen = inputDesc[3].dims.d[0]; + int32_t const maxSeqlen = static_cast(inputDesc[3].dims.d[0]); // There are four versions of the kernel which are optimized for sequence lengths 384, 256, 192 and 128. // Find the closest sequence length bigger than the max seq length in this batch. @@ -405,7 +405,8 @@ int32_t EmbLayerNormVarSeqlenPluginHFace::enqueue(PluginTensorDesc const* inputD auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormHFace(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); } if (mType == DataType::kHALF) { @@ -415,7 +416,8 @@ int32_t EmbLayerNormVarSeqlenPluginHFace::enqueue(PluginTensorDesc const* inputD auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormHFace(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); } else { @@ -442,9 +444,9 @@ int32_t EmbLayerNormVarSeqlenPluginMTron::enqueue(PluginTensorDesc const* inputD { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batchSize = inputDesc[2].dims.d[0] - 1; + int32_t const batchSize = static_cast(inputDesc[2].dims.d[0]) - 1; // read out the maximum sequence length from the dummy input - int32_t const maxSeqlen = 
inputDesc[3].dims.d[0]; + int32_t const maxSeqlen = static_cast(inputDesc[3].dims.d[0]); // There are four versions of the kernel which are optimized for sequence lengths 384, 256, 192 and 128. // Find the closest sequence length bigger than the max seq length in this batch. @@ -478,7 +480,8 @@ int32_t EmbLayerNormVarSeqlenPluginMTron::enqueue(PluginTensorDesc const* inputD auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormMTron(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output, skip); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output, skip); } if (mType == DataType::kHALF) { @@ -489,7 +492,8 @@ int32_t EmbLayerNormVarSeqlenPluginMTron::enqueue(PluginTensorDesc const* inputD auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormMTron(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output, skip); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output, skip); } else { @@ -566,7 +570,7 @@ bool EmbLayerNormVarSeqlenPluginBase::supportsFormatCombination( } int32_t EmbLayerNormVarSeqlenPluginHFace::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -587,7 +591,7 @@ int32_t EmbLayerNormVarSeqlenPluginHFace::getOutputShapes(DimsExprs const* input // output 0 : embedded input outputs[0].nbDims = 4; outputs[0].d[0] = inputs[0].d[0]; - outputs[0].d[1] = exprBuilder.constant(mLd); + outputs[0].d[1] = exprBuilder.constant(static_cast(mLd)); outputs[0].d[2] = 
exprBuilder.constant(1); outputs[0].d[3] = exprBuilder.constant(1); @@ -604,7 +608,7 @@ int32_t EmbLayerNormVarSeqlenPluginHFace::getOutputShapes(DimsExprs const* input } int32_t EmbLayerNormVarSeqlenPluginMTron::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { try @@ -623,14 +627,14 @@ int32_t EmbLayerNormVarSeqlenPluginMTron::getOutputShapes(DimsExprs const* input // Output 0 : embedded input outputs[0].nbDims = 4; outputs[0].d[0] = inputs[0].d[0]; - outputs[0].d[1] = exprBuilder.constant(mLd); + outputs[0].d[1] = exprBuilder.constant(static_cast(mLd)); outputs[0].d[2] = exprBuilder.constant(1); outputs[0].d[3] = exprBuilder.constant(1); // Output 1 : maskIdx outputs[1].nbDims = 4; outputs[1].d[0] = inputs[0].d[0]; - outputs[1].d[1] = exprBuilder.constant(mLd); + outputs[1].d[1] = exprBuilder.constant(static_cast(mLd)); outputs[1].d[2] = exprBuilder.constant(1); outputs[1].d[3] = exprBuilder.constant(1); @@ -644,7 +648,7 @@ int32_t EmbLayerNormVarSeqlenPluginMTron::getOutputShapes(DimsExprs const* input } int32_t EmbLayerNormVarSeqlenPluginBase::getOutputDataTypes( - DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept + DataType* outputTypes, int32_t /*nbOutputs*/, DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -660,14 +664,14 @@ int32_t EmbLayerNormVarSeqlenPluginBase::getOutputDataTypes( return pluginStatus_t::STATUS_FAILURE; } -int32_t EmbLayerNormVarSeqlenPluginBase::configurePlugin(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +int32_t EmbLayerNormVarSeqlenPluginBase::configurePlugin(DynamicPluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, 
DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -size_t EmbLayerNormVarSeqlenPluginBase::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t EmbLayerNormVarSeqlenPluginBase::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -716,14 +720,19 @@ EmbLayerNormVarSeqlenPluginBaseCreator::EmbLayerNormVarSeqlenPluginBaseCreator() mPluginAttributes.emplace_back(PluginField("output_fp16", nullptr, PluginFieldType::kINT32, 1)); // the length of beta, gamma, word_emb, pos_emb, and tok_emb will only be known at the time of plugin creation // so we set it to 0 here - mPluginAttributes.emplace_back(PluginField("bert_embeddings_layernorm_beta", nullptr, PluginFieldType::kFLOAT32, 0)); - mPluginAttributes.emplace_back(PluginField("bert_embeddings_layernorm_gamma", nullptr, PluginFieldType::kFLOAT32, 0)); + mPluginAttributes.emplace_back( + PluginField("bert_embeddings_layernorm_beta", nullptr, PluginFieldType::kFLOAT32, 0)); + mPluginAttributes.emplace_back( + PluginField("bert_embeddings_layernorm_gamma", nullptr, PluginFieldType::kFLOAT32, 0)); // the embeddings datatype is determined by the output_fp16 attribute known at runtime // so we set it to kUNKNOWN here - mPluginAttributes.emplace_back(PluginField("bert_embeddings_word_embeddings", nullptr, PluginFieldType::kUNKNOWN, 0)); - mPluginAttributes.emplace_back(PluginField("bert_embeddings_token_type_embeddings", nullptr, PluginFieldType::kUNKNOWN, 0)); - mPluginAttributes.emplace_back(PluginField("bert_embeddings_position_embeddings", nullptr, PluginFieldType::kUNKNOWN, 0)); - mFC.nbFields = mPluginAttributes.size(); + mPluginAttributes.emplace_back( + PluginField("bert_embeddings_word_embeddings", nullptr, 
PluginFieldType::kUNKNOWN, 0)); + mPluginAttributes.emplace_back( + PluginField("bert_embeddings_token_type_embeddings", nullptr, PluginFieldType::kUNKNOWN, 0)); + mPluginAttributes.emplace_back( + PluginField("bert_embeddings_position_embeddings", nullptr, PluginFieldType::kUNKNOWN, 0)); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -748,7 +757,7 @@ PluginFieldCollection const* EmbLayerNormVarSeqlenPluginBaseCreator::getFieldNam } IPluginV3* EmbLayerNormVarSeqlenPluginHFaceCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { @@ -779,7 +788,7 @@ IPluginV3* EmbLayerNormVarSeqlenPluginHFaceCreator::createPlugin( } IPluginV3* EmbLayerNormVarSeqlenPluginMTronCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.cpp b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.cpp index 5f103c058..a0ec91472 100644 --- a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.cpp +++ b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.cpp @@ -42,19 +42,19 @@ EmbLayerNormVarSeqlenPluginLegacyBase::EmbLayerNormVarSeqlenPluginLegacyBase(std Weights const& beta, Weights const& gamma, Weights const& wordEmb, Weights const& posEmb, Weights const& tokEmb, DataType maskType) : mLayerName(name) - , mLd(beta.count) + , mLd(static_cast(beta.count)) , mType(type) , mMaskType(maskType) { // Assuming Weights.count is the number of elements and not bytes PLUGIN_VALIDATE(beta.count == gamma.count); PLUGIN_VALIDATE(mLd > 0U); - PLUGIN_VALIDATE(wordEmb.count % mLd == 0); - PLUGIN_VALIDATE(posEmb.count % mLd == 0); - PLUGIN_VALIDATE(tokEmb.count % mLd == 0); - 
mWordVocabSize = wordEmb.count / mLd; - mPosVocabSize = posEmb.count / mLd; - mTokVocabSize = tokEmb.count / mLd; + PLUGIN_VALIDATE(static_cast(wordEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(posEmb.count) % mLd == 0); + PLUGIN_VALIDATE(static_cast(tokEmb.count) % mLd == 0); + mWordVocabSize = static_cast(wordEmb.count) / mLd; + mPosVocabSize = static_cast(posEmb.count) / mLd; + mTokVocabSize = static_cast(tokEmb.count) / mLd; mBeta.convertAndCopy(beta, nvinfer1::DataType::kFLOAT); mGamma.convertAndCopy(gamma, nvinfer1::DataType::kFLOAT); @@ -190,7 +190,7 @@ DimsExprs EmbLayerNormVarSeqlenPluginLegacyHFace::getOutputDimensions( DimsExprs ret; ret.nbDims = 4; ret.d[0] = inputs[0].d[0]; - ret.d[1] = exprBuilder.constant(mLd); + ret.d[1] = exprBuilder.constant(static_cast(mLd)); ret.d[2] = exprBuilder.constant(1); ret.d[3] = exprBuilder.constant(1); return ret; @@ -219,7 +219,7 @@ DimsExprs EmbLayerNormVarSeqlenPluginLegacyMTron::getOutputDimensions( DimsExprs ret; ret.nbDims = 4; ret.d[0] = inputs[0].d[0]; - ret.d[1] = exprBuilder.constant(mLd); + ret.d[1] = exprBuilder.constant(static_cast(mLd)); ret.d[2] = exprBuilder.constant(1); ret.d[3] = exprBuilder.constant(1); return ret; @@ -319,8 +319,8 @@ void EmbLayerNormVarSeqlenPluginLegacyMTron::configurePlugin(DynamicPluginTensor PLUGIN_ASSERT(outputs[1].desc.type == mMaskType); } -size_t EmbLayerNormVarSeqlenPluginLegacyBase::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t EmbLayerNormVarSeqlenPluginLegacyBase::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -333,9 +333,9 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyHFace::enqueue(PluginTensorDesc const* { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batchSize = inputDesc[2].dims.d[0] - 1; + 
int32_t const batchSize = static_cast(inputDesc[2].dims.d[0]) - 1; // read out the maximum sequence length from the dummy input - int32_t const maxSeqlen = inputDesc[3].dims.d[0]; + int32_t const maxSeqlen = static_cast(inputDesc[3].dims.d[0]); // There are four versions of the kernel which are optimized for sequence lengths 384, 256, 192 and 128. // Find the closest sequence length bigger than the max seq length in this batch. @@ -368,7 +368,8 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyHFace::enqueue(PluginTensorDesc const* auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormHFace(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); } if (mType == DataType::kHALF) { @@ -378,7 +379,8 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyHFace::enqueue(PluginTensorDesc const* auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormHFace(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output); } else { @@ -405,9 +407,9 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyMTron::enqueue(PluginTensorDesc const* { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batchSize = inputDesc[2].dims.d[0] - 1; + int32_t const batchSize = static_cast(inputDesc[2].dims.d[0]) - 1; // read out the maximum sequence length from the dummy input - int32_t const maxSeqlen = inputDesc[3].dims.d[0]; + int32_t const maxSeqlen = static_cast(inputDesc[3].dims.d[0]); // There are four versions of the kernel which are optimized for sequence lengths 384, 256, 192 and 128. 
// Find the closest sequence length bigger than the max seq length in this batch. @@ -441,7 +443,8 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyMTron::enqueue(PluginTensorDesc const* auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormMTron(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output, skip); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output, skip); } if (mType == DataType::kHALF) { @@ -452,7 +455,8 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyMTron::enqueue(PluginTensorDesc const* auto const posEmb = static_cast(mPosEmbDev.get()); return embSkipLayerNormMTron(stream, static_cast(mLd), batchSize, S, inputIds, segmentIds, - cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, mWordVocabSize, mTokVocabSize, output, skip); + cuSeqlens, beta, gamma, wordEmb, posEmb, tokEmb, static_cast(mWordVocabSize), + static_cast(mTokVocabSize), output, skip); } else { @@ -473,7 +477,7 @@ int32_t EmbLayerNormVarSeqlenPluginLegacyMTron::enqueue(PluginTensorDesc const* // IPluginV2Ext Methods DataType EmbLayerNormVarSeqlenPluginLegacyBase::getOutputDataType( - int32_t index, DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index == 0 || index == 1); PLUGIN_ASSERT(mType == DataType::kHALF || mType == DataType::kFLOAT); @@ -609,7 +613,7 @@ EmbLayerNormVarSeqlenPluginLegacyBaseCreator::EmbLayerNormVarSeqlenPluginLegacyB mPluginAttributes.emplace_back(PluginField("bert_embeddings_token_type_embeddings")); mPluginAttributes.emplace_back(PluginField("bert_embeddings_position_embeddings")); mPluginAttributes.emplace_back(PluginField("output_fp16")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ 
-633,7 +637,7 @@ PluginFieldCollection const* EmbLayerNormVarSeqlenPluginLegacyBaseCreator::getFi return &mFC; } -bool initializeFields(char const* name, PluginFieldCollection const* fc, Weights& beta, Weights& gamma, +bool initializeFields(char const* /*name*/, PluginFieldCollection const* fc, Weights& beta, Weights& gamma, Weights& word_emb, Weights& pos_emb, Weights& tok_emb) { bool output_fp16 = false; diff --git a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.h b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.h index 67b03a25d..c4cb03569 100644 --- a/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.h +++ b/plugin/embLayerNormPlugin/embLayerNormVarSeqlenPluginLegacy.h @@ -46,6 +46,11 @@ int32_t embSkipLayerNormMTron(cudaStream_t stream, int32_t ld, int32_t B, int32_ class EmbLayerNormVarSeqlenPluginLegacyBase : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + EmbLayerNormVarSeqlenPluginLegacyBase(std::string const& name, DataType type, Weights const& beta, Weights const& gamma, Weights const& word_emb, Weights const& pos_emb, Weights const& tok_emb, DataType maskType); @@ -100,6 +105,11 @@ class EmbLayerNormVarSeqlenPluginLegacyBase : public nvinfer1::IPluginV2DynamicE class EmbLayerNormVarSeqlenPluginLegacyHFace : public EmbLayerNormVarSeqlenPluginLegacyBase { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + EmbLayerNormVarSeqlenPluginLegacyHFace(std::string const& name, nvinfer1::DataType const type, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma, nvinfer1::Weights const& word_emb, nvinfer1::Weights const& 
pos_emb, nvinfer1::Weights const& tok_emb); @@ -129,6 +139,11 @@ class EmbLayerNormVarSeqlenPluginLegacyHFace : public EmbLayerNormVarSeqlenPlugi class EmbLayerNormVarSeqlenPluginLegacyMTron : public EmbLayerNormVarSeqlenPluginLegacyBase { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + EmbLayerNormVarSeqlenPluginLegacyMTron(std::string const& name, nvinfer1::DataType const type, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma, nvinfer1::Weights const& word_emb, nvinfer1::Weights const& pos_emb, nvinfer1::Weights const& tok_emb); diff --git a/plugin/fcPlugin/fcPlugin.cpp b/plugin/fcPlugin/fcPlugin.cpp index 6f0d02257..f7dc9dad7 100644 --- a/plugin/fcPlugin/fcPlugin.cpp +++ b/plugin/fcPlugin/fcPlugin.cpp @@ -50,9 +50,9 @@ static void printPerfStructure(customMatmulPerf_t const& perf, int32_t const m, AlgoProps p; p.populate(perf.algo); // Calculate GFLOPS - double timeAvg - = perf.time * 1e-3; // Convert to seconds. It has been divided by kNB_KERNEL_REPEATS in customMatmulRun(). - double gflop = (2 * static_cast(m * n) * k) * 1e-9; // Real + double timeAvg = static_cast(perf.time) + * 1e-3; // Convert to seconds. It has been divided by kNB_KERNEL_REPEATS in customMatmulRun(). 
+ double gflop = (2 * static_cast(static_cast(m * n) * k)) * 1e-9; // Real gLogVerbose << "Algo=" << p.algoId << " Tile=" << p.tile << " (" << matmulTileName[p.tile] << ") K=" << p.numSplitsK << " Red.Sch.=" << p.reductionScheme << " Swiz=" << p.swizzle << " Cust=" << p.customOption @@ -117,7 +117,7 @@ static cublasStatus_t customMatmulRun(cublasLtHandle_t ltHandle, // to get the c } // For the moment only add successful findings perfResults.algo = algo; - perfResults.time = time / kNB_KERNEL_REPEATS; // Average time + perfResults.time = time / static_cast(kNB_KERNEL_REPEATS); // Average time perfResults.workspaceSize = heurResult.workspaceSize; perfResults.wavesCount = heurResult.wavesCount; } @@ -179,11 +179,14 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera // Create matrix descriptors. We are good with the details here so no need to // set any extra attributes + PLUGIN_CUBLASASSERT( + cublasLtWrapper.cublasLtMatrixLayoutCreate(&Adesc, Atype, static_cast(transa == CUBLAS_OP_N ? m : k), + static_cast(transa == CUBLAS_OP_N ? k : m), static_cast(lda))); + PLUGIN_CUBLASASSERT( + cublasLtWrapper.cublasLtMatrixLayoutCreate(&Bdesc, Btype, static_cast(transb == CUBLAS_OP_N ? k : n), + static_cast(transb == CUBLAS_OP_N ? n : k), static_cast(ldb))); PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatrixLayoutCreate( - &Adesc, Atype, transa == CUBLAS_OP_N ? m : k, transa == CUBLAS_OP_N ? k : m, lda)); - PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatrixLayoutCreate( - &Bdesc, Btype, transb == CUBLAS_OP_N ? k : n, transb == CUBLAS_OP_N ? 
n : k, ldb)); - PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatrixLayoutCreate(&Cdesc, Ctype, m, n, ldc)); + &Cdesc, Ctype, static_cast(m), static_cast(n), static_cast(ldc))); // Request the 4 first AlgoId available for SGEMM ( computeType = scaleType = // Atype = Btype = Ctype = Dtype = CUDA_R_32F) @@ -209,7 +212,7 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera continue; } - uint64_t numericImpl = -1; + uint64_t numericImpl = static_cast(-1); PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatmulAlgoCapGetAttribute( &algo, CUBLASLT_ALGO_CAP_NUMERICAL_IMPL_FLAGS, &numericImpl, sizeof(numericImpl), nullptr)); if (Ctype == CUDA_R_32F && numericImpl == CUBLASLT_NUMERICAL_IMPL_FLAGS_HMMA) @@ -221,7 +224,7 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera // Query the tiles enums supported by that algo PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatmulAlgoCapGetAttribute( &algo, CUBLASLT_ALGO_CAP_TILE_IDS, nullptr, 0, &sizeWritten)); - int32_t nbTiles = int32_t(sizeWritten / sizeof(int32_t)); + int32_t nbTiles = static_cast(sizeWritten / sizeof(int32_t)); int32_t* tileA = new int32_t[nbTiles == 0 ? 
1 : nbTiles]; if (nbTiles == 0) { @@ -259,12 +262,12 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatmulAlgoConfigSetAttribute( &algo, CUBLASLT_ALGO_CONFIG_CUSTOM_OPTION, &customOption, sizeof(customOption))); // Loop over the CTAs swizzling support - for (int32_t k = 0; k <= swizzlingMax; k++) + for (int32_t k_ = 0; k_ <= swizzlingMax; k_++) { int32_t splitkTrial = 0; if (splitkSupport) { - splitkTrial += sizeof(splitKSequenceA) / sizeof(splitKSequenceA[0]); + splitkTrial += static_cast(sizeof(splitKSequenceA) / sizeof(splitKSequenceA[0])); } // Loop over the splitK value over a fixed sequence splitKSequenceA in // addition to the case where splitK is not enabled @@ -278,7 +281,7 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatmulAlgoConfigSetAttribute( &algo, CUBLASLT_ALGO_CONFIG_SPLITK_NUM, &splitK_val, sizeof(splitK_val))); PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatmulAlgoConfigSetAttribute( - &algo, CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING, &k, sizeof(k))); + &algo, CUBLASLT_ALGO_CONFIG_CTA_SWIZZLING, &k_, sizeof(k_))); PLUGIN_CUBLASASSERT(cublasLtWrapper.cublasLtMatmulAlgoConfigSetAttribute( &algo, CUBLASLT_ALGO_CONFIG_REDUCTION_SCHEME, &redScheme, sizeof(int32_t))); @@ -290,8 +293,8 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera sizeof(splitKSequenceA[l - 1]))); // Going over all the reduction scheme for (redScheme = 1; redScheme < static_cast(CUBLASLT_REDUCTION_SCHEME_MASK) - && (algoCount < kNB_ALGO_COMBINATIONS); - redScheme = redScheme << 1) + && (algoCount < kNB_ALGO_COMBINATIONS); + redScheme = redScheme << 1) { if (redScheme & redMask) { @@ -308,7 +311,7 @@ void nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera algoCount++; } } // end if - } // end for + } // end for } else { // Non-splitK case @@ -327,9 +330,9 @@ void 
nvinfer1::plugin::bert::LtGemmSearch(cublasLtHandle_t ltHandle, cublasOpera } } } // end l - } // end k - } // end customOption - } // end tileIdx + } // end k + } // end customOption + } // end tileIdx delete[] tileA; } // end idx @@ -360,7 +363,7 @@ FCPluginDynamic::FCPluginDynamic(std::string const name, DataType const type, in : mLayerName(name) , mType(type) , mOutDim(outDim) - , mNumParams(W.count) + , mNumParams(static_cast(W.count)) , mNmax(0) , mK(0) , mWdev(nullptr) @@ -398,7 +401,7 @@ IPluginV2DynamicExt* FCPluginDynamic::clone() const noexcept { gLogVerbose << "FCPluginDynamic clone\n"; - auto* p = new FCPluginDynamic(mLayerName, mType, mOutDim, mW); + auto* p = new FCPluginDynamic(mLayerName, mType, static_cast(mOutDim), mW); memcpy(p->mAlgo.data, mAlgo.data, sizeof(mAlgo.data)); p->setPluginNamespace(mNamespace.c_str()); @@ -411,8 +414,8 @@ IPluginV2DynamicExt* FCPluginDynamic::clone() const noexcept return nullptr; } -void FCPluginDynamic::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator) noexcept +void FCPluginDynamic::attachToContext(cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, + nvinfer1::IGpuAllocator* /*gpuAllocator*/) noexcept { mLtContext.attach(); } @@ -478,15 +481,15 @@ void FCPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* inputs, int auto const& inDims0 = inputs[0].desc.dims; PLUGIN_VALIDATE(inDims0.nbDims == 5); - mK = inDims0.d[HDIM]; // hiddensize + mK = static_cast(inDims0.d[HDIM]); // hiddensize // PLUGIN_ASSERT(hiddenSize * mOutDim == mNumParams); PLUGIN_VALIDATE(inDims0.d[3] == 1); PLUGIN_VALIDATE(inDims0.d[4] == 1); // m and k are mOutDim // n is B*S - int32_t const S = inputs->max.d[SDIM]; - int32_t const B = inputs->max.d[BDIM]; + int32_t const S = static_cast(inputs->max.d[SDIM]); + int32_t const B = static_cast(inputs->max.d[BDIM]); mNmax = S * B; @@ -495,12 +498,12 @@ void 
FCPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* inputs, int if (mType == DataType::kFLOAT) { - Gemm g(mOutDim, mNmax, mK, false, false); + Gemm g(static_cast(mOutDim), mNmax, mK, false, false); mLtContext.create(g, kMAX_WORKSPACE_BYTES); } else if (mType == DataType::kHALF) { - Gemm g(mOutDim, mNmax, mK, false, false); + Gemm g(static_cast(mOutDim), mNmax, mK, false, false); mLtContext.create(g, kMAX_WORKSPACE_BYTES); } else @@ -524,11 +527,13 @@ void FCPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* inputs, int } if (mType == DataType::kFLOAT) { - mAlgo = gemmSearch(mOutDim, mNmax, mK, kMAX_WORKSPACE_BYTES, actualWorkspace, mSharedStream); + mAlgo = gemmSearch( + static_cast(mOutDim), mNmax, mK, kMAX_WORKSPACE_BYTES, actualWorkspace, mSharedStream); } else if (mType == DataType::kHALF) { - mAlgo = gemmSearch(mOutDim, mNmax, mK, kMAX_WORKSPACE_BYTES, actualWorkspace, mSharedStream); + mAlgo = gemmSearch( + static_cast(mOutDim), mNmax, mK, kMAX_WORKSPACE_BYTES, actualWorkspace, mSharedStream); } } @@ -556,8 +561,8 @@ void FCPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* inputs, int } } -size_t FCPluginDynamic::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t FCPluginDynamic::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return kMAX_WORKSPACE_BYTES; } @@ -572,8 +577,8 @@ int32_t FCPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, PluginTensor size_t const workspaceSize = getWorkspaceSize(inputDesc, 1, outputDesc, 1); - int32_t const S = inputDesc->dims.d[SDIM]; - int32_t const B = inputDesc->dims.d[BDIM]; + int32_t const S = static_cast(inputDesc->dims.d[SDIM]); + int32_t const B = static_cast(inputDesc->dims.d[BDIM]); int32_t const n = S * B; PLUGIN_VALIDATE(n >= 0); mLtContext.setN(static_cast(n)); @@ -583,7 
+588,7 @@ int32_t FCPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, PluginTensor auto const* const input = static_cast(inputs[0]); auto* output = static_cast(outputs[0]); - Gemm g(mOutDim, n, mK, false, false); + Gemm g(static_cast(mOutDim), n, mK, false, false); if (mWdev == nullptr) { return STATUS_FAILURE; @@ -599,7 +604,7 @@ int32_t FCPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, PluginTensor auto const* const input = static_cast(inputs[0]); auto* output = static_cast(outputs[0]); - Gemm g(mOutDim, n, mK, false, false); + Gemm g(static_cast(mOutDim), n, mK, false, false); if (mWdev == nullptr) { return STATUS_FAILURE; @@ -668,8 +673,8 @@ void FCPluginDynamic::terminate() noexcept size_t FCPluginDynamic::getSerializationSize() const noexcept { size_t wordSize = getElementSize(mType); - return wordSize * mNumParams + sizeof(mType) + sizeof(mOutDim) + sizeof(mNumParams) + sizeof(mAlgo) + sizeof(mNmax) - + sizeof(mK); + return wordSize * static_cast(mNumParams) + sizeof(mType) + sizeof(mOutDim) + sizeof(mNumParams) + + sizeof(mAlgo) + sizeof(mNmax) + sizeof(mK); } void FCPluginDynamic::serialize(void* buffer) const noexcept @@ -683,7 +688,7 @@ void FCPluginDynamic::serialize(void* buffer) const noexcept size_t wordSize = getElementSize(mType); char* d = static_cast(buffer); - serFromDev(d, static_cast(mWdev.get()), mNumParams * wordSize); + serFromDev(d, static_cast(mWdev.get()), static_cast(mNumParams) * wordSize); } void FCPluginDynamic::destroy() noexcept @@ -722,7 +727,7 @@ FCPluginDynamicCreator::FCPluginDynamicCreator() mPluginAttributes.emplace_back(PluginField("type_id", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("W", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/fcPlugin/fcPlugin.h b/plugin/fcPlugin/fcPlugin.h index 61f613860..a41919f65 100644 
--- a/plugin/fcPlugin/fcPlugin.h +++ b/plugin/fcPlugin/fcPlugin.h @@ -60,7 +60,7 @@ class SharedStream : public IPluginResource { free(); } - catch (std::exception const& e) + catch (std::exception const& /*e*/) { return -1; } @@ -74,7 +74,7 @@ class SharedStream : public IPluginResource { cloned = std::make_unique(/* init */ true); } - catch (std::exception const& e) + catch (std::exception const& /*e*/) { return nullptr; } @@ -323,7 +323,7 @@ struct LtContext } template - void create(Gemm& g, size_t workspaceSize) + void create(Gemm& g, size_t /*workspaceSize*/) { typeA = Gemm::Types::cudaTypeI; typeB = Gemm::Types::cudaTypeI; @@ -522,6 +522,11 @@ nvinfer1::pluginInternal::cublasLtMatmulAlgo_t gemmSearch( class FCPluginDynamic : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + FCPluginDynamic( std::string const name, nvinfer1::DataType const type, int32_t const outDim, nvinfer1::Weights const& W); diff --git a/plugin/flattenConcat/flattenConcat.cpp b/plugin/flattenConcat/flattenConcat.cpp index 512df0db6..ce258112b 100644 --- a/plugin/flattenConcat/flattenConcat.cpp +++ b/plugin/flattenConcat/flattenConcat.cpp @@ -112,7 +112,7 @@ Dims FlattenConcat::getOutputDimensions(int32_t index, Dims const* inputs, int32 { PLUGIN_ASSERT(inputs[i].d[2] == inputs[0].d[2]); } - flattenInput = inputs[i].d[0] * inputs[i].d[1] * inputs[i].d[2]; + flattenInput = static_cast(inputs[i].d[0] * inputs[i].d[1] * inputs[i].d[2]); outputConcatAxis += flattenInput; } @@ -133,13 +133,13 @@ int32_t FlattenConcat::initialize() noexcept void FlattenConcat::terminate() noexcept {} -size_t FlattenConcat::getWorkspaceSize(int32_t) const noexcept +size_t FlattenConcat::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } -int32_t FlattenConcat::enqueue( - 
int32_t batchSize, void const* const* inputs, void* const* outputs, void*, cudaStream_t stream) noexcept +int32_t FlattenConcat::enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t /*stream*/) noexcept { try { @@ -182,8 +182,8 @@ int32_t FlattenConcat::enqueue( size_t FlattenConcat::getSerializationSize() const noexcept { - return sizeof(bool) + sizeof(int32_t) * (3 + mNumInputs) + sizeof(nvinfer1::Dims) - + (sizeof(decltype(mCopySize)::value_type) * mNumInputs); + return sizeof(bool) + sizeof(int32_t) * static_cast(3 + mNumInputs) + sizeof(nvinfer1::Dims) + + (sizeof(decltype(mCopySize)::value_type) * static_cast(mNumInputs)); } void FlattenConcat::serialize(void* buffer) const noexcept @@ -208,7 +208,7 @@ void FlattenConcat::serialize(void* buffer) const noexcept // Attach the plugin object to an execution context and grant the plugin the access to some context resource. void FlattenConcat::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* gpuAllocator) noexcept { try { @@ -227,13 +227,13 @@ void FlattenConcat::detachFromContext() noexcept {} // Return true if output tensor is broadcast across a batch. bool FlattenConcat::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. 
-bool FlattenConcat::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool FlattenConcat::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } @@ -258,15 +258,15 @@ char const* FlattenConcat::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index DataType FlattenConcat::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { - PLUGIN_ASSERT(index < 3); return DataType::kFLOAT; } -void FlattenConcat::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void FlattenConcat::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t /*maxBatchSize*/) noexcept { try { @@ -292,7 +292,7 @@ void FlattenConcat::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dim { PLUGIN_ASSERT(inputDims[i].d[2] == inputDims[0].d[2]); } - flattenInput = inputDims[i].d[0] * inputDims[i].d[1] * inputDims[i].d[2]; + flattenInput = static_cast(inputDims[i].d[0] * inputDims[i].d[1] * inputDims[i].d[2]); mInputConcatAxis[i] = flattenInput; mOutputConcatAxis += mInputConcatAxis[i]; } @@ -300,7 +300,8 @@ void FlattenConcat::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dim mCopySize.resize(mNumInputs); for (int32_t i = 0; i < nbInputs; ++i) { - mCopySize[i] = inputDims[i].d[0] * inputDims[i].d[1] * inputDims[i].d[2] * sizeof(float); + mCopySize[i] + = static_cast(inputDims[i].d[0] * 
inputDims[i].d[1] * inputDims[i].d[2]) * sizeof(float); } } catch (std::exception const& e) @@ -350,7 +351,7 @@ FlattenConcatPluginCreator::FlattenConcatPluginCreator() mPluginAttributes.emplace_back(PluginField("axis", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("ignoreBatch", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -369,7 +370,7 @@ PluginFieldCollection const* FlattenConcatPluginCreator::getFieldNames() noexcep return &mFC; } -IPluginV2Ext* FlattenConcatPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* FlattenConcatPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -404,7 +405,7 @@ IPluginV2Ext* FlattenConcatPluginCreator::createPlugin(char const* name, PluginF } IPluginV2Ext* FlattenConcatPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/geluPlugin/geluPlugin.cpp b/plugin/geluPlugin/geluPlugin.cpp index f3ab72caa..bf8319184 100644 --- a/plugin/geluPlugin/geluPlugin.cpp +++ b/plugin/geluPlugin/geluPlugin.cpp @@ -47,8 +47,9 @@ GeluPluginDynamic::GeluPluginDynamic(const std::string name, const DataType type if (mHasBias) { void* cudaMem{nullptr}; - PLUGIN_CUASSERT(cudaMalloc(&cudaMem, getWeightsSize(bias, mType))); - PLUGIN_CUASSERT(cudaMemcpy(cudaMem, bias.values, getWeightsSize(bias, mType), cudaMemcpyHostToDevice)); + PLUGIN_CUASSERT(cudaMalloc(&cudaMem, static_cast(getWeightsSize(bias, mType)))); + PLUGIN_CUASSERT( + cudaMemcpy(cudaMem, bias.values, static_cast(getWeightsSize(bias, mType)), cudaMemcpyHostToDevice)); make_cuda_shared(mBiasDev, cudaMem); } } @@ -65,7 +66,7 @@ GeluPluginDynamic::GeluPluginDynamic(const 
std::string name, void const* data, s { PLUGIN_VALIDATE(mLd > 0); char const* d = static_cast(data); - make_cuda_shared(mBiasDev, deserToDev(d, mLd * getElementSize(mType))); + make_cuda_shared(mBiasDev, deserToDev(d, static_cast(mLd) * getElementSize(mType))); } } // IPluginV2DynamicExt Methods @@ -86,7 +87,7 @@ nvinfer1::IPluginV2DynamicExt* GeluPluginDynamic::clone() const noexcept } nvinfer1::DimsExprs GeluPluginDynamic::getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, - int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept + int32_t nbInputs, nvinfer1::IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -133,7 +134,7 @@ bool GeluPluginDynamic::supportsFormatCombination( } void GeluPluginDynamic::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept + nvinfer1::DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { gLogVerbose << "GeluPluginDynamic configurePlugin\n"; @@ -149,8 +150,8 @@ void GeluPluginDynamic::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* } } -size_t GeluPluginDynamic::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t GeluPluginDynamic::getWorkspaceSize(nvinfer1::PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -164,8 +165,8 @@ int32_t GeluPluginDynamic::enqueueTyped( if (mHasBias) { - int32_t const cols = inputVolume / mLd; - int32_t const rows = mLd; + int32_t const cols = static_cast(inputVolume / mLd); + int32_t const rows = static_cast(mLd); TDataType const* bias = static_cast(mBiasDev.get()); return computeGeluBias(output, input, bias, rows, cols, stream); } @@ -189,7 +190,7 @@ int32_t GeluPluginDynamic::enqueue(nvinfer1::PluginTensorDesc const* 
inputDesc, return STATUS_FAILURE; } - int32_t const inputVolume = volume(inputDesc[0].dims); + int32_t const inputVolume = static_cast(volume(inputDesc[0].dims)); // Our plugin outputs only one tensor. // Launch CUDA kernel wrapper and save its return value. @@ -205,7 +206,7 @@ int32_t GeluPluginDynamic::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, // IPluginV2Ext Methods nvinfer1::DataType GeluPluginDynamic::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { try { @@ -252,7 +253,7 @@ void GeluPluginDynamic::terminate() noexcept size_t GeluPluginDynamic::getSerializationSize() const noexcept { const size_t wordSize = getElementSize(mType); - const size_t biasSize = mHasBias ? mLd * wordSize : 0; + const size_t biasSize = mHasBias ? static_cast(mLd) * wordSize : 0; return sizeof(mType) + sizeof(mHasBias) + sizeof(mLd) + biasSize; } @@ -265,7 +266,7 @@ void GeluPluginDynamic::serialize(void* buffer) const noexcept { PLUGIN_ASSERT(mLd > 0); char* d = static_cast(buffer); - serFromDev(d, static_cast(mBiasDev.get()), mLd * getElementSize(mType)); + serFromDev(d, static_cast(mBiasDev.get()), static_cast(mLd) * getElementSize(mType)); } } @@ -303,7 +304,7 @@ GeluPluginDynamicCreator::GeluPluginDynamicCreator() mPluginAttributes.emplace_back(PluginField("bias", nullptr, PluginFieldType::kFLOAT32, 1)); // Fill PluginFieldCollection with PluginField arguments metadata - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/geluPlugin/geluPlugin.h b/plugin/geluPlugin/geluPlugin.h index a20630729..4514ecf2e 100644 --- a/plugin/geluPlugin/geluPlugin.h +++ b/plugin/geluPlugin/geluPlugin.h @@ -46,6 +46,11 @@ int32_t computeGeluBias( class TRT_DEPRECATED GeluPluginDynamic : public nvinfer1::IPluginV2DynamicExt { public: + 
using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + GeluPluginDynamic(const std::string name, const nvinfer1::DataType type, nvinfer1::Weights const& bias); GeluPluginDynamic(const std::string name, void const* data, size_t length); @@ -95,11 +100,6 @@ class TRT_DEPRECATED GeluPluginDynamic : public nvinfer1::IPluginV2DynamicExt bool mHasBias; bert::cuda_shared_ptr mBiasDev; size_t mLd; - - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; }; class TRT_DEPRECATED GeluPluginDynamicCreator : public nvinfer1::IPluginCreator diff --git a/plugin/generateDetectionPlugin/generateDetectionPlugin.cpp b/plugin/generateDetectionPlugin/generateDetectionPlugin.cpp index e8655dbdb..723764b78 100644 --- a/plugin/generateDetectionPlugin/generateDetectionPlugin.cpp +++ b/plugin/generateDetectionPlugin/generateDetectionPlugin.cpp @@ -42,7 +42,7 @@ GenerateDetectionPluginCreator::GenerateDetectionPluginCreator() noexcept mPluginAttributes.emplace_back(PluginField("iou_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("image_size", nullptr, PluginFieldType::kINT32, 3)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -61,7 +61,8 @@ PluginFieldCollection const* GenerateDetectionPluginCreator::getFieldNames() noe return &mFC; } -IPluginV2Ext* GenerateDetectionPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* GenerateDetectionPluginCreator::createPlugin( + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -109,7 +110,7 @@ IPluginV2Ext* GenerateDetectionPluginCreator::createPlugin(char const* name, Plu } IPluginV2Ext* 
GenerateDetectionPluginCreator::deserializePlugin( - char const* name, void const* data, size_t length) noexcept + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -165,7 +166,7 @@ int32_t GenerateDetection::initialize() noexcept mValidCnt = std::make_shared>(mMaxBatchSize); PLUGIN_CUASSERT(cudaMemcpy(mValidCnt->mPtr, static_cast(tempValidCnt.data()), - sizeof(int32_t) * mMaxBatchSize, cudaMemcpyHostToDevice)); + sizeof(int32_t) * static_cast(mMaxBatchSize), cudaMemcpyHostToDevice)); return 0; } @@ -315,11 +316,11 @@ int32_t GenerateDetection::enqueue( detections); PLUGIN_ASSERT(status == cudaSuccess); - return status; + return static_cast(status); } DataType GenerateDetection::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only DataType::kFLOAT is acceptable by the plugin layer return DataType::kFLOAT; @@ -327,33 +328,34 @@ DataType GenerateDetection::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool GenerateDetection::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool GenerateDetection::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool GenerateDetection::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
-void GenerateDetection::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void GenerateDetection::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t /*nbOutputs*/, DataType const* inputTypes, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t maxBatchSize) noexcept { check_valid_inputs(inputDims, nbInputs); PLUGIN_ASSERT(inputDims[0].d[0] == inputDims[1].d[0] && inputDims[1].d[0] == inputDims[2].d[0]); - mAnchorsCnt = inputDims[2].d[0]; + mAnchorsCnt = static_cast(inputDims[2].d[0]); mType = inputTypes[0]; mMaxBatchSize = maxBatchSize; } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void GenerateDetection::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/gridAnchorPlugin/gridAnchorPlugin.cpp b/plugin/gridAnchorPlugin/gridAnchorPlugin.cpp index 262fdc743..61a21fed5 100644 --- a/plugin/gridAnchorPlugin/gridAnchorPlugin.cpp +++ b/plugin/gridAnchorPlugin/gridAnchorPlugin.cpp @@ -64,7 +64,9 @@ GridAnchorGenerator::GridAnchorGenerator(GridAnchorParameters const* paramIn, in // Calculate the scales of SSD model for each layer for (int32_t i = 0; i < mNumLayers; i++) { - tmpScales[i] = (mParam[id].minSize + (mParam[id].maxSize - mParam[id].minSize) * id / (mNumLayers - 1)); + tmpScales[i] = (mParam[id].minSize + + (mParam[id].maxSize - mParam[id].minSize) * static_cast(id) + / static_cast(mNumLayers - 1)); } // Add another 1.0f to tmpScales to prevent going out side of the vector in calculating the scale_next. tmpScales.push_back(1.0F); // has 7 entries @@ -92,7 +94,7 @@ GridAnchorGenerator::GridAnchorGenerator(GridAnchorParameters const* paramIn, in aspect_ratios.push_back(mParam[id].aspectRatios[i]); } // Additional aspect ratio of 1.0 as described in the paper - aspect_ratios.push_back(1.0); + aspect_ratios.push_back(1.0F); // scales for (int32_t i = 0; i < mParam[id].numAspectRatios; i++) @@ -100,9 +102,11 @@ GridAnchorGenerator::GridAnchorGenerator(GridAnchorParameters const* paramIn, in scales.push_back(tmpScales[id]); } auto scale_next = (id == mNumLayers - 1) - ? 1.0 - : (mParam[id].minSize + (mParam[id].maxSize - mParam[id].minSize) * (id + 1) / (mNumLayers - 1)); - scales.push_back(std::sqrt(tmpScales[id] * scale_next)); + ? 
1.0F + : (mParam[id].minSize + + (mParam[id].maxSize - mParam[id].minSize) * static_cast(id + 1) + / static_cast(mNumLayers - 1)); + scales.push_back(static_cast(std::sqrt(tmpScales[id] * scale_next))); mNumPriors[id] = mParam[id].numAspectRatios + 1; } @@ -112,7 +116,7 @@ GridAnchorGenerator::GridAnchorGenerator(GridAnchorParameters const* paramIn, in // Calculate the width and height of the prior boxes for (int32_t i = 0; i < mNumPriors[id]; i++) { - float sqrt_AR = std::sqrt(aspect_ratios[i]); + float sqrt_AR = sqrtf(aspect_ratios[i]); tmpWidths.push_back(scales[i] * sqrt_AR); tmpHeights.push_back(scales[i] / sqrt_AR); } @@ -175,7 +179,7 @@ int32_t GridAnchorGenerator::getNbOutputs() const noexcept return mNumLayers; } -Dims GridAnchorGenerator::getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbInputDims) noexcept +Dims GridAnchorGenerator::getOutputDimensions(int32_t index, Dims const* /*inputs*/, int32_t /*nbInputDims*/) noexcept { // Particularity of the PriorBox layer: no batchSize dimension needed // 2 channels. First channel stores the mean of each prior coordinate. 
@@ -190,13 +194,13 @@ int32_t GridAnchorGenerator::initialize() noexcept void GridAnchorGenerator::terminate() noexcept {} -size_t GridAnchorGenerator::getWorkspaceSize(int32_t maxBatchSize) const noexcept +size_t GridAnchorGenerator::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } -int32_t GridAnchorGenerator::enqueue( - int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t GridAnchorGenerator::enqueue(int32_t /*batchSize*/, void const* const* /*inputs*/, void* const* outputs, + void* /*workspace*/, cudaStream_t stream) noexcept { // Generate prior boxes for each layer for (int32_t id = 0; id < mNumLayers; id++) @@ -218,10 +222,10 @@ size_t GridAnchorGenerator::getSerializationSize() const noexcept for (int32_t i = 0; i < mNumLayers; i++) { sum += 4 * sizeof(int32_t); // mNumPriors, mParam[i].{numAspectRatios, H, W} - sum += (6 + mParam[i].numAspectRatios) + sum += static_cast(6 + mParam[i].numAspectRatios) * sizeof(float); // mParam[i].{minSize, maxSize, aspectRatios, variance[4]} - sum += mDeviceWidths[i].count * sizeof(float); - sum += mDeviceHeights[i].count * sizeof(float); + sum += static_cast(mDeviceWidths[i].count) * sizeof(float); + sum += static_cast(mDeviceHeights[i].count) * sizeof(float); } return sum; } @@ -264,9 +268,9 @@ Weights GridAnchorGenerator::copyToDevice(void const* hostData, size_t count) no void GridAnchorGenerator::serializeFromDevice(char*& hostBuffer, Weights deviceWeights) const noexcept { - PLUGIN_CUASSERT( - cudaMemcpy(hostBuffer, deviceWeights.values, deviceWeights.count * sizeof(float), cudaMemcpyDeviceToHost)); - hostBuffer += deviceWeights.count * sizeof(float); + PLUGIN_CUASSERT(cudaMemcpy(hostBuffer, deviceWeights.values, + static_cast(deviceWeights.count) * sizeof(float), cudaMemcpyDeviceToHost)); + hostBuffer += static_cast(deviceWeights.count) * sizeof(float); } Weights GridAnchorGenerator::deserializeToDevice(char const*& 
hostBuffer, size_t count) noexcept @@ -304,7 +308,7 @@ char const* GridAnchorGenerator::getPluginNamespace() const noexcept #include // Return the DataType of the plugin output at the requested index DataType GridAnchorGenerator::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index < mNumLayers); return DataType::kFLOAT; @@ -312,21 +316,22 @@ DataType GridAnchorGenerator::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool GridAnchorGenerator::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool GridAnchorGenerator::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool GridAnchorGenerator::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
-void GridAnchorGenerator::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void GridAnchorGenerator::configurePlugin(Dims const* /*inputDims*/, int32_t /*nbInputs*/, Dims const* outputDims, + int32_t nbOutputs, DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(nbOutputs == mNumLayers); PLUGIN_ASSERT(outputDims[0].nbDims == 3); @@ -334,7 +339,7 @@ void GridAnchorGenerator::configurePlugin(Dims const* inputDims, int32_t nbInput // Attach the plugin object to an execution context and grant the plugin the access to some context resource. void GridAnchorGenerator::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -371,7 +376,7 @@ GridAnchorBasePluginCreator::GridAnchorBasePluginCreator() mPluginAttributes.emplace_back(PluginField("variance", nullptr, PluginFieldType::kFLOAT32, 4)); mPluginAttributes.emplace_back(PluginField("numLayers", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -390,7 +395,7 @@ PluginFieldCollection const* GridAnchorBasePluginCreator::getFieldNames() noexce return &mFC; } -IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -423,7 +428,7 @@ 
IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, Plugin else if (!strcmp(attrName, "variance")) { PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kFLOAT32); - int32_t size = fields[i].length; + int32_t size = static_cast(fields[i].length); layerVariances.reserve(size); auto const* lVar = static_cast(fields[i].data); for (int32_t j = 0; j < size; j++) @@ -435,7 +440,7 @@ IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, Plugin else if (!strcmp(attrName, "aspectRatios")) { PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kFLOAT32); - int32_t size = fields[i].length; + int32_t size = static_cast(fields[i].length); aspectRatios.reserve(size); auto const* aR = static_cast(fields[i].data); for (int32_t j = 0; j < size; j++) @@ -447,7 +452,7 @@ IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, Plugin else if (!strcmp(attrName, "featureMapShapes")) { PLUGIN_VALIDATE(fields[i].type == PluginFieldType::kINT32); - int32_t size = fields[i].length; + int32_t size = static_cast(fields[i].length); PLUGIN_VALIDATE(!isFMapRect || (size % 2 == 0)); fMapShapes.reserve(size); int32_t const* fMap = static_cast(fields[i].data); @@ -463,7 +468,7 @@ IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, Plugin std::vector firstLayerAspectRatios; PLUGIN_VALIDATE(numLayers > 0); - int32_t const numExpectedLayers = static_cast(fMapShapes.size()) >> (isFMapRect ? 1 : 0); + int32_t const numExpectedLayers = static_cast(fMapShapes.size() >> (isFMapRect ? 
1 : 0)); PLUGIN_VALIDATE(numExpectedLayers == numLayers); int32_t numFirstLayerARs = 3; @@ -485,12 +490,12 @@ IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, Plugin if (i == 0) { boxParams[i] = {minScale, maxScale, firstLayerAspectRatios.data(), - (int32_t) firstLayerAspectRatios.size(), fMapShapes[hOffset], fMapShapes[wOffset], + static_cast(firstLayerAspectRatios.size()), fMapShapes[hOffset], fMapShapes[wOffset], {layerVariances[0], layerVariances[1], layerVariances[2], layerVariances[3]}}; } else { - boxParams[i] = {minScale, maxScale, aspectRatios.data(), (int32_t) aspectRatios.size(), + boxParams[i] = {minScale, maxScale, aspectRatios.data(), static_cast(aspectRatios.size()), fMapShapes[hOffset], fMapShapes[wOffset], {layerVariances[0], layerVariances[1], layerVariances[2], layerVariances[3]}}; } @@ -508,7 +513,7 @@ IPluginV2Ext* GridAnchorBasePluginCreator::createPlugin(char const* name, Plugin } IPluginV2Ext* GridAnchorBasePluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/groupNormalizationPlugin/groupNormalizationPlugin.cpp b/plugin/groupNormalizationPlugin/groupNormalizationPlugin.cpp index bfdb42154..87bb384b5 100644 --- a/plugin/groupNormalizationPlugin/groupNormalizationPlugin.cpp +++ b/plugin/groupNormalizationPlugin/groupNormalizationPlugin.cpp @@ -72,8 +72,8 @@ int32_t GroupNormalizationPlugin::getNbOutputs() const noexcept return 1; } -nvinfer1::DimsExprs GroupNormalizationPlugin::getOutputDimensions( - int32_t index, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +nvinfer1::DimsExprs GroupNormalizationPlugin::getOutputDimensions(int32_t index, nvinfer1::DimsExprs const* inputs, + int32_t nbInputs, nvinfer1::IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -90,7 +90,7 @@ nvinfer1::DimsExprs 
GroupNormalizationPlugin::getOutputDimensions( } void GroupNormalizationPlugin::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* gpuAllocator) noexcept { try { @@ -149,28 +149,28 @@ int32_t GroupNormalizationPlugin::enqueue(nvinfer1::PluginTensorDesc const* inpu float const one = 1.F; float const zero = 0.F; PLUGIN_CHECK_CUDNN(mCudnnWrapper->cudnnBatchNormalizationForwardTraining(mCudnnHandle, // handle - CUDNN_BATCHNORM_SPATIAL, // BatchNormMode_t, try also non persistent - &one, // - &zero, // - mTensorDesc, // in/out descriptor - inputs[0], // input - mTensorDesc, // in/out descriptor - outputs[0], // output - mBNTensorDesc, // - mBnScales->mPtr, // 1 - mBnBias->mPtr, // 0 - 0.0, // exponential average factor - nullptr, // resultRunningMean - nullptr, // resultRunningVar - mEpsilon, // eps - nullptr, // resultSaveMean - nullptr // resultSaveInvVar + CUDNN_BATCHNORM_SPATIAL, // BatchNormMode_t, try also non persistent + &one, // + &zero, // + mTensorDesc, // in/out descriptor + inputs[0], // input + mTensorDesc, // in/out descriptor + outputs[0], // output + mBNTensorDesc, // + mBnScales->mPtr, // 1 + mBnBias->mPtr, // 0 + 0.0F, // exponential average factor + nullptr, // resultRunningMean + nullptr, // resultRunningVar + static_cast(mEpsilon), // eps + nullptr, // resultSaveMean + nullptr // resultSaveInvVar )); // Apply an additional scale and bias on each channel. 
nvinfer1::Dims inputDims = inputDesc[0].dims; - int32_t batchSize = inputDims.d[0]; - int32_t nbChannels = inputDims.d[1]; + int32_t batchSize = static_cast(inputDims.d[0]); + int32_t nbChannels = static_cast(inputDims.d[1]); auto* output = static_cast(outputs[0]); return scaleShiftChannelsInplace(output, batchSize, nbChannels, mChannelVolume, static_cast(inputs[2]), static_cast(inputs[1]), stream); // mBetaDev, mGammaDev, @@ -250,8 +250,8 @@ void GroupNormalizationPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc PLUGIN_VALIDATE(nbOutputs == getNbOutputs()); nvinfer1::Dims inputDims = in[0].desc.dims; - int32_t const batchSize = inputDims.d[0]; - int32_t const nbChannels = inputDims.d[1]; + int32_t const batchSize = static_cast(inputDims.d[0]); + int32_t const nbChannels = static_cast(inputDims.d[1]); if (batchSize <= 0 || nbChannels <= 0) { @@ -267,7 +267,8 @@ void GroupNormalizationPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc // Allocate scale/bias tensors needed for cudnnBatchNorm. mNbScaleBias = batchSize * mNbGroups; - auto allocScaleBias = [this](std::shared_ptr>& buf, float value) { + auto allocScaleBias = [this](std::shared_ptr>& buf, float value) + { PLUGIN_VALIDATE(mNbScaleBias > 0); if (!buf || !buf->mPtr || buf->mSize != mNbScaleBias) { @@ -276,8 +277,8 @@ void GroupNormalizationPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc // Initialize values. 
std::vector const values(mNbScaleBias, value); - PLUGIN_CUASSERT( - cudaMemcpy(buf->mPtr, values.data(), sizeof(float) * mNbScaleBias, cudaMemcpyHostToDevice)); + PLUGIN_CUASSERT(cudaMemcpy(buf->mPtr, values.data(), sizeof(float) * static_cast(mNbScaleBias), + cudaMemcpyHostToDevice)); } }; allocScaleBias(mBnScales, 1.F); @@ -285,7 +286,8 @@ void GroupNormalizationPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc // Calculate size of each group int32_t groupSize = nbChannels / mNbGroups; - mChannelVolume = pluginInternal::volume(inputDims, /*start*/ 2, /*stop*/ inputDims.nbDims); + mChannelVolume + = static_cast(pluginInternal::volume(inputDims, /*start*/ 2, /*stop*/ inputDims.nbDims)); // Set tensor descriptor in a way that cudnnBatchNorm will perform Group Normalization. PLUGIN_CUDNNASSERT(mCudnnWrapper->cudnnSetTensor4dDescriptor(mTensorDesc, // descriptor @@ -306,7 +308,7 @@ void GroupNormalizationPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc } nvinfer1::DataType GroupNormalizationPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { try { @@ -321,8 +323,8 @@ nvinfer1::DataType GroupNormalizationPlugin::getOutputDataType( } } -size_t GroupNormalizationPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t GroupNormalizationPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -351,7 +353,7 @@ GroupNormalizationPluginCreator::GroupNormalizationPluginCreator() mPluginAttributes.emplace_back(PluginField("eps", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("num_groups", nullptr, 
PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -389,7 +391,7 @@ void GroupNormalizationPluginCreator::setPluginNamespace(char const* libNamespac } IPluginV2DynamicExt* GroupNormalizationPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -425,7 +427,7 @@ IPluginV2DynamicExt* GroupNormalizationPluginCreator::createPlugin( } IPluginV2DynamicExt* GroupNormalizationPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/groupNormalizationPlugin/groupNormalizationPlugin.h b/plugin/groupNormalizationPlugin/groupNormalizationPlugin.h index 29d893e00..962151ba9 100644 --- a/plugin/groupNormalizationPlugin/groupNormalizationPlugin.h +++ b/plugin/groupNormalizationPlugin/groupNormalizationPlugin.h @@ -37,6 +37,11 @@ cudaError_t scaleShiftChannelsInplace(T* inOut, int32_t const B, int32_t const C class GroupNormalizationPlugin final : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + GroupNormalizationPlugin(float epsilon, int32_t const nbGroups); GroupNormalizationPlugin(void const* data, size_t length); @@ -110,11 +115,6 @@ class GroupNormalizationPlugin final : public nvinfer1::IPluginV2DynamicExt std::shared_ptr> mBnScales{}; std::shared_ptr> mBnBias{}; size_t mNbScaleBias{}; - - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; }; class GroupNormalizationPluginCreator : 
public IPluginCreator diff --git a/plugin/instanceNormalizationPlugin/instanceNormFwd.h b/plugin/instanceNormalizationPlugin/instanceNormFwd.h index e09096b33..4a47e3fb9 100644 --- a/plugin/instanceNormalizationPlugin/instanceNormFwd.h +++ b/plugin/instanceNormalizationPlugin/instanceNormFwd.h @@ -25,6 +25,7 @@ namespace instance_norm_impl { +#undef PLUGIN_CHECK_CUDA #define PLUGIN_CHECK_CUDA(call) \ do \ { \ @@ -35,6 +36,7 @@ namespace instance_norm_impl } \ } while (0) +#undef PLUGIN_CHECK_CUDNN #define PLUGIN_CHECK_CUDNN(call) \ do \ { \ @@ -105,7 +107,7 @@ struct InstanceNormFwdContext InstanceNormFwdContext() : sm_count(0) , sm_shared_size(0) - , sm_version(0){}; + , sm_version(0) {}; int32_t sm_count; int32_t sm_shared_size; int32_t sm_version; diff --git a/plugin/instanceNormalizationPlugin/instanceNormalizationPluginLegacy.h b/plugin/instanceNormalizationPlugin/instanceNormalizationPluginLegacy.h index 2d6412d50..d3a0a2089 100644 --- a/plugin/instanceNormalizationPlugin/instanceNormalizationPluginLegacy.h +++ b/plugin/instanceNormalizationPlugin/instanceNormalizationPluginLegacy.h @@ -54,7 +54,7 @@ class InstanceNormalizationPlugin : public nvinfer1::IPluginV2DynamicExt int32_t getNbOutputs() const noexcept override; // DynamicExt plugins returns DimsExprs class instead of Dims - using nvinfer1::IPluginV2::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; DimsExprs getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept override; @@ -62,11 +62,11 @@ class InstanceNormalizationPlugin : public nvinfer1::IPluginV2DynamicExt void terminate() noexcept override; - using nvinfer1::IPluginV2::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; size_t getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override; - using 
nvinfer1::IPluginV2::enqueue; + using nvinfer1::IPluginV2DynamicExt::enqueue; int32_t enqueue(nvinfer1::PluginTensorDesc const* inputDesc, nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override; @@ -98,7 +98,7 @@ class InstanceNormalizationPlugin : public nvinfer1::IPluginV2DynamicExt void detachFromContext() noexcept override; - using nvinfer1::IPluginV2Ext::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::configurePlugin; void configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept override; diff --git a/plugin/leakyReluPlugin/lReluPlugin.cpp b/plugin/leakyReluPlugin/lReluPlugin.cpp index 57517725c..193b8ac5e 100644 --- a/plugin/leakyReluPlugin/lReluPlugin.cpp +++ b/plugin/leakyReluPlugin/lReluPlugin.cpp @@ -51,13 +51,13 @@ Dims LReLU::getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbInp return inputs[0]; } -int32_t LReLU::enqueue( - int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t LReLU::enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t stream) noexcept { void const* inputData = inputs[0]; void* outputData = outputs[0]; pluginStatus_t status = lReLUInference(stream, mBatchDim * batchSize, mNegSlope, inputData, outputData); - return status; + return static_cast(status); } size_t LReLU::getSerializationSize() const noexcept @@ -82,7 +82,7 @@ void LReLU::configureWithFormat(Dims const* inputDims, int32_t /* nbInputs */, D PLUGIN_ASSERT(nbOutputs == 1); for (int32_t i = 0; i < inputDims[0].nbDims; ++i) { - mBatchDim *= inputDims[0].d[i]; + mBatchDim *= static_cast(inputDims[0].d[i]); } } @@ -138,7 +138,7 @@ LReluPluginCreator::LReluPluginCreator() mPluginAttributes.clear(); 
mPluginAttributes.emplace_back(PluginField("negSlope", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -157,7 +157,7 @@ PluginFieldCollection const* LReluPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2* LReluPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2* LReluPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -180,7 +180,8 @@ IPluginV2* LReluPluginCreator::createPlugin(char const* name, PluginFieldCollect return nullptr; } -IPluginV2* LReluPluginCreator::deserializePlugin(char const* name, void const* serialData, size_t serialLength) noexcept +IPluginV2* LReluPluginCreator::deserializePlugin( + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.cpp b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.cpp index c2a1aadd7..484a92057 100644 --- a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.cpp +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPlugin.cpp @@ -106,8 +106,8 @@ IPluginCapability* ModulatedDeformableConvPluginDynamic::getCapabilityInterface( } int32_t ModulatedDeformableConvPluginDynamic::getOutputShapes(nvinfer1::DimsExprs const* inputs, int32_t nbInputs, - nvinfer1::DimsExprs const* shapeInputs, int32_t nbShapeInputs, nvinfer1::DimsExprs* outputs, int32_t nbOutputs, - nvinfer1::IExprBuilder& exprBuilder) noexcept + nvinfer1::DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, nvinfer1::DimsExprs* outputs, + int32_t nbOutputs, nvinfer1::IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -135,7 +135,7 @@ int32_t ModulatedDeformableConvPluginDynamic::getOutputShapes(nvinfer1::DimsExpr } bool ModulatedDeformableConvPluginDynamic::supportsFormatCombination( - 
int32_t pos, nvinfer1::DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + int32_t pos, nvinfer1::DynamicPluginTensorDesc const* inOut, int32_t /*nbInputs*/, int32_t /*nbOutputs*/) noexcept { try { @@ -185,13 +185,13 @@ size_t ModulatedDeformableConvPluginDynamic::getWorkspaceSize(nvinfer1::DynamicP int32_t /* nbInputs */, nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t /* nbOutputs */) const noexcept { // Calculate workspace size needed for the im2col buffer. - int32_t const sizeOfDtype = nvinfer1::plugin::bert::getElementSize(outputs[0].desc.type); + int32_t const sizeOfDtype = static_cast(nvinfer1::plugin::bert::getElementSize(outputs[0].desc.type)); - int32_t const nInputPlane = inputs[0].desc.dims.d[1]; // Input channels - int32_t const outputHeight = outputs[0].desc.dims.d[2]; - int32_t const outputWidth = outputs[0].desc.dims.d[3]; - int32_t const kernelH = inputs[3].desc.dims.d[2]; // Weight kernel height - int32_t const kernelW = inputs[3].desc.dims.d[3]; // Weight kernel width + int32_t const nInputPlane = static_cast(inputs[0].desc.dims.d[1]); // Input channels + int32_t const outputHeight = static_cast(outputs[0].desc.dims.d[2]); + int32_t const outputWidth = static_cast(outputs[0].desc.dims.d[3]); + int32_t const kernelH = static_cast(inputs[3].desc.dims.d[2]); // Weight kernel height + int32_t const kernelW = static_cast(inputs[3].desc.dims.d[3]); // Weight kernel width // Calculate size needed for the intermediate 'columns' buffer used in im2col + GEMM approach. 
int64_t const colSize @@ -211,13 +211,13 @@ int32_t ModulatedDeformableConvPluginDynamic::enqueue(nvinfer1::PluginTensorDesc && workspace != nullptr); // Extract dimensions - int32_t const batch = inputDescs[0].dims.d[0]; - int32_t const channels = inputDescs[0].dims.d[1]; - int32_t const height = inputDescs[0].dims.d[2]; - int32_t const width = inputDescs[0].dims.d[3]; - int32_t const channelsOut = outputDescs[0].dims.d[1]; - int32_t const kernelH = inputDescs[3].dims.d[2]; // Weight kernel height - int32_t const kernelW = inputDescs[3].dims.d[3]; // Weight kernel width + int32_t const batch = static_cast(inputDescs[0].dims.d[0]); + int32_t const channels = static_cast(inputDescs[0].dims.d[1]); + int32_t const height = static_cast(inputDescs[0].dims.d[2]); + int32_t const width = static_cast(inputDescs[0].dims.d[3]); + int32_t const channelsOut = static_cast(outputDescs[0].dims.d[1]); + int32_t const kernelH = static_cast(inputDescs[3].dims.d[2]); // Weight kernel height + int32_t const kernelW = static_cast(inputDescs[3].dims.d[3]); // Weight kernel width // Get input/output pointers void const* inputTensor = inputs[0]; @@ -238,16 +238,20 @@ int32_t ModulatedDeformableConvPluginDynamic::enqueue(nvinfer1::PluginTensorDesc static_cast(weightTensor), static_cast(biasTensor), static_cast(offsetTensor), static_cast(maskTensor), static_cast(outputTensor), workspace, batch, channels, height, width, channelsOut, kernelW, - kernelH, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1], - mGroup, mDeformableGroup, im2colStep, mCublasHandle, stream); + kernelH, static_cast(mStride.d[0]), static_cast(mStride.d[1]), + static_cast(mPadding.d[0]), static_cast(mPadding.d[1]), + static_cast(mDilation.d[0]), static_cast(mDilation.d[1]), mGroup, mDeformableGroup, + im2colStep, mCublasHandle, stream); break; case nvinfer1::DataType::kHALF: ModulatedDeformConvForwardCUDAKernelLauncherHalf(static_cast(inputTensor), static_cast(weightTensor), 
static_cast(biasTensor), static_cast(offsetTensor), static_cast(maskTensor), static_cast(outputTensor), workspace, batch, channels, height, width, channelsOut, kernelW, - kernelH, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1], - mGroup, mDeformableGroup, im2colStep, mCublasHandle, stream); + kernelH, static_cast(mStride.d[0]), static_cast(mStride.d[1]), + static_cast(mPadding.d[0]), static_cast(mPadding.d[1]), + static_cast(mDilation.d[0]), static_cast(mDilation.d[1]), mGroup, mDeformableGroup, + im2colStep, mCublasHandle, stream); break; default: // Unsupported data type @@ -361,7 +365,7 @@ nvinfer1::PluginFieldCollection const* ModulatedDeformableConvPluginDynamic::get mDataToSerialize.emplace_back("group", &mGroup, PluginFieldType::kINT32, 1); mDataToSerialize.emplace_back("deformable_group", &mDeformableGroup, PluginFieldType::kINT32, 1); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -383,7 +387,7 @@ ModulatedDeformableConvPluginDynamicCreator::ModulatedDeformableConvPluginDynami mPluginAttributes.emplace_back(PluginField("group", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("deformable_group", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.cpp b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.cpp index 4fa4b7134..fd081b24a 100644 --- a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.cpp +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.cpp @@ -96,8 +96,8 @@ nvinfer1::IPluginV2DynamicExt* ModulatedDeformableConvPluginDynamicLegacy::clone return nullptr; } 
-nvinfer1::DimsExprs ModulatedDeformableConvPluginDynamicLegacy::getOutputDimensions(int32_t outputIndex, - nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +nvinfer1::DimsExprs ModulatedDeformableConvPluginDynamicLegacy::getOutputDimensions(int32_t /*outputIndex*/, + nvinfer1::DimsExprs const* inputs, int32_t /*nbInputs*/, nvinfer1::IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -118,7 +118,7 @@ nvinfer1::DimsExprs ModulatedDeformableConvPluginDynamicLegacy::getOutputDimensi } bool ModulatedDeformableConvPluginDynamicLegacy::supportsFormatCombination( - int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + int32_t pos, nvinfer1::PluginTensorDesc const* inOut, int32_t /*nbInputs*/, int32_t /*nbOutputs*/) noexcept { if (pos == 0) { @@ -131,8 +131,8 @@ bool ModulatedDeformableConvPluginDynamicLegacy::supportsFormatCombination( } } -void ModulatedDeformableConvPluginDynamicLegacy::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* inputs, - int32_t nbInputs, nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +void ModulatedDeformableConvPluginDynamicLegacy::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* /*inputs*/, + int32_t nbInputs, nvinfer1::DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { try { @@ -148,19 +148,20 @@ void ModulatedDeformableConvPluginDynamicLegacy::configurePlugin(nvinfer1::Dynam } size_t ModulatedDeformableConvPluginDynamicLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, - int32_t nbInputs, nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept + int32_t /*nbInputs*/, nvinfer1::PluginTensorDesc const* outputs, int32_t /*nbOutputs*/) const noexcept { - int32_t sizeofDtype = nvinfer1::plugin::bert::getElementSize(outputs[0].type); + int32_t sizeofDtype = static_cast(nvinfer1::plugin::bert::getElementSize(outputs[0].type)); - int32_t 
nInputPlane = inputs[0].dims.d[1]; - int32_t outputHeight = outputs[0].dims.d[2]; - int32_t outputWidth = outputs[0].dims.d[3]; - int32_t kH = inputs[3].dims.d[2]; - int32_t kW = inputs[3].dims.d[3]; + int32_t nInputPlane = static_cast(inputs[0].dims.d[1]); + int32_t outputHeight = static_cast(outputs[0].dims.d[2]); + int32_t outputWidth = static_cast(outputs[0].dims.d[3]); + int32_t kH = static_cast(inputs[3].dims.d[2]); + int32_t kW = static_cast(inputs[3].dims.d[3]); - int64_t colSize = divUp(nInputPlane * kW * kH * outputHeight * outputWidth * sizeofDtype, 16) * 16; + int64_t colSize + = divUp(static_cast(nInputPlane) * kW * kH * outputHeight * outputWidth * sizeofDtype, 16) * 16; - return colSize; + return static_cast(colSize); } int32_t ModulatedDeformableConvPluginDynamicLegacy::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, @@ -172,13 +173,13 @@ int32_t ModulatedDeformableConvPluginDynamicLegacy::enqueue(nvinfer1::PluginTens PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr && workSpace != nullptr); - int32_t batch = inputDesc[0].dims.d[0]; - int32_t channels = inputDesc[0].dims.d[1]; - int32_t height = inputDesc[0].dims.d[2]; - int32_t width = inputDesc[0].dims.d[3]; - int32_t channelsOut = outputDesc[0].dims.d[1]; - int32_t kernelH = inputDesc[3].dims.d[2]; - int32_t kernelW = inputDesc[3].dims.d[3]; + int32_t batch = static_cast(inputDesc[0].dims.d[0]); + int32_t channels = static_cast(inputDesc[0].dims.d[1]); + int32_t height = static_cast(inputDesc[0].dims.d[2]); + int32_t width = static_cast(inputDesc[0].dims.d[3]); + int32_t channelsOut = static_cast(outputDesc[0].dims.d[1]); + int32_t kernelH = static_cast(inputDesc[3].dims.d[2]); + int32_t kernelW = static_cast(inputDesc[3].dims.d[3]); void const* x = inputs[0]; void const* offset = inputs[1]; @@ -192,16 +193,22 @@ int32_t ModulatedDeformableConvPluginDynamicLegacy::enqueue(nvinfer1::PluginTens switch (data_type) { case 
nvinfer1::DataType::kFLOAT: - ModulatedDeformConvForwardCUDAKernelLauncherFloat((float*) x, (float*) weight, (float*) bias, - (float*) offset, (float*) mask, (float*) output, workSpace, batch, channels, height, width, channelsOut, - kernelW, kernelH, mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], - mDilation.d[1], mGroup, mDeformableGroup, im2colStep, mCublasHandle, stream); + ModulatedDeformConvForwardCUDAKernelLauncherFloat(static_cast(x), + static_cast(weight), static_cast(bias), static_cast(offset), + static_cast(mask), static_cast(output), workSpace, batch, channels, height, width, + channelsOut, kernelW, kernelH, static_cast(mStride.d[0]), static_cast(mStride.d[1]), + static_cast(mPadding.d[0]), static_cast(mPadding.d[1]), + static_cast(mDilation.d[0]), static_cast(mDilation.d[1]), mGroup, mDeformableGroup, + im2colStep, mCublasHandle, stream); break; case nvinfer1::DataType::kHALF: - ModulatedDeformConvForwardCUDAKernelLauncherHalf((half*) x, (half*) weight, (half*) bias, (half*) offset, - (half*) mask, (half*) output, workSpace, batch, channels, height, width, channelsOut, kernelW, kernelH, - mStride.d[0], mStride.d[1], mPadding.d[0], mPadding.d[1], mDilation.d[0], mDilation.d[1], mGroup, - mDeformableGroup, im2colStep, mCublasHandle, stream); + ModulatedDeformConvForwardCUDAKernelLauncherHalf(static_cast(x), + static_cast(weight), static_cast(bias), static_cast(offset), + static_cast(mask), static_cast(output), workSpace, batch, channels, height, width, + channelsOut, kernelW, kernelH, static_cast(mStride.d[0]), static_cast(mStride.d[1]), + static_cast(mPadding.d[0]), static_cast(mPadding.d[1]), + static_cast(mDilation.d[0]), static_cast(mDilation.d[1]), mGroup, mDeformableGroup, + im2colStep, mCublasHandle, stream); break; default: return 1; } @@ -215,7 +222,7 @@ int32_t ModulatedDeformableConvPluginDynamicLegacy::enqueue(nvinfer1::PluginTens } nvinfer1::DataType ModulatedDeformableConvPluginDynamicLegacy::getOutputDataType( - 
int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { return inputTypes[0]; } @@ -265,7 +272,7 @@ void ModulatedDeformableConvPluginDynamicLegacy::destroy() noexcept } void ModulatedDeformableConvPluginDynamicLegacy::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, nvinfer1::IGpuAllocator* gpuAllocator) noexcept { try { @@ -308,7 +315,7 @@ ModulatedDeformableConvPluginDynamicLegacyCreator::ModulatedDeformableConvPlugin mPluginAttributes.emplace_back(nvinfer1::PluginField("group", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back( nvinfer1::PluginField("deformable_group", nullptr, nvinfer1::PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -365,8 +372,8 @@ nvinfer1::IPluginV2* ModulatedDeformableConvPluginDynamicLegacyCreator::createPl { PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); stride.nbDims = 2; - stride.d[0] = static_cast(fc->fields[i].data)[0]; - stride.d[1] = static_cast(fc->fields[i].data)[1]; + stride.d[0] = static_cast(static_cast(fc->fields[i].data)[0]); + stride.d[1] = static_cast(static_cast(fc->fields[i].data)[1]); PLUGIN_VALIDATE(stride.d[0] > 0); PLUGIN_VALIDATE(stride.d[1] > 0); } @@ -375,8 +382,8 @@ nvinfer1::IPluginV2* ModulatedDeformableConvPluginDynamicLegacyCreator::createPl { PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); padding.nbDims = 2; - padding.d[0] = static_cast(fc->fields[i].data)[0]; - padding.d[1] = static_cast(fc->fields[i].data)[1]; + padding.d[0] = static_cast(static_cast(fc->fields[i].data)[0]); + padding.d[1] = static_cast(static_cast(fc->fields[i].data)[1]); 
PLUGIN_VALIDATE(padding.d[0] >= 0); PLUGIN_VALIDATE(padding.d[1] >= 0); } @@ -385,8 +392,8 @@ nvinfer1::IPluginV2* ModulatedDeformableConvPluginDynamicLegacyCreator::createPl { PLUGIN_VALIDATE(fc->fields[i].type == PluginFieldType::kINT32); dilation.nbDims = 2; - dilation.d[0] = static_cast(fc->fields[i].data)[0]; - dilation.d[1] = static_cast(fc->fields[i].data)[1]; + dilation.d[0] = static_cast(static_cast(fc->fields[i].data)[0]); + dilation.d[1] = static_cast(static_cast(fc->fields[i].data)[1]); PLUGIN_VALIDATE(dilation.d[0] > 0); PLUGIN_VALIDATE(dilation.d[1] > 0); } diff --git a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h index 3526030e7..475fa3191 100644 --- a/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h +++ b/plugin/modulatedDeformConvPlugin/modulatedDeformConvPluginLegacy.h @@ -46,6 +46,11 @@ namespace plugin class ModulatedDeformableConvPluginDynamicLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + ModulatedDeformableConvPluginDynamicLegacy(std::string const& name, nvinfer1::Dims const stride, nvinfer1::Dims const padding, nvinfer1::Dims const dilation, int32_t const deformableGroup, int32_t const group); diff --git a/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.cpp b/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.cpp index 79a3800eb..3e9e0921f 100644 --- a/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.cpp +++ b/plugin/multilevelCropAndResizePlugin/multilevelCropAndResizePlugin.cpp @@ -39,7 +39,7 @@ MultilevelCropAndResizePluginCreator::MultilevelCropAndResizePluginCreator() noe mPluginAttributes.emplace_back(PluginField("pooled_size", nullptr, 
PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("image_size", nullptr, PluginFieldType::kINT32, 3)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -59,7 +59,7 @@ PluginFieldCollection const* MultilevelCropAndResizePluginCreator::getFieldNames } IPluginV2Ext* MultilevelCropAndResizePluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -92,7 +92,7 @@ IPluginV2Ext* MultilevelCropAndResizePluginCreator::createPlugin( } IPluginV2Ext* MultilevelCropAndResizePluginCreator::deserializePlugin( - char const* name, void const* data, size_t length) noexcept + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -113,8 +113,8 @@ MultilevelCropAndResize::MultilevelCropAndResize(int32_t pooled_size, nvinfer1:: PLUGIN_VALIDATE(imageSize.nbDims == 3); PLUGIN_VALIDATE(imageSize.d[0] > 0 && imageSize.d[1] > 0 && imageSize.d[2] > 0); // shape - mInputHeight = imageSize.d[1]; - mInputWidth = imageSize.d[2]; + mInputHeight = static_cast(imageSize.d[1]); + mInputWidth = static_cast(imageSize.d[2]); // Threshold to P3: Smaller -> P2 mThresh = (224 * 224) / (4.0F); } @@ -136,7 +136,7 @@ void MultilevelCropAndResize::destroy() noexcept delete this; } -size_t MultilevelCropAndResize::getWorkspaceSize(int32_t) const noexcept +size_t MultilevelCropAndResize::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } @@ -220,8 +220,8 @@ Dims MultilevelCropAndResize::getOutputDimensions(int32_t index, Dims const* inp return result; } -int32_t MultilevelCropAndResize::enqueue( - int32_t batch_size, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t MultilevelCropAndResize::enqueue(int32_t batch_size, void const* const* inputs, void* const* outputs, + void* /*workspace*/, 
cudaStream_t stream) noexcept { void* pooled = outputs[0]; @@ -238,8 +238,8 @@ int32_t MultilevelCropAndResize::enqueue( size_t MultilevelCropAndResize::getSerializationSize() const noexcept { - return sizeof(int32_t) * 2 + sizeof(int32_t) * 4 + sizeof(float) + sizeof(int32_t) * 2 * mFeatureMapCount - + sizeof(DataType); + return sizeof(int32_t) * 2 + sizeof(int32_t) * 4 + sizeof(float) + + sizeof(int32_t) * 2 * static_cast(mFeatureMapCount) + sizeof(DataType); } void MultilevelCropAndResize::serialize(void* buffer) const noexcept @@ -285,7 +285,7 @@ void MultilevelCropAndResize::deserialize(int8_t const* data, size_t length) // Return the DataType of the plugin output at the requested index DataType MultilevelCropAndResize::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { // Only DataType::kFLOAT is acceptable by the plugin layer // return DataType::kFLOAT; @@ -298,21 +298,21 @@ DataType MultilevelCropAndResize::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool MultilevelCropAndResize::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool MultilevelCropAndResize::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool MultilevelCropAndResize::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
-void MultilevelCropAndResize::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void MultilevelCropAndResize::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat floatFormat, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(supportsFormat(inputTypes[0], floatFormat)); check_valid_inputs(inputDims, nbInputs); @@ -325,7 +325,7 @@ void MultilevelCropAndResize::configurePlugin(Dims const* inputDims, int32_t nbI mROICount = dimToInt32(inputDims[0].d[0]); mFeatureLength = dimToInt32(inputDims[1].d[0]); - for (size_t layer = 0; layer < mFeatureMapCount; ++layer) + for (int32_t layer = 0; layer < mFeatureMapCount; ++layer) { mFeatureSpatialSize[layer] = {dimToInt32(inputDims[layer + 1].d[1]), dimToInt32(inputDims[layer + 1].d[2])}; } @@ -340,7 +340,7 @@ void MultilevelCropAndResize::configurePlugin(Dims const* inputDims, int32_t nbI // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void MultilevelCropAndResize::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/multilevelProposeROI/multilevelProposeROIPlugin.cpp b/plugin/multilevelProposeROI/multilevelProposeROIPlugin.cpp index f9dddc0a2..75e090023 100644 --- a/plugin/multilevelProposeROI/multilevelProposeROIPlugin.cpp +++ b/plugin/multilevelProposeROI/multilevelProposeROIPlugin.cpp @@ -45,7 +45,7 @@ MultilevelProposeROIPluginCreator::MultilevelProposeROIPluginCreator() noexcept mPluginAttributes.emplace_back(PluginField("iou_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("image_size", nullptr, PluginFieldType::kINT32, 3)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -65,7 +65,7 @@ PluginFieldCollection const* MultilevelProposeROIPluginCreator::getFieldNames() } IPluginV2Ext* MultilevelProposeROIPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -112,7 +112,7 @@ IPluginV2Ext* MultilevelProposeROIPluginCreator::createPlugin( } IPluginV2Ext* MultilevelProposeROIPluginCreator::deserializePlugin( - char const* name, void const* data, size_t length) noexcept + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -174,7 +174,7 @@ int32_t MultilevelProposeROI::initialize() noexcept mValidCnt = std::make_shared>(mMaxBatchSize); PLUGIN_CUASSERT(cudaMemcpy(mValidCnt->mPtr, static_cast(tempValidCnt.data()), - sizeof(int32_t) * mMaxBatchSize, cudaMemcpyHostToDevice)); + sizeof(int32_t) * static_cast(mMaxBatchSize), cudaMemcpyHostToDevice)); // Init the anchors for batch size: for (int32_t i = 0; i < mFeatureCnt; i++) @@ -182,11 
+182,11 @@ int32_t MultilevelProposeROI::initialize() noexcept int32_t i_anchors_cnt = mAnchorsCnt[i]; auto i_anchors_host = mAnchorBoxesHost[i].data(); auto i_anchors_device = std::make_shared>(i_anchors_cnt * 4 * mMaxBatchSize); - int32_t batch_offset = sizeof(float) * i_anchors_cnt * 4; + size_t batch_offset = sizeof(float) * static_cast(i_anchors_cnt) * 4; uint8_t* device_ptr = static_cast(i_anchors_device->mPtr); - for (int32_t i = 0; i < mMaxBatchSize; i++) + for (int32_t b = 0; b < mMaxBatchSize; b++) { - PLUGIN_CUASSERT(cudaMemcpy(static_cast(device_ptr + i * batch_offset), + PLUGIN_CUASSERT(cudaMemcpy(static_cast(device_ptr + b * batch_offset), static_cast(i_anchors_host), batch_offset, cudaMemcpyHostToDevice)); } mAnchorBoxesDevice.push_back(i_anchors_device); @@ -218,11 +218,13 @@ int32_t MultilevelProposeROI::initialize() noexcept } // Init the temp storage for pointer arrays of score and box: - PLUGIN_CUASSERT(cudaMalloc(&mDeviceScores, sizeof(void*) * mFeatureCnt)); - PLUGIN_CUASSERT(cudaMalloc(&mDeviceBboxes, sizeof(void*) * mFeatureCnt)); + PLUGIN_CUASSERT(cudaMalloc(&mDeviceScores, sizeof(void*) * static_cast(mFeatureCnt))); + PLUGIN_CUASSERT(cudaMalloc(&mDeviceBboxes, sizeof(void*) * static_cast(mFeatureCnt))); - PLUGIN_CUASSERT(cudaMemcpy(mDeviceScores, score_tp.data(), sizeof(void*) * mFeatureCnt, cudaMemcpyHostToDevice)); - PLUGIN_CUASSERT(cudaMemcpy(mDeviceBboxes, box_tp.data(), sizeof(void*) * mFeatureCnt, cudaMemcpyHostToDevice)); + PLUGIN_CUASSERT(cudaMemcpy( + mDeviceScores, score_tp.data(), sizeof(void*) * static_cast(mFeatureCnt), cudaMemcpyHostToDevice)); + PLUGIN_CUASSERT(cudaMemcpy( + mDeviceBboxes, box_tp.data(), sizeof(void*) * static_cast(mFeatureCnt), cudaMemcpyHostToDevice)); return 0; } @@ -274,8 +276,8 @@ char const* MultilevelProposeROI::getPluginNamespace() const noexcept size_t MultilevelProposeROI::getSerializationSize() const noexcept { - return sizeof(int32_t) * 2 + sizeof(float) * 2 + sizeof(int32_t) * (mFeatureCnt + 1) 
+ sizeof(nvinfer1::Dims) - + sizeof(DataType); + return sizeof(int32_t) * 2 + sizeof(float) * 2 + sizeof(int32_t) * static_cast(mFeatureCnt + 1) + + sizeof(nvinfer1::Dims) + sizeof(DataType); } void MultilevelProposeROI::serialize(void* buffer) const noexcept @@ -388,9 +390,9 @@ void MultilevelProposeROI::generate_pyramid_anchors(nvinfer1::Dims const& imageS std::vector anchor_strides; for (int32_t i = min_level; i < max_level + 1; i++) { - int32_t stride = static_cast(pow(2.0, i)); + int32_t stride = static_cast(std::pow(2.0, i)); anchor_strides.push_back(stride); - anchor_scales.push_back(stride * anchor_scale); + anchor_scales.push_back(static_cast(stride) * anchor_scale); } auto& anchors = mAnchorBoxesHost; @@ -411,7 +413,9 @@ void MultilevelProposeROI::generate_pyramid_anchors(nvinfer1::Dims const& imageS float w = scale * r.first; // Using y+h/2 instead of y+h/2-1 for alignment with TLT implementation - s_anchors.insert(s_anchors.end(), {(y - h / 2), (x - w / 2), (y + h / 2), (x + w / 2)}); + s_anchors.insert(s_anchors.end(), + {(static_cast(y) - h / 2), (static_cast(x) - w / 2), + (static_cast(y) + h / 2), (static_cast(x) + w / 2)}); } anchors.push_back(s_anchors); @@ -470,12 +474,12 @@ int32_t MultilevelProposeROI::enqueue( reinterpret_cast(mDeviceBboxes), final_proposals); PLUGIN_ASSERT(status == cudaSuccess); - return status; + return static_cast(status); } // Return the DataType of the plugin output at the requested index DataType MultilevelProposeROI::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { // Only DataType::kFLOAT is acceptable by the plugin layer if ((inputTypes[0] == DataType::kFLOAT) || (inputTypes[0] == DataType::kHALF)) @@ -485,29 +489,30 @@ DataType MultilevelProposeROI::getOutputDataType( // Return true if output tensor is broadcast across a batch. 
bool MultilevelProposeROI::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool MultilevelProposeROI::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool MultilevelProposeROI::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. -void MultilevelProposeROI::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void MultilevelProposeROI::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t /*nbOutputs*/, DataType const* inputTypes, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t maxBatchSize) noexcept { check_valid_inputs(inputDims, nbInputs); mAnchorsCnt.clear(); for (int32_t i = 0; i < mFeatureCnt; i++) { - mAnchorsCnt.push_back(inputDims[2 * i].d[0]); - PLUGIN_ASSERT(mAnchorsCnt[i] == (int32_t) (mAnchorBoxesHost[i].size() / 4)); + mAnchorsCnt.push_back(static_cast(inputDims[2 * i].d[0])); + PLUGIN_ASSERT(mAnchorsCnt[i] == static_cast(mAnchorBoxesHost[i].size() / 4)); } mMaxBatchSize = maxBatchSize; @@ -517,7 +522,7 @@ void MultilevelProposeROI::configurePlugin(Dims const* inputDims, int32_t nbInpu // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void MultilevelProposeROI::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp index 80182b3d5..06f2d95fe 100644 --- a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPlugin.cpp @@ -119,8 +119,8 @@ int32_t MultiscaleDeformableAttnPlugin::getOutputDataTypes( } int32_t MultiscaleDeformableAttnPlugin::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, - IExprBuilder& exprBuilder) noexcept + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, + IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -236,7 +236,7 @@ PluginFieldCollection const* MultiscaleDeformableAttnPlugin::getFieldsToSerializ { mDataToSerialize.clear(); // This plugin has no fields to serialize - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -248,8 +248,8 @@ PluginFieldCollection const* MultiscaleDeformableAttnPlugin::getFieldsToSerializ } // IPluginV3OneRuntime methods -size_t MultiscaleDeformableAttnPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t MultiscaleDeformableAttnPlugin::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { // No workspace needed for 
this plugin return 0; @@ -296,7 +296,7 @@ int32_t MultiscaleDeformableAttnPlugin::onShapeChange( return STATUS_FAILURE; } -IPluginV3* MultiscaleDeformableAttnPlugin::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* MultiscaleDeformableAttnPlugin::attachToContext(IPluginResourceContext* /*context*/) noexcept { try { @@ -310,21 +310,22 @@ IPluginV3* MultiscaleDeformableAttnPlugin::attachToContext(IPluginResourceContex return nullptr; } -int32_t MultiscaleDeformableAttnPlugin::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t MultiscaleDeformableAttnPlugin::enqueue(PluginTensorDesc const* inputDesc, + PluginTensorDesc const* /*outputDesc*/, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t stream) noexcept { try { PLUGIN_VALIDATE( inputDesc != nullptr && inputs != nullptr && outputs != nullptr, "Null pointers found in enqueue"); - int32_t const batch = inputDesc[0].dims.d[0]; - int32_t spatialSize = inputDesc[0].dims.d[1]; - int32_t numHeads = inputDesc[0].dims.d[2]; - int32_t channels = inputDesc[0].dims.d[3]; - int32_t numLevels = inputDesc[1].dims.d[0]; - int32_t numQuery = inputDesc[3].dims.d[1]; - int32_t numPoint = inputDesc[3].dims.d[4]; + int32_t const batch = static_cast(inputDesc[0].dims.d[0]); + int32_t spatialSize = static_cast(inputDesc[0].dims.d[1]); + int32_t numHeads = static_cast(inputDesc[0].dims.d[2]); + int32_t channels = static_cast(inputDesc[0].dims.d[3]); + int32_t numLevels = static_cast(inputDesc[1].dims.d[0]); + int32_t numQuery = static_cast(inputDesc[3].dims.d[1]); + int32_t numPoint = static_cast(inputDesc[3].dims.d[4]); int32_t rc = 0; if (inputDesc[0].type == DataType::kFLOAT) @@ -369,7 +370,7 @@ int32_t MultiscaleDeformableAttnPlugin::enqueue(PluginTensorDesc const* inputDes 
MultiscaleDeformableAttnPluginCreator::MultiscaleDeformableAttnPluginCreator() { mPluginAttributes.clear(); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -389,7 +390,7 @@ PluginFieldCollection const* MultiscaleDeformableAttnPluginCreator::getFieldName } IPluginV3* MultiscaleDeformableAttnPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* /*name*/, PluginFieldCollection const* /*fc*/, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.cpp b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.cpp index cb5206fa2..201fa0303 100644 --- a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.cpp +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.cpp @@ -39,7 +39,7 @@ static char const* DMHA_NAME{"MultiscaleDeformableAttnPlugin_TRT"}; MultiscaleDeformableAttnPluginLegacy::MultiscaleDeformableAttnPluginLegacy() {} -MultiscaleDeformableAttnPluginLegacy::MultiscaleDeformableAttnPluginLegacy(void const* data, size_t length) {} +MultiscaleDeformableAttnPluginLegacy::MultiscaleDeformableAttnPluginLegacy(void const* /*data*/, size_t /*length*/) {} nvinfer1::IPluginV2DynamicExt* MultiscaleDeformableAttnPluginLegacy::clone() const noexcept { @@ -56,8 +56,8 @@ nvinfer1::IPluginV2DynamicExt* MultiscaleDeformableAttnPluginLegacy::clone() con return nullptr; } -nvinfer1::DimsExprs MultiscaleDeformableAttnPluginLegacy::getOutputDimensions(int32_t outputIndex, - nvinfer1::DimsExprs const* inputs, int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept +nvinfer1::DimsExprs MultiscaleDeformableAttnPluginLegacy::getOutputDimensions(int32_t /*outputIndex*/, + nvinfer1::DimsExprs const* inputs, int32_t /*nbInputs*/, nvinfer1::IExprBuilder& /*exprBuilder*/) 
noexcept { nvinfer1::DimsExprs ret; ret.nbDims = 4; @@ -88,7 +88,7 @@ bool MultiscaleDeformableAttnPluginLegacy::supportsFormatCombination( } void MultiscaleDeformableAttnPluginLegacy::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* inputs, - int32_t nbInputs, nvinfer1::DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept + int32_t /*nbInputs*/, nvinfer1::DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { // Check for valid input dimensions PLUGIN_ASSERT(inputs[0].desc.dims.nbDims == 4); @@ -113,8 +113,8 @@ void MultiscaleDeformableAttnPluginLegacy::configurePlugin(nvinfer1::DynamicPlug PLUGIN_ASSERT(inputs[3].desc.dims.d[1] == inputs[4].desc.dims.d[1]); } -size_t MultiscaleDeformableAttnPluginLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, - int32_t nbInputs, nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t MultiscaleDeformableAttnPluginLegacy::getWorkspaceSize(nvinfer1::PluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -125,13 +125,13 @@ int32_t MultiscaleDeformableAttnPluginLegacy::enqueue(nvinfer1::PluginTensorDesc { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const batch = inputDesc[0].dims.d[0]; - int32_t spatial_size = inputDesc[0].dims.d[1]; - int32_t num_heads = inputDesc[0].dims.d[2]; - int32_t channels = inputDesc[0].dims.d[3]; - int32_t num_levels = inputDesc[1].dims.d[0]; - int32_t num_query = inputDesc[3].dims.d[1]; - int32_t num_point = inputDesc[3].dims.d[4]; + int32_t const batch = static_cast(inputDesc[0].dims.d[0]); + int32_t spatial_size = static_cast(inputDesc[0].dims.d[1]); + int32_t num_heads = static_cast(inputDesc[0].dims.d[2]); + int32_t channels = static_cast(inputDesc[0].dims.d[3]); + int32_t num_levels = static_cast(inputDesc[1].dims.d[0]); + int32_t num_query = 
static_cast(inputDesc[3].dims.d[1]); + int32_t num_point = static_cast(inputDesc[3].dims.d[4]); int32_t rc = 0; if (inputDesc[0].type == nvinfer1::DataType::kFLOAT) { @@ -161,8 +161,8 @@ int32_t MultiscaleDeformableAttnPluginLegacy::enqueue(nvinfer1::PluginTensorDesc return rc; } -void MultiscaleDeformableAttnPluginLegacy::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, nvinfer1::IGpuAllocator* gpuAllocator) noexcept +void MultiscaleDeformableAttnPluginLegacy::attachToContext(cudnnContext* /*cudnnContext*/, + cublasContext* /*cublasContext*/, nvinfer1::IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -170,7 +170,7 @@ void MultiscaleDeformableAttnPluginLegacy::detachFromContext() noexcept {} // IPluginV2Ext Methods nvinfer1::DataType MultiscaleDeformableAttnPluginLegacy::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { return inputTypes[0]; } @@ -203,7 +203,7 @@ size_t MultiscaleDeformableAttnPluginLegacy::getSerializationSize() const noexce return 0; } -void MultiscaleDeformableAttnPluginLegacy::serialize(void* buffer) const noexcept {} +void MultiscaleDeformableAttnPluginLegacy::serialize(void* /*buffer*/) const noexcept {} void MultiscaleDeformableAttnPluginLegacy::destroy() noexcept { @@ -224,7 +224,7 @@ char const* MultiscaleDeformableAttnPluginLegacy::getPluginNamespace() const noe MultiscaleDeformableAttnPluginCreatorLegacy::MultiscaleDeformableAttnPluginCreatorLegacy() { mPluginAttributes.clear(); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -244,7 +244,7 @@ nvinfer1::PluginFieldCollection const* MultiscaleDeformableAttnPluginCreatorLega } IPluginV2* MultiscaleDeformableAttnPluginCreatorLegacy::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char 
const* /*name*/, PluginFieldCollection const* /*fc*/) noexcept { try { @@ -259,7 +259,7 @@ IPluginV2* MultiscaleDeformableAttnPluginCreatorLegacy::createPlugin( } IPluginV2* MultiscaleDeformableAttnPluginCreatorLegacy::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h index 18da1b789..ec19aca8f 100644 --- a/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h +++ b/plugin/multiscaleDeformableAttnPlugin/multiscaleDeformableAttnPluginLegacy.h @@ -42,6 +42,11 @@ namespace plugin class MultiscaleDeformableAttnPluginLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + // Constructors/destructors MultiscaleDeformableAttnPluginLegacy(); MultiscaleDeformableAttnPluginLegacy(void const* data, size_t length); @@ -80,16 +85,6 @@ class MultiscaleDeformableAttnPluginLegacy : public nvinfer1::IPluginV2DynamicEx private: std::string mNamespace; - -#if NV_TENSORRT_MAJOR < 8 - using nvinfer1::IPluginV2DynamicExt::canBroadcastInputAcrossBatch; - using nvinfer1::IPluginV2DynamicExt::configurePlugin; - using nvinfer1::IPluginV2DynamicExt::enqueue; - using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; - using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; - using nvinfer1::IPluginV2DynamicExt::isOutputBroadcastAcrossBatch; - using nvinfer1::IPluginV2DynamicExt::supportsFormat; -#endif }; // Legacy creator class diff --git a/plugin/nmsPlugin/nmsPlugin.cpp b/plugin/nmsPlugin/nmsPlugin.cpp index 6e49d3aed..820f47191 100644 --- 
a/plugin/nmsPlugin/nmsPlugin.cpp +++ b/plugin/nmsPlugin/nmsPlugin.cpp @@ -52,22 +52,22 @@ DetectionOutputDynamic::DetectionOutputDynamic(DetectionOutputParameters params) { } -DetectionOutput::DetectionOutput(DetectionOutputParameters params, int32_t C1, int32_t C2, int32_t numPriors) +DetectionOutput::DetectionOutput(DetectionOutputParameters params, int32_t C1_, int32_t C2_, int32_t numPriors_) : param(params) - , C1(C1) - , C2(C2) - , numPriors(numPriors) + , C1(C1_) + , C2(C2_) + , numPriors(numPriors_) , mType(DataType::kFLOAT) , mScoreBits(16) { } DetectionOutputDynamic::DetectionOutputDynamic( - DetectionOutputParameters params, int32_t C1, int32_t C2, int32_t numPriors) + DetectionOutputParameters params, int32_t C1_, int32_t C2_, int32_t numPriors_) : param(params) - , C1(C1) - , C2(C2) - , numPriors(numPriors) + , C1(C1_) + , C2(C2_) + , numPriors(numPriors_) , mType(DataType::kFLOAT) , mScoreBits(16) { @@ -139,7 +139,7 @@ void DetectionOutput::terminate() noexcept {} void DetectionOutputDynamic::terminate() noexcept {} // Returns output dimensions at given index -Dims DetectionOutput::getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbInputDims) noexcept +Dims DetectionOutput::getOutputDimensions(int32_t index, Dims const* /*inputs*/, int32_t nbInputDims) noexcept { PLUGIN_ASSERT(nbInputDims == 3); PLUGIN_ASSERT(index == 0 || index == 1); @@ -169,17 +169,18 @@ DimsExprs DetectionOutputDynamic::getOutputDimensions( if (inputs[C1_idx].d[0]->isConstant() && inputs[C1_idx].d[1]->isConstant() && inputs[C1_idx].d[2]->isConstant() && inputs[C1_idx].d[3]->isConstant()) { - C1 = exprBuilder - .operation(DimensionOperation::kPROD, - *exprBuilder.operation(DimensionOperation::kPROD, *inputs[C1_idx].d[1], *inputs[C1_idx].d[2]), - *inputs[C1_idx].d[3]) - ->getConstantValue(); + C1 = static_cast(exprBuilder + .operation(DimensionOperation::kPROD, + *exprBuilder.operation(DimensionOperation::kPROD, *inputs[C1_idx].d[1], *inputs[C1_idx].d[2]), + 
*inputs[C1_idx].d[3]) + ->getConstantValue()); } if (inputs[C2_idx].d[0]->isConstant() && inputs[C2_idx].d[1]->isConstant() && inputs[C2_idx].d[2]->isConstant()) { - C2 = exprBuilder.operation(DimensionOperation::kPROD, *inputs[C2_idx].d[1], *inputs[C2_idx].d[2]) - ->getConstantValue(); + C2 = static_cast( + exprBuilder.operation(DimensionOperation::kPROD, *inputs[C2_idx].d[1], *inputs[C2_idx].d[2]) + ->getConstantValue()); } // Output dimensions // index 0 : Dimensions 1x param.keepTopK x 7 @@ -212,11 +213,11 @@ size_t DetectionOutput::getWorkspaceSize(int32_t maxBatchSize) const noexcept param.shareLocation, maxBatchSize, C1, C2, param.numClasses, numPriors, param.topK, mType, mType); } -size_t DetectionOutputDynamic::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t DetectionOutputDynamic::getWorkspaceSize(PluginTensorDesc const* inputs, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { - return detectionInferenceWorkspaceSize( - param.shareLocation, inputs[0].dims.d[0], C1, C2, param.numClasses, numPriors, param.topK, mType, mType); + return detectionInferenceWorkspaceSize(param.shareLocation, static_cast(inputs[0].dims.d[0]), C1, C2, + param.numClasses, numPriors, param.topK, mType, mType); } // Plugin layer implementation @@ -253,10 +254,11 @@ int32_t DetectionOutputDynamic::enqueue(PluginTensorDesc const* inputDesc, Plugi void* topDetections = outputs[0]; void* keepCount = outputs[1]; - pluginStatus_t status = detectionInference(stream, inputDesc[0].dims.d[0], C1, C2, param.shareLocation, - param.varianceEncodedInTarget, param.backgroundLabelId, numPriors, param.numClasses, param.topK, param.keepTopK, - param.confidenceThreshold, param.nmsThreshold, param.codeType, mType, locData, priorData, mType, confData, - keepCount, topDetections, workspace, param.isNormalized, param.confSigmoid, mScoreBits, false); + 
pluginStatus_t status = detectionInference(stream, static_cast(inputDesc[0].dims.d[0]), C1, C2, + param.shareLocation, param.varianceEncodedInTarget, param.backgroundLabelId, numPriors, param.numClasses, + param.topK, param.keepTopK, param.confidenceThreshold, param.nmsThreshold, param.codeType, mType, locData, + priorData, mType, confData, keepCount, topDetections, workspace, param.isNormalized, param.confSigmoid, + mScoreBits, false); return status; } @@ -305,7 +307,7 @@ bool DetectionOutput::supportsFormat(DataType type, PluginFormat format) const n } bool DetectionOutputDynamic::supportsFormatCombination( - int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t /*nbOutputs*/) noexcept { // 3 inputs, 2 outputs, so 5 input/output in total PLUGIN_ASSERT(0 <= pos && pos < 5); @@ -438,7 +440,7 @@ char const* DetectionOutputDynamic::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index. DataType DetectionOutput::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { // Two outputs PLUGIN_ASSERT(index == 0 || index == 1); @@ -454,7 +456,7 @@ DataType DetectionOutput::getOutputDataType( } DataType DetectionOutputDynamic::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { // Two outputs PLUGIN_ASSERT(index == 0 || index == 1); @@ -471,13 +473,13 @@ DataType DetectionOutputDynamic::getOutputDataType( // Return true if output tensor is broadcast across a batch. 
bool DetectionOutput::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool DetectionOutput::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool DetectionOutput::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } @@ -491,8 +493,8 @@ bool DetectionOutput::canBroadcastInputAcrossBatch(int32_t inputIndex) const noe // format: format NCHW, NHWC etc // maxbatchSize: maximum batch size for the plugin layer void DetectionOutput::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(nbInputs == 3); PLUGIN_ASSERT(nbOutputs == 2); @@ -511,11 +513,11 @@ void DetectionOutput::configurePlugin(Dims const* inputDims, int32_t nbInputs, D // Configure C1, C2 and numPriors // Input ordering C1, C2, numPriors - C1 = inputDims[param.inputOrder[0]].d[0]; - C2 = inputDims[param.inputOrder[1]].d[0]; + C1 = static_cast(inputDims[param.inputOrder[0]].d[0]); + C2 = static_cast(inputDims[param.inputOrder[1]].d[0]); int32_t const nbBoxCoordinates = 4; - numPriors = inputDims[param.inputOrder[2]].d[1] / nbBoxCoordinates; + numPriors = static_cast(inputDims[param.inputOrder[2]].d[1] / nbBoxCoordinates); int32_t const numLocClasses = param.shareLocation ? 
1 : param.numClasses; // Verify C1 @@ -548,11 +550,11 @@ void DetectionOutputDynamic::configurePlugin( // Configure C1, C2 and numPriors // Input ordering C1, C2, numPriors - C1 = in[param.inputOrder[0]].desc.dims.d[1]; - C2 = in[param.inputOrder[1]].desc.dims.d[1]; + C1 = static_cast(in[param.inputOrder[0]].desc.dims.d[1]); + C2 = static_cast(in[param.inputOrder[1]].desc.dims.d[1]); int32_t const nbBoxCoordinates = 4; - numPriors = in[param.inputOrder[2]].desc.dims.d[2] / nbBoxCoordinates; + numPriors = static_cast(in[param.inputOrder[2]].desc.dims.d[2] / nbBoxCoordinates); int32_t const numLocClasses = param.shareLocation ? 1 : param.numClasses; // Verify C1 @@ -567,7 +569,7 @@ void DetectionOutputDynamic::configurePlugin( // Attach the plugin object to an execution context and grant the plugin the access to some context resource. void DetectionOutput::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -593,7 +595,7 @@ NMSBasePluginCreator::NMSBasePluginCreator() mPluginAttributes.emplace_back(PluginField("codeType", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("scoreBits", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("isBatchAgnostic", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -626,7 +628,7 @@ PluginFieldCollection const* NMSBasePluginCreator::getFieldNames() noexcept } // Creates the NMS plugin -IPluginV2Ext* NMSPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* NMSPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -734,7 +736,8 @@ IPluginV2Ext* 
NMSPluginCreator::createPlugin(char const* name, PluginFieldCollec return nullptr; } -IPluginV2DynamicExt* NMSDynamicPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2DynamicExt* NMSDynamicPluginCreator::createPlugin( + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -838,7 +841,7 @@ IPluginV2DynamicExt* NMSDynamicPluginCreator::createPlugin(char const* name, Plu } IPluginV2Ext* NMSPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { @@ -859,7 +862,7 @@ IPluginV2Ext* NMSPluginCreator::deserializePlugin( } IPluginV2DynamicExt* NMSDynamicPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/nmsPlugin/nmsPlugin.h b/plugin/nmsPlugin/nmsPlugin.h index bdce5f2d7..6b31f3922 100644 --- a/plugin/nmsPlugin/nmsPlugin.h +++ b/plugin/nmsPlugin/nmsPlugin.h @@ -99,6 +99,11 @@ class TRT_DEPRECATED DetectionOutput : public IPluginV2Ext class TRT_DEPRECATED DetectionOutputDynamic : public IPluginV2DynamicExt { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + DetectionOutputDynamic(DetectionOutputParameters param); DetectionOutputDynamic(DetectionOutputParameters param, int32_t C1, int32_t C2, int32_t numPriors); DetectionOutputDynamic(void const* data, size_t length); diff --git a/plugin/normalizePlugin/normalizePlugin.cpp b/plugin/normalizePlugin/normalizePlugin.cpp index 0bec31dcc..c0cd25f1a 100644 --- a/plugin/normalizePlugin/normalizePlugin.cpp +++ b/plugin/normalizePlugin/normalizePlugin.cpp @@ -31,10 +31,10 @@ char const* const kNORMALIZE_PLUGIN_VERSION{"1"}; char 
const* const kNORMALIZE_PLUGIN_NAME{"Normalize_TRT"}; } // namespace -Normalize::Normalize(Weights const* weights, int32_t nbWeights, bool acrossSpatial, bool channelShared, float eps) - : acrossSpatial(acrossSpatial) - , channelShared(channelShared) - , eps(eps) +Normalize::Normalize(Weights const* weights, int32_t nbWeights, bool acrossSpatial_, bool channelShared_, float eps_) + : acrossSpatial(acrossSpatial_) + , channelShared(channelShared_) + , eps(eps_) { gLogWarning << "NormalizePlugin is deprecated since TensorRT 9.0. Use INetworkDefinition::addNormalization() to " "add an INormalizationLayer." @@ -46,15 +46,15 @@ Normalize::Normalize(Weights const* weights, int32_t nbWeights, bool acrossSpati mScalarScale = static_cast(weights[0].values)[0]; } -Normalize::Normalize(Weights const* weights, int32_t nbWeights, float scalarScale, bool acrossSpatial, - bool channelShared, float eps, int32_t C, int32_t H, int32_t W) +Normalize::Normalize(Weights const* weights, int32_t nbWeights, float scalarScale, bool acrossSpatial_, + bool channelShared_, float eps_, int32_t C_, int32_t H_, int32_t W_) : mScalarScale(scalarScale) - , acrossSpatial(acrossSpatial) - , channelShared(channelShared) - , eps(eps) - , C(C) - , H(H) - , W(W) + , acrossSpatial(acrossSpatial_) + , channelShared(channelShared_) + , eps(eps_) + , C(C_) + , H(H_) + , W(W_) { mNbWeights = nbWeights; PLUGIN_VALIDATE(nbWeights == 1); @@ -101,7 +101,7 @@ int32_t Normalize::initialize() noexcept void Normalize::terminate() noexcept {} -size_t Normalize::getWorkspaceSize(int32_t maxBatchSize) const noexcept +size_t Normalize::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return normalizePluginWorkspaceSize(acrossSpatial, C, H, W); } @@ -192,7 +192,7 @@ char const* Normalize::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index DataType Normalize::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const 
noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index == 0); return DataType::kFLOAT; @@ -200,26 +200,26 @@ DataType Normalize::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool Normalize::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool Normalize::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool Normalize::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. void Normalize::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept + DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat floatFormat, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(*inputTypes == DataType::kFLOAT && floatFormat == PluginFormat::kLINEAR); - C = inputDims[0].d[0]; - H = inputDims[0].d[1]; - W = inputDims[0].d[2]; + C = static_cast(inputDims[0].d[0]); + H = static_cast(inputDims[0].d[1]); + W = static_cast(inputDims[0].d[2]); if (channelShared) { PLUGIN_ASSERT(mWeights.count == 1); @@ -237,7 +237,8 @@ void Normalize::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims co } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
-void Normalize::attachToContext(cudnnContext* cudnn, cublasContext* cublas, IGpuAllocator* gpuAllocator) noexcept +void Normalize::attachToContext( + cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, IGpuAllocator* gpuAllocator) noexcept { try { @@ -299,7 +300,7 @@ NormalizePluginCreator::NormalizePluginCreator() mPluginAttributes.emplace_back(PluginField("nbWeights", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("eps", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -318,7 +319,7 @@ PluginFieldCollection const* NormalizePluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* NormalizePluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* NormalizePluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -378,7 +379,7 @@ IPluginV2Ext* NormalizePluginCreator::createPlugin(char const* name, PluginField } IPluginV2Ext* NormalizePluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/nvFasterRCNN/nvFasterRCNNPlugin.cpp b/plugin/nvFasterRCNN/nvFasterRCNNPlugin.cpp index 0ccd27e8b..ff9ccd6dc 100644 --- a/plugin/nvFasterRCNN/nvFasterRCNNPlugin.cpp +++ b/plugin/nvFasterRCNN/nvFasterRCNNPlugin.cpp @@ -27,8 +27,8 @@ char const* const kRPROI_PLUGIN_VERSION{"1"}; char const* const kRPROI_PLUGIN_NAME{"RPROI_TRT"}; } // namespace -RPROIPlugin::RPROIPlugin(RPROIParams params, float const* anchorsRatios, float const* anchorsScales) - : params(params) +RPROIPlugin::RPROIPlugin(RPROIParams params_, float const* anchorsRatios, float const* anchorsScales) + : params(params_) { /* * It only supports the scenario where params.featureStride == 
params.minBoxSize @@ -48,18 +48,18 @@ RPROIPlugin::RPROIPlugin(RPROIParams params, float const* anchorsRatios, float c } // Constructor for cloning one plugin instance to another -RPROIPlugin::RPROIPlugin(RPROIParams params, float const* anchorsRatios, float const* anchorsScales, int32_t A, - int32_t C, int32_t H, int32_t W, float const* _anchorsDev, size_t deviceSmemSize, DataType inFeatureType, - DataType outFeatureType, DLayout_t inFeatureLayout) - : deviceSmemSize(deviceSmemSize) - , params(params) - , A(A) - , C(C) - , H(H) - , W(W) - , inFeatureType(inFeatureType) - , outFeatureType(outFeatureType) - , inFeatureLayout(inFeatureLayout) +RPROIPlugin::RPROIPlugin(RPROIParams params_, float const* anchorsRatios, float const* anchorsScales, int32_t A_, + int32_t C_, int32_t H_, int32_t W_, float const* _anchorsDev, size_t deviceSmemSize_, DataType inFeatureType_, + DataType outFeatureType_, DLayout_t inFeatureLayout_) + : deviceSmemSize(deviceSmemSize_) + , params(params_) + , A(A_) + , C(C_) + , H(H_) + , W(W_) + , inFeatureType(inFeatureType_) + , outFeatureType(outFeatureType_) + , inFeatureLayout(inFeatureLayout_) { PLUGIN_VALIDATE(params.anchorsRatioCount > 0 && params.anchorsScaleCount > 0); anchorsRatiosHost = copyToHost(anchorsRatios, params.anchorsRatioCount); @@ -235,7 +235,7 @@ float* RPROIPlugin::copyToHost(void const* srcHostData, int32_t count) noexcept int32_t RPROIPlugin::copyFromHost(char* dstHostBuffer, void const* source, int32_t count) const noexcept { PLUGIN_CHECK(cudaMemcpy(dstHostBuffer, source, count * sizeof(float), cudaMemcpyHostToHost)); - return count * sizeof(float); + return static_cast(count * sizeof(float)); } bool RPROIPlugin::supportsFormatCombination( @@ -313,7 +313,7 @@ char const* RPROIPlugin::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index. 
DataType RPROIPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Two outputs PLUGIN_ASSERT(index == 0 || index == 1); @@ -322,13 +322,13 @@ DataType RPROIPlugin::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool RPROIPlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool RPROIPlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool RPROIPlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } @@ -353,9 +353,9 @@ void RPROIPlugin::configurePlugin( PLUGIN_ASSERT(nbOutput == PluginNbOutputs); A = params.anchorsRatioCount * params.anchorsScaleCount; - C = in[2].dims.d[0]; - H = in[2].dims.d[1]; - W = in[2].dims.d[2]; + C = static_cast(in[2].dims.d[0]); + H = static_cast(in[2].dims.d[1]); + W = static_cast(in[2].dims.d[2]); inFeatureType = in[2].type; outFeatureType = out[1].type; inFeatureLayout = convertTensorFormat(in[2].format); @@ -372,7 +372,7 @@ void RPROIPlugin::configurePlugin( // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void RPROIPlugin::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -398,7 +398,7 @@ RPROIPluginCreator::RPROIPluginCreator() mPluginAttributes.emplace_back(PluginField("anchorsRatios", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("anchorsScales", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -422,7 +422,7 @@ PluginFieldCollection const* RPROIPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* RPROIPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* RPROIPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -520,7 +520,7 @@ IPluginV2Ext* RPROIPluginCreator::createPlugin(char const* name, PluginFieldColl } IPluginV2Ext* RPROIPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/nvFasterRCNN/nvFasterRCNNPlugin.h b/plugin/nvFasterRCNN/nvFasterRCNNPlugin.h index ec9b931b5..665c06513 100644 --- a/plugin/nvFasterRCNN/nvFasterRCNNPlugin.h +++ b/plugin/nvFasterRCNN/nvFasterRCNNPlugin.h @@ -29,6 +29,8 @@ namespace plugin class RPROIPlugin : public IPluginV2IOExt { public: + using IPluginV2IOExt::configurePlugin; + RPROIPlugin(RPROIParams params, float const* anchorsRatios, float const* anchorsScales); RPROIPlugin(RPROIParams params, float const* anchorsRatios, float const* anchorsScales, int32_t A, int32_t C, diff --git a/plugin/pillarScatterPlugin/pillarScatter.cpp b/plugin/pillarScatterPlugin/pillarScatter.cpp index 4e623010d..66c7a0cba 100644 --- 
a/plugin/pillarScatterPlugin/pillarScatter.cpp +++ b/plugin/pillarScatterPlugin/pillarScatter.cpp @@ -31,7 +31,7 @@ PillarScatterPlugin::PillarScatterPlugin(size_t h, size_t w) { } -PillarScatterPlugin::PillarScatterPlugin(void const* data, size_t length) +PillarScatterPlugin::PillarScatterPlugin(void const* data, size_t /*length*/) { auto const* d = toPointer(data); feature_y_size_ = readFromBuffer(d); @@ -54,7 +54,7 @@ nvinfer1::IPluginV2DynamicExt* PillarScatterPlugin::clone() const noexcept } nvinfer1::DimsExprs PillarScatterPlugin::getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, - int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept + int32_t /*nbInputs*/, nvinfer1::IExprBuilder& exprBuilder) noexcept { PLUGIN_ASSERT(outputIndex == 0); nvinfer1::DimsExprs output; @@ -62,8 +62,8 @@ nvinfer1::DimsExprs PillarScatterPlugin::getOutputDimensions(int32_t outputIndex output.nbDims = 4; output.d[0] = batch_size; output.d[1] = inputs[0].d[2]; - output.d[2] = exprBuilder.constant(feature_y_size_); - output.d[3] = exprBuilder.constant(feature_x_size_); + output.d[2] = exprBuilder.constant(static_cast(feature_y_size_)); + output.d[3] = exprBuilder.constant(static_cast(feature_x_size_)); return output; } @@ -93,14 +93,14 @@ bool PillarScatterPlugin::supportsFormatCombination( return false; } -void PillarScatterPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* in, int32_t nbInputs, - nvinfer1::DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void PillarScatterPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, + nvinfer1::DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { return; } -size_t PillarScatterPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t PillarScatterPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc 
const* /*inputs*/, int32_t /*nbInputs*/, + nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -112,17 +112,17 @@ int32_t PillarScatterPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDesc { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t batchSize = inputDesc[0].dims.d[0]; - int32_t maxPillarNum = inputDesc[0].dims.d[1]; - int32_t numFeatures = inputDesc[0].dims.d[2]; + int32_t batchSize = static_cast(inputDesc[0].dims.d[0]); + int32_t maxPillarNum = static_cast(inputDesc[0].dims.d[1]); + int32_t numFeatures = static_cast(inputDesc[0].dims.d[2]); nvinfer1::DataType inputType = inputDesc[0].type; auto coords_data = static_cast(inputs[1]); auto params_data = static_cast(inputs[2]); - uint32_t featureY = feature_y_size_; - uint32_t featureX = feature_x_size_; + uint32_t featureY = static_cast(feature_y_size_); + uint32_t featureX = static_cast(feature_x_size_); int32_t status = -1; @@ -155,7 +155,7 @@ int32_t PillarScatterPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDesc } nvinfer1::DataType PillarScatterPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { return inputTypes[0]; } @@ -213,7 +213,7 @@ PillarScatterPluginCreator::PillarScatterPluginCreator() { mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("dense_shape", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -232,7 +232,7 @@ PluginFieldCollection const* PillarScatterPluginCreator::getFieldNames() noexcep return &mFC; } -IPluginV2* PillarScatterPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2* PillarScatterPluginCreator::createPlugin(char 
const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -262,7 +262,7 @@ IPluginV2* PillarScatterPluginCreator::createPlugin(char const* name, PluginFiel } IPluginV2* PillarScatterPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/pillarScatterPlugin/pillarScatter.h b/plugin/pillarScatterPlugin/pillarScatter.h index 95c789de3..bc4e78af9 100644 --- a/plugin/pillarScatterPlugin/pillarScatter.h +++ b/plugin/pillarScatterPlugin/pillarScatter.h @@ -32,6 +32,11 @@ namespace plugin class PillarScatterPlugin : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + PillarScatterPlugin() = delete; PillarScatterPlugin(void const* data, size_t length); PillarScatterPlugin(size_t h, size_t w); diff --git a/plugin/priorBoxPlugin/priorBoxPlugin.cpp b/plugin/priorBoxPlugin/priorBoxPlugin.cpp index 6b715faad..a1b8427b0 100644 --- a/plugin/priorBoxPlugin/priorBoxPlugin.cpp +++ b/plugin/priorBoxPlugin/priorBoxPlugin.cpp @@ -39,7 +39,8 @@ PriorBox::PriorBox(PriorBoxParameters param, int32_t H, int32_t W) , mW(W) { // Each object should manage its copy of param. 
- auto copyParamData = [](float*& dstPtr, std::vector& dstVec, float const* src, int32_t size) { + auto copyParamData = [](float*& dstPtr, std::vector& dstVec, float const* src, int32_t size) + { PLUGIN_VALIDATE(size >= 0); PLUGIN_VALIDATE(src != nullptr); @@ -56,7 +57,8 @@ PriorBox::PriorBox(PriorBoxParameters param, int32_t H, int32_t W) void PriorBox::setupDeviceMemory() noexcept { - auto copyToDevice = [](void const* hostData, int32_t count) -> Weights { + auto copyToDevice = [](void const* hostData, int32_t count) -> Weights + { PLUGIN_VALIDATE(count >= 0); void* deviceData = nullptr; PLUGIN_CUASSERT(cudaMalloc(&deviceData, count * sizeof(float))); @@ -84,7 +86,7 @@ void PriorBox::setupDeviceMemory() noexcept // Prevent duplicated aspect ratios from input for (size_t j = 0; j < tmpAR.size(); ++j) { - if (std::fabs(aspectRatio - tmpAR[j]) < 1e-6) + if (std::fabs(aspectRatio - tmpAR[j]) < 1e-6F) { alreadyExist = true; break; @@ -105,11 +107,11 @@ void PriorBox::setupDeviceMemory() noexcept // https://docs.nvidia.com/deeplearning/sdk/tensorrt-api/c_api/classnvinfer1_1_1_weights.html // mAspectRatiosGPU.count is different to mParam.numAspectRatios. 
// - mAspectRatiosGPU = copyToDevice(&tmpAR[0], tmpAR.size()); + mAspectRatiosGPU = copyToDevice(&tmpAR[0], static_cast(tmpAR.size())); // Number of prior boxes per grid cell on the feature map // tmpAR already included an aspect ratio of 1.0 - mNumPriors = tmpAR.size() * mParam.numMinSize; + mNumPriors = static_cast(tmpAR.size()) * mParam.numMinSize; // // If we have maxSizes, as long as all the maxSizes meets assertion requirement, we add one bounding box per maxSize @@ -144,7 +146,8 @@ void PriorBox::deserialize(uint8_t const* data, size_t length) auto const* d{data}; mParam = read(d); - auto readArray = [&d](int32_t size, std::vector& dstVec, float*& dstPtr) { + auto readArray = [&d](int32_t size, std::vector& dstVec, float*& dstPtr) + { PLUGIN_VALIDATE(size >= 0); dstVec.resize(size); for (int32_t i = 0; i < size; i++) @@ -179,8 +182,8 @@ Dims PriorBox::getOutputDimensions(int32_t index, Dims const* inputs, int32_t nb // Only one output from the plugin layer PLUGIN_VALIDATE(index == 0); // Particularity of the PriorBox layer: no batchSize dimension needed - mH = inputs[0].d[1]; - mW = inputs[0].d[2]; + mH = static_cast(inputs[0].d[1]); + mW = static_cast(inputs[0].d[2]); // workaround for TRT // The first channel is for prior box coordinates. // The second channel is for prior box scaling factors, which is simply a copy of the variance provided. 
@@ -201,8 +204,9 @@ int32_t PriorBox::enqueue(int32_t /*batchSize*/, void const* const* /*inputs*/, void* /*workspace*/, cudaStream_t stream) noexcept { void* outputData = outputs[0]; - pluginStatus_t status = priorBoxInference(stream, mParam, mH, mW, mNumPriors, mAspectRatiosGPU.count, - mMinSizeGPU.values, mMaxSizeGPU.values, mAspectRatiosGPU.values, outputData); + pluginStatus_t status + = priorBoxInference(stream, mParam, mH, mW, mNumPriors, static_cast(mAspectRatiosGPU.count), + mMinSizeGPU.values, mMaxSizeGPU.values, mAspectRatiosGPU.values, outputData); return status; } @@ -221,7 +225,8 @@ void PriorBox::serialize(void* buffer) const noexcept uint8_t* a = d; write(d, mParam); - auto writeArray = [&d](int32_t const size, float const* srcPtr, std::vector const& srcVec) { + auto writeArray = [&d](int32_t const size, float const* srcPtr, std::vector const& srcVec) + { // srcVec is only used here to check that the size and srcPtr are correct. PLUGIN_VALIDATE(srcVec.data() == srcPtr); PLUGIN_VALIDATE(srcVec.size() == static_cast(size)); @@ -331,18 +336,18 @@ void PriorBox::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims con PLUGIN_VALIDATE(inputDims[0].nbDims == 3); PLUGIN_VALIDATE(inputDims[1].nbDims == 3); PLUGIN_VALIDATE(outputDims[0].nbDims == 3); - mH = inputDims[0].d[1]; - mW = inputDims[0].d[2]; + mH = static_cast(inputDims[0].d[1]); + mW = static_cast(inputDims[0].d[2]); // Prepare for the inference function. 
if (mParam.imgH == 0 || mParam.imgW == 0) { - mParam.imgH = inputDims[1].d[1]; - mParam.imgW = inputDims[1].d[2]; + mParam.imgH = static_cast(inputDims[1].d[1]); + mParam.imgW = static_cast(inputDims[1].d[2]); } if (mParam.stepH == 0 || mParam.stepW == 0) { - mParam.stepH = static_cast(mParam.imgH) / mH; - mParam.stepW = static_cast(mParam.imgW) / mW; + mParam.stepH = static_cast(mParam.imgH) / static_cast(mH); + mParam.stepW = static_cast(mParam.imgW) / static_cast(mW); } } @@ -370,7 +375,7 @@ PriorBoxPluginCreator::PriorBoxPluginCreator() mPluginAttributes.emplace_back(PluginField("stepW", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("offset", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/proposalLayerPlugin/proposalLayerPlugin.cpp b/plugin/proposalLayerPlugin/proposalLayerPlugin.cpp index 083445331..5bd76713a 100644 --- a/plugin/proposalLayerPlugin/proposalLayerPlugin.cpp +++ b/plugin/proposalLayerPlugin/proposalLayerPlugin.cpp @@ -41,7 +41,7 @@ ProposalLayerPluginCreator::ProposalLayerPluginCreator() mPluginAttributes.emplace_back(PluginField("iou_threshold", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("image_size", nullptr, PluginFieldType::kINT32, 3)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -60,7 +60,7 @@ PluginFieldCollection const* ProposalLayerPluginCreator::getFieldNames() noexcep return &mFC; } -IPluginV2Ext* ProposalLayerPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* ProposalLayerPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -101,7 +101,8 @@ IPluginV2Ext* ProposalLayerPluginCreator::createPlugin(char const* 
name, PluginF return nullptr; } -IPluginV2Ext* ProposalLayerPluginCreator::deserializePlugin(char const* name, void const* data, size_t length) noexcept +IPluginV2Ext* ProposalLayerPluginCreator::deserializePlugin( + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -132,7 +133,7 @@ ProposalLayer::ProposalLayer( mParam.backgroundLabelId = -1; mParam.numClasses = 1; mParam.keepTopK = mKeepTopK; - mParam.scoreThreshold = 0.0; + mParam.scoreThreshold = 0.0F; mParam.iouThreshold = mIOUThreshold; mType = DataType::kFLOAT; @@ -157,7 +158,7 @@ int32_t ProposalLayer::initialize() noexcept // Init the anchors for batch size: mAnchorBoxesDevice = std::make_shared>(mAnchorsCnt * 4 * mMaxBatchSize); - int32_t batch_offset = sizeof(float) * mAnchorsCnt * 4; + int32_t batch_offset = static_cast(sizeof(float)) * mAnchorsCnt * 4; uint8_t* device_ptr = static_cast(mAnchorBoxesDevice->mPtr); for (int32_t i = 0; i < mMaxBatchSize; i++) { @@ -256,7 +257,7 @@ void ProposalLayer::deserialize(int8_t const* data, size_t length) mParam.backgroundLabelId = -1; mParam.numClasses = 1; mParam.keepTopK = mKeepTopK; - mParam.scoreThreshold = 0.0; + mParam.scoreThreshold = 0.0F; mParam.iouThreshold = mIOUThreshold; mType = DataType::kFLOAT; @@ -301,8 +302,8 @@ void ProposalLayer::generate_pyramid_anchors(nvinfer1::Dims const& imageDims) auto const& strides = MaskRCNNConfig::BACKBONE_STRIDES; auto anchor_stride = MaskRCNNConfig::RPN_ANCHOR_STRIDE; - float const cy = imageDims.d[1] - 1; - float const cx = imageDims.d[2] - 1; + float const cy = static_cast(imageDims.d[1] - 1); + float const cx = static_cast(imageDims.d[2] - 1); auto& anchors = mAnchorBoxesHost; PLUGIN_VALIDATE(anchors.empty()); @@ -311,18 +312,19 @@ void ProposalLayer::generate_pyramid_anchors(nvinfer1::Dims const& imageDims) for (size_t s = 0; s < scales.size(); ++s) { float scale = scales[s]; - int32_t stride = strides[s]; + int32_t stride = static_cast(strides[s]); for (int32_t y = 0; y < imageDims.d[1]; 
y += anchor_stride * stride) for (int32_t x = 0; x < imageDims.d[2]; x += anchor_stride * stride) for (float r : ratios) { - float sqrt_r = sqrt(r); + float sqrt_r = sqrtf(r); float h = scale / sqrt_r; float w = scale * sqrt_r; anchors.insert(anchors.end(), - {(y - h / 2) / cy, (x - w / 2) / cx, (y + h / 2 - 1) / cy, (x + w / 2 - 1) / cx}); + {(static_cast(y) - h / 2) / cy, (static_cast(x) - w / 2) / cx, + (static_cast(y) + h / 2 - 1) / cy, (static_cast(x) + w / 2 - 1) / cx}); } } @@ -352,7 +354,7 @@ int32_t ProposalLayer::enqueue( // Return the DataType of the plugin output at the requested index DataType ProposalLayer::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only DataType::kFLOAT is acceptable by the plugin layer return DataType::kFLOAT; @@ -360,33 +362,34 @@ DataType ProposalLayer::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool ProposalLayer::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool ProposalLayer::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool ProposalLayer::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
-void ProposalLayer::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void ProposalLayer::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t /*nbOutputs*/, DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t maxBatchSize) noexcept { check_valid_inputs(inputDims, nbInputs); PLUGIN_ASSERT(inputDims[0].d[0] == inputDims[1].d[0]); - mAnchorsCnt = inputDims[0].d[0]; - PLUGIN_ASSERT(mAnchorsCnt == (int32_t) (mAnchorBoxesHost.size() / 4)); + mAnchorsCnt = static_cast(inputDims[0].d[0]); + PLUGIN_ASSERT(mAnchorsCnt == static_cast(mAnchorBoxesHost.size() / 4)); mMaxBatchSize = maxBatchSize; } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void ProposalLayer::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/proposalPlugin/proposalPlugin.cpp b/plugin/proposalPlugin/proposalPlugin.cpp index da528ce3e..31ab8de32 100644 --- a/plugin/proposalPlugin/proposalPlugin.cpp +++ b/plugin/proposalPlugin/proposalPlugin.cpp @@ -248,7 +248,7 @@ Dims ProposalPlugin::getOutputDimensions(int32_t index, Dims const* inputs, int3 PLUGIN_VALIDATE(nbInputDims == 2); PLUGIN_VALIDATE(inputs->nbDims == 3); PLUGIN_VALIDATE(inputs[1].nbDims == 3); - int32_t channels = mMaxBoxNum; + int32_t channels = static_cast(mMaxBoxNum); int32_t height = 4; int32_t width = 1; return Dims3(channels, height, width); @@ -273,7 +273,7 @@ DimsExprs ProposalDynamicPlugin::getOutputDimensions( DimsExprs outDim; outDim.nbDims = 4; outDim.d[0] = inputs[0].d[0]; - outDim.d[1] = exprBuilder.constant(mMaxBoxNum); + outDim.d[1] = exprBuilder.constant(static_cast(mMaxBoxNum)); outDim.d[2] = exprBuilder.constant(4); outDim.d[3] = exprBuilder.constant(1); return outDim; @@ -297,14 +297,17 @@ int32_t ProposalDynamicPlugin::initialize() noexcept size_t ProposalPlugin::getWorkspaceSize(int32_t maxBatchSize) const noexcept { - return _get_workspace_size(maxBatchSize, mAnchorSizeNum, mAnchorRatioNum, mRpnHeight, mRpnWidth, mMaxBoxNum); + return _get_workspace_size(maxBatchSize, static_cast(mAnchorSizeNum), + static_cast(mAnchorRatioNum), static_cast(mRpnHeight), static_cast(mRpnWidth), + static_cast(mMaxBoxNum)); } -size_t ProposalDynamicPlugin::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ProposalDynamicPlugin::getWorkspaceSize(PluginTensorDesc const* inputs, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { - int32_t 
batchSize = inputs[0].dims.d[0]; - return _get_workspace_size(batchSize, mAnchorSizeNum, mAnchorRatioNum, mRpnHeight, mRpnWidth, mMaxBoxNum); + int32_t batchSize = static_cast(inputs[0].dims.d[0]); + return _get_workspace_size(batchSize, static_cast(mAnchorSizeNum), static_cast(mAnchorRatioNum), + static_cast(mRpnHeight), static_cast(mRpnWidth), static_cast(mMaxBoxNum)); } int32_t ProposalPlugin::enqueue( @@ -315,9 +318,11 @@ int32_t ProposalPlugin::enqueue( int32_t status = STATUS_FAILURE; // Our plugin outputs only one tensor void* output = outputs[0]; - status = proposalInference_gpu(stream, inputs[0], inputs[1], batchSize, mInputHeight, mInputWidth, mRpnHeight, - mRpnWidth, mMaxBoxNum, mPreNmsTopN, &mAnchorSizes[0], mAnchorSizeNum, &mAnchorRatios[0], mAnchorRatioNum, - mRpnStdScaling, mRpnStride, mBboxMinSize, mNmsIouThreshold, workspace, output); + status = proposalInference_gpu(stream, inputs[0], inputs[1], batchSize, static_cast(mInputHeight), + static_cast(mInputWidth), static_cast(mRpnHeight), static_cast(mRpnWidth), + static_cast(mMaxBoxNum), static_cast(mPreNmsTopN), &mAnchorSizes[0], + static_cast(mAnchorSizeNum), &mAnchorRatios[0], static_cast(mAnchorRatioNum), + mRpnStdScaling, static_cast(mRpnStride), mBboxMinSize, mNmsIouThreshold, workspace, output); return status; } catch (std::exception const& e) @@ -337,10 +342,12 @@ int32_t ProposalDynamicPlugin::enqueue(PluginTensorDesc const* inputDesc, Plugin int32_t status = STATUS_FAILURE; // Our plugin outputs only one tensor void* output = outputs[0]; - int32_t batchSize = inputDesc[0].dims.d[0]; - status = proposalInference_gpu(stream, inputs[0], inputs[1], batchSize, mInputHeight, mInputWidth, mRpnHeight, - mRpnWidth, mMaxBoxNum, mPreNmsTopN, &mAnchorSizes[0], mAnchorSizeNum, &mAnchorRatios[0], mAnchorRatioNum, - mRpnStdScaling, mRpnStride, mBboxMinSize, mNmsIouThreshold, workspace, output); + int32_t batchSize = static_cast(inputDesc[0].dims.d[0]); + status = proposalInference_gpu(stream, inputs[0], 
inputs[1], batchSize, static_cast(mInputHeight), + static_cast(mInputWidth), static_cast(mRpnHeight), static_cast(mRpnWidth), + static_cast(mMaxBoxNum), static_cast(mPreNmsTopN), &mAnchorSizes[0], + static_cast(mAnchorSizeNum), &mAnchorRatios[0], static_cast(mAnchorRatioNum), + mRpnStdScaling, static_cast(mRpnStride), mBboxMinSize, mNmsIouThreshold, workspace, output); return status; } catch (std::exception const& e) @@ -429,7 +436,7 @@ bool ProposalPlugin::supportsFormat(DataType type, PluginFormat format) const no } bool ProposalDynamicPlugin::supportsFormatCombination( - int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept + int32_t pos, PluginTensorDesc const* inOut, int32_t nbInputs, int32_t /*nbOutputs*/) noexcept { try { @@ -477,9 +484,11 @@ IPluginV2Ext* ProposalPlugin::clone() const noexcept { try { - IPluginV2Ext* plugin = new ProposalPlugin(mInputHeight, mInputWidth, mRpnHeight, mRpnWidth, mRpnStdScaling, - mRpnStride, mBboxMinSize, mNmsIouThreshold, mPreNmsTopN, mMaxBoxNum, &mAnchorSizes[0], mAnchorSizeNum, - &mAnchorRatios[0], mAnchorRatioNum); + IPluginV2Ext* plugin = new ProposalPlugin(static_cast(mInputHeight), static_cast(mInputWidth), + static_cast(mRpnHeight), static_cast(mRpnWidth), mRpnStdScaling, + static_cast(mRpnStride), mBboxMinSize, mNmsIouThreshold, static_cast(mPreNmsTopN), + static_cast(mMaxBoxNum), &mAnchorSizes[0], static_cast(mAnchorSizeNum), &mAnchorRatios[0], + static_cast(mAnchorRatioNum)); plugin->setPluginNamespace(mNamespace.c_str()); return plugin; } @@ -494,9 +503,11 @@ IPluginV2DynamicExt* ProposalDynamicPlugin::clone() const noexcept { try { - auto* plugin = new ProposalDynamicPlugin(mInputHeight, mInputWidth, mRpnHeight, mRpnWidth, mRpnStdScaling, - mRpnStride, mBboxMinSize, mNmsIouThreshold, mPreNmsTopN, mMaxBoxNum, &mAnchorSizes[0], mAnchorSizeNum, - &mAnchorRatios[0], mAnchorRatioNum); + auto* plugin = new ProposalDynamicPlugin(static_cast(mInputHeight), static_cast(mInputWidth), + 
static_cast(mRpnHeight), static_cast(mRpnWidth), mRpnStdScaling, + static_cast(mRpnStride), mBboxMinSize, mNmsIouThreshold, static_cast(mPreNmsTopN), + static_cast(mMaxBoxNum), &mAnchorSizes[0], static_cast(mAnchorSizeNum), &mAnchorRatios[0], + static_cast(mAnchorRatioNum)); plugin->setPluginNamespace(mNamespace.c_str()); return plugin; } @@ -545,7 +556,7 @@ char const* ProposalDynamicPlugin::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index. DataType ProposalPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -561,7 +572,7 @@ DataType ProposalPlugin::getOutputDataType( } DataType ProposalDynamicPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { try { @@ -578,28 +589,29 @@ DataType ProposalDynamicPlugin::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool ProposalPlugin::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without // replication. 
-bool ProposalPlugin::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool ProposalPlugin::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } -void ProposalPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void ProposalPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t /*maxBatchSize*/) noexcept { try { PLUGIN_VALIDATE(nbInputs == 2); PLUGIN_VALIDATE(nbOutputs == 1); - mRpnHeight = inputDims->d[1]; - mRpnWidth = inputDims->d[2]; + mRpnHeight = static_cast(inputDims->d[1]); + mRpnWidth = static_cast(inputDims->d[2]); } catch (std::exception const& e) { @@ -607,15 +619,15 @@ void ProposalPlugin::configurePlugin(Dims const* inputDims, int32_t nbInputs, Di } } -void ProposalDynamicPlugin::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void ProposalDynamicPlugin::configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t nbOutputs) noexcept { try { PLUGIN_VALIDATE(nbInputs == 2); PLUGIN_VALIDATE(nbOutputs == 1); - mRpnHeight = in[0].desc.dims.d[2]; - mRpnWidth = in[0].desc.dims.d[3]; + mRpnHeight = static_cast(in[0].desc.dims.d[2]); + mRpnWidth = static_cast(in[0].desc.dims.d[3]); } catch (std::exception const& e) { @@ -626,7 +638,7 @@ void ProposalDynamicPlugin::configurePlugin( // Attach the plugin object to an execution context and grant the plugin the // access to some context resource. 
void ProposalPlugin::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -645,7 +657,7 @@ ProposalBasePluginCreator::ProposalBasePluginCreator() noexcept mPluginAttributes.emplace_back(PluginField("post_nms_top_n", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("anchor_sizes", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("anchor_ratios", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -674,7 +686,7 @@ PluginFieldCollection const* ProposalBasePluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* ProposalPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* ProposalPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -767,9 +779,9 @@ IPluginV2Ext* ProposalPluginCreator::createPlugin(char const* name, PluginFieldC PLUGIN_VALIDATE(roiMinSize >= 0.0F); PLUGIN_VALIDATE(nmsIouThreshold > 0.0F); - IPluginV2Ext* plugin - = new ProposalPlugin(inputHeight, inputWidth, kRPN_STD_SCALING, rpnStride, roiMinSize, nmsIouThreshold, - preNmsTopN, postNmsTopN, &anchorSizes[0], anchorSizes.size(), &anchorRatios[0], anchorRatios.size()); + IPluginV2Ext* plugin = new ProposalPlugin(inputHeight, inputWidth, kRPN_STD_SCALING, rpnStride, roiMinSize, + nmsIouThreshold, preNmsTopN, postNmsTopN, &anchorSizes[0], static_cast(anchorSizes.size()), + &anchorRatios[0], static_cast(anchorRatios.size())); plugin->setPluginNamespace(mNamespace.c_str()); return plugin; } @@ -781,7 +793,7 @@ IPluginV2Ext* ProposalPluginCreator::createPlugin(char const* name, PluginFieldC } IPluginV2DynamicExt* 
ProposalDynamicPluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -872,8 +884,8 @@ IPluginV2DynamicExt* ProposalDynamicPluginCreator::createPlugin( PLUGIN_VALIDATE(nmsIouThreshold > 0.0F); IPluginV2DynamicExt* plugin = new ProposalDynamicPlugin(inputHeight, inputWidth, kRPN_STD_SCALING, rpnStride, - roiMinSize, nmsIouThreshold, preNmsTopN, postNmsTopN, &anchorSizes[0], anchorSizes.size(), &anchorRatios[0], - anchorRatios.size()); + roiMinSize, nmsIouThreshold, preNmsTopN, postNmsTopN, &anchorSizes[0], + static_cast(anchorSizes.size()), &anchorRatios[0], static_cast(anchorRatios.size())); plugin->setPluginNamespace(mNamespace.c_str()); return plugin; } @@ -885,7 +897,7 @@ IPluginV2DynamicExt* ProposalDynamicPluginCreator::createPlugin( } IPluginV2Ext* ProposalPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { @@ -905,7 +917,7 @@ IPluginV2Ext* ProposalPluginCreator::deserializePlugin( } IPluginV2DynamicExt* ProposalDynamicPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/proposalPlugin/proposalPlugin.h b/plugin/proposalPlugin/proposalPlugin.h index 90f0f1071..fa3943bb9 100644 --- a/plugin/proposalPlugin/proposalPlugin.h +++ b/plugin/proposalPlugin/proposalPlugin.h @@ -82,7 +82,8 @@ class TRT_DEPRECATED ProposalPlugin : public IPluginV2Ext char const* getPluginNamespace() const noexcept override; - DataType getOutputDataType(int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override; + DataType getOutputDataType( + int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override; bool 
isOutputBroadcastAcrossBatch( int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept override; @@ -119,13 +120,19 @@ class TRT_DEPRECATED ProposalPlugin : public IPluginV2Ext class ProposalDynamicPlugin : public IPluginV2DynamicExt { public: - ProposalDynamicPlugin(int32_t inputHeight, int32_t inputWidth, int32_t rpnHeight, int32_t rpnWidth, float rpnStdScaling, - int32_t rpnStride, float bboxMinSize, float nmsIouThreshold, int32_t preNmsTopN, int32_t maxBoxNum, - float const* anchorSizes, int32_t ancSizeNum, float const* anchorRatios, int32_t ancRatioNum); + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; - ProposalDynamicPlugin(int32_t inputHeight, int32_t inputWidth, float rpnStdScaling, int32_t rpnStride, float bboxMinSize, - float nmsIouThreshold, int32_t preNmsTopN, int32_t maxBoxNum, float const* anchorSizes, int32_t ancSizeNum, - float const* anchorRatios, int32_t ancRatioNum); + ProposalDynamicPlugin(int32_t inputHeight, int32_t inputWidth, int32_t rpnHeight, int32_t rpnWidth, + float rpnStdScaling, int32_t rpnStride, float bboxMinSize, float nmsIouThreshold, int32_t preNmsTopN, + int32_t maxBoxNum, float const* anchorSizes, int32_t ancSizeNum, float const* anchorRatios, + int32_t ancRatioNum); + + ProposalDynamicPlugin(int32_t inputHeight, int32_t inputWidth, float rpnStdScaling, int32_t rpnStride, + float bboxMinSize, float nmsIouThreshold, int32_t preNmsTopN, int32_t maxBoxNum, float const* anchorSizes, + int32_t ancSizeNum, float const* anchorRatios, int32_t ancRatioNum); ProposalDynamicPlugin(void const* serialBuf, size_t serialSize); @@ -148,7 +155,8 @@ class ProposalDynamicPlugin : public IPluginV2DynamicExt char const* getPluginNamespace() const noexcept override; // IPluginV2Ext methods - DataType getOutputDataType(int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) 
const noexcept override; + DataType getOutputDataType( + int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept override; // IPluginV2DynamicExt methods IPluginV2DynamicExt* clone() const noexcept override; diff --git a/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.cpp b/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.cpp index 598faf949..38b23d60d 100644 --- a/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.cpp +++ b/plugin/pyramidROIAlignPlugin/pyramidROIAlignPlugin.cpp @@ -43,7 +43,7 @@ PyramidROIAlignPluginCreator::PyramidROIAlignPluginCreator() mPluginAttributes.emplace_back(PluginField("sampling_ratio", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("legacy", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -62,7 +62,7 @@ PluginFieldCollection const* PyramidROIAlignPluginCreator::getFieldNames() noexc return &mFC; } -IPluginV2Ext* PyramidROIAlignPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* PyramidROIAlignPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -147,7 +147,7 @@ IPluginV2Ext* PyramidROIAlignPluginCreator::createPlugin(char const* name, Plugi } IPluginV2Ext* PyramidROIAlignPluginCreator::deserializePlugin( - char const* name, void const* data, size_t length) noexcept + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -280,8 +280,8 @@ Dims PyramidROIAlign::getOutputDimensions(int32_t index, Dims const* inputs, int return result; } -int32_t PyramidROIAlign::enqueue( - int32_t batch_size, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t PyramidROIAlign::enqueue(int32_t batch_size, void const* const* inputs, void* const* outputs, + void* /*workspace*/, 
cudaStream_t stream) noexcept { void* const pooled = outputs[0]; cudaError_t status; @@ -296,7 +296,9 @@ int32_t PyramidROIAlign::enqueue( mAbsCoords = false; mSamplingRatio = 1; float const firstThreshold - = (224 * 224 * 2.F / (MaskRCNNConfig::IMAGE_SHAPE.d[1] * MaskRCNNConfig::IMAGE_SHAPE.d[2])) / (4.F * 4.F); + = (224 * 224 * 2.F + / static_cast(MaskRCNNConfig::IMAGE_SHAPE.d[1] * MaskRCNNConfig::IMAGE_SHAPE.d[2])) + / (4.F * 4.F); status = roiAlign(stream, batch_size, mImageSize, mFeatureLength, mROICount, firstThreshold, mTransformCoords, mAbsCoords, mSwapCoords, mPlusOneCoords, mSamplingRatio, inputs[0], &inputs[1], mFeatureSpatialSize, pooled, mPooledSize); @@ -309,7 +311,7 @@ int32_t PyramidROIAlign::enqueue( // plugin works with normalized ROI coordinates, the FPN scale must be normalized // by the input image size. float const scale = static_cast(mFPNScale); - float const normScale = sqrtf(scale * scale / (mImageSize.y * mImageSize.x)); + float const normScale = sqrtf(scale * scale / static_cast(mImageSize.y * mImageSize.x)); // Furthermore, the roiAlign kernel expects a first threshold instead. This is // the *area* of an ROI but for one level down, i.e. at the P2->P3 transition. float const firstThreshold = normScale * normScale / 4.F; @@ -395,7 +397,7 @@ void PyramidROIAlign::deserialize(int8_t const* data, size_t length) // Return the DataType of the plugin output at the requested index DataType PyramidROIAlign::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t /*index*/, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only DataType::kFLOAT is acceptable by the plugin layer return DataType::kFLOAT; @@ -403,21 +405,21 @@ DataType PyramidROIAlign::getOutputDataType( // Return true if output tensor is broadcast across a batch. 
bool PyramidROIAlign::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool PyramidROIAlign::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool PyramidROIAlign::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. -void PyramidROIAlign::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, - int32_t nbOutputs, DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void PyramidROIAlign::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat floatFormat, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(supportsFormat(inputTypes[0], floatFormat)); check_valid_inputs(inputDims, nbInputs); @@ -425,8 +427,8 @@ void PyramidROIAlign::configurePlugin(Dims const* inputDims, int32_t nbInputs, D PLUGIN_ASSERT(nbOutputs == 1); PLUGIN_ASSERT(nbInputs == 1 + mFeatureMapCount); - mROICount = inputDims[0].d[0]; - mFeatureLength = inputDims[1].d[0]; + mROICount = static_cast(inputDims[0].d[0]); + mFeatureLength = static_cast(inputDims[1].d[0]); for (size_t layer = 0; layer < mFeatureMapCount; ++layer) { @@ -436,7 +438,7 @@ void PyramidROIAlign::configurePlugin(Dims const* inputDims, int32_t nbInputs, D // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void PyramidROIAlign::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/regionPlugin/regionPlugin.cpp b/plugin/regionPlugin/regionPlugin.cpp index b7a96c080..a932c6503 100644 --- a/plugin/regionPlugin/regionPlugin.cpp +++ b/plugin/regionPlugin/regionPlugin.cpp @@ -70,22 +70,22 @@ struct SoftmaxTreeDeleter } // namespace -Region::Region(RegionParameters params) - : num(params.num) - , coords(params.coords) - , classes(params.classes) - , smTree(params.smTree, SoftmaxTreeDeleter()) +Region::Region(RegionParameters params_) + : num(params_.num) + , coords(params_.coords) + , classes(params_.classes) + , smTree(params_.smTree, SoftmaxTreeDeleter()) { } -Region::Region(RegionParameters params, int32_t C, int32_t H, int32_t W) - : num(params.num) - , coords(params.coords) - , classes(params.classes) - , smTree(params.smTree, SoftmaxTreeDeleter()) - , C(C) - , H(H) - , W(W) +Region::Region(RegionParameters params_, int32_t C_, int32_t H_, int32_t W_) + : num(params_.num) + , coords(params_.coords) + , classes(params_.classes) + , smTree(params_.smTree, SoftmaxTreeDeleter()) + , C(C_) + , H(H_) + , W(W_) { } @@ -237,8 +237,8 @@ Dims Region::getOutputDimensions(int32_t index, Dims const* inputs, int32_t nbIn return inputs[0]; } -int32_t Region::enqueue( - int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t Region::enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t stream) noexcept { void const* inputData = inputs[0]; void* outputData = outputs[0]; @@ -384,7 +384,7 @@ char const* Region::getPluginVersion() const noexcept return kREGION_PLUGIN_VERSION; } -size_t Region::getWorkspaceSize(int32_t maxBatchSize) const noexcept +size_t 
Region::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } @@ -398,8 +398,8 @@ IPluginV2Ext* Region::clone() const noexcept { try { - RegionParameters params{num, coords, classes, nullptr}; - Region* plugin = new Region(params, C, H, W); + RegionParameters params_{num, coords, classes, nullptr}; + Region* plugin = new Region(params_, C, H, W); plugin->setPluginNamespace(mPluginNamespace.c_str()); plugin->setSoftmaxTree(smTree); @@ -424,7 +424,8 @@ char const* Region::getPluginNamespace() const noexcept } // Return the DataType of the plugin output at the requested index -DataType Region::getOutputDataType(int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept +DataType Region::getOutputDataType( + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index == 0); return DataType::kFLOAT; @@ -432,28 +433,28 @@ DataType Region::getOutputDataType(int32_t index, nvinfer1::DataType const* inpu // Return true if output tensor is broadcast across a batch. bool Region::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool Region::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool Region::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
-void Region::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void Region::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, int32_t nbOutputs, + DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat floatFormat, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(*inputTypes == DataType::kFLOAT && floatFormat == PluginFormat::kLINEAR); PLUGIN_ASSERT(nbInputs == 1); PLUGIN_ASSERT(nbOutputs == 1); - C = inputDims[0].d[0]; - H = inputDims[0].d[1]; - W = inputDims[0].d[2]; + C = static_cast(inputDims[0].d[0]); + H = static_cast(inputDims[0].d[1]); + W = static_cast(inputDims[0].d[2]); /* * In the below assertion, 1 stands for the objectness of the bounding box * We should also @@ -464,7 +465,7 @@ void Region::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
void Region::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -479,7 +480,7 @@ RegionPluginCreator::RegionPluginCreator() mPluginAttributes.emplace_back(PluginField("classes", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("smTree", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -498,7 +499,7 @@ PluginFieldCollection const* RegionPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* RegionPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* RegionPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -541,7 +542,7 @@ IPluginV2Ext* RegionPluginCreator::createPlugin(char const* name, PluginFieldCol } IPluginV2Ext* RegionPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/reorgPlugin/reorgPlugin.cpp b/plugin/reorgPlugin/reorgPlugin.cpp index 7eb1a0e6d..0daca0384 100644 --- a/plugin/reorgPlugin/reorgPlugin.cpp +++ b/plugin/reorgPlugin/reorgPlugin.cpp @@ -73,7 +73,7 @@ char const* Reorg::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index template DataType Reorg::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only 1 input and 1 output from the plugin layer PLUGIN_ASSERT(index == 0); @@ -85,7 +85,7 @@ DataType Reorg::getOutputDataType( // Attach 
the plugin object to an execution context and grant the plugin the access to some context resource. template void Reorg::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } @@ -95,8 +95,8 @@ void Reorg::detachFromContext() noexcept { } -ReorgDynamic::ReorgDynamic(int32_t stride) - : Reorg(stride) +ReorgDynamic::ReorgDynamic(int32_t stride_) + : Reorg(stride_) { } @@ -119,8 +119,8 @@ size_t ReorgDynamic::getSerializationSize() const noexcept return sizeof(int32_t); } -size_t ReorgDynamic::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ReorgDynamic::getWorkspaceSize(nvinfer1::PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -166,22 +166,23 @@ void ReorgDynamic::configurePlugin( PLUGIN_ASSERT(out->desc.format == PluginFormat::kLINEAR); PLUGIN_ASSERT(stride > 0); - int32_t H = in->desc.dims.d[2]; - int32_t W = in->desc.dims.d[3]; - PLUGIN_ASSERT(H % stride == 0); - PLUGIN_ASSERT(W % stride == 0); + int32_t H_ = static_cast(in->desc.dims.d[2]); + int32_t W_ = static_cast(in->desc.dims.d[3]); + PLUGIN_ASSERT(H_ % stride == 0); + PLUGIN_ASSERT(W_ % stride == 0); } -int32_t ReorgDynamic::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, nvinfer1::PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t ReorgDynamic::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, + nvinfer1::PluginTensorDesc const* /*outputDesc*/, void const* const* inputs, void* const* outputs, + void* /*workspace*/, cudaStream_t stream) noexcept { void const* inputData = inputs[0]; void* outputData = outputs[0]; - int32_t const N = 
inputDesc[0].dims.d[0]; - int32_t const C = inputDesc[0].dims.d[1]; - int32_t const H = inputDesc[0].dims.d[2]; - int32_t const W = inputDesc[0].dims.d[3]; - pluginStatus_t status = reorgInference(stream, N, C, H, W, stride, inputData, outputData); + int32_t const N = static_cast(inputDesc[0].dims.d[0]); + int32_t const C_ = static_cast(inputDesc[0].dims.d[1]); + int32_t const H_ = static_cast(inputDesc[0].dims.d[2]); + int32_t const W_ = static_cast(inputDesc[0].dims.d[3]); + pluginStatus_t status = reorgInference(stream, N, C_, H_, W_, stride, inputData, outputData); return status; } @@ -200,16 +201,16 @@ IPluginV2DynamicExt* ReorgDynamic::clone() const noexcept return nullptr; } -ReorgStatic::ReorgStatic(int32_t stride) - : Reorg(stride) +ReorgStatic::ReorgStatic(int32_t stride_) + : Reorg(stride_) { } -ReorgStatic::ReorgStatic(int32_t C, int32_t H, int32_t W, int32_t stride) - : Reorg(stride) - , C(C) - , H(H) - , W(W) +ReorgStatic::ReorgStatic(int32_t C_, int32_t H_, int32_t W_, int32_t stride_) + : Reorg(stride_) + , C(C_) + , H(H_) + , W(W_) { } @@ -229,7 +230,7 @@ char const* ReorgStatic::getPluginVersion() const noexcept return kREORG_PLUGIN_STATIC_VERSION; } -size_t ReorgStatic::getWorkspaceSize(int32_t maxBatchSize) const noexcept +size_t ReorgStatic::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } @@ -257,8 +258,8 @@ Dims ReorgStatic::getOutputDimensions(int32_t index, Dims const* inputs, int32_t return Dims3(inputs[0].d[0] * stride * stride, inputs[0].d[1] / stride, inputs[0].d[2] / stride); } -int32_t ReorgStatic::enqueue( - int32_t batchSize, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t ReorgStatic::enqueue(int32_t batchSize, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t stream) noexcept { void const* inputData = inputs[0]; void* outputData = outputs[0]; @@ -288,29 +289,29 @@ IPluginV2Ext* ReorgStatic::clone() const noexcept // 
Return true if output tensor is broadcast across a batch. bool ReorgStatic::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool ReorgStatic::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool ReorgStatic::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. -void ReorgStatic::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void ReorgStatic::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* inputTypes, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat floatFormat, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(*inputTypes == DataType::kFLOAT && floatFormat == PluginFormat::kLINEAR); PLUGIN_ASSERT(nbInputs == 1); PLUGIN_ASSERT(nbOutputs == 1); PLUGIN_ASSERT(stride > 0); - C = inputDims[0].d[0]; - H = inputDims[0].d[1]; - W = inputDims[0].d[2]; + C = static_cast(inputDims[0].d[0]); + H = static_cast(inputDims[0].d[1]); + W = static_cast(inputDims[0].d[2]); PLUGIN_ASSERT(H % stride == 0); PLUGIN_ASSERT(W % stride == 0); } @@ -321,7 +322,7 @@ ReorgPluginCreator::ReorgPluginCreator() mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("stride", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = 
mPluginAttributes.data(); } @@ -352,7 +353,8 @@ PluginFieldCollection const* ReorgPluginCreator::getFieldNames() n } template -IPluginV2Ext* ReorgPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* ReorgPluginCreator::createPlugin( + char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -377,7 +379,7 @@ IPluginV2Ext* ReorgPluginCreator::createPlugin(char const* name, P template IPluginV2Ext* ReorgPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/reorgPlugin/reorgPlugin.h b/plugin/reorgPlugin/reorgPlugin.h index 7cc6d8cd2..5ee3e05bf 100644 --- a/plugin/reorgPlugin/reorgPlugin.h +++ b/plugin/reorgPlugin/reorgPlugin.h @@ -92,6 +92,11 @@ class TRT_DEPRECATED ReorgStatic : public Reorg class ReorgDynamic : public Reorg { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + ReorgDynamic(int32_t stride); ReorgDynamic(void const* buffer, size_t length); diff --git a/plugin/resizeNearestPlugin/resizeNearestPlugin.cpp b/plugin/resizeNearestPlugin/resizeNearestPlugin.cpp index a918f41a9..4cd7618a3 100644 --- a/plugin/resizeNearestPlugin/resizeNearestPlugin.cpp +++ b/plugin/resizeNearestPlugin/resizeNearestPlugin.cpp @@ -38,7 +38,7 @@ ResizeNearestPluginCreator::ResizeNearestPluginCreator() mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("scale", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -57,7 +57,7 @@ PluginFieldCollection const* ResizeNearestPluginCreator::getFieldNames() noexcep return &mFC; } -IPluginV2Ext* 
ResizeNearestPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* ResizeNearestPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -81,7 +81,8 @@ IPluginV2Ext* ResizeNearestPluginCreator::createPlugin(char const* name, PluginF return nullptr; } -IPluginV2Ext* ResizeNearestPluginCreator::deserializePlugin(char const* name, void const* data, size_t length) noexcept +IPluginV2Ext* ResizeNearestPluginCreator::deserializePlugin( + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -116,7 +117,7 @@ Dims ResizeNearest::getOutputDimensions(int32_t index, Dims const* inputDims, in { if (d == input.nbDims - 2 || d == input.nbDims - 1) { - output.d[d] = int32_t(input.d[d] * mScale); + output.d[d] = static_cast(static_cast(input.d[d]) * mScale); } else { @@ -222,19 +223,21 @@ bool ResizeNearest::supportsFormat(DataType type, PluginFormat format) const noe return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); } -int32_t ResizeNearest::enqueue( - int32_t batch_size, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t ResizeNearest::enqueue(int32_t batch_size, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t stream) noexcept { - int32_t nchan = mOutputDims.d[0]; + int32_t nchan = static_cast(mOutputDims.d[0]); float scale = mScale; int2 osize = {dimToInt32(mOutputDims.d[2]), dimToInt32(mOutputDims.d[1])}; - int32_t istride = mInputDims.d[2]; - int32_t ostride = mOutputDims.d[2]; - int32_t ibatchstride = mInputDims.d[1] * istride; - int32_t obatchstride = mOutputDims.d[1] * ostride; + int32_t istride = static_cast(mInputDims.d[2]); + int32_t ostride = static_cast(mOutputDims.d[2]); + int32_t ibatchstride = static_cast(mInputDims.d[1]) * istride; + int32_t obatchstride = static_cast(mOutputDims.d[1]) * ostride; dim3 block(32, 16); - dim3 grid((osize.x - 1) 
/ block.x + 1, (osize.y - 1) / block.y + 1, std::min(batch_size * nchan, 65535)); + dim3 grid(static_cast((osize.x - 1) / static_cast(block.x) + 1), + static_cast((osize.y - 1) / static_cast(block.y) + 1), + static_cast(std::min(batch_size * nchan, 65535))); resizeNearest(grid, block, stream, batch_size * nchan, scale, osize, static_cast(inputs[0]), istride, ibatchstride, static_cast(outputs[0]), ostride, obatchstride); @@ -244,7 +247,7 @@ int32_t ResizeNearest::enqueue( // Return the DataType of the plugin output at the requested index DataType ResizeNearest::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only 1 input and 1 output from the plugin layer PLUGIN_ASSERT(index == 0); @@ -255,21 +258,21 @@ DataType ResizeNearest::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool ResizeNearest::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool ResizeNearest::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool ResizeNearest::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
void ResizeNearest::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept + DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, bool const* /*inputIsBroadcast*/, + bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(nbInputs == 1); mInputDims = inputDims[0]; @@ -280,7 +283,7 @@ void ResizeNearest::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dim // Attach the plugin object to an execution context and grant the plugin the access to some context resource. void ResizeNearest::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/roiAlignPlugin/roiAlignPlugin.cpp b/plugin/roiAlignPlugin/roiAlignPlugin.cpp index b563eea1c..a0bee084b 100644 --- a/plugin/roiAlignPlugin/roiAlignPlugin.cpp +++ b/plugin/roiAlignPlugin/roiAlignPlugin.cpp @@ -42,7 +42,7 @@ ROIAlignV3PluginCreator::ROIAlignV3PluginCreator() mPluginAttributes.emplace_back(PluginField("sampling_ratio", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("spatial_scale", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -62,7 +62,7 @@ PluginFieldCollection const* ROIAlignV3PluginCreator::getFieldNames() noexcept } IPluginV3* ROIAlignV3PluginCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* /*name*/, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { @@ -210,8 
+210,8 @@ int32_t ROIAlignV3::getNbOutputs() const noexcept return 1; } -int32_t ROIAlignV3::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +int32_t ROIAlignV3::configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { return 0; } @@ -256,8 +256,8 @@ int32_t ROIAlignV3::getOutputDataTypes( return 0; } -int32_t ROIAlignV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs, - int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept +int32_t ROIAlignV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* /*shapeInputs*/, + int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept { PLUGIN_ASSERT(inputs != nullptr); PLUGIN_ASSERT(nbInputs == 3); @@ -281,8 +281,8 @@ int32_t ROIAlignV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, D return 0; } -int32_t ROIAlignV3::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t ROIAlignV3::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* /*outputDesc*/, + void const* const* inputs, void* const* outputs, void* /*workspace*/, cudaStream_t stream) noexcept { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr); @@ -346,15 +346,15 @@ int32_t ROIAlignV3::onShapeChange( // Check batch_indices matches rois in length PLUGIN_ASSERT(rois.d[0] == batchIndices.d[0]); - mFeatureLength = in[0].dims.d[1]; - mHeight = in[0].dims.d[2]; - mWidth = in[0].dims.d[3]; + mFeatureLength = static_cast(in[0].dims.d[1]); + mHeight = static_cast(in[0].dims.d[2]); + mWidth = static_cast(in[0].dims.d[3]); - mROICount = in[1].dims.d[0]; + mROICount 
= static_cast(in[1].dims.d[0]); return 0; } -IPluginV3* ROIAlignV3::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* ROIAlignV3::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -368,13 +368,13 @@ PluginFieldCollection const* ROIAlignV3::getFieldsToSerialize() noexcept mDataToSerialize.emplace_back("output_width", &mOutputWidth, PluginFieldType::kINT32, 1); mDataToSerialize.emplace_back("sampling_ratio", &mSamplingRatio, PluginFieldType::kINT32, 1); mDataToSerialize.emplace_back("spatial_scale", &mSpatialScale, PluginFieldType::kFLOAT32, 1); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } -size_t ROIAlignV3::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ROIAlignV3::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } diff --git a/plugin/roiAlignPlugin/roiAlignPluginLegacy.cpp b/plugin/roiAlignPlugin/roiAlignPluginLegacy.cpp index 3051137b9..56d592885 100644 --- a/plugin/roiAlignPlugin/roiAlignPluginLegacy.cpp +++ b/plugin/roiAlignPlugin/roiAlignPluginLegacy.cpp @@ -43,7 +43,7 @@ ROIAlignPluginCreator::ROIAlignPluginCreator() mPluginAttributes.emplace_back(PluginField("sampling_ratio", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("spatial_scale", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -62,7 +62,7 @@ PluginFieldCollection const* ROIAlignPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2DynamicExt* ROIAlignPluginCreator::createPlugin(char 
const* name, PluginFieldCollection const* fc) noexcept +IPluginV2DynamicExt* ROIAlignPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -121,7 +121,7 @@ IPluginV2DynamicExt* ROIAlignPluginCreator::createPlugin(char const* name, Plugi } IPluginV2DynamicExt* ROIAlignPluginCreator::deserializePlugin( - char const* name, void const* data, size_t length) noexcept + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -159,8 +159,8 @@ void ROIAlign::destroy() noexcept delete this; } -size_t ROIAlign::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ROIAlign::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -417,9 +417,9 @@ void ROIAlign::configurePlugin( checkValidInputs(in, nbInputs); - mFeatureLength = in[0].desc.dims.d[1]; - mHeight = in[0].desc.dims.d[2]; - mWidth = in[0].desc.dims.d[3]; + mFeatureLength = static_cast(in[0].desc.dims.d[1]); + mHeight = static_cast(in[0].desc.dims.d[2]); + mWidth = static_cast(in[0].desc.dims.d[3]); - mROICount = in[1].desc.dims.d[0]; + mROICount = static_cast(in[1].desc.dims.d[0]); } diff --git a/plugin/roiAlignPlugin/roiAlignPluginLegacy.h b/plugin/roiAlignPlugin/roiAlignPluginLegacy.h index fc1cf5723..739dfefb7 100644 --- a/plugin/roiAlignPlugin/roiAlignPluginLegacy.h +++ b/plugin/roiAlignPlugin/roiAlignPluginLegacy.h @@ -33,6 +33,11 @@ namespace plugin class ROIAlign : public IPluginV2DynamicExt { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + ROIAlign(int32_t outputHeight, int32_t outputWidth, int32_t samplingRatio, int32_t mode, float spatialScale, int32_t aligned); ROIAlign(void const* data, size_t 
length); diff --git a/plugin/scatterElementsPlugin/scatterElementsPlugin.cpp b/plugin/scatterElementsPlugin/scatterElementsPlugin.cpp index fef8c0ca8..c7fb06bfd 100644 --- a/plugin/scatterElementsPlugin/scatterElementsPlugin.cpp +++ b/plugin/scatterElementsPlugin/scatterElementsPlugin.cpp @@ -93,8 +93,8 @@ char const* ScatterElementsPluginV3::getPluginVersion() const noexcept } int32_t ScatterElementsPluginV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, - IExprBuilder& exprBuilder) noexcept + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, + IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -112,7 +112,7 @@ int32_t ScatterElementsPluginV3::getOutputShapes(DimsExprs const* inputs, int32_ } int32_t ScatterElementsPluginV3::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept + void const* const* inputs, void* const* outputs, void* /*workspace*/, cudaStream_t stream) noexcept { try { @@ -153,10 +153,10 @@ PluginFieldCollection const* ScatterElementsPluginV3::getFieldsToSerialize() noe mDataToSerialize.clear(); // "reduction" field is serialized as string mDataToSerialize.emplace_back("reduction", kREDUCE_ENUM_TO_STR.at(mReduction).c_str(), PluginFieldType::kCHAR, - kREDUCE_ENUM_TO_STR.at(mReduction).size()); + static_cast(kREDUCE_ENUM_TO_STR.at(mReduction).size())); mDataToSerialize.emplace_back("axis", &mAxis, PluginFieldType::kINT32, 1); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -205,14 +205,14 @@ ScatterElementsPluginV3* ScatterElementsPluginV3::clone() noexcept return nullptr; } -IPluginV3* 
ScatterElementsPluginV3::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* ScatterElementsPluginV3::attachToContext(IPluginResourceContext* /*context*/) noexcept { ScatterElementsPluginV3* obj = clone(); return obj; } -int32_t ScatterElementsPluginV3::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +int32_t ScatterElementsPluginV3::configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { try { @@ -244,8 +244,8 @@ int32_t ScatterElementsPluginV3::getOutputDataTypes( return pluginStatus_t::STATUS_FAILURE; } -size_t ScatterElementsPluginV3::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ScatterElementsPluginV3::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -284,7 +284,7 @@ ScatterElementsPluginV3Creator::ScatterElementsPluginV3Creator() gPluginAttributes.clear(); gPluginAttributes.emplace_back(PluginField("reduction")); gPluginAttributes.emplace_back(PluginField("axis")); - gFC.nbFields = gPluginAttributes.size(); + gFC.nbFields = static_cast(gPluginAttributes.size()); gFC.fields = gPluginAttributes.data(); } @@ -315,7 +315,7 @@ void ScatterElementsPluginV3Creator::setPluginNamespace(char const* libNamespace } IPluginV3* ScatterElementsPluginV3Creator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* /*name*/, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { std::string reductionArg; int32_t axisArg = 0; diff --git a/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.cpp b/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.cpp index 
5a0b13724..c673e656d 100644 --- a/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.cpp +++ b/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.cpp @@ -80,7 +80,7 @@ char const* ScatterElementsPluginV2::getPluginVersion() const noexcept } DimsExprs ScatterElementsPluginV2::getOutputDimensions( - int32_t index, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t index, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -99,7 +99,7 @@ DimsExprs ScatterElementsPluginV2::getOutputDimensions( } int32_t ScatterElementsPluginV2::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, - void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept + void const* const* inputs, void* const* outputs, void* /*workspace*/, cudaStream_t stream) noexcept { try { @@ -173,8 +173,8 @@ IPluginV2DynamicExt* ScatterElementsPluginV2::clone() const noexcept return plugin; } -void ScatterElementsPluginV2::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void ScatterElementsPluginV2::configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t nbInputs, + DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { try { @@ -200,8 +200,8 @@ DataType ScatterElementsPluginV2::getOutputDataType( return inputTypes[kDATA_TENSOR_IDX]; } -size_t ScatterElementsPluginV2::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ScatterElementsPluginV2::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -225,7 +225,7 @@ ScatterElementsPluginV2Creator::ScatterElementsPluginV2Creator() gPluginAttributes.clear(); 
gPluginAttributes.emplace_back(PluginField("reduction")); gPluginAttributes.emplace_back(PluginField("axis")); - gFC.nbFields = gPluginAttributes.size(); + gFC.nbFields = static_cast(gPluginAttributes.size()); gFC.fields = gPluginAttributes.data(); } @@ -255,7 +255,7 @@ void ScatterElementsPluginV2Creator::setPluginNamespace(char const* libNamespace } IPluginV2DynamicExt* ScatterElementsPluginV2Creator::createPlugin( - char const* name, PluginFieldCollection const* fc) noexcept + char const* /*name*/, PluginFieldCollection const* fc) noexcept { std::string reductionArg; int32_t axisArg = 0; @@ -299,7 +299,7 @@ IPluginV2DynamicExt* ScatterElementsPluginV2Creator::createPlugin( } IPluginV2DynamicExt* ScatterElementsPluginV2Creator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { ScatterElementsPluginV2* plugin = new ScatterElementsPluginV2(serialData, serialLength); plugin->setPluginNamespace(mNamespace.c_str()); diff --git a/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.h b/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.h index 2c5aa6e1a..7179a7302 100644 --- a/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.h +++ b/plugin/scatterElementsPlugin/scatterElementsPluginLegacy.h @@ -29,6 +29,11 @@ namespace plugin class ScatterElementsPluginV2 final : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + ScatterElementsPluginV2() = delete; ScatterElementsPluginV2(ScatterElementsPluginV2 const&) = delete; ScatterElementsPluginV2(std::string const&, int32_t); diff --git a/plugin/scatterPlugin/scatterPlugin.cpp b/plugin/scatterPlugin/scatterPlugin.cpp index 100d69c57..9ddc83d9b 100644 --- 
a/plugin/scatterPlugin/scatterPlugin.cpp +++ b/plugin/scatterPlugin/scatterPlugin.cpp @@ -39,7 +39,7 @@ int32_t ScatterND::getNbOutputs() const noexcept } DimsExprs ScatterND::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t /*outputIndex*/, DimsExprs const* inputs, int32_t /*nbInputs*/, IExprBuilder& /*exprBuilder*/) noexcept { // output should have same dimensions as data tensor DimsExprs ret = inputs[dataTensorIdx]; @@ -77,8 +77,8 @@ bool ScatterND::supportsFormatCombination( return ret; } -void ScatterND::configurePlugin( - DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, int32_t nbOutputs) noexcept +void ScatterND::configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept { } @@ -87,13 +87,13 @@ int32_t ScatterND::calculateNumSlices(Dims indexTensorDims) const noexcept int32_t nSlices = 1; for (int32_t i = 0; i < indexTensorDims.nbDims - 1; i++) { - nSlices *= indexTensorDims.d[i]; + nSlices *= static_cast(indexTensorDims.d[i]); } return nSlices; } -size_t ScatterND::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t ScatterND::getWorkspaceSize(PluginTensorDesc const* inputs, int32_t /*nbInputs*/, + PluginTensorDesc const* outputs, int32_t /*nbOutputs*/) const noexcept { int32_t nSlices = calculateNumSlices(inputs[indexTensorIdx].dims); // transformCoeffs + transformed indices @@ -107,7 +107,7 @@ void ScatterND::calculateTransformCoeff( for (int32_t i = indexRank - 1, nIndx = 1; i >= 0; i--) { pitches.push_back(nIndx); - nIndx *= dataTensorDims.d[i]; + nIndx *= static_cast(dataTensorDims.d[i]); } std::reverse(pitches.begin(), pitches.end()); // last dimension pitch is always one (assuming linear mem) @@ -120,9 +120,9 @@ int32_t 
ScatterND::calculateCopySize(Dims const& dataDims) const noexcept int32_t copySize = 1; for (int32_t i = 0; i < dataDims.nbDims; i++) { - copySize *= dataDims.d[i]; + copySize *= static_cast(dataDims.d[i]); } - copySize *= sizeof(float); + copySize *= static_cast(sizeof(float)); return copySize; } @@ -138,7 +138,7 @@ int32_t ScatterND::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc c Dims dataDims = inputDesc[dataTensorIdx].dims; - int32_t indexRank = IndexDims.d[IndexDims.nbDims - 1]; + int32_t indexRank = static_cast(IndexDims.d[IndexDims.nbDims - 1]); PLUGIN_ASSERT(indexRank <= dataDims.nbDims); int32_t nSlices = calculateNumSlices(IndexDims); @@ -163,7 +163,7 @@ int32_t ScatterND::enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc c for (int32_t i = indexRank; i < dataDims.nbDims; i++) { - rowSize *= dataDims.d[i]; + rowSize *= static_cast(dataDims.d[i]); } calculateTransformCoeff(dataDims, indexRank, transformCoeff); @@ -180,7 +180,7 @@ size_t ScatterND::getSerializationSize() const noexcept return 0; } -void ScatterND::serialize(void* buffer) const noexcept +void ScatterND::serialize(void* /*buffer*/) const noexcept { return; } @@ -198,14 +198,15 @@ char const* ScatterND::getPluginNamespace() const noexcept // Return the DataType of the plugin output at the requested index DataType ScatterND::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { PLUGIN_ASSERT(index == 0); return inputTypes[dataTensorIdx]; } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. 
-void ScatterND::attachToContext(cudnnContext* cudnn, cublasContext* cublas, IGpuAllocator* gpuAllocator) noexcept +void ScatterND::attachToContext( + cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, IGpuAllocator* /*gpuAllocator*/) noexcept { return; } @@ -265,7 +266,7 @@ PluginFieldCollection const* ScatterNDPluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* ScatterNDPluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* ScatterNDPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* /*fc*/) noexcept { try { @@ -281,7 +282,7 @@ IPluginV2Ext* ScatterNDPluginCreator::createPlugin(char const* name, PluginField } IPluginV2Ext* ScatterNDPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* /*serialData*/, size_t /*serialLength*/) noexcept { try { diff --git a/plugin/scatterPlugin/scatterPlugin.h b/plugin/scatterPlugin/scatterPlugin.h index 8c979e511..4983aa0bf 100644 --- a/plugin/scatterPlugin/scatterPlugin.h +++ b/plugin/scatterPlugin/scatterPlugin.h @@ -29,6 +29,11 @@ namespace plugin class ScatterND : public IPluginV2DynamicExt { public: + using IPluginV2DynamicExt::configurePlugin; + using IPluginV2DynamicExt::getOutputDimensions; + using IPluginV2DynamicExt::getWorkspaceSize; + using IPluginV2DynamicExt::enqueue; + ScatterND(); ~ScatterND() override = default; diff --git a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.cpp b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.cpp index 9e8a7ff42..44d15c176 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.cpp +++ b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPlugin.cpp @@ -223,7 +223,7 @@ PluginFieldCollection const* SkipLayerNormInterleavedPluginBase::getFieldsToSeri mDataToSerialize.emplace_back( "gamma", static_cast(mGamma.values), PluginFieldType::kFLOAT16, 
mGamma.count); PLUGIN_ASSERT(mGamma.type == kPARAM_TYPE); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast<int32_t>(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; } @@ -265,7 +265,7 @@ int32_t SkipLayerNormInterleavedPluginBase::onShapeChange( return pluginStatus_t::STATUS_FAILURE; } -IPluginV3* SkipLayerNormInterleavedPluginBase::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* SkipLayerNormInterleavedPluginBase::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -284,8 +284,8 @@ int32_t SkipLayerNormInterleavedPluginHFace::enqueue(PluginTensorDesc const* inp auto const oDesc = outputDesc[0]; checkDescs(iDesc, sDesc, oDesc); - const int32_t ld = iDesc.dims.d[1]; - const int32_t total = iDesc.dims.d[2]; + const int32_t ld = static_cast<int32_t>(iDesc.dims.d[1]); + const int32_t total = static_cast<int32_t>(iDesc.dims.d[2]); float const dqScaleIn = iDesc.scale; float const dqScaleSkip = sDesc.scale; float const qScale = 1.F / oDesc.scale; @@ -326,8 +326,8 @@ int32_t SkipLayerNormInterleavedPluginMTron::enqueue(PluginTensorDesc const* inp checkDescs(iDesc, sDesc, oDesc); PLUGIN_VALIDATE(std::equal(iDesc.dims.d, iDesc.dims.d + iDesc.dims.nbDims, pDesc.dims.d)); - const int32_t ld = iDesc.dims.d[1]; - const int32_t total = iDesc.dims.d[2]; + const int32_t ld = static_cast<int32_t>(iDesc.dims.d[1]); + const int32_t total = static_cast<int32_t>(iDesc.dims.d[2]); float const dqScaleIn = iDesc.scale; float const dqScaleSkip = sDesc.scale; float const qScale = 1.F / oDesc.scale; @@ -395,8 +395,8 @@ bool SkipLayerNormInterleavedPluginBase::supportsFormatCombination( } int32_t SkipLayerNormInterleavedPluginBase::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, - IExprBuilder& exprBuilder) noexcept + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs*
outputs, int32_t nbOutputs, + IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -438,14 +438,14 @@ int32_t SkipLayerNormInterleavedPluginBase::getOutputDataTypes( return pluginStatus_t::STATUS_FAILURE; } -int32_t SkipLayerNormInterleavedPluginBase::configurePlugin(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +int32_t SkipLayerNormInterleavedPluginBase::configurePlugin(DynamicPluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -size_t SkipLayerNormInterleavedPluginBase::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t SkipLayerNormInterleavedPluginBase::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -493,7 +493,7 @@ SkipLayerNormInterleavedPluginBaseCreator::SkipLayerNormInterleavedPluginBaseCre mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("beta")); mPluginAttributes.emplace_back(PluginField("gamma")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -528,7 +528,7 @@ PluginFieldCollection const* SkipLayerNormInterleavedPluginBaseCreator::getField } IPluginV3* SkipLayerNormInterleavedPluginHFaceCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { @@ -548,7 +548,7 @@ IPluginV3* SkipLayerNormInterleavedPluginHFaceCreator::createPlugin( } IPluginV3* SkipLayerNormInterleavedPluginMTronCreator::createPlugin( - char const* name, PluginFieldCollection const* fc, 
TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.cpp b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.cpp index 9c776a84b..b94294d5e 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.cpp +++ b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.cpp @@ -187,7 +187,7 @@ IPluginV2DynamicExt* SkipLayerNormInterleavedPluginMTronLegacy::clone() const no } DimsExprs SkipLayerNormInterleavedPluginBaseLegacy::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -259,8 +259,8 @@ void SkipLayerNormInterleavedPluginBaseLegacy::configurePlugin(DynamicPluginTens } } -size_t SkipLayerNormInterleavedPluginBaseLegacy::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t SkipLayerNormInterleavedPluginBaseLegacy::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, + int32_t /*nbInputs*/, PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -279,8 +279,8 @@ int32_t SkipLayerNormInterleavedPluginHFaceLegacy::enqueue(PluginTensorDesc cons auto const oDesc = outputDesc[0]; checkDescs(iDesc, sDesc, oDesc); - int32_t const ld = iDesc.dims.d[1]; - int32_t const total = iDesc.dims.d[2]; + int32_t const ld = static_cast(iDesc.dims.d[1]); + int32_t const total = static_cast(iDesc.dims.d[2]); float const dqScaleIn = iDesc.scale; float const dqScaleSkip = sDesc.scale; float const qScale = 1.F / oDesc.scale; @@ -321,8 +321,8 @@ int32_t SkipLayerNormInterleavedPluginMTronLegacy::enqueue(PluginTensorDesc cons checkDescs(iDesc, sDesc, oDesc); 
PLUGIN_VALIDATE(std::equal(iDesc.dims.d, iDesc.dims.d + iDesc.dims.nbDims, pDesc.dims.d)); - int32_t const ld = iDesc.dims.d[1]; - int32_t const total = iDesc.dims.d[2]; + int32_t const ld = static_cast(iDesc.dims.d[1]); + int32_t const total = static_cast(iDesc.dims.d[2]); float const dqScaleIn = iDesc.scale; float const dqScaleSkip = sDesc.scale; float const qScale = 1.F / oDesc.scale; @@ -481,7 +481,7 @@ SkipLayerNormInterleavedPluginBaseLegacyCreator::SkipLayerNormInterleavedPluginB mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("beta")); mPluginAttributes.emplace_back(PluginField("gamma")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.h b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.h index e05a15a0b..e542d7194 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.h +++ b/plugin/skipLayerNormPlugin/skipLayerNormInt8InterleavedPluginLegacy.h @@ -51,6 +51,11 @@ int32_t launch_large_mtron(cudaStream_t stream, int32_t const ld, int32_t const class SkipLayerNormInterleavedPluginBaseLegacy : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + SkipLayerNormInterleavedPluginBaseLegacy( std::string const& name, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma); @@ -99,6 +104,11 @@ class SkipLayerNormInterleavedPluginBaseLegacy : public nvinfer1::IPluginV2Dynam class SkipLayerNormInterleavedPluginHFaceLegacy : public SkipLayerNormInterleavedPluginBaseLegacy { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using 
nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + SkipLayerNormInterleavedPluginHFaceLegacy( std::string const& name, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma); @@ -124,6 +134,11 @@ class SkipLayerNormInterleavedPluginHFaceLegacy : public SkipLayerNormInterleave class SkipLayerNormInterleavedPluginMTronLegacy : public SkipLayerNormInterleavedPluginBaseLegacy { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + SkipLayerNormInterleavedPluginMTronLegacy( std::string const& name, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma); diff --git a/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp b/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp index ac599ce3c..e274090d8 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp +++ b/plugin/skipLayerNormPlugin/skipLayerNormPlugin.cpp @@ -107,8 +107,9 @@ IPluginV3* SkipLayerNormPluginV3::clone() noexcept return nullptr; } -int32_t SkipLayerNormPluginV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs, - int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept +int32_t SkipLayerNormPluginV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t /*nbOutputs*/, + IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -168,20 +169,20 @@ bool SkipLayerNormPluginV3::supportsFormatCombination( return false; } -int32_t SkipLayerNormPluginV3::configurePlugin(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +int32_t SkipLayerNormPluginV3::configurePlugin(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, 
+ DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -size_t SkipLayerNormPluginV3::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t SkipLayerNormPluginV3::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } int32_t SkipLayerNormPluginV3::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* workspace, + nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* /*workspace*/, cudaStream_t stream) noexcept { int32_t status = -1; @@ -189,7 +190,7 @@ int32_t SkipLayerNormPluginV3::enqueue(nvinfer1::PluginTensorDesc const* inputDe { PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const inputVolume = volume(inputDesc[0].dims); + int32_t const inputVolume = static_cast(volume(inputDesc[0].dims)); DataType iType = inputDesc->type; // Our plugin outputs only one tensor @@ -259,7 +260,7 @@ int32_t SkipLayerNormPluginV3::enqueue(nvinfer1::PluginTensorDesc const* inputDe { PLUGIN_ERROR(("Unsupported type error, expected [kINT8,kHALF,kFLOAT], but received " + std::to_string(static_cast(iType))) - .c_str()); + .c_str()); } } catch (std::exception const& e) @@ -335,7 +336,7 @@ PluginFieldCollection const* SkipLayerNormPluginV3::getFieldsToSerialize() noexc } } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; @@ -391,7 +392,7 @@ int32_t SkipLayerNormPluginV3::onShapeChange( PLUGIN_VALIDATE(std::equal(inDims0.d, inDims0.d + 
inDims0.nbDims, inDims1.d)); PLUGIN_VALIDATE(inDims0.nbDims == 5); - mLd = inDims0.d[HDIM]; // hiddensize + mLd = static_cast(inDims0.d[HDIM]); // hiddensize PLUGIN_VALIDATE(mLd != 0); PLUGIN_VALIDATE(inDims0.d[3] == 1); PLUGIN_VALIDATE(inDims0.d[4] == 1); @@ -408,7 +409,7 @@ int32_t SkipLayerNormPluginV3::onShapeChange( return pluginStatus_t::STATUS_FAILURE; } -IPluginV3* SkipLayerNormPluginV3::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* SkipLayerNormPluginV3::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -447,7 +448,7 @@ SkipLayerNormPluginV3Creator::SkipLayerNormPluginV3Creator() mPluginAttributes.emplace_back(PluginField("beta")); mPluginAttributes.emplace_back(PluginField("gamma")); mPluginAttributes.emplace_back(PluginField("bias")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -467,7 +468,7 @@ PluginFieldCollection const* SkipLayerNormPluginV3Creator::getFieldNames() noexc } IPluginV3* SkipLayerNormPluginV3Creator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { @@ -555,7 +556,7 @@ SkipLayerNormVarSeqlenPluginV3::SkipLayerNormVarSeqlenPluginV3( : mLayerName(name) , mGammaDev(nullptr) , mBetaDev(nullptr) - , mLd(beta.count) + , mLd(static_cast(beta.count)) , mType(type) , mBiasDev(nullptr) { @@ -617,8 +618,8 @@ IPluginV3* SkipLayerNormVarSeqlenPluginV3::clone() noexcept } int32_t SkipLayerNormVarSeqlenPluginV3::getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, - DimsExprs const* shapeInputs, int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, - IExprBuilder& exprBuilder) noexcept + DimsExprs const* /*shapeInputs*/, int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t nbOutputs, + IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -682,20 
+683,20 @@ bool SkipLayerNormVarSeqlenPluginV3::supportsFormatCombination( return false; } -int32_t SkipLayerNormVarSeqlenPluginV3::configurePlugin(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) noexcept +int32_t SkipLayerNormVarSeqlenPluginV3::configurePlugin(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) noexcept { return pluginStatus_t::STATUS_SUCCESS; } -size_t SkipLayerNormVarSeqlenPluginV3::getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t SkipLayerNormVarSeqlenPluginV3::getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } int32_t SkipLayerNormVarSeqlenPluginV3::enqueue(nvinfer1::PluginTensorDesc const* inputDesc, - nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* workspace, + nvinfer1::PluginTensorDesc const* outputDesc, void const* const* inputs, void* const* outputs, void* /*workspace*/, cudaStream_t stream) noexcept { int32_t status = -1; @@ -703,7 +704,7 @@ int32_t SkipLayerNormVarSeqlenPluginV3::enqueue(nvinfer1::PluginTensorDesc const { PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const inputVolume = volume(inputDesc[0].dims); + int32_t const inputVolume = static_cast(volume(inputDesc[0].dims)); PLUGIN_VALIDATE(inputVolume % mLd == 0 && "inconsistent dimensions"); DataType iType = inputDesc->type; @@ -774,7 +775,7 @@ int32_t SkipLayerNormVarSeqlenPluginV3::enqueue(nvinfer1::PluginTensorDesc const { PLUGIN_VALIDATE(("Unsupported type error, expected [kINT8,kHALF,kFLOAT], but received " + std::to_string(static_cast(iType))) - .c_str()); + 
.c_str()); } } catch (std::exception const& e) @@ -849,7 +850,7 @@ PluginFieldCollection const* SkipLayerNormVarSeqlenPluginV3::getFieldsToSerializ } } - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); return &mFCToSerialize; @@ -910,7 +911,7 @@ int32_t SkipLayerNormVarSeqlenPluginV3::onShapeChange( return pluginStatus_t::STATUS_FAILURE; } -IPluginV3* SkipLayerNormVarSeqlenPluginV3::attachToContext(IPluginResourceContext* context) noexcept +IPluginV3* SkipLayerNormVarSeqlenPluginV3::attachToContext(IPluginResourceContext* /*context*/) noexcept { return clone(); } @@ -948,7 +949,7 @@ SkipLayerNormVarSeqlenPluginV3Creator::SkipLayerNormVarSeqlenPluginV3Creator() mPluginAttributes.emplace_back(PluginField("beta")); mPluginAttributes.emplace_back(PluginField("gamma")); mPluginAttributes.emplace_back(PluginField("bias")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -968,7 +969,7 @@ PluginFieldCollection const* SkipLayerNormVarSeqlenPluginV3Creator::getFieldName } IPluginV3* SkipLayerNormVarSeqlenPluginV3Creator::createPlugin( - char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept + char const* name, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept { try { diff --git a/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.cpp b/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.cpp index 2a679ac10..3dfd47274 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.cpp +++ b/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.cpp @@ -113,7 +113,7 @@ IPluginV2DynamicExt* SkipLayerNormPluginDynamic::clone() const noexcept { BERT_DEBUG_MSG("SkipLayerNormPluginDynamic clone"); - auto* p = new SkipLayerNormPluginDynamic(mLayerName, mType, mLd, mBeta, mGamma, mBias); + auto* p = new 
SkipLayerNormPluginDynamic(mLayerName, mType, static_cast(mLd), mBeta, mGamma, mBias); p->initialize(); p->setPluginNamespace(mNamespace.c_str()); return p; @@ -126,7 +126,7 @@ IPluginV2DynamicExt* SkipLayerNormPluginDynamic::clone() const noexcept } DimsExprs SkipLayerNormPluginDynamic::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -231,8 +231,8 @@ void SkipLayerNormPluginDynamic::configurePlugin(DynamicPluginTensorDesc const* } } -size_t SkipLayerNormPluginDynamic::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t SkipLayerNormPluginDynamic::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -245,7 +245,7 @@ int32_t SkipLayerNormPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, P { PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const inputVolume = volume(inputDesc[0].dims); + int32_t const inputVolume = static_cast(volume(inputDesc[0].dims)); DataType iType = inputDesc->type; // Our plugin outputs only one tensor @@ -315,7 +315,7 @@ int32_t SkipLayerNormPluginDynamic::enqueue(PluginTensorDesc const* inputDesc, P { PLUGIN_ERROR(("Unsupported type error, expected [kINT8,kHALF,kFLOAT], but received " + std::to_string(static_cast(iType))) - .c_str()); + .c_str()); } } catch (std::exception const& e) @@ -442,7 +442,7 @@ SkipLayerNormPluginDynamicCreator::SkipLayerNormPluginDynamicCreator() mPluginAttributes.emplace_back(PluginField("beta")); mPluginAttributes.emplace_back(PluginField("gamma")); mPluginAttributes.emplace_back(PluginField("bias")); - mFC.nbFields = mPluginAttributes.size(); + 
mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -657,7 +657,7 @@ IPluginV2DynamicExt* SkipLayerNormVarSeqlenPlugin::clone() const noexcept } DimsExprs SkipLayerNormVarSeqlenPlugin::getOutputDimensions( - int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept + int32_t outputIndex, DimsExprs const* inputs, int32_t nbInputs, IExprBuilder& /*exprBuilder*/) noexcept { try { @@ -758,8 +758,8 @@ void SkipLayerNormVarSeqlenPlugin::configurePlugin(DynamicPluginTensorDesc const } } -size_t SkipLayerNormVarSeqlenPlugin::getWorkspaceSize( - PluginTensorDesc const* inputs, int32_t nbInputs, PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t SkipLayerNormVarSeqlenPlugin::getWorkspaceSize(PluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { return 0; } @@ -772,7 +772,7 @@ int32_t SkipLayerNormVarSeqlenPlugin::enqueue(PluginTensorDesc const* inputDesc, { PLUGIN_VALIDATE(inputDesc != nullptr && outputDesc != nullptr && inputs != nullptr && outputs != nullptr); - int32_t const inputVolume = volume(inputDesc[0].dims); + int32_t const inputVolume = static_cast(volume(inputDesc[0].dims)); PLUGIN_VALIDATE(inputVolume % mLd == 0 && "inconsistent dimensions"); DataType iType = inputDesc->type; @@ -843,7 +843,7 @@ int32_t SkipLayerNormVarSeqlenPlugin::enqueue(PluginTensorDesc const* inputDesc, { PLUGIN_VALIDATE(("Unsupported type error, expected [kINT8,kHALF,kFLOAT], but received " + std::to_string(static_cast(iType))) - .c_str()); + .c_str()); } } catch (std::exception const& e) @@ -954,7 +954,7 @@ SkipLayerNormVarSeqlenPluginCreator::SkipLayerNormVarSeqlenPluginCreator() mPluginAttributes.emplace_back(PluginField("beta")); mPluginAttributes.emplace_back(PluginField("gamma")); mPluginAttributes.emplace_back(PluginField("bias")); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = 
static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } diff --git a/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.h b/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.h index abd4ff81e..156373e22 100644 --- a/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.h +++ b/plugin/skipLayerNormPlugin/skipLayerNormPluginLegacy.h @@ -46,6 +46,11 @@ int32_t computeSkipLayerNorm(cudaStream_t stream, int32_t const ld, int32_t cons class SkipLayerNormPluginDynamic : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + SkipLayerNormPluginDynamic(const std::string name, const nvinfer1::DataType type, int32_t const ld, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma, nvinfer1::Weights const& bias); @@ -101,11 +106,6 @@ class SkipLayerNormPluginDynamic : public nvinfer1::IPluginV2DynamicExt bert::WeightsWithOwnership mBias; size_t mParamWordsize{}; - - using IPluginV2::enqueue; - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2Ext::configurePlugin; }; class SkipLayerNormPluginDynamicCreator : public nvinfer1::IPluginCreator @@ -137,6 +137,11 @@ class SkipLayerNormPluginDynamicCreator : public nvinfer1::IPluginCreator class SkipLayerNormVarSeqlenPlugin : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + SkipLayerNormVarSeqlenPlugin(const std::string name, const nvinfer1::DataType type, nvinfer1::Weights const& beta, nvinfer1::Weights const& gamma, nvinfer1::Weights const& bias); @@ -192,11 +197,6 @@ class SkipLayerNormVarSeqlenPlugin : public 
nvinfer1::IPluginV2DynamicExt bert::WeightsWithOwnership mBias; size_t mParamWordsize{}; - - using IPluginV2::enqueue; - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2Ext::configurePlugin; }; class SkipLayerNormVarSeqlenPluginCreator : public nvinfer1::IPluginCreator diff --git a/plugin/specialSlicePlugin/specialSlicePlugin.cpp b/plugin/specialSlicePlugin/specialSlicePlugin.cpp index 56748c4ba..32a49e4d0 100644 --- a/plugin/specialSlicePlugin/specialSlicePlugin.cpp +++ b/plugin/specialSlicePlugin/specialSlicePlugin.cpp @@ -31,7 +31,7 @@ char const* const kSPECIALSLICE_PLUGIN_NAME{"SpecialSlice_TRT"}; SpecialSlicePluginCreator::SpecialSlicePluginCreator() { - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -50,7 +50,8 @@ PluginFieldCollection const* SpecialSlicePluginCreator::getFieldNames() noexcept return &mFC; } -IPluginV2Ext* SpecialSlicePluginCreator::createPlugin(char const* name, PluginFieldCollection const* fc) noexcept +IPluginV2Ext* SpecialSlicePluginCreator::createPlugin( + char const* /*name*/, PluginFieldCollection const* /*fc*/) noexcept { try { @@ -67,7 +68,8 @@ IPluginV2Ext* SpecialSlicePluginCreator::createPlugin(char const* name, PluginFi return nullptr; } -IPluginV2Ext* SpecialSlicePluginCreator::deserializePlugin(char const* name, void const* data, size_t length) noexcept +IPluginV2Ext* SpecialSlicePluginCreator::deserializePlugin( + char const* /*name*/, void const* data, size_t length) noexcept { try { @@ -84,7 +86,7 @@ IPluginV2Ext* SpecialSlicePluginCreator::deserializePlugin(char const* name, voi return nullptr; } -size_t SpecialSlice::getWorkspaceSize(int32_t) const noexcept +size_t SpecialSlice::getWorkspaceSize(int32_t /*maxBatchSize*/) const noexcept { return 0; } @@ -185,8 +187,8 @@ Dims SpecialSlice::getOutputDimensions(int32_t index, Dims const* inputDims, int return output; } -int32_t 
SpecialSlice::enqueue( - int32_t batch_size, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept +int32_t SpecialSlice::enqueue(int32_t batch_size, void const* const* inputs, void* const* outputs, void* /*workspace*/, + cudaStream_t stream) noexcept { specialSlice(stream, batch_size, mBboxesCnt, inputs[0], outputs[0]); @@ -196,7 +198,7 @@ int32_t SpecialSlice::enqueue( // Return the DataType of the plugin output at the requested index DataType SpecialSlice::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept { // Only 1 input and 1 output from the plugin layer PLUGIN_ASSERT(index == 0); @@ -207,32 +209,33 @@ DataType SpecialSlice::getOutputDataType( // Return true if output tensor is broadcast across a batch. bool SpecialSlice::isOutputBroadcastAcrossBatch( - int32_t outputIndex, bool const* inputIsBroadcasted, int32_t nbInputs) const noexcept + int32_t /*outputIndex*/, bool const* /*inputIsBroadcasted*/, int32_t /*nbInputs*/) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. -bool SpecialSlice::canBroadcastInputAcrossBatch(int32_t inputIndex) const noexcept +bool SpecialSlice::canBroadcastInputAcrossBatch(int32_t /*inputIndex*/) const noexcept { return false; } // Configure the layer with input and output data types. 
-void SpecialSlice::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* outputDims, int32_t nbOutputs, - DataType const* inputTypes, DataType const* outputTypes, bool const* inputIsBroadcast, - bool const* outputIsBroadcast, PluginFormat floatFormat, int32_t maxBatchSize) noexcept +void SpecialSlice::configurePlugin(Dims const* inputDims, int32_t nbInputs, Dims const* /*outputDims*/, + int32_t nbOutputs, DataType const* /*inputTypes*/, DataType const* /*outputTypes*/, + bool const* /*inputIsBroadcast*/, bool const* /*outputIsBroadcast*/, PluginFormat /*floatFormat*/, + int32_t /*maxBatchSize*/) noexcept { PLUGIN_ASSERT(nbInputs == 1); PLUGIN_ASSERT(nbOutputs == 1); - mBboxesCnt = inputDims[0].d[0]; + mBboxesCnt = static_cast(inputDims[0].d[0]); } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. void SpecialSlice::attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept + cudnnContext* /*cudnnContext*/, cublasContext* /*cublasContext*/, IGpuAllocator* /*gpuAllocator*/) noexcept { } diff --git a/plugin/splitPlugin/split.h b/plugin/splitPlugin/split.h index 4553b28aa..ba4ac438d 100644 --- a/plugin/splitPlugin/split.h +++ b/plugin/splitPlugin/split.h @@ -46,11 +46,6 @@ class TRT_DEPRECATED SplitPlugin final : public nvinfer1::IPluginV2DynamicExt int32_t _x_stride, _y_stride, _z_stride; std::shared_ptr deviceVectors; - using IPluginV2::getOutputDimensions; - using IPluginV2::getWorkspaceSize; - using IPluginV2::enqueue; - using IPluginV2Ext::configurePlugin; - protected: void deserialize(void const* serialData, size_t serialLength) noexcept { @@ -68,6 +63,11 @@ class TRT_DEPRECATED SplitPlugin final : public nvinfer1::IPluginV2DynamicExt } public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using 
nvinfer1::IPluginV2DynamicExt::enqueue; + SplitPlugin(int32_t axis, int32_t* const& output_lengths, int32_t noutput) : _axis(axis) , _output_lengths(std::vector<int32_t>(output_lengths, output_lengths + noutput)) @@ -132,7 +132,7 @@ class TRT_DEPRECATED SplitPlugin final : public nvinfer1::IPluginV2DynamicExt } int32_t getNbOutputs() const noexcept override { - return _output_lengths.size(); + return static_cast<int32_t>(_output_lengths.size()); } void attachToContext( cudnnContext* /*cudnn*/, cublasContext* /*cublas*/, nvinfer1::IGpuAllocator* /*allocator*/) noexcept override diff --git a/plugin/vc/checkMacrosPlugin.h b/plugin/vc/checkMacrosPlugin.h index e35cc64f3..fec821768 100644 --- a/plugin/vc/checkMacrosPlugin.h +++ b/plugin/vc/checkMacrosPlugin.h @@ -49,7 +49,7 @@ class LogStream : public std::ostream return mLogStreamMutex; } LogStream() - : std::ostream(&buffer){}; + : std::ostream(&buffer) {}; }; // Use mutex to protect multi-stream write to buffer @@ -88,7 +88,7 @@ extern LogStream gLogInfo; extern LogStream gLogVerbose; void reportValidationFailure(char const* msg, char const* file, int32_t line); -void reportAssertion(char const* msg, char const* file, int32_t line); +[[noreturn]] void reportAssertion(char const* msg, char const* file, int32_t line); void logError(char const* msg, char const* file, char const* fn, int32_t line); [[noreturn]] void throwCudaError( @@ -173,31 +173,33 @@ inline void caughtError(std::exception const& e) #define PLUGIN_API_CHECK_ENUM_RANGE_RETVAL(Type, val, retval) \ PLUGIN_API_CHECK_RETVAL(int32_t(val) >= 0 && int32_t(val) < EnumMax<Type>(), retval) +#undef PLUGIN_CHECK_CUDA #define PLUGIN_CHECK_CUDA(call) \ do \ { \ - cudaError_t status = call; \ - if (status != cudaSuccess) \ + cudaError_t status_check_cuda_ = call; \ + if (status_check_cuda_ != cudaSuccess) \ { \ - return status; \ + return status_check_cuda_; \ } \ } while (0) #define PLUGIN_CUASSERT(status_) \ do \ { \ - auto s_ = status_; \ - if (s_ != cudaSuccess) \ + auto s_cuassert_ =
status_; \ + if (s_cuassert_ != cudaSuccess) \ { \ - char const* msg = cudaGetErrorString(s_); \ - nvinfer1::plugin::throwCudaError(__FILE__, FN_NAME, __LINE__, s_, msg); \ + char const* msg_cuassert_ = cudaGetErrorString(s_cuassert_); \ + nvinfer1::plugin::throwCudaError(__FILE__, FN_NAME, __LINE__, s_cuassert_, msg_cuassert_); \ } \ } while (0) // On MSVC, nested macros don't expand correctly without some help, so use TRT_EXPAND to help it out. #define TRT_EXPAND(x) x #define GET_MACRO(_1, _2, NAME, ...) NAME -#define PLUGIN_VALIDATE(...) TRT_EXPAND(GET_MACRO(__VA_ARGS__, PLUGIN_VALIDATE_MSG, PLUGIN_VALIDATE_DEFAULT, )(__VA_ARGS__)) +#define PLUGIN_VALIDATE(...) \ + TRT_EXPAND(GET_MACRO(__VA_ARGS__, PLUGIN_VALIDATE_MSG, PLUGIN_VALIDATE_DEFAULT, )(__VA_ARGS__)) // Logs failed condition and throws a PluginError. // PLUGIN_ASSERT will eventually perform this function, at which point PLUGIN_VALIDATE @@ -247,8 +249,8 @@ inline void caughtError(std::exception const& e) #define PLUGIN_CUERROR(status_) \ do \ { \ - auto s_ = status_; \ - if (s_ != 0) \ + auto s_cuerror_ = status_; \ + if (s_cuerror_ != 0) \ nvinfer1::plugin::logError(#status_ " failure.", __FILE__, FN_NAME, __LINE__); \ } while (0) diff --git a/plugin/voxelGeneratorPlugin/voxelGenerator.cpp b/plugin/voxelGeneratorPlugin/voxelGenerator.cpp index 0a25687e6..d6c02444b 100644 --- a/plugin/voxelGeneratorPlugin/voxelGenerator.cpp +++ b/plugin/voxelGeneratorPlugin/voxelGenerator.cpp @@ -39,12 +39,12 @@ size_t constexpr kSERIALIZATION_SIZE{9 * sizeof(float) + 7 * sizeof(int32_t)}; int32_t npRound(float x) { // half way round to nearest-even - int32_t x2 = lround(x * 2.0F); - if (x != static_cast(x) && x2 == x * 2.0F) + int32_t x2 = static_cast(lround(x * 2.0F)); + if (x != static_cast(static_cast(x)) && static_cast(x2) == x * 2.0F) { - return lround(x / 2.0F + 0.5F) * 2; + return static_cast(lround(x / 2.0F + 0.5F)) * 2; } - return lround(x); + return static_cast(lround(x)); } 
VoxelGeneratorPlugin::VoxelGeneratorPlugin(int32_t maxVoxels, int32_t maxPoints, int32_t voxelFeatures, float xMin, @@ -90,7 +90,7 @@ VoxelGeneratorPlugin::VoxelGeneratorPlugin(void const* data, size_t length) { PLUGIN_ASSERT(data != nullptr); uint8_t const* d = reinterpret_cast(data); - auto const *a = d; + auto const* a = d; mPillarNum = readFromBuffer(d); mPointNum = readFromBuffer(d); mFeatureNum = readFromBuffer(d); @@ -128,7 +128,7 @@ nvinfer1::IPluginV2DynamicExt* VoxelGeneratorPlugin::clone() const noexcept } nvinfer1::DimsExprs VoxelGeneratorPlugin::getOutputDimensions(int32_t outputIndex, nvinfer1::DimsExprs const* inputs, - int32_t nbInputs, nvinfer1::IExprBuilder& exprBuilder) noexcept + int32_t /*nbInputs*/, nvinfer1::IExprBuilder& exprBuilder) noexcept { try { @@ -213,7 +213,7 @@ void VoxelGeneratorPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc con PLUGIN_VALIDATE(nbInputs == 2); PLUGIN_VALIDATE(nbOutputs == 3); - mPointFeatureNum = in[0].desc.dims.d[2]; + mPointFeatureNum = static_cast(in[0].desc.dims.d[2]); mGridXSize = npRound((mMaxXRange - mMinXRange) / mPillarXSize); mGridYSize = npRound((mMaxYRange - mMinYRange) / mPillarYSize); mGridZSize = npRound((mMaxZRange - mMinZRange) / mPillarZSize); @@ -224,14 +224,15 @@ void VoxelGeneratorPlugin::configurePlugin(nvinfer1::DynamicPluginTensorDesc con } } -size_t VoxelGeneratorPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t nbInputs, - nvinfer1::PluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept +size_t VoxelGeneratorPlugin::getWorkspaceSize(nvinfer1::PluginTensorDesc const* inputs, int32_t /*nbInputs*/, + nvinfer1::PluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept { try { - int32_t batchSize = inputs[0].dims.d[0]; + int32_t batchSize = static_cast(inputs[0].dims.d[0]); size_t maskSize = batchSize * mGridZSize * mGridYSize * mGridXSize * sizeof(uint32_t); - size_t voxelsSize = batchSize * mGridZSize * mGridYSize * mGridXSize * 
mPointNum * mPointFeatureNum * sizeof(float); + size_t voxelsSize + = batchSize * mGridZSize * mGridYSize * mGridXSize * mPointNum * mPointFeatureNum * sizeof(float); // the actual max pillar num cannot be determined, use upper bound size_t voxelFeaturesSize = voxelsSize; size_t voxelNumPointsSize = maskSize; @@ -257,8 +258,8 @@ int32_t VoxelGeneratorPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDes { PLUGIN_VALIDATE(inputDesc != nullptr && inputs != nullptr && outputs != nullptr && workspace != nullptr); - int32_t batchSize = inputDesc[0].dims.d[0]; - int32_t maxNumPoints = inputDesc[0].dims.d[1]; + int32_t batchSize = static_cast(inputDesc[0].dims.d[0]); + int32_t maxNumPoints = static_cast(inputDesc[0].dims.d[1]); // TRT-input float* pointCloud = const_cast((float const*) inputs[0]); uint32_t* pointNumPtr = const_cast((uint32_t const*) inputs[1]); @@ -285,9 +286,10 @@ int32_t VoxelGeneratorPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDes nextWorkspacePtr(reinterpret_cast(voxelFeatures), voxelFeaturesSize)); // Initialize workspace memory PLUGIN_CUASSERT(cudaMemsetAsync(mask, 0, totalWorkspace, stream)); - uint32_t pillarFeaturesDataSize = batchSize * mPillarNum * mPointNum * mFeatureNum * sizeof(float); - uint32_t coordsDataSize = batchSize * mPillarNum * 4 * sizeof(uint32_t); - uint32_t paramsDataSize = batchSize * sizeof(uint32_t); + size_t pillarFeaturesDataSize + = static_cast(batchSize) * mPillarNum * mPointNum * mFeatureNum * sizeof(float); + size_t coordsDataSize = static_cast(batchSize) * mPillarNum * 4 * sizeof(uint32_t); + size_t paramsDataSize = static_cast(batchSize) * sizeof(uint32_t); PLUGIN_CUASSERT(cudaMemsetAsync(pillarFeaturesData, 0, pillarFeaturesDataSize, stream)); PLUGIN_CUASSERT(cudaMemsetAsync(coordsData, 0, coordsDataSize, stream)); PLUGIN_CUASSERT(cudaMemsetAsync(paramsData, 0, paramsDataSize, stream)); @@ -300,8 +302,8 @@ int32_t VoxelGeneratorPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDes 
generateBaseFeatures_launch(batchSize, mask, voxels, mGridYSize, mGridXSize, paramsData, mPillarNum, mPointNum, mPointFeatureNum, voxelFeatures, voxelNumPoints, coordsData, stream); generateFeatures_launch(batchSize, densePillarNum, voxelFeatures, voxelNumPoints, coordsData, paramsData, - mPillarXSize, mPillarYSize, mPillarZSize, mMinXRange, mMinYRange, mMinZRange, mFeatureNum, mPointNum, mPillarNum, - mPointFeatureNum, pillarFeaturesData, stream); + mPillarXSize, mPillarYSize, mPillarZSize, mMinXRange, mMinYRange, mMinZRange, mFeatureNum, mPointNum, + mPillarNum, mPointFeatureNum, pillarFeaturesData, stream); return 0; } catch (std::exception const& e) @@ -312,7 +314,7 @@ int32_t VoxelGeneratorPlugin::enqueue(nvinfer1::PluginTensorDesc const* inputDes } nvinfer1::DataType VoxelGeneratorPlugin::getOutputDataType( - int32_t index, nvinfer1::DataType const* inputTypes, int32_t nbInputs) const noexcept + int32_t index, nvinfer1::DataType const* inputTypes, int32_t /*nbInputs*/) const noexcept { try { @@ -362,7 +364,7 @@ void VoxelGeneratorPlugin::serialize(void* buffer) const noexcept PLUGIN_ASSERT(buffer != nullptr); uint8_t* d = reinterpret_cast(buffer); - auto *a = d; + auto* a = d; writeToBuffer(d, mPillarNum); writeToBuffer(d, mPointNum); writeToBuffer(d, mFeatureNum); @@ -413,7 +415,7 @@ VoxelGeneratorPluginCreator::VoxelGeneratorPluginCreator() mPluginAttributes.emplace_back(PluginField("point_cloud_range", nullptr, PluginFieldType::kFLOAT32, 1)); mPluginAttributes.emplace_back(PluginField("voxel_feature_num", nullptr, PluginFieldType::kINT32, 1)); mPluginAttributes.emplace_back(PluginField("voxel_size", nullptr, PluginFieldType::kFLOAT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -432,7 +434,7 @@ PluginFieldCollection const* VoxelGeneratorPluginCreator::getFieldNames() noexce return &mFC; } -IPluginV2* VoxelGeneratorPluginCreator::createPlugin(char const* 
name, PluginFieldCollection const* fc) noexcept +IPluginV2* VoxelGeneratorPluginCreator::createPlugin(char const* /*name*/, PluginFieldCollection const* fc) noexcept { try { @@ -493,7 +495,7 @@ IPluginV2* VoxelGeneratorPluginCreator::createPlugin(char const* name, PluginFie } IPluginV2* VoxelGeneratorPluginCreator::deserializePlugin( - char const* name, void const* serialData, size_t serialLength) noexcept + char const* /*name*/, void const* serialData, size_t serialLength) noexcept { try { diff --git a/plugin/voxelGeneratorPlugin/voxelGenerator.h b/plugin/voxelGeneratorPlugin/voxelGenerator.h index c6c401467..44fda7b07 100644 --- a/plugin/voxelGeneratorPlugin/voxelGenerator.h +++ b/plugin/voxelGeneratorPlugin/voxelGenerator.h @@ -34,12 +34,17 @@ namespace plugin class VoxelGeneratorPlugin : public nvinfer1::IPluginV2DynamicExt { public: + using nvinfer1::IPluginV2DynamicExt::configurePlugin; + using nvinfer1::IPluginV2DynamicExt::getOutputDimensions; + using nvinfer1::IPluginV2DynamicExt::getWorkspaceSize; + using nvinfer1::IPluginV2DynamicExt::enqueue; + VoxelGeneratorPlugin() = delete; - VoxelGeneratorPlugin(int32_t maxVoxels, int32_t maxPoints, int32_t voxelFeatures, float xMin, float xMax, float yMin, - float yMax, float zMin, float zMax, float pillarX, float pillarY, float pillarZ); - VoxelGeneratorPlugin(int32_t maxVoxels, int32_t maxPoints, int32_t voxelFeatures, float xMin, float xMax, float yMin, - float yMax, float zMin, float zMax, float pillarX, float pillarY, float pillarZ, int32_t pointFeatures, - int32_t gridX, int32_t gridY, int32_t gridZ); + VoxelGeneratorPlugin(int32_t maxVoxels, int32_t maxPoints, int32_t voxelFeatures, float xMin, float xMax, + float yMin, float yMax, float zMin, float zMax, float pillarX, float pillarY, float pillarZ); + VoxelGeneratorPlugin(int32_t maxVoxels, int32_t maxPoints, int32_t voxelFeatures, float xMin, float xMax, + float yMin, float yMax, float zMin, float zMax, float pillarX, float pillarY, float pillarZ, + 
int32_t pointFeatures, int32_t gridX, int32_t gridY, int32_t gridZ); VoxelGeneratorPlugin(void const* data, size_t length); // IPluginV2DynamicExt Methods nvinfer1::IPluginV2DynamicExt* clone() const noexcept override; diff --git a/samples/common/ErrorRecorder.h b/samples/common/ErrorRecorder.h index 91ddb176f..948f90270 100644 --- a/samples/common/ErrorRecorder.h +++ b/samples/common/ErrorRecorder.h @@ -48,7 +48,7 @@ class SampleErrorRecorder : public IErrorRecorder ~SampleErrorRecorder() noexcept override {} int32_t getNbErrors() const noexcept final { - return mErrorStack.size(); + return static_cast(mErrorStack.size()); } ErrorCode getErrorCode(int32_t errorIdx) const noexcept final { @@ -134,5 +134,5 @@ class SampleErrorRecorder : public IErrorRecorder // The error stack that holds the errors recorded by TensorRT. errorStack mErrorStack; -}; // class SampleErrorRecorder +}; // class SampleErrorRecorder #endif // ERROR_RECORDER_H diff --git a/samples/common/common.h b/samples/common/common.h index bf53058ca..4da105edb 100644 --- a/samples/common/common.h +++ b/samples/common/common.h @@ -162,11 +162,11 @@ struct SimpleProfiler : public nvinfer1::IProfiler out << "========== " << value.mName << " profile ==========" << std::endl; float totalTime = 0; std::string layerNameStr = "TensorRT layer name"; - int maxLayerNameLength = std::max(static_cast(layerNameStr.size()), 70); + int32_t maxLayerNameLength = std::max(static_cast(layerNameStr.size()), 70); for (const auto& elem : value.mProfile) { totalTime += elem.second.time; - maxLayerNameLength = std::max(maxLayerNameLength, static_cast(elem.first.size())); + maxLayerNameLength = std::max(maxLayerNameLength, static_cast(elem.first.size())); } auto old_settings = out.flags(); @@ -264,7 +264,7 @@ class TypedHostMemory : public HostMemory }; ~TypedHostMemory() noexcept override { - delete[](ElemType*) mData; + delete[] (ElemType*) mData; } ElemType* raw() noexcept { @@ -293,7 +293,8 @@ inline bool isDebug() return 
std::getenv("TENSORRT_DEBUG") != nullptr; } -static auto StreamDeleter = [](cudaStream_t* pStream) { +static auto StreamDeleter = [](cudaStream_t* pStream) +{ if (pStream) { static_cast(cudaStreamDestroy(*pStream)); @@ -786,6 +787,7 @@ inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm, std::vec class TimerBase { public: + virtual ~TimerBase() = default; virtual void start() {} virtual void stop() {} float microseconds() const noexcept @@ -879,17 +881,17 @@ inline std::vector splitString(std::string str, char delimiter = ', inline int getC(nvinfer1::Dims const& d) { - return d.nbDims >= 3 ? d.d[d.nbDims - 3] : 1; + return d.nbDims >= 3 ? static_cast(d.d[d.nbDims - 3]) : 1; } inline int getH(const nvinfer1::Dims& d) { - return d.nbDims >= 2 ? d.d[d.nbDims - 2] : 1; + return d.nbDims >= 2 ? static_cast(d.d[d.nbDims - 2]) : 1; } inline int getW(const nvinfer1::Dims& d) { - return d.nbDims >= 1 ? d.d[d.nbDims - 1] : 1; + return d.nbDims >= 1 ? static_cast(d.d[d.nbDims - 1]) : 1; } //! Platform-agnostic wrapper around dynamic libraries. @@ -1052,9 +1054,8 @@ inline std::ostream& operator<<(std::ostream& os, const nvinfer1::Dims& dims) constexpr size_t kMAX_FILENAME_LENGTH = 150; // Leave some margin due to Windows path length limitation constexpr size_t kELLIPSIS_LENGTH = 3; // Length of "..." - auto processChar = [&kALLOWED](char c) { - return std::isalnum(static_cast(c)) || kALLOWED.find(c) != std::string_view::npos ? c : '_'; - }; + auto processChar = [&kALLOWED](char c) + { return std::isalnum(static_cast(c)) || kALLOWED.find(c) != std::string_view::npos ? 
c : '_'; }; std::string res; if (s.length() <= kMAX_FILENAME_LENGTH) diff --git a/samples/common/debugTensorWriter.cpp b/samples/common/debugTensorWriter.cpp index f6207c07c..c68a3c7c2 100644 --- a/samples/common/debugTensorWriter.cpp +++ b/samples/common/debugTensorWriter.cpp @@ -127,14 +127,14 @@ class DataIterator { // For Int4x2, each byte contains two 4-bit integers Int4x2 packed(mData[mIndex / 2]); - return packed.element(mIndex % 2); + return packed.element(static_cast(mIndex % 2)); } #if CUDA_VERSION >= 12070 else if constexpr (std::is_same_v) { // For Fp4x2, each byte contains two 4-bit floating point numbers Fp4x2 packed(mData[mIndex / 2]); - return packed.element(mIndex % 2); + return packed.element(static_cast(mIndex % 2)); } #endif else @@ -175,7 +175,7 @@ class DataIterator private: uint8_t const* mData; - int64_t mVolume; + [[maybe_unused]] int64_t mVolume; int64_t mIndex; }; @@ -203,7 +203,7 @@ class DataRange private: void const* mData; - int64_t mVolume; + [[maybe_unused]] int64_t mVolume; }; template @@ -250,7 +250,8 @@ void printTensorElements(T const* data, int64_t volume, std::ofstream& f) ? 
kPRINT_ELEMENTS_COUNT / 2 : std::max(static_cast(0), volume - kPRINT_ELEMENTS_COUNT / 2); - auto printElement = [&f](auto value) { + auto printElement = [&f](auto value) + { if constexpr (isFloatingPoint) { f << static_cast(value); @@ -303,12 +304,13 @@ void processTensorSummary(void const* addr_host, int64_t volume, std::ofstream& float val = static_cast(value); minVal = std::min(minVal, val); maxVal = std::max(maxVal, val); - sum += val; + sum += static_cast(val); } - float avgVal = sum / volume; + float avgVal = static_cast(sum / static_cast(volume)); // nan and inf turn into string in json - auto valueToStr = [](float val) -> std::string { + auto valueToStr = [](float val) -> std::string + { std::stringstream ss; if (!std::isfinite(val)) { @@ -338,7 +340,7 @@ void processTensorSummary(void const* addr_host, int64_t volume, std::ofstream& maxVal = std::max(maxVal, val); sum += val; } - double avgVal = static_cast(sum) / volume; + double avgVal = static_cast(sum) / static_cast(volume); f << " \"min\": " << minVal << "," << std::endl; f << " \"max\": " << maxVal << "," << std::endl; @@ -359,7 +361,7 @@ std::string getCurrentTimeString() template void writeTensorStringRecursive(T const* data, nvinfer1::Dims const& shape, int32_t currentDim, int64_t offset, - int64_t stride, std::ofstream& f, bool isFirstElement = true, int32_t indent = 0, int32_t maxWidth = 0) + int64_t stride, std::ofstream& f, bool /*isFirstElement*/ = true, int32_t indent = 0, int32_t maxWidth = 0) { bool isLastDim = currentDim == shape.nbDims - 1; if (isLastDim) @@ -533,7 +535,7 @@ std::string writeStringFile(void const* addr_host, nvinfer1::DataType type, nvin std::string escapeJsonString(std::string_view str) { std::string result; - result.reserve(str.length()); + result.reserve(str.size()); for (char c : str) { switch (c) @@ -751,13 +753,13 @@ bool writeNumpyFile(void const* addr_host, std::string_view dtype, nvinfer1::Dim // Pad header to 16 bytes alignment std::string headerStr = 
header.str(); - int32_t headerLen = 10 + headerStr.length(); + int32_t headerLen = 10 + static_cast(headerStr.length()); int32_t padding = 16 - ((headerLen + 1) % 16); headerStr.append(padding, ' '); headerStr += '\n'; // Write header length and header - uint16_t headerSize = headerStr.length(); + uint16_t headerSize = static_cast(headerStr.length()); f.write(reinterpret_cast(&headerSize), sizeof(uint16_t)); f.write(headerStr.c_str(), headerSize); @@ -778,7 +780,8 @@ std::string writeNumpy(nvinfer1::DataType type, void const* addr_host, int64_t v std::vector floatBuffer; std::vector int8Buffer; - auto convertToFloat = [&](std::vector const& buffer) { + auto convertToFloat = [&](std::vector const& buffer) + { sample::gLogWarning << "Converting " << getDataTypeString(type) << " to float for numpy dump of tensor '" << name << "'." << std::endl; dtype = " const& buffer) { + auto convertToInt8 = [&](std::vector const& buffer) + { sample::gLogWarning << "Converting " << getDataTypeString(type) << " to int8 for numpy dump of tensor '" << name << "'." 
<< std::endl; dtype = "& options, const TRTOption& op { if (matches(opt, options[i])) { - return i; + return static_cast(i); } } return -1; @@ -149,13 +149,13 @@ TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector 2) { - return TRTParsedArgs{"Short arg contains more than 1 character at index " + std::to_string(i)}; + return TRTParsedArgs{"Short arg contains more than 1 character at index " + std::to_string(i), {}, {}}; } opt.shortName = argStr[1]; } @@ -206,13 +206,13 @@ TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector(parsedArgs.values[idx].second.size()); continue; } if (i + 1 >= argc) { - return TRTParsedArgs{"Last argument requires value, but none given"}; + return TRTParsedArgs{"Last argument requires value, but none given", {}, {}}; } const std::string nextArg(argv[i + 1]); @@ -225,7 +225,7 @@ TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector(parsedArgs.values[idx].second.size()); } else { @@ -240,7 +240,7 @@ TRTParsedArgs getOptions(int argc, const char* const* argv, const std::vector uint16 float2half(T value) { return float2half_impl( - value, bool_type < std::numeric_limits::is_iec559 && sizeof(typename bits::type) == sizeof(T) > ()); + value, bool_type::is_iec559 && sizeof(typename bits::type) == sizeof(T)>()); } /// Convert integer to half-precision floating point. @@ -1237,7 +1242,7 @@ template T half2float(uint16 value) { return half2float_impl( - value, T(), bool_type < std::numeric_limits::is_iec559 && sizeof(typename bits::type) == sizeof(T) > ()); + value, T(), bool_type::is_iec559 && sizeof(typename bits::type) == sizeof(T)>()); } /// Convert half-precision floating point to integer. @@ -1553,7 +1558,7 @@ class half private: /// Rounding mode to use - static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE); + static const std::float_round_style round_style = (std::float_round_style) (HALF_ROUND_STYLE); /// Constructor. 
/// \param bits binary representation to set half to @@ -2609,7 +2614,7 @@ struct binary_specialized /// \tparam T destination type /// \tparam U source type /// \tparam R rounding mode to use -template +template struct half_caster { }; diff --git a/samples/common/sampleDevice.cpp b/samples/common/sampleDevice.cpp index 5eedfc1e2..e0ad088d7 100644 --- a/samples/common/sampleDevice.cpp +++ b/samples/common/sampleDevice.cpp @@ -101,8 +101,8 @@ void setCudaDevice(int32_t device, std::ostream& os) int32_t memoryClockRate = 0; CHECK(cudaDeviceGetAttribute(&clockRate, cudaDevAttrClockRate, device)); CHECK(cudaDeviceGetAttribute(&memoryClockRate, cudaDevAttrMemoryClockRate, device)); - os << "Application Compute Clock Rate: " << clockRate / 1000000.0F << " GHz" << std::endl; - os << "Application Memory Clock Rate: " << memoryClockRate / 1000000.0F << " GHz" << std::endl; + os << "Application Compute Clock Rate: " << static_cast(clockRate) / 1000000.0F << " GHz" << std::endl; + os << "Application Memory Clock Rate: " << static_cast(memoryClockRate) / 1000000.0F << " GHz" << std::endl; os << std::endl; os << "Note: The application clock rates do not reflect the actual clock rates that the GPU is " << "currently running at." << std::endl; diff --git a/samples/common/sampleDevice.h b/samples/common/sampleDevice.h index e784b9f99..767f0141c 100644 --- a/samples/common/sampleDevice.h +++ b/samples/common/sampleDevice.h @@ -502,12 +502,12 @@ class UnifiedMirroredBuffer : public IMirroredBuffer return mBuffer.get(); } - void hostToDevice(TrtCudaStream& stream) override + void hostToDevice(TrtCudaStream& /*stream*/) override { // Does nothing since we are using unified memory. } - void deviceToHost(TrtCudaStream& stream) override + void deviceToHost(TrtCudaStream& /*stream*/) override { // Does nothing since we are using unified memory. 
} @@ -539,7 +539,7 @@ class OutputAllocator : public nvinfer1::IOutputAllocator ~OutputAllocator() override = default; void* reallocateOutput( - char const* tensorName, void* currentMemory, uint64_t size, uint64_t alignment) noexcept override + char const* /*tensorName*/, void* /*currentMemory*/, uint64_t size, uint64_t alignment) noexcept override { // Some memory allocators return nullptr when allocating zero bytes, but TensorRT requires a non-null ptr // even for empty tensors, so allocate a dummy byte. @@ -559,7 +559,7 @@ class OutputAllocator : public nvinfer1::IOutputAllocator return reallocateOutput(tensorName, currentMemory, size, alignment); } - void notifyShape(char const* tensorName, nvinfer1::Dims const& dims) noexcept override + void notifyShape(char const* /*tensorName*/, nvinfer1::Dims const& dims) noexcept override { mFinalDims = dims; } @@ -589,7 +589,6 @@ int32_t getCudaDriverVersion(); //! Get the CUDA version of the current CUDA runtime. int32_t getCudaRuntimeVersion(); - } // namespace sample #endif // TRT_SAMPLE_DEVICE_H diff --git a/samples/common/sampleEngines.cpp b/samples/common/sampleEngines.cpp index 1a6b87322..4c6b516f2 100644 --- a/samples/common/sampleEngines.cpp +++ b/samples/common/sampleEngines.cpp @@ -223,7 +223,6 @@ void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, std:: } } - //! //! \brief Generate a network definition for a given model //! 
@@ -637,7 +636,7 @@ void setDecomposables(INetworkDefinition& network, DecomposableAttentions const& void markDebugTensors(INetworkDefinition& network, StringSet const& debugTensors) { - for (int64_t inputIndex = 0; inputIndex < network.getNbInputs(); ++inputIndex) + for (int32_t inputIndex = 0; inputIndex < network.getNbInputs(); ++inputIndex) { auto* t = network.getInput(inputIndex); auto const tensorName = t->getName(); @@ -646,10 +645,10 @@ void markDebugTensors(INetworkDefinition& network, StringSet const& debugTensors network.markDebug(*t); } } - for (int64_t layerIndex = 0; layerIndex < network.getNbLayers(); ++layerIndex) + for (int32_t layerIndex = 0; layerIndex < network.getNbLayers(); ++layerIndex) { auto* layer = network.getLayer(layerIndex); - for (int64_t outputIndex = 0; outputIndex < layer->getNbOutputs(); ++outputIndex) + for (int32_t outputIndex = 0; outputIndex < layer->getNbOutputs(); ++outputIndex) { auto* t = layer->getOutput(outputIndex); auto const tensorName = t->getName(); @@ -662,9 +661,8 @@ void markDebugTensors(INetworkDefinition& network, StringSet const& debugTensors } void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build) { - auto const roundToBytes = [](double const size, bool fromMB = true) { - return static_cast(size * (fromMB ? 1.0_MiB : 1.0_KiB)); - }; + auto const roundToBytes = [](double const size, bool fromMB = true) + { return static_cast(static_cast(size) * static_cast(fromMB ? 
1.0_MiB : 1.0_KiB)); }; if (build.workspace >= 0) { config.setMemoryPoolLimit(MemoryPoolType::kWORKSPACE, roundToBytes(build.workspace)); @@ -703,7 +701,8 @@ void setMemoryPoolLimits(IBuilderConfig& config, BuildOptions const& build) void setPreviewFeatures(IBuilderConfig& config, BuildOptions const& build) { - auto const setFlag = [&](PreviewFeature feat) { + auto const setFlag = [&](PreviewFeature feat) + { int32_t featVal = static_cast(feat); if (build.previewFeatures.find(featVal) != build.previewFeatures.end()) { @@ -805,10 +804,10 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, if (isDynamicInput) { hasDynamicShapes = true; - for (size_t i = 0; i < build.optProfiles.size(); i++) + for (size_t pi = 0; pi < build.optProfiles.size(); pi++) { - auto const& optShapes = build.optProfiles[i]; - auto profile = profiles[i]; + auto const& optShapes = build.optProfiles[pi]; + auto profile = profiles[pi]; auto const tensorName = input->getName(); auto shape = findPlausible(optShapes, tensorName); ShapeRange shapes{}; @@ -1008,7 +1007,7 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, } if (!pluginPaths.empty()) { - config.setPluginsToSerialize(pluginPaths.data(), pluginPaths.size()); + config.setPluginsToSerialize(pluginPaths.data(), static_cast(pluginPaths.size())); } if (build.excludeLeanRuntime) { @@ -1072,13 +1071,14 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, auto int8IO = std::count_if(build.inputFormats.begin(), build.inputFormats.end(), isInt8) + std::count_if(build.outputFormats.begin(), build.outputFormats.end(), isInt8); - auto hasQDQLayers = [](INetworkDefinition& network) { + auto hasQDQLayers = [](INetworkDefinition& net) + { // Determine if our network has QDQ layers. 
- auto const nbLayers = network.getNbLayers(); + auto const nbLayers = net.getNbLayers(); for (int32_t i = 0; i < nbLayers; i++) { - auto const& layer = network.getLayer(i); - if (layer->getType() == LayerType::kQUANTIZE || layer->getType() == LayerType::kDEQUANTIZE) + auto const& layer_ = net.getLayer(i); + if (layer_->getType() == LayerType::kQUANTIZE || layer_->getType() == LayerType::kDEQUANTIZE) { return true; } @@ -1266,7 +1266,6 @@ bool setupNetworkAndConfig(BuildOptions const& build, SystemOptions const& sys, config.setHardwareCompatibilityLevel(build.hardwareCompatibilityLevel); - config.setRuntimePlatform(build.runtimePlatform); if (build.maxAuxStreams != defaultMaxAuxStreams) @@ -1345,7 +1344,8 @@ bool networkToSerializedEngine( reader.read(reinterpret_cast(streamEngine.data()), engineSize); SMP_RETVAL_IF_FALSE((!reader.fail()), "Error when reading engine file", false, err); reader.close(); - sample::gLogInfo << "Created engine with size: " << (engineSize / 1.0_MiB) << " MiB" << std::endl; + sample::gLogInfo << "Created engine with size: " << static_cast(engineSize / 1.0_MiB) << " MiB" + << std::endl; env.engine.setBlob(std::move(streamEngine)); } else @@ -1359,8 +1359,8 @@ bool networkToSerializedEngine( { std::unique_ptr kernelTextPtr(kernelText); env.kernelText.setBlob(kernelTextPtr); - sample::gLogInfo << "Created kernel CPP with size: " << (kernelText->size() / 1.0_MiB) << " MiB" - << std::endl; + sample::gLogInfo << "Created kernel CPP with size: " + << static_cast(kernelText->size() / 1.0_MiB) << " MiB" << std::endl; } else { @@ -1372,7 +1372,8 @@ bool networkToSerializedEngine( serializedEngine = builder.buildSerializedNetwork(*env.network, *config); } SMP_RETVAL_IF_FALSE(serializedEngine != nullptr, "Engine could not be created from network", false, err); - sample::gLogInfo << "Created engine with size: " << (serializedEngine->size() / 1.0_MiB) << " MiB" << std::endl; + sample::gLogInfo << "Created engine with size: " << 
static_cast(serializedEngine->size() / 1.0_MiB) + << " MiB" << std::endl; if (build.safe && build.consistency) { @@ -1399,14 +1400,14 @@ bool networkToSerializedEngine( { if (build.timingCacheMode == TimingCacheMode::kGLOBAL) { - auto timingCache = config->getTimingCache(); - samplesCommon::updateTimingCacheFile(gLogger.getTRTLogger(), build.timingCacheFile, timingCache, builder); + auto timingCacheUpdated = config->getTimingCache(); + samplesCommon::updateTimingCacheFile( + gLogger.getTRTLogger(), build.timingCacheFile, timingCacheUpdated, builder); } } return true; } - //! //! \brief Parse a given model, create a network and an engine. //! @@ -1476,13 +1477,15 @@ std::pair, std::vector> getLayerWeightsRol std::vector weightsRoles(nbAll); refitter.getAll(nbAll, layerNames.data(), weightsRoles.data()); std::vector layerNameStrs(nbAll); - std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), [](char const* name) { - if (name == nullptr) + std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), + [](char const* name) { - return std::string{}; - } - return std::string{name}; - }); + if (name == nullptr) + { + return std::string{}; + } + return std::string{name}; + }); return {layerNameStrs, weightsRoles}; } @@ -1496,13 +1499,15 @@ std::pair, std::vector> getMissingLayerWei refitter.getMissing(nbMissing, layerNames.data(), weightsRoles.data()); // Convert null names in `layerNames` to empty strings: std::vector layerNameStrs(nbMissing); - std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), [](char const* name) { - if (name == nullptr) + std::transform(layerNames.begin(), layerNames.end(), layerNameStrs.begin(), + [](char const* name) { - return std::string{}; - } - return std::string{name}; - }); + if (name == nullptr) + { + return std::string{}; + } + return std::string{name}; + }); return {std::move(layerNameStrs), std::move(weightsRoles)}; } } // namespace @@ -1521,10 +1526,10 @@ bool 
loadAsyncStreamingEngineToBuildEnv(std::string const& filepath, BuildEnviro return true; } - bool loadEngineToBuildEnv(std::string const& filepath, BuildEnvironment& env, std::ostream& err, SystemOptions const& sys, bool const enableConsistency) { + static_cast(sys); auto const tBegin = std::chrono::high_resolution_clock::now(); std::ifstream engineFile(filepath, std::ios::binary); SMP_RETVAL_IF_FALSE(engineFile.good(), "", false, err << "Error opening engine file: " << filepath); @@ -1538,7 +1543,7 @@ bool loadEngineToBuildEnv(std::string const& filepath, BuildEnvironment& env, st auto const tEnd = std::chrono::high_resolution_clock::now(); float const loadTime = std::chrono::duration(tEnd - tBegin).count(); sample::gLogInfo << "Engine loaded in " << loadTime << " sec." << std::endl; - sample::gLogInfo << "Loaded engine with size: " << (fsize / 1.0_MiB) << " MiB" << std::endl; + sample::gLogInfo << "Loaded engine with size: " << static_cast(fsize / 1.0_MiB) << " MiB" << std::endl; if (enableConsistency) { @@ -1593,9 +1598,9 @@ bool printPlanVersion(BuildEnvironment& env, std::ostream& err) case 0U: { // Blob index to store the plan version may depend on the serialization version. - sample::gLogInfo << "Plan was created with TensorRT version " << static_cast(blob[24]) - << "." << static_cast(blob[25]) << "." << static_cast(blob[26]) - << "." << static_cast(blob[27]) << std::endl; + sample::gLogInfo << "Plan was created with TensorRT version " << static_cast(blob[24]) << "." + << static_cast(blob[25]) << "." << static_cast(blob[26]) << "." 
+ << static_cast(blob[27]) << std::endl; return true; } } @@ -1889,11 +1894,11 @@ bool timeRefit(INetworkDefinition const& network, nvinfer1::ICudaEngine& engine, std::inserter(layerRoleSet, layerRoleSet.begin()), [](std::string const& layerName, WeightsRole const role) { return std::make_pair(layerName, role); }); - auto const isRefittable = [&layerRoleSet](char const* layerName, WeightsRole const role) { - return layerRoleSet.find(std::make_pair(layerName, role)) != layerRoleSet.end(); - }; + auto const isRefittable = [&layerRoleSet](char const* layerName, WeightsRole const role) + { return layerRoleSet.find(std::make_pair(layerName, role)) != layerRoleSet.end(); }; - auto const setWeights = [&] { + auto const setWeights = [&] + { for (int32_t i = 0; i < nbLayers; i++) { auto const layer = network.getLayer(i); @@ -1913,16 +1918,17 @@ bool timeRefit(INetworkDefinition const& network, nvinfer1::ICudaEngine& engine, return true; }; - auto const reportMissingWeights = [&] { + auto const reportMissingWeights = [&] + { auto const& missingPair = getMissingLayerWeightsRolePair(*refitter); - auto const& layerNames = missingPair.first; - auto const& weightsRoles = missingPair.second; - for (size_t i = 0; i < layerNames.size(); ++i) + auto const& missingLayerNames = missingPair.first; + auto const& missingWeightsRoles = missingPair.second; + for (size_t i = 0; i < missingLayerNames.size(); ++i) { - sample::gLogError << "Missing (" << layerNames[i] << ", " << weightsRoles[i] << ") for refitting." - << std::endl; + sample::gLogError << "Missing (" << missingLayerNames[i] << ", " << missingWeightsRoles[i] + << ") for refitting." 
<< std::endl; } - return layerNames.empty(); + return missingLayerNames.empty(); }; // Skip weights validation since we are confident that the new weights are similar to the weights used to build @@ -2023,7 +2029,7 @@ std::unique_ptr createConsiste if (auto const createFn = reinterpret_cast(dlsym(kCONSISTENCY_CHECKER_LIBRARY.get(), symbolName))) { - if (nvinfer2::safe::consistency::IConsistencyChecker * checker{nullptr}; + if (nvinfer2::safe::consistency::IConsistencyChecker* checker{nullptr}; ErrorCode::kSUCCESS == createFn(checker, recorder, serializedEngine, engineSize, pluginBuildLibPath)) { return std::unique_ptr{checker}; @@ -2049,6 +2055,9 @@ bool checkSafeEngine( void const* serializedEngine, int64_t const engineSize, std::vector const& pluginBuildLibPath) { #if !ENABLE_UNIFIED_BUILDER + static_cast(serializedEngine); + static_cast(engineSize); + static_cast(pluginBuildLibPath); return false; #else if (!hasConsistencyChecker()) diff --git a/samples/common/sampleEngines.h b/samples/common/sampleEngines.h index 3adae4a5b..da096f66a 100644 --- a/samples/common/sampleEngines.h +++ b/samples/common/sampleEngines.h @@ -71,19 +71,19 @@ class LazilyDeserializedEngine //! //! \brief Constructor of LazilyDeserializedEngine. //! 
- LazilyDeserializedEngine(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir, - nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath) - : mIsSafe(isSafe) - , mVersionCompatible(versionCompatible) - , mDLACore(DLACore) - , mTempdir(tempdir) - , mTempfileControls(tempfileControls) - , mLeanDLLPath(leanDLLPath) + LazilyDeserializedEngine(bool isSafe_, bool versionCompatible_, int32_t DLACore_, std::string const& tempdir_, + nvinfer1::TempfileControlFlags tempfileControls_, std::string const& leanDLLPath_) + : mIsSafe(isSafe_) + , mVersionCompatible(versionCompatible_) + , mDLACore(DLACore_) + , mTempdir(tempdir_) + , mTempfileControls(tempfileControls_) + , mLeanDLLPath(leanDLLPath_) { // Only one of these is relevant for any given trtexec call. // Enabled using --asyncFileReader flag. mAsyncFileReader = std::make_unique(); - // Enabled using --load flag. + // Enabled using --load flag. mFileReader = std::make_unique(); } @@ -216,7 +216,6 @@ class LazilyDeserializedEngine return *mAsyncFileReader; } - //! //! \brief Get if safe mode is enabled. //! @@ -238,7 +237,6 @@ class LazilyDeserializedEngine std::unique_ptr mFileReader; std::unique_ptr mAsyncFileReader; - // Directly use the host memory of a serialized engine instead of duplicating the engine in CPU memory. 
std::unique_ptr mEngineBlobHostMemory; @@ -272,12 +270,12 @@ struct BuildEnvironment BuildEnvironment() = delete; BuildEnvironment(BuildEnvironment const& other) = delete; BuildEnvironment(BuildEnvironment&& other) = delete; - BuildEnvironment(bool isSafe, bool versionCompatible, int32_t DLACore, std::string const& tempdir, - nvinfer1::TempfileControlFlags tempfileControls, std::string const& leanDLLPath = "", - std::string const& cmdline = "") - : engine(isSafe, versionCompatible, DLACore, tempdir, tempfileControls, leanDLLPath) - , kernelText(false, false, -1, "", tempfileControls, "") - , cmdline(cmdline) + BuildEnvironment(bool isSafe_, bool versionCompatible_, int32_t DLACore_, std::string const& tempdir_, + nvinfer1::TempfileControlFlags tempfileControls_, std::string const& leanDLLPath_ = "", + std::string const& cmdline_ = "") + : engine(isSafe_, versionCompatible_, DLACore_, tempdir_, tempfileControls_, leanDLLPath_) + , kernelText(false, false, -1, "", tempfileControls_, "") + , cmdline(cmdline_) { } @@ -380,7 +378,6 @@ bool timeRefit(const nvinfer1::INetworkDefinition& network, nvinfer1::ICudaEngin void setTensorScalesFromCalibration(nvinfer1::INetworkDefinition& network, std::vector const& inputFormats, std::vector const& outputFormats, std::string const& calibrationFile); - //! \brief Check if safe runtime is loaded. 
[[nodiscard]] bool hasSafeRuntime(); diff --git a/samples/common/sampleInference.cpp b/samples/common/sampleInference.cpp index e50b59add..75ca0a53f 100644 --- a/samples/common/sampleInference.cpp +++ b/samples/common/sampleInference.cpp @@ -113,7 +113,8 @@ bool initNvinferSafe() { #if !TRT_STATIC static LibraryPtr libnvinfersafePtr{}; - auto fetchPtrs = [](samplesCommon::DynamicLibrary* l) { + auto fetchPtrs = [](samplesCommon::DynamicLibrary* l) + { if (gUseRuntime == RuntimeMode::kSAFE) { pcreateTRTGraphInternal = l->symbolAddress outputTensors; - outputTensors.reserve(endBindingIndex); + outputTensors.reserve(endBindingIndex_); - for (int32_t b = 0; b < endBindingIndex; b++) + for (int32_t b = 0; b < endBindingIndex_; b++) { TensorInfo tensorInfo; tensorInfo.bindingIndex = b; getTensorInfo(tensorInfo); - tensorInfo.updateVolume(batch); + tensorInfo.updateVolume(batch_); if (tensorInfo.isInput) { fillOneBinding(tensorInfo); @@ -375,9 +376,8 @@ bool allocateContextMemory(InferenceEnvironmentStd& iEnv, InferenceOptions const auto const& ec = iEnv.contexts.at(i); if (inference.memoryAllocationStrategy == MemoryAllocationStrategy::kSTATIC) { - sample::gLogInfo << "Created execution context with device memory size: " << - (engine->getDeviceMemorySize() / 1.0_MiB) - << " MiB" << std::endl; + sample::gLogInfo << "Created execution context with device memory size: " + << static_cast(engine->getDeviceMemorySize() / 1.0_MiB) << " MiB" << std::endl; } else { @@ -402,9 +402,9 @@ bool allocateContextMemory(InferenceEnvironmentStd& iEnv, InferenceOptions const iEnv.deviceMemory.at(i) = TrtDeviceBuffer(sizeToAlloc); ec->setDeviceMemoryV2(iEnv.deviceMemory.at(i).get(), iEnv.deviceMemory.at(i).getSize()); sample::gLogInfo << "Maximum device memory size across all profiles: " - << (engine->getDeviceMemorySizeV2() / 1.0_MiB) << " MiB" << std::endl; + << static_cast(engine->getDeviceMemorySizeV2() / 1.0_MiB) << " MiB" << std::endl; sample::gLogInfo << "Only allocated device memory 
enough for " << allocReason << ": " - << (sizeToAlloc / 1.0_MiB) << " MiB" << std::endl; + << static_cast(sizeToAlloc / 1.0_MiB) << " MiB" << std::endl; } } return true; @@ -430,7 +430,6 @@ void contractInt64ToInt32(std::vector& shapeData) } // namespace - bool setUpInference(InferenceEnvironmentBase& iEnv, InferenceOptions const& inference, SystemOptions const& system) { #if ENABLE_UNIFIED_BUILDER @@ -550,7 +549,8 @@ bool setUpSafeInference(InferenceEnvironmentSafe& iEnv, InferenceOptions const& } #endif -bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& inference, SystemOptions const& system) +bool setUpStdInference( + InferenceEnvironmentStd& iEnv, InferenceOptions const& inference, SystemOptions const& /*system*/) { int32_t device{}; CHECK(cudaGetDevice(&device)); @@ -570,7 +570,6 @@ bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& in // Release serialized blob to save memory space. iEnv.engine.releaseBlob(); - // Setup weight streaming if enabled if (engine->getStreamableWeightsSize() > 0) { @@ -581,7 +580,8 @@ bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& in double const percent = budget.percent; ASSERT(percent < 100.0); auto const max = engine->getStreamableWeightsSize(); - wsBudget = (max >= 0) ? (percent / 100) * (max) : WeightStreamingBudget::kDISABLE; + wsBudget = (max >= 0) ? static_cast((percent / 100.0) * static_cast(max)) + : WeightStreamingBudget::kDISABLE; } if (wsBudget == WeightStreamingBudget::kDISABLE) @@ -642,8 +642,9 @@ bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& in IExecutionContext* ec{nullptr}; //! \return the `ExecutionContextAllocationStrategy` to use for the given allocation strategy, \p s. 
- auto getExecutionContextAllocationStrategy = [](MemoryAllocationStrategy s) { - return s == MemoryAllocationStrategy::kSTATIC + auto getExecutionContextAllocationStrategy = [](MemoryAllocationStrategy strategy) + { + return strategy == MemoryAllocationStrategy::kSTATIC // Let TRT pre-allocate and manage the memory. ? ExecutionContextAllocationStrategy::kSTATIC // Allocate based on the current profile or runtime shapes. @@ -658,8 +659,8 @@ bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& in } ec->setNvtxVerbosity(inference.nvtxVerbosity); - int32_t const persistentCacheLimit - = samplesCommon::getMaxPersistentCacheSize() * inference.persistentCacheRatio; + int32_t const persistentCacheLimit = static_cast( + static_cast(samplesCommon::getMaxPersistentCacheSize()) * inference.persistentCacheRatio); sample::gLogInfo << "Setting persistentCacheLimit to " << persistentCacheLimit << " bytes." << std::endl; ec->setPersistentCacheLimit(persistentCacheLimit); @@ -832,7 +833,6 @@ bool setUpStdInference(InferenceEnvironmentStd& iEnv, InferenceOptions const& in bool fillBindingsSuccess = FillStdBindings( engine, context, inference.inputs, iEnv.bindings, 1, endBindingIndex, inference.optProfileIndex)(); - return fillBindingsSuccess; } @@ -1463,7 +1463,8 @@ void inferenceExecution(InferenceOptions const& inference, InferenceEnvironmentB if (iEnv.safe) { //! Function to make one iteration: - auto makeIteration = [&](int32_t s) -> std::unique_ptr { + auto makeIteration = [&](int32_t s) -> std::unique_ptr + { int32_t const streamId{threadIdx * streamsPerThread + s}; auto iteration = std::make_unique(streamId, inference, *static_cast(iEnv).mClonedGraphs[streamId], @@ -1506,7 +1507,8 @@ void inferenceExecution(InferenceOptions const& inference, InferenceEnvironmentB #endif //! 
Function to make one iteration: - auto makeIteration = [&](int32_t s) -> std::unique_ptr { + auto makeIteration = [&](int32_t s) -> std::unique_ptr + { int32_t const streamId{threadIdx * streamsPerThread + s}; auto iteration = std::make_unique(streamId, inference, *static_cast(iEnv).getContext(streamId), @@ -1601,7 +1603,6 @@ bool runInference(InferenceOptions const& inference, InferenceEnvironmentBase& i auto cmpTrace = [](InferenceTrace const& a, InferenceTrace const& b) { return a.h2dStart < b.h2dStart; }; std::sort(trace.begin(), trace.end(), cmpTrace); - return !iEnv.error; } @@ -1620,9 +1621,8 @@ bool runMultiTasksInference(std::vectoriOptions, *(tEnv->iEnv), sync, /*threadIdx*/ 0, /*streamsPerThread*/ 1, tEnv->device, tEnv->trace, - tEnv->rOptions)); + threads.emplace_back(makeThread(tEnv->iOptions, *(tEnv->iEnv), sync, /*threadIdx*/ 0, /*streamsPerThread*/ 1, + tEnv->device, tEnv->trace, tEnv->rOptions)); } for (auto& th : threads) { @@ -1650,11 +1650,12 @@ size_t reportGpuMemory() size_t total{0}; size_t newlyAllocated{0}; CHECK(cudaMemGetInfo(&free, &total)); - sample::gLogInfo << "Free GPU memory = " << free / 1024.0_MiB << " GiB"; + sample::gLogInfo << "Free GPU memory = " << static_cast(free / 1024.0_MiB) << " GiB"; if (prevFree != 0) { newlyAllocated = (prevFree - free); - sample::gLogInfo << ", newly allocated GPU memory = " << newlyAllocated / 1024.0_MiB << " GiB"; + sample::gLogInfo << ", newly allocated GPU memory = " << static_cast(newlyAllocated / 1024.0_MiB) + << " GiB"; } sample::gLogInfo << ", total GPU memory = " << total / 1024.0_MiB << " GiB" << std::endl; prevFree = free; @@ -1671,7 +1672,8 @@ bool timeDeserialize(InferenceEnvironmentBase& iEnv, SystemOptions const& sys) SMP_RETVAL_IF_FALSE(!iEnv.safe, "Safe inference is not supported!", false, sample::gLogError); - auto timeDeserializeFn = [&]() -> float { + auto timeDeserializeFn = [&]() -> float + { bool deserializeOK{false}; engine.reset(nullptr); auto startClock = 
std::chrono::high_resolution_clock::now(); @@ -1736,7 +1738,9 @@ bool timeDeserialize(InferenceEnvironmentBase& iEnv, SystemOptions const& sys) sample::gLogInfo << "Total deserialization time = " << totalTime << " milliseconds in " << kNB_ITERS << " iterations, average time = " << averageTime << " milliseconds, first time = " << first << " milliseconds." << std::endl; - sample::gLogInfo << "Deserialization Bandwidth = " << 1E-6 * totalEngineSizeGpu / totalTime << " GB/s" << std::endl; + sample::gLogInfo << "Deserialization Bandwidth = " + << 1E-6 * static_cast(totalEngineSizeGpu) / static_cast(totalTime) << " GB/s" + << std::endl; // If the first deserialization is more than tolerance slower than // the average deserialization, return true, which means an error occurred. @@ -1937,7 +1941,8 @@ void BindingsBase::addBinding( } //! Make a UnifiedMirroredBuffer if useManaged or Discrete othereise: - auto makeBuffer = [](bool useManaged) -> std::shared_ptr { + auto makeBuffer = [](bool useManaged) -> std::shared_ptr + { if (useManaged) { return std::make_shared(); @@ -2021,7 +2026,7 @@ void BindingsBase::transferOutputToHost(TrtCudaStream& stream) } void BindingsStd::dumpBindingValues(nvinfer1::IExecutionContext const& context, int32_t binding, std::ostream& os, - std::string const& separator /*= " "*/, int32_t batch /*= 1*/) const + std::string const& separator /*= " "*/, int32_t /*batch*/ /*= 1*/) const { auto const tensorName = context.getEngine().getIOTensorName(binding); Dims dims = context.getTensorShape(tensorName); diff --git a/samples/common/sampleInference.h b/samples/common/sampleInference.h index ba2bb6338..a86ddf1f0 100644 --- a/samples/common/sampleInference.h +++ b/samples/common/sampleInference.h @@ -343,7 +343,7 @@ class BindingsBase std::unordered_map getBindings() const { - auto all = [](Binding const& b) { return true; }; + auto all = [](Binding const& /*b*/) { return true; }; return getBindings(all); } @@ -379,7 +379,7 @@ class BindingsStd : public 
BindingsBase void dumpBindings(nvinfer1::IExecutionContext const& context, std::ostream& os) const { - auto all = [](Binding const& b) { return true; }; + auto all = [](Binding const& /*b*/) { return true; }; dumpBindings(context, all, os); } @@ -436,7 +436,7 @@ class BindingsSafe : public BindingsBase void dumpBindings(ITRTGraph const& graph, std::ostream& os) const { - auto all = [](Binding const& b) { return true; }; + auto all = [](Binding const& /*b*/) { return true; }; dumpBindings(graph, all, os); } @@ -472,8 +472,7 @@ class BindingsSafe : public BindingsBase struct TaskInferenceEnvironment { TaskInferenceEnvironment(std::string engineFile, InferenceOptions const& inference, - ReportingOptions const& reporting, int32_t deviceId = 0, - int32_t DLACore = -1, int32_t bs = batchNotProvided); + ReportingOptions const& reporting, int32_t deviceId = 0, int32_t DLACore = -1, int32_t bs = batchNotProvided); InferenceOptions iOptions{}; ReportingOptions rOptions{}; int32_t device{defaultDevice}; diff --git a/samples/common/sampleOptions.cpp b/samples/common/sampleOptions.cpp index 6ef03e5e0..26e503c29 100644 --- a/samples/common/sampleOptions.cpp +++ b/samples/common/sampleOptions.cpp @@ -71,7 +71,7 @@ int64_t getUnitMultiplier(std::string const& option) char lastChar = option.at(option.size() - 1); if (!std::isdigit(lastChar)) { - char unit = std::toupper(lastChar); + char unit = static_cast(std::toupper(lastChar)); auto found = kUNIT_MULTIPLIERS.find(unit); if (found == kUNIT_MULTIPLIERS.end()) { @@ -103,13 +103,13 @@ int32_t stringToValue(const std::string& option) template <> int64_t stringToValue(const std::string& option) { - return std::stoi(option); + return static_cast(std::stoi(option)); } template <> size_t stringToValue(const std::string& option) { - return std::stoi(option) * getUnitMultiplier(option); + return static_cast(std::stoi(option) * getUnitMultiplier(option)); } template <> @@ -121,11 +121,11 @@ float stringToValue(const std::string& option) 
template <> double stringToValue(const std::string& option) { - return std::stod(option) * getUnitMultiplier(option); + return std::stod(option) * static_cast(getUnitMultiplier(option)); } template <> -bool stringToValue(const std::string& option) +bool stringToValue(const std::string& /*option*/) { return true; } @@ -223,11 +223,7 @@ template <> SparsityFlag stringToValue(std::string const& option) { std::unordered_map const table{ - {"disable", SparsityFlag::kDISABLE}, {"enable", SparsityFlag::kENABLE}, - { - "force", SparsityFlag::kFORCE - } - }; + {"disable", SparsityFlag::kDISABLE}, {"enable", SparsityFlag::kENABLE}, {"force", SparsityFlag::kFORCE}}; auto search = table.find(option); if (search == table.end()) { @@ -287,7 +283,6 @@ samplesSafeCommon::SafetyPluginLibraryArgument stringToValue std::pair splitNameAndValue(const std::string& s) { @@ -346,7 +341,7 @@ template std::string joinValuesToString(std::vector const& list, std::string const& sep) { std::ostringstream os; - for (int32_t i = 0, n = list.size(); i < n; ++i) + for (int32_t i = 0, n = static_cast(list.size()); i < n; ++i) { os << list[i]; if (i != n - 1) @@ -718,16 +713,17 @@ bool getOptimizationProfiles( size_t profileIndex{}; auto getShapes - = [](BuildOptions::ShapeProfile& shapes, std::string const& list, nvinfer1::OptProfileSelector selector) { - std::vector shapeList{splitToStringVec(list, ',')}; - for (auto const& s : shapeList) - { - auto nameDimsPair = splitNameAndValue>(s); - auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); - auto dims = nameDimsPair.second; - insertShapesBuild(shapes, selector, tensorName, dims); - } - }; + = [](BuildOptions::ShapeProfile& shapes, std::string const& list, nvinfer1::OptProfileSelector selector) + { + std::vector shapeList{splitToStringVec(list, ',')}; + for (auto const& s : shapeList) + { + auto nameDimsPair = splitNameAndValue>(s); + auto tensorName = removeSingleQuotationMarks(nameDimsPair.first); + auto dims = nameDimsPair.second; + 
insertShapesBuild(shapes, selector, tensorName, dims); + } + }; while (getAndDelOptionWithPosition(arguments, argument, profileIndex, pos)) { @@ -964,7 +960,8 @@ std::ostream& printPreviewFlags(std::ostream& os, BuildOptions const& options) return os; } - auto const addFlag = [&](PreviewFeature feat) { + auto const addFlag = [&](PreviewFeature feat) + { int32_t featVal = static_cast(feat); if (options.previewFeatures.find(featVal) != options.previewFeatures.end()) { @@ -1100,7 +1097,8 @@ void getTempfileControls(Arguments& arguments, char const* argument, TempfileCon void BuildOptions::parse(Arguments& arguments) { getAndDelOption(arguments, "--cpuOnly", cpuOnly); - auto getFormats = [&arguments](std::vector& formatsVector, const char* argument) { + auto getFormats = [&arguments](std::vector& formatsVector, const char* argument) + { std::string list; getAndDelOption(arguments, argument, list); std::vector formats{splitToStringVec(list, ',')}; @@ -1191,27 +1189,27 @@ void BuildOptions::parse(Arguments& arguments) if (memPoolName == "workspace") { // use unit in MB. - workspace = memPoolSize / 1.0_MiB; + workspace = static_cast(memPoolSize / 1.0_MiB); } else if (memPoolName == "dlaSRAM") { // use unit in MB. - dlaSRAM = memPoolSize / 1.0_MiB; + dlaSRAM = static_cast(memPoolSize / 1.0_MiB); } else if (memPoolName == "dlaLocalDRAM") { // use unit in MB. - dlaLocalDRAM = memPoolSize / 1.0_MiB; + dlaLocalDRAM = static_cast(memPoolSize / 1.0_MiB); } else if (memPoolName == "dlaGlobalDRAM") { // use unit in MB. - dlaGlobalDRAM = memPoolSize / 1.0_MiB; + dlaGlobalDRAM = static_cast(memPoolSize / 1.0_MiB); } else if (memPoolName == "tacticSharedMem") { // use unit in KB. 
- tacticSharedMem = memPoolSize / 1.0_KiB; + tacticSharedMem = static_cast(memPoolSize / 1.0_KiB); } else if (!memPoolName.empty()) { @@ -1477,23 +1475,22 @@ void BuildOptions::parse(Arguments& arguments) { source = nvinfer1::TacticSource::kCUBLAS_LT; } + else if (t == "CUDNN") + { + source = nvinfer1::TacticSource::kCUDNN; + } + else if (t == "EDGE_MASK_CONVOLUTIONS") + { + source = nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS; + } + else if (t == "JIT_CONVOLUTIONS") + { + source = nvinfer1::TacticSource::kJIT_CONVOLUTIONS; + } else - if (t == "CUDNN") - { - source = nvinfer1::TacticSource::kCUDNN; - } - else if (t == "EDGE_MASK_CONVOLUTIONS") - { - source = nvinfer1::TacticSource::kEDGE_MASK_CONVOLUTIONS; - } - else if (t == "JIT_CONVOLUTIONS") - { - source = nvinfer1::TacticSource::kJIT_CONVOLUTIONS; - } - else - { - throw std::invalid_argument(std::string("Unknown tactic source: ") + t); - } + { + throw std::invalid_argument(std::string("Unknown tactic source: ") + t); + } uint32_t sourceBit = 1U << static_cast(source); @@ -1908,20 +1905,24 @@ void AllOptions::parse(Arguments& arguments) if (build.buildDLAStandalone) { build.skipInference = true; - auto checkSafeDLAFormats = [](std::vector const& fmt, bool isInput) { - return !fmt.empty() && std::all_of(fmt.begin(), fmt.end(), [&](IOFormat const& pair) { - auto const& [dataType, tensorFormats] = pair; - using TF = nvinfer1::TensorFormat; - using nvinfer1::DataType; - - bool const isDLA_LINEAR{tensorFormats == 1U << static_cast(TF::kDLA_LINEAR)}; - bool const isHWC4{tensorFormats == 1U << static_cast(TF::kCHW4) - || tensorFormats == 1U << static_cast(TF::kDLA_HWC4)}; - bool const isCHW32{tensorFormats == 1U << static_cast(TF::kCHW32)}; - bool const isCHW16{tensorFormats == 1U << static_cast(TF::kCHW16)}; - return (dataType == DataType::kINT8 && (isDLA_LINEAR || (isInput && isHWC4) || isCHW32)) - || (dataType == DataType::kHALF && (isDLA_LINEAR || (isInput && isHWC4) || isCHW16)); - }); + auto 
checkSafeDLAFormats = [](std::vector const& fmt, bool isInput) + { + return !fmt.empty() + && std::all_of(fmt.begin(), fmt.end(), + [&](IOFormat const& pair) + { + auto const& [dataType, tensorFormats] = pair; + using TF = nvinfer1::TensorFormat; + using nvinfer1::DataType; + + bool const isDLA_LINEAR{tensorFormats == 1U << static_cast(TF::kDLA_LINEAR)}; + bool const isHWC4{tensorFormats == 1U << static_cast(TF::kCHW4) + || tensorFormats == 1U << static_cast(TF::kDLA_HWC4)}; + bool const isCHW32{tensorFormats == 1U << static_cast(TF::kCHW32)}; + bool const isCHW16{tensorFormats == 1U << static_cast(TF::kCHW16)}; + return (dataType == DataType::kINT8 && (isDLA_LINEAR || (isInput && isHWC4) || isCHW32)) + || (dataType == DataType::kHALF && (isDLA_LINEAR || (isInput && isHWC4) || isCHW16)); + }); }; if (!checkSafeDLAFormats(build.inputFormats, true) || !checkSafeDLAFormats(build.outputFormats, false)) { @@ -1954,7 +1955,8 @@ void TaskInferenceOptions::parse(Arguments& arguments) void SafeBuilderOptions::parse(Arguments& arguments) { - auto getFormats = [&arguments](std::vector& formatsVector, const char* argument) { + auto getFormats = [&arguments](std::vector& formatsVector, const char* argument) + { std::string list; getAndDelOption(arguments, argument, list); std::vector formats{splitToStringVec(list, ',')}; @@ -2212,7 +2214,6 @@ std::ostream& operator<<(std::ostream& os, nvinfer1::DeviceType devType) return os; } - std::ostream& operator<<(std::ostream& os, nvinfer1::RuntimePlatform platform) { switch (platform) @@ -2345,16 +2346,17 @@ std::ostream& operator<<(std::ostream& os, const BuildOptions& options) "Mark Unfused Tensors As Debug Tensors: " << boolToEnabled(options.markUnfusedTensorsAsDebugTensors) << std::endl; // clang-format on - auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector formats) { + auto printIOFormats = [](std::ostream& oss, const char* direction, const std::vector formats) + { if (formats.empty()) { - os << 
direction << "s format: fp32:CHW" << std::endl; + oss << direction << "s format: fp32:CHW" << std::endl; } else { for (const auto& f : formats) { - os << direction << ": " << f << std::endl; + oss << direction << ": " << f << std::endl; } } }; @@ -2363,7 +2365,7 @@ std::ostream& operator<<(std::ostream& os, const BuildOptions& options) printIOFormats(os, "Output(s)", options.outputFormats); for (size_t i = 0; i < options.optProfiles.size(); i++) { - printShapes(os, "build", options.optProfiles[i], i); + printShapes(os, "build", options.optProfiles[i], static_cast(i)); } printShapes(os, "calibration", options.shapesCalib, -1); @@ -2503,17 +2505,17 @@ std::ostream& operator<<(std::ostream& os, const AllOptions& options) std::ostream& operator<<(std::ostream& os, const SafeBuilderOptions& options) { - auto printIOFormats = [](std::ostream& os, const char* direction, const std::vector formats) + auto printIOFormats = [](std::ostream& oss, const char* direction, const std::vector formats) { if (formats.empty()) { - os << direction << "s format: fp32:CHW" << std::endl; + oss << direction << "s format: fp32:CHW" << std::endl; } else { for (const auto& f : formats) { - os << direction << ": " << f << std::endl; + oss << direction << ": " << f << std::endl; } } }; diff --git a/samples/common/sampleReporting.cpp b/samples/common/sampleReporting.cpp index febb08699..2ea70008c 100644 --- a/samples/common/sampleReporting.cpp +++ b/samples/common/sampleReporting.cpp @@ -51,7 +51,7 @@ template float findPercentile(float percentile, std::vector const& timings, T const& toFloat) { int32_t const all = static_cast(timings.size()); - int32_t const exclude = static_cast((1 - percentile / 100) * all); + int32_t const exclude = static_cast((1.0F - percentile / 100.0F) * static_cast(all)); if (timings.empty()) { return std::numeric_limits::infinity(); @@ -74,7 +74,7 @@ float findMedian(std::vector const& timings, T const& toFloat) return std::numeric_limits::infinity(); } - int32_t const 
m = timings.size() / 2; + int32_t const m = static_cast(timings.size() / 2); if (timings.size() % 2) { return toFloat(timings[m]); @@ -99,11 +99,13 @@ float findCoeffOfVariance(std::vector const& timings, T const& to return std::numeric_limits::infinity(); } - auto const metricAccumulator = [toFloat, mean](float acc, InferenceTime const& a) { + auto const metricAccumulator = [toFloat, mean](float acc, InferenceTime const& a) + { float const diff = toFloat(a) - mean; return acc + diff * diff; }; - float const variance = std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) / timings.size(); + float const variance + = std::accumulate(timings.begin(), timings.end(), 0.F, metricAccumulator) / static_cast(timings.size()); return std::sqrt(variance) / mean * 100.F; } @@ -137,7 +139,7 @@ inline std::string dimsToString(Dims const& shape) void printProlog(int32_t warmups, int32_t timings, float warmupMs, float benchTimeMs, std::ostream& os) { os << "Warmup completed " << warmups << " queries over " << warmupMs << " ms" << std::endl; - os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000 << " s" << std::endl; + os << "Timing trace has " << timings << " queries over " << benchTimeMs / 1000.0F << " s" << std::endl; } void printTiming(std::vector const& timings, int32_t runsPerAvg, std::ostream& os) @@ -167,8 +169,8 @@ void printTiming(std::vector const& timings, int32_t runsPerAvg, if (++count == runsPerAvg) { // clang-format off - os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / runsPerAvg - << " ms - Host latency: " << sum.latency() / runsPerAvg << " ms (enqueue " << sum.enq / runsPerAvg + os << "Average on " << runsPerAvg << " runs - GPU latency: " << sum.compute / static_cast(runsPerAvg) + << " ms - Host latency: " << sum.latency() / static_cast(runsPerAvg) << " ms (enqueue " << sum.enq / static_cast(runsPerAvg) << " ms)" << std::endl; // clang-format on count = 0; @@ -219,7 +221,8 @@ 
PerformanceResult getPerformanceResult(std::vector const& timings PerformanceResult result; result.min = metricGetter(newTimings.front()); result.max = metricGetter(newTimings.back()); - result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0F, metricAccumulator) / newTimings.size(); + result.mean = std::accumulate(newTimings.begin(), newTimings.end(), 0.0F, metricAccumulator) + / static_cast(newTimings.size()); result.median = findMedian(newTimings, metricGetter); for (auto percentile : percentiles) { @@ -232,7 +235,7 @@ PerformanceResult getPerformanceResult(std::vector const& timings void printEpilog(std::vector const& timings, float walltimeMs, std::vector const& percentiles, int32_t batchSize, int32_t infStreams, std::ostream& osInfo, std::ostream& osWarning, std::ostream& osVerbose) { - float const throughput = batchSize * timings.size() / walltimeMs * 1000; + float const throughput = static_cast(batchSize) * static_cast(timings.size()) / walltimeMs * 1000; auto const getLatency = [](InferenceTime const& t) { return t.latency(); }; auto const latencyResult = getPerformanceResult(timings, getLatency, percentiles); @@ -249,11 +252,12 @@ void printEpilog(std::vector const& timings, float walltimeMs, st auto const getD2h = [](InferenceTime const& t) { return t.d2h; }; auto const d2hResult = getPerformanceResult(timings, getD2h, percentiles); - auto const toPerfString = [&](const PerformanceResult& r) { + auto const toPerfString = [&](const PerformanceResult& r) + { std::stringstream s; s << "min = " << r.min << " ms, max = " << r.max << " ms, mean = " << r.mean << " ms, " << "median = " << r.median << " ms"; - for (int32_t i = 0, n = percentiles.size(); i < n; ++i) + for (int32_t i = 0, n = static_cast(percentiles.size()); i < n; ++i) { s << ", percentile(" << percentiles[i] << "%) = " << r.percentiles[i] << " ms"; } @@ -269,7 +273,8 @@ void printEpilog(std::vector const& timings, float walltimeMs, st osInfo << "GPU Compute Time: " << 
toPerfString(gpuComputeResult) << std::endl; osInfo << "D2H Latency: " << toPerfString(d2hResult) << std::endl; osInfo << "Total Host Walltime: " << walltimeMs / 1000 << " s" << std::endl; - osInfo << "Total GPU Compute Time: " << gpuComputeResult.mean * timings.size() / 1000 << " s" << std::endl; + osInfo << "Total GPU Compute Time: " << gpuComputeResult.mean * static_cast(timings.size()) / 1000 << " s" + << std::endl; // Report warnings if the throughput is bound by other factors than GPU Compute Time. constexpr float kENQUEUE_BOUND_REPORTING_THRESHOLD{0.8F}; @@ -328,11 +333,12 @@ void printPerformanceReport(std::vector const& trace, ReportingO float const warmupMs = infOpts.warmup; auto const isNotWarmup = [&warmupMs](const InferenceTrace& a) { return a.computeStart >= warmupMs; }; auto const noWarmup = std::find_if(trace.begin(), trace.end(), isNotWarmup); - int32_t const warmups = noWarmup - trace.begin(); + int32_t const warmups = static_cast(noWarmup - trace.begin()); float const benchTime = trace.back().d2hEnd - noWarmup->h2dStart; // treat inference with explicit batch as a single query and report the throughput batchSize = batchSize ? 
batchSize : 1; - printProlog(warmups * batchSize, (trace.size() - warmups) * batchSize, warmupMs, benchTime, osInfo); + printProlog( + warmups * batchSize, static_cast(trace.size() - warmups) * batchSize, warmupMs, benchTime, osInfo); std::vector timings(trace.size() - warmups); std::transform(noWarmup, trace.end(), timings.begin(), traceToTiming); @@ -425,17 +431,17 @@ void Profiler::print(std::ostream& os) const noexcept continue; } // clang-format off - os << std::setw(timeLength) << std::fixed << std::setprecision(2) << getTotalTime(p) - << std::setw(avgLength) << std::fixed << std::setprecision(4) << getAvgTime(p) - << std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime(p) - << std::setw(percentageLength) << std::fixed << std::setprecision(1) << getTotalTime(p) / totalTimeMs * 100 + os << std::setw(static_cast(timeLength)) << std::fixed << std::setprecision(2) << getTotalTime(p) + << std::setw(static_cast(avgLength)) << std::fixed << std::setprecision(4) << getAvgTime(p) + << std::setw(static_cast(medLength)) << std::fixed << std::setprecision(4) << getMedianTime(p) + << std::setw(static_cast(percentageLength)) << std::fixed << std::setprecision(1) << getTotalTime(p) / totalTimeMs * 100 << " " << p.name << std::endl; } { - os << std::setw(timeLength) << std::fixed << std::setprecision(2) - << totalTimeMs << std::setw(avgLength) << std::fixed << std::setprecision(4) << totalTimeMs / mUpdatesCount - << std::setw(medLength) << std::fixed << std::setprecision(4) << getMedianTime() - << std::setw(percentageLength) << std::fixed << std::setprecision(1) << 100.0 + os << std::setw(static_cast(timeLength)) << std::fixed << std::setprecision(2) + << totalTimeMs << std::setw(static_cast(avgLength)) << std::fixed << std::setprecision(4) << totalTimeMs / static_cast(mUpdatesCount) + << std::setw(static_cast(medLength)) << std::fixed << std::setprecision(4) << getMedianTime() + << std::setw(static_cast(percentageLength)) << std::fixed << 
std::setprecision(1) << 100.0 << " Total" << std::endl; // clang-format on } diff --git a/samples/common/sampleReporting.h b/samples/common/sampleReporting.h index 013273a8d..4bba90849 100644 --- a/samples/common/sampleReporting.h +++ b/samples/common/sampleReporting.h @@ -210,9 +210,8 @@ class Profiler : public nvinfer1::IProfiler private: float getTotalTime() const noexcept { - auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) { - return accumulator + std::accumulate(lp.timeMs.begin(), lp.timeMs.end(), 0.F, std::plus()); - }; + auto const plusLayerTime = [](float accumulator, LayerProfile const& lp) + { return accumulator + std::accumulate(lp.timeMs.begin(), lp.timeMs.end(), 0.F, std::plus()); }; return std::accumulate(mLayers.begin(), mLayers.end(), 0.0F, plusLayerTime); } @@ -261,7 +260,7 @@ class Profiler : public nvinfer1::IProfiler float getAvgTime(LayerProfile const& p) const noexcept { - return getTotalTime(p) / p.timeMs.size(); + return getTotalTime(p) / static_cast(p.timeMs.size()); } std::vector mLayers; diff --git a/samples/common/sampleUtils.cpp b/samples/common/sampleUtils.cpp index 1ffcc546b..a7aa13e1d 100644 --- a/samples/common/sampleUtils.cpp +++ b/samples/common/sampleUtils.cpp @@ -166,7 +166,8 @@ void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vec TensorToLayer constO2L; TensorToLayer shuffleI2L; LayerToTensor shuffleL2O; - auto collectMappingInfo = [&](int32_t const idx) { + auto collectMappingInfo = [&](int32_t const idx) + { ILayer* l = network.getLayer(idx); switch (l->getType()) { @@ -210,7 +211,8 @@ void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vec auto isTranspose = [](nvinfer1::Permutation const& perm) -> bool { return (perm.order[0] == 1 && perm.order[1] == 0); }; auto is2D = [](nvinfer1::Dims const& dims) -> bool { return dims.nbDims == 2; }; - auto isIdenticalReshape = [](nvinfer1::Dims const& dims) -> bool { + auto isIdenticalReshape = 
[](nvinfer1::Dims const& dims) -> bool + { for (int32_t i = 0; i < dims.nbDims; ++i) { if (dims.d[i] != i || dims.d[i] != -1) @@ -220,7 +222,8 @@ void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vec } return true; }; - auto tensorReachedViaTranspose = [&](nvinfer1::ITensor* t, bool& needTranspose) -> ITensor* { + auto tensorReachedViaTranspose = [&](nvinfer1::ITensor* t, bool& needTranspose) -> ITensor* + { while (shuffleI2L.find(t) != shuffleI2L.end()) { nvinfer1::IShuffleLayer* s = static_cast(shuffleI2L.at(t)); @@ -276,12 +279,13 @@ void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vec } // 3. Finally, sparsify the weights - auto sparsifyConstantWeights = [&sparseWeights](nvinfer1::IConstantLayer* layer, bool const needTranspose) { + auto sparsifyConstantWeights = [&sparseWeights](nvinfer1::IConstantLayer* layer, bool const needTranspose) + { Dims dims = layer->getOutput(0)->getDimensions(); ASSERT(dims.nbDims == 2); int32_t const idxN = needTranspose ? 
1 : 0; - int32_t const n = dims.d[idxN]; - int32_t const k = dims.d[1 - idxN]; + int32_t const n = static_cast(dims.d[idxN]); + int32_t const k = static_cast(dims.d[1 - idxN]); sparseWeights.emplace_back(); std::vector& spw = sparseWeights.back(); Weights w = layer->getWeights(); @@ -293,12 +297,12 @@ void sparsifyMatMulKernelWeights(nvinfer1::INetworkDefinition& network, std::vec { if (dtype == nvinfer1::DataType::kFLOAT) { - spw.resize(w.count * sizeof(float)); + spw.resize(static_cast(w.count) * sizeof(float)); transpose2DWeights(spw.data(), w.values, k, n); } else if (dtype == nvinfer1::DataType::kHALF) { - spw.resize(w.count * sizeof(half_float::half)); + spw.resize(static_cast(w.count) * sizeof(half_float::half)); transpose2DWeights(spw.data(), w.values, k, n); } @@ -353,8 +357,9 @@ void sparsify(nvinfer1::INetworkDefinition& network, std::vector(layer); auto const& dims = conv.getKernelSizeNd(); ASSERT(dims.nbDims == 2 || dims.nbDims == 3); - auto const k = conv.getNbOutputMaps(); - auto const trs = std::accumulate(dims.d, dims.d + dims.nbDims, 1, std::multiplies()); + auto const k = static_cast(conv.getNbOutputMaps()); + auto const trs = static_cast( + std::accumulate(dims.d, dims.d + dims.nbDims, static_cast(1), std::multiplies())); sparseWeights.emplace_back(); setSparseWeights(conv, k, trs, sparseWeights.back()); } @@ -421,16 +426,16 @@ int32_t dataOffsetFromDims(int64_t v, Dims const& dims, Dims const& strides, int int32_t dataOffset = 0; for (int32_t dimIndex = dims.nbDims - 1; dimIndex >= 0; --dimIndex) { - int32_t dimVal = v % dims.d[dimIndex]; + int32_t dimVal = static_cast(v % dims.d[dimIndex]); if (dimIndex == vectorDim) { - dataOffset += (dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv; + dataOffset += static_cast((dimVal / spv) * strides.d[dimIndex] * spv + dimVal % spv); } else { - dataOffset += dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 1 : spv); + dataOffset += static_cast(dimVal * strides.d[dimIndex] * (vectorDim == -1 ? 
1 : spv)); } - v /= dims.d[dimIndex]; + v /= static_cast(dims.d[dimIndex]); ASSERT(v >= 0); } @@ -507,14 +512,14 @@ template void sparsify(T const* values, int64_t count, int32_t k, int32_t trs, std::vector& sparseWeights) { auto const c = count / (k * trs); - sparseWeights.resize(count * sizeof(T)); + sparseWeights.resize(static_cast(count) * sizeof(T)); auto* sparseValues = reinterpret_cast(sparseWeights.data()); constexpr int32_t window = 4; constexpr int32_t nonzeros = 2; - int32_t const crs = c * trs; - auto const getIndex = [=](int32_t ki, int32_t ci, int32_t rsi) { return ki * crs + ci * trs + rsi; }; + int64_t const crs = c * trs; + auto const getIndex = [=](int64_t ki, int64_t ci, int64_t rsi) { return ki * crs + ci * trs + rsi; }; for (int64_t ki = 0; ki < k; ++ki) { diff --git a/third_party/ieee/half.h b/third_party/ieee/half.h index c4df4b670..f7c3d1d9d 100644 --- a/third_party/ieee/half.h +++ b/third_party/ieee/half.h @@ -19,17 +19,18 @@ // // Copyright (c) 2012-2017 Christian Rau // -// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, -// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following conditions: +// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated +// documentation files (the "Software"), to deal in the Software without restriction, including without limitation the +// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to +// permit persons to whom the Software is furnished to do so, subject to the following conditions: // -// The above copyright notice and this permission notice shall be included in all 
copies or substantial portions of the Software. +// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the +// Software. // -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE -// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR -// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. // Version 1.12.0 @@ -39,168 +40,173 @@ #ifndef HALF_HALF_HPP #define HALF_HALF_HPP +// Third-party header - suppress compiler warnings +#if defined(__GNUC__) || defined(__clang__) +#pragma GCC system_header +#endif + /// Combined gcc version number. 
-#define HALF_GNUC_VERSION (__GNUC__*100+__GNUC_MINOR__) - -//check C++11 language features -#if defined(__clang__) //clang - #if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif +#define HALF_GNUC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) + +// check C++11 language features +#if defined(__clang__) // clang +#if __has_feature(cxx_static_assert) && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) +#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 +#endif +#if __has_feature(cxx_constexpr) && !defined(HALF_ENABLE_CPP11_CONSTEXPR) +#define HALF_ENABLE_CPP11_CONSTEXPR 1 +#endif +#if __has_feature(cxx_noexcept) && !defined(HALF_ENABLE_CPP11_NOEXCEPT) +#define HALF_ENABLE_CPP11_NOEXCEPT 1 +#endif +#if __has_feature(cxx_user_literals) && !defined(HALF_ENABLE_CPP11_USER_LITERALS) +#define HALF_ENABLE_CPP11_USER_LITERALS 1 +#endif +#if (defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L) && !defined(HALF_ENABLE_CPP11_LONG_LONG) +#define HALF_ENABLE_CPP11_LONG_LONG 1 +#endif /*#elif defined(__INTEL_COMPILER) //Intel C++ - #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) ???????? - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) ???????? - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) ???????? 
- #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG) ???????? - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif*/ -#elif defined(__GNUC__) //gcc - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L - #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif - #endif -#elif defined(_MSC_VER) //Visual C++ - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) - #define HALF_ENABLE_CPP11_CONSTEXPR 1 - #endif - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) - #define HALF_ENABLE_CPP11_NOEXCEPT 1 - #endif - #if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) - #define HALF_ENABLE_CPP11_USER_LITERALS 1 - #endif - #if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) - #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 - #endif - #if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) - #define HALF_ENABLE_CPP11_LONG_LONG 1 - #endif - #define HALF_POP_WARNINGS 1 - #pragma warning(push) - #pragma warning(disable : 4099 4127 4146) //struct vs class, constant in if, negative unsigned -#endif - -//check C++11 library features + #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) ???????? + #define HALF_ENABLE_CPP11_STATIC_ASSERT 1 + #endif + #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) ???????? 
+ #define HALF_ENABLE_CPP11_CONSTEXPR 1 + #endif + #if __INTEL_COMPILER >= 1300 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) ???????? + #define HALF_ENABLE_CPP11_NOEXCEPT 1 + #endif + #if __INTEL_COMPILER >= 1100 && !defined(HALF_ENABLE_CPP11_LONG_LONG) ???????? + #define HALF_ENABLE_CPP11_LONG_LONG 1 + #endif*/ +#elif defined(__GNUC__) // gcc +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103L +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) +#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 +#endif +#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) +#define HALF_ENABLE_CPP11_CONSTEXPR 1 +#endif +#if HALF_GNUC_VERSION >= 406 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) +#define HALF_ENABLE_CPP11_NOEXCEPT 1 +#endif +#if HALF_GNUC_VERSION >= 407 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) +#define HALF_ENABLE_CPP11_USER_LITERALS 1 +#endif +#if !defined(HALF_ENABLE_CPP11_LONG_LONG) +#define HALF_ENABLE_CPP11_LONG_LONG 1 +#endif +#endif +#elif defined(_MSC_VER) // Visual C++ +#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_CONSTEXPR) +#define HALF_ENABLE_CPP11_CONSTEXPR 1 +#endif +#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_NOEXCEPT) +#define HALF_ENABLE_CPP11_NOEXCEPT 1 +#endif +#if _MSC_VER >= 1900 && !defined(HALF_ENABLE_CPP11_USER_LITERALS) +#define HALF_ENABLE_CPP11_USER_LITERALS 1 +#endif +#if _MSC_VER >= 1600 && !defined(HALF_ENABLE_CPP11_STATIC_ASSERT) +#define HALF_ENABLE_CPP11_STATIC_ASSERT 1 +#endif +#if _MSC_VER >= 1310 && !defined(HALF_ENABLE_CPP11_LONG_LONG) +#define HALF_ENABLE_CPP11_LONG_LONG 1 +#endif +#define HALF_POP_WARNINGS 1 +#pragma warning(push) +#pragma warning(disable : 4099 4127 4146) // struct vs class, constant in if, negative unsigned +#endif + +// check C++11 library features #include -#if defined(_LIBCPP_VERSION) //libc++ - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 - #ifndef HALF_ENABLE_CPP11_TYPE_TRAITS - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - 
#ifndef HALF_ENABLE_CPP11_CSTDINT - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #ifndef HALF_ENABLE_CPP11_CMATH - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #ifndef HALF_ENABLE_CPP11_HASH - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #endif -#elif defined(__GLIBCXX__) //libstdc++ - #if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 - #ifdef __clang__ - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #else - #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #endif - #endif -#elif defined(_CPPLIB_VER) //Dinkumware/Visual C++ - #if _CPPLIB_VER >= 520 - #ifndef HALF_ENABLE_CPP11_TYPE_TRAITS - #define HALF_ENABLE_CPP11_TYPE_TRAITS 1 - #endif - #ifndef HALF_ENABLE_CPP11_CSTDINT - #define HALF_ENABLE_CPP11_CSTDINT 1 - #endif - #ifndef HALF_ENABLE_CPP11_HASH - #define HALF_ENABLE_CPP11_HASH 1 - #endif - #endif - #if _CPPLIB_VER >= 610 - #ifndef HALF_ENABLE_CPP11_CMATH - #define HALF_ENABLE_CPP11_CMATH 1 - #endif - #endif +#if defined(_LIBCPP_VERSION) // libc++ +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 +#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS +#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 +#endif +#ifndef HALF_ENABLE_CPP11_CSTDINT +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#ifndef HALF_ENABLE_CPP11_CMATH +#define HALF_ENABLE_CPP11_CMATH 1 
+#endif +#ifndef HALF_ENABLE_CPP11_HASH +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#endif +#elif defined(__GLIBCXX__) // libstdc++ +#if defined(__GXX_EXPERIMENTAL_CXX0X__) || __cplusplus >= 201103 +#ifdef __clang__ +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_TYPE_TRAITS) +#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 +#endif +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CSTDINT) +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_CMATH) +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#if __GLIBCXX__ >= 20080606 && !defined(HALF_ENABLE_CPP11_HASH) +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#else +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CSTDINT) +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_CMATH) +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#if HALF_GNUC_VERSION >= 403 && !defined(HALF_ENABLE_CPP11_HASH) +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#endif +#endif +#elif defined(_CPPLIB_VER) // Dinkumware/Visual C++ +#if _CPPLIB_VER >= 520 +#ifndef HALF_ENABLE_CPP11_TYPE_TRAITS +#define HALF_ENABLE_CPP11_TYPE_TRAITS 1 +#endif +#ifndef HALF_ENABLE_CPP11_CSTDINT +#define HALF_ENABLE_CPP11_CSTDINT 1 +#endif +#ifndef HALF_ENABLE_CPP11_HASH +#define HALF_ENABLE_CPP11_HASH 1 +#endif +#endif +#if _CPPLIB_VER >= 610 +#ifndef HALF_ENABLE_CPP11_CMATH +#define HALF_ENABLE_CPP11_CMATH 1 +#endif +#endif #endif #undef HALF_GNUC_VERSION -//support constexpr +// support constexpr #if HALF_ENABLE_CPP11_CONSTEXPR - #define HALF_CONSTEXPR constexpr - #define HALF_CONSTEXPR_CONST constexpr +#define HALF_CONSTEXPR constexpr +#define HALF_CONSTEXPR_CONST constexpr #else - #define HALF_CONSTEXPR - #define HALF_CONSTEXPR_CONST const +#define HALF_CONSTEXPR +#define HALF_CONSTEXPR_CONST const #endif -//support noexcept +// support noexcept #if HALF_ENABLE_CPP11_NOEXCEPT - #define HALF_NOEXCEPT noexcept - #define HALF_NOTHROW noexcept +#define 
HALF_NOEXCEPT noexcept +#define HALF_NOTHROW noexcept #else - #define HALF_NOEXCEPT - #define HALF_NOTHROW throw() +#define HALF_NOEXCEPT +#define HALF_NOTHROW throw() #endif #include -#include -#include #include #include #include +#include +#include #if HALF_ENABLE_CPP11_TYPE_TRAITS - #include +#include #endif #if HALF_ENABLE_CPP11_CSTDINT #include @@ -228,2867 +234,4088 @@ /// `std::numeric_limits::round_style` to synchronize the rounding mode with that of the underlying /// single-precision implementation. For TRT-1275, changing it to 1 (to nearest) #ifndef HALF_ROUND_STYLE - #define HALF_ROUND_STYLE 1 // = std::round_to_nearest +#define HALF_ROUND_STYLE 1 // = std::round_to_nearest #endif /// Tie-breaking behaviour for round to nearest. -/// This specifies if ties in round to nearest should be resolved by rounding to the nearest even value. By default this is -/// defined to `0` resulting in the faster but slightly more biased behaviour of rounding away from zero in half-way cases (and -/// thus equal to the round() function), but can be redefined to `1` (before including half.hpp) if more IEEE-conformant -/// behaviour is needed. +/// This specifies if ties in round to nearest should be resolved by rounding to the nearest even value. By default this +/// is defined to `0` resulting in the faster but slightly more biased behaviour of rounding away from zero in half-way +/// cases (and thus equal to the round() function), but can be redefined to `1` (before including half.hpp) if more +/// IEEE-conformant behaviour is needed. #ifndef HALF_ROUND_TIES_TO_EVEN - #define HALF_ROUND_TIES_TO_EVEN 0 // ties away from zero +#define HALF_ROUND_TIES_TO_EVEN 0 // ties away from zero #endif /// Value signaling overflow. -/// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to a positive value signaling the overflow of an -/// operation, in particular it just evaluates to positive infinity. 
-#define HUGE_VALH std::numeric_limits::infinity() +/// In correspondence with `HUGE_VAL[F|L]` from `` this symbol expands to a positive value signaling the overflow +/// of an operation, in particular it just evaluates to positive infinity. +#define HUGE_VALH std::numeric_limits::infinity() /// Fast half-precision fma function. -/// This symbol is only defined if the fma() function generally executes as fast as, or faster than, a separate -/// half-precision multiplication followed by an addition. Due to the internal single-precision implementation of all +/// This symbol is only defined if the fma() function generally executes as fast as, or faster than, a separate +/// half-precision multiplication followed by an addition. Due to the internal single-precision implementation of all /// arithmetic operations, this is in fact always the case. -#define FP_FAST_FMAH 1 +#define FP_FAST_FMAH 1 #ifndef FP_ILOGB0 - #define FP_ILOGB0 INT_MIN +#define FP_ILOGB0 INT_MIN #endif #ifndef FP_ILOGBNAN - #define FP_ILOGBNAN INT_MAX +#define FP_ILOGBNAN INT_MAX #endif #ifndef FP_SUBNORMAL - #define FP_SUBNORMAL 0 +#define FP_SUBNORMAL 0 #endif #ifndef FP_ZERO - #define FP_ZERO 1 +#define FP_ZERO 1 #endif #ifndef FP_NAN - #define FP_NAN 2 +#define FP_NAN 2 #endif #ifndef FP_INFINITE - #define FP_INFINITE 3 +#define FP_INFINITE 3 #endif #ifndef FP_NORMAL - #define FP_NORMAL 4 +#define FP_NORMAL 4 #endif - /// Main namespace for half precision functionality. /// This namespace contains all the functionality provided by the library. namespace half_float { - class half; - -#if HALF_ENABLE_CPP11_USER_LITERALS - /// Library-defined half-precision literals. - /// Import this namespace to enable half-precision floating point literals: - /// ~~~~{.cpp} - /// using namespace half_float::literal; - /// half_float::half = 4.2_h; - /// ~~~~ - namespace literal - { - half operator "" _h(long double); - } -#endif - - /// \internal - /// \brief Implementation details. 
- namespace detail - { - #if HALF_ENABLE_CPP11_TYPE_TRAITS - /// Conditional type. - template struct conditional : std::conditional {}; - - /// Helper for tag dispatching. - template struct bool_type : std::integral_constant {}; - using std::true_type; - using std::false_type; - - /// Type traits for floating point types. - template struct is_float : std::is_floating_point {}; - #else - /// Conditional type. - template struct conditional { typedef T type; }; - template struct conditional { typedef F type; }; - - /// Helper for tag dispatching. - template struct bool_type {}; - typedef bool_type true_type; - typedef bool_type false_type; - - /// Type traits for floating point types. - template struct is_float : false_type {}; - template struct is_float : is_float {}; - template struct is_float : is_float {}; - template struct is_float : is_float {}; - template<> struct is_float : true_type {}; - template<> struct is_float : true_type {}; - template<> struct is_float : true_type {}; - #endif - - /// Type traits for floating point bits. - template struct bits { typedef unsigned char type; }; - template struct bits : bits {}; - template struct bits : bits {}; - template struct bits : bits {}; - - #if HALF_ENABLE_CPP11_CSTDINT - /// Unsigned integer of (at least) 16 bits width. - typedef std::uint_least16_t uint16; - - /// Unsigned integer of (at least) 32 bits width. - template<> struct bits { typedef std::uint_least32_t type; }; - - /// Unsigned integer of (at least) 64 bits width. - template<> struct bits { typedef std::uint_least64_t type; }; - #else - /// Unsigned integer of (at least) 16 bits width. - typedef unsigned short uint16; - - /// Unsigned integer of (at least) 32 bits width. - template<> struct bits : conditional::digits>=32,unsigned int,unsigned long> {}; - - #if HALF_ENABLE_CPP11_LONG_LONG - /// Unsigned integer of (at least) 64 bits width. 
- template<> struct bits : conditional::digits>=64,unsigned long,unsigned long long> {}; - #else - /// Unsigned integer of (at least) 64 bits width. - template<> struct bits { typedef unsigned long type; }; - #endif - #endif - - /// Tag type for binary construction. - struct binary_t {}; - - /// Tag for binary construction. - HALF_CONSTEXPR_CONST binary_t binary = binary_t(); - - /// Temporary half-precision expression. - /// This class represents a half-precision expression which just stores a single-precision value internally. - struct expr - { - /// Conversion constructor. - /// \param f single-precision value to convert - explicit HALF_CONSTEXPR expr(float f) HALF_NOEXCEPT : value_(f) {} - - /// Conversion to single-precision. - /// \return single precision value representing expression value - HALF_CONSTEXPR operator float() const HALF_NOEXCEPT { return value_; } - - private: - /// Internal expression value stored in single-precision. - float value_; - }; - - /// SFINAE helper for generic half-precision functions. - /// This class template has to be specialized for each valid combination of argument types to provide a corresponding - /// `type` member equivalent to \a T. - /// \tparam T type to return - template struct enable {}; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - template struct enable { typedef T type; }; - - /// Return type for specialized generic 2-argument half-precision functions. 
- /// This class template has to be specialized for each valid combination of argument types to provide a corresponding - /// `type` member denoting the appropriate return type. - /// \tparam T first argument type - /// \tparam U first argument type - template struct result : enable {}; - template<> struct result { typedef half type; }; - - /// \name Classification helpers - /// \{ - - /// Check for infinity. - /// \tparam T argument type (builtin floating point type) - /// \param arg value to query - /// \retval true if infinity - /// \retval false else - template bool builtin_isinf(T arg) - { - #if HALF_ENABLE_CPP11_CMATH - return std::isinf(arg); - #elif defined(_MSC_VER) - return !::_finite(static_cast(arg)) && !::_isnan(static_cast(arg)); - #else - return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); - #endif - } - - /// Check for NaN. - /// \tparam T argument type (builtin floating point type) - /// \param arg value to query - /// \retval true if not a number - /// \retval false else - template bool builtin_isnan(T arg) - { - #if HALF_ENABLE_CPP11_CMATH - return std::isnan(arg); - #elif defined(_MSC_VER) - return ::_isnan(static_cast(arg)) != 0; - #else - return arg != arg; - #endif - } - - /// Check sign. - /// \tparam T argument type (builtin floating point type) - /// \param arg value to query - /// \retval true if signbit set - /// \retval false else - template bool builtin_signbit(T arg) - { - #if HALF_ENABLE_CPP11_CMATH - return std::signbit(arg); - #else - return arg < T() || (arg == T() && T(1)/arg < T()); - #endif - } - - /// \} - /// \name Conversion - /// \{ - - /// Convert IEEE single-precision to half-precision. - /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). 
- /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \param value single-precision value - /// \return binary representation of half-precision value - template uint16 float2half_impl(float value, true_type) - { - typedef bits::type uint32; - uint32 bits;// = *reinterpret_cast(&value); //violating strict aliasing! - std::memcpy(&bits, &value, sizeof(float)); -/* uint16 hbits = (bits>>16) & 0x8000; - bits &= 0x7FFFFFFF; - int exp = bits >> 23; - if(exp == 255) - return hbits | 0x7C00 | (0x3FF&-static_cast((bits&0x7FFFFF)!=0)); - if(exp > 142) - { - if(R == std::round_toward_infinity) - return hbits | 0x7C00 - (hbits>>15); - if(R == std::round_toward_neg_infinity) - return hbits | 0x7BFF + (hbits>>15); - return hbits | 0x7BFF + (R!=std::round_toward_zero); - } - int g, s; - if(exp > 112) - { - g = (bits>>12) & 1; - s = (bits&0xFFF) != 0; - hbits |= ((exp-112)<<10) | ((bits>>13)&0x3FF); - } - else if(exp > 101) - { - int i = 125 - exp; - bits = (bits&0x7FFFFF) | 0x800000; - g = (bits>>i) & 1; - s = (bits&((1L<> (i+1); - } - else - { - g = 0; - s = bits != 0; - } - if(R == std::round_to_nearest) - #if HALF_ROUND_TIES_TO_EVEN - hbits += g & (s|hbits); - #else - hbits += g; - #endif - else if(R == std::round_toward_infinity) - hbits += ~(hbits>>15) & (s|g); - else if(R == std::round_toward_neg_infinity) - hbits += (hbits>>15) & (g|s); -*/ static uint16 const base_table[512] = { - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, - 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, - 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, - 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, - 0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, - 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00 }; - static const unsigned char shift_table[512] = { - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, - 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, - 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 }; - uint16 hbits = base_table[bits>>23] + static_cast((bits&0x7FFFFF)>>shift_table[bits>>23]); - if(R == std::round_to_nearest) - hbits += (((bits&0x7FFFFF)>>(shift_table[bits>>23]-1))|(((bits>>23)&0xFF)==102)) & ((hbits&0x7C00)!=0x7C00) - #if 
HALF_ROUND_TIES_TO_EVEN - & (((((static_cast(1)<<(shift_table[bits>>23]-1))-1)&bits)!=0)|hbits) - #endif - ; - else if(R == std::round_toward_zero) - hbits -= ((hbits&0x7FFF)==0x7C00) & ~shift_table[bits>>23]; - else if(R == std::round_toward_infinity) - hbits += ((((bits&0x7FFFFF&((static_cast(1)<<(shift_table[bits>>23]))-1))!=0)|(((bits>>23)<=102)& - ((bits>>23)!=0)))&(hbits<0x7C00)) - ((hbits==0xFC00)&((bits>>23)!=511)); - else if(R == std::round_toward_neg_infinity) - hbits += ((((bits&0x7FFFFF&((static_cast(1)<<(shift_table[bits>>23]))-1))!=0)|(((bits>>23)<=358)& - ((bits>>23)!=256)))&(hbits<0xFC00)&(hbits>>15)) - ((hbits==0x7C00)&((bits>>23)!=255)); - return hbits; - } - - /// Convert IEEE double-precision to half-precision. - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \param value double-precision value - /// \return binary representation of half-precision value - template uint16 float2half_impl(double value, true_type) - { - typedef bits::type uint32; - typedef bits::type uint64; - uint64 bits;// = *reinterpret_cast(&value); //violating strict aliasing! 
- std::memcpy(&bits, &value, sizeof(double)); - uint32 hi = bits >> 32, lo = bits & 0xFFFFFFFF; - uint16 hbits = (hi>>16) & 0x8000; - hi &= 0x7FFFFFFF; - int exp = hi >> 20; - if(exp == 2047) - return hbits | 0x7C00 | (0x3FF&-static_cast((bits&0xFFFFFFFFFFFFF)!=0)); - if(exp > 1038) - { - if(R == std::round_toward_infinity) - return hbits | (0x7C00 - (hbits>>15)); - if(R == std::round_toward_neg_infinity) - return hbits | (0x7BFF + (hbits>>15)); - return hbits | (0x7BFF + (R!=std::round_toward_zero)); - } - int g, s = lo != 0; - if(exp > 1008) - { - g = (hi>>9) & 1; - s |= (hi&0x1FF) != 0; - hbits |= ((exp-1008)<<10) | ((hi>>10)&0x3FF); - } - else if(exp > 997) - { - int i = 1018 - exp; - hi = (hi&0xFFFFF) | 0x100000; - g = (hi>>i) & 1; - s |= (hi&((1L<> (i+1); - } - else - { - g = 0; - s |= hi != 0; - } - if(R == std::round_to_nearest) - #if HALF_ROUND_TIES_TO_EVEN - hbits += g & (s|hbits); - #else - hbits += g; - #endif - else if(R == std::round_toward_infinity) - hbits += ~(hbits>>15) & (s|g); - else if(R == std::round_toward_neg_infinity) - hbits += (hbits>>15) & (g|s); - return hbits; - } - - /// Convert non-IEEE floating point to half-precision. 
- /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam T source type (builtin floating point type) - /// \param value floating point value - /// \return binary representation of half-precision value - template uint16 float2half_impl(T value, false_type) - { - uint16 hbits = static_cast(builtin_signbit(value)) << 15; - if(value == T()) - return hbits; - if(builtin_isnan(value)) - return hbits | 0x7FFF; - if(builtin_isinf(value)) - return hbits | 0x7C00; - int exp; - std::frexp(value, &exp); - if(exp > 16) - { - if(R == std::round_toward_infinity) - return hbits | (0x7C00 - (hbits>>15)); - if(R == std::round_toward_neg_infinity) - return hbits | (0x7BFF + (hbits>>15)); - return hbits | (0x7BFF + (R!=std::round_toward_zero)); - } - if(exp < -13) - value = std::ldexp(value, 24); - else - { - value = std::ldexp(value, 11-exp); - hbits |= ((exp+13)<<10); - } - T ival, frac = std::modf(value, &ival); - hbits += static_cast(std::abs(static_cast(ival))); - if(R == std::round_to_nearest) - { - frac = std::abs(frac); - #if HALF_ROUND_TIES_TO_EVEN - hbits += (frac>T(0.5)) | ((frac==T(0.5))&hbits); - #else - hbits += frac >= T(0.5); - #endif - } - else if(R == std::round_toward_infinity) - hbits += frac > T(); - else if(R == std::round_toward_neg_infinity) - hbits += frac < T(); - return hbits; - } - - /// Convert floating point to half-precision. - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam T source type (builtin floating point type) - /// \param value floating point value - /// \return binary representation of half-precision value - template uint16 float2half(T value) - { - return float2half_impl(value, bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); - } - - /// Convert integer to half-precision floating point. 
- /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam S `true` if value negative, `false` else - /// \tparam T type to convert (builtin integer type) - /// \param value non-negative integral value - /// \return binary representation of half-precision value - template uint16 int2half_impl(T value) - { - #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_integral::value, "int to half conversion only supports builtin integer types"); - #endif - if(S) - value = -value; - uint16 bits = S << 15; - if(value > 0xFFFF) - { - if(R == std::round_toward_infinity) - bits |= 0x7C00 - S; - else if(R == std::round_toward_neg_infinity) - bits |= 0x7BFF + S; - else - bits |= 0x7BFF + (R!=std::round_toward_zero); - } - else if(value) - { - unsigned int m = value, exp = 24; - for(; m<0x400; m<<=1,--exp) ; - for(; m>0x7FF; m>>=1,++exp) ; - bits |= (exp<<10) + m; - if(exp > 24) - { - if(R == std::round_to_nearest) - bits += (value>>(exp-25)) & 1 - #if HALF_ROUND_TIES_TO_EVEN - & (((((1<<(exp-25))-1)&value)!=0)|bits) - #endif - ; - else if(R == std::round_toward_infinity) - bits += ((value&((1<<(exp-24))-1))!=0) & !S; - else if(R == std::round_toward_neg_infinity) - bits += ((value&((1<<(exp-24))-1))!=0) & S; - } - } - return bits; - } - - /// Convert integer to half-precision floating point. - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam T type to convert (builtin integer type) - /// \param value integral value - /// \return binary representation of half-precision value - template uint16 int2half(T value) - { - return (value<0) ? int2half_impl(value) : int2half_impl(value); - } - - /// Convert half-precision to IEEE single-precision. - /// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). 
- /// \param value binary representation of half-precision value - /// \return single-precision value - inline float half2float_impl(uint16 value, float, true_type) - { - typedef bits::type uint32; -/* uint32 bits = static_cast(value&0x8000) << 16; - int abs = value & 0x7FFF; - if(abs) - { - bits |= 0x38000000 << static_cast(abs>=0x7C00); - for(; abs<0x400; abs<<=1,bits-=0x800000) ; - bits += static_cast(abs) << 13; - } -*/ static const uint32 mantissa_table[2048] = { - 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000, - 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, - 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, - 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000, - 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000, - 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, - 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, - 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 
0x36FA0000, 0x36FC0000, 0x36FE0000, - 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000, - 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000, - 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, - 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, - 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, - 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000, - 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, - 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, - 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, - 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000, - 0x37900000, 
0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000, - 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, - 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, - 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, - 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000, - 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, - 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, - 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, - 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000, - 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000, - 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 
0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, - 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, - 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, - 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000, - 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, - 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, - 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, - 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000, - 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000, - 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, - 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 
0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, - 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, - 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000, - 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, - 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, - 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, - 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000, - 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000, - 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, - 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, - 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000, 
- 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000, - 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, - 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, - 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, - 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000, - 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000, - 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, - 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, - 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000, - 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000, - 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 
0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, - 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, - 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, - 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000, - 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000, - 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000, - 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, - 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, - 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000, - 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, - 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 
0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, - 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, - 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000, - 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000, - 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, - 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, - 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, - 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000, - 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, - 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, - 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 
0x381FC000, 0x381FE000, - 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000, - 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000, - 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, - 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, - 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, - 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000, - 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, - 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, - 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, - 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000, - 0x38340000, 0x38342000, 
0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000, - 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, - 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, - 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, - 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000, - 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, - 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, - 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, - 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000, - 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000, - 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 
0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, - 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, - 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, - 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000, - 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, - 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, - 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, - 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000, - 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000, - 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, - 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 
0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, - 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, - 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000, - 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, - 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, - 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000, - 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000, - 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000, - 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, - 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, - 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, - 
0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000, - 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, - 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, - 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, - 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000, - 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000, - 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 }; - static const uint32 exponent_table[64] = { - 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000, - 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, - 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, - 0x88000000, 
0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 }; - static const unsigned short offset_table[64] = { - 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, - 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 }; - uint32 bits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10]; -// return *reinterpret_cast(&bits); //violating strict aliasing! - float out; - std::memcpy(&out, &bits, sizeof(float)); - return out; - } - - /// Convert half-precision to IEEE double-precision. - /// \param value binary representation of half-precision value - /// \return double-precision value - inline double half2float_impl(uint16 value, double, true_type) - { - typedef bits::type uint32; - typedef bits::type uint64; - uint32 hi = static_cast(value&0x8000) << 16; - int abs = value & 0x7FFF; - if(abs) - { - hi |= 0x3F000000 << static_cast(abs>=0x7C00); - for(; abs<0x400; abs<<=1,hi-=0x100000) ; - hi += static_cast(abs) << 10; - } - uint64 bits = static_cast(hi) << 32; -// return *reinterpret_cast(&bits); //violating strict aliasing! - double out; - std::memcpy(&out, &bits, sizeof(double)); - return out; - } - - /// Convert half-precision to non-IEEE floating point. - /// \tparam T type to convert to (builtin integer type) - /// \param value binary representation of half-precision value - /// \return floating point value - template T half2float_impl(uint16 value, T, ...) - { - T out; - int abs = value & 0x7FFF; - if(abs > 0x7C00) - out = std::numeric_limits::has_quiet_NaN ? 
std::numeric_limits::quiet_NaN() : T(); - else if(abs == 0x7C00) - out = std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : std::numeric_limits::max(); - else if(abs > 0x3FF) - out = std::ldexp(static_cast((abs&0x3FF)|0x400), (abs>>10)-25); - else - out = std::ldexp(static_cast(abs), -24); - return (value&0x8000) ? -out : out; - } - - /// Convert half-precision to floating point. - /// \tparam T type to convert to (builtin integer type) - /// \param value binary representation of half-precision value - /// \return floating point value - template T half2float(uint16 value) - { - return half2float_impl(value, T(), bool_type::is_iec559&&sizeof(typename bits::type)==sizeof(T)>()); - } - - /// Convert half-precision floating point to integer. - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam E `true` for round to even, `false` for round away from zero - /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits) - /// \param value binary representation of half-precision value - /// \return integral value - template T half2int_impl(uint16 value) - { - #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_integral::value, "half to int conversion only supports builtin integer types"); - #endif - unsigned int e = value & 0x7FFF; - if(e >= 0x7C00) - return (value&0x8000) ? 
std::numeric_limits::min() : std::numeric_limits::max(); - if(e < 0x3800) - { - if(R == std::round_toward_infinity) - return T(~(value>>15)&(e!=0)); - if(R == std::round_toward_neg_infinity) - return -T(value>0x8000); - return T(); - } - unsigned int m = (value&0x3FF) | 0x400; - e >>= 10; - if(e < 25) - { - if(R == std::round_to_nearest) - m += (1<<(24-e)) - (~(m>>(25-e))&E); - else if(R == std::round_toward_infinity) - m += ((value>>15)-1) & ((1<<(25-e))-1U); - else if(R == std::round_toward_neg_infinity) - m += -(value>>15) & ((1<<(25-e))-1U); - m >>= 25 - e; - } - else - m <<= e - 25; - return (value&0x8000) ? -static_cast(m) : static_cast(m); - } - - /// Convert half-precision floating point to integer. - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits) - /// \param value binary representation of half-precision value - /// \return integral value - template T half2int(uint16 value) { return half2int_impl(value); } - - /// Convert half-precision floating point to integer using round-to-nearest-away-from-zero. - /// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign bits) - /// \param value binary representation of half-precision value - /// \return integral value - template T half2int_up(uint16 value) { return half2int_impl(value); } - - /// Round half-precision number to nearest integer value. 
- /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \tparam E `true` for round to even, `false` for round away from zero - /// \param value binary representation of half-precision value - /// \return half-precision bits for nearest integral value - template uint16 round_half_impl(uint16 value) - { - unsigned int e = value & 0x7FFF; - uint16 result = value; - if(e < 0x3C00) - { - result &= 0x8000; - if(R == std::round_to_nearest) - result |= 0x3C00U & -(e>=(0x3800+E)); - else if(R == std::round_toward_infinity) - result |= 0x3C00U & -(~(value>>15)&(e!=0)); - else if(R == std::round_toward_neg_infinity) - result |= 0x3C00U & -(value>0x8000); - } - else if(e < 0x6400) - { - e = 25 - (e>>10); - unsigned int mask = (1<>e)&E); - else if(R == std::round_toward_infinity) - result += mask & ((value>>15)-1); - else if(R == std::round_toward_neg_infinity) - result += mask & -(value>>15); - result &= ~mask; - } - return result; - } - - /// Round half-precision number to nearest integer value. - /// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding - /// \param value binary representation of half-precision value - /// \return half-precision bits for nearest integral value - template uint16 round_half(uint16 value) { return round_half_impl(value); } - - /// Round half-precision number to nearest integer value using round-to-nearest-away-from-zero. - /// \param value binary representation of half-precision value - /// \return half-precision bits for nearest integral value - inline uint16 round_half_up(uint16 value) { return round_half_impl(value); } - /// \} - - struct functions; - template struct unary_specialized; - template struct binary_specialized; - template struct half_caster; - } - - /// Half-precision floating point type. - /// This class implements an IEEE-conformant half-precision floating point type with the usual arithmetic operators and - /// conversions. 
It is implicitly convertible to single-precision floating point, which makes artihmetic expressions and - /// functions with mixed-type operands to be of the most precise operand type. Additionally all arithmetic operations - /// (and many mathematical functions) are carried out in single-precision internally. All conversions from single- to - /// half-precision are done using the library's default rounding mode, but temporary results inside chained arithmetic - /// expressions are kept in single-precision as long as possible (while of course still maintaining a strong half-precision type). - /// - /// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and - /// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which - /// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the - /// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not neccessarily have to be of - /// exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will most - /// probably not ivolve any additional "magic" or padding beyond the simple binary representation of the underlying 16-bit - /// IEEE number, even if not strictly guaranteed by the standard. But even then it only has an actual size of 16 bits if - /// your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the case on - /// nearly any reasonable platform. - /// - /// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable - /// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation. 
- #if defined(__clang__) - /* this is a WAR, after nvcc's process, - * `friend class std::numeric_limits;` in the following code, becomes - * `friend class numeric_limits;`, namespsace `std` is removed, which results compilation error in clang. - * tested on nvcc V10.0.95, and clang 5.0.300080 in ndk 16b - */ - using std::numeric_limits; - using std::hash; - #endif - class half - { - friend struct detail::functions; - friend struct detail::unary_specialized; - friend struct detail::binary_specialized; - template friend struct detail::half_caster; - friend class std::numeric_limits; - #if HALF_ENABLE_CPP11_HASH - friend struct std::hash; - #endif - #if HALF_ENABLE_CPP11_USER_LITERALS - friend half literal::operator "" _h(long double); - #endif - - public: - /// Default constructor. - /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics - /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics. - HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} - - /// Copy constructor. - /// \tparam T type of concrete half expression - /// \param rhs half expression to copy from - half(detail::expr rhs) : data_(detail::float2half(static_cast(rhs))) {} - - /// Conversion constructor. - /// \param rhs float to convert - explicit half(float rhs) : data_(detail::float2half(rhs)) {} - - /// Conversion to single-precision. - /// \return single precision value representing expression value - operator float() const { return detail::half2float(data_); } - - /// Assignment operator. - /// \tparam T type of concrete half expression - /// \param rhs half expression to copy from - /// \return reference to this half - half& operator=(detail::expr rhs) { return *this = static_cast(rhs); } - - /// Arithmetic assignment. 
- /// \tparam T type of concrete half expression - /// \param rhs half expression to add - /// \return reference to this half - template typename detail::enable::type operator+=(T rhs) { return *this += static_cast(rhs); } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to subtract - /// \return reference to this half - template typename detail::enable::type operator-=(T rhs) { return *this -= static_cast(rhs); } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to multiply with - /// \return reference to this half - template typename detail::enable::type operator*=(T rhs) { return *this *= static_cast(rhs); } - - /// Arithmetic assignment. - /// \tparam T type of concrete half expression - /// \param rhs half expression to divide by - /// \return reference to this half - template typename detail::enable::type operator/=(T rhs) { return *this /= static_cast(rhs); } - - /// Assignment operator. - /// \param rhs single-precision value to copy from - /// \return reference to this half - half& operator=(float rhs) { data_ = detail::float2half(rhs); return *this; } - - /// Arithmetic assignment. - /// \param rhs single-precision value to add - /// \return reference to this half - half& operator+=(float rhs) { data_ = detail::float2half(detail::half2float(data_)+rhs); return *this; } - - /// Arithmetic assignment. - /// \param rhs single-precision value to subtract - /// \return reference to this half - half& operator-=(float rhs) { data_ = detail::float2half(detail::half2float(data_)-rhs); return *this; } - - /// Arithmetic assignment. - /// \param rhs single-precision value to multiply with - /// \return reference to this half - half& operator*=(float rhs) { data_ = detail::float2half(detail::half2float(data_)*rhs); return *this; } - - /// Arithmetic assignment. 
- /// \param rhs single-precision value to divide by - /// \return reference to this half - half& operator/=(float rhs) { data_ = detail::float2half(detail::half2float(data_)/rhs); return *this; } - - /// Prefix increment. - /// \return incremented half value - half& operator++() { return *this += 1.0f; } - - /// Prefix decrement. - /// \return decremented half value - half& operator--() { return *this -= 1.0f; } - - /// Postfix increment. - /// \return non-incremented half value - half operator++(int) { half out(*this); ++*this; return out; } - - /// Postfix decrement. - /// \return non-decremented half value - half operator--(int) { half out(*this); --*this; return out; } - - private: - /// Rounding mode to use - static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE); - - /// Constructor. - /// \param bits binary representation to set half to - HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) HALF_NOEXCEPT : data_(bits) {} - - /// Internal binary representation - detail::uint16 data_; - }; +class half; #if HALF_ENABLE_CPP11_USER_LITERALS - namespace literal - { - /// Half literal. - /// While this returns an actual half-precision value, half literals can unfortunately not be constant expressions due - /// to rather involved conversions. - /// \param value literal value - /// \return half with given value (if representable) - inline half operator "" _h(long double value) { return half(detail::binary, detail::float2half(value)); } - } -#endif - - namespace detail - { - /// Wrapper implementing unspecialized half-precision functions. - struct functions - { - /// Addition implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision sum stored in single-precision - static expr plus(float x, float y) { return expr(x+y); } - - /// Subtraction implementation. 
- /// \param x first operand - /// \param y second operand - /// \return Half-precision difference stored in single-precision - static expr minus(float x, float y) { return expr(x-y); } - - /// Multiplication implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision product stored in single-precision - static expr multiplies(float x, float y) { return expr(x*y); } - - /// Division implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision quotient stored in single-precision - static expr divides(float x, float y) { return expr(x/y); } - - /// Output implementation. - /// \param out stream to write to - /// \param arg value to write - /// \return reference to stream - template static std::basic_ostream& write(std::basic_ostream &out, float arg) { return out << arg; } - - /// Input implementation. - /// \param in stream to read from - /// \param arg half to read into - /// \return reference to stream - template static std::basic_istream& read(std::basic_istream &in, half &arg) - { - float f; - if(in >> f) - arg = f; - return in; - } - - /// Modulo implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision division remainder stored in single-precision - static expr fmod(float x, float y) { return expr(std::fmod(x, y)); } - - /// Remainder implementation. - /// \param x first operand - /// \param y second operand - /// \return Half-precision division remainder stored in single-precision - static expr remainder(float x, float y) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::remainder(x, y)); - #else - if(builtin_isnan(x) || builtin_isnan(y)) - return expr(std::numeric_limits::quiet_NaN()); - float ax = std::fabs(x), ay = std::fabs(y); - if(ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) - return expr(std::numeric_limits::quiet_NaN()); - if(ay >= 65536.0f) - return expr(x); - if(ax == ay) - return expr(builtin_signbit(x) ? 
-0.0f : 0.0f); - ax = std::fmod(ax, ay+ay); - float y2 = 0.5f * ay; - if(ax > y2) - { - ax -= ay; - if(ax >= y2) - ax -= ay; - } - return expr(builtin_signbit(x) ? -ax : ax); - #endif - } - - /// Remainder implementation. - /// \param x first operand - /// \param y second operand - /// \param quo address to store quotient bits at - /// \return Half-precision division remainder stored in single-precision - static expr remquo(float x, float y, int *quo) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::remquo(x, y, quo)); - #else - if(builtin_isnan(x) || builtin_isnan(y)) - return expr(std::numeric_limits::quiet_NaN()); - bool sign = builtin_signbit(x), qsign = static_cast(sign^builtin_signbit(y)); - float ax = std::fabs(x), ay = std::fabs(y); - if(ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) - return expr(std::numeric_limits::quiet_NaN()); - if(ay >= 65536.0f) - return expr(x); - if(ax == ay) - return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f); - ax = std::fmod(ax, 8.0f*ay); - int cquo = 0; - if(ax >= 4.0f * ay) - { - ax -= 4.0f * ay; - cquo += 4; - } - if(ax >= 2.0f * ay) - { - ax -= 2.0f * ay; - cquo += 2; - } - float y2 = 0.5f * ay; - if(ax > y2) - { - ax -= ay; - ++cquo; - if(ax >= y2) - { - ax -= ay; - ++cquo; - } - } - return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax); - #endif - } - - /// Positive difference implementation. - /// \param x first operand - /// \param y second operand - /// \return Positive difference stored in single-precision - static expr fdim(float x, float y) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::fdim(x, y)); - #else - return expr((x<=y) ? 0.0f : (x-y)); - #endif - } - - /// Fused multiply-add implementation. 
- /// \param x first operand - /// \param y second operand - /// \param z third operand - /// \return \a x * \a y + \a z stored in single-precision - static expr fma(float x, float y, float z) - { - #if HALF_ENABLE_CPP11_CMATH && defined(FP_FAST_FMAF) - return expr(std::fma(x, y, z)); - #else - return expr(x*y+z); - #endif - } - - /// Get NaN. - /// \return Half-precision quiet NaN - static half nanh() { return half(binary, 0x7FFF); } - - /// Exponential implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr exp(float arg) { return expr(std::exp(arg)); } - - /// Exponential implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr expm1(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::expm1(arg)); - #else - return expr(static_cast(std::exp(static_cast(arg))-1.0)); - #endif - } - - /// Binary exponential implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr exp2(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::exp2(arg)); - #else - return expr(static_cast(std::exp(arg*0.69314718055994530941723212145818))); - #endif - } - - /// Logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log(float arg) { return expr(std::log(arg)); } - - /// Common logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log10(float arg) { return expr(std::log10(arg)); } - - /// Logarithm implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr log1p(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::log1p(arg)); - #else - return expr(static_cast(std::log(1.0+arg))); - #endif - } - - /// Binary logarithm implementation. 
- /// \param arg function argument - /// \return function value stored in single-preicision - static expr log2(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::log2(arg)); - #else - return expr(static_cast(std::log(static_cast(arg))*1.4426950408889634073599246810019)); - #endif - } - - /// Square root implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr sqrt(float arg) { return expr(std::sqrt(arg)); } - - /// Cubic root implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr cbrt(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::cbrt(arg)); - #else - if(builtin_isnan(arg) || builtin_isinf(arg)) - return expr(arg); - return expr(builtin_signbit(arg) ? -static_cast(std::pow(-static_cast(arg), 1.0/3.0)) : - static_cast(std::pow(static_cast(arg), 1.0/3.0))); - #endif - } - - /// Hypotenuse implementation. - /// \param x first argument - /// \param y second argument - /// \return function value stored in single-preicision - static expr hypot(float x, float y) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::hypot(x, y)); - #else - return expr((builtin_isinf(x) || builtin_isinf(y)) ? std::numeric_limits::infinity() : - static_cast(std::sqrt(static_cast(x)*x+static_cast(y)*y))); - #endif - } - - /// Power implementation. - /// \param base value to exponentiate - /// \param exp power to expontiate to - /// \return function value stored in single-preicision - static expr pow(float base, float exp) { return expr(std::pow(base, exp)); } - - /// Sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr sin(float arg) { return expr(std::sin(arg)); } - - /// Cosine implementation. 
- /// \param arg function argument - /// \return function value stored in single-preicision - static expr cos(float arg) { return expr(std::cos(arg)); } - - /// Tan implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr tan(float arg) { return expr(std::tan(arg)); } - - /// Arc sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr asin(float arg) { return expr(std::asin(arg)); } - - /// Arc cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr acos(float arg) { return expr(std::acos(arg)); } - - /// Arc tangent implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr atan(float arg) { return expr(std::atan(arg)); } - - /// Arc tangent implementation. - /// \param x first argument - /// \param y second argument - /// \return function value stored in single-preicision - static expr atan2(float x, float y) { return expr(std::atan2(x, y)); } - - /// Hyperbolic sine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr sinh(float arg) { return expr(std::sinh(arg)); } - - /// Hyperbolic cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr cosh(float arg) { return expr(std::cosh(arg)); } - - /// Hyperbolic tangent implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr tanh(float arg) { return expr(std::tanh(arg)); } - - /// Hyperbolic area sine implementation. 
- /// \param arg function argument - /// \return function value stored in single-preicision - static expr asinh(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::asinh(arg)); - #else - return expr((arg==-std::numeric_limits::infinity()) ? arg : static_cast(std::log(arg+std::sqrt(arg*arg+1.0)))); - #endif - } - - /// Hyperbolic area cosine implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr acosh(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::acosh(arg)); - #else - return expr((arg<-1.0f) ? std::numeric_limits::quiet_NaN() : static_cast(std::log(arg+std::sqrt(arg*arg-1.0)))); - #endif - } - - /// Hyperbolic area tangent implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr atanh(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::atanh(arg)); - #else - return expr(static_cast(0.5*std::log((1.0+arg)/(1.0-arg)))); - #endif - } - - /// Error function implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr erf(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::erf(arg)); - #else - return expr(static_cast(erf(static_cast(arg)))); - #endif - } - - /// Complementary implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr erfc(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::erfc(arg)); - #else - return expr(static_cast(1.0-erf(static_cast(arg)))); - #endif - } - - /// Gamma logarithm implementation. 
- /// \param arg function argument - /// \return function value stored in single-preicision - static expr lgamma(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::lgamma(arg)); - #else - if(builtin_isinf(arg)) - return expr(std::numeric_limits::infinity()); - if(arg < 0.0f) - { - float i, f = std::modf(-arg, &i); - if(f == 0.0f) - return expr(std::numeric_limits::infinity()); - return expr(static_cast(1.1447298858494001741434273513531- - std::log(std::abs(std::sin(3.1415926535897932384626433832795*f)))-lgamma(1.0-arg))); - } - return expr(static_cast(lgamma(static_cast(arg)))); - #endif - } - - /// Gamma implementation. - /// \param arg function argument - /// \return function value stored in single-preicision - static expr tgamma(float arg) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::tgamma(arg)); - #else - if(arg == 0.0f) - return builtin_signbit(arg) ? expr(-std::numeric_limits::infinity()) : expr(std::numeric_limits::infinity()); - if(arg < 0.0f) - { - float i, f = std::modf(-arg, &i); - if(f == 0.0f) - return expr(std::numeric_limits::quiet_NaN()); - double value = 3.1415926535897932384626433832795 / (std::sin(3.1415926535897932384626433832795*f)*std::exp(lgamma(1.0-arg))); - return expr(static_cast((std::fmod(i, 2.0f)==0.0f) ? -value : value)); - } - if(builtin_isinf(arg)) - return expr(arg); - return expr(static_cast(std::exp(lgamma(static_cast(arg))))); - #endif - } - - /// Floor implementation. - /// \param arg value to round - /// \return rounded value - static half floor(half arg) { return half(binary, round_half(arg.data_)); } - - /// Ceiling implementation. - /// \param arg value to round - /// \return rounded value - static half ceil(half arg) { return half(binary, round_half(arg.data_)); } - - /// Truncation implementation. - /// \param arg value to round - /// \return rounded value - static half trunc(half arg) { return half(binary, round_half(arg.data_)); } - - /// Nearest integer implementation. 
- /// \param arg value to round - /// \return rounded value - static half round(half arg) { return half(binary, round_half_up(arg.data_)); } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long lround(half arg) { return detail::half2int_up(arg.data_); } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static half rint(half arg) { return half(binary, round_half(arg.data_)); } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long lrint(half arg) { return detail::half2int(arg.data_); } - - #if HALF_ENABLE_CPP11_LONG_LONG - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long long llround(half arg) { return detail::half2int_up(arg.data_); } - - /// Nearest integer implementation. - /// \param arg value to round - /// \return rounded value - static long long llrint(half arg) { return detail::half2int(arg.data_); } - #endif - - /// Decompression implementation. - /// \param arg number to decompress - /// \param exp address to store exponent at - /// \return normalized significant - static half frexp(half arg, int *exp) - { - int m = arg.data_ & 0x7FFF, e = -14; - if(m >= 0x7C00 || !m) - return *exp = 0, arg; - for(; m<0x400; m<<=1,--e) ; - return *exp = e+(m>>10), half(binary, (arg.data_&0x8000)|0x3800|(m&0x3FF)); - } - - /// Decompression implementation. 
- /// \param arg number to decompress - /// \param iptr address to store integer part at - /// \return fractional part - static half modf(half arg, half *iptr) - { - unsigned int e = arg.data_ & 0x7FFF; - if(e >= 0x6400) - return *iptr = arg, half(binary, arg.data_&(0x8000U|-(e>0x7C00))); - if(e < 0x3C00) - return iptr->data_ = arg.data_ & 0x8000, arg; - e >>= 10; - unsigned int mask = (1<<(25-e)) - 1, m = arg.data_ & mask; - iptr->data_ = arg.data_ & ~mask; - if(!m) - return half(binary, arg.data_&0x8000); - for(; m<0x400; m<<=1,--e) ; - return half(binary, static_cast((arg.data_&0x8000)|(e<<10)|(m&0x3FF))); - } - - /// Scaling implementation. - /// \param arg number to scale - /// \param exp power of two to scale by - /// \return scaled number - static half scalbln(half arg, long exp) - { - unsigned int m = arg.data_ & 0x7FFF; - if(m >= 0x7C00 || !m) - return arg; - for(; m<0x400; m<<=1,--exp) ; - exp += m >> 10; - uint16 value = arg.data_ & 0x8000; - if(exp > 30) - { - if(half::round_style == std::round_toward_zero) - value |= 0x7BFF; - else if(half::round_style == std::round_toward_infinity) - value |= 0x7C00 - (value>>15); - else if(half::round_style == std::round_toward_neg_infinity) - value |= 0x7BFF + (value>>15); - else - value |= 0x7C00; - } - else if(exp > 0) - value |= (exp<<10) | (m&0x3FF); - else if(exp > -11) - { - m = (m&0x3FF) | 0x400; - if(half::round_style == std::round_to_nearest) - { - m += 1 << -exp; - #if HALF_ROUND_TIES_TO_EVEN - m -= (m>>(1-exp)) & 1; - #endif - } - else if(half::round_style == std::round_toward_infinity) - m += ((value>>15)-1) & ((1<<(1-exp))-1U); - else if(half::round_style == std::round_toward_neg_infinity) - m += -(value>>15) & ((1<<(1-exp))-1U); - value |= m >> (1-exp); - } - else if(half::round_style == std::round_toward_infinity) - value -= (value>>15) - 1; - else if(half::round_style == std::round_toward_neg_infinity) - value += value >> 15; - return half(binary, value); - } - - /// Exponent implementation. 
- /// \param arg number to query - /// \return floating point exponent - static int ilogb(half arg) - { - int abs = arg.data_ & 0x7FFF; - if(!abs) - return FP_ILOGB0; - if(abs < 0x7C00) - { - int exp = (abs>>10) - 15; - if(abs < 0x400) - for(; abs<0x200; abs<<=1,--exp) ; - return exp; - } - if(abs > 0x7C00) - return FP_ILOGBNAN; - return INT_MAX; - } - - /// Exponent implementation. - /// \param arg number to query - /// \return floating point exponent - static half logb(half arg) - { - int abs = arg.data_ & 0x7FFF; - if(!abs) - return half(binary, 0xFC00); - if(abs < 0x7C00) - { - int exp = (abs>>10) - 15; - if(abs < 0x400) - for(; abs<0x200; abs<<=1,--exp) ; - uint16 bits = (exp<0) << 15; - if(exp) - { - unsigned int m = std::abs(exp) << 6, e = 18; - for(; m<0x400; m<<=1,--e) ; - bits |= (e<<10) + m; - } - return half(binary, bits); - } - if(abs > 0x7C00) - return arg; - return half(binary, 0x7C00); - } - - /// Enumeration implementation. - /// \param from number to increase/decrease - /// \param to direction to enumerate into - /// \return next representable number - static half nextafter(half from, half to) - { - uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF; - if(fabs > 0x7C00) - return from; - if(tabs > 0x7C00 || from.data_ == to.data_ || !(fabs|tabs)) - return to; - if(!fabs) - return half(binary, (to.data_&0x8000)+1); - bool lt = ((fabs==from.data_) ? static_cast(fabs) : -static_cast(fabs)) < - ((tabs==to.data_) ? static_cast(tabs) : -static_cast(tabs)); - return half(binary, from.data_+(((from.data_>>15)^static_cast(lt))<<1)-1); - } - - /// Enumeration implementation. 
- /// \param from number to increase/decrease - /// \param to direction to enumerate into - /// \return next representable number - static half nexttoward(half from, long double to) - { - if(isnan(from)) - return from; - auto lfrom = static_cast(from); - if(builtin_isnan(to) || lfrom == to) - return half(static_cast(to)); - if(!(from.data_&0x7FFF)) - return half(binary, (static_cast(builtin_signbit(to))<<15)+1); - return half(binary, from.data_+(((from.data_>>15)^static_cast(lfrom0x3FF) ? ((abs>=0x7C00) ? ((abs>0x7C00) ? FP_NAN : FP_INFINITE) : FP_NORMAL) :FP_SUBNORMAL) : FP_ZERO; - } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if finite number - /// \retval false else - static bool isfinite(half arg) { return (arg.data_&0x7C00) != 0x7C00; } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if infinite number - /// \retval false else - static bool isinf(half arg) { return (arg.data_&0x7FFF) == 0x7C00; } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if not a number - /// \retval false else - static bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; } - - /// Classification implementation. - /// \param arg value to classify - /// \retval true if normal number - /// \retval false else - static bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); } - - /// Sign bit implementation. - /// \param arg value to check - /// \retval true if signed - /// \retval false if unsigned - static bool signbit(half arg) { return (arg.data_&0x8000) != 0; } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if operands equal - /// \retval false else - static bool isequal(half x, half y) { return (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF)) && !isnan(x); } - - /// Comparison implementation. 
- /// \param x first operand - /// \param y second operand - /// \retval true if operands not equal - /// \retval false else - static bool isnotequal(half x, half y) { return (x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF)) || isnan(x); } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x > \a y - /// \retval false else - static bool isgreater(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) > ((yabs==y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x >= \a y - /// \retval false else - static bool isgreaterequal(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) >= ((yabs==y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x < \a y - /// \retval false else - static bool isless(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) < ((yabs==y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x <= \a y - /// \retval false else - static bool islessequal(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - return xabs<=0x7C00 && yabs<=0x7C00 && (((xabs==x.data_) ? xabs : -xabs) <= ((yabs==y.data_) ? yabs : -yabs)); - } - - /// Comparison implementation. 
- /// \param x first operand - /// \param y second operand - /// \retval true if either \a x > \a y nor \a x < \a y - /// \retval false else - static bool islessgreater(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - if(xabs > 0x7C00 || yabs > 0x7C00) - return false; - int a = (xabs==x.data_) ? xabs : -xabs, b = (yabs==y.data_) ? yabs : -yabs; - return a < b || a > b; - } - - /// Comparison implementation. - /// \param x first operand - /// \param y second operand - /// \retval true if operand unordered - /// \retval false else - static bool isunordered(half x, half y) { return isnan(x) || isnan(y); } - - private: - static double erf(double arg) - { - if(builtin_isinf(arg)) - return (arg<0.0) ? -1.0 : 1.0; - double x2 = arg * arg, ax2 = 0.147 * x2, value = std::sqrt(1.0-std::exp(-x2*(1.2732395447351626861510701069801+ax2)/(1.0+ax2))); - return builtin_signbit(arg) ? -value : value; - } - - static double lgamma(double arg) - { - double v = 1.0; - for(; arg<8.0; ++arg) v *= arg; - double w = 1.0 / (arg*arg); - return (((((((-0.02955065359477124183006535947712*w+0.00641025641025641025641025641026)*w+ - -0.00191752691752691752691752691753)*w+8.4175084175084175084175084175084e-4)*w+ - -5.952380952380952380952380952381e-4)*w+7.9365079365079365079365079365079e-4)*w+ - -0.00277777777777777777777777777778)*w+0.08333333333333333333333333333333)/arg + - 0.91893853320467274178032973640562 - std::log(v) - arg + (arg-0.5) * std::log(arg); - } - }; - - /// Wrapper for unary half-precision functions needing specialization for individual argument types. - /// \tparam T argument type - template struct unary_specialized - { - /// Negation implementation. - /// \param arg value to negate - /// \return negated value - static HALF_CONSTEXPR half negate(half arg) { return half(binary, arg.data_^0x8000); } - - /// Absolute value implementation. 
- /// \param arg function argument - /// \return absolute value - static half fabs(half arg) { return half(binary, arg.data_&0x7FFF); } - }; - template<> struct unary_specialized - { - static HALF_CONSTEXPR expr negate(float arg) { return expr(-arg); } - static expr fabs(float arg) { return expr(std::fabs(arg)); } - }; - - /// Wrapper for binary half-precision functions needing specialization for individual argument types. - /// \tparam T first argument type - /// \tparam U first argument type - template struct binary_specialized - { - /// Minimum implementation. - /// \param x first operand - /// \param y second operand - /// \return minimum value - static expr fmin(float x, float y) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::fmin(x, y)); - #else - if(builtin_isnan(x)) - return expr(y); - if(builtin_isnan(y)) - return expr(x); - return expr(std::min(x, y)); - #endif - } - - /// Maximum implementation. - /// \param x first operand - /// \param y second operand - /// \return maximum value - static expr fmax(float x, float y) - { - #if HALF_ENABLE_CPP11_CMATH - return expr(std::fmax(x, y)); - #else - if(builtin_isnan(x)) - return expr(y); - if(builtin_isnan(y)) - return expr(x); - return expr(std::max(x, y)); - #endif - } - }; - template<> struct binary_specialized - { - static half fmin(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - if(xabs > 0x7C00) - return y; - if(yabs > 0x7C00) - return x; - return (((xabs==x.data_) ? xabs : -xabs) > ((yabs==y.data_) ? yabs : -yabs)) ? y : x; - } - static half fmax(half x, half y) - { - int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; - if(xabs > 0x7C00) - return y; - if(yabs > 0x7C00) - return x; - return (((xabs==x.data_) ? xabs : -xabs) < ((yabs==y.data_) ? yabs : -yabs)) ? y : x; - } - }; - - /// Helper class for half casts. 
- /// This class template has to be specialized for all valid cast argument to define an appropriate static `cast` member - /// function and a corresponding `type` member denoting its return type. - /// \tparam T destination type - /// \tparam U source type - /// \tparam R rounding mode to use - template struct half_caster {}; - template struct half_caster - { - #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, "half_cast from non-arithmetic type unsupported"); - #endif - - static half cast(U arg) { return cast_impl(arg, is_float()); }; - - private: - static half cast_impl(U arg, true_type) { return half(binary, float2half(arg)); } - static half cast_impl(U arg, false_type) { return half(binary, int2half(arg)); } - }; - template struct half_caster - { - #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); - #endif - - static T cast(half arg) { return cast_impl(arg, is_float()); } - - private: - static T cast_impl(half arg, true_type) { return half2float(arg.data_); } - static T cast_impl(half arg, false_type) { return half2int(arg.data_); } - }; - template struct half_caster - { - #if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS - static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); - #endif - - static T cast(expr arg) { return cast_impl(arg, is_float()); } - - private: - static T cast_impl(float arg, true_type) { return static_cast(arg); } - static T cast_impl(half arg, false_type) { return half2int(arg.data_); } - }; - template struct half_caster - { - static half cast(half arg) { return arg; } - }; - template struct half_caster : half_caster {}; - - /// \name Comparison operators - /// \{ - - /// Comparison for equality. 
- /// \param x first operand - /// \param y second operand - /// \retval true if operands equal - /// \retval false else - template typename enable::type operator==(T x, U y) { return functions::isequal(x, y); } - - /// Comparison for inequality. - /// \param x first operand - /// \param y second operand - /// \retval true if operands not equal - /// \retval false else - template typename enable::type operator!=(T x, U y) { return functions::isnotequal(x, y); } - - /// Comparison for less than. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x less than \a y - /// \retval false else - template typename enable::type operator<(T x, U y) { return functions::isless(x, y); } - - /// Comparison for greater than. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater than \a y - /// \retval false else - template typename enable::type operator>(T x, U y) { return functions::isgreater(x, y); } - - /// Comparison for less equal. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x less equal \a y - /// \retval false else - template typename enable::type operator<=(T x, U y) { return functions::islessequal(x, y); } - - /// Comparison for greater equal. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater equal \a y - /// \retval false else - template typename enable::type operator>=(T x, U y) { return functions::isgreaterequal(x, y); } - - /// \} - /// \name Arithmetic operators - /// \{ - - /// Add halfs. - /// \param x left operand - /// \param y right operand - /// \return sum of half expressions - template typename enable::type operator+(T x, U y) { return functions::plus(x, y); } - - /// Subtract halfs. - /// \param x left operand - /// \param y right operand - /// \return difference of half expressions - template typename enable::type operator-(T x, U y) { return functions::minus(x, y); } - - /// Multiply halfs. 
- /// \param x left operand - /// \param y right operand - /// \return product of half expressions - template typename enable::type operator*(T x, U y) { return functions::multiplies(x, y); } - - /// Divide halfs. - /// \param x left operand - /// \param y right operand - /// \return quotient of half expressions - template typename enable::type operator/(T x, U y) { return functions::divides(x, y); } - - /// Identity. - /// \param arg operand - /// \return uncahnged operand - template HALF_CONSTEXPR typename enable::type operator+(T arg) { return arg; } - - /// Negation. - /// \param arg operand - /// \return negated operand - template HALF_CONSTEXPR typename enable::type operator-(T arg) { return unary_specialized::negate(arg); } - - /// \} - /// \name Input and output - /// \{ - - /// Output operator. - /// \param out output stream to write into - /// \param arg half expression to write - /// \return reference to output stream - template typename enable&,T>::type - operator<<(std::basic_ostream &out, T arg) { return functions::write(out, arg); } - - /// Input operator. - /// \param in input stream to read from - /// \param arg half to read into - /// \return reference to input stream - template std::basic_istream& - operator>>(std::basic_istream &in, half &arg) { return functions::read(in, arg); } - - /// \} - /// \name Basic mathematical operations - /// \{ - - /// Absolute value. - /// \param arg operand - /// \return absolute value of \a arg -// template typename enable::type abs(T arg) { return unary_specialized::fabs(arg); } - inline half abs(half arg) { return unary_specialized::fabs(arg); } - inline expr abs(expr arg) { return unary_specialized::fabs(arg); } - - /// Absolute value. 
- /// \param arg operand - /// \return absolute value of \a arg -// template typename enable::type fabs(T arg) { return unary_specialized::fabs(arg); } - inline half fabs(half arg) { return unary_specialized::fabs(arg); } - inline expr fabs(expr arg) { return unary_specialized::fabs(arg); } - - /// Remainder of division. - /// \param x first operand - /// \param y second operand - /// \return remainder of floating point division. -// template typename enable::type fmod(T x, U y) { return functions::fmod(x, y); } - inline expr fmod(half x, half y) { return functions::fmod(x, y); } - inline expr fmod(half x, expr y) { return functions::fmod(x, y); } - inline expr fmod(expr x, half y) { return functions::fmod(x, y); } - inline expr fmod(expr x, expr y) { return functions::fmod(x, y); } - - /// Remainder of division. - /// \param x first operand - /// \param y second operand - /// \return remainder of floating point division. -// template typename enable::type remainder(T x, U y) { return functions::remainder(x, y); } - inline expr remainder(half x, half y) { return functions::remainder(x, y); } - inline expr remainder(half x, expr y) { return functions::remainder(x, y); } - inline expr remainder(expr x, half y) { return functions::remainder(x, y); } - inline expr remainder(expr x, expr y) { return functions::remainder(x, y); } - - /// Remainder of division. - /// \param x first operand - /// \param y second operand - /// \param quo address to store some bits of quotient at - /// \return remainder of floating point division. 
-// template typename enable::type remquo(T x, U y, int *quo) { return functions::remquo(x, y, quo); } - inline expr remquo(half x, half y, int *quo) { return functions::remquo(x, y, quo); } - inline expr remquo(half x, expr y, int *quo) { return functions::remquo(x, y, quo); } - inline expr remquo(expr x, half y, int *quo) { return functions::remquo(x, y, quo); } - inline expr remquo(expr x, expr y, int *quo) { return functions::remquo(x, y, quo); } - - /// Fused multiply add. - /// \param x first operand - /// \param y second operand - /// \param z third operand - /// \return ( \a x * \a y ) + \a z rounded as one operation. -// template typename enable::type fma(T x, U y, V z) { return functions::fma(x, y, z); } - inline expr fma(half x, half y, half z) { return functions::fma(x, y, z); } - inline expr fma(half x, half y, expr z) { return functions::fma(x, y, z); } - inline expr fma(half x, expr y, half z) { return functions::fma(x, y, z); } - inline expr fma(half x, expr y, expr z) { return functions::fma(x, y, z); } - inline expr fma(expr x, half y, half z) { return functions::fma(x, y, z); } - inline expr fma(expr x, half y, expr z) { return functions::fma(x, y, z); } - inline expr fma(expr x, expr y, half z) { return functions::fma(x, y, z); } - inline expr fma(expr x, expr y, expr z) { return functions::fma(x, y, z); } - - /// Maximum of half expressions. - /// \param x first operand - /// \param y second operand - /// \return maximum of operands -// template typename result::type fmax(T x, U y) { return binary_specialized::fmax(x, y); } - inline half fmax(half x, half y) { return binary_specialized::fmax(x, y); } - inline expr fmax(half x, expr y) { return binary_specialized::fmax(x, y); } - inline expr fmax(expr x, half y) { return binary_specialized::fmax(x, y); } - inline expr fmax(expr x, expr y) { return binary_specialized::fmax(x, y); } - - /// Minimum of half expressions. 
- /// \param x first operand - /// \param y second operand - /// \return minimum of operands -// template typename result::type fmin(T x, U y) { return binary_specialized::fmin(x, y); } - inline half fmin(half x, half y) { return binary_specialized::fmin(x, y); } - inline expr fmin(half x, expr y) { return binary_specialized::fmin(x, y); } - inline expr fmin(expr x, half y) { return binary_specialized::fmin(x, y); } - inline expr fmin(expr x, expr y) { return binary_specialized::fmin(x, y); } - - /// Positive difference. - /// \param x first operand - /// \param y second operand - /// \return \a x - \a y or 0 if difference negative -// template typename enable::type fdim(T x, U y) { return functions::fdim(x, y); } - inline expr fdim(half x, half y) { return functions::fdim(x, y); } - inline expr fdim(half x, expr y) { return functions::fdim(x, y); } - inline expr fdim(expr x, half y) { return functions::fdim(x, y); } - inline expr fdim(expr x, expr y) { return functions::fdim(x, y); } - - /// Get NaN value. - /// \return quiet NaN - inline half nanh(const char*) { return functions::nanh(); } - - /// \} - /// \name Exponential functions - /// \{ - - /// Exponential function. - /// \param arg function argument - /// \return e raised to \a arg -// template typename enable::type exp(T arg) { return functions::exp(arg); } - inline expr exp(half arg) { return functions::exp(arg); } - inline expr exp(expr arg) { return functions::exp(arg); } - - /// Exponential minus one. - /// \param arg function argument - /// \return e raised to \a arg subtracted by 1 -// template typename enable::type expm1(T arg) { return functions::expm1(arg); } - inline expr expm1(half arg) { return functions::expm1(arg); } - inline expr expm1(expr arg) { return functions::expm1(arg); } - - /// Binary exponential. 
- /// \param arg function argument - /// \return 2 raised to \a arg -// template typename enable::type exp2(T arg) { return functions::exp2(arg); } - inline expr exp2(half arg) { return functions::exp2(arg); } - inline expr exp2(expr arg) { return functions::exp2(arg); } - - /// Natural logorithm. - /// \param arg function argument - /// \return logarithm of \a arg to base e -// template typename enable::type log(T arg) { return functions::log(arg); } - inline expr log(half arg) { return functions::log(arg); } - inline expr log(expr arg) { return functions::log(arg); } - - /// Common logorithm. - /// \param arg function argument - /// \return logarithm of \a arg to base 10 -// template typename enable::type log10(T arg) { return functions::log10(arg); } - inline expr log10(half arg) { return functions::log10(arg); } - inline expr log10(expr arg) { return functions::log10(arg); } - - /// Natural logorithm. - /// \param arg function argument - /// \return logarithm of \a arg plus 1 to base e -// template typename enable::type log1p(T arg) { return functions::log1p(arg); } - inline expr log1p(half arg) { return functions::log1p(arg); } - inline expr log1p(expr arg) { return functions::log1p(arg); } - - /// Binary logorithm. - /// \param arg function argument - /// \return logarithm of \a arg to base 2 -// template typename enable::type log2(T arg) { return functions::log2(arg); } - inline expr log2(half arg) { return functions::log2(arg); } - inline expr log2(expr arg) { return functions::log2(arg); } - - /// \} - /// \name Power functions - /// \{ - - /// Square root. - /// \param arg function argument - /// \return square root of \a arg -// template typename enable::type sqrt(T arg) { return functions::sqrt(arg); } - inline expr sqrt(half arg) { return functions::sqrt(arg); } - inline expr sqrt(expr arg) { return functions::sqrt(arg); } - - /// Cubic root. 
- /// \param arg function argument - /// \return cubic root of \a arg -// template typename enable::type cbrt(T arg) { return functions::cbrt(arg); } - inline expr cbrt(half arg) { return functions::cbrt(arg); } - inline expr cbrt(expr arg) { return functions::cbrt(arg); } - - /// Hypotenuse function. - /// \param x first argument - /// \param y second argument - /// \return square root of sum of squares without internal over- or underflows -// template typename enable::type hypot(T x, U y) { return functions::hypot(x, y); } - inline expr hypot(half x, half y) { return functions::hypot(x, y); } - inline expr hypot(half x, expr y) { return functions::hypot(x, y); } - inline expr hypot(expr x, half y) { return functions::hypot(x, y); } - inline expr hypot(expr x, expr y) { return functions::hypot(x, y); } - - /// Power function. - /// \param base first argument - /// \param exp second argument - /// \return \a base raised to \a exp -// template typename enable::type pow(T base, U exp) { return functions::pow(base, exp); } - inline expr pow(half base, half exp) { return functions::pow(base, exp); } - inline expr pow(half base, expr exp) { return functions::pow(base, exp); } - inline expr pow(expr base, half exp) { return functions::pow(base, exp); } - inline expr pow(expr base, expr exp) { return functions::pow(base, exp); } - - /// \} - /// \name Trigonometric functions - /// \{ - - /// Sine function. - /// \param arg function argument - /// \return sine value of \a arg -// template typename enable::type sin(T arg) { return functions::sin(arg); } - inline expr sin(half arg) { return functions::sin(arg); } - inline expr sin(expr arg) { return functions::sin(arg); } - - /// Cosine function. 
- /// \param arg function argument - /// \return cosine value of \a arg -// template typename enable::type cos(T arg) { return functions::cos(arg); } - inline expr cos(half arg) { return functions::cos(arg); } - inline expr cos(expr arg) { return functions::cos(arg); } - - /// Tangent function. - /// \param arg function argument - /// \return tangent value of \a arg -// template typename enable::type tan(T arg) { return functions::tan(arg); } - inline expr tan(half arg) { return functions::tan(arg); } - inline expr tan(expr arg) { return functions::tan(arg); } - - /// Arc sine. - /// \param arg function argument - /// \return arc sine value of \a arg -// template typename enable::type asin(T arg) { return functions::asin(arg); } - inline expr asin(half arg) { return functions::asin(arg); } - inline expr asin(expr arg) { return functions::asin(arg); } - - /// Arc cosine function. - /// \param arg function argument - /// \return arc cosine value of \a arg -// template typename enable::type acos(T arg) { return functions::acos(arg); } - inline expr acos(half arg) { return functions::acos(arg); } - inline expr acos(expr arg) { return functions::acos(arg); } - - /// Arc tangent function. - /// \param arg function argument - /// \return arc tangent value of \a arg -// template typename enable::type atan(T arg) { return functions::atan(arg); } - inline expr atan(half arg) { return functions::atan(arg); } - inline expr atan(expr arg) { return functions::atan(arg); } - - /// Arc tangent function. 
- /// \param x first argument - /// \param y second argument - /// \return arc tangent value -// template typename enable::type atan2(T x, U y) { return functions::atan2(x, y); } - inline expr atan2(half x, half y) { return functions::atan2(x, y); } - inline expr atan2(half x, expr y) { return functions::atan2(x, y); } - inline expr atan2(expr x, half y) { return functions::atan2(x, y); } - inline expr atan2(expr x, expr y) { return functions::atan2(x, y); } - - /// \} - /// \name Hyperbolic functions - /// \{ - - /// Hyperbolic sine. - /// \param arg function argument - /// \return hyperbolic sine value of \a arg -// template typename enable::type sinh(T arg) { return functions::sinh(arg); } - inline expr sinh(half arg) { return functions::sinh(arg); } - inline expr sinh(expr arg) { return functions::sinh(arg); } - - /// Hyperbolic cosine. - /// \param arg function argument - /// \return hyperbolic cosine value of \a arg -// template typename enable::type cosh(T arg) { return functions::cosh(arg); } - inline expr cosh(half arg) { return functions::cosh(arg); } - inline expr cosh(expr arg) { return functions::cosh(arg); } - - /// Hyperbolic tangent. - /// \param arg function argument - /// \return hyperbolic tangent value of \a arg -// template typename enable::type tanh(T arg) { return functions::tanh(arg); } - inline expr tanh(half arg) { return functions::tanh(arg); } - inline expr tanh(expr arg) { return functions::tanh(arg); } - - /// Hyperbolic area sine. - /// \param arg function argument - /// \return area sine value of \a arg -// template typename enable::type asinh(T arg) { return functions::asinh(arg); } - inline expr asinh(half arg) { return functions::asinh(arg); } - inline expr asinh(expr arg) { return functions::asinh(arg); } - - /// Hyperbolic area cosine. 
- /// \param arg function argument - /// \return area cosine value of \a arg -// template typename enable::type acosh(T arg) { return functions::acosh(arg); } - inline expr acosh(half arg) { return functions::acosh(arg); } - inline expr acosh(expr arg) { return functions::acosh(arg); } - - /// Hyperbolic area tangent. - /// \param arg function argument - /// \return area tangent value of \a arg -// template typename enable::type atanh(T arg) { return functions::atanh(arg); } - inline expr atanh(half arg) { return functions::atanh(arg); } - inline expr atanh(expr arg) { return functions::atanh(arg); } - - /// \} - /// \name Error and gamma functions - /// \{ - - /// Error function. - /// \param arg function argument - /// \return error function value of \a arg -// template typename enable::type erf(T arg) { return functions::erf(arg); } - inline expr erf(half arg) { return functions::erf(arg); } - inline expr erf(expr arg) { return functions::erf(arg); } - - /// Complementary error function. - /// \param arg function argument - /// \return 1 minus error function value of \a arg -// template typename enable::type erfc(T arg) { return functions::erfc(arg); } - inline expr erfc(half arg) { return functions::erfc(arg); } - inline expr erfc(expr arg) { return functions::erfc(arg); } - - /// Natural logarithm of gamma function. - /// \param arg function argument - /// \return natural logarith of gamma function for \a arg -// template typename enable::type lgamma(T arg) { return functions::lgamma(arg); } - inline expr lgamma(half arg) { return functions::lgamma(arg); } - inline expr lgamma(expr arg) { return functions::lgamma(arg); } - - /// Gamma function. 
- /// \param arg function argument - /// \return gamma function value of \a arg -// template typename enable::type tgamma(T arg) { return functions::tgamma(arg); } - inline expr tgamma(half arg) { return functions::tgamma(arg); } - inline expr tgamma(expr arg) { return functions::tgamma(arg); } - - /// \} - /// \name Rounding - /// \{ - - /// Nearest integer not less than half value. - /// \param arg half to round - /// \return nearest integer not less than \a arg -// template typename enable::type ceil(T arg) { return functions::ceil(arg); } - inline half ceil(half arg) { return functions::ceil(arg); } - inline half ceil(expr arg) { return functions::ceil(arg); } - - /// Nearest integer not greater than half value. - /// \param arg half to round - /// \return nearest integer not greater than \a arg -// template typename enable::type floor(T arg) { return functions::floor(arg); } - inline half floor(half arg) { return functions::floor(arg); } - inline half floor(expr arg) { return functions::floor(arg); } - - /// Nearest integer not greater in magnitude than half value. - /// \param arg half to round - /// \return nearest integer not greater in magnitude than \a arg -// template typename enable::type trunc(T arg) { return functions::trunc(arg); } - inline half trunc(half arg) { return functions::trunc(arg); } - inline half trunc(expr arg) { return functions::trunc(arg); } - - /// Nearest integer. - /// \param arg half to round - /// \return nearest integer, rounded away from zero in half-way cases -// template typename enable::type round(T arg) { return functions::round(arg); } - inline half round(half arg) { return functions::round(arg); } - inline half round(expr arg) { return functions::round(arg); } +/// Library-defined half-precision literals. 
+/// Import this namespace to enable half-precision floating point literals: +/// ~~~~{.cpp} +/// using namespace half_float::literal; +/// half_float::half = 4.2_h; +/// ~~~~ +namespace literal +{ +half operator"" _h(long double); +} +#endif - /// Nearest integer. - /// \param arg half to round - /// \return nearest integer, rounded away from zero in half-way cases -// template typename enable::type lround(T arg) { return functions::lround(arg); } - inline long lround(half arg) { return functions::lround(arg); } - inline long lround(expr arg) { return functions::lround(arg); } +/// \internal +/// \brief Implementation details. +namespace detail +{ +#if HALF_ENABLE_CPP11_TYPE_TRAITS +/// Conditional type. +template +struct conditional : std::conditional +{ +}; - /// Nearest integer using half's internal rounding mode. - /// \param arg half expression to round - /// \return nearest integer using default rounding mode -// template typename enable::type nearbyint(T arg) { return functions::nearbyint(arg); } - inline half nearbyint(half arg) { return functions::rint(arg); } - inline half nearbyint(expr arg) { return functions::rint(arg); } +/// Helper for tag dispatching. +template +struct bool_type : std::integral_constant +{ +}; +using std::true_type; +using std::false_type; - /// Nearest integer using half's internal rounding mode. - /// \param arg half expression to round - /// \return nearest integer using default rounding mode -// template typename enable::type rint(T arg) { return functions::rint(arg); } - inline half rint(half arg) { return functions::rint(arg); } - inline half rint(expr arg) { return functions::rint(arg); } +/// Type traits for floating point types. +template +struct is_float : std::is_floating_point +{ +}; +#else +/// Conditional type. +template +struct conditional +{ + typedef T type; +}; +template +struct conditional +{ + typedef F type; +}; - /// Nearest integer using half's internal rounding mode. 
- /// \param arg half expression to round - /// \return nearest integer using default rounding mode -// template typename enable::type lrint(T arg) { return functions::lrint(arg); } - inline long lrint(half arg) { return functions::lrint(arg); } - inline long lrint(expr arg) { return functions::lrint(arg); } - #if HALF_ENABLE_CPP11_LONG_LONG - /// Nearest integer. - /// \param arg half to round - /// \return nearest integer, rounded away from zero in half-way cases -// template typename enable::type llround(T arg) { return functions::llround(arg); } - inline long long llround(half arg) { return functions::llround(arg); } - inline long long llround(expr arg) { return functions::llround(arg); } +/// Helper for tag dispatching. +template +struct bool_type +{ +}; +typedef bool_type true_type; +typedef bool_type false_type; - /// Nearest integer using half's internal rounding mode. - /// \param arg half expression to round - /// \return nearest integer using default rounding mode -// template typename enable::type llrint(T arg) { return functions::llrint(arg); } - inline long long llrint(half arg) { return functions::llrint(arg); } - inline long long llrint(expr arg) { return functions::llrint(arg); } - #endif - - /// \} - /// \name Floating point manipulation - /// \{ - - /// Decompress floating point number. - /// \param arg number to decompress - /// \param exp address to store exponent at - /// \return significant in range [0.5, 1) -// template typename enable::type frexp(T arg, int *exp) { return functions::frexp(arg, exp); } - inline half frexp(half arg, int *exp) { return functions::frexp(arg, exp); } - inline half frexp(expr arg, int *exp) { return functions::frexp(arg, exp); } - - /// Multiply by power of two. 
- /// \param arg number to modify - /// \param exp power of two to multiply with - /// \return \a arg multplied by 2 raised to \a exp -// template typename enable::type ldexp(T arg, int exp) { return functions::scalbln(arg, exp); } - inline half ldexp(half arg, int exp) { return functions::scalbln(arg, exp); } - inline half ldexp(expr arg, int exp) { return functions::scalbln(arg, exp); } - - /// Extract integer and fractional parts. - /// \param arg number to decompress - /// \param iptr address to store integer part at - /// \return fractional part -// template typename enable::type modf(T arg, half *iptr) { return functions::modf(arg, iptr); } - inline half modf(half arg, half *iptr) { return functions::modf(arg, iptr); } - inline half modf(expr arg, half *iptr) { return functions::modf(arg, iptr); } - - /// Multiply by power of two. - /// \param arg number to modify - /// \param exp power of two to multiply with - /// \return \a arg multplied by 2 raised to \a exp -// template typename enable::type scalbn(T arg, int exp) { return functions::scalbln(arg, exp); } - inline half scalbn(half arg, int exp) { return functions::scalbln(arg, exp); } - inline half scalbn(expr arg, int exp) { return functions::scalbln(arg, exp); } - - /// Multiply by power of two. - /// \param arg number to modify - /// \param exp power of two to multiply with - /// \return \a arg multplied by 2 raised to \a exp -// template typename enable::type scalbln(T arg, long exp) { return functions::scalbln(arg, exp); } - inline half scalbln(half arg, long exp) { return functions::scalbln(arg, exp); } - inline half scalbln(expr arg, long exp) { return functions::scalbln(arg, exp); } - - /// Extract exponent. 
- /// \param arg number to query - /// \return floating point exponent - /// \retval FP_ILOGB0 for zero - /// \retval FP_ILOGBNAN for NaN - /// \retval MAX_INT for infinity -// template typename enable::type ilogb(T arg) { return functions::ilogb(arg); } - inline int ilogb(half arg) { return functions::ilogb(arg); } - inline int ilogb(expr arg) { return functions::ilogb(arg); } +/// Type traits for floating point types. +template +struct is_float : false_type +{ +}; +template +struct is_float : is_float +{ +}; +template +struct is_float : is_float +{ +}; +template +struct is_float : is_float +{ +}; +template <> +struct is_float : true_type +{ +}; +template <> +struct is_float : true_type +{ +}; +template <> +struct is_float : true_type +{ +}; +#endif - /// Extract exponent. - /// \param arg number to query - /// \return floating point exponent -// template typename enable::type logb(T arg) { return functions::logb(arg); } - inline half logb(half arg) { return functions::logb(arg); } - inline half logb(expr arg) { return functions::logb(arg); } - - /// Next representable value. - /// \param from value to compute next representable value for - /// \param to direction towards which to compute next value - /// \return next representable value after \a from in direction towards \a to -// template typename enable::type nextafter(T from, U to) { return functions::nextafter(from, to); } - inline half nextafter(half from, half to) { return functions::nextafter(from, to); } - inline half nextafter(half from, expr to) { return functions::nextafter(from, to); } - inline half nextafter(expr from, half to) { return functions::nextafter(from, to); } - inline half nextafter(expr from, expr to) { return functions::nextafter(from, to); } - - /// Next representable value. 
- /// \param from value to compute next representable value for - /// \param to direction towards which to compute next value - /// \return next representable value after \a from in direction towards \a to -// template typename enable::type nexttoward(T from, long double to) { return functions::nexttoward(from, to); } - inline half nexttoward(half from, long double to) { return functions::nexttoward(from, to); } - inline half nexttoward(expr from, long double to) { return functions::nexttoward(from, to); } - - /// Take sign. - /// \param x value to change sign for - /// \param y value to take sign from - /// \return value equal to \a x in magnitude and to \a y in sign -// template typename enable::type copysign(T x, U y) { return functions::copysign(x, y); } - inline half copysign(half x, half y) { return functions::copysign(x, y); } - inline half copysign(half x, expr y) { return functions::copysign(x, y); } - inline half copysign(expr x, half y) { return functions::copysign(x, y); } - inline half copysign(expr x, expr y) { return functions::copysign(x, y); } - - /// \} - /// \name Floating point classification - /// \{ - - - /// Classify floating point value. - /// \param arg number to classify - /// \retval FP_ZERO for positive and negative zero - /// \retval FP_SUBNORMAL for subnormal numbers - /// \retval FP_INFINITY for positive and negative infinity - /// \retval FP_NAN for NaNs - /// \retval FP_NORMAL for all other (normal) values -// template typename enable::type fpclassify(T arg) { return functions::fpclassify(arg); } - inline int fpclassify(half arg) { return functions::fpclassify(arg); } - inline int fpclassify(expr arg) { return functions::fpclassify(arg); } +/// Type traits for floating point bits. +template +struct bits +{ + typedef unsigned char type; +}; +template +struct bits : bits +{ +}; +template +struct bits : bits +{ +}; +template +struct bits : bits +{ +}; - /// Check if finite number. 
- /// \param arg number to check - /// \retval true if neither infinity nor NaN - /// \retval false else -// template typename enable::type isfinite(T arg) { return functions::isfinite(arg); } - inline bool isfinite(half arg) { return functions::isfinite(arg); } - inline bool isfinite(expr arg) { return functions::isfinite(arg); } +#if HALF_ENABLE_CPP11_CSTDINT +/// Unsigned integer of (at least) 16 bits width. +typedef std::uint_least16_t uint16; - /// Check for infinity. - /// \param arg number to check - /// \retval true for positive or negative infinity - /// \retval false else -// template typename enable::type isinf(T arg) { return functions::isinf(arg); } - inline bool isinf(half arg) { return functions::isinf(arg); } - inline bool isinf(expr arg) { return functions::isinf(arg); } +/// Unsigned integer of (at least) 32 bits width. +template <> +struct bits +{ + typedef std::uint_least32_t type; +}; - /// Check for NaN. - /// \param arg number to check - /// \retval true for NaNs - /// \retval false else -// template typename enable::type isnan(T arg) { return functions::isnan(arg); } - inline bool isnan(half arg) { return functions::isnan(arg); } - inline bool isnan(expr arg) { return functions::isnan(arg); } +/// Unsigned integer of (at least) 64 bits width. +template <> +struct bits +{ + typedef std::uint_least64_t type; +}; +#else +/// Unsigned integer of (at least) 16 bits width. +typedef unsigned short uint16; - /// Check if normal number. - /// \param arg number to check - /// \retval true if normal number - /// \retval false if either subnormal, zero, infinity or NaN -// template typename enable::type isnormal(T arg) { return functions::isnormal(arg); } - inline bool isnormal(half arg) { return functions::isnormal(arg); } - inline bool isnormal(expr arg) { return functions::isnormal(arg); } +/// Unsigned integer of (at least) 32 bits width. +template <> +struct bits : conditional::digits >= 32, unsigned int, unsigned long> +{ +}; - /// Check sign. 
- /// \param arg number to check - /// \retval true for negative number - /// \retval false for positive number -// template typename enable::type signbit(T arg) { return functions::signbit(arg); } - inline bool signbit(half arg) { return functions::signbit(arg); } - inline bool signbit(expr arg) { return functions::signbit(arg); } - - /// \} - /// \name Comparison - /// \{ - - /// Comparison for greater than. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater than \a y - /// \retval false else -// template typename enable::type isgreater(T x, U y) { return functions::isgreater(x, y); } - inline bool isgreater(half x, half y) { return functions::isgreater(x, y); } - inline bool isgreater(half x, expr y) { return functions::isgreater(x, y); } - inline bool isgreater(expr x, half y) { return functions::isgreater(x, y); } - inline bool isgreater(expr x, expr y) { return functions::isgreater(x, y); } - - /// Comparison for greater equal. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x greater equal \a y - /// \retval false else -// template typename enable::type isgreaterequal(T x, U y) { return functions::isgreaterequal(x, y); } - inline bool isgreaterequal(half x, half y) { return functions::isgreaterequal(x, y); } - inline bool isgreaterequal(half x, expr y) { return functions::isgreaterequal(x, y); } - inline bool isgreaterequal(expr x, half y) { return functions::isgreaterequal(x, y); } - inline bool isgreaterequal(expr x, expr y) { return functions::isgreaterequal(x, y); } - - /// Comparison for less than. 
- /// \param x first operand - /// \param y second operand - /// \retval true if \a x less than \a y - /// \retval false else -// template typename enable::type isless(T x, U y) { return functions::isless(x, y); } - inline bool isless(half x, half y) { return functions::isless(x, y); } - inline bool isless(half x, expr y) { return functions::isless(x, y); } - inline bool isless(expr x, half y) { return functions::isless(x, y); } - inline bool isless(expr x, expr y) { return functions::isless(x, y); } - - /// Comparison for less equal. - /// \param x first operand - /// \param y second operand - /// \retval true if \a x less equal \a y - /// \retval false else -// template typename enable::type islessequal(T x, U y) { return functions::islessequal(x, y); } - inline bool islessequal(half x, half y) { return functions::islessequal(x, y); } - inline bool islessequal(half x, expr y) { return functions::islessequal(x, y); } - inline bool islessequal(expr x, half y) { return functions::islessequal(x, y); } - inline bool islessequal(expr x, expr y) { return functions::islessequal(x, y); } - - /// Comarison for less or greater. - /// \param x first operand - /// \param y second operand - /// \retval true if either less or greater - /// \retval false else -// template typename enable::type islessgreater(T x, U y) { return functions::islessgreater(x, y); } - inline bool islessgreater(half x, half y) { return functions::islessgreater(x, y); } - inline bool islessgreater(half x, expr y) { return functions::islessgreater(x, y); } - inline bool islessgreater(expr x, half y) { return functions::islessgreater(x, y); } - inline bool islessgreater(expr x, expr y) { return functions::islessgreater(x, y); } - - /// Check if unordered. 
- /// \param x first operand - /// \param y second operand - /// \retval true if unordered (one or two NaN operands) - /// \retval false else -// template typename enable::type isunordered(T x, U y) { return functions::isunordered(x, y); } - inline bool isunordered(half x, half y) { return functions::isunordered(x, y); } - inline bool isunordered(half x, expr y) { return functions::isunordered(x, y); } - inline bool isunordered(expr x, half y) { return functions::isunordered(x, y); } - inline bool isunordered(expr x, expr y) { return functions::isunordered(x, y); } - - /// \name Casting - /// \{ - - /// Cast to or from half-precision floating point number. - /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted - /// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. - /// It uses the default rounding mode. - /// - /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types - /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler - /// error and casting between [half](\ref half_float::half)s is just a no-op. - /// \tparam T destination type (half or built-in arithmetic type) - /// \tparam U source type (half or built-in arithmetic type) - /// \param arg value to cast - /// \return \a arg converted to destination type - template T half_cast(U arg) { return half_caster::cast(arg); } - - /// Cast to or from half-precision floating point number. - /// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted - /// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. 
- /// - /// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types - /// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler - /// error and casting between [half](\ref half_float::half)s is just a no-op. - /// \tparam T destination type (half or built-in arithmetic type) - /// \tparam R rounding mode to use. - /// \tparam U source type (half or built-in arithmetic type) - /// \param arg value to cast - /// \return \a arg converted to destination type - template T half_cast(U arg) { return half_caster::cast(arg); } - /// \} - } - - using detail::operator==; - using detail::operator!=; - using detail::operator<; - using detail::operator>; - using detail::operator<=; - using detail::operator>=; - using detail::operator+; - using detail::operator-; - using detail::operator*; - using detail::operator/; - using detail::operator<<; - using detail::operator>>; - - using detail::abs; - using detail::fabs; - using detail::fmod; - using detail::remainder; - using detail::remquo; - using detail::fma; - using detail::fmax; - using detail::fmin; - using detail::fdim; - using detail::nanh; - using detail::exp; - using detail::expm1; - using detail::exp2; - using detail::log; - using detail::log10; - using detail::log1p; - using detail::log2; - using detail::sqrt; - using detail::cbrt; - using detail::hypot; - using detail::pow; - using detail::sin; - using detail::cos; - using detail::tan; - using detail::asin; - using detail::acos; - using detail::atan; - using detail::atan2; - using detail::sinh; - using detail::cosh; - using detail::tanh; - using detail::asinh; - using detail::acosh; - using detail::atanh; - using detail::erf; - using detail::erfc; - using detail::lgamma; - using detail::tgamma; - using detail::ceil; - using detail::floor; - using detail::trunc; - using detail::round; - using detail::lround; - using detail::nearbyint; - using detail::rint; - 
using detail::lrint; #if HALF_ENABLE_CPP11_LONG_LONG - using detail::llround; - using detail::llrint; -#endif - using detail::frexp; - using detail::ldexp; - using detail::modf; - using detail::scalbn; - using detail::scalbln; - using detail::ilogb; - using detail::logb; - using detail::nextafter; - using detail::nexttoward; - using detail::copysign; - using detail::fpclassify; - using detail::isfinite; - using detail::isinf; - using detail::isnan; - using detail::isnormal; - using detail::signbit; - using detail::isgreater; - using detail::isgreaterequal; - using detail::isless; - using detail::islessequal; - using detail::islessgreater; - using detail::isunordered; - - using detail::half_cast; -} - - -/// Extensions to the C++ standard library. -namespace std +/// Unsigned integer of (at least) 64 bits width. +template <> +struct bits : conditional::digits >= 64, unsigned long, unsigned long long> { - /// Numeric limits for half-precision floats. - /// Because of the underlying single-precision implementation of many operations, it inherits some properties from - /// `std::numeric_limits`. - template<> class numeric_limits : public numeric_limits - { - public: - /// Supports signed values. - static HALF_CONSTEXPR_CONST bool is_signed = true; - - /// Is not exact. - static HALF_CONSTEXPR_CONST bool is_exact = false; - - /// Doesn't provide modulo arithmetic. - static HALF_CONSTEXPR_CONST bool is_modulo = false; - - /// IEEE conformant. - static HALF_CONSTEXPR_CONST bool is_iec559 = true; - - /// Supports infinity. - static HALF_CONSTEXPR_CONST bool has_infinity = true; +}; +#else +/// Unsigned integer of (at least) 64 bits width. +template <> +struct bits +{ + typedef unsigned long type; +}; +#endif +#endif - /// Supports quiet NaNs. - static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true; +/// Tag type for binary construction. +struct binary_t +{ +}; - /// Supports subnormal values. 
- static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present; +/// Tag for binary construction. +HALF_CONSTEXPR_CONST binary_t binary = binary_t(); - /// Rounding mode. - /// Due to the mix of internal single-precision computations (using the rounding mode of the underlying - /// single-precision implementation) with the rounding mode of the single-to-half conversions, the actual rounding - /// mode might be `std::round_indeterminate` if the default half-precision rounding mode doesn't match the - /// single-precision rounding mode. - static HALF_CONSTEXPR_CONST float_round_style round_style = (std::numeric_limits::round_style== - half_float::half::round_style) ? half_float::half::round_style : round_indeterminate; +/// Temporary half-precision expression. +/// This class represents a half-precision expression which just stores a single-precision value internally. +struct expr +{ + /// Conversion constructor. + /// \param f single-precision value to convert + explicit HALF_CONSTEXPR expr(float f) HALF_NOEXCEPT : value_(f) {} + + /// Conversion to single-precision. + /// \return single precision value representing expression value + HALF_CONSTEXPR operator float() const HALF_NOEXCEPT + { + return value_; + } + +private: + /// Internal expression value stored in single-precision. + float value_; +}; + +/// SFINAE helper for generic half-precision functions. +/// This class template has to be specialized for each valid combination of argument types to provide a corresponding +/// `type` member equivalent to \a T. 
+/// \tparam T type to return +template +struct enable +{ +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; +template +struct enable +{ + typedef T type; +}; + +/// Return type for specialized generic 2-argument half-precision functions. +/// This class template has to be specialized for each valid combination of argument types to provide a corresponding +/// `type` member denoting the appropriate return type. +/// \tparam T first argument type +/// \tparam U first argument type +template +struct result : enable +{ +}; +template <> +struct result +{ + typedef half type; +}; + +/// \name Classification helpers +/// \{ + +/// Check for infinity. +/// \tparam T argument type (builtin floating point type) +/// \param arg value to query +/// \retval true if infinity +/// \retval false else +template +bool builtin_isinf(T arg) +{ +#if HALF_ENABLE_CPP11_CMATH + return std::isinf(arg); +#elif defined(_MSC_VER) + return !::_finite(static_cast(arg)) && !::_isnan(static_cast(arg)); +#else + return arg == std::numeric_limits::infinity() || arg == -std::numeric_limits::infinity(); +#endif +} - /// Significant digits. - static HALF_CONSTEXPR_CONST int digits = 11; +/// Check for NaN. 
+/// \tparam T argument type (builtin floating point type) +/// \param arg value to query +/// \retval true if not a number +/// \retval false else +template +bool builtin_isnan(T arg) +{ +#if HALF_ENABLE_CPP11_CMATH + return std::isnan(arg); +#elif defined(_MSC_VER) + return ::_isnan(static_cast(arg)) != 0; +#else + return arg != arg; +#endif +} - /// Significant decimal digits. - static HALF_CONSTEXPR_CONST int digits10 = 3; +/// Check sign. +/// \tparam T argument type (builtin floating point type) +/// \param arg value to query +/// \retval true if signbit set +/// \retval false else +template +bool builtin_signbit(T arg) +{ +#if HALF_ENABLE_CPP11_CMATH + return std::signbit(arg); +#else + return arg < T() || (arg == T() && T(1) / arg < T()); +#endif +} - /// Required decimal digits to represent all possible values. - static HALF_CONSTEXPR_CONST int max_digits10 = 5; +/// \} +/// \name Conversion +/// \{ + +/// Convert IEEE single-precision to half-precision. +/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \param value single-precision value +/// \return binary representation of half-precision value +template +uint16 float2half_impl(float value, true_type) +{ + typedef bits::type uint32; + uint32 bits; // = *reinterpret_cast(&value); //violating strict aliasing! 
+ std::memcpy(&bits, &value, sizeof(float)); + /* uint16 hbits = (bits>>16) & 0x8000; + bits &= 0x7FFFFFFF; + int exp = bits >> 23; + if(exp == 255) + return hbits | 0x7C00 | (0x3FF&-static_cast((bits&0x7FFFFF)!=0)); + if(exp > 142) + { + if(R == std::round_toward_infinity) + return hbits | 0x7C00 - (hbits>>15); + if(R == std::round_toward_neg_infinity) + return hbits | 0x7BFF + (hbits>>15); + return hbits | 0x7BFF + (R!=std::round_toward_zero); + } + int g, s; + if(exp > 112) + { + g = (bits>>12) & 1; + s = (bits&0xFFF) != 0; + hbits |= ((exp-112)<<10) | ((bits>>13)&0x3FF); + } + else if(exp > 101) + { + int i = 125 - exp; + bits = (bits&0x7FFFFF) | 0x800000; + g = (bits>>i) & 1; + s = (bits&((1L<> (i+1); + } + else + { + g = 0; + s = bits != 0; + } + if(R == std::round_to_nearest) + #if HALF_ROUND_TIES_TO_EVEN + hbits += g & (s|hbits); + #else + hbits += g; + #endif + else if(R == std::round_toward_infinity) + hbits += ~(hbits>>15) & (s|g); + else if(R == std::round_toward_neg_infinity) + hbits += (hbits>>15) & (g|s); + */ + static uint16 const base_table[512] = {0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, + 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 
0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, + 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00, 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, + 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, + 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x7C00, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, + 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 
0x8000, 0x8000, + 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100, 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, + 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00, 0xC000, 0xC400, 0xC800, + 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, + 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00, 0xFC00}; + static const unsigned char shift_table[512] = {24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 
24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13}; + uint16 hbits = base_table[bits >> 23] + static_cast((bits & 0x7FFFFF) >> shift_table[bits >> 23]); + if (R == std::round_to_nearest) + hbits += (((bits & 0x7FFFFF) >> (shift_table[bits >> 23] - 1)) | (((bits >> 23) & 0xFF) == 102)) + & ((hbits & 0x7C00) != 0x7C00) +#if HALF_ROUND_TIES_TO_EVEN + & (((((static_cast(1) << (shift_table[bits >> 23] - 1)) - 1) & bits) != 0) | hbits) +#endif + ; + else if (R == std::round_toward_zero) + hbits -= ((hbits & 0x7FFF) == 0x7C00) & ~shift_table[bits >> 23]; + else if (R == std::round_toward_infinity) + hbits += ((((bits & 0x7FFFFF & ((static_cast(1) << 
(shift_table[bits >> 23])) - 1)) != 0) + | (((bits >> 23) <= 102) & ((bits >> 23) != 0))) + & (hbits < 0x7C00)) + - ((hbits == 0xFC00) & ((bits >> 23) != 511)); + else if (R == std::round_toward_neg_infinity) + hbits += ((((bits & 0x7FFFFF & ((static_cast(1) << (shift_table[bits >> 23])) - 1)) != 0) + | (((bits >> 23) <= 358) & ((bits >> 23) != 256))) + & (hbits < 0xFC00) & (hbits >> 15)) + - ((hbits == 0x7C00) & ((bits >> 23) != 255)); + return hbits; +} - /// Number base. - static HALF_CONSTEXPR_CONST int radix = 2; +/// Convert IEEE double-precision to half-precision. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \param value double-precision value +/// \return binary representation of half-precision value +template +uint16 float2half_impl(double value, true_type) +{ + typedef bits::type uint32; + typedef bits::type uint64; + uint64 bits; // = *reinterpret_cast(&value); //violating strict aliasing! + std::memcpy(&bits, &value, sizeof(double)); + uint32 hi = bits >> 32, lo = bits & 0xFFFFFFFF; + uint16 hbits = (hi >> 16) & 0x8000; + hi &= 0x7FFFFFFF; + int exp = hi >> 20; + if (exp == 2047) + return hbits | 0x7C00 | (0x3FF & -static_cast((bits & 0xFFFFFFFFFFFFF) != 0)); + if (exp > 1038) + { + if (R == std::round_toward_infinity) + return hbits | (0x7C00 - (hbits >> 15)); + if (R == std::round_toward_neg_infinity) + return hbits | (0x7BFF + (hbits >> 15)); + return hbits | (0x7BFF + (R != std::round_toward_zero)); + } + int g, s = lo != 0; + if (exp > 1008) + { + g = (hi >> 9) & 1; + s |= (hi & 0x1FF) != 0; + hbits |= ((exp - 1008) << 10) | ((hi >> 10) & 0x3FF); + } + else if (exp > 997) + { + int i = 1018 - exp; + hi = (hi & 0xFFFFF) | 0x100000; + g = (hi >> i) & 1; + s |= (hi & ((1L << i) - 1)) != 0; + hbits |= hi >> (i + 1); + } + else + { + g = 0; + s |= hi != 0; + } + if (R == std::round_to_nearest) +#if HALF_ROUND_TIES_TO_EVEN + hbits += g & (s | hbits); +#else + hbits += g; +#endif + else if (R == 
std::round_toward_infinity) + hbits += ~(hbits >> 15) & (s | g); + else if (R == std::round_toward_neg_infinity) + hbits += (hbits >> 15) & (g | s); + return hbits; +} - /// One more than smallest exponent. - static HALF_CONSTEXPR_CONST int min_exponent = -13; +/// Convert non-IEEE floating point to half-precision. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam T source type (builtin floating point type) +/// \param value floating point value +/// \return binary representation of half-precision value +template +uint16 float2half_impl(T value, false_type) +{ + uint16 hbits = static_cast(builtin_signbit(value)) << 15; + if (value == T()) + return hbits; + if (builtin_isnan(value)) + return hbits | 0x7FFF; + if (builtin_isinf(value)) + return hbits | 0x7C00; + int exp; + std::frexp(value, &exp); + if (exp > 16) + { + if (R == std::round_toward_infinity) + return hbits | (0x7C00 - (hbits >> 15)); + if (R == std::round_toward_neg_infinity) + return hbits | (0x7BFF + (hbits >> 15)); + return hbits | (0x7BFF + (R != std::round_toward_zero)); + } + if (exp < -13) + value = std::ldexp(value, 24); + else + { + value = std::ldexp(value, 11 - exp); + hbits |= ((exp + 13) << 10); + } + T ival, frac = std::modf(value, &ival); + hbits += static_cast(std::abs(static_cast(ival))); + if (R == std::round_to_nearest) + { + frac = std::abs(frac); +#if HALF_ROUND_TIES_TO_EVEN + hbits += (frac > T(0.5)) | ((frac == T(0.5)) & hbits); +#else + hbits += frac >= T(0.5); +#endif + } + else if (R == std::round_toward_infinity) + hbits += frac > T(); + else if (R == std::round_toward_neg_infinity) + hbits += frac < T(); + return hbits; +} - /// Smallest normalized representable power of 10. - static HALF_CONSTEXPR_CONST int min_exponent10 = -4; +/// Convert floating point to half-precision. 
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam T source type (builtin floating point type) +/// \param value floating point value +/// \return binary representation of half-precision value +template +uint16 float2half(T value) +{ + return float2half_impl( + value, bool_type::is_iec559 && sizeof(typename bits::type) == sizeof(T)>()); +} - /// One more than largest exponent - static HALF_CONSTEXPR_CONST int max_exponent = 16; +/// Convert integer to half-precision floating point. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam S `true` if value negative, `false` else +/// \tparam T type to convert (builtin integer type) +/// \param value non-negative integral value +/// \return binary representation of half-precision value +template +uint16 int2half_impl(T value) +{ +#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_integral::value, "int to half conversion only supports builtin integer types"); +#endif + if (S) + value = -value; + uint16 bits = S << 15; + if (value > 0xFFFF) + { + if (R == std::round_toward_infinity) + bits |= 0x7C00 - S; + else if (R == std::round_toward_neg_infinity) + bits |= 0x7BFF + S; + else + bits |= 0x7BFF + (R != std::round_toward_zero); + } + else if (value) + { + unsigned int m = value, exp = 24; + for (; m < 0x400; m <<= 1, --exp) + ; + for (; m > 0x7FF; m >>= 1, ++exp) + ; + bits |= (exp << 10) + m; + if (exp > 24) + { + if (R == std::round_to_nearest) + bits += (value >> (exp - 25)) & 1 +#if HALF_ROUND_TIES_TO_EVEN + & (((((1 << (exp - 25)) - 1) & value) != 0) | bits) +#endif + ; + else if (R == std::round_toward_infinity) + bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & !S; + else if (R == std::round_toward_neg_infinity) + bits += ((value & ((1 << (exp - 24)) - 1)) != 0) & S; + } + } + return bits; +} - /// Largest finitely representable power of 10. 
- static HALF_CONSTEXPR_CONST int max_exponent10 = 4; +/// Convert integer to half-precision floating point. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam T type to convert (builtin integer type) +/// \param value integral value +/// \return binary representation of half-precision value +template +uint16 int2half(T value) +{ + return (value < 0) ? int2half_impl(value) : int2half_impl(value); +} - /// Smallest positive normal value. - static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0400); } +/// Convert half-precision to IEEE single-precision. +/// Credit for this goes to [Jeroen van der Zijp](ftp://ftp.fox-toolkit.org/pub/fasthalffloatconversion.pdf). +/// \param value binary representation of half-precision value +/// \return single-precision value +inline float half2float_impl(uint16 value, float, true_type) +{ + typedef bits::type uint32; + /* uint32 bits = static_cast(value&0x8000) << 16; + int abs = value & 0x7FFF; + if(abs) + { + bits |= 0x38000000 << static_cast(abs>=0x7C00); + for(; abs<0x400; abs<<=1,bits-=0x800000) ; + bits += static_cast(abs) << 13; + } + */ + static const uint32 mantissa_table[2048] = {0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, + 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, + 0x35700000, 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, + 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000, 0x36000000, + 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, + 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000, 0x36400000, 0x36440000, 0x36480000, + 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, + 0x36700000, 
0x36740000, 0x36780000, 0x367C0000, 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, + 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, + 0x369C0000, 0x369E0000, 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, + 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000, + 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, + 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000, 0x36E00000, 0x36E20000, + 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, + 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000, 0x37000000, 0x37010000, 0x37020000, 0x37030000, + 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, + 0x370D0000, 0x370E0000, 0x370F0000, 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, + 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, + 0x371F0000, 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, + 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000, 0x37300000, + 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, + 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000, 0x37400000, 0x37410000, 0x37420000, + 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, + 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000, 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, + 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, + 0x375E0000, 0x375F0000, 
0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, + 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000, + 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, + 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000, 0x37800000, 0x37808000, + 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, + 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000, 0x37880000, 0x37888000, 0x37890000, 0x37898000, + 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, + 0x378E8000, 0x378F0000, 0x378F8000, 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, + 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, + 0x37978000, 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, + 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000, 0x37A00000, + 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, + 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000, 0x37A80000, 0x37A88000, 0x37A90000, + 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, + 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000, 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, + 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, + 0x37B70000, 0x37B78000, 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, + 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000, + 0x37C00000, 0x37C08000, 0x37C10000, 
0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, + 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000, 0x37C80000, 0x37C88000, + 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, + 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000, 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, + 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, + 0x37D68000, 0x37D70000, 0x37D78000, 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, + 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, + 0x37DF8000, 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, + 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000, 0x37E80000, + 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, + 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000, 0x37F00000, 0x37F08000, 0x37F10000, + 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, + 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000, 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, + 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, + 0x37FF0000, 0x37FF8000, 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, + 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000, + 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, + 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000, 0x38080000, 0x38084000, + 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 
0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, + 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000, 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, + 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, + 0x380F4000, 0x380F8000, 0x380FC000, 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, + 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, + 0x3813C000, 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, + 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000, 0x38180000, + 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, + 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000, 0x381C0000, 0x381C4000, 0x381C8000, + 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, + 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000, 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, + 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, + 0x38238000, 0x3823C000, 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, + 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000, + 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, + 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000, 0x382C0000, 0x382C4000, + 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, + 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000, 0x38300000, 0x38304000, 0x38308000, 0x3830C000, + 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 
0x38324000, 0x38328000, 0x3832C000, 0x38330000, + 0x38334000, 0x38338000, 0x3833C000, 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, + 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, + 0x3837C000, 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, + 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000, 0x383C0000, + 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, + 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000, 0x38400000, 0x38404000, 0x38408000, + 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, + 0x38430000, 0x38434000, 0x38438000, 0x3843C000, 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, + 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, + 0x38478000, 0x3847C000, 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, + 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000, + 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, + 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000, 0x38500000, 0x38504000, + 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, + 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000, 0x38540000, 0x38544000, 0x38548000, 0x3854C000, + 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, + 0x38574000, 0x38578000, 0x3857C000, 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, + 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 
0x385B0000, 0x385B4000, 0x385B8000, + 0x385BC000, 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, + 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000, 0x38600000, + 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, + 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000, 0x38640000, 0x38644000, 0x38648000, + 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, + 0x38670000, 0x38674000, 0x38678000, 0x3867C000, 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, + 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, + 0x386B8000, 0x386BC000, 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, + 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000, + 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, + 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000, 0x38740000, 0x38744000, + 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, + 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000, 0x38780000, 0x38784000, 0x38788000, 0x3878C000, + 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, + 0x387B4000, 0x387B8000, 0x387BC000, 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, + 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, + 0x387FC000, 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, + 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 
0x3801E000, 0x38020000, + 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, + 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000, 0x38040000, 0x38042000, 0x38044000, + 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, + 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000, 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, + 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, + 0x3807C000, 0x3807E000, 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, + 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000, + 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, + 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000, 0x380C0000, 0x380C2000, + 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, + 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000, 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, + 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, + 0x380FA000, 0x380FC000, 0x380FE000, 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, + 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, + 0x3811E000, 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, + 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000, 0x38140000, + 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, + 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000, 0x38160000, 0x38162000, 
0x38164000, + 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, + 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000, 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, + 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, + 0x3819C000, 0x3819E000, 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, + 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000, + 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, + 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000, 0x381E0000, 0x381E2000, + 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, + 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000, 0x38200000, 0x38202000, 0x38204000, 0x38206000, + 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, + 0x3821A000, 0x3821C000, 0x3821E000, 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, + 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, + 0x3823E000, 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, + 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000, 0x38260000, + 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, + 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000, 0x38280000, 0x38282000, 0x38284000, + 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, + 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000, 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, + 
0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, + 0x382BC000, 0x382BE000, 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, + 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000, + 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, + 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000, 0x38300000, 0x38302000, + 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, + 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000, 0x38320000, 0x38322000, 0x38324000, 0x38326000, + 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, + 0x3833A000, 0x3833C000, 0x3833E000, 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, + 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, + 0x3835E000, 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, + 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000, 0x38380000, + 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, + 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000, 0x383A0000, 0x383A2000, 0x383A4000, + 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, + 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000, 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, + 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, + 0x383DC000, 0x383DE000, 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, + 0x383EE000, 
0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000, + 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, + 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000, 0x38420000, 0x38422000, + 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, + 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000, 0x38440000, 0x38442000, 0x38444000, 0x38446000, + 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, + 0x3845A000, 0x3845C000, 0x3845E000, 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, + 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, + 0x3847E000, 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, + 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000, 0x384A0000, + 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, + 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000, 0x384C0000, 0x384C2000, 0x384C4000, + 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, + 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000, 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, + 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, + 0x384FC000, 0x384FE000, 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, + 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000, + 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, + 0x38532000, 0x38534000, 
0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000, 0x38540000, 0x38542000, + 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, + 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000, 0x38560000, 0x38562000, 0x38564000, 0x38566000, + 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, + 0x3857A000, 0x3857C000, 0x3857E000, 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, + 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, + 0x3859E000, 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, + 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000, 0x385C0000, + 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, + 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000, 0x385E0000, 0x385E2000, 0x385E4000, + 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, + 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000, 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, + 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, + 0x3861C000, 0x3861E000, 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, + 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000, + 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, + 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000, 0x38660000, 0x38662000, + 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, + 0x38676000, 0x38678000, 0x3867A000, 
0x3867C000, 0x3867E000, 0x38680000, 0x38682000, 0x38684000, 0x38686000, + 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, + 0x3869A000, 0x3869C000, 0x3869E000, 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, + 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, + 0x386BE000, 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, + 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000, 0x386E0000, + 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, + 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000, 0x38700000, 0x38702000, 0x38704000, + 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, + 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000, 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, + 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, + 0x3873C000, 0x3873E000, 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, + 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000, + 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, + 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000, 0x38780000, 0x38782000, + 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, + 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000, 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, + 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, + 0x387BA000, 0x387BC000, 0x387BE000, 0x387C0000, 
0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, + 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, + 0x387DE000, 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, + 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000}; + static const uint32 exponent_table[64] = {0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, + 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, + 0x07800000, 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, + 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000, 0x80000000, + 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, + 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000, 0x88000000, 0x88800000, 0x89000000, + 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, + 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000}; + static const unsigned short offset_table[64] = {0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 1024, 1024, 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, + 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024}; + uint32 bits = mantissa_table[offset_table[value >> 10] + (value & 0x3FF)] + exponent_table[value >> 10]; + // return *reinterpret_cast(&bits); //violating strict aliasing! + float out; + std::memcpy(&out, &bits, sizeof(float)); + return out; +} - /// Smallest finite value. 
- static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0xFBFF); } +/// Convert half-precision to IEEE double-precision. +/// \param value binary representation of half-precision value +/// \return double-precision value +inline double half2float_impl(uint16 value, double, true_type) +{ + typedef bits::type uint32; + typedef bits::type uint64; + uint32 hi = static_cast(value & 0x8000) << 16; + int abs = value & 0x7FFF; + if (abs) + { + hi |= 0x3F000000 << static_cast(abs >= 0x7C00); + for (; abs < 0x400; abs <<= 1, hi -= 0x100000) + ; + hi += static_cast(abs) << 10; + } + uint64 bits = static_cast(hi) << 32; + // return *reinterpret_cast(&bits); //violating strict aliasing! + double out; + std::memcpy(&out, &bits, sizeof(double)); + return out; +} - /// Largest finite value. - static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7BFF); } +/// Convert half-precision to non-IEEE floating point. +/// \tparam T type to convert to (builtin integer type) +/// \param value binary representation of half-precision value +/// \return floating point value +template +T half2float_impl(uint16 value, T, ...) +{ + T out; + int abs = value & 0x7FFF; + if (abs > 0x7C00) + out = std::numeric_limits::has_quiet_NaN ? std::numeric_limits::quiet_NaN() : T(); + else if (abs == 0x7C00) + out = std::numeric_limits::has_infinity ? std::numeric_limits::infinity() : std::numeric_limits::max(); + else if (abs > 0x3FF) + out = std::ldexp(static_cast((abs & 0x3FF) | 0x400), (abs >> 10) - 25); + else + out = std::ldexp(static_cast(abs), -24); + return (value & 0x8000) ? -out : out; +} - /// Difference between one and next representable value. - static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x1400); } +/// Convert half-precision to floating point. 
+/// \tparam T type to convert to (builtin integer type) +/// \param value binary representation of half-precision value +/// \return floating point value +template +T half2float(uint16 value) +{ + return half2float_impl( + value, T(), bool_type::is_iec559 && sizeof(typename bits::type) == sizeof(T)>()); +} - /// Maximum rounding error. - static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW - { return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); } +/// Convert half-precision floating point to integer. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam E `true` for round to even, `false` for round away from zero +/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign +/// bits) +/// \param value binary representation of half-precision value +/// \return integral value +template +T half2int_impl(uint16 value) +{ +#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_integral::value, "half to int conversion only supports builtin integer types"); +#endif + unsigned int e = value & 0x7FFF; + if (e >= 0x7C00) + return (value & 0x8000) ? std::numeric_limits::min() : std::numeric_limits::max(); + if (e < 0x3800) + { + if (R == std::round_toward_infinity) + return T(~(value >> 15) & (e != 0)); + if (R == std::round_toward_neg_infinity) + return -T(value > 0x8000); + return T(); + } + unsigned int m = (value & 0x3FF) | 0x400; + e >>= 10; + if (e < 25) + { + if (R == std::round_to_nearest) + m += (1 << (24 - e)) - (~(m >> (25 - e)) & E); + else if (R == std::round_toward_infinity) + m += ((value >> 15) - 1) & ((1 << (25 - e)) - 1U); + else if (R == std::round_toward_neg_infinity) + m += -(value >> 15) & ((1 << (25 - e)) - 1U); + m >>= 25 - e; + } + else + m <<= e - 25; + return (value & 0x8000) ? -static_cast(m) : static_cast(m); +} - /// Positive infinity. 
- static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7C00); } +/// Convert half-precision floating point to integer. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign +/// bits) +/// \param value binary representation of half-precision value +/// \return integral value +template +T half2int(uint16 value) +{ + return half2int_impl(value); +} - /// Quiet NaN. - static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7FFF); } +/// Convert half-precision floating point to integer using round-to-nearest-away-from-zero. +/// \tparam T type to convert to (buitlin integer type with at least 16 bits precision, excluding any implicit sign +/// bits) +/// \param value binary representation of half-precision value +/// \return integral value +template +T half2int_up(uint16 value) +{ + return half2int_impl(value); +} - /// Signalling NaN. - static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x7DFF); } +/// Round half-precision number to nearest integer value. 
+/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \tparam E `true` for round to even, `false` for round away from zero +/// \param value binary representation of half-precision value +/// \return half-precision bits for nearest integral value +template +uint16 round_half_impl(uint16 value) +{ + unsigned int e = value & 0x7FFF; + uint16 result = value; + if (e < 0x3C00) + { + result &= 0x8000; + if (R == std::round_to_nearest) + result |= 0x3C00U & -(e >= (0x3800 + E)); + else if (R == std::round_toward_infinity) + result |= 0x3C00U & -(~(value >> 15) & (e != 0)); + else if (R == std::round_toward_neg_infinity) + result |= 0x3C00U & -(value > 0x8000); + } + else if (e < 0x6400) + { + e = 25 - (e >> 10); + unsigned int mask = (1 << e) - 1; + if (R == std::round_to_nearest) + result += (1 << (e - 1)) - (~(result >> e) & E); + else if (R == std::round_toward_infinity) + result += mask & ((value >> 15) - 1); + else if (R == std::round_toward_neg_infinity) + result += mask & -(value >> 15); + result &= ~mask; + } + return result; +} - /// Smallest positive subnormal value. - static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW { return half_float::half(half_float::detail::binary, 0x0001); } - }; +/// Round half-precision number to nearest integer value. +/// \tparam R rounding mode to use, `std::round_indeterminate` for fastest rounding +/// \param value binary representation of half-precision value +/// \return half-precision bits for nearest integral value +template +uint16 round_half(uint16 value) +{ + return round_half_impl(value); +} +/// Round half-precision number to nearest integer value using round-to-nearest-away-from-zero. 
+/// \param value binary representation of half-precision value +/// \return half-precision bits for nearest integral value +inline uint16 round_half_up(uint16 value) +{ + return round_half_impl(value); +} +/// \} + +struct functions; +template +struct unary_specialized; +template +struct binary_specialized; +template +struct half_caster; +} // namespace detail + +/// Half-precision floating point type. +/// This class implements an IEEE-conformant half-precision floating point type with the usual arithmetic operators and +/// conversions. It is implicitly convertible to single-precision floating point, which makes artihmetic expressions and +/// functions with mixed-type operands to be of the most precise operand type. Additionally all arithmetic operations +/// (and many mathematical functions) are carried out in single-precision internally. All conversions from single- to +/// half-precision are done using the library's default rounding mode, but temporary results inside chained arithmetic +/// expressions are kept in single-precision as long as possible (while of course still maintaining a strong +/// half-precision type). +/// +/// According to the C++98/03 definition, the half type is not a POD type. But according to C++11's less strict and +/// extended definitions it is both a standard layout type and a trivially copyable type (even if not a POD type), which +/// means it can be standard-conformantly copied using raw binary copies. But in this context some more words about the +/// actual size of the type. Although the half is representing an IEEE 16-bit type, it does not neccessarily have to be +/// of exactly 16-bits size. But on any reasonable implementation the actual binary representation of this type will +/// most probably not ivolve any additional "magic" or padding beyond the simple binary representation of the underlying +/// 16-bit IEEE number, even if not strictly guaranteed by the standard. 
But even then it only has an actual size of 16 +/// bits if your C++ implementation supports an unsigned integer type of exactly 16 bits width. But this should be the +/// case on nearly any reasonable platform. +/// +/// So if your C++ implementation is not totally exotic or imposes special alignment requirements, it is a reasonable +/// assumption that the data of a half is just comprised of the 2 bytes of the underlying IEEE representation. +#if defined(__clang__) +/* this is a WAR, after nvcc's process, + * `friend class std::numeric_limits;` in the following code, becomes + * `friend class numeric_limits;`, namespsace `std` is removed, which results compilation error in clang. + * tested on nvcc V10.0.95, and clang 5.0.300080 in ndk 16b + */ +using std::numeric_limits; +using std::hash; +#endif +class half +{ + friend struct detail::functions; + friend struct detail::unary_specialized; + friend struct detail::binary_specialized; + template + friend struct detail::half_caster; + friend class std::numeric_limits; #if HALF_ENABLE_CPP11_HASH - /// Hash function for half-precision floats. - /// This is only defined if C++11 `std::hash` is supported and enabled. - template<> struct hash //: unary_function - { - /// Type of function argument. - typedef half_float::half argument_type; + friend struct std::hash; +#endif +#if HALF_ENABLE_CPP11_USER_LITERALS + friend half literal::operator"" _h(long double); +#endif - /// Function return type. - typedef size_t result_type; +public: + /// Default constructor. + /// This initializes the half to 0. Although this does not match the builtin types' default-initialization semantics + /// and may be less efficient than no initialization, it is needed to provide proper value-initialization semantics. + HALF_CONSTEXPR half() HALF_NOEXCEPT : data_() {} + + /// Copy constructor. 
+ /// \tparam T type of concrete half expression + /// \param rhs half expression to copy from + half(detail::expr rhs) + : data_(detail::float2half(static_cast(rhs))) + { + } + + /// Conversion constructor. + /// \param rhs float to convert + explicit half(float rhs) + : data_(detail::float2half(rhs)) + { + } + + /// Conversion to single-precision. + /// \return single precision value representing expression value + operator float() const + { + return detail::half2float(data_); + } + + /// Assignment operator. + /// \tparam T type of concrete half expression + /// \param rhs half expression to copy from + /// \return reference to this half + half& operator=(detail::expr rhs) + { + return *this = static_cast(rhs); + } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to add + /// \return reference to this half + template + typename detail::enable::type operator+=(T rhs) + { + return *this += static_cast(rhs); + } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to subtract + /// \return reference to this half + template + typename detail::enable::type operator-=(T rhs) + { + return *this -= static_cast(rhs); + } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to multiply with + /// \return reference to this half + template + typename detail::enable::type operator*=(T rhs) + { + return *this *= static_cast(rhs); + } + + /// Arithmetic assignment. + /// \tparam T type of concrete half expression + /// \param rhs half expression to divide by + /// \return reference to this half + template + typename detail::enable::type operator/=(T rhs) + { + return *this /= static_cast(rhs); + } + + /// Assignment operator. 
+ /// \param rhs single-precision value to copy from + /// \return reference to this half + half& operator=(float rhs) + { + data_ = detail::float2half(rhs); + return *this; + } + + /// Arithmetic assignment. + /// \param rhs single-precision value to add + /// \return reference to this half + half& operator+=(float rhs) + { + data_ = detail::float2half(detail::half2float(data_) + rhs); + return *this; + } + + /// Arithmetic assignment. + /// \param rhs single-precision value to subtract + /// \return reference to this half + half& operator-=(float rhs) + { + data_ = detail::float2half(detail::half2float(data_) - rhs); + return *this; + } + + /// Arithmetic assignment. + /// \param rhs single-precision value to multiply with + /// \return reference to this half + half& operator*=(float rhs) + { + data_ = detail::float2half(detail::half2float(data_) * rhs); + return *this; + } + + /// Arithmetic assignment. + /// \param rhs single-precision value to divide by + /// \return reference to this half + half& operator/=(float rhs) + { + data_ = detail::float2half(detail::half2float(data_) / rhs); + return *this; + } + + /// Prefix increment. + /// \return incremented half value + half& operator++() + { + return *this += 1.0f; + } + + /// Prefix decrement. + /// \return decremented half value + half& operator--() + { + return *this -= 1.0f; + } + + /// Postfix increment. + /// \return non-incremented half value + half operator++(int) + { + half out(*this); + ++*this; + return out; + } + + /// Postfix decrement. + /// \return non-decremented half value + half operator--(int) + { + half out(*this); + --*this; + return out; + } + +private: + /// Rounding mode to use + static const std::float_round_style round_style = (std::float_round_style) (HALF_ROUND_STYLE); + + /// Constructor. 
+ /// \param bits binary representation to set half to + HALF_CONSTEXPR half(detail::binary_t, detail::uint16 bits) HALF_NOEXCEPT : data_(bits) {} + + /// Internal binary representation + detail::uint16 data_; +}; - /// Compute hash function. - /// \param arg half to hash - /// \return hash value - result_type operator()(argument_type arg) const - { return hash()(static_cast(arg.data_)&-(arg.data_!=0x8000)); } - }; -#endif +#if HALF_ENABLE_CPP11_USER_LITERALS +namespace literal +{ +/// Half literal. +/// While this returns an actual half-precision value, half literals can unfortunately not be constant expressions due +/// to rather involved conversions. +/// \param value literal value +/// \return half with given value (if representable) +inline half operator"" _h(long double value) +{ + return half(detail::binary, detail::float2half(value)); } +} // namespace literal +#endif +namespace detail +{ +/// Wrapper implementing unspecialized half-precision functions. +struct functions +{ + /// Addition implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision sum stored in single-precision + static expr plus(float x, float y) + { + return expr(x + y); + } + + /// Subtraction implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision difference stored in single-precision + static expr minus(float x, float y) + { + return expr(x - y); + } + + /// Multiplication implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision product stored in single-precision + static expr multiplies(float x, float y) + { + return expr(x * y); + } + + /// Division implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision quotient stored in single-precision + static expr divides(float x, float y) + { + return expr(x / y); + } + + /// Output implementation. 
+ /// \param out stream to write to + /// \param arg value to write + /// \return reference to stream + template + static std::basic_ostream& write(std::basic_ostream& out, float arg) + { + return out << arg; + } + + /// Input implementation. + /// \param in stream to read from + /// \param arg half to read into + /// \return reference to stream + template + static std::basic_istream& read(std::basic_istream& in, half& arg) + { + float f; + if (in >> f) + arg = f; + return in; + } + + /// Modulo implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision division remainder stored in single-precision + static expr fmod(float x, float y) + { + return expr(std::fmod(x, y)); + } + + /// Remainder implementation. + /// \param x first operand + /// \param y second operand + /// \return Half-precision division remainder stored in single-precision + static expr remainder(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::remainder(x, y)); +#else + if (builtin_isnan(x) || builtin_isnan(y)) + return expr(std::numeric_limits::quiet_NaN()); + float ax = std::fabs(x), ay = std::fabs(y); + if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) + return expr(std::numeric_limits::quiet_NaN()); + if (ay >= 65536.0f) + return expr(x); + if (ax == ay) + return expr(builtin_signbit(x) ? -0.0f : 0.0f); + ax = std::fmod(ax, ay + ay); + float y2 = 0.5f * ay; + if (ax > y2) + { + ax -= ay; + if (ax >= y2) + ax -= ay; + } + return expr(builtin_signbit(x) ? -ax : ax); +#endif + } + + /// Remainder implementation. 
+ /// \param x first operand + /// \param y second operand + /// \param quo address to store quotient bits at + /// \return Half-precision division remainder stored in single-precision + static expr remquo(float x, float y, int* quo) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::remquo(x, y, quo)); +#else + if (builtin_isnan(x) || builtin_isnan(y)) + return expr(std::numeric_limits::quiet_NaN()); + bool sign = builtin_signbit(x), qsign = static_cast(sign ^ builtin_signbit(y)); + float ax = std::fabs(x), ay = std::fabs(y); + if (ax >= 65536.0f || ay < std::ldexp(1.0f, -24)) + return expr(std::numeric_limits::quiet_NaN()); + if (ay >= 65536.0f) + return expr(x); + if (ax == ay) + return *quo = qsign ? -1 : 1, expr(sign ? -0.0f : 0.0f); + ax = std::fmod(ax, 8.0f * ay); + int cquo = 0; + if (ax >= 4.0f * ay) + { + ax -= 4.0f * ay; + cquo += 4; + } + if (ax >= 2.0f * ay) + { + ax -= 2.0f * ay; + cquo += 2; + } + float y2 = 0.5f * ay; + if (ax > y2) + { + ax -= ay; + ++cquo; + if (ax >= y2) + { + ax -= ay; + ++cquo; + } + } + return *quo = qsign ? -cquo : cquo, expr(sign ? -ax : ax); +#endif + } + + /// Positive difference implementation. + /// \param x first operand + /// \param y second operand + /// \return Positive difference stored in single-precision + static expr fdim(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::fdim(x, y)); +#else + return expr((x <= y) ? 0.0f : (x - y)); +#endif + } + + /// Fused multiply-add implementation. + /// \param x first operand + /// \param y second operand + /// \param z third operand + /// \return \a x * \a y + \a z stored in single-precision + static expr fma(float x, float y, float z) + { +#if HALF_ENABLE_CPP11_CMATH && defined(FP_FAST_FMAF) + return expr(std::fma(x, y, z)); +#else + return expr(x * y + z); +#endif + } + + /// Get NaN. + /// \return Half-precision quiet NaN + static half nanh() + { + return half(binary, 0x7FFF); + } + + /// Exponential implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr exp(float arg) + { + return expr(std::exp(arg)); + } + + /// Exponential implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr expm1(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::expm1(arg)); +#else + return expr(static_cast(std::exp(static_cast(arg)) - 1.0)); +#endif + } + + /// Binary exponential implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr exp2(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::exp2(arg)); +#else + return expr(static_cast(std::exp(arg * 0.69314718055994530941723212145818))); +#endif + } + + /// Logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr log(float arg) + { + return expr(std::log(arg)); + } + + /// Common logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr log10(float arg) + { + return expr(std::log10(arg)); + } + + /// Logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr log1p(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::log1p(arg)); +#else + return expr(static_cast(std::log(1.0 + arg))); +#endif + } + + /// Binary logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr log2(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::log2(arg)); +#else + return expr(static_cast(std::log(static_cast(arg)) * 1.4426950408889634073599246810019)); +#endif + } + + /// Square root implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr sqrt(float arg) + { + return expr(std::sqrt(arg)); + } + + /// Cubic root implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr cbrt(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::cbrt(arg)); +#else + if (builtin_isnan(arg) || builtin_isinf(arg)) + return expr(arg); + return expr(builtin_signbit(arg) ? -static_cast(std::pow(-static_cast(arg), 1.0 / 3.0)) + : static_cast(std::pow(static_cast(arg), 1.0 / 3.0))); +#endif + } + + /// Hypotenuse implementation. + /// \param x first argument + /// \param y second argument + /// \return function value stored in single-preicision + static expr hypot(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::hypot(x, y)); +#else + return expr((builtin_isinf(x) || builtin_isinf(y)) + ? std::numeric_limits::infinity() + : static_cast(std::sqrt(static_cast(x) * x + static_cast(y) * y))); +#endif + } + + /// Power implementation. + /// \param base value to exponentiate + /// \param exp power to expontiate to + /// \return function value stored in single-preicision + static expr pow(float base, float exp) + { + return expr(std::pow(base, exp)); + } + + /// Sine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr sin(float arg) + { + return expr(std::sin(arg)); + } + + /// Cosine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr cos(float arg) + { + return expr(std::cos(arg)); + } + + /// Tan implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr tan(float arg) + { + return expr(std::tan(arg)); + } + + /// Arc sine implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr asin(float arg) + { + return expr(std::asin(arg)); + } + + /// Arc cosine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr acos(float arg) + { + return expr(std::acos(arg)); + } + + /// Arc tangent implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr atan(float arg) + { + return expr(std::atan(arg)); + } + + /// Arc tangent implementation. + /// \param x first argument + /// \param y second argument + /// \return function value stored in single-preicision + static expr atan2(float x, float y) + { + return expr(std::atan2(x, y)); + } + + /// Hyperbolic sine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr sinh(float arg) + { + return expr(std::sinh(arg)); + } + + /// Hyperbolic cosine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr cosh(float arg) + { + return expr(std::cosh(arg)); + } + + /// Hyperbolic tangent implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr tanh(float arg) + { + return expr(std::tanh(arg)); + } + + /// Hyperbolic area sine implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr asinh(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::asinh(arg)); +#else + return expr((arg == -std::numeric_limits::infinity()) + ? arg + : static_cast(std::log(arg + std::sqrt(arg * arg + 1.0)))); +#endif + } + + /// Hyperbolic area cosine implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr acosh(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::acosh(arg)); +#else + return expr((arg < -1.0f) ? std::numeric_limits::quiet_NaN() + : static_cast(std::log(arg + std::sqrt(arg * arg - 1.0)))); +#endif + } + + /// Hyperbolic area tangent implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr atanh(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::atanh(arg)); +#else + return expr(static_cast(0.5 * std::log((1.0 + arg) / (1.0 - arg)))); +#endif + } + + /// Error function implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr erf(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::erf(arg)); +#else + return expr(static_cast(erf(static_cast(arg)))); +#endif + } + + /// Complementary implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr erfc(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::erfc(arg)); +#else + return expr(static_cast(1.0 - erf(static_cast(arg)))); +#endif + } + + /// Gamma logarithm implementation. + /// \param arg function argument + /// \return function value stored in single-preicision + static expr lgamma(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::lgamma(arg)); +#else + if (builtin_isinf(arg)) + return expr(std::numeric_limits::infinity()); + if (arg < 0.0f) + { + float i, f = std::modf(-arg, &i); + if (f == 0.0f) + return expr(std::numeric_limits::infinity()); + return expr(static_cast(1.1447298858494001741434273513531 + - std::log(std::abs(std::sin(3.1415926535897932384626433832795 * f))) - lgamma(1.0 - arg))); + } + return expr(static_cast(lgamma(static_cast(arg)))); +#endif + } + + /// Gamma implementation. 
+ /// \param arg function argument + /// \return function value stored in single-preicision + static expr tgamma(float arg) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::tgamma(arg)); +#else + if (arg == 0.0f) + return builtin_signbit(arg) ? expr(-std::numeric_limits::infinity()) + : expr(std::numeric_limits::infinity()); + if (arg < 0.0f) + { + float i, f = std::modf(-arg, &i); + if (f == 0.0f) + return expr(std::numeric_limits::quiet_NaN()); + double value = 3.1415926535897932384626433832795 + / (std::sin(3.1415926535897932384626433832795 * f) * std::exp(lgamma(1.0 - arg))); + return expr(static_cast((std::fmod(i, 2.0f) == 0.0f) ? -value : value)); + } + if (builtin_isinf(arg)) + return expr(arg); + return expr(static_cast(std::exp(lgamma(static_cast(arg))))); +#endif + } + + /// Floor implementation. + /// \param arg value to round + /// \return rounded value + static half floor(half arg) + { + return half(binary, round_half(arg.data_)); + } + + /// Ceiling implementation. + /// \param arg value to round + /// \return rounded value + static half ceil(half arg) + { + return half(binary, round_half(arg.data_)); + } + + /// Truncation implementation. + /// \param arg value to round + /// \return rounded value + static half trunc(half arg) + { + return half(binary, round_half(arg.data_)); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static half round(half arg) + { + return half(binary, round_half_up(arg.data_)); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static long lround(half arg) + { + return detail::half2int_up(arg.data_); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static half rint(half arg) + { + return half(binary, round_half(arg.data_)); + } + + /// Nearest integer implementation. 
+ /// \param arg value to round + /// \return rounded value + static long lrint(half arg) + { + return detail::half2int(arg.data_); + } + +#if HALF_ENABLE_CPP11_LONG_LONG + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static long long llround(half arg) + { + return detail::half2int_up(arg.data_); + } + + /// Nearest integer implementation. + /// \param arg value to round + /// \return rounded value + static long long llrint(half arg) + { + return detail::half2int(arg.data_); + } +#endif + + /// Decompression implementation. + /// \param arg number to decompress + /// \param exp address to store exponent at + /// \return normalized significant + static half frexp(half arg, int* exp) + { + int m = arg.data_ & 0x7FFF, e = -14; + if (m >= 0x7C00 || !m) + return *exp = 0, arg; + for (; m < 0x400; m <<= 1, --e) + ; + return *exp = e + (m >> 10), half(binary, (arg.data_ & 0x8000) | 0x3800 | (m & 0x3FF)); + } + + /// Decompression implementation. + /// \param arg number to decompress + /// \param iptr address to store integer part at + /// \return fractional part + static half modf(half arg, half* iptr) + { + unsigned int e = arg.data_ & 0x7FFF; + if (e >= 0x6400) + return *iptr = arg, half(binary, arg.data_ & (0x8000U | -(e > 0x7C00))); + if (e < 0x3C00) + return iptr->data_ = arg.data_ & 0x8000, arg; + e >>= 10; + unsigned int mask = (1 << (25 - e)) - 1, m = arg.data_ & mask; + iptr->data_ = arg.data_ & ~mask; + if (!m) + return half(binary, arg.data_ & 0x8000); + for (; m < 0x400; m <<= 1, --e) + ; + return half(binary, static_cast((arg.data_ & 0x8000) | (e << 10) | (m & 0x3FF))); + } + + /// Scaling implementation. 
+ /// \param arg number to scale + /// \param exp power of two to scale by + /// \return scaled number + static half scalbln(half arg, long exp) + { + unsigned int m = arg.data_ & 0x7FFF; + if (m >= 0x7C00 || !m) + return arg; + for (; m < 0x400; m <<= 1, --exp) + ; + exp += m >> 10; + uint16 value = arg.data_ & 0x8000; + if (exp > 30) + { + if (half::round_style == std::round_toward_zero) + value |= 0x7BFF; + else if (half::round_style == std::round_toward_infinity) + value |= 0x7C00 - (value >> 15); + else if (half::round_style == std::round_toward_neg_infinity) + value |= 0x7BFF + (value >> 15); + else + value |= 0x7C00; + } + else if (exp > 0) + value |= (exp << 10) | (m & 0x3FF); + else if (exp > -11) + { + m = (m & 0x3FF) | 0x400; + if (half::round_style == std::round_to_nearest) + { + m += 1 << -exp; +#if HALF_ROUND_TIES_TO_EVEN + m -= (m >> (1 - exp)) & 1; +#endif + } + else if (half::round_style == std::round_toward_infinity) + m += ((value >> 15) - 1) & ((1 << (1 - exp)) - 1U); + else if (half::round_style == std::round_toward_neg_infinity) + m += -(value >> 15) & ((1 << (1 - exp)) - 1U); + value |= m >> (1 - exp); + } + else if (half::round_style == std::round_toward_infinity) + value -= (value >> 15) - 1; + else if (half::round_style == std::round_toward_neg_infinity) + value += value >> 15; + return half(binary, value); + } + + /// Exponent implementation. + /// \param arg number to query + /// \return floating point exponent + static int ilogb(half arg) + { + int abs = arg.data_ & 0x7FFF; + if (!abs) + return FP_ILOGB0; + if (abs < 0x7C00) + { + int exp = (abs >> 10) - 15; + if (abs < 0x400) + for (; abs < 0x200; abs <<= 1, --exp) + ; + return exp; + } + if (abs > 0x7C00) + return FP_ILOGBNAN; + return INT_MAX; + } + + /// Exponent implementation. 
+ /// \param arg number to query + /// \return floating point exponent + static half logb(half arg) + { + int abs = arg.data_ & 0x7FFF; + if (!abs) + return half(binary, 0xFC00); + if (abs < 0x7C00) + { + int exp = (abs >> 10) - 15; + if (abs < 0x400) + for (; abs < 0x200; abs <<= 1, --exp) + ; + uint16 bits = (exp < 0) << 15; + if (exp) + { + unsigned int m = std::abs(exp) << 6, e = 18; + for (; m < 0x400; m <<= 1, --e) + ; + bits |= (e << 10) + m; + } + return half(binary, bits); + } + if (abs > 0x7C00) + return arg; + return half(binary, 0x7C00); + } + + /// Enumeration implementation. + /// \param from number to increase/decrease + /// \param to direction to enumerate into + /// \return next representable number + static half nextafter(half from, half to) + { + uint16 fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF; + if (fabs > 0x7C00) + return from; + if (tabs > 0x7C00 || from.data_ == to.data_ || !(fabs | tabs)) + return to; + if (!fabs) + return half(binary, (to.data_ & 0x8000) + 1); + bool lt = ((fabs == from.data_) ? static_cast(fabs) : -static_cast(fabs)) + < ((tabs == to.data_) ? static_cast(tabs) : -static_cast(tabs)); + return half(binary, from.data_ + (((from.data_ >> 15) ^ static_cast(lt)) << 1) - 1); + } + + /// Enumeration implementation. 
+ /// \param from number to increase/decrease + /// \param to direction to enumerate into + /// \return next representable number + static half nexttoward(half from, long double to) + { + if (isnan(from)) + return from; + auto lfrom = static_cast(from); + if (builtin_isnan(to) || lfrom == to) + return half(static_cast(to)); + if (!(from.data_ & 0x7FFF)) + return half(binary, (static_cast(builtin_signbit(to)) << 15) + 1); + return half(binary, from.data_ + (((from.data_ >> 15) ^ static_cast(lfrom < to)) << 1) - 1); + } + + /// Sign implementation + /// \param x first operand + /// \param y second operand + /// \return composed value + static half copysign(half x, half y) + { + return half(binary, x.data_ ^ ((x.data_ ^ y.data_) & 0x8000)); + } + + /// Classification implementation. + /// \param arg value to classify + /// \retval true if infinite number + /// \retval false else + static int fpclassify(half arg) + { + unsigned int abs = arg.data_ & 0x7FFF; + return abs + ? ((abs > 0x3FF) ? ((abs >= 0x7C00) ? ((abs > 0x7C00) ? FP_NAN : FP_INFINITE) : FP_NORMAL) : FP_SUBNORMAL) + : FP_ZERO; + } + + /// Classification implementation. + /// \param arg value to classify + /// \retval true if finite number + /// \retval false else + static bool isfinite(half arg) + { + return (arg.data_ & 0x7C00) != 0x7C00; + } + + /// Classification implementation. + /// \param arg value to classify + /// \retval true if infinite number + /// \retval false else + static bool isinf(half arg) + { + return (arg.data_ & 0x7FFF) == 0x7C00; + } + + /// Classification implementation. + /// \param arg value to classify + /// \retval true if not a number + /// \retval false else + static bool isnan(half arg) + { + return (arg.data_ & 0x7FFF) > 0x7C00; + } + + /// Classification implementation. 
+ /// \param arg value to classify + /// \retval true if normal number + /// \retval false else + static bool isnormal(half arg) + { + return ((arg.data_ & 0x7C00) != 0) & ((arg.data_ & 0x7C00) != 0x7C00); + } + + /// Sign bit implementation. + /// \param arg value to check + /// \retval true if signed + /// \retval false if unsigned + static bool signbit(half arg) + { + return (arg.data_ & 0x8000) != 0; + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if operands equal + /// \retval false else + static bool isequal(half x, half y) + { + return (x.data_ == y.data_ || !((x.data_ | y.data_) & 0x7FFF)) && !isnan(x); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if operands not equal + /// \retval false else + static bool isnotequal(half x, half y) + { + return (x.data_ != y.data_ && ((x.data_ | y.data_) & 0x7FFF)) || isnan(x); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x > \a y + /// \retval false else + static bool isgreater(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + return xabs <= 0x7C00 && yabs <= 0x7C00 + && (((xabs == x.data_) ? xabs : -xabs) > ((yabs == y.data_) ? yabs : -yabs)); + } + + /// Comparison implementation. + /// \param x first operand + /// \param y second operand + /// \retval true if \a x >= \a y + /// \retval false else + static bool isgreaterequal(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + return xabs <= 0x7C00 && yabs <= 0x7C00 + && (((xabs == x.data_) ? xabs : -xabs) >= ((yabs == y.data_) ? yabs : -yabs)); + } + + /// Comparison implementation. 
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x < \a y
+ /// \retval false else
+ static bool isless(half x, half y)
+ {
+ int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
+ return xabs <= 0x7C00 && yabs <= 0x7C00
+ && (((xabs == x.data_) ? xabs : -xabs) < ((yabs == y.data_) ? yabs : -yabs));
+ }
+
+ /// Comparison implementation.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if \a x <= \a y
+ /// \retval false else
+ static bool islessequal(half x, half y)
+ {
+ int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
+ return xabs <= 0x7C00 && yabs <= 0x7C00
+ && (((xabs == x.data_) ? xabs : -xabs) <= ((yabs == y.data_) ? yabs : -yabs));
+ }
+
+ /// Comparison implementation.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if either \a x < \a y or \a x > \a y
+ /// \retval false else
+ static bool islessgreater(half x, half y)
+ {
+ int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF;
+ if (xabs > 0x7C00 || yabs > 0x7C00)
+ return false;
+ int a = (xabs == x.data_) ? xabs : -xabs, b = (yabs == y.data_) ? yabs : -yabs;
+ return a < b || a > b;
+ }
+
+ /// Comparison implementation.
+ /// \param x first operand
+ /// \param y second operand
+ /// \retval true if operands unordered
+ /// \retval false else
+ static bool isunordered(half x, half y)
+ {
+ return isnan(x) || isnan(y);
+ }
+
+private:
+ static double erf(double arg)
+ {
+ if (builtin_isinf(arg))
+ return (arg < 0.0) ? -1.0 : 1.0;
+ double x2 = arg * arg, ax2 = 0.147 * x2,
+ value = std::sqrt(1.0 - std::exp(-x2 * (1.2732395447351626861510701069801 + ax2) / (1.0 + ax2)));
+ return builtin_signbit(arg) ? 
-value : value; + } + + static double lgamma(double arg) + { + double v = 1.0; + for (; arg < 8.0; ++arg) + v *= arg; + double w = 1.0 / (arg * arg); + return (((((((-0.02955065359477124183006535947712 * w + 0.00641025641025641025641025641026) * w + + -0.00191752691752691752691752691753) + * w + + 8.4175084175084175084175084175084e-4) + * w + + -5.952380952380952380952380952381e-4) + * w + + 7.9365079365079365079365079365079e-4) + * w + + -0.00277777777777777777777777777778) + * w + + 0.08333333333333333333333333333333) + / arg + + 0.91893853320467274178032973640562 - std::log(v) - arg + (arg - 0.5) * std::log(arg); + } +}; + +/// Wrapper for unary half-precision functions needing specialization for individual argument types. +/// \tparam T argument type +template +struct unary_specialized +{ + /// Negation implementation. + /// \param arg value to negate + /// \return negated value + static HALF_CONSTEXPR half negate(half arg) + { + return half(binary, arg.data_ ^ 0x8000); + } + + /// Absolute value implementation. + /// \param arg function argument + /// \return absolute value + static half fabs(half arg) + { + return half(binary, arg.data_ & 0x7FFF); + } +}; +template <> +struct unary_specialized +{ + static HALF_CONSTEXPR expr negate(float arg) + { + return expr(-arg); + } + static expr fabs(float arg) + { + return expr(std::fabs(arg)); + } +}; + +/// Wrapper for binary half-precision functions needing specialization for individual argument types. +/// \tparam T first argument type +/// \tparam U first argument type +template +struct binary_specialized +{ + /// Minimum implementation. + /// \param x first operand + /// \param y second operand + /// \return minimum value + static expr fmin(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::fmin(x, y)); +#else + if (builtin_isnan(x)) + return expr(y); + if (builtin_isnan(y)) + return expr(x); + return expr(std::min(x, y)); +#endif + } + + /// Maximum implementation. 
+ /// \param x first operand + /// \param y second operand + /// \return maximum value + static expr fmax(float x, float y) + { +#if HALF_ENABLE_CPP11_CMATH + return expr(std::fmax(x, y)); +#else + if (builtin_isnan(x)) + return expr(y); + if (builtin_isnan(y)) + return expr(x); + return expr(std::max(x, y)); +#endif + } +}; +template <> +struct binary_specialized +{ + static half fmin(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + if (xabs > 0x7C00) + return y; + if (yabs > 0x7C00) + return x; + return (((xabs == x.data_) ? xabs : -xabs) > ((yabs == y.data_) ? yabs : -yabs)) ? y : x; + } + static half fmax(half x, half y) + { + int xabs = x.data_ & 0x7FFF, yabs = y.data_ & 0x7FFF; + if (xabs > 0x7C00) + return y; + if (yabs > 0x7C00) + return x; + return (((xabs == x.data_) ? xabs : -xabs) < ((yabs == y.data_) ? yabs : -yabs)) ? y : x; + } +}; + +/// Helper class for half casts. +/// This class template has to be specialized for all valid cast argument to define an appropriate static `cast` member +/// function and a corresponding `type` member denoting its return type. 
+/// \tparam T destination type +/// \tparam U source type +/// \tparam R rounding mode to use +template +struct half_caster +{ +}; +template +struct half_caster +{ +#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_arithmetic::value, "half_cast from non-arithmetic type unsupported"); +#endif + + static half cast(U arg) + { + return cast_impl(arg, is_float()); + }; + +private: + static half cast_impl(U arg, true_type) + { + return half(binary, float2half(arg)); + } + static half cast_impl(U arg, false_type) + { + return half(binary, int2half(arg)); + } +}; +template +struct half_caster +{ +#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); +#endif + + static T cast(half arg) + { + return cast_impl(arg, is_float()); + } + +private: + static T cast_impl(half arg, true_type) + { + return half2float(arg.data_); + } + static T cast_impl(half arg, false_type) + { + return half2int(arg.data_); + } +}; +template +struct half_caster +{ +#if HALF_ENABLE_CPP11_STATIC_ASSERT && HALF_ENABLE_CPP11_TYPE_TRAITS + static_assert(std::is_arithmetic::value, "half_cast to non-arithmetic type unsupported"); +#endif + + static T cast(expr arg) + { + return cast_impl(arg, is_float()); + } + +private: + static T cast_impl(float arg, true_type) + { + return static_cast(arg); + } + static T cast_impl(half arg, false_type) + { + return half2int(arg.data_); + } +}; +template +struct half_caster +{ + static half cast(half arg) + { + return arg; + } +}; +template +struct half_caster : half_caster +{ +}; + +/// \name Comparison operators +/// \{ + +/// Comparison for equality. +/// \param x first operand +/// \param y second operand +/// \retval true if operands equal +/// \retval false else +template +typename enable::type operator==(T x, U y) +{ + return functions::isequal(x, y); +} + +/// Comparison for inequality. 
+/// \param x first operand +/// \param y second operand +/// \retval true if operands not equal +/// \retval false else +template +typename enable::type operator!=(T x, U y) +{ + return functions::isnotequal(x, y); +} + +/// Comparison for less than. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x less than \a y +/// \retval false else +template +typename enable::type operator<(T x, U y) +{ + return functions::isless(x, y); +} + +/// Comparison for greater than. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x greater than \a y +/// \retval false else +template +typename enable::type operator>(T x, U y) +{ + return functions::isgreater(x, y); +} + +/// Comparison for less equal. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x less equal \a y +/// \retval false else +template +typename enable::type operator<=(T x, U y) +{ + return functions::islessequal(x, y); +} + +/// Comparison for greater equal. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x greater equal \a y +/// \retval false else +template +typename enable::type operator>=(T x, U y) +{ + return functions::isgreaterequal(x, y); +} + +/// \} +/// \name Arithmetic operators +/// \{ + +/// Add halfs. +/// \param x left operand +/// \param y right operand +/// \return sum of half expressions +template +typename enable::type operator+(T x, U y) +{ + return functions::plus(x, y); +} + +/// Subtract halfs. +/// \param x left operand +/// \param y right operand +/// \return difference of half expressions +template +typename enable::type operator-(T x, U y) +{ + return functions::minus(x, y); +} + +/// Multiply halfs. +/// \param x left operand +/// \param y right operand +/// \return product of half expressions +template +typename enable::type operator*(T x, U y) +{ + return functions::multiplies(x, y); +} + +/// Divide halfs. 
+/// \param x left operand
+/// \param y right operand
+/// \return quotient of half expressions
+template
+typename enable::type operator/(T x, U y)
+{
+ return functions::divides(x, y);
+}
+
+/// Identity.
+/// \param arg operand
+/// \return unchanged operand
+template
+HALF_CONSTEXPR typename enable::type operator+(T arg)
+{
+ return arg;
+}
+
+/// Negation.
+/// \param arg operand
+/// \return negated operand
+template
+HALF_CONSTEXPR typename enable::type operator-(T arg)
+{
+ return unary_specialized::negate(arg);
+}
+
+/// \}
+/// \name Input and output
+/// \{
+
+/// Output operator.
+/// \param out output stream to write into
+/// \param arg half expression to write
+/// \return reference to output stream
+template
+typename enable&, T>::type operator<<(std::basic_ostream& out, T arg)
+{
+ return functions::write(out, arg);
+}
+
+/// Input operator.
+/// \param in input stream to read from
+/// \param arg half to read into
+/// \return reference to input stream
+template
+std::basic_istream& operator>>(std::basic_istream& in, half& arg)
+{
+ return functions::read(in, arg);
+}
+
+/// \}
+/// \name Basic mathematical operations
+/// \{
+
+/// Absolute value.
+/// \param arg operand
+/// \return absolute value of \a arg
+// template typename enable::type abs(T arg) { return unary_specialized::fabs(arg); }
+inline half abs(half arg)
+{
+ return unary_specialized::fabs(arg);
+}
+inline expr abs(expr arg)
+{
+ return unary_specialized::fabs(arg);
+}
+
+/// Absolute value.
+/// \param arg operand
+/// \return absolute value of \a arg
+// template typename enable::type fabs(T arg) { return unary_specialized::fabs(arg); }
+inline half fabs(half arg)
+{
+ return unary_specialized::fabs(arg);
+}
+inline expr fabs(expr arg)
+{
+ return unary_specialized::fabs(arg);
+}
+
+/// Remainder of division.
+/// \param x first operand
+/// \param y second operand
+/// \return remainder of floating point division. 
+// template typename enable::type fmod(T x, U y) { return functions::fmod(x, y); } +inline expr fmod(half x, half y) +{ + return functions::fmod(x, y); +} +inline expr fmod(half x, expr y) +{ + return functions::fmod(x, y); +} +inline expr fmod(expr x, half y) +{ + return functions::fmod(x, y); +} +inline expr fmod(expr x, expr y) +{ + return functions::fmod(x, y); +} + +/// Remainder of division. +/// \param x first operand +/// \param y second operand +/// \return remainder of floating point division. +// template typename enable::type remainder(T x, U y) { return +//functions::remainder(x, y); } +inline expr remainder(half x, half y) +{ + return functions::remainder(x, y); +} +inline expr remainder(half x, expr y) +{ + return functions::remainder(x, y); +} +inline expr remainder(expr x, half y) +{ + return functions::remainder(x, y); +} +inline expr remainder(expr x, expr y) +{ + return functions::remainder(x, y); +} + +/// Remainder of division. +/// \param x first operand +/// \param y second operand +/// \param quo address to store some bits of quotient at +/// \return remainder of floating point division. +// template typename enable::type remquo(T x, U y, int *quo) { return +//functions::remquo(x, y, quo); } +inline expr remquo(half x, half y, int* quo) +{ + return functions::remquo(x, y, quo); +} +inline expr remquo(half x, expr y, int* quo) +{ + return functions::remquo(x, y, quo); +} +inline expr remquo(expr x, half y, int* quo) +{ + return functions::remquo(x, y, quo); +} +inline expr remquo(expr x, expr y, int* quo) +{ + return functions::remquo(x, y, quo); +} + +/// Fused multiply add. +/// \param x first operand +/// \param y second operand +/// \param z third operand +/// \return ( \a x * \a y ) + \a z rounded as one operation. 
+// template typename enable::type fma(T x, U y, V z) { return +//functions::fma(x, y, z); } +inline expr fma(half x, half y, half z) +{ + return functions::fma(x, y, z); +} +inline expr fma(half x, half y, expr z) +{ + return functions::fma(x, y, z); +} +inline expr fma(half x, expr y, half z) +{ + return functions::fma(x, y, z); +} +inline expr fma(half x, expr y, expr z) +{ + return functions::fma(x, y, z); +} +inline expr fma(expr x, half y, half z) +{ + return functions::fma(x, y, z); +} +inline expr fma(expr x, half y, expr z) +{ + return functions::fma(x, y, z); +} +inline expr fma(expr x, expr y, half z) +{ + return functions::fma(x, y, z); +} +inline expr fma(expr x, expr y, expr z) +{ + return functions::fma(x, y, z); +} + +/// Maximum of half expressions. +/// \param x first operand +/// \param y second operand +/// \return maximum of operands +// template typename result::type fmax(T x, U y) { return +//binary_specialized::fmax(x, y); } +inline half fmax(half x, half y) +{ + return binary_specialized::fmax(x, y); +} +inline expr fmax(half x, expr y) +{ + return binary_specialized::fmax(x, y); +} +inline expr fmax(expr x, half y) +{ + return binary_specialized::fmax(x, y); +} +inline expr fmax(expr x, expr y) +{ + return binary_specialized::fmax(x, y); +} + +/// Minimum of half expressions. +/// \param x first operand +/// \param y second operand +/// \return minimum of operands +// template typename result::type fmin(T x, U y) { return +//binary_specialized::fmin(x, y); } +inline half fmin(half x, half y) +{ + return binary_specialized::fmin(x, y); +} +inline expr fmin(half x, expr y) +{ + return binary_specialized::fmin(x, y); +} +inline expr fmin(expr x, half y) +{ + return binary_specialized::fmin(x, y); +} +inline expr fmin(expr x, expr y) +{ + return binary_specialized::fmin(x, y); +} + +/// Positive difference. 
+/// \param x first operand +/// \param y second operand +/// \return \a x - \a y or 0 if difference negative +// template typename enable::type fdim(T x, U y) { return functions::fdim(x, y); } +inline expr fdim(half x, half y) +{ + return functions::fdim(x, y); +} +inline expr fdim(half x, expr y) +{ + return functions::fdim(x, y); +} +inline expr fdim(expr x, half y) +{ + return functions::fdim(x, y); +} +inline expr fdim(expr x, expr y) +{ + return functions::fdim(x, y); +} + +/// Get NaN value. +/// \return quiet NaN +inline half nanh(const char*) +{ + return functions::nanh(); +} + +/// \} +/// \name Exponential functions +/// \{ + +/// Exponential function. +/// \param arg function argument +/// \return e raised to \a arg +// template typename enable::type exp(T arg) { return functions::exp(arg); } +inline expr exp(half arg) +{ + return functions::exp(arg); +} +inline expr exp(expr arg) +{ + return functions::exp(arg); +} + +/// Exponential minus one. +/// \param arg function argument +/// \return e raised to \a arg subtracted by 1 +// template typename enable::type expm1(T arg) { return functions::expm1(arg); } +inline expr expm1(half arg) +{ + return functions::expm1(arg); +} +inline expr expm1(expr arg) +{ + return functions::expm1(arg); +} + +/// Binary exponential. +/// \param arg function argument +/// \return 2 raised to \a arg +// template typename enable::type exp2(T arg) { return functions::exp2(arg); } +inline expr exp2(half arg) +{ + return functions::exp2(arg); +} +inline expr exp2(expr arg) +{ + return functions::exp2(arg); +} + +/// Natural logorithm. +/// \param arg function argument +/// \return logarithm of \a arg to base e +// template typename enable::type log(T arg) { return functions::log(arg); } +inline expr log(half arg) +{ + return functions::log(arg); +} +inline expr log(expr arg) +{ + return functions::log(arg); +} + +/// Common logorithm. 
+/// \param arg function argument
+/// \return logarithm of \a arg to base 10
+// template typename enable::type log10(T arg) { return functions::log10(arg); }
+inline expr log10(half arg)
+{
+ return functions::log10(arg);
+}
+inline expr log10(expr arg)
+{
+ return functions::log10(arg);
+}
+
+/// Natural logarithm.
+/// \param arg function argument
+/// \return logarithm of \a arg plus 1 to base e
+// template typename enable::type log1p(T arg) { return functions::log1p(arg); }
+inline expr log1p(half arg)
+{
+ return functions::log1p(arg);
+}
+inline expr log1p(expr arg)
+{
+ return functions::log1p(arg);
+}
+
+/// Binary logarithm.
+/// \param arg function argument
+/// \return logarithm of \a arg to base 2
+// template typename enable::type log2(T arg) { return functions::log2(arg); }
+inline expr log2(half arg)
+{
+ return functions::log2(arg);
+}
+inline expr log2(expr arg)
+{
+ return functions::log2(arg);
+}
+
+/// \}
+/// \name Power functions
+/// \{
+
+/// Square root.
+/// \param arg function argument
+/// \return square root of \a arg
+// template typename enable::type sqrt(T arg) { return functions::sqrt(arg); }
+inline expr sqrt(half arg)
+{
+ return functions::sqrt(arg);
+}
+inline expr sqrt(expr arg)
+{
+ return functions::sqrt(arg);
+}
+
+/// Cubic root.
+/// \param arg function argument
+/// \return cubic root of \a arg
+// template typename enable::type cbrt(T arg) { return functions::cbrt(arg); }
+inline expr cbrt(half arg)
+{
+ return functions::cbrt(arg);
+}
+inline expr cbrt(expr arg)
+{
+ return functions::cbrt(arg);
+}
+
+/// Hypotenuse function. 
+/// \param x first argument +/// \param y second argument +/// \return square root of sum of squares without internal over- or underflows +// template typename enable::type hypot(T x, U y) { return functions::hypot(x, y); +//} +inline expr hypot(half x, half y) +{ + return functions::hypot(x, y); +} +inline expr hypot(half x, expr y) +{ + return functions::hypot(x, y); +} +inline expr hypot(expr x, half y) +{ + return functions::hypot(x, y); +} +inline expr hypot(expr x, expr y) +{ + return functions::hypot(x, y); +} + +/// Power function. +/// \param base first argument +/// \param exp second argument +/// \return \a base raised to \a exp +// template typename enable::type pow(T base, U exp) { return functions::pow(base, +//exp); } +inline expr pow(half base, half exp) +{ + return functions::pow(base, exp); +} +inline expr pow(half base, expr exp) +{ + return functions::pow(base, exp); +} +inline expr pow(expr base, half exp) +{ + return functions::pow(base, exp); +} +inline expr pow(expr base, expr exp) +{ + return functions::pow(base, exp); +} + +/// \} +/// \name Trigonometric functions +/// \{ + +/// Sine function. +/// \param arg function argument +/// \return sine value of \a arg +// template typename enable::type sin(T arg) { return functions::sin(arg); } +inline expr sin(half arg) +{ + return functions::sin(arg); +} +inline expr sin(expr arg) +{ + return functions::sin(arg); +} + +/// Cosine function. +/// \param arg function argument +/// \return cosine value of \a arg +// template typename enable::type cos(T arg) { return functions::cos(arg); } +inline expr cos(half arg) +{ + return functions::cos(arg); +} +inline expr cos(expr arg) +{ + return functions::cos(arg); +} + +/// Tangent function. 
+/// \param arg function argument +/// \return tangent value of \a arg +// template typename enable::type tan(T arg) { return functions::tan(arg); } +inline expr tan(half arg) +{ + return functions::tan(arg); +} +inline expr tan(expr arg) +{ + return functions::tan(arg); +} + +/// Arc sine. +/// \param arg function argument +/// \return arc sine value of \a arg +// template typename enable::type asin(T arg) { return functions::asin(arg); } +inline expr asin(half arg) +{ + return functions::asin(arg); +} +inline expr asin(expr arg) +{ + return functions::asin(arg); +} + +/// Arc cosine function. +/// \param arg function argument +/// \return arc cosine value of \a arg +// template typename enable::type acos(T arg) { return functions::acos(arg); } +inline expr acos(half arg) +{ + return functions::acos(arg); +} +inline expr acos(expr arg) +{ + return functions::acos(arg); +} + +/// Arc tangent function. +/// \param arg function argument +/// \return arc tangent value of \a arg +// template typename enable::type atan(T arg) { return functions::atan(arg); } +inline expr atan(half arg) +{ + return functions::atan(arg); +} +inline expr atan(expr arg) +{ + return functions::atan(arg); +} + +/// Arc tangent function. +/// \param x first argument +/// \param y second argument +/// \return arc tangent value +// template typename enable::type atan2(T x, U y) { return functions::atan2(x, y); +//} +inline expr atan2(half x, half y) +{ + return functions::atan2(x, y); +} +inline expr atan2(half x, expr y) +{ + return functions::atan2(x, y); +} +inline expr atan2(expr x, half y) +{ + return functions::atan2(x, y); +} +inline expr atan2(expr x, expr y) +{ + return functions::atan2(x, y); +} + +/// \} +/// \name Hyperbolic functions +/// \{ + +/// Hyperbolic sine. 
+/// \param arg function argument +/// \return hyperbolic sine value of \a arg +// template typename enable::type sinh(T arg) { return functions::sinh(arg); } +inline expr sinh(half arg) +{ + return functions::sinh(arg); +} +inline expr sinh(expr arg) +{ + return functions::sinh(arg); +} + +/// Hyperbolic cosine. +/// \param arg function argument +/// \return hyperbolic cosine value of \a arg +// template typename enable::type cosh(T arg) { return functions::cosh(arg); } +inline expr cosh(half arg) +{ + return functions::cosh(arg); +} +inline expr cosh(expr arg) +{ + return functions::cosh(arg); +} + +/// Hyperbolic tangent. +/// \param arg function argument +/// \return hyperbolic tangent value of \a arg +// template typename enable::type tanh(T arg) { return functions::tanh(arg); } +inline expr tanh(half arg) +{ + return functions::tanh(arg); +} +inline expr tanh(expr arg) +{ + return functions::tanh(arg); +} + +/// Hyperbolic area sine. +/// \param arg function argument +/// \return area sine value of \a arg +// template typename enable::type asinh(T arg) { return functions::asinh(arg); } +inline expr asinh(half arg) +{ + return functions::asinh(arg); +} +inline expr asinh(expr arg) +{ + return functions::asinh(arg); +} + +/// Hyperbolic area cosine. +/// \param arg function argument +/// \return area cosine value of \a arg +// template typename enable::type acosh(T arg) { return functions::acosh(arg); } +inline expr acosh(half arg) +{ + return functions::acosh(arg); +} +inline expr acosh(expr arg) +{ + return functions::acosh(arg); +} + +/// Hyperbolic area tangent. +/// \param arg function argument +/// \return area tangent value of \a arg +// template typename enable::type atanh(T arg) { return functions::atanh(arg); } +inline expr atanh(half arg) +{ + return functions::atanh(arg); +} +inline expr atanh(expr arg) +{ + return functions::atanh(arg); +} + +/// \} +/// \name Error and gamma functions +/// \{ + +/// Error function. 
+/// \param arg function argument
+/// \return error function value of \a arg
+// template typename enable::type erf(T arg) { return functions::erf(arg); }
+inline expr erf(half arg)
+{
+ return functions::erf(arg);
+}
+inline expr erf(expr arg)
+{
+ return functions::erf(arg);
+}
+
+/// Complementary error function.
+/// \param arg function argument
+/// \return 1 minus error function value of \a arg
+// template typename enable::type erfc(T arg) { return functions::erfc(arg); }
+inline expr erfc(half arg)
+{
+ return functions::erfc(arg);
+}
+inline expr erfc(expr arg)
+{
+ return functions::erfc(arg);
+}
+
+/// Natural logarithm of gamma function.
+/// \param arg function argument
+/// \return natural logarithm of the gamma function for \a arg
+// template typename enable::type lgamma(T arg) { return functions::lgamma(arg); }
+inline expr lgamma(half arg)
+{
+ return functions::lgamma(arg);
+}
+inline expr lgamma(expr arg)
+{
+ return functions::lgamma(arg);
+}
+
+/// Gamma function.
+/// \param arg function argument
+/// \return gamma function value of \a arg
+// template typename enable::type tgamma(T arg) { return functions::tgamma(arg); }
+inline expr tgamma(half arg)
+{
+ return functions::tgamma(arg);
+}
+inline expr tgamma(expr arg)
+{
+ return functions::tgamma(arg);
+}
+
+/// \}
+/// \name Rounding
+/// \{
+
+/// Nearest integer not less than half value.
+/// \param arg half to round
+/// \return nearest integer not less than \a arg
+// template typename enable::type ceil(T arg) { return functions::ceil(arg); }
+inline half ceil(half arg)
+{
+ return functions::ceil(arg);
+}
+inline half ceil(expr arg)
+{
+ return functions::ceil(arg);
+}
+
+/// Nearest integer not greater than half value. 
+/// \param arg half to round +/// \return nearest integer not greater than \a arg +// template typename enable::type floor(T arg) { return functions::floor(arg); } +inline half floor(half arg) +{ + return functions::floor(arg); +} +inline half floor(expr arg) +{ + return functions::floor(arg); +} + +/// Nearest integer not greater in magnitude than half value. +/// \param arg half to round +/// \return nearest integer not greater in magnitude than \a arg +// template typename enable::type trunc(T arg) { return functions::trunc(arg); } +inline half trunc(half arg) +{ + return functions::trunc(arg); +} +inline half trunc(expr arg) +{ + return functions::trunc(arg); +} + +/// Nearest integer. +/// \param arg half to round +/// \return nearest integer, rounded away from zero in half-way cases +// template typename enable::type round(T arg) { return functions::round(arg); } +inline half round(half arg) +{ + return functions::round(arg); +} +inline half round(expr arg) +{ + return functions::round(arg); +} + +/// Nearest integer. +/// \param arg half to round +/// \return nearest integer, rounded away from zero in half-way cases +// template typename enable::type lround(T arg) { return functions::lround(arg); } +inline long lround(half arg) +{ + return functions::lround(arg); +} +inline long lround(expr arg) +{ + return functions::lround(arg); +} + +/// Nearest integer using half's internal rounding mode. +/// \param arg half expression to round +/// \return nearest integer using default rounding mode +// template typename enable::type nearbyint(T arg) { return functions::nearbyint(arg); } +inline half nearbyint(half arg) +{ + return functions::rint(arg); +} +inline half nearbyint(expr arg) +{ + return functions::rint(arg); +} + +/// Nearest integer using half's internal rounding mode. 
+/// \param arg half expression to round +/// \return nearest integer using default rounding mode +// template typename enable::type rint(T arg) { return functions::rint(arg); } +inline half rint(half arg) +{ + return functions::rint(arg); +} +inline half rint(expr arg) +{ + return functions::rint(arg); +} + +/// Nearest integer using half's internal rounding mode. +/// \param arg half expression to round +/// \return nearest integer using default rounding mode +// template typename enable::type lrint(T arg) { return functions::lrint(arg); } +inline long lrint(half arg) +{ + return functions::lrint(arg); +} +inline long lrint(expr arg) +{ + return functions::lrint(arg); +} +#if HALF_ENABLE_CPP11_LONG_LONG +/// Nearest integer. +/// \param arg half to round +/// \return nearest integer, rounded away from zero in half-way cases +// template typename enable::type llround(T arg) { return functions::llround(arg); } +inline long long llround(half arg) +{ + return functions::llround(arg); +} +inline long long llround(expr arg) +{ + return functions::llround(arg); +} + +/// Nearest integer using half's internal rounding mode. +/// \param arg half expression to round +/// \return nearest integer using default rounding mode +// template typename enable::type llrint(T arg) { return functions::llrint(arg); } +inline long long llrint(half arg) +{ + return functions::llrint(arg); +} +inline long long llrint(expr arg) +{ + return functions::llrint(arg); +} +#endif + +/// \} +/// \name Floating point manipulation +/// \{ + +/// Decompress floating point number. +/// \param arg number to decompress +/// \param exp address to store exponent at +/// \return significant in range [0.5, 1) +// template typename enable::type frexp(T arg, int *exp) { return functions::frexp(arg, exp); } +inline half frexp(half arg, int* exp) +{ + return functions::frexp(arg, exp); +} +inline half frexp(expr arg, int* exp) +{ + return functions::frexp(arg, exp); +} + +/// Multiply by power of two. 
+/// \param arg number to modify
+/// \param exp power of two to multiply with
+/// \return \a arg multiplied by 2 raised to \a exp
+// template typename enable::type ldexp(T arg, int exp) { return functions::scalbln(arg, exp);
+//}
+inline half ldexp(half arg, int exp)
+{
+ return functions::scalbln(arg, exp);
+}
+inline half ldexp(expr arg, int exp)
+{
+ return functions::scalbln(arg, exp);
+}
+
+/// Extract integer and fractional parts.
+/// \param arg number to decompress
+/// \param iptr address to store integer part at
+/// \return fractional part
+// template typename enable::type modf(T arg, half *iptr) { return functions::modf(arg, iptr);
+//}
+inline half modf(half arg, half* iptr)
+{
+ return functions::modf(arg, iptr);
+}
+inline half modf(expr arg, half* iptr)
+{
+ return functions::modf(arg, iptr);
+}
+
+/// Multiply by power of two.
+/// \param arg number to modify
+/// \param exp power of two to multiply with
+/// \return \a arg multiplied by 2 raised to \a exp
+// template typename enable::type scalbn(T arg, int exp) { return functions::scalbln(arg, exp);
+//}
+inline half scalbn(half arg, int exp)
+{
+ return functions::scalbln(arg, exp);
+}
+inline half scalbn(expr arg, int exp)
+{
+ return functions::scalbln(arg, exp);
+}
+
+/// Multiply by power of two.
+/// \param arg number to modify
+/// \param exp power of two to multiply with
+/// \return \a arg multiplied by 2 raised to \a exp
+// template typename enable::type scalbln(T arg, long exp) { return functions::scalbln(arg,
+//exp); }
+inline half scalbln(half arg, long exp)
+{
+ return functions::scalbln(arg, exp);
+}
+inline half scalbln(expr arg, long exp)
+{
+ return functions::scalbln(arg, exp);
+}
+
+/// Extract exponent. 
+/// \param arg number to query +/// \return floating point exponent +/// \retval FP_ILOGB0 for zero +/// \retval FP_ILOGBNAN for NaN +/// \retval MAX_INT for infinity +// template typename enable::type ilogb(T arg) { return functions::ilogb(arg); } +inline int ilogb(half arg) +{ + return functions::ilogb(arg); +} +inline int ilogb(expr arg) +{ + return functions::ilogb(arg); +} + +/// Extract exponent. +/// \param arg number to query +/// \return floating point exponent +// template typename enable::type logb(T arg) { return functions::logb(arg); } +inline half logb(half arg) +{ + return functions::logb(arg); +} +inline half logb(expr arg) +{ + return functions::logb(arg); +} + +/// Next representable value. +/// \param from value to compute next representable value for +/// \param to direction towards which to compute next value +/// \return next representable value after \a from in direction towards \a to +// template typename enable::type nextafter(T from, U to) { return +//functions::nextafter(from, to); } +inline half nextafter(half from, half to) +{ + return functions::nextafter(from, to); +} +inline half nextafter(half from, expr to) +{ + return functions::nextafter(from, to); +} +inline half nextafter(expr from, half to) +{ + return functions::nextafter(from, to); +} +inline half nextafter(expr from, expr to) +{ + return functions::nextafter(from, to); +} + +/// Next representable value. +/// \param from value to compute next representable value for +/// \param to direction towards which to compute next value +/// \return next representable value after \a from in direction towards \a to +// template typename enable::type nexttoward(T from, long double to) { return +//functions::nexttoward(from, to); } +inline half nexttoward(half from, long double to) +{ + return functions::nexttoward(from, to); +} +inline half nexttoward(expr from, long double to) +{ + return functions::nexttoward(from, to); +} + +/// Take sign. 
+/// \param x value to change sign for +/// \param y value to take sign from +/// \return value equal to \a x in magnitude and to \a y in sign +// template typename enable::type copysign(T x, U y) { return +//functions::copysign(x, y); } +inline half copysign(half x, half y) +{ + return functions::copysign(x, y); +} +inline half copysign(half x, expr y) +{ + return functions::copysign(x, y); +} +inline half copysign(expr x, half y) +{ + return functions::copysign(x, y); +} +inline half copysign(expr x, expr y) +{ + return functions::copysign(x, y); +} + +/// \} +/// \name Floating point classification +/// \{ + +/// Classify floating point value. +/// \param arg number to classify +/// \retval FP_ZERO for positive and negative zero +/// \retval FP_SUBNORMAL for subnormal numbers +/// \retval FP_INFINITY for positive and negative infinity +/// \retval FP_NAN for NaNs +/// \retval FP_NORMAL for all other (normal) values +// template typename enable::type fpclassify(T arg) { return functions::fpclassify(arg); } +inline int fpclassify(half arg) +{ + return functions::fpclassify(arg); +} +inline int fpclassify(expr arg) +{ + return functions::fpclassify(arg); +} + +/// Check if finite number. +/// \param arg number to check +/// \retval true if neither infinity nor NaN +/// \retval false else +// template typename enable::type isfinite(T arg) { return functions::isfinite(arg); } +inline bool isfinite(half arg) +{ + return functions::isfinite(arg); +} +inline bool isfinite(expr arg) +{ + return functions::isfinite(arg); +} + +/// Check for infinity. +/// \param arg number to check +/// \retval true for positive or negative infinity +/// \retval false else +// template typename enable::type isinf(T arg) { return functions::isinf(arg); } +inline bool isinf(half arg) +{ + return functions::isinf(arg); +} +inline bool isinf(expr arg) +{ + return functions::isinf(arg); +} + +/// Check for NaN. 
+/// \param arg number to check +/// \retval true for NaNs +/// \retval false else +// template typename enable::type isnan(T arg) { return functions::isnan(arg); } +inline bool isnan(half arg) +{ + return functions::isnan(arg); +} +inline bool isnan(expr arg) +{ + return functions::isnan(arg); +} + +/// Check if normal number. +/// \param arg number to check +/// \retval true if normal number +/// \retval false if either subnormal, zero, infinity or NaN +// template typename enable::type isnormal(T arg) { return functions::isnormal(arg); } +inline bool isnormal(half arg) +{ + return functions::isnormal(arg); +} +inline bool isnormal(expr arg) +{ + return functions::isnormal(arg); +} + +/// Check sign. +/// \param arg number to check +/// \retval true for negative number +/// \retval false for positive number +// template typename enable::type signbit(T arg) { return functions::signbit(arg); } +inline bool signbit(half arg) +{ + return functions::signbit(arg); +} +inline bool signbit(expr arg) +{ + return functions::signbit(arg); +} + +/// \} +/// \name Comparison +/// \{ + +/// Comparison for greater than. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x greater than \a y +/// \retval false else +// template typename enable::type isgreater(T x, U y) { return +//functions::isgreater(x, y); } +inline bool isgreater(half x, half y) +{ + return functions::isgreater(x, y); +} +inline bool isgreater(half x, expr y) +{ + return functions::isgreater(x, y); +} +inline bool isgreater(expr x, half y) +{ + return functions::isgreater(x, y); +} +inline bool isgreater(expr x, expr y) +{ + return functions::isgreater(x, y); +} + +/// Comparison for greater equal. 
+/// \param x first operand +/// \param y second operand +/// \retval true if \a x greater equal \a y +/// \retval false else +// template typename enable::type isgreaterequal(T x, U y) { return +//functions::isgreaterequal(x, y); } +inline bool isgreaterequal(half x, half y) +{ + return functions::isgreaterequal(x, y); +} +inline bool isgreaterequal(half x, expr y) +{ + return functions::isgreaterequal(x, y); +} +inline bool isgreaterequal(expr x, half y) +{ + return functions::isgreaterequal(x, y); +} +inline bool isgreaterequal(expr x, expr y) +{ + return functions::isgreaterequal(x, y); +} + +/// Comparison for less than. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x less than \a y +/// \retval false else +// template typename enable::type isless(T x, U y) { return functions::isless(x, +//y); } +inline bool isless(half x, half y) +{ + return functions::isless(x, y); +} +inline bool isless(half x, expr y) +{ + return functions::isless(x, y); +} +inline bool isless(expr x, half y) +{ + return functions::isless(x, y); +} +inline bool isless(expr x, expr y) +{ + return functions::isless(x, y); +} + +/// Comparison for less equal. +/// \param x first operand +/// \param y second operand +/// \retval true if \a x less equal \a y +/// \retval false else +// template typename enable::type islessequal(T x, U y) { return +//functions::islessequal(x, y); } +inline bool islessequal(half x, half y) +{ + return functions::islessequal(x, y); +} +inline bool islessequal(half x, expr y) +{ + return functions::islessequal(x, y); +} +inline bool islessequal(expr x, half y) +{ + return functions::islessequal(x, y); +} +inline bool islessequal(expr x, expr y) +{ + return functions::islessequal(x, y); +} + +/// Comarison for less or greater. 
+/// \param x first operand +/// \param y second operand +/// \retval true if either less or greater +/// \retval false else +// template typename enable::type islessgreater(T x, U y) { return +//functions::islessgreater(x, y); } +inline bool islessgreater(half x, half y) +{ + return functions::islessgreater(x, y); +} +inline bool islessgreater(half x, expr y) +{ + return functions::islessgreater(x, y); +} +inline bool islessgreater(expr x, half y) +{ + return functions::islessgreater(x, y); +} +inline bool islessgreater(expr x, expr y) +{ + return functions::islessgreater(x, y); +} + +/// Check if unordered. +/// \param x first operand +/// \param y second operand +/// \retval true if unordered (one or two NaN operands) +/// \retval false else +// template typename enable::type isunordered(T x, U y) { return +//functions::isunordered(x, y); } +inline bool isunordered(half x, half y) +{ + return functions::isunordered(x, y); +} +inline bool isunordered(half x, expr y) +{ + return functions::isunordered(x, y); +} +inline bool isunordered(expr x, half y) +{ + return functions::isunordered(x, y); +} +inline bool isunordered(expr x, expr y) +{ + return functions::isunordered(x, y); +} + +/// \name Casting +/// \{ + +/// Cast to or from half-precision floating point number. +/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted +/// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. +/// It uses the default rounding mode. +/// +/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types +/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler +/// error and casting between [half](\ref half_float::half)s is just a no-op. 
+/// \tparam T destination type (half or built-in arithmetic type) +/// \tparam U source type (half or built-in arithmetic type) +/// \param arg value to cast +/// \return \a arg converted to destination type +template +T half_cast(U arg) +{ + return half_caster::cast(arg); +} + +/// Cast to or from half-precision floating point number. +/// This casts between [half](\ref half_float::half) and any built-in arithmetic type. The values are converted +/// directly using the given rounding mode, without any roundtrip over `float` that a `static_cast` would otherwise do. +/// +/// Using this cast with neither of the two types being a [half](\ref half_float::half) or with any of the two types +/// not being a built-in arithmetic type (apart from [half](\ref half_float::half), of course) results in a compiler +/// error and casting between [half](\ref half_float::half)s is just a no-op. +/// \tparam T destination type (half or built-in arithmetic type) +/// \tparam R rounding mode to use. +/// \tparam U source type (half or built-in arithmetic type) +/// \param arg value to cast +/// \return \a arg converted to destination type +template +T half_cast(U arg) +{ + return half_caster::cast(arg); +} +/// \} +} // namespace detail + +using detail::operator==; +using detail::operator!=; +using detail::operator<; +using detail::operator>; +using detail::operator<=; +using detail::operator>=; +using detail::operator+; +using detail::operator-; +using detail::operator*; +using detail::operator/; +using detail::operator<<; +using detail::operator>>; + +using detail::abs; +using detail::fabs; +using detail::fmod; +using detail::remainder; +using detail::remquo; +using detail::fma; +using detail::fmax; +using detail::fmin; +using detail::fdim; +using detail::nanh; +using detail::exp; +using detail::expm1; +using detail::exp2; +using detail::log; +using detail::log10; +using detail::log1p; +using detail::log2; +using detail::sqrt; +using detail::cbrt; +using detail::hypot; +using 
detail::pow; +using detail::sin; +using detail::cos; +using detail::tan; +using detail::asin; +using detail::acos; +using detail::atan; +using detail::atan2; +using detail::sinh; +using detail::cosh; +using detail::tanh; +using detail::asinh; +using detail::acosh; +using detail::atanh; +using detail::erf; +using detail::erfc; +using detail::lgamma; +using detail::tgamma; +using detail::ceil; +using detail::floor; +using detail::trunc; +using detail::round; +using detail::lround; +using detail::nearbyint; +using detail::rint; +using detail::lrint; +#if HALF_ENABLE_CPP11_LONG_LONG +using detail::llround; +using detail::llrint; +#endif +using detail::frexp; +using detail::ldexp; +using detail::modf; +using detail::scalbn; +using detail::scalbln; +using detail::ilogb; +using detail::logb; +using detail::nextafter; +using detail::nexttoward; +using detail::copysign; +using detail::fpclassify; +using detail::isfinite; +using detail::isinf; +using detail::isnan; +using detail::isnormal; +using detail::signbit; +using detail::isgreater; +using detail::isgreaterequal; +using detail::isless; +using detail::islessequal; +using detail::islessgreater; +using detail::isunordered; + +using detail::half_cast; +} // namespace half_float + +/// Extensions to the C++ standard library. +namespace std +{ +/// Numeric limits for half-precision floats. +/// Because of the underlying single-precision implementation of many operations, it inherits some properties from +/// `std::numeric_limits`. +template <> +class numeric_limits : public numeric_limits +{ +public: + /// Supports signed values. + static HALF_CONSTEXPR_CONST bool is_signed = true; + + /// Is not exact. + static HALF_CONSTEXPR_CONST bool is_exact = false; + + /// Doesn't provide modulo arithmetic. + static HALF_CONSTEXPR_CONST bool is_modulo = false; + + /// IEEE conformant. + static HALF_CONSTEXPR_CONST bool is_iec559 = true; + + /// Supports infinity. 
+ static HALF_CONSTEXPR_CONST bool has_infinity = true; + + /// Supports quiet NaNs. + static HALF_CONSTEXPR_CONST bool has_quiet_NaN = true; + + /// Supports subnormal values. + static HALF_CONSTEXPR_CONST float_denorm_style has_denorm = denorm_present; + + /// Rounding mode. + /// Due to the mix of internal single-precision computations (using the rounding mode of the underlying + /// single-precision implementation) with the rounding mode of the single-to-half conversions, the actual rounding + /// mode might be `std::round_indeterminate` if the default half-precision rounding mode doesn't match the + /// single-precision rounding mode. + static HALF_CONSTEXPR_CONST float_round_style round_style + = (std::numeric_limits::round_style == half_float::half::round_style) ? half_float::half::round_style + : round_indeterminate; + + /// Significant digits. + static HALF_CONSTEXPR_CONST int digits = 11; + + /// Significant decimal digits. + static HALF_CONSTEXPR_CONST int digits10 = 3; + + /// Required decimal digits to represent all possible values. + static HALF_CONSTEXPR_CONST int max_digits10 = 5; + + /// Number base. + static HALF_CONSTEXPR_CONST int radix = 2; + + /// One more than smallest exponent. + static HALF_CONSTEXPR_CONST int min_exponent = -13; + + /// Smallest normalized representable power of 10. + static HALF_CONSTEXPR_CONST int min_exponent10 = -4; + + /// One more than largest exponent + static HALF_CONSTEXPR_CONST int max_exponent = 16; + + /// Largest finitely representable power of 10. + static HALF_CONSTEXPR_CONST int max_exponent10 = 4; + + /// Smallest positive normal value. + static HALF_CONSTEXPR half_float::half min() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x0400); + } + + /// Smallest finite value. + static HALF_CONSTEXPR half_float::half lowest() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0xFBFF); + } + + /// Largest finite value. 
+ static HALF_CONSTEXPR half_float::half max() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x7BFF); + } + + /// Difference between one and next representable value. + static HALF_CONSTEXPR half_float::half epsilon() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x1400); + } + + /// Maximum rounding error. + static HALF_CONSTEXPR half_float::half round_error() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, (round_style == std::round_to_nearest) ? 0x3800 : 0x3C00); + } + + /// Positive infinity. + static HALF_CONSTEXPR half_float::half infinity() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x7C00); + } + + /// Quiet NaN. + static HALF_CONSTEXPR half_float::half quiet_NaN() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x7FFF); + } + + /// Signalling NaN. + static HALF_CONSTEXPR half_float::half signaling_NaN() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x7DFF); + } + + /// Smallest positive subnormal value. + static HALF_CONSTEXPR half_float::half denorm_min() HALF_NOTHROW + { + return half_float::half(half_float::detail::binary, 0x0001); + } +}; + +#if HALF_ENABLE_CPP11_HASH +/// Hash function for half-precision floats. +/// This is only defined if C++11 `std::hash` is supported and enabled. +template <> +struct hash //: unary_function +{ + /// Type of function argument. + typedef half_float::half argument_type; + + /// Function return type. + typedef size_t result_type; + + /// Compute hash function. 
+ /// \param arg half to hash + /// \return hash value + result_type operator()(argument_type arg) const + { + return hash()(static_cast(arg.data_) & -(arg.data_ != 0x8000)); + } +}; +#endif +} // namespace std #undef HALF_CONSTEXPR #undef HALF_CONSTEXPR_CONST #undef HALF_NOEXCEPT #undef HALF_NOTHROW #ifdef HALF_POP_WARNINGS - #pragma warning(pop) - #undef HALF_POP_WARNINGS +#pragma warning(pop) +#undef HALF_POP_WARNINGS #endif #endif From 5ef4b5248a2276d8301cc9c78f18761c4ffb3821 Mon Sep 17 00:00:00 2001 From: Max Buckley Date: Sun, 22 Mar 2026 19:54:26 +0100 Subject: [PATCH 2/3] Add Clang compatibility and fix remaining Clang-specific warnings - Add Clang-specific suppression for -Wvariadic-macro-arguments-omitted (triggered by CUDA toolkit headers) and -Wsign-conversion (not part of GCC's -Wconversion) - Fix deprecated literal operator syntax (remove space before _GiB etc.) - Fix Clang -Wdouble-promotion: use lroundf() instead of lround() for float args, explicit casts for long double expressions - Fix Clang -Wfloat-conversion: use explicit != 0.0F instead of implicit float-to-bool - Fix Clang -Wsign-conversion: add static_cast() for int-to- size_t conversions in sample code Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Max Buckley --- CMakeLists.txt | 8 ++++ .../groupNormalizationPlugin.cpp | 2 +- .../voxelGeneratorPlugin/voxelGenerator.cpp | 6 +-- samples/common/ErrorRecorder.h | 8 ++-- samples/common/common.h | 44 +++++++++---------- samples/common/getOptions.cpp | 14 +++--- samples/common/sampleInference.cpp | 2 +- samples/common/sampleOptions.cpp | 10 ++--- 8 files changed, 53 insertions(+), 41 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6d2e8346f..aa90ede39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -264,6 +264,14 @@ if(NOT MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wformat=2 -Wimplicit-fallthrough") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} 
-Werror") + + # Clang-specific suppressions + if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + # CUDA toolkit headers trigger variadic macro warnings + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-variadic-macro-arguments-omitted") + # Clang's -Wpedantic enables -Wsign-conversion (GCC does not) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-sign-conversion") + endif() else() set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBUILD_SYSTEM=cmake_oss") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /WX") diff --git a/plugin/groupNormalizationPlugin/groupNormalizationPlugin.cpp b/plugin/groupNormalizationPlugin/groupNormalizationPlugin.cpp index 87bb384b5..debd15af0 100644 --- a/plugin/groupNormalizationPlugin/groupNormalizationPlugin.cpp +++ b/plugin/groupNormalizationPlugin/groupNormalizationPlugin.cpp @@ -159,7 +159,7 @@ int32_t GroupNormalizationPlugin::enqueue(nvinfer1::PluginTensorDesc const* inpu mBNTensorDesc, // mBnScales->mPtr, // 1 mBnBias->mPtr, // 0 - 0.0F, // exponential average factor + static_cast(0.0F), // exponential average factor nullptr, // resultRunningMean nullptr, // resultRunningVar static_cast(mEpsilon), // eps diff --git a/plugin/voxelGeneratorPlugin/voxelGenerator.cpp b/plugin/voxelGeneratorPlugin/voxelGenerator.cpp index d6c02444b..68c3e2f52 100644 --- a/plugin/voxelGeneratorPlugin/voxelGenerator.cpp +++ b/plugin/voxelGeneratorPlugin/voxelGenerator.cpp @@ -39,12 +39,12 @@ size_t constexpr kSERIALIZATION_SIZE{9 * sizeof(float) + 7 * sizeof(int32_t)}; int32_t npRound(float x) { // half way round to nearest-even - int32_t x2 = static_cast(lround(x * 2.0F)); + int32_t x2 = static_cast(lroundf(x * 2.0F)); if (x != static_cast(static_cast(x)) && static_cast(x2) == x * 2.0F) { - return static_cast(lround(x / 2.0F + 0.5F)) * 2; + return static_cast(lroundf(x / 2.0F + 0.5F)) * 2; } - return static_cast(lround(x)); + return static_cast(lroundf(x)); } VoxelGeneratorPlugin::VoxelGeneratorPlugin(int32_t maxVoxels, int32_t maxPoints, int32_t voxelFeatures, float xMin, 
diff --git a/samples/common/ErrorRecorder.h b/samples/common/ErrorRecorder.h index 948f90270..d96723d37 100644 --- a/samples/common/ErrorRecorder.h +++ b/samples/common/ErrorRecorder.h @@ -52,11 +52,13 @@ class SampleErrorRecorder : public IErrorRecorder } ErrorCode getErrorCode(int32_t errorIdx) const noexcept final { - return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT : (*this)[errorIdx].first; + return invalidIndexCheck(errorIdx) ? ErrorCode::kINVALID_ARGUMENT + : (*this)[static_cast(errorIdx)].first; }; IErrorRecorder::ErrorDesc getErrorDesc(int32_t errorIdx) const noexcept final { - return invalidIndexCheck(errorIdx) ? "errorIdx out of range." : (*this)[errorIdx].second.c_str(); + return invalidIndexCheck(errorIdx) ? "errorIdx out of range." + : (*this)[static_cast(errorIdx)].second.c_str(); } // This class can never overflow since we have dynamic resize via std::vector usage. bool hasOverflowed() const noexcept final @@ -122,7 +124,7 @@ class SampleErrorRecorder : public IErrorRecorder { // By converting signed to unsigned, we only need a single check since // negative numbers turn into large positive greater than the size. - size_t sIndex = index; + size_t sIndex = static_cast(index); return sIndex >= mErrorStack.size(); } // Mutex to hold when locking mErrorStack. 
diff --git a/samples/common/common.h b/samples/common/common.h index 4da105edb..6af6454b3 100644 --- a/samples/common/common.h +++ b/samples/common/common.h @@ -105,15 +105,15 @@ using namespace nvinfer1; #undef CHECK #define CHECK(status) CHECK_WITH_STREAM(status, std::cerr) -constexpr long double operator"" _GiB(long double val) +constexpr long double operator""_GiB(long double val) { return val * (1 << 30); } -constexpr long double operator"" _MiB(long double val) +constexpr long double operator""_MiB(long double val) { return val * (1 << 20); } -constexpr long double operator"" _KiB(long double val) +constexpr long double operator""_KiB(long double val) { return val * (1 << 10); } @@ -536,28 +536,28 @@ inline size_t getNbBytes(nvinfer1::DataType t, int64_t vol) noexcept { switch (t) { - case nvinfer1::DataType::kINT64: return 8 * vol; + case nvinfer1::DataType::kINT64: return static_cast(8 * vol); case nvinfer1::DataType::kINT32: - case nvinfer1::DataType::kFLOAT: return 4 * vol; + case nvinfer1::DataType::kFLOAT: return static_cast(4 * vol); case nvinfer1::DataType::kBF16: - case nvinfer1::DataType::kHALF: return 2 * vol; + case nvinfer1::DataType::kHALF: return static_cast(2 * vol); case nvinfer1::DataType::kBOOL: case nvinfer1::DataType::kUINT8: - case nvinfer1::DataType::kINT8: return vol; + case nvinfer1::DataType::kINT8: return static_cast(vol); case nvinfer1::DataType::kFP8: #if CUDA_VERSION < 11060 ASSERT(false && "FP8 is not supported"); #else - return vol; + return static_cast(vol); #endif case nvinfer1::DataType::kE8M0: #if CUDA_VERSION < 12080 ASSERT(false && "E8M0 is not supported"); #else - return vol; + return static_cast(vol); #endif // CUDA_VERSION < 12080 case nvinfer1::DataType::kINT4: - case nvinfer1::DataType::kFP4: return (vol + 1) / 2; + case nvinfer1::DataType::kFP4: return static_cast((vol + 1) / 2); } ASSERT(false && "Unknown element type"); } @@ -759,25 +759,25 @@ inline void writePPMFileWithBBox(const std::string& filename, vPPM ppm, 
std::vec for (int x = int(bbox.x1); x < int(bbox.x2); ++x) { // bbox top border - ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3] = 255; - ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 1] = 0; - ppm.buffer[(round(bbox.y1) * ppm.w + x) * 3 + 2] = 0; + ppm.buffer[static_cast((round(bbox.y1) * ppm.w + x) * 3)] = 255; + ppm.buffer[static_cast((round(bbox.y1) * ppm.w + x) * 3 + 1)] = 0; + ppm.buffer[static_cast((round(bbox.y1) * ppm.w + x) * 3 + 2)] = 0; // bbox bottom border - ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3] = 255; - ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 1] = 0; - ppm.buffer[(round(bbox.y2) * ppm.w + x) * 3 + 2] = 0; + ppm.buffer[static_cast((round(bbox.y2) * ppm.w + x) * 3)] = 255; + ppm.buffer[static_cast((round(bbox.y2) * ppm.w + x) * 3 + 1)] = 0; + ppm.buffer[static_cast((round(bbox.y2) * ppm.w + x) * 3 + 2)] = 0; } for (int y = int(bbox.y1); y < int(bbox.y2); ++y) { // bbox left border - ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3] = 255; - ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 1] = 0; - ppm.buffer[(y * ppm.w + round(bbox.x1)) * 3 + 2] = 0; + ppm.buffer[static_cast((y * ppm.w + round(bbox.x1)) * 3)] = 255; + ppm.buffer[static_cast((y * ppm.w + round(bbox.x1)) * 3 + 1)] = 0; + ppm.buffer[static_cast((y * ppm.w + round(bbox.x1)) * 3 + 2)] = 0; // bbox right border - ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3] = 255; - ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 1] = 0; - ppm.buffer[(y * ppm.w + round(bbox.x2)) * 3 + 2] = 0; + ppm.buffer[static_cast((y * ppm.w + round(bbox.x2)) * 3)] = 255; + ppm.buffer[static_cast((y * ppm.w + round(bbox.x2)) * 3 + 1)] = 0; + ppm.buffer[static_cast((y * ppm.w + round(bbox.x2)) * 3 + 2)] = 0; } } diff --git a/samples/common/getOptions.cpp b/samples/common/getOptions.cpp index a7a571079..b04851a03 100644 --- a/samples/common/getOptions.cpp +++ b/samples/common/getOptions.cpp @@ -201,12 +201,13 @@ TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector(idx)].valueRequired) { if 
(!value.empty()) { - parsedArgs.values[idx].second.push_back(value); - parsedArgs.values[idx].first = static_cast(parsedArgs.values[idx].second.size()); + parsedArgs.values[static_cast(idx)].second.push_back(value); + parsedArgs.values[static_cast(idx)].first + = static_cast(parsedArgs.values[static_cast(idx)].second.size()); continue; } @@ -222,14 +223,15 @@ TRTParsedArgs parseArgs(int argc, const char* const* argv, const std::vector(idx)].second.push_back(nextArg); i += 1; // Next argument already consumed - parsedArgs.values[idx].first = static_cast(parsedArgs.values[idx].second.size()); + parsedArgs.values[static_cast(idx)].first + = static_cast(parsedArgs.values[static_cast(idx)].second.size()); } else { - parsedArgs.values[idx].first += 1; + parsedArgs.values[static_cast(idx)].first += 1; } } return parsedArgs; diff --git a/samples/common/sampleInference.cpp b/samples/common/sampleInference.cpp index 75ca0a53f..7f68fc7dd 100644 --- a/samples/common/sampleInference.cpp +++ b/samples/common/sampleInference.cpp @@ -1426,7 +1426,7 @@ bool inferenceLoop(std::vector>& iStreams, TimePo } if (durationMs < warmupMs) // Warming up { - if (durationMs) // Skip complete iterations + if (durationMs != 0.0F) // Skip complete iterations { ++skip; } diff --git a/samples/common/sampleOptions.cpp b/samples/common/sampleOptions.cpp index 26e503c29..18556550a 100644 --- a/samples/common/sampleOptions.cpp +++ b/samples/common/sampleOptions.cpp @@ -1189,27 +1189,27 @@ void BuildOptions::parse(Arguments& arguments) if (memPoolName == "workspace") { // use unit in MB. - workspace = static_cast(memPoolSize / 1.0_MiB); + workspace = static_cast(static_cast(memPoolSize) / 1.0_MiB); } else if (memPoolName == "dlaSRAM") { // use unit in MB. - dlaSRAM = static_cast(memPoolSize / 1.0_MiB); + dlaSRAM = static_cast(static_cast(memPoolSize) / 1.0_MiB); } else if (memPoolName == "dlaLocalDRAM") { // use unit in MB. 
- dlaLocalDRAM = static_cast(memPoolSize / 1.0_MiB); + dlaLocalDRAM = static_cast(static_cast(memPoolSize) / 1.0_MiB); } else if (memPoolName == "dlaGlobalDRAM") { // use unit in MB. - dlaGlobalDRAM = static_cast(memPoolSize / 1.0_MiB); + dlaGlobalDRAM = static_cast(static_cast(memPoolSize) / 1.0_MiB); } else if (memPoolName == "tacticSharedMem") { // use unit in KB. - tacticSharedMem = static_cast(memPoolSize / 1.0_KiB); + tacticSharedMem = static_cast(static_cast(memPoolSize) / 1.0_KiB); } else if (!memPoolName.empty()) { From 30b08238341e0b4663c9ca4beb952ef7779fcefb Mon Sep 17 00:00:00 2001 From: Max Buckley Date: Mon, 23 Mar 2026 01:25:44 +0100 Subject: [PATCH 3/3] Fix warnings in additional sample files built with TensorRT runtime Fix warnings in 10 sample files that are only compiled when the real TensorRT runtime libraries are available (not with stub libs): - BatchStream.h: add virtual destructor, int64_t narrowing casts - sampleCharRNN: shadow fix, size_t casts - sampleDynamicReshape: unused param, double-promotion, conversion - sampleINT8API: shadow fix, float/int casts - sampleIOFormats: shadow fixes, unused params, sizeof casts - sampleNamedDimensions: unused params, float literal fixes - sampleNonZeroPlugin: unused params, int64_t/size_t casts - sampleOnnxMNIST: unused param, exp() float cast - sampleOnnxMnistCoordConvAC: float literal fixes, exp() cast - sampleProgressMonitor: int-to-float cast, exp() cast, ptr diff cast Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Max Buckley --- samples/common/BatchStream.h | 25 ++++---- samples/sampleCharRNN/sampleCharRNN.cpp | 10 ++-- .../sampleDynamicReshape.cpp | 6 +- samples/sampleINT8API/sampleINT8API.cpp | 5 +- samples/sampleIOFormats/sampleIOFormats.cpp | 60 +++++++++---------- .../sampleNamedDimensions.cpp | 15 ++--- .../sampleNonZeroPlugin.cpp | 46 +++++++------- samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp | 12 ++-- .../sampleOnnxMnistCoordConvAC.cpp | 16 ++--- .../sampleProgressMonitor.cpp 
| 14 ++--- 10 files changed, 105 insertions(+), 104 deletions(-) diff --git a/samples/common/BatchStream.h b/samples/common/BatchStream.h index d12596e2c..7bae344e6 100644 --- a/samples/common/BatchStream.h +++ b/samples/common/BatchStream.h @@ -26,6 +26,7 @@ class IBatchStream { public: + virtual ~IBatchStream() = default; virtual void reset(int firstBatch) = 0; virtual bool next() = 0; virtual void skip(int skipCount) = 0; @@ -173,7 +174,7 @@ class BatchStream : public IBatchStream mDims.d[3] = d[3]; // Width ASSERT(mDims.d[0] > 0 && mDims.d[1] > 0 && mDims.d[2] > 0 && mDims.d[3] > 0); - mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3]; + mImageSize = static_cast(mDims.d[1] * mDims.d[2] * mDims.d[3]); mBatch.resize(mBatchSize * mImageSize, 0); mLabels.resize(mBatchSize, 0); mFileBatch.resize(mDims.d[0] * mImageSize, 0); @@ -193,7 +194,7 @@ class BatchStream : public IBatchStream , mListFile(listFile) , mDataDir(directories) { - mImageSize = mDims.d[1] * mDims.d[2] * mDims.d[3]; + mImageSize = static_cast(mDims.d[1] * mDims.d[2] * mDims.d[3]); mBatch.resize(mBatchSize * mImageSize, 0); mLabels.resize(mBatchSize, 0); mFileBatch.resize(mDims.d[0] * mImageSize, 0); @@ -205,7 +206,7 @@ class BatchStream : public IBatchStream { mBatchCount = 0; mFileCount = 0; - mFileBatchPos = mDims.d[0]; + mFileBatchPos = static_cast(mDims.d[0]); skip(firstBatch); } @@ -217,7 +218,7 @@ class BatchStream : public IBatchStream return false; } - for (int64_t csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += csize) + for (int64_t csize = 1, batchPos = 0; batchPos < mBatchSize; batchPos += csize, mFileBatchPos += static_cast(csize)) { ASSERT(mFileBatchPos > 0 && mFileBatchPos <= mDims.d[0]); if (mFileBatchPos == mDims.d[0] && !update()) @@ -238,9 +239,9 @@ class BatchStream : public IBatchStream // Skips the batches void skip(int skipCount) override { - if (mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 && mFileBatchPos == mDims.d[0]) + if 
(mBatchSize >= mDims.d[0] && mBatchSize % mDims.d[0] == 0 && mFileBatchPos == static_cast(mDims.d[0])) { - mFileCount += skipCount * mBatchSize / mDims.d[0]; + mFileCount += static_cast(skipCount * mBatchSize / mDims.d[0]); return; } @@ -269,7 +270,7 @@ class BatchStream : public IBatchStream int getBatchSize() const override { - return mBatchSize; + return static_cast(mBatchSize); } nvinfer1::Dims getDims() const override @@ -338,14 +339,14 @@ class BatchStream : public IBatchStream } std::vector data(samplesCommon::volume(mDims)); - const float scale = 2.0 / 255.0; - const float bias = 1.0; - long int volChl = mDims.d[2] * mDims.d[3]; + const float scale = 2.0f / 255.0f; + const float bias = 1.0f; + long int volChl = static_cast(mDims.d[2] * mDims.d[3]); // Normalize input data - for (int i = 0, volImg = mDims.d[1] * mDims.d[2] * mDims.d[3]; i < mBatchSize; ++i) + for (int i = 0, volImg = static_cast(mDims.d[1] * mDims.d[2] * mDims.d[3]); i < mBatchSize; ++i) { - for (int c = 0; c < mDims.d[1]; ++c) + for (int64_t c = 0; c < mDims.d[1]; ++c) { for (int j = 0; j < volChl; ++j) { diff --git a/samples/sampleCharRNN/sampleCharRNN.cpp b/samples/sampleCharRNN/sampleCharRNN.cpp index 0a59712f4..39fe46709 100644 --- a/samples/sampleCharRNN/sampleCharRNN.cpp +++ b/samples/sampleCharRNN/sampleCharRNN.cpp @@ -639,12 +639,12 @@ nvinfer1::ILayer* SampleCharRNNLoop::addLSTMLayers(std::unique_ptr nvinfer1::Weights { - const char* data = static_cast(weights.values); + const char* data_ = static_cast(weights.values); int64_t shift = samplesCommon::volume(start); - const int bufferSize = samplesCommon::getNbBytes(weights.type, shift); + const int bufferSize = static_cast(samplesCommon::getNbBytes(weights.type, shift)); int64_t count = samplesCommon::volume(size); ASSERT(shift + count <= weights.count); - return nvinfer1::Weights{weights.type, data + bufferSize, count}; + return nvinfer1::Weights{weights.type, data_ + bufferSize, count}; }; for (int i = 0; i < mParams.layerCount; 
++i) { @@ -667,7 +667,7 @@ nvinfer1::ILayer* SampleCharRNNLoop::addLSTMLayers(std::unique_ptr tensors) -> nvinfer1::ITensor* { - nvinfer1::IConcatenationLayer* concat = network->addConcatenation(tensors.data(), tensors.size()); + nvinfer1::IConcatenationLayer* concat = network->addConcatenation(tensors.data(), static_cast(tensors.size())); concat->setAxis(0); return concat->getOutput(0); }; @@ -776,7 +776,7 @@ bool SampleCharRNNBase::infer() // Select a random seed string. srand(unsigned(time(nullptr))); - int sentenceIndex = rand() % mParams.inputSentences.size(); + int sentenceIndex = static_cast(rand() % mParams.inputSentences.size()); std::string inputSentence = mParams.inputSentences[sentenceIndex]; std::string expected = mParams.outputSentences[sentenceIndex]; std::string genstr; diff --git a/samples/sampleDynamicReshape/sampleDynamicReshape.cpp b/samples/sampleDynamicReshape/sampleDynamicReshape.cpp index d5c3f221f..5504002ad 100644 --- a/samples/sampleDynamicReshape/sampleDynamicReshape.cpp +++ b/samples/sampleDynamicReshape/sampleDynamicReshape.cpp @@ -150,7 +150,7 @@ bool SampleDynamicReshape::build() //! \return false if error in build preprocessor engine. //! bool SampleDynamicReshape::buildPreprocessorEngine( - nvinfer1::IBuilder& builder, nvinfer1::IRuntime& runtime, cudaStream_t profileStream) + nvinfer1::IBuilder& builder, nvinfer1::IRuntime& runtime, cudaStream_t /*profileStream*/) { // Create the preprocessor engine using a network that supports full dimensions (createNetworkV2). 
auto preprocessorNetwork = std::unique_ptr( @@ -437,7 +437,7 @@ Dims SampleDynamicReshape::loadPGMFile(const std::string& fileName) mInput.hostBuffer.resize(inputDims); float* hostDataBuffer = static_cast(mInput.hostBuffer.data()); std::transform(fileData.begin(), fileData.end(), hostDataBuffer, - [](uint8_t x) { return 1.0 - static_cast(x / 255.0); }); + [](uint8_t x) { return 1.0f - static_cast(x) / 255.0f; }); return inputDims; } @@ -459,7 +459,7 @@ bool SampleDynamicReshape::validateOutput(int digit) ++curIndex; } - int predictedDigit = std::max_element(prob.begin(), prob.end()) - prob.begin(); + int predictedDigit = static_cast(std::max_element(prob.begin(), prob.end()) - prob.begin()); return digit == predictedDigit; } diff --git a/samples/sampleINT8API/sampleINT8API.cpp b/samples/sampleINT8API/sampleINT8API.cpp index 9a6f9e17f..c644396d9 100644 --- a/samples/sampleINT8API/sampleINT8API.cpp +++ b/samples/sampleINT8API/sampleINT8API.cpp @@ -241,7 +241,6 @@ void SampleINT8API::setLayerPrecision(nvinfer1::INetworkDefinition const& networ std::string tensorName = layer->getOutput(j)->getName(); if (mParams.verbose) { - std::string tensorName = layer->getOutput(j)->getName(); sample::gLogInfo << "Tensor: " << tensorName << ". OutputType: INT8" << std::endl; } // set output type of execution tensors and not shape tensors. 
@@ -391,7 +390,7 @@ bool SampleINT8API::setDynamicRange(nvinfer1::INetworkDefinition& network) max = std::max(max, std::abs(val)); } - if (!lyr->getOutput(j)->setDynamicRange(-max, max)) + if (!lyr->getOutput(j)->setDynamicRange(static_cast(-max), static_cast(max))) { return false; } @@ -466,7 +465,7 @@ bool SampleINT8API::verifyOutput(samplesCommon::BufferManager const& buffers) co { // copy output host buffer data for further processing float const* probPtr = static_cast(buffers.getHostBuffer(mInOut.at("output"))); - std::vector output(probPtr, probPtr + mOutputDims.d[1]); + std::vector output(probPtr, probPtr + static_cast(mOutputDims.d[1])); auto inds = samplesCommon::argMagnitudeSort(output.cbegin(), output.cend()); diff --git a/samples/sampleIOFormats/sampleIOFormats.cpp b/samples/sampleIOFormats/sampleIOFormats.cpp index 6039bee33..213c2599c 100644 --- a/samples/sampleIOFormats/sampleIOFormats.cpp +++ b/samples/sampleIOFormats/sampleIOFormats.cpp @@ -74,32 +74,32 @@ class BufferDesc public: BufferDesc() = default; - BufferDesc(nvinfer1::Dims dims, int32_t dataWidth, TensorFormat format) + BufferDesc(nvinfer1::Dims dims_, int32_t dataWidth_, TensorFormat format) { - this->dataWidth = dataWidth; + this->dataWidth = dataWidth_; if (format == TensorFormat::kLINEAR) { - this->dims[0] = dims.d[0]; - this->dims[1] = dims.d[1]; - this->dims[2] = dims.d[2]; - this->dims[3] = dims.d[3]; + this->dims[0] = static_cast(dims_.d[0]); + this->dims[1] = static_cast(dims_.d[1]); + this->dims[2] = static_cast(dims_.d[2]); + this->dims[3] = static_cast(dims_.d[3]); this->dims[4] = 1; } else if (format == TensorFormat::kCHW32) { - this->dims[0] = dims.d[0]; - this->dims[1] = divUp(dims.d[1], 32); - this->dims[2] = dims.d[2]; - this->dims[3] = dims.d[3]; + this->dims[0] = static_cast(dims_.d[0]); + this->dims[1] = divUp(static_cast(dims_.d[1]), 32); + this->dims[2] = static_cast(dims_.d[2]); + this->dims[3] = static_cast(dims_.d[3]); this->dims[4] = 32; this->scalarPerVector = 
32; } else if (format == TensorFormat::kHWC) { - this->dims[0] = dims.d[0]; - this->dims[1] = dims.d[2]; - this->dims[2] = dims.d[3]; - this->dims[3] = dims.d[1]; + this->dims[0] = static_cast(dims_.d[0]); + this->dims[1] = static_cast(dims_.d[2]); + this->dims[2] = static_cast(dims_.d[3]); + this->dims[3] = static_cast(dims_.d[1]); this->dims[4] = 1; this->channelPivot = true; } @@ -144,23 +144,23 @@ class SampleBuffer dims.d[3] = 1; } - SampleBuffer(nvinfer1::Dims dims, int32_t dataWidth, TensorFormat format, bool isInput) - : dims(dims) - , dataWidth(dataWidth) - , format(format) - , isInput(isInput) + SampleBuffer(nvinfer1::Dims dims_, int32_t dataWidth_, TensorFormat format_, bool isInput_) + : dims(dims_) + , dataWidth(dataWidth_) + , format(format_) + , isInput(isInput_) { // Output buffer is unsqueezed to 4D in order to reuse the BufferDesc class - if (isInput == false) + if (isInput_ == false) { - dims.d[2] = dims.d[0]; - dims.d[3] = dims.d[1]; - dims.d[0] = 1; - dims.d[1] = 1; + dims_.d[2] = dims_.d[0]; + dims_.d[3] = dims_.d[1]; + dims_.d[0] = 1; + dims_.d[1] = 1; } - desc = BufferDesc(dims, dataWidth, format); + desc = BufferDesc(dims_, dataWidth_, format_); if (nullptr == buffer) { @@ -330,7 +330,7 @@ bool SampleIOFormats::verify(TypeSpec const& spec) //! //! \return true if the engine was created successfully and false otherwise //! -bool SampleIOFormats::build(int32_t dataWidth) +bool SampleIOFormats::build(int32_t /*dataWidth*/) { auto builder = std::unique_ptr(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger())); if (!builder) @@ -436,7 +436,7 @@ bool SampleIOFormats::build(int32_t dataWidth) //! \param builder Pointer to the engine builder //! 
bool SampleIOFormats::constructNetwork(std::unique_ptr& builder, - std::unique_ptr& network, std::unique_ptr& config, + std::unique_ptr& /*network*/, std::unique_ptr& config, std::unique_ptr& parser) { auto parsed = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), @@ -548,7 +548,7 @@ void printHelpInfo() //! \brief Used to run the engine build and inference/reference functions //! template -bool process(SampleIOFormats& sample, sample::Logger::TestAtom const& sampleTest, SampleBuffer& inputBuf, +bool process(SampleIOFormats& sample, sample::Logger::TestAtom const& /*sampleTest*/, SampleBuffer& inputBuf, SampleBuffer& outputBuf, TypeSpec& spec) { sample::gLogInfo << "Building and running a GPU inference engine with specified I/O formats." << std::endl; @@ -562,8 +562,8 @@ bool process(SampleIOFormats& sample, sample::Logger::TestAtom const& sampleTest return false; } - inputBuf = SampleBuffer(sample.mInputDims, sizeof(T), sample.mTensorFormat, true); - outputBuf = SampleBuffer(sample.mOutputDims, sizeof(T), TensorFormat::kLINEAR, false); + inputBuf = SampleBuffer(sample.mInputDims, static_cast(sizeof(T)), sample.mTensorFormat, true); + outputBuf = SampleBuffer(sample.mOutputDims, static_cast(sizeof(T)), TensorFormat::kLINEAR, false); if (!sample.infer(inputBuf, outputBuf)) { diff --git a/samples/sampleNamedDimensions/sampleNamedDimensions.cpp b/samples/sampleNamedDimensions/sampleNamedDimensions.cpp index c1da71a68..b0a6983a4 100644 --- a/samples/sampleNamedDimensions/sampleNamedDimensions.cpp +++ b/samples/sampleNamedDimensions/sampleNamedDimensions.cpp @@ -36,6 +36,7 @@ #include #include +#include #include #include #include @@ -223,8 +224,8 @@ bool SampleNamedDimensions::build() //! //! \brief Uses ONNX parser to create the ONNX Network and marks the output layers //! 
-bool SampleNamedDimensions::constructNetwork(std::unique_ptr& builder, - std::unique_ptr& network, std::unique_ptr& config, +bool SampleNamedDimensions::constructNetwork(std::unique_ptr& /*builder*/, + std::unique_ptr& /*network*/, std::unique_ptr& /*config*/, std::unique_ptr& parser) { auto parsed = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), @@ -314,15 +315,15 @@ bool SampleNamedDimensions::infer() bool SampleNamedDimensions::processInput(samplesCommon::BufferManager const& buffers) { int32_t const input0H = mNamedDimension; - int32_t const input0W = mInputDims[0].d[1]; + int32_t const input0W = static_cast(mInputDims[0].d[1]); int32_t const input1H = mNamedDimension; - int32_t const input1W = mInputDims[1].d[1]; + int32_t const input1W = static_cast(mInputDims[1].d[1]); // Generate random input mInput0.resize(input0H * input0W); mInput1.resize(input1H * input1W); std::default_random_engine generator(static_cast(time(nullptr))); - std::uniform_real_distribution unif_real_distr(-10., 10.); + std::uniform_real_distribution unif_real_distr(-10.0f, 10.0f); sample::gLogInfo << "Input0:\n"; for (int32_t i = 0; i < input0H * input0W; i++) @@ -357,7 +358,7 @@ bool SampleNamedDimensions::processInput(samplesCommon::BufferManager const& buf bool SampleNamedDimensions::verifyOutput(samplesCommon::BufferManager const& buffers) { int32_t const outputH = 2 * mNamedDimension; - int32_t const outputW = mOutputDims[0].d[1]; + int32_t const outputW = static_cast(mOutputDims[0].d[1]); int32_t const outputSize = outputH * outputW; auto* output = static_cast(buffers.getHostBuffer(mParams.outputTensorNames[0])); @@ -374,7 +375,7 @@ bool SampleNamedDimensions::verifyOutput(samplesCommon::BufferManager const& buf for (int32_t i = 0; i < outputH * outputW; i++) { auto const reference_value = i > outputSize / 2 ? 
mInput1[i - outputSize / 2] : mInput0[i]; - if (fabs(output[i] - reference_value) > std::numeric_limits::epsilon()) + if (std::abs(output[i] - reference_value) > std::numeric_limits::epsilon()) { return false; } diff --git a/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp b/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp index 3edf8519d..1c7b7b419 100644 --- a/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp +++ b/samples/sampleNonZeroPlugin/sampleNonZeroPlugin.cpp @@ -80,7 +80,7 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV { mDataToSerialize.clear(); mDataToSerialize.emplace_back(PluginField("rowOrder", &mRowOrder, PluginFieldType::kINT32, 1)); - mFCToSerialize.nbFields = mDataToSerialize.size(); + mFCToSerialize.nbFields = static_cast(mDataToSerialize.size()); mFCToSerialize.fields = mDataToSerialize.data(); } @@ -137,14 +137,14 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV return 2; } - int32_t configurePlugin(DynamicPluginTensorDesc const* in, int32_t nbInputs, DynamicPluginTensorDesc const* out, - int32_t nbOutputs) noexcept override + int32_t configurePlugin(DynamicPluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, DynamicPluginTensorDesc const* /*out*/, + int32_t /*nbOutputs*/) noexcept override { return 0; } bool supportsFormatCombination( - int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept override + int32_t pos, DynamicPluginTensorDesc const* inOut, int32_t /*nbInputs*/, int32_t /*nbOutputs*/) noexcept override { bool typeOk{false}; if (pos == 0) @@ -165,15 +165,15 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV } int32_t getOutputDataTypes( - DataType* outputTypes, int32_t nbOutputs, DataType const* inputTypes, int32_t nbInputs) const noexcept override + DataType* outputTypes, int32_t /*nbOutputs*/, DataType const* /*inputTypes*/, int32_t /*nbInputs*/) const noexcept override { 
outputTypes[0] = DataType::kINT32; outputTypes[1] = DataType::kINT64; return 0; } - int32_t getOutputShapes(DimsExprs const* inputs, int32_t nbInputs, DimsExprs const* shapeInputs, - int32_t nbShapeInputs, DimsExprs* outputs, int32_t nbOutputs, IExprBuilder& exprBuilder) noexcept override + int32_t getOutputShapes(DimsExprs const* inputs, int32_t /*nbInputs*/, DimsExprs const* /*shapeInputs*/, + int32_t /*nbShapeInputs*/, DimsExprs* outputs, int32_t /*nbOutputs*/, IExprBuilder& exprBuilder) noexcept override { // The input tensor must be 2-D if (inputs[0].nbDims != 2) @@ -207,12 +207,12 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV } // IPluginV3OneRuntime methods - int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* outputDesc, void const* const* inputs, + int32_t enqueue(PluginTensorDesc const* inputDesc, PluginTensorDesc const* /*outputDesc*/, void const* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept override { - int32_t const R = inputDesc[0].dims.d[0]; - int32_t const C = inputDesc[0].dims.d[1]; + int32_t const R = static_cast(inputDesc[0].dims.d[0]); + int32_t const C = static_cast(inputDesc[0].dims.d[1]); auto type = inputDesc[0].type; @@ -249,12 +249,12 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV } int32_t onShapeChange( - PluginTensorDesc const* in, int32_t nbInputs, PluginTensorDesc const* out, int32_t nbOutputs) noexcept override + PluginTensorDesc const* /*in*/, int32_t /*nbInputs*/, PluginTensorDesc const* /*out*/, int32_t /*nbOutputs*/) noexcept override { return 0; } - IPluginV3* attachToContext(IPluginResourceContext* context) noexcept override + IPluginV3* attachToContext(IPluginResourceContext* /*context*/) noexcept override { return clone(); } @@ -264,8 +264,8 @@ class NonZeroPlugin : public IPluginV3, public IPluginV3OneCore, public IPluginV return &mFCToSerialize; } - size_t 
getWorkspaceSize(DynamicPluginTensorDesc const* inputs, int32_t nbInputs, - DynamicPluginTensorDesc const* outputs, int32_t nbOutputs) const noexcept override + size_t getWorkspaceSize(DynamicPluginTensorDesc const* /*inputs*/, int32_t /*nbInputs*/, + DynamicPluginTensorDesc const* /*outputs*/, int32_t /*nbOutputs*/) const noexcept override { return sizeof(int64_t); } @@ -283,7 +283,7 @@ class NonZeroPluginCreator : public nvinfer1::IPluginCreatorV3One { mPluginAttributes.clear(); mPluginAttributes.emplace_back(PluginField("rowOrder", nullptr, PluginFieldType::kINT32, 1)); - mFC.nbFields = mPluginAttributes.size(); + mFC.nbFields = static_cast(mPluginAttributes.size()); mFC.fields = mPluginAttributes.data(); } @@ -302,7 +302,7 @@ class NonZeroPluginCreator : public nvinfer1::IPluginCreatorV3One return &mFC; } - IPluginV3* createPlugin(char const* name, PluginFieldCollection const* fc, TensorRTPhase phase) noexcept override + IPluginV3* createPlugin(char const* /*name*/, PluginFieldCollection const* fc, TensorRTPhase /*phase*/) noexcept override { try { @@ -483,8 +483,8 @@ bool SampleNonZeroPlugin::build() //! //! \param builder Pointer to the engine builder //! 
-bool SampleNonZeroPlugin::constructNetwork(std::unique_ptr& builder, - std::unique_ptr& network, std::unique_ptr& config) +bool SampleNonZeroPlugin::constructNetwork(std::unique_ptr& /*builder*/, + std::unique_ptr& network, std::unique_ptr& /*config*/) { std::default_random_engine generator(mSeed); std::uniform_int_distribution distr(10, 25); @@ -510,7 +510,7 @@ bool SampleNonZeroPlugin::constructNetwork(std::unique_ptr& ASSERT(plugin != nullptr && "NonZeroPlugin construction failed"); std::vector inputsVec{in}; - auto pluginNonZeroLayer = network->addPluginV3(inputsVec.data(), inputsVec.size(), nullptr, 0, *plugin); + auto pluginNonZeroLayer = network->addPluginV3(inputsVec.data(), static_cast(inputsVec.size()), nullptr, 0, *plugin); ASSERT(pluginNonZeroLayer != nullptr); ASSERT(pluginNonZeroLayer->getOutput(0) != nullptr); ASSERT(pluginNonZeroLayer->getOutput(1) != nullptr); @@ -595,8 +595,8 @@ bool SampleNonZeroPlugin::infer() //! bool SampleNonZeroPlugin::processInput(samplesCommon::BufferManager const& buffers) { - int32_t const inputH = mInputDims.d[0]; - int32_t const inputW = mInputDims.d[1]; + int32_t const inputH = static_cast(mInputDims.d[0]); + int32_t const inputW = static_cast(mInputDims.d[1]); std::vector fileData(inputH * inputW); @@ -609,7 +609,7 @@ bool SampleNonZeroPlugin::processInput(samplesCommon::BufferManager const& buffe float* hostDataBuffer = static_cast(buffers.getHostBuffer(mParams.inputTensorNames[0])); for (int32_t i = 0; i < inputH * inputW; ++i) { - auto const raw = 1.0 - float(fileData[i] / 255.0); + auto const raw = 1.0f - static_cast(fileData[i]) / 255.0f; hostDataBuffer[i] = raw; } @@ -640,7 +640,7 @@ bool SampleNonZeroPlugin::verifyOutput(samplesCommon::BufferManager const& buffe { float* input = static_cast(buffers.getHostBuffer(mParams.inputTensorNames[0])); int32_t* output = static_cast(buffers.getHostBuffer(mParams.outputTensorNames[0])); - int64_t count = *static_cast(buffers.getHostBuffer(mParams.outputTensorNames[1])); 
+ int32_t count = static_cast<int32_t>(*static_cast<int64_t*>(buffers.getHostBuffer(mParams.outputTensorNames[1]))); std::vector<bool> covered(mInputDims.d[0] * mInputDims.d[1], false); diff --git a/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp b/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp index 33bb5f57d..2654fa67e 100644 --- a/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp +++ b/samples/sampleOnnxMNIST/sampleOnnxMNIST.cpp @@ -192,7 +192,7 @@ bool SampleOnnxMNIST::build() //! \param builder Pointer to the engine builder //! bool SampleOnnxMNIST::constructNetwork(std::unique_ptr<nvinfer1::IBuilder>& builder, - std::unique_ptr<nvinfer1::INetworkDefinition>& network, std::unique_ptr<nvinfer1::IBuilderConfig>& config, + std::unique_ptr<nvinfer1::INetworkDefinition>& /*network*/, std::unique_ptr<nvinfer1::IBuilderConfig>& config, std::unique_ptr<nvonnxparser::IParser>& parser, std::unique_ptr<nvinfer1::ITimingCache>& timingCache) { auto parsed = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), @@ -269,8 +269,8 @@ bool SampleOnnxMNIST::infer() //! bool SampleOnnxMNIST::processInput(const samplesCommon::BufferManager& buffers) { - const int inputH = mInputDims.d[2]; - const int inputW = mInputDims.d[3]; + const int inputH = static_cast<int>(mInputDims.d[2]); + const int inputW = static_cast<int>(mInputDims.d[3]); // Read a random digit file srand(unsigned(time(nullptr))); @@ -290,7 +290,7 @@ bool SampleOnnxMNIST::processInput(const samplesCommon::BufferManager& buffers) float* hostDataBuffer = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0])); for (int i = 0; i < inputH * inputW; i++) { - hostDataBuffer[i] = 1.0 - float(fileData[i] / 255.0); + hostDataBuffer[i] = 1.0f - static_cast<float>(fileData[i]) / 255.0f; } return true; @@ -303,7 +303,7 @@ bool SampleOnnxMNIST::processInput(const samplesCommon::BufferManager& buffers) //!
bool SampleOnnxMNIST::verifyOutput(const samplesCommon::BufferManager& buffers) { - const int outputSize = mOutputDims.d[1]; + const int outputSize = static_cast<int>(mOutputDims.d[1]); float* output = static_cast<float*>(buffers.getHostBuffer(mParams.outputTensorNames[0])); float val{0.0F}; int idx{0}; @@ -312,7 +312,7 @@ bool SampleOnnxMNIST::verifyOutput(const samplesCommon::BufferManager& buffers) float sum{0.0F}; for (int i = 0; i < outputSize; i++) { - output[i] = exp(output[i]); + output[i] = static_cast<float>(exp(output[i])); sum += output[i]; } diff --git a/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp b/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp index 2acf88bcd..d3f154b2f 100644 --- a/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp +++ b/samples/sampleOnnxMnistCoordConvAC/sampleOnnxMnistCoordConvAC.cpp @@ -48,8 +48,8 @@ const std::string gSampleName = "TensorRT.sample_onnx_mnist_coord_conv_ac"; // Normalization constants from Pytorch transform.Normalize(). // They are needed to preprocess the data: // https://discuss.pytorch.org/t/understanding-transform-normalize/21730 -const float PYTORCH_NORMALIZE_MEAN = 0.1307; -const float PYTORCH_NORMALIZE_STD = 0.3081; +const float PYTORCH_NORMALIZE_MEAN = 0.1307f; +const float PYTORCH_NORMALIZE_STD = 0.3081f; //! \brief The SampleOnnxMnistCoordConvAC class implements the ONNX MNIST sample //! @@ -210,7 +210,7 @@ bool SampleOnnxMnistCoordConvAC::build() //! \param builder Pointer to the engine builder //! bool SampleOnnxMnistCoordConvAC::constructNetwork(std::unique_ptr<nvinfer1::IBuilder>& builder, - std::unique_ptr<nvinfer1::INetworkDefinition>& network, std::unique_ptr<nvinfer1::IBuilderConfig>& config, + std::unique_ptr<nvinfer1::INetworkDefinition>& /*network*/, std::unique_ptr<nvinfer1::IBuilderConfig>& config, std::unique_ptr<nvonnxparser::IParser>& parser) { auto parsed = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), @@ -281,8 +281,8 @@ bool SampleOnnxMnistCoordConvAC::infer() //!
bool SampleOnnxMnistCoordConvAC::processInput(const samplesCommon::BufferManager& buffers) { - const int inputH = mInputDims.d[2]; - const int inputW = mInputDims.d[3]; + const int inputH = static_cast<int>(mInputDims.d[2]); + const int inputW = static_cast<int>(mInputDims.d[3]); // Read a random digit file srand(unsigned(time(nullptr))); @@ -301,7 +301,7 @@ bool SampleOnnxMnistCoordConvAC::processInput(const samplesCommon::BufferManager float* hostDataBuffer = static_cast<float*>(buffers.getHostBuffer(mParams.inputTensorNames[0])); for (int i = 0; i < inputH * inputW; i++) { - hostDataBuffer[i] = ((1.0 - float(fileData[i] / 255.0)) - PYTORCH_NORMALIZE_MEAN) / PYTORCH_NORMALIZE_STD; + hostDataBuffer[i] = ((1.0f - static_cast<float>(fileData[i]) / 255.0f) - PYTORCH_NORMALIZE_MEAN) / PYTORCH_NORMALIZE_STD; } return true; @@ -314,7 +314,7 @@ bool SampleOnnxMnistCoordConvAC::processInput(const samplesCommon::BufferManager //! bool SampleOnnxMnistCoordConvAC::verifyOutput(const samplesCommon::BufferManager& buffers) { - const int outputSize = mOutputDims.d[1]; + const int outputSize = static_cast<int>(mOutputDims.d[1]); float* output = static_cast<float*>(buffers.getHostBuffer(mParams.outputTensorNames[0])); float val{0.0F}; int idx{0}; @@ -323,7 +323,7 @@ bool SampleOnnxMnistCoordConvAC::verifyOutput(const samplesCommon::BufferManager float sum{0.0F}; for (int i = 0; i < outputSize; i++) { - output[i] = exp(output[i]); + output[i] = static_cast<float>(exp(output[i])); sum += output[i]; } diff --git a/samples/sampleProgressMonitor/sampleProgressMonitor.cpp b/samples/sampleProgressMonitor/sampleProgressMonitor.cpp index 5500e3467..c116c186c 100644 --- a/samples/sampleProgressMonitor/sampleProgressMonitor.cpp +++ b/samples/sampleProgressMonitor/sampleProgressMonitor.cpp @@ -155,7 +155,7 @@ class ConsoleProgressMonitor : public IProgressMonitor std::ostringstream bar; bar << '['; int32_t const completedChars - = static_cast<int32_t>(kPROGRESS_INNER_WIDTH * steps / static_cast<float>(nbSteps)); + =
static_cast<int32_t>(static_cast<float>(kPROGRESS_INNER_WIDTH) * static_cast<float>(steps) / static_cast<float>(nbSteps)); for (int32_t i = 0; i < completedChars; ++i) { bar << '='; @@ -328,8 +328,8 @@ bool SampleProgressMonitor::build(IProgressMonitor* monitor) bool SampleProgressMonitor::processInput( samplesCommon::BufferManager const& buffers, std::string const& inputTensorName, int32_t inputFileIdx) const { - int32_t const inputH = mInputDims.d[2]; - int32_t const inputW = mInputDims.d[3]; + int32_t const inputH = static_cast<int32_t>(mInputDims.d[2]); + int32_t const inputW = static_cast<int32_t>(mInputDims.d[3]); // Read a random digit file. srand(unsigned(time(nullptr))); @@ -364,7 +364,7 @@ bool SampleProgressMonitor::verifyOutput( float* prob = static_cast<float*>(buffers.getHostBuffer(outputTensorName)); int32_t constexpr kDIGITS = 10; - std::for_each(prob, prob + kDIGITS, [](float& n) { n = exp(n); }); + std::for_each(prob, prob + kDIGITS, [](float& n) { n = static_cast<float>(exp(n)); }); float const sum = std::accumulate(prob, prob + kDIGITS, 0.F); @@ -374,7 +374,7 @@ bool SampleProgressMonitor::verifyOutput( float const val = *max_ele; - int32_t const idx = max_ele - prob; + int32_t const idx = static_cast<int32_t>(max_ele - prob); // Print histogram of the output probability distribution. sample::gLogInfo << "Output:\n"; @@ -399,7 +399,7 @@ //! \param builder Pointer to the engine builder. //! bool SampleProgressMonitor::constructNetwork(std::unique_ptr<nvinfer1::IBuilder>& builder, - std::unique_ptr<nvinfer1::INetworkDefinition>& network, std::unique_ptr<nvinfer1::IBuilderConfig>& config, + std::unique_ptr<nvinfer1::INetworkDefinition>& /*network*/, std::unique_ptr<nvinfer1::IBuilderConfig>& config, std::unique_ptr<nvonnxparser::IParser>& parser) { auto parsed = parser->parseFromFile(samplesCommon::locateFile(mParams.onnxFileName, mParams.dataDirs).c_str(), @@ -432,7 +432,7 @@ bool SampleProgressMonitor::infer() } // Pick a random digit to try to infer. - srand(time(NULL)); + srand(static_cast<unsigned int>(time(NULL))); int32_t const digit = rand() % 10; // Read the input data into the managed buffers.