TensorRT 8.4.0
NvInferRuntime.h
Go to the documentation of this file.
1/*
2 * SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3 * SPDX-License-Identifier: LicenseRef-NvidiaProprietary
4 *
5 * NVIDIA CORPORATION, its affiliates and licensors retain all intellectual
6 * property and proprietary rights in and to this material, related
7 * documentation and any modifications thereto. Any use, reproduction,
8 * disclosure or distribution of this material and related documentation
9 * without an express license agreement from NVIDIA CORPORATION or
10 * its affiliates is strictly prohibited.
11 */
12
13#ifndef NV_INFER_RUNTIME_H
14#define NV_INFER_RUNTIME_H
15
21
22#include "NvInferImpl.h"
24
25namespace nvinfer1
26{
27
28class IExecutionContext;
29class ICudaEngine;
30class IPluginFactory;
31class IEngineInspector;
32
41
//! Base for interface classes that must be neither copied nor moved.
//!
//! Construction and destruction are protected, so only derived classes can create or
//! destroy an INoCopy. The `class INoCopy` declarator was missing from the listing and
//! is restored here; the name is the one used by the constructors below.
class INoCopy
{
protected:
    INoCopy() = default;
    virtual ~INoCopy() = default;

    // Copy operations are disabled.
    INoCopy(const INoCopy& other) = delete;
    INoCopy& operator=(const INoCopy& other) = delete;

    // Move operations are disabled as well.
    INoCopy(INoCopy&& other) = delete;
    INoCopy& operator=(INoCopy&& other) = delete;
};
52
67
// Capability values for an engine; the underlying type is int32_t.
// NOTE(review): the listing's own line numbers jump inside this enum (75 -> 78 -> 85 and
// 86 -> 89 -> 96 -> 99), and the `kVALUE = 3` constant in namespace impl just below reports
// three values while only two enumerators are visible here. Presumably some enumerators
// were dropped by the extraction — confirm against the original header before relying on
// this list.
68enum class EngineCapability : int32_t
69{
74 kSTANDARD = 0,
75

78

85 kSAFETY = 1,
86

89

96
99};
100
// Explicit specialization that publishes a value count of 3 through `kVALUE`.
// NOTE(review): the struct declarator (listing line 105) is missing from this extraction;
// given its position directly after EngineCapability it presumably specializes the
// enum-count trait for that enum — confirm against the original header.
101namespace impl
102{
104template <>
106{
107 static constexpr int32_t kVALUE = 3;
108};
109} // namespace impl
110
// Body of an aggregate that exposes a read-only data pointer and a 64-bit element count.
// NOTE(review): the declarator (listing line 125) is missing; presumably this is the
// `Weights` type taken by IRefitter::setWeights — confirm. Listing line 128 is also absent,
// so a member may have been dropped by the extraction.
126{
127public:
129 const void* values;
130 int64_t count;
131};
132
143class IHostMemory : public INoCopy
144{
145public:
146 virtual ~IHostMemory() noexcept = default;
147
149 void* data() const noexcept
150 {
151 return mImpl->data();
152 }
153
155 std::size_t size() const noexcept
156 {
157 return mImpl->size();
158 }
159
161 DataType type() const noexcept
162 {
163 return mImpl->type();
164 }
172 TRT_DEPRECATED void destroy() noexcept
173 {
174 delete this;
175 }
176
177protected:
178 apiv::VHostMemory* mImpl;
179};
180
//! Operation selector passed to IExprBuilder::operation().
//! Values are contiguous from 0 so that EnumMax<DimensionOperation>() can report the count.
enum class DimensionOperation : int32_t
{
    kSUM = 0,
    kPROD = 1,
    kMAX = 2,
    kMIN = 3,
    kSUB = 4,
    kEQUAL = 5,
    kLESS = 6,
    kFLOOR_DIV = 7,
    kCEIL_DIV = 8
};
203
205template <>
206constexpr inline int32_t EnumMax<DimensionOperation>() noexcept
207{
208 return 9;
209}
210
//! Location tag returned by ICudaEngine::getLocation(): device or host.
enum class TensorLocation : int32_t
{
    kDEVICE = 0,
    kHOST = 1,
};
220
// Explicit specialization that publishes a value count of 2 through `kVALUE`.
// NOTE(review): the struct declarator (listing line 225) is missing from this extraction;
// given its position directly after TensorLocation (two enumerators) it presumably
// specializes the enum-count trait for that enum — confirm against the original header.
221namespace impl
222{
224template <>
226{
227 static constexpr int32_t kVALUE = 2;
228};
229} // namespace impl
230
244{
245public:
247 bool isConstant() const noexcept
248 {
249 return mImpl->isConstant();
250 }
251
254 int32_t getConstantValue() const noexcept
255 {
256 return mImpl->getConstantValue();
257 }
258
259protected:
260 apiv::VDimensionExpr* mImpl;
261 virtual ~IDimensionExpr() noexcept = default;
262};
263
281class IExprBuilder : public INoCopy
282{
283public:
285 const IDimensionExpr* constant(int32_t value) noexcept
286 {
287 return mImpl->constant(value);
288 }
289
293 DimensionOperation op, const IDimensionExpr& first, const IDimensionExpr& second) noexcept
294 {
295 return mImpl->operation(op, first, second);
296 }
297
298protected:
299 apiv::VExprBuilder* mImpl;
300 virtual ~IExprBuilder() noexcept = default;
301};
302
// Body of an aggregate whose first member is the number of dimensions.
// NOTE(review): the declarator (listing line 308) and listing line 312 are missing from
// this extraction; presumably this is the `DimsExprs` type used by
// IPluginV2DynamicExt::getOutputDimensions and a second member was dropped — confirm.
309{
310public:
311 int32_t nbDims;
313};
314
// Remnant of an aggregate definition: only the braces survived the extraction.
// NOTE(review): the declarator and every member line (listing lines 320-329) are missing;
// presumably this is `DynamicPluginTensorDesc`, the type used by
// IPluginV2DynamicExt::configurePlugin — confirm against the original header.
321{
324

327

330};
331
351{
352public:
353 IPluginV2DynamicExt* clone() const noexcept override = 0;
354
379 virtual DimsExprs getOutputDimensions(
380 int32_t outputIndex, const DimsExprs* inputs, int32_t nbInputs, IExprBuilder& exprBuilder) noexcept
381 = 0;
382
386 static constexpr int32_t kFORMAT_COMBINATION_LIMIT = 100;
387
420 virtual bool supportsFormatCombination(
421 int32_t pos, const PluginTensorDesc* inOut, int32_t nbInputs, int32_t nbOutputs) noexcept
422 = 0;
423
462 virtual void configurePlugin(const DynamicPluginTensorDesc* in, int32_t nbInputs,
463 const DynamicPluginTensorDesc* out, int32_t nbOutputs) noexcept
464 = 0;
465
475 virtual size_t getWorkspaceSize(const PluginTensorDesc* inputs, int32_t nbInputs, const PluginTensorDesc* outputs,
476 int32_t nbOutputs) const noexcept
477 = 0;
478
491 virtual int32_t enqueue(const PluginTensorDesc* inputDesc, const PluginTensorDesc* outputDesc,
492 const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept
493 = 0;
494
495protected:
503 int32_t getTensorRTVersion() const noexcept override
504 {
505 return (static_cast<int32_t>(PluginVersion::kV2_DYNAMICEXT) << 24 | (NV_TENSORRT_VERSION & 0xFFFFFF));
506 }
507
508 virtual ~IPluginV2DynamicExt() noexcept {}
509
510private:
511 // Following are obsolete base class methods, and must not be implemented or used.
512
513 void configurePlugin(Dims const*, int32_t, Dims const*, int32_t, DataType const*, DataType const*, bool const*,
514 bool const*, PluginFormat, int32_t) noexcept override final
515 {
516 }
517
518 bool supportsFormat(DataType, PluginFormat) const noexcept override final
519 {
520 return false;
521 }
522
523 Dims getOutputDimensions(int32_t, Dims const*, int32_t) noexcept override final
524 {
525 return Dims{-1, {}};
526 }
527
528 bool isOutputBroadcastAcrossBatch(int32_t, bool const*, int32_t) const noexcept override final
529 {
530 return false;
531 }
532
533 bool canBroadcastInputAcrossBatch(int32_t) const noexcept override final
534 {
535 return true;
536 }
537
538 size_t getWorkspaceSize(int32_t) const noexcept override final
539 {
540 return 0;
541 }
542
543 int32_t enqueue(int32_t, const void* const*, void* const*, void*, cudaStream_t) noexcept override final
544 {
545 return 1;
546 }
547};
548
//! Application-implemented callback interface that receives per-layer timings.
//!
//! The `class IProfiler` declarator was missing from the listing and is restored here;
//! the name is the one used by the destructor.
class IProfiler
{
public:
    //! Called with a layer name and a time value in \p ms.
    virtual void reportLayerTime(const char* layerName, float ms) noexcept = 0;

    virtual ~IProfiler() noexcept {}
};
572
//! Role selector used by IRefitter::setWeights(), getMissing() and getAll().
//! Values are contiguous from 0 so that EnumMax<WeightsRole>() can report the count.
enum class WeightsRole : int32_t
{
    kKERNEL = 0,
    kBIAS = 1,
    kSHIFT = 2,
    kSCALE = 3,
    kCONSTANT = 4,
    kANY = 5,
};
588
590template <>
591constexpr inline int32_t EnumMax<WeightsRole>() noexcept
592{
593 return 6;
594}
595
//! Device kind selector with two values, GPU and DLA (implicitly numbered 0 and 1).
enum class DeviceType : int32_t
{
    kGPU,
    kDLA,
};
606
608template <>
609constexpr inline int32_t EnumMax<DeviceType>() noexcept
610{
611 return 2;
612}
613
621class IRuntime : public INoCopy
622{
623public:
624 virtual ~IRuntime() noexcept = default;
625
641 TRT_DEPRECATED nvinfer1::ICudaEngine* deserializeCudaEngine(
642 const void* blob, std::size_t size, IPluginFactory* pluginFactory) noexcept
643 {
644 return mImpl->deserializeCudaEngine(blob, size, nullptr);
645 }
646
654 void setDLACore(int32_t dlaCore) noexcept
655 {
656 mImpl->setDLACore(dlaCore);
657 }
658
665 int32_t getDLACore() const noexcept
666 {
667 return mImpl->getDLACore();
668 }
669
673 int32_t getNbDLACores() const noexcept
674 {
675 return mImpl->getNbDLACores();
676 }
677
685 TRT_DEPRECATED void destroy() noexcept
686 {
687 delete this;
688 }
689
699 void setGpuAllocator(IGpuAllocator* allocator) noexcept
700 {
701 mImpl->setGpuAllocator(allocator);
702 }
703
715 //
718 void setErrorRecorder(IErrorRecorder* recorder) noexcept
719 {
720 mImpl->setErrorRecorder(recorder);
721 }
722
734 {
735 return mImpl->getErrorRecorder();
736 }
737
748 ICudaEngine* deserializeCudaEngine(const void* blob, std::size_t size) noexcept
749 {
750 return mImpl->deserializeCudaEngine(blob, size, nullptr);
751 }
752
758 ILogger* getLogger() const noexcept
759 {
760 return mImpl->getLogger();
761 }
762
772 bool setMaxThreads(int32_t maxThreads) noexcept
773 {
774 return mImpl->setMaxThreads(maxThreads);
775 }
776
786 int32_t getMaxThreads() const noexcept
787 {
788 return mImpl->getMaxThreads();
789 }
790
791protected:
792 apiv::VRuntime* mImpl;
793};
794
802class IRefitter : public INoCopy
803{
804public:
805 virtual ~IRefitter() noexcept = default;
806
817 bool setWeights(const char* layerName, WeightsRole role, Weights weights) noexcept
818 {
819 return mImpl->setWeights(layerName, role, weights);
820 }
821
832 bool refitCudaEngine() noexcept
833 {
834 return mImpl->refitCudaEngine();
835 }
836
853 int32_t getMissing(int32_t size, const char** layerNames, WeightsRole* roles) noexcept
854 {
855 return mImpl->getMissing(size, layerNames, roles);
856 }
857
870 int32_t getAll(int32_t size, const char** layerNames, WeightsRole* roles) noexcept
871 {
872 return mImpl->getAll(size, layerNames, roles);
873 }
874
880 TRT_DEPRECATED void destroy() noexcept
881 {
882 delete this;
883 }
884
897 bool setDynamicRange(const char* tensorName, float min, float max) noexcept
898 {
899 return mImpl->setDynamicRange(tensorName, min, max);
900 }
901
909 float getDynamicRangeMin(const char* tensorName) const noexcept
910 {
911 return mImpl->getDynamicRangeMin(tensorName);
912 }
913
921 float getDynamicRangeMax(const char* tensorName) const noexcept
922 {
923 return mImpl->getDynamicRangeMax(tensorName);
924 }
925
937 int32_t getTensorsWithDynamicRange(int32_t size, const char** tensorNames) const noexcept
938 {
939 return mImpl->getTensorsWithDynamicRange(size, tensorNames);
940 }
941
953 //
956 void setErrorRecorder(IErrorRecorder* recorder) noexcept
957 {
958 mImpl->setErrorRecorder(recorder);
959 }
960
972 {
973 return mImpl->getErrorRecorder();
974 }
975
989 bool setNamedWeights(const char* name, Weights weights) noexcept
990 {
991 return mImpl->setNamedWeights(name, weights);
992 }
993
1009 int32_t getMissingWeights(int32_t size, const char** weightsNames) noexcept
1010 {
1011 return mImpl->getMissingWeights(size, weightsNames);
1012 }
1013
1025 int32_t getAllWeights(int32_t size, const char** weightsNames) noexcept
1026 {
1027 return mImpl->getAllWeights(size, weightsNames);
1028 }
1029
1035 ILogger* getLogger() const noexcept
1036 {
1037 return mImpl->getLogger();
1038 }
1039
1049 bool setMaxThreads(int32_t maxThreads) noexcept
1050 {
1051 return mImpl->setMaxThreads(maxThreads);
1052 }
1053
1063 int32_t getMaxThreads() const noexcept
1064 {
1065 return mImpl->getMaxThreads();
1066 }
1067
1068protected:
1069 apiv::VRefitter* mImpl;
1070};
1071
//! Selects the minimum, optimum or maximum entry of an optimization profile.
enum class OptProfileSelector : int32_t
{
    kMIN = 0,
    kOPT = 1,
    kMAX = 2
};
1088
1094template <>
1095constexpr inline int32_t EnumMax<OptProfileSelector>() noexcept
1096{
1097 return 3;
1098}
1099
1123{
1124public:
1150 bool setDimensions(const char* inputName, OptProfileSelector select, Dims dims) noexcept
1151 {
1152 return mImpl->setDimensions(inputName, select, dims);
1153 }
1154
1160 Dims getDimensions(const char* inputName, OptProfileSelector select) const noexcept
1161 {
1162 return mImpl->getDimensions(inputName, select);
1163 }
1164
1204 const char* inputName, OptProfileSelector select, const int32_t* values, int32_t nbValues) noexcept
1205 {
1206 return mImpl->setShapeValues(inputName, select, values, nbValues);
1207 }
1208
1215 int32_t getNbShapeValues(const char* inputName) const noexcept
1216 {
1217 return mImpl->getNbShapeValues(inputName);
1218 }
1219
1225 int32_t const* getShapeValues(const char* inputName, OptProfileSelector select) const noexcept
1226 {
1227 return mImpl->getShapeValues(inputName, select);
1228 }
1229
1243 bool setExtraMemoryTarget(float target) noexcept
1244 {
1245 return mImpl->setExtraMemoryTarget(target);
1246 }
1247
1251 float getExtraMemoryTarget() const noexcept
1252 {
1253 return mImpl->getExtraMemoryTarget();
1254 }
1255
1267 bool isValid() const noexcept
1268 {
1269 return mImpl->isValid();
1270 }
1271
1272protected:
1273 apiv::VOptimizationProfile* mImpl;
1274 virtual ~IOptimizationProfile() noexcept = default;
1275};
1276
//! Tactic source identifiers; ICudaEngine::getTacticSources() reports the sources an engine requires.
//! NOTE(review): TacticSources is a uint32_t; presumably each enumerator is a bit position in
//! that value — confirm before composing masks.
enum class TacticSource : int32_t
{
    kCUBLAS = 0,
    kCUBLAS_LT = 1,
    kCUDNN = 2
};
1291
1292template <>
1293constexpr inline int32_t EnumMax<TacticSource>() noexcept
1294{
1295 return 3;
1296}
1297
1304using TacticSources = uint32_t;
1305
//! Verbosity levels reported by ICudaEngine::getProfilingVerbosity().
//! NOTE(review): the listing skips its lines 1321-1324 inside this enum, so additional
//! (possibly deprecated alias) enumerators may have been dropped by the extraction — confirm.
enum class ProfilingVerbosity : int32_t
{
    kLAYER_NAMES_ONLY = 0,
    kNONE = 1,
    kDETAILED = 2,
};
1326
1328template <>
1329constexpr inline int32_t EnumMax<ProfilingVerbosity>() noexcept
1330{
1331 return 3;
1332}
1333
1341class ICudaEngine : public INoCopy
1342{
1343public:
1344 virtual ~ICudaEngine() noexcept = default;
1345
1356 int32_t getNbBindings() const noexcept
1357 {
1358 return mImpl->getNbBindings();
1359 }
1360
1378 int32_t getBindingIndex(const char* name) const noexcept
1379 {
1380 return mImpl->getBindingIndex(name);
1381 }
1382
1398 const char* getBindingName(int32_t bindingIndex) const noexcept
1399 {
1400 return mImpl->getBindingName(bindingIndex);
1401 }
1402
1411 bool bindingIsInput(int32_t bindingIndex) const noexcept
1412 {
1413 return mImpl->bindingIsInput(bindingIndex);
1414 }
1415
1436 Dims getBindingDimensions(int32_t bindingIndex) const noexcept
1437 {
1438 return mImpl->getBindingDimensions(bindingIndex);
1439 }
1440
1449 DataType getBindingDataType(int32_t bindingIndex) const noexcept
1450 {
1451 return mImpl->getBindingDataType(bindingIndex);
1452 }
1453
1461 int32_t getMaxBatchSize() const noexcept
1462 {
1463 return mImpl->getMaxBatchSize();
1464 }
1465
1475 int32_t getNbLayers() const noexcept
1476 {
1477 return mImpl->getNbLayers();
1478 }
1479
1489 IHostMemory* serialize() const noexcept
1490 {
1491 return mImpl->serialize();
1492 }
1493
1506 {
1507 return mImpl->createExecutionContext();
1508 }
1509
1517 TRT_DEPRECATED void destroy() noexcept
1518 {
1519 delete this;
1520 }
1521
1532 TensorLocation getLocation(int32_t bindingIndex) const noexcept
1533 {
1534 return mImpl->getLocation(bindingIndex);
1535 }
1536
1542 {
1543 return mImpl->createExecutionContextWithoutDeviceMemory();
1544 }
1545
1551 size_t getDeviceMemorySize() const noexcept
1552 {
1553 return mImpl->getDeviceMemorySize();
1554 }
1555
1561 bool isRefittable() const noexcept
1562 {
1563 return mImpl->isRefittable();
1564 }
1565
1575 int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
1576 {
1577 return mImpl->getBindingBytesPerComponent(bindingIndex);
1578 }
1579
1589 int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
1590 {
1591 return mImpl->getBindingComponentsPerElement(bindingIndex);
1592 }
1593
1599 TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
1600 {
1601 return mImpl->getBindingFormat(bindingIndex);
1602 }
1603
1618 const char* getBindingFormatDesc(int32_t bindingIndex) const noexcept
1619 {
1620 return mImpl->getBindingFormatDesc(bindingIndex);
1621 }
1622
1630 int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
1631 {
1632 return mImpl->getBindingVectorizedDim(bindingIndex);
1633 }
1634
1645 const char* getName() const noexcept
1646 {
1647 return mImpl->getName();
1648 }
1649
1656 int32_t getNbOptimizationProfiles() const noexcept
1657 {
1658 return mImpl->getNbOptimizationProfiles();
1659 }
1660
1683 Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
1684 {
1685 return mImpl->getProfileDimensions(bindingIndex, profileIndex, select);
1686 }
1687
1709 const int32_t* getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const
1710 noexcept
1711 {
1712 return mImpl->getProfileShapeValues(profileIndex, inputIndex, select);
1713 }
1714
1746 bool isShapeBinding(int32_t bindingIndex) const noexcept
1747 {
1748 return mImpl->isShapeBinding(bindingIndex);
1749 }
1750
1760 bool isExecutionBinding(int32_t bindingIndex) const noexcept
1761 {
1762 return mImpl->isExecutionBinding(bindingIndex);
1763 }
1764
1776 {
1777 return mImpl->getEngineCapability();
1778 }
1779
1790 //
1793 void setErrorRecorder(IErrorRecorder* recorder) noexcept
1794 {
1795 return mImpl->setErrorRecorder(recorder);
1796 }
1797
1809 {
1810 return mImpl->getErrorRecorder();
1811 }
1812
1827 bool hasImplicitBatchDimension() const noexcept
1828 {
1829 return mImpl->hasImplicitBatchDimension();
1830 }
1831
1837 {
1838 return mImpl->getTacticSources();
1839 }
1840
1848 {
1849 return mImpl->getProfilingVerbosity();
1850 }
1851
1858 {
1859 return mImpl->createEngineInspector();
1860 }
1861
1862protected:
1863 apiv::VCudaEngine* mImpl;
1864};
1865
1877{
1878public:
1879 virtual ~IExecutionContext() noexcept = default;
1880
1898 bool execute(int32_t batchSize, void* const* bindings) noexcept
1899 {
1900 return mImpl->execute(batchSize, bindings);
1901 }
1902
1927 bool enqueue(int32_t batchSize, void* const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept
1928 {
1929 return mImpl->enqueue(batchSize, bindings, stream, inputConsumed);
1930 }
1931
1940 void setDebugSync(bool sync) noexcept
1941 {
1942 mImpl->setDebugSync(sync);
1943 }
1944
1950 bool getDebugSync() const noexcept
1951 {
1952 return mImpl->getDebugSync();
1953 }
1954
1960 void setProfiler(IProfiler* profiler) noexcept
1961 {
1962 mImpl->setProfiler(profiler);
1963 }
1964
1970 IProfiler* getProfiler() const noexcept
1971 {
1972 return mImpl->getProfiler();
1973 }
1974
1980 const ICudaEngine& getEngine() const noexcept
1981 {
1982 return mImpl->getEngine();
1983 }
1984
1992 TRT_DEPRECATED void destroy() noexcept
1993 {
1994 delete this;
1995 }
1996
2004 void setName(const char* name) noexcept
2005 {
2006 mImpl->setName(name);
2007 }
2008
2014 const char* getName() const noexcept
2015 {
2016 return mImpl->getName();
2017 }
2018
2030 void setDeviceMemory(void* memory) noexcept
2031 {
2032 mImpl->setDeviceMemory(memory);
2033 }
2034
2051 Dims getStrides(int32_t bindingIndex) const noexcept
2052 {
2053 return mImpl->getStrides(bindingIndex);
2054 }
2055
2056public:
2094 bool setOptimizationProfile(int32_t profileIndex) noexcept
2095 {
2096 return mImpl->setOptimizationProfile(profileIndex);
2097 }
2098
2106 int32_t getOptimizationProfile() const noexcept
2107 {
2108 return mImpl->getOptimizationProfile();
2109 }
2110
2143 bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
2144 {
2145 return mImpl->setBindingDimensions(bindingIndex, dimensions);
2146 }
2147
2173 Dims getBindingDimensions(int32_t bindingIndex) const noexcept
2174 {
2175 return mImpl->getBindingDimensions(bindingIndex);
2176 }
2177
2203 bool setInputShapeBinding(int32_t bindingIndex, int32_t const* data) noexcept
2204 {
2205 return mImpl->setInputShapeBinding(bindingIndex, data);
2206 }
2207
2225 bool getShapeBinding(int32_t bindingIndex, int32_t* data) const noexcept
2226 {
2227 return mImpl->getShapeBinding(bindingIndex, data);
2228 }
2229
2240 bool allInputDimensionsSpecified() const noexcept
2241 {
2242 return mImpl->allInputDimensionsSpecified();
2243 }
2244
2254 bool allInputShapesSpecified() const noexcept
2255
2256 {
2257 return mImpl->allInputShapesSpecified();
2258 }
2259
2271 //
2274 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2275 {
2276 mImpl->setErrorRecorder(recorder);
2277 }
2278
2290 {
2291 return mImpl->getErrorRecorder();
2292 }
2293
2306 bool executeV2(void* const* bindings) noexcept
2307 {
2308 return mImpl->executeV2(bindings);
2309 }
2310
2334 bool enqueueV2(void* const* bindings, cudaStream_t stream, cudaEvent_t* inputConsumed) noexcept
2335 {
2336 return mImpl->enqueueV2(bindings, stream, inputConsumed);
2337 }
2338
2382 bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
2383 {
2384 return mImpl->setOptimizationProfileAsync(profileIndex, stream);
2385 }
2386
2397 void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
2398 {
2399 mImpl->setEnqueueEmitsProfile(enqueueEmitsProfile);
2400 }
2401
2408 bool getEnqueueEmitsProfile() const noexcept
2409 {
2410 return mImpl->getEnqueueEmitsProfile();
2411 }
2412
2435 bool reportToProfiler() const noexcept
2436 {
2437 return mImpl->reportToProfiler();
2438 }
2439
2440protected:
2441 apiv::VExecutionContext* mImpl;
2442}; // class IExecutionContext
2443
//! Output format selector for IEngineInspector::getLayerInformation() and
//! getEngineInformation(): one line per layer, or JSON.
enum class LayerInformationFormat : int32_t
{
    kONELINE = 0,
    kJSON = 1,
};
2456
2459template <>
2460constexpr inline int32_t EnumMax<LayerInformationFormat>() noexcept
2461{
2462 return 2;
2463}
2464
2481{
2482public:
2483 virtual ~IEngineInspector() noexcept = default;
2484
2497 bool setExecutionContext(IExecutionContext const* context) noexcept
2498 {
2499 return mImpl->setExecutionContext(context);
2500 }
2501
2510 {
2511 return mImpl->getExecutionContext();
2512 }
2513
2534 AsciiChar const* getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
2535 {
2536 return mImpl->getLayerInformation(layerIndex, format);
2537 }
2538
2560 {
2561 return mImpl->getEngineInformation(format);
2562 }
2563
2575 //
2578 void setErrorRecorder(IErrorRecorder* recorder) noexcept
2579 {
2580 mImpl->setErrorRecorder(recorder);
2581 }
2582
2594 {
2595 return mImpl->getErrorRecorder();
2596 }
2597
2598protected:
2599 apiv::VEngineInspector* mImpl;
2600}; // class IEngineInspector
2601
2602} // namespace nvinfer1
2603
2608extern "C" TENSORRTAPI void* createInferRuntime_INTERNAL(void* logger, int32_t version) noexcept;
2609
2614extern "C" TENSORRTAPI void* createInferRefitter_INTERNAL(void* engine, void* logger, int32_t version) noexcept;
2615
2620
2626extern "C" TENSORRTAPI nvinfer1::ILogger* getLogger() noexcept;
2627
2628namespace nvinfer1
2629{
2630namespace // unnamed namespace avoids linkage surprises when linking objects built with different versions of this
2631 // header.
2632{
2638inline IRuntime* createInferRuntime(ILogger& logger) noexcept
2639{
2640 return static_cast<IRuntime*>(createInferRuntime_INTERNAL(&logger, NV_TENSORRT_VERSION));
2641}
2642
2648inline IRefitter* createInferRefitter(ICudaEngine& engine, ILogger& logger) noexcept
2649{
2650 return static_cast<IRefitter*>(createInferRefitter_INTERNAL(&engine, &logger, NV_TENSORRT_VERSION));
2651}
2652
2653} // namespace
2654
2666template <typename T>
2668{
2669public:
2671 {
2672 getPluginRegistry()->registerCreator(instance, "");
2673 }
2674
2675private:
2677 T instance{};
2678};
2679
2680} // namespace nvinfer1
2681
//! Defines a file-local static PluginRegistrar<name>, whose constructor registers the
//! plugin creator type `name` with the plugin registry.
#define REGISTER_TENSORRT_PLUGIN(name)                                                                                 \
    static nvinfer1::PluginRegistrar<name> pluginRegistrar##name {}
2684#endif // NV_INFER_RUNTIME_H
nvinfer1::ILogger * getLogger() noexcept
Return the logger object.
nvinfer1::IPluginRegistry * getPluginRegistry() noexcept
Return the plugin registry.
#define TENSORRTAPI
Definition: NvInferRuntimeCommon.h:54
#define NV_TENSORRT_VERSION
Definition: NvInferRuntimeCommon.h:73
#define TRT_DEPRECATED
Definition: NvInferRuntimeCommon.h:40
#define TRT_DEPRECATED_ENUM
Definition: NvInferRuntimeCommon.h:41
Structure to define the dimensions of a tensor.
Definition: NvInferRuntimeCommon.h:153
static constexpr int32_t MAX_DIMS
The maximum rank (number of dimensions) supported for a tensor.
Definition: NvInferRuntimeCommon.h:156
Definition: NvInferRuntime.h:309
int32_t nbDims
The number of dimensions.
Definition: NvInferRuntime.h:311
An engine for executing inference on a built network, with functionally unsafe features.
Definition: NvInferRuntime.h:1342
int32_t getBindingBytesPerComponent(int32_t bindingIndex) const noexcept
Return the number of bytes per component of an element.
Definition: NvInferRuntime.h:1575
int32_t getBindingComponentsPerElement(int32_t bindingIndex) const noexcept
Return the number of components included in one element.
Definition: NvInferRuntime.h:1589
bool isShapeBinding(int32_t bindingIndex) const noexcept
True if tensor is required as input for shape calculations or output from them.
Definition: NvInferRuntime.h:1746
bool hasImplicitBatchDimension() const noexcept
Query whether the engine was built with an implicit batch dimension.
Definition: NvInferRuntime.h:1827
const char * getName() const noexcept
Returns the name of the network associated with the engine.
Definition: NvInferRuntime.h:1645
EngineCapability getEngineCapability() const noexcept
Determine what execution capability this engine has.
Definition: NvInferRuntime.h:1775
TRT_DEPRECATED void destroy() noexcept
Destroy this object;.
Definition: NvInferRuntime.h:1517
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:1808
TensorFormat getBindingFormat(int32_t bindingIndex) const noexcept
Return the binding format.
Definition: NvInferRuntime.h:1599
apiv::VCudaEngine * mImpl
Definition: NvInferRuntime.h:1863
const int32_t * getProfileShapeValues(int32_t profileIndex, int32_t inputIndex, OptProfileSelector select) const noexcept
Get minimum / optimum / maximum values for an input shape binding under an optimization profile.
Definition: NvInferRuntime.h:1709
TensorLocation getLocation(int32_t bindingIndex) const noexcept
Get location of binding.
Definition: NvInferRuntime.h:1532
Dims getProfileDimensions(int32_t bindingIndex, int32_t profileIndex, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a particular binding under an optimization profile...
Definition: NvInferRuntime.h:1683
Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dimensions of a binding.
Definition: NvInferRuntime.h:1436
int32_t getBindingVectorizedDim(int32_t bindingIndex) const noexcept
Return the dimension index along which the buffer is vectorized.
Definition: NvInferRuntime.h:1630
int32_t getMaxBatchSize() const noexcept
Get the maximum batch size which can be used for inference.
Definition: NvInferRuntime.h:1461
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:1793
size_t getDeviceMemorySize() const noexcept
Return the amount of device memory required by an execution context.
Definition: NvInferRuntime.h:1551
TacticSources getTacticSources() const noexcept
Return the tactic sources required by this engine.
Definition: NvInferRuntime.h:1836
virtual ~ICudaEngine() noexcept=default
ProfilingVerbosity getProfilingVerbosity() const noexcept
Return the ProfilingVerbosity the builder config was set to when the engine was built.
Definition: NvInferRuntime.h:1847
IHostMemory * serialize() const noexcept
Serialize the network to a stream.
Definition: NvInferRuntime.h:1489
IExecutionContext * createExecutionContextWithoutDeviceMemory() noexcept
Create an execution context without any device memory allocated.
Definition: NvInferRuntime.h:1541
int32_t getBindingIndex(const char *name) const noexcept
Retrieve the binding index for a named tensor.
Definition: NvInferRuntime.h:1378
DataType getBindingDataType(int32_t bindingIndex) const noexcept
Determine the required data type for a buffer from its binding index.
Definition: NvInferRuntime.h:1449
IExecutionContext * createExecutionContext() noexcept
Create an execution context.
Definition: NvInferRuntime.h:1505
IEngineInspector * createEngineInspector() const noexcept
Create a new engine inspector which prints the layer information in an engine or an execution context...
Definition: NvInferRuntime.h:1857
const char * getBindingName(int32_t bindingIndex) const noexcept
Retrieve the name corresponding to a binding index.
Definition: NvInferRuntime.h:1398
int32_t getNbOptimizationProfiles() const noexcept
Get the number of optimization profiles defined for this engine.
Definition: NvInferRuntime.h:1656
int32_t getNbLayers() const noexcept
Get the number of layers in the network.
Definition: NvInferRuntime.h:1475
const char * getBindingFormatDesc(int32_t bindingIndex) const noexcept
Return the human readable description of the tensor format.
Definition: NvInferRuntime.h:1618
bool bindingIsInput(int32_t bindingIndex) const noexcept
Determine whether a binding is an input binding.
Definition: NvInferRuntime.h:1411
bool isExecutionBinding(int32_t bindingIndex) const noexcept
True if pointer to tensor data is required for execution phase, false if nullptr can be supplied.
Definition: NvInferRuntime.h:1760
bool isRefittable() const noexcept
Return true if an engine can be refit.
Definition: NvInferRuntime.h:1561
Definition: NvInferRuntime.h:244
bool isConstant() const noexcept
Return true if expression is a build-time constant.
Definition: NvInferRuntime.h:247
virtual ~IDimensionExpr() noexcept=default
apiv::VDimensionExpr * mImpl
Definition: NvInferRuntime.h:260
int32_t getConstantValue() const noexcept
Definition: NvInferRuntime.h:254
An engine inspector which prints out the layer information of an engine or an execution context.
Definition: NvInferRuntime.h:2481
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2593
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2578
AsciiChar const * getEngineInformation(LayerInformationFormat format) const noexcept
Get a string describing the information about all the layers in the current engine or the execution c...
Definition: NvInferRuntime.h:2559
IExecutionContext const * getExecutionContext() const noexcept
Get the context currently being inspected.
Definition: NvInferRuntime.h:2509
apiv::VEngineInspector * mImpl
Definition: NvInferRuntime.h:2599
virtual ~IEngineInspector() noexcept=default
AsciiChar const * getLayerInformation(int32_t layerIndex, LayerInformationFormat format) const noexcept
Get a string describing the information about a specific layer in the current engine or the execution...
Definition: NvInferRuntime.h:2534
Reference counted application-implemented error reporting interface for TensorRT objects.
Definition: NvInferRuntimeCommon.h:1661
Context for executing inference using an engine, with functionally unsafe features.
Definition: NvInferRuntime.h:1877
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:2289
bool reportToProfiler() const noexcept
Calculate layer timing info for the current optimization profile in IExecutionContext and update the ...
Definition: NvInferRuntime.h:2435
virtual ~IExecutionContext() noexcept=default
void setDeviceMemory(void *memory) noexcept
Set the device memory for use by this execution context.
Definition: NvInferRuntime.h:2030
Dims getBindingDimensions(int32_t bindingIndex) const noexcept
Get the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2173
bool getShapeBinding(int32_t bindingIndex, int32_t *data) const noexcept
Get values of an input tensor required for shape calculations or an output tensor produced by shape c...
Definition: NvInferRuntime.h:2225
void setDebugSync(bool sync) noexcept
Set the debug sync flag.
Definition: NvInferRuntime.h:1940
bool enqueueV2(void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference.
Definition: NvInferRuntime.h:2334
void setEnqueueEmitsProfile(bool enqueueEmitsProfile) noexcept
Set whether enqueue emits layer timing to the profiler.
Definition: NvInferRuntime.h:2397
bool setBindingDimensions(int32_t bindingIndex, Dims dimensions) noexcept
Set the dynamic dimensions of a binding.
Definition: NvInferRuntime.h:2143
bool setInputShapeBinding(int32_t bindingIndex, int32_t const *data) noexcept
Set values of input tensor required by shape calculations.
Definition: NvInferRuntime.h:2203
bool executeV2(void *const *bindings) noexcept
Synchronously execute inference on a network.
Definition: NvInferRuntime.h:2306
bool getEnqueueEmitsProfile() const noexcept
Get the enqueueEmitsProfile state.
Definition: NvInferRuntime.h:2408
bool setOptimizationProfileAsync(int32_t profileIndex, cudaStream_t stream) noexcept
Select an optimization profile for the current context with async semantics.
Definition: NvInferRuntime.h:2382
apiv::VExecutionContext * mImpl
Definition: NvInferRuntime.h:2441
int32_t getOptimizationProfile() const noexcept
Get the index of the currently selected optimization profile.
Definition: NvInferRuntime.h:2106
bool enqueue(int32_t batchSize, void *const *bindings, cudaStream_t stream, cudaEvent_t *inputConsumed) noexcept
Asynchronously execute inference on a batch.
Definition: NvInferRuntime.h:1927
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:1992
bool getDebugSync() const noexcept
Get the debug sync flag.
Definition: NvInferRuntime.h:1950
const char * getName() const noexcept
Return the name of the execution context.
Definition: NvInferRuntime.h:2014
Dims getStrides(int32_t bindingIndex) const noexcept
Return the strides of the buffer for the given binding.
Definition: NvInferRuntime.h:2051
IProfiler * getProfiler() const noexcept
Get the profiler.
Definition: NvInferRuntime.h:1970
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:2274
TRT_DEPRECATED bool setOptimizationProfile(int32_t profileIndex) noexcept
Select an optimization profile for the current context.
Definition: NvInferRuntime.h:2094
bool allInputDimensionsSpecified() const noexcept
Whether all dynamic dimensions of input tensors have been specified.
Definition: NvInferRuntime.h:2240
const ICudaEngine & getEngine() const noexcept
Get the associated engine.
Definition: NvInferRuntime.h:1980
void setProfiler(IProfiler *profiler) noexcept
Set the profiler.
Definition: NvInferRuntime.h:1960
bool allInputShapesSpecified() const noexcept
Whether all input shape bindings have been specified.
Definition: NvInferRuntime.h:2254
void setName(const char *name) noexcept
Set the name of the execution context.
Definition: NvInferRuntime.h:2004
Definition: NvInferRuntime.h:282
const IDimensionExpr * constant(int32_t value) noexcept
Return pointer to IDimensionExpr for given value.
Definition: NvInferRuntime.h:285
virtual ~IExprBuilder() noexcept=default
apiv::VExprBuilder * mImpl
Definition: NvInferRuntime.h:299
const IDimensionExpr * operation(DimensionOperation op, const IDimensionExpr &first, const IDimensionExpr &second) noexcept
Definition: NvInferRuntime.h:292
Application-implemented class for controlling allocation on the GPU.
Definition: NvInferRuntimeCommon.h:1334
Class to handle library allocated memory that is accessible to the user.
Definition: NvInferRuntime.h:144
void * data() const noexcept
A pointer to the raw data that is owned by the library.
Definition: NvInferRuntime.h:149
DataType type() const noexcept
The type of the memory that was allocated.
Definition: NvInferRuntime.h:161
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:172
std::size_t size() const noexcept
The size in bytes of the data that was allocated.
Definition: NvInferRuntime.h:155
apiv::VHostMemory * mImpl
Definition: NvInferRuntime.h:178
virtual ~IHostMemory() noexcept=default
Application-implemented logging interface for the builder, refitter and runtime.
Definition: NvInferRuntimeCommon.h:1472
Base class (INoCopy) for interfaces that must not be copied or moved; its copy and move constructors and assignment operators are deleted.
Definition: NvInferRuntime.h:43
INoCopy & operator=(INoCopy &&other)=delete
INoCopy & operator=(const INoCopy &other)=delete
INoCopy(INoCopy &&other)=delete
virtual ~INoCopy()=default
INoCopy(const INoCopy &other)=delete
Optimization profile for dynamic input dimensions and shape tensors.
Definition: NvInferRuntime.h:1123
apiv::VOptimizationProfile * mImpl
Definition: NvInferRuntime.h:1273
bool setDimensions(const char *inputName, OptProfileSelector select, Dims dims) noexcept
Set the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1150
int32_t getNbShapeValues(const char *inputName) const noexcept
Get the number of values for an input shape tensor.
Definition: NvInferRuntime.h:1215
virtual ~IOptimizationProfile() noexcept=default
float getExtraMemoryTarget() const noexcept
Get the extra memory target that has been defined for this profile.
Definition: NvInferRuntime.h:1251
Dims getDimensions(const char *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum dimensions for a dynamic input tensor.
Definition: NvInferRuntime.h:1160
bool setExtraMemoryTarget(float target) noexcept
Set a target for extra GPU memory that may be used by this profile.
Definition: NvInferRuntime.h:1243
int32_t const * getShapeValues(const char *inputName, OptProfileSelector select) const noexcept
Get the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1225
bool setShapeValues(const char *inputName, OptProfileSelector select, const int32_t *values, int32_t nbValues) noexcept
Set the minimum / optimum / maximum values for an input shape tensor.
Definition: NvInferRuntime.h:1203
bool isValid() const noexcept
Check whether the optimization profile can be passed to an IBuilderConfig object.
Definition: NvInferRuntime.h:1267
Single registration point for all plugins in an application. It is used to find plugin implementation...
Definition: NvInferRuntimeCommon.h:1206
virtual bool registerCreator(IPluginCreator &creator, AsciiChar const *const pluginNamespace) noexcept=0
Register a plugin creator. Returns false if one with the same type is already registered.
Definition: NvInferRuntime.h:351
IPluginV2DynamicExt * clone() const noexcept override=0
Clone the plugin object. This copies over internal plugin parameters as well and returns a new plugin...
virtual ~IPluginV2DynamicExt() noexcept
Definition: NvInferRuntime.h:508
Plugin class for user-implemented layers.
Definition: NvInferRuntimeCommon.h:676
Application-implemented interface for profiling.
Definition: NvInferRuntime.h:560
virtual void reportLayerTime(const char *layerName, float ms) noexcept=0
Layer time reporting callback.
virtual ~IProfiler() noexcept
Definition: NvInferRuntime.h:570
Updates weights in an engine.
Definition: NvInferRuntime.h:803
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the refitter.
Definition: NvInferRuntime.h:1063
float getDynamicRangeMin(const char *tensorName) const noexcept
Get minimum of dynamic range.
Definition: NvInferRuntime.h:909
bool setDynamicRange(const char *tensorName, float min, float max) noexcept
Definition: NvInferRuntime.h:897
ILogger * getLogger() const noexcept
Get the logger with which the refitter was created.
Definition: NvInferRuntime.h:1035
bool refitCudaEngine() noexcept
Updates associated engine. Return true if successful.
Definition: NvInferRuntime.h:832
int32_t getMissing(int32_t size, const char **layerNames, WeightsRole *roles) noexcept
Get description of missing weights.
Definition: NvInferRuntime.h:853
int32_t getTensorsWithDynamicRange(int32_t size, const char **tensorNames) const noexcept
Get names of all tensors that have refittable dynamic ranges.
Definition: NvInferRuntime.h:937
TRT_DEPRECATED void destroy() noexcept
Definition: NvInferRuntime.h:880
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:1049
int32_t getAll(int32_t size, const char **layerNames, WeightsRole *roles) noexcept
Get description of all weights that could be refit.
Definition: NvInferRuntime.h:870
int32_t getMissingWeights(int32_t size, const char **weightsNames) noexcept
Get names of missing weights.
Definition: NvInferRuntime.h:1009
bool setNamedWeights(const char *name, Weights weights) noexcept
Specify new weights of given name.
Definition: NvInferRuntime.h:989
apiv::VRefitter * mImpl
Definition: NvInferRuntime.h:1069
virtual ~IRefitter() noexcept=default
float getDynamicRangeMax(const char *tensorName) const noexcept
Get maximum of dynamic range.
Definition: NvInferRuntime.h:921
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:956
int32_t getAllWeights(int32_t size, const char **weightsNames) noexcept
Get names of all weights that could be refit.
Definition: NvInferRuntime.h:1025
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:971
Allows a serialized functionally unsafe engine to be deserialized.
Definition: NvInferRuntime.h:622
bool setMaxThreads(int32_t maxThreads) noexcept
Set the maximum number of threads.
Definition: NvInferRuntime.h:772
virtual ~IRuntime() noexcept=default
ICudaEngine * deserializeCudaEngine(const void *blob, std::size_t size) noexcept
Deserialize an engine from a stream.
Definition: NvInferRuntime.h:748
TRT_DEPRECATED void destroy() noexcept
Destroy this object.
Definition: NvInferRuntime.h:685
apiv::VRuntime * mImpl
Definition: NvInferRuntime.h:792
void setDLACore(int32_t dlaCore) noexcept
Set the DLA core that the deserialized engine must execute on.
Definition: NvInferRuntime.h:654
int32_t getNbDLACores() const noexcept
Returns number of DLA hardware cores accessible.
Definition: NvInferRuntime.h:673
int32_t getDLACore() const noexcept
Get the DLA core that the engine executes on.
Definition: NvInferRuntime.h:665
void setGpuAllocator(IGpuAllocator *allocator) noexcept
Set the GPU allocator.
Definition: NvInferRuntime.h:699
IErrorRecorder * getErrorRecorder() const noexcept
Get the ErrorRecorder assigned to this interface.
Definition: NvInferRuntime.h:733
ILogger * getLogger() const noexcept
Get the logger with which the runtime was created.
Definition: NvInferRuntime.h:758
int32_t getMaxThreads() const noexcept
Get the maximum number of threads that can be used by the runtime.
Definition: NvInferRuntime.h:786
void setErrorRecorder(IErrorRecorder *recorder) noexcept
Set the ErrorRecorder for this interface.
Definition: NvInferRuntime.h:718
Register the plugin creator to the registry. The static registry object will be instantiated when the ...
Definition: NvInferRuntime.h:2668
PluginRegistrar()
Definition: NvInferRuntime.h:2670
An array of weights used as a layer parameter.
Definition: NvInferRuntime.h:126
DataType type
The type of the weights.
Definition: NvInferRuntime.h:128
int64_t count
The number of weights in the array.
Definition: NvInferRuntime.h:130
const void * values
The weight values, in a contiguous array.
Definition: NvInferRuntime.h:129
IRuntime * createInferRuntime(ILogger &logger) noexcept
Create an instance of an IRuntime class.
The TensorRT API version 1 namespace.
uint32_t TacticSources
Represents a collection of one or more TacticSource values combined using bitwise-OR operations.
Definition: NvInferRuntime.h:1304
EngineCapability
List of supported engine capability flows.
Definition: NvInferRuntime.h:69
DimensionOperation
An operation on two IDimensionExpr, which represent integer expressions used in dimension computation...
Definition: NvInferRuntime.h:192
@ kSUB
Subtract the second element from the first.
@ kSUM
Sum of the two operands.
@ kPROD
Product of the two operands.
@ kFLOOR_DIV
Floor division of the first element by the second.
@ kEQUAL
1 if operands are equal, 0 otherwise.
@ kMIN
Minimum of the two operands.
@ kLESS
1 if first operand is less than second operand, 0 otherwise.
@ kMAX
Maximum of the two operands.
@ kCEIL_DIV
Division rounding up.
constexpr int32_t EnumMax< WeightsRole >() noexcept
Maximum number of elements in WeightsRole enum.
Definition: NvInferRuntime.h:591
char_t AsciiChar
AsciiChar is the type used by TensorRT to represent valid ASCII characters.
Definition: NvInferRuntimeCommon.h:88
@ kV2_DYNAMICEXT
IPluginV2DynamicExt.
constexpr int32_t EnumMax< LayerInformationFormat >() noexcept
Definition: NvInferRuntime.h:2460
DataType
The type of weights and tensors.
Definition: NvInferRuntimeCommon.h:114
DeviceType
The device that this layer/network will execute on.
Definition: NvInferRuntime.h:602
@ kSCALE
Scale layer.
@ kCONSTANT
Constant layer.
@ kDEFAULT
Similar to ONNX Gather.
constexpr int32_t EnumMax< OptProfileSelector >() noexcept
Number of different values of OptProfileSelector enum.
Definition: NvInferRuntime.h:1095
WeightsRole
How a layer uses particular Weights.
Definition: NvInferRuntime.h:580
@ kSHIFT
shift part of IScaleLayer
@ kANY
Any other weights role.
@ kBIAS
bias for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
@ kKERNEL
kernel for IConvolutionLayer, IDeconvolutionLayer, or IFullyConnectedLayer
constexpr int32_t EnumMax< ProfilingVerbosity >() noexcept
Maximum number of profile verbosity levels in ProfilingVerbosity enum.
Definition: NvInferRuntime.h:1329
ProfilingVerbosity
List of verbosity levels of layer information exposed in NVTX annotations and in IEngineInspector.
Definition: NvInferRuntime.h:1316
@ kLAYER_NAMES_ONLY
Print only the layer names. This is the default setting.
@ kDETAILED
Print detailed layer information including layer names and layer parameters.
TacticSource
List of tactic sources for TensorRT.
Definition: NvInferRuntime.h:1285
@ kCUBLAS_LT
cuBLAS LT tactics
@ kCUDNN
cuDNN tactics
@ kCUBLAS
cuBLAS tactics.
TensorFormat PluginFormat
PluginFormat is reserved for backward compatibility.
Definition: NvInferRuntimeCommon.h:308
@ kMIN
Minimum of the two elements.
TensorFormat
Format of the input/output tensors.
Definition: NvInferRuntimeCommon.h:184
constexpr int32_t EnumMax< TacticSource >() noexcept
Maximum number of tactic sources in TacticSource enum.
Definition: NvInferRuntime.h:1293
LayerInformationFormat
The format in which the IEngineInspector prints the layer information.
Definition: NvInferRuntime.h:2452
@ kJSON
Print layer information in JSON format.
@ kONELINE
Print layer information in one line per layer.
constexpr int32_t EnumMax< DeviceType >() noexcept
Maximum number of elements in DeviceType enum.
Definition: NvInferRuntime.h:609
constexpr int32_t EnumMax< DimensionOperation >() noexcept
Maximum number of elements in DimensionOperation enum.
Definition: NvInferRuntime.h:206
TensorLocation
The location for tensor data storage, device or host.
Definition: NvInferRuntime.h:216
@ kHOST
Data stored on host.
@ kDEVICE
Data stored on device.
OptProfileSelector
When setting or querying optimization profile parameters (such as shape tensor inputs or dynamic dime...
Definition: NvInferRuntime.h:1083
@ kOPT
This is used to set or get the value that is used in the optimization (kernel selection).
Definition: NvInferRuntime.h:321
Dims min
Lower bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:326
Dims max
Upper bounds on tensor’s dimensions.
Definition: NvInferRuntime.h:329
PluginTensorDesc desc
Information required to interpret a pointer to tensor data, except that desc.dims has -1 in place of ...
Definition: NvInferRuntime.h:323
Fields that a plugin might see for an input or output.
Definition: NvInferRuntimeCommon.h:332
Declaration of EnumMaxImpl struct to store maximum number of elements in an enumeration type.
Definition: NvInferRuntimeCommon.h:99