@@ -200,7 +200,6 @@ def refit_from_dict(self, refit_weights, is_fp16):
200
200
trt_datatype = trt .DataType .HALF
201
201
202
202
# trt.Weight and trt.TensorLocation
203
- refit_weights [trt_weight_name ] = refit_weights [trt_weight_name ].cpu ()
204
203
trt_wt_tensor = trt .Weights (
205
204
trt_datatype ,
206
205
refit_weights [trt_weight_name ].data_ptr (),
@@ -213,15 +212,16 @@ def refit_from_dict(self, refit_weights, is_fp16):
213
212
)
214
213
215
214
# apply refit
216
- # refitter.set_named_weights(trt_weight_name, trt_wt_tensor, trt_wt_location)
217
- refitter .set_named_weights (trt_weight_name , trt_wt_tensor )
215
+ refitter .set_named_weights (trt_weight_name , trt_wt_tensor , trt_wt_location )
218
216
refitted_weights .add (trt_weight_name )
219
217
220
218
assert set (refitted_weights ) == set (refit_weights .keys ())
221
219
if not refitter .refit_cuda_engine ():
222
220
print ("Error: failed to refit new weights." )
223
221
exit (0 )
224
222
223
+ print (f"[I] Total refitted weights { len (refitted_weights )} ." )
224
+
225
225
def build (
226
226
self ,
227
227
onnx_path ,
@@ -240,14 +240,18 @@ def build(
240
240
for _p , i_profile in zip (p , input_profile ):
241
241
for name , dims in i_profile .items ():
242
242
assert len (dims ) == 3
243
- _p .add (name , min = dims [0 ], opt = dims [1 ], max = dims [2 ])
243
+ _p .add (name , min = dims [0 ], opt = dims [1 ], max = dims [2 ])
244
244
245
245
config_kwargs = {}
246
246
if not enable_all_tactics :
247
247
config_kwargs ["tactic_sources" ] = []
248
248
249
249
network = network_from_onnx_path (
250
- onnx_path , flags = [trt .OnnxParserFlag .NATIVE_INSTANCENORM ]
250
+ onnx_path ,
251
+ flags = [
252
+ trt .OnnxParserFlag .NATIVE_INSTANCENORM ,
253
+ trt .NetworkDefinitionCreationFlag .STRONGLY_TYPED ,
254
+ ],
251
255
)
252
256
if update_output_names :
253
257
print (f"Updating network outputs to { update_output_names } " )
@@ -257,7 +261,6 @@ def build(
257
261
config = builder .create_builder_config ()
258
262
config .progress_monitor = TQDMProgressMonitor ()
259
263
260
- config .set_flag (trt .BuilderFlag .STRICT_TYPES )
261
264
config .set_flag (trt .BuilderFlag .FP16 ) if fp16 else None
262
265
config .set_flag (trt .BuilderFlag .REFIT ) if enable_refit else None
263
266
@@ -305,53 +308,52 @@ def load(self):
305
308
print (f"Loading TensorRT engine: { self .engine_path } " )
306
309
self .engine = engine_from_bytes (bytes_from_path (self .engine_path ))
307
310
308
- def activate (self , reuse_device_memory = None ):
311
+ def activate (self , reuse_device_memory = False ):
309
312
if reuse_device_memory :
310
313
self .context = self .engine .create_execution_context_without_device_memory ()
311
- # self.context.device_memory = reuse_device_memory
312
314
else :
313
315
self .context = self .engine .create_execution_context ()
314
316
315
317
def allocate_buffers (self , shape_dict = None , device = "cuda" , additional_shapes = None ):
316
318
nvtx .range_push ("allocate_buffers" )
317
- for idx in range (self .engine .num_io_tensors ):
318
- binding = self .engine [idx ]
319
- if shape_dict and binding in shape_dict :
320
- shape = shape_dict [binding ].shape
321
- elif additional_shapes and binding in additional_shapes :
322
- shape = additional_shapes [binding ]
319
+ for binding in range (self .engine .num_io_tensors ):
320
+ name = self .engine .get_tensor_name (binding )
321
+
322
+ if shape_dict and name in shape_dict :
323
+ shape = shape_dict [name ].shape
324
+ elif additional_shapes and name in additional_shapes :
325
+ shape = additional_shapes [name ]
323
326
else :
324
- shape = self .context .get_binding_shape (idx )
325
- dtype = trt .nptype (self .engine .get_binding_dtype (binding ))
326
- if self .engine .binding_is_input (binding ):
327
- self .context .set_binding_shape (idx , shape )
327
+ shape = self .context .get_tensor_shape (name )
328
+
329
+ dtype = trt .nptype (self .engine .get_tensor_dtype (name ))
330
+ if self .engine .get_tensor_mode (name ) == trt .TensorIOMode .INPUT :
331
+ self .context .set_input_shape (name , shape )
328
332
tensor = torch .zeros (
329
333
tuple (shape ), dtype = numpy_to_torch_dtype_dict [dtype ]
330
334
).to (device = device )
331
- self .tensors [binding ] = tensor
335
+ self .tensors [name ] = tensor
332
336
nvtx .range_pop ()
333
337
334
338
def infer (self , feed_dict , stream , use_cuda_graph = False ):
335
- nvtx . range_push ( "set_tensors" )
339
+
336
340
for name , buf in feed_dict .items ():
337
341
self .tensors [name ].copy_ (buf )
338
342
339
343
for name , tensor in self .tensors .items ():
340
344
self .context .set_tensor_address (name , tensor .data_ptr ())
341
- nvtx .range_pop ()
342
- nvtx .range_push ("execute" )
345
+
343
346
noerror = self .context .execute_async_v3 (stream )
344
347
if not noerror :
345
- raise ValueError ("ERROR: inference failed." )
346
- nvtx . range_pop ()
348
+ raise ValueError ("ERROR: inference failed." )
349
+
347
350
return self .tensors
348
351
349
352
def __str__ (self ):
350
353
out = ""
351
354
for opt_profile in range (self .engine .num_optimization_profiles ):
352
- for binding_idx in range (self .engine .num_bindings ):
353
- name = self .engine .get_binding_name ( binding_idx )
355
+ for binding in range (self .engine .num_io_tensors ):
356
+ name = self .engine .get_tensor_name ( binding )
354
357
shape = self .engine .get_profile_shape (opt_profile , name )
355
358
out += f"\t { name } = { shape } \n "
356
359
return out
357
-
0 commit comments