
Commit 0aeea36

Changed the way to enable CudaGraph for MTTM
1 parent c858f34 commit 0aeea36

File tree: 2 files changed (+9 −8 lines)

  py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py
  py/torch_tensorrt/runtime/_cudagraphs.py

py/torch_tensorrt/dynamo/runtime/_MutableTorchTensorRTModule.py (−8 lines)

@@ -17,7 +17,6 @@
     to_torch_device,
     to_torch_tensorrt_device,
 )
-from torch_tensorrt.runtime._cudagraphs import get_cuda_graph_module

 logger = logging.getLogger(__name__)

@@ -64,7 +63,6 @@ def __init__(
         *,
         device: Optional[Union[Device, torch.device, str]] = _defaults.DEVICE,
         use_python_runtime: bool = _defaults.USE_PYTHON_RUNTIME,
-        enable_cuda_graph: bool = False,
         immutable_weights: bool = False,
         strict: bool = True,
         allow_complex_guards_as_runtime_asserts: bool = False,
@@ -160,7 +158,6 @@ def __init__(
             logger.warning(
                 "Weight stremaing budget is not set. Using auto weight streaming budget"
             )
-        self.enable_cuda_graph = enable_cuda_graph

         cls = self.__class__
         self.__class__ = type(
@@ -347,8 +344,6 @@ def compile(self) -> None:
         )
         self.original_model.to("cpu")
         torch.cuda.empty_cache()
-        if self.enable_cuda_graph:
-            self._enable_cuda_graph()
         if self.enable_weight_streaming:
             self.set_weight_streaming_ctx(self.weight_streaming_budget)

@@ -365,9 +360,6 @@ def set_weight_streaming_ctx(self, requested_budget: Optional[int] = None) -> No
             )
         self.weight_streaming_ctx.device_budget = requested_budget

-    def _enable_cuda_graph(self) -> None:
-        self.gm = get_cuda_graph_module(self.gm)
-
     def _validate_inputs(self, *args: Any, **kwargs: Any) -> None:

         if not self.arg_inputs and not self.kwarg_inputs:
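Net effect of this file: the `enable_cuda_graph` constructor argument and the `_enable_cuda_graph()` helper are removed, so `compile()` no longer wraps `self.gm` with `get_cuda_graph_module`. For reference, the construction-time pattern these deleted lines supported looked roughly like the sketch below; the placeholder model is an illustrative assumption, and only `enable_cuda_graph` itself comes from the diff.

    import torch
    import torch_tensorrt

    # Old path, deleted by this commit: ask for CUDA Graphs when building the
    # mutable module. torch.nn.Linear here is just a placeholder model.
    mutable_module = torch_tensorrt.MutableTorchTensorRTModule(
        torch.nn.Linear(16, 16).eval().cuda(),
        enable_cuda_graph=True,  # constructor flag removed in this commit
    )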

py/torch_tensorrt/runtime/_cudagraphs.py (+9 lines)

@@ -68,13 +68,22 @@ def __init__(self, compiled_module: torch.nn.Module) -> None:
         global _PY_RT_CUDAGRAPHS
         self.old_mode = _PY_RT_CUDAGRAPHS
         self.compiled_module = compiled_module
+        self.old_module = None

     def __enter__(self) -> torch.nn.Module | torch.fx.GraphModule:
+
+        if isinstance(self.compiled_module, torch_tensorrt.MutableTorchTensorRTModule):
+            self.old_module = self.compiled_module.gm
+            self.compiled_module.gm = get_cuda_graph_module(self.compiled_module.gm)
+            return self.compiled_module
+
         return get_cuda_graph_module(self.compiled_module)

     def __exit__(self, *args: Any) -> None:
         # Set cudagraphs back to old mode
         set_cudagraphs_mode(self.old_mode)
+        if self.old_module: # MutableTorchTRTModule
+            self.compiled_module.gm = self.old_module


 def get_cuda_graph_module(
