Break circular reference issue causing a memory leak #1115

Merged
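
This change breaks the circular references between the Python `Pipeline` object and the MRC executor, pipeline and options objects so that stage destructors actually run when a pipeline is discarded (issue #1114): `mrc.Options` and `mrc.Pipeline` become locals inside `build()`, and a new `_on_stop()` hook drops `self._mrc_executor` once the pipeline has stopped or joined. A minimal sketch (illustrative only, not Morpheus code) of why dropping the back-reference matters: objects caught in a reference cycle are reclaimed only when the cycle collector runs, and the collector generally cannot break cycles that pass through extension objects lacking GC support, which is presumably the motivation for the "Adopt patched pybind11" commit.

```python
import gc


class Stage:

    def __init__(self, pipeline):
        self.pipeline = pipeline  # back-reference completes the cycle

    def __del__(self):
        print("Stage destructor ran")


class Pipeline:

    def __init__(self):
        self.stages = [Stage(self)]  # Pipeline -> Stage -> Pipeline

    def stop(self):
        # Breaking the cycle by hand (analogous to _on_stop() clearing
        # self._mrc_executor) lets plain reference counting reclaim the stage.
        self.stages = None


leaky = Pipeline()
del leaky      # nothing printed: the cycle keeps both objects alive
gc.collect()   # the destructor only runs once the cycle collector steps in

fixed = Pipeline()
fixed.stop()   # "Stage destructor ran" is printed immediately
del fixed
```
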
Changes from 9 commits (28 commits in total):
0d1aeb9
wip
dagardner-nv Aug 1, 2023
2a6e65e
Don't hold a reference to the pipeline outside of the build method, e…
dagardner-nv Aug 1, 2023
07b2069
Test for issue #1114
dagardner-nv Aug 1, 2023
5b5e3f4
Rename log parsing test to not conflict with top-level test_pipe
dagardner-nv Aug 2, 2023
7745d98
Pylint fixes
dagardner-nv Aug 2, 2023
73318c7
Don't hold a ref to executor options:
dagardner-nv Aug 2, 2023
364da5a
Skip test_multi_segment_bad_data_type due to MRC issue https://github…
dagardner-nv Aug 2, 2023
f85dea5
lint fixes
dagardner-nv Aug 2, 2023
698b3e4
Merge branch 'branch-23.11' into david-destructors-1114
dagardner-nv Aug 4, 2023
de11367
Ensure that both _storage_dir and _storage_type are defined prior to…
dagardner-nv Aug 15, 2023
506d95a
wip
dagardner-nv Aug 14, 2023
da1929c
Revert "wip"
dagardner-nv Aug 15, 2023
2909695
Add comment explaining
dagardner-nv Aug 15, 2023
7394f23
Call PyGILState_Check first to see if we need to gil, borrowed from M…
dagardner-nv Aug 15, 2023
0f7500c
Add comment explaining the call to PyGILState_Check
dagardner-nv Aug 15, 2023
34c038f
Test for MRC 362
dagardner-nv Aug 17, 2023
8984754
Insert CR header, and add fixture for disabling garbage collection
dagardner-nv Aug 17, 2023
7f2a553
Revert "Add comment explaining the call to PyGILState_Check"
dagardner-nv Aug 17, 2023
f061115
Revert "Call PyGILState_Check first to see if we need to gil, borrowe…
dagardner-nv Aug 17, 2023
33cf054
Adopt patched pybind11
dagardner-nv Aug 18, 2023
5163bc9
Merge branch 'branch-23.11' of github.com:nv-morpheus/Morpheus into d…
dagardner-nv Aug 18, 2023
60d20e3
Lint fixes
dagardner-nv Aug 18, 2023
123d730
Merge branch 'branch-23.11' into david-destructors-1114
dagardner-nv Aug 22, 2023
5bd3ade
Merge branch 'branch-23.11' into david-destructors-1114
dagardner-nv Aug 24, 2023
07b284a
Merge branch 'branch-23.11' of github.com:nv-morpheus/Morpheus into d…
dagardner-nv Aug 24, 2023
8b95b65
Merge branch 'branch-23.11' of github.com:nv-morpheus/Morpheus into d…
dagardner-nv Aug 30, 2023
9bbc515
Cleanups per pr feedback
dagardner-nv Aug 30, 2023
e4f24ca
yapf formatting fixes
dagardner-nv Aug 30, 2023
121 changes: 63 additions & 58 deletions morpheus/pipeline/pipeline.py
@@ -51,15 +51,16 @@ class Pipeline():

Parameters
----------
c : `morpheus.config.Config`
config : `morpheus.config.Config`
Pipeline configuration instance.

"""

def __init__(self, c: Config):
def __init__(self, config: Config):
self._source_count: int = None # Maximum number of iterations for progress reporting. None = Unknown/Unlimited

self._id_counter = 0
self._num_threads = config.num_threads

# Complete set of nodes across segments in this pipeline
self._stages: typing.Set[Stage] = set()
@@ -70,14 +71,10 @@ def __init__(self, c: Config):
# Dictionary containing segment information for this pipeline
self._segments: typing.Dict = defaultdict(lambda: {"nodes": set(), "ingress_ports": [], "egress_ports": []})

self._exec_options = mrc.Options()
self._exec_options.topology.user_cpuset = "0-{}".format(c.num_threads - 1)
self._exec_options.engine_factories.default_engine_type = mrc.core.options.EngineType.Thread

# Set the default channel size
mrc.Config.default_channel_size = c.edge_buffer_size
mrc.Config.default_channel_size = config.edge_buffer_size

self.batch_size = c.pipeline_batch_size
self.batch_size = config.pipeline_batch_size

self._segment_graphs = defaultdict(lambda: networkx.DiGraph())

@@ -86,7 +83,6 @@ def __init__(self, c: Config):
self._is_started = False

self._mrc_executor: mrc.Executor = None
self._mrc_pipeline: mrc.Pipeline = None

@property
def is_built(self) -> bool:
@@ -126,7 +122,7 @@ def add_stage(self, stage: StageT, segment_id: str = "main") -> StageT:
segment_nodes.add(stage)
self._sources.add(stage)
else:
raise NotImplementedError("add_stage() failed. Unknown node type: {}".format(type(stage)))
raise NotImplementedError(f"add_stage() failed. Unknown node type: {type(stage)}")

stage._pipeline = self

@@ -232,12 +228,16 @@ def build(self):

logger.info("====Registering Pipeline====")

self._mrc_executor = mrc.Executor(self._exec_options)
exec_options = mrc.Options()
exec_options.topology.user_cpuset = f"0-{self._num_threads - 1}"
exec_options.engine_factories.default_engine_type = mrc.core.options.EngineType.Thread

self._mrc_executor = mrc.Executor(exec_options)

self._mrc_pipeline = mrc.Pipeline()
mrc_pipeline = mrc.Pipeline()

def inner_build(builder: mrc.Builder, segment_id: str):
logger.info(f"====Building Segment: {segment_id}====")
logger.info("====Building Segment: %s ====", segment_id)
segment_graph = self._segment_graphs[segment_id]

# Check if preallocated columns are requested, this needs to happen before the source stages are built
@@ -256,7 +256,7 @@ def inner_build(builder: mrc.Builder, segment_id: str):
if (stage.can_build()):
stage.build(builder)

if (not all([x.is_built for x in segment_graph.nodes()])):
if (not all(x.is_built for x in segment_graph.nodes())):
logger.warning("Cyclic pipeline graph detected! Building with reduced constraints")

for stage in segment_graph.nodes():
@@ -275,22 +275,22 @@ def inner_build(builder: mrc.Builder, segment_id: str):
logger.info("====Building Segment Complete!====")

logger.info("====Building Pipeline====")
for segment_id in self._segments.keys():
segment_ingress_ports = self._segments[segment_id]["ingress_ports"]
segment_egress_ports = self._segments[segment_id]["egress_ports"]
for (segment_id, segment) in self._segments.items():
segment_ingress_ports = segment["ingress_ports"]
segment_egress_ports = segment["egress_ports"]
segment_inner_build = partial(inner_build, segment_id=segment_id)

self._mrc_pipeline.make_segment(segment_id, [port_info["port_pair"] for port_info in segment_ingress_ports],
[port_info["port_pair"] for port_info in segment_egress_ports],
segment_inner_build)
mrc_pipeline.make_segment(segment_id, [port_info["port_pair"] for port_info in segment_ingress_ports],
[port_info["port_pair"] for port_info in segment_egress_ports],
segment_inner_build)

logger.info("====Building Pipeline Complete!====")
self._is_build_complete = True

# Finally call _on_start
self._on_start()

self._mrc_executor.register_pipeline(self._mrc_pipeline)
self._mrc_executor.register_pipeline(mrc_pipeline)

self._is_built = True

@@ -311,12 +311,13 @@ def stop(self):
"""

logger.info("====Stopping Pipeline====")
for s in list(self._sources) + list(self._stages):
s.stop()
for src in list(self._sources) + list(self._stages):
src.stop()

self._mrc_executor.stop()

logger.info("====Pipeline Stopped====")
self._on_stop()

async def join(self):
"""
@@ -330,21 +331,26 @@ async def join(self):
raise
finally:
# Make sure these are always shut down even if there was an error
for s in list(self._sources):
s.stop()
for src in list(self._sources):
src.stop()

# First wait for all sources to stop. This only occurs after all messages have been processed fully
for s in list(self._sources):
await s.join()
for src in list(self._sources):
await src.join()

# Now that there is no more data, call stop on all stages to ensure shutdown (i.e., for stages that have
# their own worker loop thread)
for s in list(self._stages):
s.stop()
for stage in list(self._stages):
stage.stop()

# Now call join on all stages
for s in list(self._stages):
await s.join()
for stage in list(self._stages):
await stage.join()

self._on_stop()

def _on_stop(self):
self._mrc_executor = None

async def _build_and_start(self):

@@ -362,8 +368,8 @@ async def _build_and_start(self):
async def _async_start(self):

# Loop over all stages and call on_start if it exists
for s in self._stages:
await s.start_async()
for stage in self._stages:
await stage.start_async()

def _on_start(self):

@@ -374,11 +380,11 @@ def _on_start(self):
# Stop from running this twice
self._is_started = True

logger.debug("Starting! Time: {}".format(time.time()))
logger.debug("Starting! Time: %s", time.time())

# Loop over all stages and call on_start if it exists
for s in self._stages:
s.on_start()
for stage in self._stages:
stage.on_start()

def visualize(self, filename: str = None, **graph_kwargs):
"""
@@ -414,11 +420,11 @@ def visualize(self, filename: str = None, **graph_kwargs):
start_def_port = ":e" if is_lr else ":s"
end_def_port = ":w" if is_lr else ":n"

def has_ports(n: StreamWrapper, is_input):
def has_ports(node: StreamWrapper, is_input):
if (is_input):
return len(n.input_ports) > 0
else:
return len(n.output_ports) > 0
return len(node.input_ports) > 0

return len(node.output_ports) > 0

if not self._is_build_complete:
raise RuntimeError("Pipeline.visualize() requires that the Pipeline has been started before generating "
@@ -427,31 +433,31 @@ def has_ports(n: StreamWrapper, is_input):
"be fixed in a future release.")

# Now build up the nodes
for idx, segment_id in enumerate(self._segments):
for segment_id in self._segments:
gv_subgraphs[segment_id] = graphviz.Digraph(f"cluster_{segment_id}")
gv_subgraph = gv_subgraphs[segment_id]
gv_subgraph.attr(label=segment_id)
for n, attrs in typing.cast(typing.Mapping[StreamWrapper, dict],
self._segment_graphs[segment_id].nodes).items():
for node, attrs in typing.cast(typing.Mapping[StreamWrapper, dict],
self._segment_graphs[segment_id].nodes).items():
node_attrs = attrs.copy()

label = ""

show_in_ports = has_ports(n, is_input=True)
show_out_ports = has_ports(n, is_input=False)
show_in_ports = has_ports(node, is_input=True)
show_out_ports = has_ports(node, is_input=False)

# Build the ports for the node. Only show ports if there are any
# (Would like to have this not show for one port, but the lines get all messed up)
if (show_in_ports):
in_port_label = " {{ {} }} | ".format(" | ".join(
[f"<u{x.port_number}> input_port: {x.port_number}" for x in n.input_ports]))
in_port_label = " {{ {} }} | ".format(" | ".join( # pylint: disable=consider-using-f-string
[f"<u{x.port_number}> input_port: {x.port_number}" for x in node.input_ports]))
label += in_port_label

label += n.unique_name
label += node.unique_name

if (show_out_ports):
out_port_label = " | {{ {} }}".format(" | ".join(
[f"<d{x.port_number}> output_port: {x.port_number}" for x in n.output_ports]))
out_port_label = " | {{ {} }}".format(" | ".join( # pylint: disable=consider-using-f-string
[f"<d{x.port_number}> output_port: {x.port_number}" for x in node.output_ports]))
label += out_port_label

if (show_in_ports or show_out_ports):
@@ -462,9 +468,8 @@ def has_ports(n: StreamWrapper, is_input):
"shape": "record",
"fillcolor": "white",
})
# TODO: Eventually allow nodes to have different attributes based on type
# node_attrs.update(n.get_graphviz_attrs())
gv_subgraph.node(n.unique_name, **node_attrs)

gv_subgraph.node(node.unique_name, **node_attrs)

# Build up edges
for segment_id in self._segments:
@@ -522,7 +527,7 @@ def has_ports(n: StreamWrapper, is_input):
style="dashed",
label=f"Segment Port: {egress_port['port_pair'][0]}")

for key, gv_subgraph in gv_subgraphs.items():
for gv_subgraph in gv_subgraphs.values():
gv_graph.subgraph(gv_subgraph)

file_format = os.path.splitext(filename)[-1].replace(".", "")
@@ -544,7 +549,7 @@ async def run_async(self):

def error_handler(_, context: dict):

msg = "Unhandled exception in async loop! Exception: \n{}".format(context["message"])
msg = f"Unhandled exception in async loop! Exception: \n{context['message']}"
exception = context.get("exception", Exception())

logger.critical(msg, exc_info=exception)
@@ -564,10 +569,10 @@ def term_signal():
self.stop()
else:
tqdm.write("Killing")
exit(1)
exit(1) # pylint: disable=consider-using-sys-exit

for s in [signal.SIGINT, signal.SIGTERM]:
loop.add_signal_handler(s, term_signal)
for sig in [signal.SIGINT, signal.SIGTERM]:
loop.add_signal_handler(sig, term_signal)

try:
await self._build_and_start()
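
Taken together, the pipeline.py changes above bound the lifetime of the MRC objects to the build/run cycle instead of to the Python `Pipeline` object: the options and the `mrc.Pipeline` are locals of `build()`, and the executor reference is dropped in `_on_stop()`. A small self-contained sketch of that pattern (the class and attribute names are illustrative, not Morpheus or MRC APIs):

```python
class Resource:
    """Stand-in for a natively backed object such as mrc.Options."""

    def __del__(self):
        print("Resource released")


class ScopedPipeline:

    def __init__(self):
        self._executor = None  # nothing native is held before build()

    def build(self):
        options = Resource()       # released as soon as build() returns
        self._executor = object()  # only the executor outlives build()

    def _on_stop(self):
        self._executor = None      # dropped once the pipeline has stopped


pipe = ScopedPipeline()
pipe.build()     # prints "Resource released" when build() returns
pipe._on_stop()  # the executor reference is gone as well
```
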
@@ -121,7 +121,7 @@ def _run_mocked_pipeline(config: Config, dataset_cudf: DatasetManager, import_mo
mock_infer_result = mock.MagicMock()
mock_infer_result.as_numpy.side_effect = inf_results

def async_infer(callback=None, **k):
def async_infer(callback=None, **_):
callback(mock_infer_result, None)

mock_triton_client.async_infer.side_effect = async_infer
1 change: 1 addition & 0 deletions tests/test_multi_segment.py
@@ -36,6 +36,7 @@ def test_linear_boundary_stages(config, filter_probs_df):
assert_results(comp_stage.get_results())


@pytest.mark.skip(reason="Skipping due to MRC issue #360")
@pytest.mark.use_cudf
def test_multi_segment_bad_data_type(config, filter_probs_df):
with pytest.raises(RuntimeError):
83 changes: 83 additions & 0 deletions tests/test_pipe.py
@@ -0,0 +1,83 @@
#!/usr/bin/env python
# SPDX-FileCopyrightText: Copyright (c) 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import gc
import typing

import pytest

from morpheus.config import Config
from morpheus.pipeline import LinearPipeline
from morpheus.stages.input.in_memory_source_stage import InMemorySourceStage
from morpheus.stages.output.in_memory_sink_stage import InMemorySinkStage
from morpheus.utils.type_aliases import DataFrameType


class SourceTestStage(InMemorySourceStage):

def __init__(self,
config,
dataframes: typing.List[DataFrameType],
state_dict: dict,
repeat: int = 1,
state_key: str = "source"):
super().__init__(config, dataframes, repeat)
self._state_dict = state_dict
self._state_key = state_key

@property
def name(self) -> str:
return "test-source"

def __del__(self):
self._state_dict[self._state_key] = True
self._state_dict = None


class SinkTestStage(InMemorySinkStage):

def __init__(self, config, state_dict: dict, state_key: str = "sink"):
super().__init__(config)
self._state_dict = state_dict
self._state_key = state_key

@property
def name(self) -> str:
return "test-sink"

def __del__(self):
self._state_dict[self._state_key] = True
self._state_dict = None


def _run_pipeline(config: Config, filter_probs_df: DataFrameType, state_dict: dict):
pipe = LinearPipeline(config)
pipe.set_source(SourceTestStage(config, [filter_probs_df], state_dict=state_dict))
pipe.add_stage(SinkTestStage(config, state_dict=state_dict))
pipe.run()


@pytest.mark.use_cudf
def test_destructors_called(config: Config, filter_probs_df: DataFrameType):
"""
Test to ensure that the destructors of stages are called (issue #1114).
"""
state_dict = {"source": False, "sink": False}
_run_pipeline(config, filter_probs_df, state_dict)

gc.collect()
assert state_dict["source"]
assert state_dict["sink"]
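
The commit history also mentions a fixture for disabling garbage collection ("Insert CR header, and add fixture for disabling garbage collection"); that fixture lives outside this diff, presumably in a conftest.py. A minimal pytest sketch of the idea, with the fixture name and placement being assumptions rather than the actual Morpheus fixture:

```python
import gc

import pytest


@pytest.fixture
def disable_gc():
    """Disable the cyclic garbage collector for the duration of a test.

    With the collector off, objects kept alive only by reference cycles are
    not silently reclaimed mid-test, which makes destructor and leak checks
    deterministic.
    """
    gc.disable()
    try:
        yield
    finally:
        gc.enable()
```
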