diff --git a/.github/release.yml b/.github/release.yml new file mode 100644 index 000000000..92304e6a9 --- /dev/null +++ b/.github/release.yml @@ -0,0 +1,56 @@ +# +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# + +changelog: + exclude: + labels: + - ignore-for-release + categories: + - title: Features + labels: + - 'type: feature' + exclude: + labels: + - non-user-facing + - title: Bug Fixes + labels: + - 'bug: critical' + - 'bug: major' + - 'bug: minor' + exclude: + labels: + - non-user-facing + - title: API Breaks + labels: + - 'API break' + exclude: + labels: + - non-user-facing + - title: Miscellaneous Improvements + labels: + - "*" \ No newline at end of file diff --git a/.github/workflows/build_docs.yml b/.github/workflows/build_docs.yml index 714eb3819..65aadc03c 100644 --- a/.github/workflows/build_docs.yml +++ b/.github/workflows/build_docs.yml @@ -39,6 +39,18 @@ jobs: runs-on: ubuntu-latest steps: + + # Maximize the space in this image + - name: Maximize build space + uses: easimon/maximize-build-space@v10 + with: + root-reserve-mb: 30720 + remove-dotnet: true + remove-android: true + remove-haskell: true + remove-codeql: true + remove-docker-images: true + - uses: actions/checkout@v4 with: fetch-depth: 0 # otherwise, there would be errors pushing refs to the destination repository. diff --git a/smartsim/slurm.py b/.github/workflows/changelog.yml similarity index 72% rename from smartsim/slurm.py rename to .github/workflows/changelog.yml index 6a32d0213..cd4ab58fa 100644 --- a/smartsim/slurm.py +++ b/.github/workflows/changelog.yml @@ -1,3 +1,4 @@ +# # BSD 2-Clause License # # Copyright (c) 2021-2024, Hewlett Packard Enterprise @@ -23,23 +24,26 @@ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# + +name: enforce_changelog +on: + pull_request: + push: + branches: + - develop -from warnings import simplefilter, warn +jobs: + changelog: + name: check_changelog + runs-on: ubuntu-latest -# pylint: disable-next=unused-import -from .wlm.slurm import ( - _get_alloc_cmd, - _get_system_partition_info, - get_allocation, - get_default_partition, - release_allocation, - validate, -) + steps: + - uses: actions/checkout@v4 -simplefilter("once", category=DeprecationWarning) -DEPRECATION_MSG = ( - "`smartsim.slurm` has been deprecated and will be removed in a future release.\n" - "Please update your code to use `smartsim.wlm.slurm`" -) -warn(DEPRECATION_MSG, category=DeprecationWarning, stacklevel=2) + - name: Changelog Enforcer + uses: dangoslen/changelog-enforcer@v3.6.0 + with: + changeLogPath: './doc/changelog.md' + missingUpdateErrorMessage: 'changelog.md has not been updated' diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ad9a55e03..6c1361b46 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -53,7 +53,7 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-20.04, macos-12] + os: [ubuntu-22.04, macos-12] steps: - uses: actions/checkout@v4 @@ -98,7 +98,7 @@ jobs: - uses: actions/setup-python@v5 name: Install Python with: - python-version: '3.8' + python-version: '3.9' - name: Build sdist run: | @@ -124,3 +124,16 @@ jobs: user: __token__ password: ${{ secrets.PYPI }} #repository_url: https://test.pypi.org/legacy/ + + + createPullRequest: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Create pull request + run: | + gh pr create -B develop -H master --title 'Merge master into develop' --body 'This PR brings develop up to date with master for release.' 
+ env: + GH_TOKEN: ${{ github.token }} diff --git a/.github/workflows/run_tests.yml b/.github/workflows/run_tests.yml index 79466b902..2e3463e5b 100644 --- a/.github/workflows/run_tests.yml +++ b/.github/workflows/run_tests.yml @@ -54,10 +54,13 @@ jobs: fail-fast: false matrix: subset: [backends, slow_tests, group_a, group_b] - os: [macos-12, ubuntu-20.04] # Operating systems + os: [macos-12, macos-14, ubuntu-22.04] # Operating systems compiler: [8] # GNU compiler version rai: [1.2.7] # Redis AI versions - py_v: ['3.8', '3.9', '3.10', '3.11'] # Python versions + py_v: ["3.9", "3.10", "3.11"] # Python versions + exclude: + - os: macos-14 + py_v: "3.9" env: SMARTSIM_REDISAI: ${{ matrix.rai }} @@ -108,8 +111,13 @@ jobs: python -m pip install .[dev,ml] - name: Install ML Runtimes with Smart (with pt, tf, and onnx support) + if: contains( matrix.os, 'ubuntu' ) || contains( matrix.os, 'macos-12') run: smart build --device cpu --onnx -v + - name: Install ML Runtimes with Smart (no ONNX,TF on Apple Silicon) + if: contains( matrix.os, 'macos-14' ) + run: smart build --device cpu --no_tf -v + - name: Run mypy run: | python -m pip install .[mypy] diff --git a/.gitignore b/.gitignore index 428e439b3..77b91d586 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ tests/test_output # Dependencies smartsim/_core/.third-party +smartsim/_core/.dragon # Docs _build @@ -22,17 +23,14 @@ venv/ .venv/ env/ .env/ +**/.env # written upon install smartsim/version.py -smartsim/_core/bin/*-server -smartsim/_core/bin/*-cli - # created upon install +smartsim/_core/bin smartsim/_core/lib -**/manifest/ -**/*.err -**/*.out -**/.smartsim/* +# optional dev tools +.pre-commit-config.yaml diff --git a/.pylintrc b/.pylintrc index f2fa17bab..aa378d039 100644 --- a/.pylintrc +++ b/.pylintrc @@ -325,7 +325,7 @@ valid-metaclass-classmethod-first-arg=mcs max-args=9 # Maximum number of locals for function / method body -max-locals=20 +max-locals=25 # Maximum number of return / yield for function / 
method body max-returns=11 diff --git a/.readthedocs.yaml b/.readthedocs.yaml new file mode 100644 index 000000000..cecdfe3bf --- /dev/null +++ b/.readthedocs.yaml @@ -0,0 +1,43 @@ +# Required +version: 2 + +# Set the OS, Python version and other tools you might need +build: + os: ubuntu-22.04 + tools: + python: "3.10" + jobs: + post_checkout: + # Cancel building pull requests when there aren't changed in the docs directory or YAML file. + # You can add any other files or directories that you'd like here as well, + # like your docs requirements file, or other files that will change your docs build. + # + # If there are no changes (git diff exits with 0) we force the command to return with 183. + # This is a special exit code on Read the Docs that will cancel the build immediately. + - | + if [ "$READTHEDOCS_VERSION_TYPE" = "external" ] && git diff --quiet origin/main -- doc/ .readthedocs.yaml; + then + exit 183; + fi + pre_create_environment: + - git clone --depth 1 https://github.com/CrayLabs/SmartRedis.git smartredis + - git clone --depth 1 https://github.com/CrayLabs/SmartDashboard.git smartdashboard + post_create_environment: + - python -m pip install .[dev] + - cd smartredis; python -m pip install . + - cd smartredis/doc; doxygen Doxyfile_c; doxygen Doxyfile_cpp; doxygen Doxyfile_fortran + - ln -s smartredis/examples ./examples + - cd smartdashboard; python -m pip install . 
+ pre_build: + - pip install typing_extensions==4.8.0 + - pip install pydantic==1.10.13 + - python -m sphinx -b linkcheck doc/ $READTHEDOCS_OUTPUT/linkcheck + +# Build documentation in the "docs/" directory with Sphinx +sphinx: + configuration: doc/conf.py + fail_on_warning: true + +python: + install: + - requirements: doc/requirements-doc.txt \ No newline at end of file diff --git a/.wci.yml b/.wci.yml index 265d59579..6194f1939 100644 --- a/.wci.yml +++ b/.wci.yml @@ -22,8 +22,8 @@ language: Python release: - version: 0.6.2 - date: 2024-02-16 + version: 0.7.0 + date: 2024-05-14 documentation: general: https://www.craylabs.org/docs/overview.html diff --git a/Makefile b/Makefile index f71f2a0b3..bddbda722 100644 --- a/Makefile +++ b/Makefile @@ -150,11 +150,11 @@ tutorials-dev: @docker compose build tutorials-dev @docker run -p 8888:8888 smartsim-tutorials:dev-latest -# help: tutorials-prod - Build and start a docker container to run the tutorials (v0.6.2) +# help: tutorials-prod - Build and start a docker container to run the tutorials (v0.7.0) .PHONY: tutorials-prod tutorials-prod: @docker compose build tutorials-prod - @docker run -p 8888:8888 smartsim-tutorials:v0.6.2 + @docker run -p 8888:8888 smartsim-tutorials:v0.7.0 # help: diff --git a/README.md b/README.md index cfd8d4271..c0986042e 100644 --- a/README.md +++ b/README.md @@ -174,13 +174,17 @@ system with which it has a corresponding `RunSettings` class. If one can be foun ## Experiments on HPC Systems SmartSim integrates with common HPC schedulers providing batch and interactive -launch capabilities for all applications. +launch capabilities for all applications: - Slurm - LSF - PBSPro - Local (for laptops/single node, no batch) +In addition, on Slurm and PBS systems, [Dragon](https://dragonhpc.github.io/dragon/doc/_build/html/index.html) +can be used as a launcher. Please refer to the documentation for instructions on +how to insall it on your system and use it in SmartSim. 
+ ### Interactive Launch Example diff --git a/conftest.py b/conftest.py index b5a4fd70b..b0457522c 100644 --- a/conftest.py +++ b/conftest.py @@ -26,33 +26,51 @@ from __future__ import annotations +import asyncio +from collections import defaultdict +from dataclasses import dataclass import json import os -import pytest -import psutil +import pathlib import shutil +import subprocess +import signal +import socket +import sys +import tempfile +import time +import typing as t +import uuid +import warnings +from subprocess import run +import time + +import psutil +import pytest + import smartsim from smartsim import Experiment -from smartsim.entity import Model +from smartsim._core.launcher.dragon.dragonConnector import DragonConnector +from smartsim._core.launcher.dragon.dragonLauncher import DragonLauncher +from smartsim._core.config import CONFIG +from smartsim._core.config.config import Config +from smartsim._core.utils.telemetry.telemetry import JobEntity from smartsim.database import Orchestrator +from smartsim.entity import Model +from smartsim.error import SSConfigError, SSInternalError +from smartsim.log import get_logger from smartsim.settings import ( - SrunSettings, AprunSettings, + DragonRunSettings, JsrunSettings, - MpirunSettings, MpiexecSettings, + MpirunSettings, PalsMpiexecSettings, RunSettings, + SrunSettings, ) -from smartsim._core.config import CONFIG -from smartsim.error import SSConfigError -from subprocess import run -import sys -import tempfile -import typing as t -import uuid -import warnings +logger = get_logger(__name__) # pylint: disable=redefined-outer-name,invalid-name,global-statement @@ -64,9 +82,12 @@ test_num_gpus = CONFIG.test_num_gpus test_nic = CONFIG.test_interface test_alloc_specs_path = os.getenv("SMARTSIM_TEST_ALLOC_SPEC_SHEET_PATH", None) -test_port = CONFIG.test_port +test_ports = CONFIG.test_ports test_account = CONFIG.test_account or "" -test_batch_resources: t.Dict[t.Any,t.Any] = CONFIG.test_batch_resources 
+test_batch_resources: t.Dict[t.Any, t.Any] = CONFIG.test_batch_resources +test_output_dirs = 0 +mpi_app_exe = None +built_mpi_app = False # Fill this at runtime if needed test_hostlist = None @@ -91,9 +112,7 @@ def print_test_configuration() -> None: print("TEST_ALLOC_SPEC_SHEET_PATH:", test_alloc_specs_path) print("TEST_DIR:", test_output_root) print("Test output will be located in TEST_DIR if there is a failure") - print( - "TEST_PORTS:", ", ".join(str(port) for port in range(test_port, test_port + 3)) - ) + print("TEST_PORTS:", ", ".join(str(port) for port in test_ports)) if test_batch_resources: print("TEST_BATCH_RESOURCES: ") print(json.dumps(test_batch_resources, indent=2)) @@ -101,7 +120,7 @@ def print_test_configuration() -> None: def pytest_configure() -> None: pytest.test_launcher = test_launcher - pytest.wlm_options = ["slurm", "pbs", "lsf", "pals"] + pytest.wlm_options = ["slurm", "pbs", "lsf", "pals", "dragon"] account = get_account() pytest.test_account = account pytest.test_device = test_device @@ -118,6 +137,14 @@ def pytest_sessionstart( if os.path.isdir(test_output_root): shutil.rmtree(test_output_root) os.makedirs(test_output_root) + while not os.path.isdir(test_output_root): + time.sleep(0.1) + + if CONFIG.dragon_server_path is None: + dragon_server_path = os.path.join(test_output_root, "dragon_server") + os.makedirs(dragon_server_path) + os.environ["SMARTSIM_DRAGON_SERVER_PATH"] = dragon_server_path + print_test_configuration() @@ -129,12 +156,62 @@ def pytest_sessionfinish( returning the exit status to the system. 
""" if exitstatus == 0: - shutil.rmtree(test_output_root) + cleanup_attempts = 5 + while cleanup_attempts > 0: + try: + shutil.rmtree(test_output_root) + except OSError as e: + cleanup_attempts -= 1 + time.sleep(1) + if not cleanup_attempts: + raise + else: + break else: - # kill all spawned processes in case of error + # kill all spawned processes + if CONFIG.test_launcher == "dragon": + time.sleep(5) kill_all_test_spawned_processes() +def build_mpi_app() -> t.Optional[pathlib.Path]: + global built_mpi_app + built_mpi_app = True + cc = shutil.which("cc") + if cc is None: + cc = shutil.which("gcc") + if cc is None: + return None + + path_to_src = pathlib.Path(FileUtils().get_test_conf_path("mpi")) + path_to_out = pathlib.Path(test_output_root) / "apps" / "mpi_app" + os.makedirs(path_to_out.parent, exist_ok=True) + cmd = [cc, str(path_to_src / "mpi_hello.c"), "-o", str(path_to_out)] + proc = subprocess.Popen(cmd) + proc.wait(timeout=1) + if proc.returncode == 0: + return path_to_out + else: + return None + +@pytest.fixture(scope="session") +def mpi_app_path() -> t.Optional[pathlib.Path]: + """Return path to MPI app if it was built + + return None if it could not or will not be built + """ + if not CONFIG.test_mpi: + return None + + # if we already tried to build, return what we have + if built_mpi_app: + return mpi_app_exe + + # attempt to build, set global + mpi_app_exe = build_mpi_app() + return mpi_app_exe + + def kill_all_test_spawned_processes() -> None: # in case of test failure, clean up all spawned processes pid = os.getpid() @@ -150,6 +227,7 @@ def kill_all_test_spawned_processes() -> None: print("Not all processes were killed after test") + def get_hostlist() -> t.Optional[t.List[str]]: global test_hostlist if not test_hostlist: @@ -200,7 +278,43 @@ def alloc_specs() -> t.Dict[str, t.Any]: return specs -@pytest.fixture +def _reset_signal(signalnum: int): + """SmartSim will set/overwrite signals on occasion. 
This function will + return a generator that can be used as a fixture to automatically reset the + signal handler to what it was at the beginning of the test suite to keep + tests atomic. + """ + original = signal.getsignal(signalnum) + + def _reset(): + yield + signal.signal(signalnum, original) + + return _reset + + +_reset_signal_interrupt = pytest.fixture( + _reset_signal(signal.SIGINT), autouse=True, scope="function" +) + + +def _find_free_port(ports: t.Collection[int]) -> int: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + for port in ports: + try: + sock.bind(("127.0.0.1", port)) + except socket.error: + continue + else: + _, port_ = sock.getsockname() + return int(port_) + raise SSInternalError( + "Could not find a free port out of a options: " + f"{', '.join(str(port) for port in sorted(ports))}" + ) + + +@pytest.fixture(scope="session") def wlmutils() -> t.Type[WLMUtils]: return WLMUtils @@ -217,7 +331,9 @@ def get_test_launcher() -> str: @staticmethod def get_test_port() -> int: - return test_port + # TODO: Ideally this should find a free port on the correct host(s), + # but this is good enough for now + return _find_free_port(test_ports) @staticmethod def get_test_account() -> str: @@ -246,6 +362,12 @@ def get_base_run_settings( run_args.update(kwargs) settings = RunSettings(exe, args, run_command="srun", run_args=run_args) return settings + if test_launcher == "dragon": + run_args = {"nodes": nodes} + run_args = {"ntasks": ntasks} + run_args.update(kwargs) + settings = DragonRunSettings(exe, args, run_args=run_args) + return settings if test_launcher == "pbs": if shutil.which("aprun"): run_command = "aprun" @@ -287,6 +409,11 @@ def get_run_settings( run_args = {"nodes": nodes, "ntasks": ntasks, "time": "00:10:00"} run_args.update(kwargs) return SrunSettings(exe, args, run_args=run_args) + if test_launcher == "dragon": + run_args = {"nodes": nodes} + run_args.update(kwargs) + settings = DragonRunSettings(exe, args, 
run_args=run_args) + return settings if test_launcher == "pbs": if shutil.which("aprun"): run_args = {"pes": ntasks} @@ -312,53 +439,6 @@ def get_run_settings( return RunSettings(exe, args) - @staticmethod - def get_orchestrator(nodes: int = 1, batch: bool = False) -> Orchestrator: - if test_launcher == "pbs": - if not shutil.which("aprun"): - hostlist = get_hostlist() - else: - hostlist = None - return Orchestrator( - db_nodes=nodes, - port=test_port, - batch=batch, - interface=test_nic, - launcher=test_launcher, - hosts=hostlist, - ) - if test_launcher == "pals": - hostlist = get_hostlist() - return Orchestrator( - db_nodes=nodes, - port=test_port, - batch=batch, - interface=test_nic, - launcher=test_launcher, - hosts=hostlist, - ) - if test_launcher == "slurm": - return Orchestrator( - db_nodes=nodes, - port=test_port, - batch=batch, - interface=test_nic, - launcher=test_launcher, - ) - if test_launcher == "lsf": - return Orchestrator( - db_nodes=nodes, - port=test_port, - batch=batch, - cpus_per_shard=4, - gpus_per_shard=2 if test_device == "GPU" else 0, - project=get_account(), - interface=test_nic, - launcher=test_launcher, - ) - - return Orchestrator(port=test_port, interface="lo") - @staticmethod def choose_host(rs: RunSettings) -> t.Optional[str]: if isinstance(rs, (MpirunSettings, MpiexecSettings)): @@ -368,64 +448,6 @@ def choose_host(rs: RunSettings) -> t.Optional[str]: return None -@pytest.fixture -def local_db( - request: t.Any, wlmutils: t.Type[WLMUtils], test_dir: str -) -> t.Generator[Orchestrator, None, None]: - """Yield fixture for startup and teardown of an local orchestrator""" - - exp_name = request.function.__name__ - exp = Experiment(exp_name, launcher="local", exp_path=test_dir) - db = Orchestrator(port=wlmutils.get_test_port(), interface="lo") - db.set_path(test_dir) - exp.start(db) - - yield db - # pass or fail, the teardown code below is ran after the - # completion of a test case that uses this fixture - exp.stop(db) - - 
-@pytest.fixture -def db( - request: t.Any, wlmutils: t.Type[WLMUtils], test_dir: str -) -> t.Generator[Orchestrator, None, None]: - """Yield fixture for startup and teardown of an orchestrator""" - launcher = wlmutils.get_test_launcher() - - exp_name = request.function.__name__ - exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) - db = wlmutils.get_orchestrator() - db.set_path(test_dir) - exp.start(db) - - yield db - # pass or fail, the teardown code below is ran after the - # completion of a test case that uses this fixture - exp.stop(db) - - -@pytest.fixture -def db_cluster( - test_dir: str, wlmutils: t.Type[WLMUtils], request: t.Any -) -> t.Generator[Orchestrator, None, None]: - """ - Yield fixture for startup and teardown of a clustered orchestrator. - This should only be used in on_wlm and full_wlm tests. - """ - launcher = wlmutils.get_test_launcher() - - exp_name = request.function.__name__ - exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) - db = wlmutils.get_orchestrator(nodes=3) - db.set_path(test_dir) - exp.start(db) - - yield db - # pass or fail, the teardown code below is ran after the - # completion of a test case that uses this fixture - exp.stop(db) - @pytest.fixture(scope="function", autouse=True) def environment_cleanup(monkeypatch: pytest.MonkeyPatch) -> None: @@ -436,6 +458,14 @@ def environment_cleanup(monkeypatch: pytest.MonkeyPatch) -> None: monkeypatch.delenv("SSKEYOUT", raising=False) +@pytest.fixture(scope="function", autouse=True) +def check_output_dir() -> None: + global test_output_dirs + assert os.path.isdir(test_output_root) + assert len(os.listdir(test_output_root)) >= test_output_dirs + test_output_dirs = len(os.listdir(test_output_root)) + + @pytest.fixture def dbutils() -> t.Type[DBUtils]: return DBUtils @@ -524,7 +554,7 @@ def _sanitize_caller_function(caller_function: str) -> str: # We split at the opening bracket, sanitize the string # to its right and then merge the function name and # the 
sanitized list with a dot. - caller_function = caller_function.replace("]","") + caller_function = caller_function.replace("]", "") caller_function_list = caller_function.split("[", maxsplit=1) def is_accepted_char(char: str) -> bool: @@ -559,7 +589,8 @@ class FileUtils: @staticmethod def get_test_output_path(caller_function: str, caller_fspath: str) -> str: caller_file_to_dir = os.path.splitext(str(caller_fspath))[0] - rel_path = os.path.relpath(caller_file_to_dir, os.path.dirname(test_output_root)) + dir_name = os.path.dirname(test_output_root) + rel_path = os.path.relpath(caller_file_to_dir, dir_name) dir_path = os.path.join(test_output_root, rel_path, caller_function) return dir_path @@ -574,15 +605,14 @@ def get_test_dir_path(dirname: str) -> str: return dir_path @staticmethod - def make_test_file(file_name: str, file_dir: str, file_content: t.Optional[str] = None) -> str: + def make_test_file( + file_name: str, file_dir: str, file_content: t.Optional[str] = None + ) -> str: """Create a dummy file in the test output directory. :param file_name: name of file to create, e.g. 
"file.txt" - :type file_name: str :param file_dir: path - :type file_dir: str :return: String path to test output file - :rtype: str """ file_path = os.path.join(file_dir, file_name) os.makedirs(file_dir) @@ -625,7 +655,7 @@ def setup_test_colo( db_args: t.Dict[str, t.Any], colo_settings: t.Optional[RunSettings] = None, colo_model_name: str = "colocated_model", - port: int = test_port, + port: t.Optional[int] = None, on_wlm: bool = False, ) -> Model: """Setup database needed for the colo pinning tests""" @@ -641,16 +671,17 @@ def setup_test_colo( if on_wlm: colo_settings.set_tasks(1) colo_settings.set_nodes(1) + colo_model = exp.create_model(colo_model_name, colo_settings) if db_type in ["tcp", "deprecated"]: - db_args["port"] = port + db_args["port"] = port if port is not None else _find_free_port(test_ports) db_args["ifname"] = "lo" if db_type == "uds" and colo_model_name is not None: tmp_dir = tempfile.gettempdir() socket_suffix = str(uuid.uuid4())[:7] - db_args["unix_socket"] = os.path.join(tmp_dir, - f"{colo_model_name}_{socket_suffix}.socket") + socket_name = f"{colo_model_name}_{socket_suffix}.socket" + db_args["unix_socket"] = os.path.join(tmp_dir, socket_name) colocate_fun: t.Dict[str, t.Callable[..., None]] = { "tcp": colo_model.colocate_db_tcp, @@ -659,16 +690,335 @@ def setup_test_colo( } with warnings.catch_warnings(): if db_type == "deprecated": - warnings.filterwarnings( - "ignore", - message="`colocate_db` has been deprecated" - ) + message = "`colocate_db` has been deprecated" + warnings.filterwarnings("ignore", message=message) colocate_fun[db_type](**db_args) # assert model will launch with colocated db assert colo_model.colocated # Check to make sure that limit_db_cpus made it into the colo settings return colo_model + +@pytest.fixture(scope="function") +def global_dragon_teardown() -> None: + """Connect to a dragon server started at the path indicated by + the environment variable SMARTSIM_DRAGON_SERVER_PATH and + force its shutdown to bring 
down the runtime and allow a subsequent + allocation of a new runtime. + """ + if test_launcher != "dragon" or CONFIG.dragon_server_path is None: + return + logger.debug(f"Tearing down Dragon infrastructure, server path: {CONFIG.dragon_server_path}") + dragon_connector = DragonConnector() + dragon_connector.ensure_connected() + dragon_connector.cleanup() + + @pytest.fixture -def config() -> smartsim._core.config.Config: +def config() -> Config: return CONFIG + + +class MockSink: + """Telemetry sink that writes console output for testing purposes""" + + def __init__(self, delay_ms: int = 0) -> None: + self._delay_ms = delay_ms + self.num_saves = 0 + self.args: t.Any = None + + async def save(self, *args: t.Any) -> None: + """Save all arguments as console logged messages""" + self.num_saves += 1 + if self._delay_ms: + # mimic slow collection.... + delay_s = self._delay_ms / 1000 + await asyncio.sleep(delay_s) + self.args = args + + +@pytest.fixture +def mock_sink() -> t.Type[MockSink]: + return MockSink + + +@pytest.fixture +def mock_con() -> t.Callable[[int, int], t.Iterable[t.Any]]: + """Generates mock db connection telemetry""" + + def _mock_con(min: int = 1, max: int = 254) -> t.Iterable[t.Any]: + for i in range(min, max): + yield [ + {"addr": f"127.0.0.{i}:1234", "id": f"ABC{i}"}, + {"addr": f"127.0.0.{i}:2345", "id": f"XYZ{i}"}, + ] + + return _mock_con + + +@pytest.fixture +def mock_mem() -> t.Callable[[int, int], t.Iterable[t.Any]]: + """Generates mock db memory usage telemetry""" + + def _mock_mem(min: int = 1, max: int = 1000) -> t.Iterable[t.Any]: + for i in range(min, max): + yield { + "total_system_memory": 1000 * i, + "used_memory": 1111 * i, + "used_memory_peak": 1234 * i, + } + + return _mock_mem + + +@pytest.fixture +def mock_redis() -> t.Callable[..., t.Any]: + def _mock_redis( + conn_side_effect=None, + mem_stats=None, + client_stats=None, + coll_side_effect=None, + ): + """Generate a mock object for the redis.Redis contract""" + + class MockConn: 
+ def __init__(self, *args: t.Any, **kwargs: t.Any) -> None: + if conn_side_effect is not None: + conn_side_effect() + + async def info(self, *args: t.Any, **kwargs: t.Any) -> t.Dict[str, t.Any]: + if coll_side_effect: + await coll_side_effect() + + if mem_stats: + return next(mem_stats) + return { + "total_system_memory": "111", + "used_memory": "222", + "used_memory_peak": "333", + } + + async def client_list( + self, *args: t.Any, **kwargs: t.Any + ) -> t.Dict[str, t.Any]: + if coll_side_effect: + await coll_side_effect() + + if client_stats: + return next(client_stats) + return {"addr": "127.0.0.1", "id": "111"} + + async def ping(self): + return True + + return MockConn + + return _mock_redis + + +class MockCollectorEntityFunc(t.Protocol): + @staticmethod + def __call__( + host: str = "127.0.0.1", + port: int = 6379, + name: str = "", + type: str = "", + telemetry_on: bool = False, + ) -> "JobEntity": ... + + +@pytest.fixture +def mock_entity(test_dir: str) -> MockCollectorEntityFunc: + def _mock_entity( + host: str = "127.0.0.1", + port: int = 6379, + name: str = "", + type: str = "", + telemetry_on: bool = False, + ) -> "JobEntity": + test_path = pathlib.Path(test_dir) + + entity = JobEntity() + entity.name = name if name else str(uuid.uuid4()) + entity.status_dir = str(test_path / entity.name) + entity.type = type + entity.telemetry_on = True + entity.collectors = { + "client": "", + "client_count": "", + "memory": "", + } + entity.config = { + "host": host, + "port": str(port), + } + entity.telemetry_on = telemetry_on + return entity + + return _mock_entity + + +class CountingCallable: + def __init__(self) -> None: + self._num: int = 0 + self._details: t.List[t.Tuple[t.Tuple[t.Any, ...], t.Dict[str, t.Any]]] = [] + + def __call__(self, *args: t.Any, **kwargs: t.Any) -> t.Any: + self._num += 1 + self._details.append((args, kwargs)) + + @property + def num_calls(self) -> int: + return self._num + + @property + def details(self) -> 
t.List[t.Tuple[t.Tuple[t.Any, ...], t.Dict[str, t.Any]]]: + return self._details + +## Reuse database across tests + +database_registry: t.DefaultDict[str, t.Optional[Orchestrator]] = defaultdict(lambda: None) + +@pytest.fixture(scope="function") +def local_experiment(test_dir: str) -> smartsim.Experiment: + """Create a default experiment that uses the requested launcher""" + name = pathlib.Path(test_dir).stem + return smartsim.Experiment(name, exp_path=test_dir, launcher="local") + +@pytest.fixture(scope="function") +def wlm_experiment(test_dir: str, wlmutils: WLMUtils) -> smartsim.Experiment: + """Create a default experiment that uses the requested launcher""" + name = pathlib.Path(test_dir).stem + return smartsim.Experiment( + name, + exp_path=test_dir, + launcher=wlmutils.get_test_launcher() + ) + +def _cleanup_db(name: str) -> None: + global database_registry + db = database_registry[name] + if db and db.is_active(): + exp = Experiment("cleanup") + try: + db = exp.reconnect_orchestrator(db.checkpoint_file) + exp.stop(db) + except: + pass + +@dataclass +class DBConfiguration: + name: str + launcher: str + num_nodes: int + interface: t.Union[str,t.List[str]] + hostlist: t.Optional[t.List[str]] + port: int + +@dataclass +class PrepareDatabaseOutput: + orchestrator: t.Optional[Orchestrator] # The actual orchestrator object + new_db: bool # True if a new database was created when calling prepare_db + +# Reuse databases +@pytest.fixture(scope="session") +def local_db() -> t.Generator[DBConfiguration, None, None]: + name = "local_db_fixture" + config = DBConfiguration( + name, + "local", + 1, + "lo", + None, + _find_free_port(tuple(reversed(test_ports))), + ) + yield config + _cleanup_db(name) + +@pytest.fixture(scope="session") +def single_db(wlmutils: WLMUtils) -> t.Generator[DBConfiguration, None, None]: + hostlist = wlmutils.get_test_hostlist() + hostlist = hostlist[-1:] if hostlist is not None else None + name = "single_db_fixture" + config = DBConfiguration( + 
name, + wlmutils.get_test_launcher(), + 1, + wlmutils.get_test_interface(), + hostlist, + _find_free_port(tuple(reversed(test_ports))) + ) + yield config + _cleanup_db(name) + + +@pytest.fixture(scope="session") +def clustered_db(wlmutils: WLMUtils) -> t.Generator[DBConfiguration, None, None]: + hostlist = wlmutils.get_test_hostlist() + hostlist = hostlist[-4:-1] if hostlist is not None else None + name = "clustered_db_fixture" + config = DBConfiguration( + name, + wlmutils.get_test_launcher(), + 3, + wlmutils.get_test_interface(), + hostlist, + _find_free_port(tuple(reversed(test_ports))), + ) + yield config + _cleanup_db(name) + + +@pytest.fixture +def register_new_db() -> t.Callable[[DBConfiguration], Orchestrator]: + def _register_new_db( + config: DBConfiguration + ) -> Orchestrator: + exp_path = pathlib.Path(test_output_root, config.name) + exp_path.mkdir(exist_ok=True) + exp = Experiment( + config.name, + exp_path=str(exp_path), + launcher=config.launcher, + ) + orc = exp.create_database( + port=config.port, + batch=False, + interface=config.interface, + hosts=config.hostlist, + db_nodes=config.num_nodes + ) + exp.generate(orc, overwrite=True) + exp.start(orc) + global database_registry + database_registry[config.name] = orc + return orc + return _register_new_db + + +@pytest.fixture(scope="function") +def prepare_db( + register_new_db: t.Callable[ + [DBConfiguration], + Orchestrator + ] +) -> t.Callable[ + [DBConfiguration], + PrepareDatabaseOutput +]: + def _prepare_db(db_config: DBConfiguration) -> PrepareDatabaseOutput: + global database_registry + db = database_registry[db_config.name] + + new_db = False + db_up = False + + if db: + db_up = db.is_active() + + if not db_up or db is None: + db = register_new_db(db_config) + new_db = True + + return PrepareDatabaseOutput(db, new_db) + return _prepare_db diff --git a/doc/_static/version_names.json b/doc/_static/version_names.json index bbe3b332d..bc095f84a 100644 --- a/doc/_static/version_names.json +++ 
b/doc/_static/version_names.json @@ -1,7 +1,8 @@ { "version_names":[ "develop (unstable)", - "0.6.2 (stable)", + "0.7.0 (stable)", + "0.6.2", "0.6.1", "0.6.0", "0.5.1", @@ -14,6 +15,7 @@ "version_urls": [ "https://www.craylabs.org/develop/overview.html", "https://www.craylabs.org/docs/overview.html", + "https://www.craylabs.org/docs/versions/0.6.2/overview.html", "https://www.craylabs.org/docs/versions/0.6.1/overview.html", "https://www.craylabs.org/docs/versions/0.6.0/overview.html", "https://www.craylabs.org/docs/versions/0.5.1/overview.html", diff --git a/doc/api/smartsim_api.rst b/doc/api/smartsim_api.rst index adf7081ec..d9615e04c 100644 --- a/doc/api/smartsim_api.rst +++ b/doc/api/smartsim_api.rst @@ -1,17 +1,15 @@ - ************* SmartSim API ************* - .. _experiment_api: Experiment ========== - .. currentmodule:: smartsim.experiment +.. _exp_init: .. autosummary:: Experiment.__init__ @@ -27,13 +25,17 @@ Experiment Experiment.finished Experiment.get_status Experiment.reconnect_orchestrator + Experiment.preview Experiment.summary + Experiment.telemetry .. autoclass:: Experiment :show-inheritance: :members: +.. _settings-info: + Settings ======== @@ -58,6 +60,7 @@ Types of Settings: MpiexecSettings OrterunSettings JsrunSettings + DragonRunSettings SbatchSettings QsubBatchSettings BsubBatchSettings @@ -111,6 +114,7 @@ steps to a batch. .. autosummary:: SrunSettings.set_nodes + SrunSettings.set_node_feature SrunSettings.set_tasks SrunSettings.set_tasks_per_node SrunSettings.set_walltime @@ -160,6 +164,28 @@ and within batch launches (e.g., ``QsubBatchSettings``) :members: +.. _dragonsettings_api: + +DragonRunSettings +----------------- + +``DragonRunSettings`` can be used on systems that support Slurm or +PBS, if Dragon is available in the Python environment (see `_dragon_install` +for instructions on how to install it through ``smart``). + +``DragonRunSettings`` can be used in interactive sessions (on allcation) +and within batch launches (i.e. 
``SbatchSettings`` or ``QsubBatchSettings``, +for Slurm and PBS sessions, respectively). + +.. autosummary:: + DragonRunSettings.set_nodes + DragonRunSettings.set_tasks_per_node + +.. autoclass:: DragonRunSettings + :inherited-members: + :undoc-members: + :members: + .. _jsrun_api: @@ -377,23 +403,50 @@ container. :undoc-members: :members: +.. _orc_api: Orchestrator ============ .. currentmodule:: smartsim.database -.. _orc_api: +.. autosummary:: + + Orchestrator.__init__ + Orchestrator.db_identifier + Orchestrator.num_shards + Orchestrator.db_nodes + Orchestrator.hosts + Orchestrator.reset_hosts + Orchestrator.remove_stale_files + Orchestrator.get_address + Orchestrator.is_active + Orchestrator.set_cpus + Orchestrator.set_walltime + Orchestrator.set_hosts + Orchestrator.set_batch_arg + Orchestrator.set_run_arg + Orchestrator.enable_checkpoints + Orchestrator.set_max_memory + Orchestrator.set_eviction_strategy + Orchestrator.set_max_clients + Orchestrator.set_max_message_size + Orchestrator.set_db_conf + Orchestrator.telemetry + Orchestrator.checkpoint_file + Orchestrator.batch Orchestrator ------------ +.. _orchestrator_api: .. autoclass:: Orchestrator :members: :inherited-members: :undoc-members: +.. _model_api: Model ===== @@ -417,17 +470,17 @@ Model Model.disable_key_prefixing Model.query_key_prefixing +Model +----- + .. autoclass:: Model :members: :show-inheritance: :inherited-members: -.. _ensemble_api: - Ensemble ======== - .. currentmodule:: smartsim.entity.ensemble .. autosummary:: @@ -443,6 +496,11 @@ Ensemble Ensemble.query_key_prefixing Ensemble.register_incoming_entity +Ensemble +-------- + +.. _ensemble_api: + .. autoclass:: Ensemble :members: :show-inheritance: @@ -461,7 +519,6 @@ SmartSim includes built-in utilities for supporting TensorFlow, Keras, and Pytor TensorFlow ---------- - SmartSim includes built-in utilities for supporting TensorFlow and Keras in training and inference. .. 
currentmodule:: smartsim.ml.tf.utils @@ -510,13 +567,18 @@ SmartSim includes built-in utilities for supporting PyTorch in training and infe Slurm ===== - -.. currentmodule:: smartsim.slurm +.. currentmodule:: smartsim.wlm.slurm .. autosummary:: get_allocation release_allocation - -.. automodule:: smartsim.slurm + validate + get_default_partition + get_hosts + get_queue + get_tasks + get_tasks_per_node + +.. automodule:: smartsim.wlm.slurm :members: diff --git a/doc/batch_settings.rst b/doc/batch_settings.rst new file mode 100644 index 000000000..07cef4c95 --- /dev/null +++ b/doc/batch_settings.rst @@ -0,0 +1,127 @@ +.. _batch_settings_doc: + +************** +Batch Settings +************** +======== +Overview +======== +SmartSim provides functionality to launch entities (``Model`` or ``Ensemble``) +as batch jobs supported by the ``BatchSettings`` base class. While the ``BatchSettings`` base +class is not intended for direct use by users, its derived child classes offer batch +launching capabilities tailored for specific workload managers (WLMs). Each SmartSim +`launcher` interfaces with a ``BatchSettings`` subclass specific to a system's WLM: + +- The Slurm `launcher` supports: + - :ref:`SbatchSettings` +- The PBS Pro `launcher` supports: + - :ref:`QsubBatchSettings` +- The LSF `launcher` supports: + - :ref:`BsubBatchSettings` + +.. note:: + The local `launcher` does not support batch jobs. + +After creating a ``BatchSettings`` instance, users gain access to the methods +of the associated child class, providing them with the ability to further configure the batch +settings for jobs. + +In the following :ref:`Examples` subsection, we demonstrate the initialization +and configuration of a batch settings object. + +.. _batch_settings_ex: + +======== +Examples +======== +A ``BatchSettings`` child class is created using the ``Experiment.create_batch_settings`` +factory method. 
When the user initializes the ``Experiment`` at the beginning of the Python driver script, +they may specify a `launcher` argument. SmartSim will then register or detect the `launcher` and return the +corresponding supported child class when ``Experiment.create_batch_settings`` is called. This +design allows SmartSim driver scripts utilizing ``BatchSettings`` to be portable between systems, +requiring only a change in the specified `launcher` during ``Experiment`` initialization. + +Below are examples of how to initialize a ``BatchSettings`` object per `launcher`. + +.. tabs:: + + .. group-tab:: Slurm + To instantiate the ``SbatchSettings`` object, which interfaces with the Slurm job scheduler, specify + `launcher="slurm"` when initializing the ``Experiment``. Upon calling ``create_batch_settings``, + SmartSim will detect the job scheduler and return the appropriate batch settings object. + + .. code-block:: python + + from smartsim import Experiment + + # Initialize the experiment and provide launcher Slurm + exp = Experiment("name-of-experiment", launcher="slurm") + + # Initialize a SbatchSettings object + sbatch_settings = exp.create_batch_settings(nodes=1, time="10:00:00") + # Set the account for the slurm batch job + sbatch_settings.set_account("12345-Cray") + # Set the partition for the slurm batch job + sbatch_settings.set_queue("default") + + The initialized ``SbatchSettings`` instance can now be passed to a SmartSim entity + (``Model`` or ``Ensemble``) via the `batch_settings` argument in ``create_batch_settings``. + + .. note:: + If `launcher="auto"`, SmartSim will detect that the ``Experiment`` is running on a Slurm based + machine and set the launcher to `"slurm"`. + + .. group-tab:: PBS Pro + To instantiate the ``QsubBatchSettings`` object, which interfaces with the PBS Pro job scheduler, specify + `launcher="pbs"` when initializing the ``Experiment``. 
Upon calling ``create_batch_settings``, + SmartSim will detect the job scheduler and return the appropriate batch settings object. + + .. code-block:: python + + from smartsim import Experiment + + # Initialize the experiment and provide launcher PBS Pro + exp = Experiment("name-of-experiment", launcher="pbs") + + # Initialize a QsubBatchSettings object + qsub_batch_settings = exp.create_batch_settings(nodes=1, time="10:00:00") + # Set the account for the PBS Pro batch job + qsub_batch_settings.set_account("12345-Cray") + # Set the partition for the PBS Pro batch job + qsub_batch_settings.set_queue("default") + + The initialized ``QsubBatchSettings`` instance can now be passed to a SmartSim entity + (``Model`` or ``Ensemble``) via the `batch_settings` argument in ``create_batch_settings``. + + .. note:: + If `launcher="auto"`, SmartSim will detect that the ``Experiment`` is running on a PBS Pro based + machine and set the launcher to `"pbs"`. + + .. group-tab:: LSF + To instantiate the ``BsubBatchSettings`` object, which interfaces with the LSF job scheduler, specify + `launcher="lsf"` when initializing the ``Experiment``. Upon calling ``create_batch_settings``, + SmartSim will detect the job scheduler and return the appropriate batch settings object. + + .. code-block:: python + + from smartsim import Experiment + + # Initialize the experiment and provide launcher LSF + exp = Experiment("name-of-experiment", launcher="lsf") + + # Initialize a BsubBatchSettings object + bsub_batch_settings = exp.create_batch_settings(nodes=1, time="10:00:00", batch_args={"ntasks": 1}) + # Set the account for the lsf batch job + bsub_batch_settings.set_account("12345-Cray") + # Set the partition for the lsf batch job + bsub_batch_settings.set_queue("default") + + The initialized ``BsubBatchSettings`` instance can now be passed to a SmartSim entity + (``Model`` or ``Ensemble``) via the `batch_settings` argument in ``create_batch_settings``. + + .. 
note:: + If `launcher="auto"`, SmartSim will detect that the ``Experiment`` is running on a LSF based + machine and set the launcher to `"lsf"`. + +.. warning:: + Note that initialization values provided (e.g., `nodes`, `time`, etc) will overwrite the same arguments in `batch_args` if present. \ No newline at end of file diff --git a/doc/changelog.md b/doc/changelog.md new file mode 100644 index 000000000..73ea36511 --- /dev/null +++ b/doc/changelog.md @@ -0,0 +1,952 @@ +# Changelog + +Listed here are the changes between each release of SmartSim, +SmartRedis and SmartDashboard. + +Jump to: +- {ref}`SmartRedis changelog` +- {ref}`SmartDashboard changelog` + +## SmartSim + +### 0.7.0 + +Released on 14 May, 2024 + +Description + +- Improve Dragon server shutdown +- Add dragon runtime installer +- Add launcher based on Dragon +- Reuse Orchestrators within the testing suite to improve performance. +- Fix building of documentation +- Preview entities on experiment before start +- Update authentication in release workflow +- Auto-generate type-hints into documentation +- Auto-post release PR to develop +- Bump manifest.json to version 0.0.4 +- Fix symlinking batch ensemble and model bug +- Fix noisy failing WLM test +- Remove defensive regexp in .gitignore +- Upgrade ubuntu to 22.04 +- Remove helper function `init_default` +- Fix telemetry monitor logging errors for task history +- Change default path for entities +- Drop Python 3.8 support +- Update watchdog dependency +- Historical output files stored under .smartsim directory +- Fixes unfalsifiable test that tests SmartSim's custom SIGINT signal + handler +- Add option to build Torch backend without the Intel Math Kernel + Library +- Fix ReadTheDocs build issue +- Disallow uninitialized variable use +- Promote device options to an Enum +- Update telemetry monitor, add telemetry collectors +- Add method to specify node features for a Slurm job +- Colo Orchestrator setup now blocks application start until setup + 
finished +- Refactor areas of the code where mypy potential errors +- Minor enhancements to test suite +- ExecArgs handling correction +- ReadTheDocs config file added and enabled on PRs +- Enforce changelog updates +- Fix Jupyter notebook math expressions +- Remove deprecated SmartSim modules +- SmartSim Documentation refactor +- Promote SmartSim statuses to a dedicated type +- Update the version of Redis from [7.0.4]{.title-ref} to + [7.2.4]{.title-ref} +- Increase disk space in doc builder container +- Update Experiment API typing +- Prevent duplicate entity names +- Fix publishing of development docs + +Detailed Notes + +- The Dragon server will now terminate any process which is still running + when a request of an immediate shutdown is sent. ([SmartSim-PR582](https://github.com/CrayLabs/SmartSim/pull/582)) +- Add `--dragon` option to `smart build`. Install appropriate Dragon + runtime from Dragon GitHub release assets. + ([SmartSim-PR580](https://github.com/CrayLabs/SmartSim/pull/580)) +- Add new launcher, based on [Dragon](https://dragonhpc.github.io/dragon/doc/_build/html/index.html). + The new launcher is compatible with the Slurm and PBS schedulers and can + be selected by specifying ``launcher="dragon"`` when creating an `Experiment`, + or by using ``DragonRunSettings`` to launch a job. The Dragon launcher + is at an early stage of development: early adopters are referred to the + dedicated documentation section to learn more about it. ([SmartSim-PR580](https://github.com/CrayLabs/SmartSim/pull/580)) +- Tests may now request a given configuration and will reconnect to + the existing orchestrator instead of building up and tearing down + a new one each test. + ([SmartSim-PR567](https://github.com/CrayLabs/SmartSim/pull/567)) +- Manually ensure that typing_extensions==4.6.1 in Dockerfile used to build + docs. 
This fixes the deploy_dev_docs Github action ([SmartSim-PR564](https://github.com/CrayLabs/SmartSim/pull/564)) +- Added preview functionality to Experiment, including preview of all entities, active infrastructure and + client configuration. ([SmartSim-PR525](https://github.com/CrayLabs/SmartSim/pull/525)) +- Replace the developer created token with the GH_TOKEN environment variable. + ([SmartSim-PR570](https://github.com/CrayLabs/SmartSim/pull/570)) +- Add extension to auto-generate function type-hints into documentation. + ([SmartSim-PR561](https://github.com/CrayLabs/SmartSim/pull/561)) +- Add to github release workflow to auto generate a pull request from + master into develop for release. + ([SmartSim-PR566](https://github.com/CrayLabs/SmartSim/pull/566)) +- The manifest.json version needs to match the SmartDashboard version, + which is 0.0.4 in the upcoming release. + ([SmartSim-PR563](https://github.com/CrayLabs/SmartSim/pull/563)) +- Properly symlinks batch ensembles and batch models. + ([SmartSim-PR547](https://github.com/CrayLabs/SmartSim/pull/547)) +- Remove defensive regexp in .gitignore and ensure tests write to + test_output. + ([SmartSim-PR560](https://github.com/CrayLabs/SmartSim/pull/560)) +- After dropping support for Python 3.8, ubuntu needs to be upgraded. + ([SmartSim-PR558](https://github.com/CrayLabs/SmartSim/pull/558)) +- Remove helper function `init_default` and replace with traditional + type narrowing. + ([SmartSim-PR545](https://github.com/CrayLabs/SmartSim/pull/545)) +- Ensure the telemetry monitor does not track a task_id for a managed + task. + ([SmartSim-PR557](https://github.com/CrayLabs/SmartSim/pull/557)) +- The default path for an entity is now the path to the experiment / + the entity name. create_database and create_ensemble now have path + arguments. All path arguments are compatible with relative paths. + Relative paths are relative to the CWD. 
+ ([SmartSim-PR533](https://github.com/CrayLabs/SmartSim/pull/533)) +- Python 3.8 is reaching its end-of-life in October, 2024, so it will + no longer continue to be supported. + ([SmartSim-PR544](https://github.com/CrayLabs/SmartSim/pull/544)) +- Update watchdog dependency from 3.x to 4.x, fix new type issues + ([SmartSim-PR540](https://github.com/CrayLabs/SmartSim/pull/540)) +- The dashboard needs to display historical logs, so log files are + written out under the .smartsim directory and files under the + experiment directory are symlinked to them. + ([SmartSim-PR532](https://github.com/CrayLabs/SmartSim/pull/532)) +- Add an option to smart build + \"\--torch_with_mkl\"/\"\--no_torch_with_mkl\" to prevent Torch from + trying to link in the Intel Math Kernel Library. This is needed + because on machines that have the Intel compilers installed, the + Torch will unconditionally try to link in this library, however + fails because the linking flags are incorrect. + ([SmartSim-PR538](https://github.com/CrayLabs/SmartSim/pull/538)) +- Change typing\_extensions and pydantic versions in readthedocs + environment to enable docs build. + ([SmartSim-PR537](https://github.com/CrayLabs/SmartSim/pull/537)) +- Promote devices to a dedicated Enum type throughout the SmartSim + code base. + ([SmartSim-PR527](https://github.com/CrayLabs/SmartSim/pull/527)) +- Update the telemetry monitor to enable retrieval of metrics on a + scheduled interval. Switch basic experiment tracking telemetry to + default to on. Add database metric collectors. Improve telemetry + monitor logging. Create telemetry subpackage at + [smartsim.\_core.utils.telemetry]{.title-ref}. Refactor telemetry + monitor entrypoint. + ([SmartSim-PR460](https://github.com/CrayLabs/SmartSim/pull/460)) +- Users can now specify node features for a Slurm job through + `SrunSettings.set_node_feature`. The method accepts a string or list + of strings. 
+ ([SmartSim-PR529](https://github.com/CrayLabs/SmartSim/pull/529)) +- The request to the colocated entrypoints file within the shell + script is now a blocking process. Once the Orchestrator is setup, it + returns which moves the process to the background and allows the + application to start. This prevents the application from requesting + a ML model or script that has not been uploaded to the Orchestrator + yet. + ([SmartSim-PR522](https://github.com/CrayLabs/SmartSim/pull/522)) +- Add checks and tests to ensure SmartSim users cannot initialize run + settings with a list of lists as the exe_args argument. + ([SmartSim-PR517](https://github.com/CrayLabs/SmartSim/pull/517)) +- Add readthedocs configuration file and enable readthedocs builds on + pull requests. Additionally added robots.txt file generation when + readthedocs environment detected. + ([SmartSim-PR512](https://github.com/CrayLabs/SmartSim/pull/512)) +- Add Github Actions workflow that checks if changelog is edited on + pull requests into develop. + ([SmartSim-PR518](https://github.com/CrayLabs/SmartSim/pull/518)) +- Add path to MathJax.js file so that Sphinx will use to render math + expressions. + ([SmartSim-PR516](https://github.com/CrayLabs/SmartSim/pull/516)) +- Removed deprecated SmartSim modules: slurm and mpirunSettings. + ([SmartSim-PR514](https://github.com/CrayLabs/SmartSim/pull/514)) +- Implemented new structure of SmartSim documentation. Added examples + images and further detail of SmartSim components. + ([SmartSim-PR463](https://github.com/CrayLabs/SmartSim/pull/463)) +- Promote SmartSim statuses to a dedicated type named SmartSimStatus. + ([SmartSim-PR509](https://github.com/CrayLabs/SmartSim/pull/509)) +- Update Redis version to [7.2.4]{.title-ref}. This change fixes an + issue in the Redis build scripts causing failures on Apple Silicon + hosts. 
+ ([SmartSim-PR507](https://github.com/CrayLabs/SmartSim/pull/507)) +- The container which builds the documentation for every merge to + develop was failing due to a lack of space within the container. + This was fixed by including an additional Github action that removes + some unneeded software and files that come from the default Github + Ubuntu container. + ([SmartSim-PR504](https://github.com/CrayLabs/SmartSim/pull/504)) +- Update the generic [t.Any]{.title-ref} typehints in Experiment API. + ([SmartSim-PR501](https://github.com/CrayLabs/SmartSim/pull/501)) +- The CI will fail static analysis if common erroneous truthy checks + are detected. + ([SmartSim-PR524](https://github.com/CrayLabs/SmartSim/pull/524)) +- Prevent the launch of duplicate named entities. Allow completed + entities to run. + ([SmartSim-PR480](https://github.com/CrayLabs/SmartSim/pull/480)) +- The CI will fail static analysis if a local variable used while + potentially undefined. + ([SmartSim-PR521](https://github.com/CrayLabs/SmartSim/pull/521)) +- Remove previously deprecated behavior present in test suite on + machines with Slurm and Open MPI. + ([SmartSim-PR520](https://github.com/CrayLabs/SmartSim/pull/520)) +- Experiments in the WLM tests are given explicit paths to prevent + unexpected directory creation. Ensure database are not left open on + test suite failures. Update path to pickle file in + `tests/full_wlm/test_generic_orc_launch_batch.py::test_launch_cluster_orc_reconnect` + to conform with changes made in + ([SmartSim-PR533](https://github.com/CrayLabs/SmartSim/pull/533)). + ([SmartSim-PR559](https://github.com/CrayLabs/SmartSim/pull/559)) +- When calling `Experiment.start` SmartSim would register a signal + handler that would capture an interrupt signal (\^C) to kill any + jobs launched through its `JobManager`. This would replace the + default (or user defined) signal handler. 
SmartSim will now attempt + to kill any launched jobs before calling the previously registered + signal handler. + ([SmartSim-PR535](https://github.com/CrayLabs/SmartSim/pull/535)) + +### 0.6.2 + +Released on 16 February, 2024 + +Description + +- Patch SmartSim dependency version + +Detailed Notes + +- A critical performance concern was identified and addressed in + SmartRedis. A patch fix was deployed, and SmartSim was updated to + ensure users do not inadvertently pull the unpatched version of + SmartRedis. + ([SmartSim-PR493](https://github.com/CrayLabs/SmartSim/pull/493)) + +### 0.6.1 + +Released on 15 February, 2024 + +Description + +- Duplicate for DBModel/Script prevented +- Update license to include 2024 +- Telemetry monitor is now active by default +- Add support for Mac OSX on Apple Silicon +- Remove Torch warnings during testing +- Validate Slurm timing format +- Expose Python Typehints +- Fix test_logs to prevent generation of directory +- Fix Python Typehint for colocated database settings +- Python 3.11 Support +- Quality of life [smart validate]{.title-ref} improvements +- Remove Cobalt support +- Enrich logging through context variables +- Upgrade Machine Learning dependencies +- Override sphinx-tabs background color +- Add concurrency group to test workflow +- Fix index when installing torch through smart build + +Detailed Notes + +- Modify the [git clone]{.title-ref} for both Redis and RedisAI to set + the line endings to unix-style line endings when using MacOS on ARM. + ([SmartSim-PR482](https://github.com/CrayLabs/SmartSim/pull/482)) +- Separate install instructions are now provided for Mac OSX on x64 vs + ARM64 + ([SmartSim-PR479](https://github.com/CrayLabs/SmartSim/pull/479)) +- Prevent duplicate ML model and script names being added to an + Ensemble member if the names exists. 
+ ([SmartSim-PR475](https://github.com/CrayLabs/SmartSim/pull/475)) +- Updates [Copyright (c) 2021-2023]{.title-ref} to [Copyright (c) + 2021-2024]{.title-ref} in all of the necessary files. + ([SmartSim-PR485](https://github.com/CrayLabs/SmartSim/pull/485)) +- Bug fix which prevents the expected behavior when the + [SMARTSIM_LOG_LEVEL]{.title-ref} environment variable was set to + [developer]{.title-ref}. + ([SmartSim-PR473](https://github.com/CrayLabs/SmartSim/pull/473)) +- Sets the default value of the \"enable telemetry\" flag to on. Bumps + the output [manifest.json]{.title-ref} version number to match that + of [smartdashboard]{.title-ref} and pins a watchdog version to avoid + build errors. + ([SmartSim-PR477](https://github.com/CrayLabs/SmartSim/pull/477)) +- Refactor logic of [Manifest.has_db_objects]{.title-ref} to remove + excess branching and improve readability/maintainability. + ([SmartSim-PR476](https://github.com/CrayLabs/SmartSim/pull/476)) +- SmartSim can now be built and used on platforms using Apple Silicon + (ARM64). Currently, only the PyTorch backend is supported. Note that + libtorch will be downloaded from a CrayLabs github repo. + ([SmartSim-PR465](https://github.com/CrayLabs/SmartSim/pull/465)) +- Tests that were saving Torch models were emitting warnings. These + warnings were addressed by updating the model save test function. + ([SmartSim-PR472](https://github.com/CrayLabs/SmartSim/pull/472)) +- Validate the timing format when requesting a slurm allocation. + ([SmartSim-PR471](https://github.com/CrayLabs/SmartSim/pull/471)) +- Add and ship [py.typed]{.title-ref} marker to expose inline type + hints. Fix type errors related to SmartRedis. + ([SmartSim-PR468](https://github.com/CrayLabs/SmartSim/pull/468)) +- Fix the [test_logs.py::test_context_leak]{.title-ref} test that was + erroneously creating a directory named [some value]{.title-ref} in + SmartSim\'s root directory. 
+ ([SmartSim-PR467](https://github.com/CrayLabs/SmartSim/pull/467)) +- Add Python type hinting to colocated settings. + ([SmartSim-PR462](https://github.com/CrayLabs/SmartSim/pull/462)) +- Add github actions for running black and isort checks. + ([SmartSim-PR464](https://github.com/CrayLabs/SmartSim/pull/464)) +- Relax the required version of [typing_extensions]{.title-ref}. + ([SmartSim-PR459](https://github.com/CrayLabs/SmartSim/pull/459)) +- Addition of Python 3.11 to SmartSim. + ([SmartSim-PR461](https://github.com/CrayLabs/SmartSim/pull/461)) +- Quality of life [smart validate]{.title-ref} improvements such as + setting [CUDA_VISIBLE_DEVICES]{.title-ref} environment variable + within [smart validate]{.title-ref} prior to importing any ML deps + to prevent false negatives on multi-GPU systems. Additionally, move + SmartRedis logs from standard out to dedicated log file in the + validation temporary directory as well as suppress + [sklearn]{.title-ref} deprecation warning by pinning + [KMeans]{.title-ref} constructor argument. Lastly, move TF test to + last as TF may reserve the GPUs it uses. + ([SmartSim-PR458](https://github.com/CrayLabs/SmartSim/pull/458)) +- Some actions in the current GitHub CI/CD workflows were outdated. + They were replaced with the latest versions. + ([SmartSim-PR446](https://github.com/CrayLabs/SmartSim/pull/446)) +- As the Cobalt workload manager is not used on any system we are + aware of, its support in SmartSim was terminated and classes such as + [CobaltLauncher]{.title-ref} have been removed. + ([SmartSim-PR448](https://github.com/CrayLabs/SmartSim/pull/448)) +- Experiment logs are written to a file that can be read by the + dashboard. + ([SmartSim-PR452](https://github.com/CrayLabs/SmartSim/pull/452)) +- Updated SmartSim\'s machine learning backends to PyTorch 2.0.1, + Tensorflow 2.13.1, ONNX 1.14.1, and ONNX Runtime 1.16.1. 
As a result + of this change, there is now an available ONNX wheel for use with + Python 3.10, and wheels for all of SmartSim\'s machine learning + backends with Python 3.11. + ([SmartSim-PR451](https://github.com/CrayLabs/SmartSim/pull/451)) + ([SmartSim-PR461](https://github.com/CrayLabs/SmartSim/pull/461)) +- The sphinx-tabs documentation extension uses a white background for + the tabs component. A custom CSS for those components to inherit the + overall theme color has been added. + ([SmartSim-PR453](https://github.com/CrayLabs/SmartSim/pull/453)) +- Add concurrency groups to GitHub\'s CI/CD workflows, preventing + multiple workflows from the same PR to be launched concurrently. + ([SmartSim-PR439](https://github.com/CrayLabs/SmartSim/pull/439)) +- Torch changed their preferred indexing when trying to install their + provided wheels. Updated the [pip install]{.title-ref} command + within [smart build]{.title-ref} to ensure that the appropriate + packages can be found. + ([SmartSim-PR449](https://github.com/CrayLabs/SmartSim/pull/449)) + +### 0.6.0 + +Released on 18 December, 2023 + +Description + +- Conflicting directives in the SmartSim packaging instructions were + fixed +- [sacct]{.title-ref} and [sstat]{.title-ref} errors are now fatal for + Slurm-based workflow executions +- Added documentation section about ML features and TorchScript +- Added TorchScript functions to Online Analysis tutorial +- Added multi-DB example to documentation +- Improved test stability on HPC systems +- Added support for producing & consuming telemetry outputs +- Split tests into groups for parallel execution in CI/CD pipeline +- Change signature of [Experiment.summary()]{.title-ref} +- Expose first_device parameter for scripts, functions, models +- Added support for MINBATCHTIMEOUT in model execution +- Remove support for RedisAI 1.2.5, use RedisAI 1.2.7 commit +- Add support for multiple databases + +Detailed Notes + +- Several conflicting directives between the 
[setup.py]{.title-ref} + and the [setup.cfg]{.title-ref} were fixed to mitigate warnings + issued when building the pip wheel. + ([SmartSim-PR435](https://github.com/CrayLabs/SmartSim/pull/435)) +- When the Slurm functions [sacct]{.title-ref} and [sstat]{.title-ref} + returned an error, it would be ignored and SmartSim\'s state could + become inconsistent. To prevent this, errors raised by + [sacct]{.title-ref} or [sstat]{.title-ref} now result in an + exception. + ([SmartSim-PR392](https://github.com/CrayLabs/SmartSim/pull/392)) +- A section named *ML Features* was added to documentation. It + contains multiple examples of how ML models and functions can be + added to and executed on the DB. TorchScript-based post-processing + was added to the *Online Analysis* tutorial + ([SmartSim-PR411](https://github.com/CrayLabs/SmartSim/pull/411)) +- An example of how to use multiple Orchestrators concurrently was + added to the documentation + ([SmartSim-PR409](https://github.com/CrayLabs/SmartSim/pull/409)) +- The test infrastructure was improved. Tests on HPC system are now + stable, and issues such as non-stopped [Orchestrators]{.title-ref} + or experiments created in the wrong paths have been fixed + ([SmartSim-PR381](https://github.com/CrayLabs/SmartSim/pull/381)) +- A telemetry monitor was added to check updates and produce events + for SmartDashboard + ([SmartSim-PR426](https://github.com/CrayLabs/SmartSim/pull/426)) +- Split tests into [group_a]{.title-ref}, [group_b]{.title-ref}, + [slow_tests]{.title-ref} for parallel execution in CI/CD pipeline + ([SmartSim-PR417](https://github.com/CrayLabs/SmartSim/pull/417), + [SmartSim-PR424](https://github.com/CrayLabs/SmartSim/pull/424)) +- Change [format]{.title-ref} argument to [style]{.title-ref} in + [Experiment.summary()]{.title-ref}, this is an API break + ([SmartSim-PR391](https://github.com/CrayLabs/SmartSim/pull/391)) +- Added support for first_device parameter for scripts, functions, and + models. 
This causes them to be loaded to the first num_devices + beginning with first_device + ([SmartSim-PR394](https://github.com/CrayLabs/SmartSim/pull/394)) +- Added support for MINBATCHTIMEOUT in model execution, which caps the + delay waiting for a minimium number of model execution operations to + accumulate before executing them as a batch + ([SmartSim-PR387](https://github.com/CrayLabs/SmartSim/pull/387)) +- RedisAI 1.2.5 is not supported anymore. The only RedisAI version is + now 1.2.7. Since the officially released RedisAI 1.2.7 has a bug + which breaks the build process on Mac OSX, it was decided to use + commit + [634916c](https://github.com/RedisAI/RedisAI/commit/634916c722e718cc6ea3fad46e63f7d798f9adc2) + from RedisAI\'s GitHub repository, where such bug has been fixed. + This applies to all operating systems. + ([SmartSim-PR383](https://github.com/CrayLabs/SmartSim/pull/383)) +- Add support for creation of multiple databases with unique + identifiers. + ([SmartSim-PR342](https://github.com/CrayLabs/SmartSim/pull/342)) + +### 0.5.1 + +Released on 14 September, 2023 + +Description + +- Add typehints throughout the SmartSim codebase +- Provide support for Slurm heterogeneous jobs +- Provide better support for [PalsMpiexecSettings]{.title-ref} +- Allow for easier inspection of SmartSim entities +- Log ignored error messages from [sacct]{.title-ref} +- Fix colocated db preparation bug when using + [JsrunSettings]{.title-ref} +- Fix bug when user specify CPU and devices greater than 1 +- Fix bug when get_allocation called with reserved keywords +- Enabled mypy in CI for better type safety +- Mitigate additional suppressed pylint errors +- Update linting support and apply to existing errors +- Various improvements to the [smart]{.title-ref} CLI +- Various documentation improvements +- Various test suite improvements + +Detailed Notes + +- Add methods to allow users to inspect files attached to models and + ensembles. 
+ ([SmartSim-PR352](https://github.com/CrayLabs/SmartSim/pull/352)) +- Add a [smart info]{.title-ref} target to provide rudimentary + information about the SmartSim installation. + ([SmartSim-PR350](https://github.com/CrayLabs/SmartSim/pull/350)) +- Remove unnecessary generation producing unexpected directories in + the test suite. + ([SmartSim-PR349](https://github.com/CrayLabs/SmartSim/pull/349)) +- Add support for heterogeneous jobs to [SrunSettings]{.title-ref} by + allowing users to set the [\--het-group]{.title-ref} parameter. + ([SmartSim-PR346](https://github.com/CrayLabs/SmartSim/pull/346)) +- Provide clearer guidelines on how to contribute to SmartSim. + ([SmartSim-PR344](https://github.com/CrayLabs/SmartSim/pull/344)) +- Integrate [PalsMpiexecSettings]{.title-ref} into the + [Experiment]{.title-ref} factory methods when using the + [\"pals\"]{.title-ref} launcher. + ([SmartSim-PR343](https://github.com/CrayLabs/SmartSim/pull/343)) +- Create public properties where appropriate to mitigate + [protected-access]{.title-ref} errors. + ([SmartSim-PR341](https://github.com/CrayLabs/SmartSim/pull/341)) +- Fix a failure to execute [\_prep_colocated_db]{.title-ref} due to + incorrect named attr check. + ([SmartSim-PR339](https://github.com/CrayLabs/SmartSim/pull/339)) +- Enabled and mitigated mypy [disallow_any_generics]{.title-ref} and + [warn_return_any]{.title-ref}. + ([SmartSim-PR338](https://github.com/CrayLabs/SmartSim/pull/338)) +- Add a [smart validate]{.title-ref} target to provide a simple smoke + test to assess a SmartSim build. + ([SmartSim-PR336](https://github.com/CrayLabs/SmartSim/pull/336), + [SmartSim-PR351](https://github.com/CrayLabs/SmartSim/pull/351)) +- Add typehints to [smartsim.\_core.launcher.step.\*]{.title-ref}. + ([SmartSim-PR334](https://github.com/CrayLabs/SmartSim/pull/334)) +- Log errors reported from slurm WLM when attempts to retrieve status + fail. 
+ ([SmartSim-PR331](https://github.com/CrayLabs/SmartSim/pull/331), + [SmartSim-PR332](https://github.com/CrayLabs/SmartSim/pull/332)) +- Fix incorrectly formatted positional arguments in log format + strings. + ([SmartSim-PR330](https://github.com/CrayLabs/SmartSim/pull/330)) +- Ensure that launchers pass environment variables to unmanaged job + steps. + ([SmartSim-PR329](https://github.com/CrayLabs/SmartSim/pull/329)) +- Add additional tests surrounding the [RAI_PATH]{.title-ref} + configuration environment variable. + ([SmartSim-PR328](https://github.com/CrayLabs/SmartSim/pull/328)) +- Remove unnecessary execution of unescaped shell commands. + ([SmartSim-PR327](https://github.com/CrayLabs/SmartSim/pull/327)) +- Add error if user calls get_allocation with reserved keywords in + slurm get_allocation. + ([SmartSim-PR325](https://github.com/CrayLabs/SmartSim/pull/325)) +- Add error when user requests CPU with devices greater than 1 within + add_ml_model and add_script. + ([SmartSim-PR324](https://github.com/CrayLabs/SmartSim/pull/324)) +- Update documentation surrounding ensemble key prefixing. + ([SmartSim-PR322](https://github.com/CrayLabs/SmartSim/pull/322)) +- Fix formatting of the Frontier site installation. + ([SmartSim-PR321](https://github.com/CrayLabs/SmartSim/pull/321)) +- Update pylint dependency, update .pylintrc, mitigate non-breaking + issues, suppress api breaks. + ([SmartSim-PR311](https://github.com/CrayLabs/SmartSim/pull/311)) +- Refactor the [smart]{.title-ref} CLI to use subparsers for better + documentation and extension. 
+ ([SmartSim-PR308](https://github.com/CrayLabs/SmartSim/pull/308)) + +### 0.5.0 + +Released on 6 July, 2023 + +Description + +A full list of changes and detailed notes can be found below: + +- Update SmartRedis dependency to v0.4.1 +- Fix tests for db models and scripts +- Fix add_ml_model() and add_script() documentation, tests, and code +- Remove [requirements.txt]{.title-ref} and other places where + dependencies were defined +- Replace [limit_app_cpus]{.title-ref} with + [limit_db_cpus]{.title-ref} for co-located orchestrators +- Remove wait time associated with Experiment launch summary +- Update and rename Redis conf file +- Migrate from redis-py-cluster to redis-py +- Update full test suite to not require a TF wheel at test time +- Update doc strings +- Remove deprecated code +- Relax the coloredlogs version +- Update Fortran tutorials for SmartRedis +- Add support for multiple network interface binding in Orchestrator + and Colocated DBs +- Add typehints and static analysis + +Detailed notes + +- Updates SmartRedis to the most current release + ([SmartSim-PR316](https://github.com/CrayLabs/SmartSim/pull/316)) +- Fixes and enhancements to documentation + ([SmartSim-PR317](https://github.com/CrayLabs/SmartSim/pull/317), + [SmartSim-PR314](https://github.com/CrayLabs/SmartSim/pull/314), + [SmartSim-PR287](https://github.com/CrayLabs/SmartSim/pull/287)) +- Various fixes and enhancements to the test suite + ([SmartSim-PR315](https://github.com/CrayLabs/SmartSim/pull/314), + [SmartSim-PR312](https://github.com/CrayLabs/SmartSim/pull/312), + [SmartSim-PR310](https://github.com/CrayLabs/SmartSim/pull/310), + [SmartSim-PR302](https://github.com/CrayLabs/SmartSim/pull/302), + [SmartSim-PR283](https://github.com/CrayLabs/SmartSim/pull/283)) +- Fix a defect in the tests related to database models and scripts + that was causing key collisions when testing on workload managers + ([SmartSim-PR313](https://github.com/CrayLabs/SmartSim/pull/313)) +- Remove 
[requirements.txt]{.title-ref} and other places where + dependencies were defined. + ([SmartSim-PR307](https://github.com/CrayLabs/SmartSim/pull/307)) +- Fix defect where dictionaries used to create run settings can be + changed unexpectedly due to copy-by-ref + ([SmartSim-PR305](https://github.com/CrayLabs/SmartSim/pull/305)) +- The underlying code for Model.add_ml_model() and Model.add_script() + was fixed to correctly handle multi-GPU configurations. Tests were + updated to run on non-local launchers. Documentation was updated and + fixed. Also, the default testing interface has been changed to lo + instead of ipogif. + ([SmartSim-PR304](https://github.com/CrayLabs/SmartSim/pull/304)) +- Typehints have been added. A makefile target [make + check-mypy]{.title-ref} executes static analysis with mypy. + ([SmartSim-PR295](https://github.com/CrayLabs/SmartSim/pull/295), + [SmartSim-PR301](https://github.com/CrayLabs/SmartSim/pull/301), + [SmartSim-PR303](https://github.com/CrayLabs/SmartSim/pull/303)) +- Replace [limit_app_cpus]{.title-ref} with + [limit_db_cpus]{.title-ref} for co-located orchestrators. This + resolves some incorrect behavior/assumptions about how the + application would be pinned. 
Instead, users should directly specify + the binding options in their application using the options + appropriate for their launcher + ([SmartSim-PR306](https://github.com/CrayLabs/SmartSim/pull/306)) +- Simplify code in [random_permutations]{.title-ref} parameter + generation strategy + ([SmartSim-PR300](https://github.com/CrayLabs/SmartSim/pull/300)) +- Remove wait time associated with Experiment launch summary + ([SmartSim-PR298](https://github.com/CrayLabs/SmartSim/pull/298)) +- Update Redis conf file to conform with Redis v7.0.5 conf file + ([SmartSim-PR293](https://github.com/CrayLabs/SmartSim/pull/293)) +- Migrate from redis-py-cluster to redis-py for cluster status checks + ([SmartSim-PR292](https://github.com/CrayLabs/SmartSim/pull/292)) +- Update full test suite to no longer require a tensorflow wheel to be + available at test time. + ([SmartSim-PR291](https://github.com/CrayLabs/SmartSim/pull/291)) +- Correct spelling of colocated in doc strings + ([SmartSim-PR290](https://github.com/CrayLabs/SmartSim/pull/290)) +- Deprecated launcher-specific orchestrators, constants, and ML + utilities were removed. + ([SmartSim-PR289](https://github.com/CrayLabs/SmartSim/pull/289)) +- Relax the coloredlogs version to be greater than 10.0 + ([SmartSim-PR288](https://github.com/CrayLabs/SmartSim/pull/288)) +- Update the Github Actions runner image from + [macos-10.15]{.title-ref}[ to \`macos-12]{.title-ref}\`. The former + began deprecation in May 2022 and was finally removed in May 2023. + ([SmartSim-PR285](https://github.com/CrayLabs/SmartSim/pull/285)) +- The Fortran tutorials had not been fully updated to show how to + handle return/error codes. These have now all been updated. + ([SmartSim-PR284](https://github.com/CrayLabs/SmartSim/pull/284)) +- Orchestrator and Colocated DB now accept a list of interfaces to + bind to. The argument name is still [interface]{.title-ref} for + backward compatibility reasons. 
+ ([SmartSim-PR281](https://github.com/CrayLabs/SmartSim/pull/281)) +- Typehints have been added to public APIs. A makefile target to + execute static analysis with mypy is available [make + check-mypy]{.title-ref}. + ([SmartSim-PR295](https://github.com/CrayLabs/SmartSim/pull/295)) + +### 0.4.2 + +Released on April 12, 2023 + +Description + +This release of SmartSim had a focus on polishing and extending existing +features already provided by SmartSim. Most notably, this release +provides support to allow users to colocate their models with an +orchestrator using Unix domain sockets and support for launching models +as batch jobs. + +Additionally, SmartSim has updated its tool chains to provide a better +user experience. Notably, SmartSim can now be used with Python 3.10, +Redis 7.0.5, and RedisAI 1.2.7. Furthermore, SmartSim now utilizes +SmartRedis\'s aggregation lists to streamline the use and extension of +ML data loaders, making working with popular machine learning frameworks +in SmartSim a breeze. + +A full list of changes and detailed notes can be found below: + +- Add support for colocating an orchestrator over UDS +- Add support for Python 3.10, deprecate support for Python 3.7 and + RedisAI 1.2.3 +- Drop support for Ray +- Update ML data loaders to make use of SmartRedis\'s aggregation + lists +- Allow for models to be launched independently as batch jobs +- Update to current version of Redis to 7.0.5 +- Add support for RedisAI 1.2.7, pyTorch 1.11.0, Tensorflow 2.8.0, + ONNXRuntime 1.11.1 +- Fix bug in colocated database entrypoint when loading PyTorch models +- Fix test suite behavior with environment variables + +Detailed Notes + +- Running some tests could result in some SmartSim-specific + environment variables to be set. Such environment variables are now + reset after each test execution. 
Also, a warning for environment + variable usage in Slurm was added, to make the user aware in case an + environment variable will not be assigned the desired value with + [\--export]{.title-ref}. + ([SmartSim-PR270](https://github.com/CrayLabs/SmartSim/pull/270)) +- The PyTorch and TensorFlow data loaders were updated to make use of + aggregation lists. This breaks their API, but makes them easier to + use. + ([SmartSim-PR264](https://github.com/CrayLabs/SmartSim/pull/264)) +- The support for Ray was dropped, as its most recent versions caused + problems when deployed through SmartSim. We plan to release a + separate add-on library to accomplish the same results. If you are + interested in getting the Ray launch functionality back in your + workflow, please get in touch with us! + ([SmartSim-PR263](https://github.com/CrayLabs/SmartSim/pull/263)) +- Update from Redis version 6.0.8 to 7.0.5. + ([SmartSim-PR258](https://github.com/CrayLabs/SmartSim/pull/258)) +- Adds support for Python 3.10 without the ONNX machine learning + backend. Deprecates support for Python 3.7 as it will stop receiving + security updates. Deprecates support for RedisAI 1.2.3. Update the + build process to be able to correctly fetch supported dependencies. + If a user attempts to build an unsupported dependency, an error + message is shown highlighting the discrepancy. + ([SmartSim-PR256](https://github.com/CrayLabs/SmartSim/pull/256)) +- Models were given a [batch_settings]{.title-ref} attribute. When + launching a model through [Experiment.start]{.title-ref} the + [Experiment]{.title-ref} will first check for a non-nullish value at + that attribute. If the check is satisfied, the + [Experiment]{.title-ref} will attempt to wrap the underlying run + command in a batch job using the object referenced at + [Model.batch_settings]{.title-ref} as the batch settings for the + job. If the check is not satisfied, the [Model]{.title-ref} is + launched in the traditional manner as a job step. 
+ ([SmartSim-PR245](https://github.com/CrayLabs/SmartSim/pull/245)) +- Fix bug in colocated database entrypoint stemming from uninitialized + variables. This bug affects PyTorch models being loaded into the + database. + ([SmartSim-PR237](https://github.com/CrayLabs/SmartSim/pull/237)) +- The release of RedisAI 1.2.7 allows us to update support for recent + versions of PyTorch, Tensorflow, and ONNX + ([SmartSim-PR234](https://github.com/CrayLabs/SmartSim/pull/234)) +- Make installation of correct Torch backend more reliable according + to instruction from PyTorch +- In addition to TCP, add UDS support for colocating an orchestrator + with models. Methods [Model.colocate_db_tcp]{.title-ref} and + [Model.colocate_db_uds]{.title-ref} were added to expose this + functionality. The [Model.colocate_db]{.title-ref} method remains + and uses TCP for backward compatibility + ([SmartSim-PR246](https://github.com/CrayLabs/SmartSim/pull/246)) + +### 0.4.1 + +Released on June 24, 2022 + +Description: This release of SmartSim introduces a new experimental +feature to help make SmartSim workflows more portable: the ability to +run simulation models in a container via Singularity. This feature has +been tested on a small number of platforms and we encourage users to +provide feedback on its use. + +We have also made improvements in a variety of areas: new utilities to +load scripts and machine learning models into the database directly from +SmartSim driver scripts and install-time choice to use either +[KeyDB]{.title-ref} or [Redis]{.title-ref} for the Orchestrator. The +[RunSettings]{.title-ref} API is now more consistent across subclasses. +Another key focus of this release was to aid new SmartSim users by +including more extensive tutorials and improving the documentation. The +docker image containing the SmartSim tutorials now also includes a +tutorial on online training. 
+ +Launcher improvements + +- New methods for specifying [RunSettings]{.title-ref} parameters + ([SmartSim-PR166](https://github.com/CrayLabs/SmartSim/pull/166)) + ([SmartSim-PR170](https://github.com/CrayLabs/SmartSim/pull/170)) +- Better support for [mpirun]{.title-ref}, [mpiexec]{.title-ref}, + and [orterun]{.title-ref} as launchers + ([SmartSim-PR186](https://github.com/CrayLabs/SmartSim/pull/186)) +- Experimental: add support for running models via Singularity + ([SmartSim-PR204](https://github.com/CrayLabs/SmartSim/pull/204)) + +Documentation and tutorials + +- Tutorial updates + ([SmartSim-PR155](https://github.com/CrayLabs/SmartSim/pull/155)) + ([SmartSim-PR203](https://github.com/CrayLabs/SmartSim/pull/203)) + ([SmartSim-PR208](https://github.com/CrayLabs/SmartSim/pull/208)) +- Add SmartSim Zoo info to documentation + ([SmartSim-PR175](https://github.com/CrayLabs/SmartSim/pull/175)) +- New tutorial for demonstrating online training + ([SmartSim-PR176](https://github.com/CrayLabs/SmartSim/pull/176)) + ([SmartSim-PR188](https://github.com/CrayLabs/SmartSim/pull/188)) + +General improvements and bug fixes + +- Set models and scripts at the driver level + ([SmartSim-PR185](https://github.com/CrayLabs/SmartSim/pull/185)) +- Optionally use KeyDB for the orchestrator + ([SmartSim-PR180](https://github.com/CrayLabs/SmartSim/pull/180)) +- Ability to specify system-level libraries + ([SmartSim-PR154](https://github.com/CrayLabs/SmartSim/pull/154)) + ([SmartSim-PR182](https://github.com/CrayLabs/SmartSim/pull/182)) +- Fix the handling of LSF gpus_per_shard + ([SmartSim-PR164](https://github.com/CrayLabs/SmartSim/pull/164)) +- Fix error when re-running [smart build]{.title-ref} + ([SmartSim-PR165](https://github.com/CrayLabs/SmartSim/pull/165)) +- Fix generator hanging when tagged configuration variables are + missing + ([SmartSim-PR177](https://github.com/CrayLabs/SmartSim/pull/177)) + +Dependency updates + +- CMake version from 3.10 to 3.13 + 
([SmartSim-PR152](https://github.com/CrayLabs/SmartSim/pull/152)) +- Update click to 8.0.2 + ([SmartSim-PR200](https://github.com/CrayLabs/SmartSim/pull/200)) + +### 0.4.0 + +Released on Feb 11, 2022 + +Description: In this release SmartSim continues to promote ease of use. +To this end SmartSim has introduced new portability features that allow +users to abstract away their targeted hardware, while providing even +more compatibility with existing libraries. + +A new feature, Co-located orchestrator deployments, has been added which +provides scalable online inference capabilities that overcome previous +performance limitations in separated orchestrator/application +deployments. For more information on advantages of co-located +deployments, see the Orchestrator section of the SmartSim documentation. + +The SmartSim build was significantly improved to increase customization +of build toolchain and the `smart` command line interface was expanded. + +Additional tweaks and upgrades have also been made to ensure an optimal +experience. Here is a comprehensive list of changes made in SmartSim +0.4.0. 
+ +Orchestrator Enhancements: + +- Add Orchestrator Co-location + ([SmartSim-PR139](https://github.com/CrayLabs/SmartSim/pull/139)) +- Add Orchestrator configuration file edit methods + ([SmartSim-PR109](https://github.com/CrayLabs/SmartSim/pull/109)) + +Emphasize Driver Script Portability: + +- Add ability to create run settings through an experiment + ([SmartSim-PR110](https://github.com/CrayLabs/SmartSim/pull/110)) +- Add ability to create batch settings through an experiment + ([SmartSim-PR112](https://github.com/CrayLabs/SmartSim/pull/112)) +- Add automatic launcher detection to experiment portability + functions + ([SmartSim-PR120](https://github.com/CrayLabs/SmartSim/pull/120)) + +Expand Machine Learning Library Support: + +- Data loaders for online training in Keras/TF and Pytorch + ([SmartSim-PR115](https://github.com/CrayLabs/SmartSim/pull/115)) + ([SmartSim-PR140](https://github.com/CrayLabs/SmartSim/pull/140)) +- ML backend versions updated with expanded support for multiple + versions + ([SmartSim-PR122](https://github.com/CrayLabs/SmartSim/pull/122)) +- Launch Ray internally using `RunSettings` + ([SmartSim-PR118](https://github.com/CrayLabs/SmartSim/pull/118)) +- Add Ray cluster setup and deployment to SmartSim + ([SmartSim-PR50](https://github.com/CrayLabs/SmartSim/pull/50)) + +Expand Launcher Setting Options: + +- Add ability to use base `RunSettings` on a Slurm, or PBS launchers + ([SmartSim-PR90](https://github.com/CrayLabs/SmartSim/pull/90)) +- Add ability to use base `RunSettings` on LFS launcher + ([SmartSim-PR108](https://github.com/CrayLabs/SmartSim/pull/108)) + +Deprecations and Breaking Changes + +- Orchestrator classes combined into single implementation for + portability + ([SmartSim-PR139](https://github.com/CrayLabs/SmartSim/pull/139)) +- `smartsim.constants` changed to `smartsim.status` + ([SmartSim-PR122](https://github.com/CrayLabs/SmartSim/pull/122)) +- `smartsim.tf` migrated to `smartsim.ml.tf` + 
([SmartSim-PR115](https://github.com/CrayLabs/SmartSim/pull/115)) + ([SmartSim-PR140](https://github.com/CrayLabs/SmartSim/pull/140)) +- TOML configuration option removed in favor of environment variable + approach + ([SmartSim-PR122](https://github.com/CrayLabs/SmartSim/pull/122)) + +General Improvements and Bug Fixes: + +- Improve and extend parameter handling + ([SmartSim-PR107](https://github.com/CrayLabs/SmartSim/pull/107)) + ([SmartSim-PR119](https://github.com/CrayLabs/SmartSim/pull/119)) +- Abstract away non-user facing implementation details + ([SmartSim-PR122](https://github.com/CrayLabs/SmartSim/pull/122)) +- Add various dimensions to the CI build matrix for SmartSim testing + ([SmartSim-PR130](https://github.com/CrayLabs/SmartSim/pull/130)) +- Add missing functions to LSFSettings API + ([SmartSim-PR113](https://github.com/CrayLabs/SmartSim/pull/113)) +- Add RedisAI checker for installed backends + ([SmartSim-PR137](https://github.com/CrayLabs/SmartSim/pull/137)) +- Remove heavy and unnecessary dependencies + ([SmartSim-PR116](https://github.com/CrayLabs/SmartSim/pull/116)) + ([SmartSim-PR132](https://github.com/CrayLabs/SmartSim/pull/132)) +- Fix LSFLauncher and LSFOrchestrator + ([SmartSim-PR86](https://github.com/CrayLabs/SmartSim/pull/86)) +- Fix over greedy Workload Manager Parsers + ([SmartSim-PR95](https://github.com/CrayLabs/SmartSim/pull/95)) +- Fix Slurm handling of comma-separated env vars + ([SmartSim-PR104](https://github.com/CrayLabs/SmartSim/pull/104)) +- Fix internal method calls + ([SmartSim-PR138](https://github.com/CrayLabs/SmartSim/pull/138)) + +Documentation Updates: + +- Updates to documentation build process + ([SmartSim-PR133](https://github.com/CrayLabs/SmartSim/pull/133)) + ([SmartSim-PR143](https://github.com/CrayLabs/SmartSim/pull/143)) +- Updates to documentation content + ([SmartSim-PR96](https://github.com/CrayLabs/SmartSim/pull/96)) + ([SmartSim-PR129](https://github.com/CrayLabs/SmartSim/pull/129)) + 
([SmartSim-PR136](https://github.com/CrayLabs/SmartSim/pull/136)) + ([SmartSim-PR141](https://github.com/CrayLabs/SmartSim/pull/141)) +- Update SmartSim Examples + ([SmartSim-PR68](https://github.com/CrayLabs/SmartSim/pull/68)) + ([SmartSim-PR100](https://github.com/CrayLabs/SmartSim/pull/100)) + +### 0.3.2 + +Released on August 10, 2021 + +Description: + +- Upgraded RedisAI backend to 1.2.3 + ([SmartSim-PR69](https://github.com/CrayLabs/SmartSim/pull/69)) +- PyTorch 1.7.1, TF 2.4.2, and ONNX 1.6-7 + ([SmartSim-PR69](https://github.com/CrayLabs/SmartSim/pull/69)) +- LSF launcher for IBM machines + ([SmartSim-PR62](https://github.com/CrayLabs/SmartSim/pull/62)) +- Improved code coverage by adding more unit tests + ([SmartSim-PR53](https://github.com/CrayLabs/SmartSim/pull/53)) +- Orchestrator methods to get address and check status + ([SmartSim-PR60](https://github.com/CrayLabs/SmartSim/pull/60)) +- Added Manifest object that tracks deployables in Experiments + ([SmartSim-PR61](https://github.com/CrayLabs/SmartSim/pull/61)) +- Bug fixes + ([SmartSim-PR52](https://github.com/CrayLabs/SmartSim/pull/52)) + ([SmartSim-PR58](https://github.com/CrayLabs/SmartSim/pull/58)) + ([SmartSim-PR67](https://github.com/CrayLabs/SmartSim/pull/67)) + ([SmartSim-PR73](https://github.com/CrayLabs/SmartSim/pull/73)) +- Updated documentation and examples + ([SmartSim-PR51](https://github.com/CrayLabs/SmartSim/pull/51)) + ([SmartSim-PR57](https://github.com/CrayLabs/SmartSim/pull/57)) + ([SmartSim-PR71](https://github.com/CrayLabs/SmartSim/pull/71)) +- Improved IP address acquisition + ([SmartSim-PR72](https://github.com/CrayLabs/SmartSim/pull/72)) +- Binding database to network interfaces + +### 0.3.1 + +Released on May 5, 2021 + +Description: This release was dedicated to making the install process +easier. SmartSim can be installed from PyPI now and the `smart` cli tool +makes installing the machine learning runtimes much easier. 
+ +- Pip install + ([SmartSim-PR42](https://github.com/CrayLabs/SmartSim/pull/42)) +- `smart` cli tool for ML backends + ([SmartSim-PR42](https://github.com/CrayLabs/SmartSim/pull/42)) +- Build Documentation for updated install + ([SmartSim-PR43](https://github.com/CrayLabs/SmartSim/pull/43)) +- Migrate from Jenkins to Github Actions CI + ([SmartSim-PR42](https://github.com/CrayLabs/SmartSim/pull/42)) +- Bug fix for setup.cfg + ([SmartSim-PR35](https://github.com/CrayLabs/SmartSim/pull/35)) + +### 0.3.0 + +Released on April 1, 2021 + +Description: + +- initial 0.3.0 (first public) release of SmartSim + +------------------------------------------------------------------------ + +(smartredis-changelog)= +## SmartRedis + +```{include} ../smartredis/doc/changelog.md +:start-line: 2 +``` + +------------------------------------------------------------------------ + +(smartdashboard-changelog)= +## SmartDashboard + +```{include} ../smartdashboard/doc/changelog.md +:start-line: 2 +``` diff --git a/doc/changelog.rst b/doc/changelog.rst deleted file mode 100644 index d6b735232..000000000 --- a/doc/changelog.rst +++ /dev/null @@ -1,699 +0,0 @@ -********* -Changelog -********* - -Listed here are the changes between each release of SmartSim -and SmartRedis. - -Jump to :ref:`SmartRedis Changelog ` - - -SmartSim -======== - - - -0.6.2 ------ - -Released on 16 February, 2024 - -Description - -- Patch SmartSim dependency version - - -Detailed Notes - -- A critical performance concern was identified and addressed in SmartRedis. A - patch fix was deployed, and SmartSim was updated to ensure users do not - inadvertently pull the unpatched version of SmartRedis. (SmartSim-PR493_) - - -.. 
_SmartSim-PR493: https://github.com/CrayLabs/SmartSim/pull/493 - - -0.6.1 ------ - -Released on 15 February, 2024 - -Description - -- Duplicate for DBModel/Script prevented -- Update license to include 2024 -- Telemetry monitor is now active by default -- Add support for Mac OSX on Apple Silicon -- Remove Torch warnings during testing -- Validate Slurm timing format -- Expose Python Typehints -- Fix test_logs to prevent generation of directory -- Fix Python Typehint for colocated database settings -- Python 3.11 Support -- Quality of life `smart validate` improvements -- Remove Cobalt support -- Enrich logging through context variables -- Upgrade Machine Learning dependencies -- Override sphinx-tabs background color -- Add concurrency group to test workflow -- Fix index when installing torch through smart build - - -Detailed Notes - -- Modify the `git clone` for both Redis and RedisAI to set the line endings to - unix-style line endings when using MacOS on ARM. (SmartSim-PR482_) -- Separate install instructions are now provided for Mac OSX on x64 vs ARM64 (SmartSim-PR479_) -- Prevent duplicate ML model and script names being added to an - Ensemble member if the names exists. (SmartSim-PR475_) -- Updates `Copyright (c) 2021-2023` to `Copyright (c) 2021-2024` - in all of the necessary files. (SmartSim-PR485_) -- Bug fix which prevents the expected behavior when the `SMARTSIM_LOG_LEVEL` - environment variable was set to `developer`. (SmartSim-PR473_) -- Sets the default value of the "enable telemetry" flag to on. - Bumps the output `manifest.json` version number to match that of - `smartdashboard` and pins a watchdog version to avoid build errors. - (SmartSim-PR477_) -- Refactor logic of `Manifest.has_db_objects` to remove excess branching - and improve readability/maintainability. (SmartSim-PR476_) -- SmartSim can now be built and used on platforms using Apple Silicon - (ARM64). Currently, only the PyTorch backend is supported. 
Note that libtorch - will be downloaded from a CrayLabs github repo. (SmartSim-PR465_) -- Tests that were saving Torch models were emitting warnings. These warnings - were addressed by updating the model save test function. (SmartSim-PR472_) -- Validate the timing format when requesting a slurm allocation. (SmartSim-PR471_) -- Add and ship `py.typed` marker to expose inline type hints. Fix - type errors related to SmartRedis. (SmartSim-PR468_) -- Fix the `test_logs.py::test_context_leak` test that was - erroneously creating a directory named `some value` in SmartSim's root - directory. (SmartSim-PR467_) -- Add Python type hinting to colocated settings. (SmartSim-PR462_) -- Add github actions for running black and isort checks. (SmartSim-PR464_) -- Relax the required version of `typing_extensions`. (SmartSim-PR459_) -- Addition of Python 3.11 to SmartSim. (SmartSim-PR461_) -- Quality of life `smart validate` improvements such as setting `CUDA_VISIBLE_DEVICES` - environment variable within `smart validate` prior to importing any ML deps to - prevent false negatives on multi-GPU systems. Additionally, move SmartRedis logs - from standard out to dedicated log file in the validation temporary directory as well as - suppress `sklearn` deprecation warning by pinning `KMeans` constructor - argument. Lastly, move TF test to last as TF may reserve the GPUs it uses. - (SmartSim-PR458_) -- Some actions in the current GitHub CI/CD workflows were outdated. They were - replaced with the latest versions. (SmartSim-PR446_) -- As the Cobalt workload manager is not used on any system we are aware of, - its support in SmartSim was terminated and classes such as `CobaltLauncher` have - been removed. (SmartSim-PR448_) -- Experiment logs are written to a file that can be read by the dashboard. (SmartSim-PR452_) -- Updated SmartSim's machine learning backends to PyTorch 2.0.1, Tensorflow - 2.13.1, ONNX 1.14.1, and ONNX Runtime 1.16.1. 
As a result of this change, - there is now an available ONNX wheel for use with Python 3.10, and wheels for - all of SmartSim's machine learning backends with Python 3.11. - (SmartSim-PR451_) (SmartSim-PR461_) -- The sphinx-tabs documentation extension uses a white background for the tabs component. - A custom CSS for those components to inherit the overall theme color has - been added. (SmartSim-PR453_) -- Add concurrency groups to GitHub's CI/CD workflows, preventing - multiple workflows from the same PR to be launched concurrently. - (SmartSim-PR439_) -- Torch changed their preferred indexing when trying to install - their provided wheels. Updated the `pip install` command within - `smart build` to ensure that the appropriate packages can be found. - (SmartSim-PR449_) - - -.. _SmartSim-PR485: https://github.com/CrayLabs/SmartSim/pull/485 -.. _SmartSim-PR482: https://github.com/CrayLabs/SmartSim/pull/482 -.. _SmartSim-PR479: https://github.com/CrayLabs/SmartSim/pull/479 -.. _SmartSim-PR477: https://github.com/CrayLabs/SmartSim/pull/477 -.. _SmartSim-PR476: https://github.com/CrayLabs/SmartSim/pull/476 -.. _SmartSim-PR475: https://github.com/CrayLabs/SmartSim/pull/475 -.. _SmartSim-PR473: https://github.com/CrayLabs/SmartSim/pull/473 -.. _SmartSim-PR472: https://github.com/CrayLabs/SmartSim/pull/472 -.. _SmartSim-PR471: https://github.com/CrayLabs/SmartSim/pull/471 -.. _SmartSim-PR468: https://github.com/CrayLabs/SmartSim/pull/468 -.. _SmartSim-PR467: https://github.com/CrayLabs/SmartSim/pull/467 -.. _SmartSim-PR465: https://github.com/CrayLabs/SmartSim/pull/465 -.. _SmartSim-PR464: https://github.com/CrayLabs/SmartSim/pull/464 -.. _SmartSim-PR462: https://github.com/CrayLabs/SmartSim/pull/462 -.. _SmartSim-PR461: https://github.com/CrayLabs/SmartSim/pull/461 -.. _SmartSim-PR459: https://github.com/CrayLabs/SmartSim/pull/459 -.. _SmartSim-PR458: https://github.com/CrayLabs/SmartSim/pull/458 -.. _SmartSim-PR453: https://github.com/CrayLabs/SmartSim/pull/453 -.. 
_SmartSim-PR452: https://github.com/CrayLabs/SmartSim/pull/452 -.. _SmartSim-PR451: https://github.com/CrayLabs/SmartSim/pull/451 -.. _SmartSim-PR449: https://github.com/CrayLabs/SmartSim/pull/449 -.. _SmartSim-PR448: https://github.com/CrayLabs/SmartSim/pull/448 -.. _SmartSim-PR446: https://github.com/CrayLabs/SmartSim/pull/446 -.. _SmartSim-PR439: https://github.com/CrayLabs/SmartSim/pull/439 - -0.6.0 ------ - -Released on 18 December, 2023 - -Description - -- Conflicting directives in the SmartSim packaging instructions were fixed -- `sacct` and `sstat` errors are now fatal for Slurm-based workflow executions -- Added documentation section about ML features and TorchScript -- Added TorchScript functions to Online Analysis tutorial -- Added multi-DB example to documentation -- Improved test stability on HPC systems -- Added support for producing & consuming telemetry outputs -- Split tests into groups for parallel execution in CI/CD pipeline -- Change signature of `Experiment.summary()` -- Expose first_device parameter for scripts, functions, models -- Added support for MINBATCHTIMEOUT in model execution -- Remove support for RedisAI 1.2.5, use RedisAI 1.2.7 commit -- Add support for multiple databases - -Detailed Notes - -- Several conflicting directives between the `setup.py` and the `setup.cfg` were fixed - to mitigate warnings issued when building the pip wheel. (SmartSim-PR435_) -- When the Slurm functions `sacct` and `sstat` returned an error, it would be ignored - and SmartSim's state could become inconsistent. To prevent this, errors - raised by `sacct` or `sstat` now result in an exception. (SmartSim-PR392_) -- A section named *ML Features* was added to documentation. It contains multiple - examples of how ML models and functions can be added to and executed on the DB. 
- TorchScript-based post-processing was added to the *Online Analysis* tutorial (SmartSim-PR411_) -- An example of how to use multiple Orchestrators concurrently was added to the documentation (SmartSim-PR409_) -- The test infrastructure was improved. Tests on HPC system are now stable, and issues such - as non-stopped `Orchestrators` or experiments created in the wrong paths have been fixed (SmartSim-PR381_) -- A telemetry monitor was added to check updates and produce events for SmartDashboard (SmartSim-PR426_) -- Split tests into `group_a`, `group_b`, `slow_tests` for parallel execution in CI/CD pipeline (SmartSim-PR417_, SmartSim-PR424_) -- Change `format` argument to `style` in `Experiment.summary()`, this is - an API break (SmartSim-PR391_) -- Added support for first_device parameter for scripts, functions, - and models. This causes them to be loaded to the first num_devices - beginning with first_device (SmartSim-PR394_) -- Added support for MINBATCHTIMEOUT in model execution, which caps the delay - waiting for a minimium number of model execution operations to accumulate - before executing them as a batch (SmartSim-PR387_) -- RedisAI 1.2.5 is not supported anymore. The only RedisAI version - is now 1.2.7. Since the officially released RedisAI 1.2.7 has a - bug which breaks the build process on Mac OSX, it was decided to - use commit 634916c_ from RedisAI's GitHub repository, where such - bug has been fixed. This applies to all operating systems. (SmartSim-PR383_) -- Add support for creation of multiple databases with unique identifiers. (SmartSim-PR342_) - - -.. _SmartSim-PR435: https://github.com/CrayLabs/SmartSim/pull/435 -.. _SmartSim-PR392: https://github.com/CrayLabs/SmartSim/pull/392 -.. _SmartSim-PR411: https://github.com/CrayLabs/SmartSim/pull/411 -.. _SmartSim-PR409: https://github.com/CrayLabs/SmartSim/pull/409 -.. _SmartSim-PR381: https://github.com/CrayLabs/SmartSim/pull/381 -.. _SmartSim-PR426: https://github.com/CrayLabs/SmartSim/pull/426 -.. 
_SmartSim-PR424: https://github.com/CrayLabs/SmartSim/pull/424 -.. _SmartSim-PR417: https://github.com/CrayLabs/SmartSim/pull/417 -.. _SmartSim-PR391: https://github.com/CrayLabs/SmartSim/pull/391 -.. _SmartSim-PR342: https://github.com/CrayLabs/SmartSim/pull/342 -.. _SmartSim-PR394: https://github.com/CrayLabs/SmartSim/pull/394 -.. _SmartSim-PR387: https://github.com/CrayLabs/SmartSim/pull/387 -.. _SmartSim-PR383: https://github.com/CrayLabs/SmartSim/pull/383 -.. _634916c: https://github.com/RedisAI/RedisAI/commit/634916c722e718cc6ea3fad46e63f7d798f9adc2 -.. _SmartSim-PR342: https://github.com/CrayLabs/SmartSim/pull/342 - - -0.5.1 ------ - -Released on 14 September, 2023 - -Description - -- Add typehints throughout the SmartSim codebase -- Provide support for Slurm heterogeneous jobs -- Provide better support for `PalsMpiexecSettings` -- Allow for easier inspection of SmartSim entities -- Log ignored error messages from `sacct` -- Fix colocated db preparation bug when using `JsrunSettings` -- Fix bug when user specify CPU and devices greater than 1 -- Fix bug when get_allocation called with reserved keywords -- Enabled mypy in CI for better type safety -- Mitigate additional suppressed pylint errors -- Update linting support and apply to existing errors -- Various improvements to the `smart` CLI -- Various documentation improvements -- Various test suite improvements - -Detailed Notes - -- Add methods to allow users to inspect files attached to models and ensembles. (SmartSim-PR352_) -- Add a `smart info` target to provide rudimentary information about the SmartSim installation. (SmartSim-PR350_) -- Remove unnecessary generation producing unexpected directories in the test suite. (SmartSim-PR349_) -- Add support for heterogeneous jobs to `SrunSettings` by allowing users to set the `--het-group` parameter. (SmartSim-PR346_) -- Provide clearer guidelines on how to contribute to SmartSim. 
(SmartSim-PR344_) -- Integrate `PalsMpiexecSettings` into the `Experiment` factory methods when using the `"pals"` launcher. (SmartSim-PR343_) -- Create public properties where appropriate to mitigate `protected-access` errors. (SmartSim-PR341_) -- Fix a failure to execute `_prep_colocated_db` due to incorrect named attr check. (SmartSim-PR339_) -- Enabled and mitigated mypy `disallow_any_generics` and `warn_return_any`. (SmartSim-PR338_) -- Add a `smart validate` target to provide a simple smoke test to assess a SmartSim build. (SmartSim-PR336_, SmartSim-PR351_) -- Add typehints to `smartsim._core.launcher.step.*`. (SmartSim-PR334_) -- Log errors reported from slurm WLM when attempts to retrieve status fail. (SmartSim-PR331_, SmartSim-PR332_) -- Fix incorrectly formatted positional arguments in log format strings. (SmartSim-PR330_) -- Ensure that launchers pass environment variables to unmanaged job steps. (SmartSim-PR329_) -- Add additional tests surrounding the `RAI_PATH` configuration environment variable. (SmartSim-PR328_) -- Remove unnecessary execution of unescaped shell commands. (SmartSim-PR327_) -- Add error if user calls get_allocation with reserved keywords in slurm get_allocation. (SmartSim-PR325_) -- Add error when user requests CPU with devices greater than 1 within add_ml_model and add_script. (SmartSim-PR324_) -- Update documentation surrounding ensemble key prefixing. (SmartSim-PR322_) -- Fix formatting of the Frontier site installation. (SmartSim-PR321_) -- Update pylint dependency, update .pylintrc, mitigate non-breaking issues, suppress api breaks. (SmartSim-PR311_) -- Refactor the `smart` CLI to use subparsers for better documentation and extension. (SmartSim-PR308_) - -.. _SmartSim-PR352: https://github.com/CrayLabs/SmartSim/pull/352 -.. _SmartSim-PR351: https://github.com/CrayLabs/SmartSim/pull/351 -.. _SmartSim-PR350: https://github.com/CrayLabs/SmartSim/pull/350 -.. _SmartSim-PR349: https://github.com/CrayLabs/SmartSim/pull/349 -.. 
_SmartSim-PR346: https://github.com/CrayLabs/SmartSim/pull/346 -.. _SmartSim-PR344: https://github.com/CrayLabs/SmartSim/pull/344 -.. _SmartSim-PR343: https://github.com/CrayLabs/SmartSim/pull/343 -.. _SmartSim-PR341: https://github.com/CrayLabs/SmartSim/pull/341 -.. _SmartSim-PR339: https://github.com/CrayLabs/SmartSim/pull/339 -.. _SmartSim-PR338: https://github.com/CrayLabs/SmartSim/pull/338 -.. _SmartSim-PR336: https://github.com/CrayLabs/SmartSim/pull/336 -.. _SmartSim-PR334: https://github.com/CrayLabs/SmartSim/pull/334 -.. _SmartSim-PR332: https://github.com/CrayLabs/SmartSim/pull/332 -.. _SmartSim-PR331: https://github.com/CrayLabs/SmartSim/pull/331 -.. _SmartSim-PR330: https://github.com/CrayLabs/SmartSim/pull/330 -.. _SmartSim-PR329: https://github.com/CrayLabs/SmartSim/pull/329 -.. _SmartSim-PR328: https://github.com/CrayLabs/SmartSim/pull/328 -.. _SmartSim-PR327: https://github.com/CrayLabs/SmartSim/pull/327 -.. _SmartSim-PR325: https://github.com/CrayLabs/SmartSim/pull/325 -.. _SmartSim-PR324: https://github.com/CrayLabs/SmartSim/pull/324 -.. _SmartSim-PR322: https://github.com/CrayLabs/SmartSim/pull/322 -.. _SmartSim-PR321: https://github.com/CrayLabs/SmartSim/pull/321 -.. _SmartSim-PR311: https://github.com/CrayLabs/SmartSim/pull/311 -.. 
_SmartSim-PR308: https://github.com/CrayLabs/SmartSim/pull/308 - - -0.5.0 ------ - -Released on 6 July, 2023 - -Description - -A full list of changes and detailed notes can be found below: - -- Update SmartRedis dependency to v0.4.1 -- Fix tests for db models and scripts -- Fix add_ml_model() and add_script() documentation, tests, and code -- Remove `requirements.txt` and other places where dependencies were defined -- Replace `limit_app_cpus` with `limit_db_cpus` for co-located orchestrators -- Remove wait time associated with Experiment launch summary -- Update and rename Redis conf file -- Migrate from redis-py-cluster to redis-py -- Update full test suite to not require a TF wheel at test time -- Update doc strings -- Remove deprecated code -- Relax the coloredlogs version -- Update Fortran tutorials for SmartRedis -- Add support for multiple network interface binding in Orchestrator and Colocated DBs -- Add typehints and static analysis - -Detailed notes - -- Updates SmartRedis to the most current release (SmartSim-PR316_) -- Fixes and enhancements to documentation (SmartSim-PR317_, SmartSim-PR314_, SmartSim-PR287_) -- Various fixes and enhancements to the test suite (SmartSim-PR315_, SmartSim-PR312_, SmartSim-PR310_, SmartSim-PR302_, SmartSim-PR283_) -- Fix a defect in the tests related to database models and scripts that was - causing key collisions when testing on workload managers (SmartSim-PR313_) -- Remove `requirements.txt` and other places where dependencies were defined. (SmartSim-PR307_) -- Fix defect where dictionaries used to create run settings can be changed - unexpectedly due to copy-by-ref (SmartSim-PR305_) -- The underlying code for Model.add_ml_model() and Model.add_script() was fixed - to correctly handle multi-GPU configurations. Tests were updated to run on - non-local launchers. Documentation was updated and fixed. Also, the default - testing interface has been changed to lo instead of ipogif. 
(SmartSim-PR304_) -- Typehints have been added. A makefile target `make check-mypy` executes static - analysis with mypy. (SmartSim-PR295_, SmartSim-PR301_, SmartSim-PR303_) -- Replace `limit_app_cpus` with `limit_db_cpus` for co-located orchestrators. - This resolves some incorrect behavior/assumptions about how the application - would be pinned. Instead, users should directly specify the binding options in - their application using the options appropriate for their launcher (SmartSim-PR306_) -- Simplify code in `random_permutations` parameter generation strategy (SmartSim-PR300_) -- Remove wait time associated with Experiment launch summary (SmartSim-PR298_) -- Update Redis conf file to conform with Redis v7.0.5 conf file (SmartSim-PR293_) -- Migrate from redis-py-cluster to redis-py for cluster status checks (SmartSim-PR292_) -- Update full test suite to no longer require a tensorflow wheel to be available at test time. (SmartSim-PR291_) -- Correct spelling of colocated in doc strings (SmartSim-PR290_) -- Deprecated launcher-specific orchestrators, constants, and ML - utilities were removed. (SmartSim-PR289_) -- Relax the coloredlogs version to be greater than 10.0 (SmartSim-PR288_) -- Update the Github Actions runner image from `macos-10.15`` to `macos-12``. The - former began deprecation in May 2022 and was finally removed in May 2023. (SmartSim-PR285_) -- The Fortran tutorials had not been fully updated to show how to handle - return/error codes. These have now all been updated. (SmartSim-PR284_) -- Orchestrator and Colocated DB now accept a list of interfaces to bind to. The - argument name is still `interface` for backward compatibility reasons. (SmartSim-PR281_) -- Typehints have been added to public APIs. A makefile target to execute static - analysis with mypy is available `make check-mypy`. (SmartSim-PR295_) - -.. _SmartSim-PR317: https://github.com/CrayLabs/SmartSim/pull/317 -.. _SmartSim-PR316: https://github.com/CrayLabs/SmartSim/pull/316 -.. 
_SmartSim-PR315: https://github.com/CrayLabs/SmartSim/pull/314 -.. _SmartSim-PR314: https://github.com/CrayLabs/SmartSim/pull/314 -.. _SmartSim-PR313: https://github.com/CrayLabs/SmartSim/pull/313 -.. _SmartSim-PR312: https://github.com/CrayLabs/SmartSim/pull/312 -.. _SmartSim-PR310: https://github.com/CrayLabs/SmartSim/pull/310 -.. _SmartSim-PR307: https://github.com/CrayLabs/SmartSim/pull/307 -.. _SmartSim-PR306: https://github.com/CrayLabs/SmartSim/pull/306 -.. _SmartSim-PR305: https://github.com/CrayLabs/SmartSim/pull/305 -.. _SmartSim-PR304: https://github.com/CrayLabs/SmartSim/pull/304 -.. _SmartSim-PR303: https://github.com/CrayLabs/SmartSim/pull/303 -.. _SmartSim-PR302: https://github.com/CrayLabs/SmartSim/pull/302 -.. _SmartSim-PR301: https://github.com/CrayLabs/SmartSim/pull/301 -.. _SmartSim-PR300: https://github.com/CrayLabs/SmartSim/pull/300 -.. _SmartSim-PR298: https://github.com/CrayLabs/SmartSim/pull/298 -.. _SmartSim-PR295: https://github.com/CrayLabs/SmartSim/pull/295 -.. _SmartSim-PR293: https://github.com/CrayLabs/SmartSim/pull/293 -.. _SmartSim-PR292: https://github.com/CrayLabs/SmartSim/pull/292 -.. _SmartSim-PR291: https://github.com/CrayLabs/SmartSim/pull/291 -.. _SmartSim-PR290: https://github.com/CrayLabs/SmartSim/pull/290 -.. _SmartSim-PR289: https://github.com/CrayLabs/SmartSim/pull/289 -.. _SmartSim-PR288: https://github.com/CrayLabs/SmartSim/pull/288 -.. _SmartSim-PR287: https://github.com/CrayLabs/SmartSim/pull/287 -.. _SmartSim-PR285: https://github.com/CrayLabs/SmartSim/pull/285 -.. _SmartSim-PR284: https://github.com/CrayLabs/SmartSim/pull/284 -.. _SmartSim-PR283: https://github.com/CrayLabs/SmartSim/pull/283 -.. _SmartSim-PR281: https://github.com/CrayLabs/SmartSim/pull/281 - -0.4.2 ------ - -Released on April 12, 2023 - -Description - -This release of SmartSim had a focus on polishing and extending exiting -features already provided by SmartSim. 
Most notably, this release provides -support to allow users to colocate their models with an orchestrator using -Unix domain sockets and support for launching models as batch jobs. - -Additionally, SmartSim has updated its tool chains to provide a better user -experience. Notably, SmarSim can now be used with Python 3.10, Redis 7.0.5, and -RedisAI 1.2.7. Furthermore, SmartSim now utilizes SmartRedis's aggregation lists to -streamline the use and extension of ML data loaders, making working with popular -machine learning frameworks in SmartSim a breeze. - -A full list of changes and detailed notes can be found below: - -- Add support for colocating an orchestrator over UDS -- Add support for Python 3.10, deprecate support for Python 3.7 and RedisAI 1.2.3 -- Drop support for Ray -- Update ML data loaders to make use of SmartRedis's aggregation lists -- Allow for models to be launched independently as batch jobs -- Update to current version of Redis to 7.0.5 -- Add support for RedisAI 1.2.7, pyTorch 1.11.0, Tensorflow 2.8.0, ONNXRuntime 1.11.1 -- Fix bug in colocated database entrypoint when loading PyTorch models -- Fix test suite behavior with environment variables - -Detailed Notes - -- Running some tests could result in some SmartSim-specific environment variables to be set. Such environment variables are now reset - after each test execution. Also, a warning for environment variable usage in Slurm was added, to make the user aware in case an environment - variable will not be assigned the desired value with `--export`. (SmartSim-PR270_) -- The PyTorch and TensorFlow data loaders were update to make use of aggregation lists. This breaks their API, but makes them easier to use. (SmartSim-PR264_) -- The support for Ray was dropped, as its most recent versions caused problems when deployed through SmartSim. - We plan to release a separate add-on library to accomplish the same results. 
If - you are interested in getting the Ray launch functionality back in your workflow, please get in touch with us! (SmartSim-PR263_) -- Update from Redis version 6.0.8 to 7.0.5. (SmartSim-PR258_) -- Adds support for Python 3.10 without the ONNX machine learning backend. Deprecates support for - Python 3.7 as it will stop receiving security updates. Deprecates support for RedisAI 1.2.3. - Update the build process to be able to correctly fetch supported dependencies. If a user - attempts to build an unsupported dependency, an error message is shown highlighting the - discrepancy. (SmartSim-PR256_) -- Models were given a `batch_settings` attribute. When launching a model through `Experiment.start` - the `Experiment` will first check for a non-nullish value at that attribute. If the check is - satisfied, the `Experiment` will attempt to wrap the underlying run command in a batch job using - the object referenced at `Model.batch_settings` as the batch settings for the job. If the check - is not satisfied, the `Model` is launched in the traditional manner as a job step. (SmartSim-PR245_) -- Fix bug in colocated database entrypoint stemming from uninitialized variables. This bug affects PyTorch models being loaded into the database. (SmartSim-PR237_) -- The release of RedisAI 1.2.7 allows us to update support for recent versions of PyTorch, Tensorflow, and ONNX (SmartSim-PR234_) -- Make installation of correct Torch backend more reliable according to instruction from PyTorch -- In addition to TCP, add UDS support for colocating an orchestrator with models. Methods - `Model.colocate_db_tcp` and `Model.colocate_db_uds` were added to expose this functionality. - The `Model.colocate_db` method remains and uses TCP for backward compatibility (SmartSim-PR246_) - -.. _SmartSim-PR270: https://github.com/CrayLabs/SmartSim/pull/270 -.. _SmartSim-PR264: https://github.com/CrayLabs/SmartSim/pull/264 -.. _SmartSim-PR263: https://github.com/CrayLabs/SmartSim/pull/263 -.. 
_SmartSim-PR258: https://github.com/CrayLabs/SmartSim/pull/258 -.. _SmartSim-PR256: https://github.com/CrayLabs/SmartSim/pull/256 -.. _SmartSim-PR246: https://github.com/CrayLabs/SmartSim/pull/246 -.. _SmartSim-PR245: https://github.com/CrayLabs/SmartSim/pull/245 -.. _SmartSim-PR237: https://github.com/CrayLabs/SmartSim/pull/237 -.. _SmartSim-PR234: https://github.com/CrayLabs/SmartSim/pull/234 - - -0.4.1 ------ - -Released on June 24, 2022 - -Description: -This release of SmartSim introduces a new experimental feature to help make -SmartSim workflows more portable: the ability to run simulations models in a -container via Singularity. This feature has been tested on a small number of -platforms and we encourage users to provide feedback on its use. - -We have also made improvements in a variety of areas: new utilities to load -scripts and machine learning models into the database directly from SmartSim -driver scripts and install-time choice to use either `KeyDB` or `Redis` for the -Orchestrator. The `RunSettings` API is now more consistent across subclasses. Another -key focus of this release was to aid new SmartSim users by including more -extensive tutorials and improving the documentation. The docker image containing -the SmartSim tutorials now also includes a tutorial on online training. 
- - -Launcher improvements - - - New methods for specifying `RunSettings` parameters (SmartSim-PR166_) (SmartSim-PR170_) - - Better support for `mpirun`, `mpiexec`, and `orterun` as launchers (SmartSim-PR186_) - - Experimental: add support for running models via Singularity (SmartSim-PR204_) - -Documentation and tutorials - - - Tutorial updates (SmartSim-PR155_) (SmartSim-PR203_) (SmartSim-PR208_) - - Add SmartSim Zoo info to documentation (SmartSim-PR175_) - - New tutorial for demonstrating online training (SmartSim-PR176_) (SmartSim-PR188_) - -General improvements and bug fixes - - - Set models and scripts at the driver level (SmartSim-PR185_) - - Optionally use KeyDB for the orchestrator (SmartSim-PR180_) - - Ability to specify system-level libraries (SmartSim-PR154_) (SmartSim-PR182_) - - Fix the handling of LSF gpus_per_shard (SmartSim-PR164_) - - Fix error when re-running `smart build` (SmartSim-PR165_) - - Fix generator hanging when tagged configuration variables are missing (SmartSim-PR177_) - -Dependency updates - - - CMake version from 3.10 to 3.13 (SmartSim-PR152_) - - Update click to 8.0.2 (SmartSim-PR200_) - -.. _SmartSim-PR152: https://github.com/CrayLabs/SmartSim/pull/152 -.. _SmartSim-PR154: https://github.com/CrayLabs/SmartSim/pull/154 -.. _SmartSim-PR155: https://github.com/CrayLabs/SmartSim/pull/155 -.. _SmartSim-PR164: https://github.com/CrayLabs/SmartSim/pull/164 -.. _SmartSim-PR165: https://github.com/CrayLabs/SmartSim/pull/165 -.. _SmartSim-PR166: https://github.com/CrayLabs/SmartSim/pull/166 -.. _SmartSim-PR170: https://github.com/CrayLabs/SmartSim/pull/170 -.. _SmartSim-PR175: https://github.com/CrayLabs/SmartSim/pull/175 -.. _SmartSim-PR176: https://github.com/CrayLabs/SmartSim/pull/176 -.. _SmartSim-PR177: https://github.com/CrayLabs/SmartSim/pull/177 -.. _SmartSim-PR180: https://github.com/CrayLabs/SmartSim/pull/180 -.. _SmartSim-PR182: https://github.com/CrayLabs/SmartSim/pull/182 -.. 
_SmartSim-PR185: https://github.com/CrayLabs/SmartSim/pull/185 -.. _SmartSim-PR186: https://github.com/CrayLabs/SmartSim/pull/186 -.. _SmartSim-PR188: https://github.com/CrayLabs/SmartSim/pull/188 -.. _SmartSim-PR200: https://github.com/CrayLabs/SmartSim/pull/200 -.. _SmartSim-PR203: https://github.com/CrayLabs/SmartSim/pull/203 -.. _SmartSim-PR204: https://github.com/CrayLabs/SmartSim/pull/204 -.. _SmartSim-PR208: https://github.com/CrayLabs/SmartSim/pull/208 - -0.4.0 ------ - -Released on Feb 11, 2022 - -Description: -In this release SmartSim continues to promote ease of use. -To this end SmartSim has introduced new portability features -that allow users to abstract away their targeted hardware, -while providing even more compatibility with existing -libraries. - -A new feature, Co-located orchestrator deployments has -been added which provides scalable online inference -capabilities that overcome previous performance limitations -in seperated orchestrator/application deployments. -For more information on advantages of co-located deployments, -see the Orchestrator section of the SmartSim documentation. - -The SmartSim build was significantly improved to increase -customization of build toolchain and the ``smart`` command -line inferface was expanded. - -Additional tweaks and upgrades have also been -made to ensure an optimal experience. Here is a -comprehensive list of changes made in SmartSim 0.4.0. 
- - -Orchestrator Enhancements: - - - Add Orchestrator Co-location (SmartSim-PR139_) - - Add Orchestrator configuration file edit methods (SmartSim-PR109_) - -Emphasize Driver Script Portability: - - - Add ability to create run settings through an experiment (SmartSim-PR110_) - - Add ability to create batch settings through an experiment (SmartSim-PR112_) - - Add automatic launcher detection to experiment portability functions (SmartSim-PR120_) - -Expand Machine Learning Library Support: - - - Data loaders for online training in Keras/TF and Pytorch (SmartSim-PR115_) (SmartSim-PR140_) - - ML backend versions updated with expanded support for multiple versions (SmartSim-PR122_) - - Launch Ray internally using ``RunSettings`` (SmartSim-PR118_) - - Add Ray cluster setup and deployment to SmartSim (SmartSim-PR50_) - -Expand Launcher Setting Options: - - - Add ability to use base ``RunSettings`` on a Slurm, or PBS launchers (SmartSim-PR90_) - - Add ability to use base ``RunSettings`` on LFS launcher (SmartSim-PR108_) - -Deprecations and Breaking Changes - - - Orchestrator classes combined into single implementation for portability (SmartSim-PR139_) - - ``smartsim.constants`` changed to ``smartsim.status`` (SmartSim-PR122_) - - ``smartsim.tf`` migrated to ``smartsim.ml.tf`` (SmartSim-PR115_) (SmartSim-PR140_) - - TOML configuration option removed in favor of environment variable approach (SmartSim-PR122_) - -General Improvements and Bug Fixes: - - - Improve and extend parameter handling (SmartSim-PR107_) (SmartSim-PR119_) - - Abstract away non-user facing implementation details (SmartSim-PR122_) - - Add various dimensions to the CI build matrix for SmartSim testing (SmartSim-PR130_) - - Add missing functions to LSFSettings API (SmartSim-PR113_) - - Add RedisAI checker for installed backends (SmartSim-PR137_) - - Remove heavy and unnecessary dependencies (SmartSim-PR116_) (SmartSim-PR132_) - - Fix LSFLauncher and LSFOrchestrator (SmartSim-PR86_) - - Fix over greedy 
Workload Manager Parsers (SmartSim-PR95_) - - Fix Slurm handling of comma-separated env vars (SmartSim-PR104_) - - Fix internal method calls (SmartSim-PR138_) - -Documentation Updates: - - - Updates to documentation build process (SmartSim-PR133_) (SmartSim-PR143_) - - Updates to documentation content (SmartSim-PR96_) (SmartSim-PR129_) (SmartSim-PR136_) (SmartSim-PR141_) - - Update SmartSim Examples (SmartSim-PR68_) (SmartSim-PR100_) - - -.. _SmartSim-PR50: https://github.com/CrayLabs/SmartSim/pull/50 -.. _SmartSim-PR68: https://github.com/CrayLabs/SmartSim/pull/68 -.. _SmartSim-PR86: https://github.com/CrayLabs/SmartSim/pull/86 -.. _SmartSim-PR90: https://github.com/CrayLabs/SmartSim/pull/90 -.. _SmartSim-PR95: https://github.com/CrayLabs/SmartSim/pull/95 -.. _SmartSim-PR96: https://github.com/CrayLabs/SmartSim/pull/96 -.. _SmartSim-PR100: https://github.com/CrayLabs/SmartSim/pull/100 -.. _SmartSim-PR104: https://github.com/CrayLabs/SmartSim/pull/104 -.. _SmartSim-PR107: https://github.com/CrayLabs/SmartSim/pull/107 -.. _SmartSim-PR108: https://github.com/CrayLabs/SmartSim/pull/108 -.. _SmartSim-PR109: https://github.com/CrayLabs/SmartSim/pull/109 -.. _SmartSim-PR110: https://github.com/CrayLabs/SmartSim/pull/110 -.. _SmartSim-PR112: https://github.com/CrayLabs/SmartSim/pull/112 -.. _SmartSim-PR113: https://github.com/CrayLabs/SmartSim/pull/113 -.. _SmartSim-PR115: https://github.com/CrayLabs/SmartSim/pull/115 -.. _SmartSim-PR116: https://github.com/CrayLabs/SmartSim/pull/116 -.. _SmartSim-PR118: https://github.com/CrayLabs/SmartSim/pull/118 -.. _SmartSim-PR119: https://github.com/CrayLabs/SmartSim/pull/119 -.. _SmartSim-PR120: https://github.com/CrayLabs/SmartSim/pull/120 -.. _SmartSim-PR122: https://github.com/CrayLabs/SmartSim/pull/122 -.. _SmartSim-PR129: https://github.com/CrayLabs/SmartSim/pull/129 -.. _SmartSim-PR130: https://github.com/CrayLabs/SmartSim/pull/130 -.. _SmartSim-PR132: https://github.com/CrayLabs/SmartSim/pull/132 -.. 
_SmartSim-PR133: https://github.com/CrayLabs/SmartSim/pull/133 -.. _SmartSim-PR136: https://github.com/CrayLabs/SmartSim/pull/136 -.. _SmartSim-PR137: https://github.com/CrayLabs/SmartSim/pull/137 -.. _SmartSim-PR138: https://github.com/CrayLabs/SmartSim/pull/138 -.. _SmartSim-PR139: https://github.com/CrayLabs/SmartSim/pull/139 -.. _SmartSim-PR140: https://github.com/CrayLabs/SmartSim/pull/140 -.. _SmartSim-PR141: https://github.com/CrayLabs/SmartSim/pull/141 -.. _SmartSim-PR143: https://github.com/CrayLabs/SmartSim/pull/143 - - -0.3.2 ------ - -Released on August 10, 2021 - -Description: - - - Upgraded RedisAI backend to 1.2.3 (SmartSim-PR69_) - - PyTorch 1.7.1, TF 2.4.2, and ONNX 1.6-7 (SmartSim-PR69_) - - LSF launcher for IBM machines (SmartSim-PR62_) - - Improved code coverage by adding more unit tests (SmartSim-PR53_) - - Orchestrator methods to get address and check status (SmartSim-PR60_) - - Added Manifest object that tracks deployables in Experiments (SmartSim-PR61_) - - Bug fixes (SmartSim-PR52_) (SmartSim-PR58_) (SmartSim-PR67_) (SmartSim-PR73_) - - Updated documentation and examples (SmartSim-PR51_) (SmartSim-PR57_) (SmartSim-PR71_) - - Improved IP address aquisition (SmartSim-PR72_) - - Binding database to network interfaces - -.. _SmartSim-PR51: https://github.com/CrayLabs/SmartSim/pull/51 -.. _SmartSim-PR52: https://github.com/CrayLabs/SmartSim/pull/52 -.. _SmartSim-PR53: https://github.com/CrayLabs/SmartSim/pull/53 -.. _SmartSim-PR57: https://github.com/CrayLabs/SmartSim/pull/57 -.. _SmartSim-PR58: https://github.com/CrayLabs/SmartSim/pull/58 -.. _SmartSim-PR60: https://github.com/CrayLabs/SmartSim/pull/60 -.. _SmartSim-PR61: https://github.com/CrayLabs/SmartSim/pull/61 -.. _SmartSim-PR62: https://github.com/CrayLabs/SmartSim/pull/62 -.. _SmartSim-PR67: https://github.com/CrayLabs/SmartSim/pull/67 -.. _SmartSim-PR69: https://github.com/CrayLabs/SmartSim/pull/69 -.. _SmartSim-PR71: https://github.com/CrayLabs/SmartSim/pull/71 -.. 
_SmartSim-PR72: https://github.com/CrayLabs/SmartSim/pull/72 -.. _SmartSim-PR73: https://github.com/CrayLabs/SmartSim/pull/73 - -0.3.1 ------ - -Released on May 5, 2021 - -Description: -This release was dedicated to making the install process -easier. SmartSim can be installed from PyPI now and the -``smart`` cli tool makes installing the machine learning -runtimes much easier. - - - Pip install (SmartSim-PR42_) - - ``smart`` cli tool for ML backends (SmartSim-PR42_) - - Build Documentation for updated install (SmartSim-PR43_) - - Migrate from Jenkins to Github Actions CI (SmartSim-PR42_) - - Bug fix for setup.cfg (SmartSim-PR35_) - -.. _SmartSim-PR43: https://github.com/CrayLabs/SmartSim/pull/43 -.. _SmartSim-PR42: https://github.com/CrayLabs/SmartSim/pull/42 -.. _SmartSim-PR35: https://github.com/CrayLabs/SmartSim/pull/35 - -0.3.0 ------ - -Released on April 1, 2021 - -Description: - - - initial 0.3.0 (first public) release of SmartSim - - ---------------------------------------------------------------- - -.. _sr_changelog: - -SmartRedis -========== - -.. include:: ../smartredis/doc/changelog.rst - :start-line: 3 diff --git a/doc/conf.py b/doc/conf.py index e489fd797..932bce013 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -14,6 +14,9 @@ import os import sys +import logging +import inspect +from sphinx.util.logging import SphinxLoggerAdapter sys.path.insert(0, os.path.abspath('.')) # -- Project information ----------------------------------------------------- @@ -26,7 +29,7 @@ import smartsim version = smartsim.__version__ except ImportError: - version = "0.6.2" + version = "0.7.0" # The full version, including alpha/beta/rc tags release = version @@ -39,6 +42,7 @@ # ones. 
extensions = [ 'sphinx.ext.autodoc', + 'sphinx_autodoc_typehints', 'sphinx.ext.autosectionlabel', 'sphinx.ext.todo', 'sphinx.ext.coverage', @@ -52,18 +56,39 @@ 'breathe', 'nbsphinx', 'sphinx_copybutton', - 'sphinx_tabs.tabs' + 'sphinx_tabs.tabs', + 'sphinx_design', + 'sphinx.ext.mathjax', + 'myst_parser' ] - +# sphinx_autodoc_typehints configurations +always_use_bars_union = True +typehints_document_rtype = True +typehints_use_signature = True +typehints_use_signature_return = True +typehints_defaults = 'comma' + +autodoc_mock_imports = ["smartredis.smartredisPy"] suppress_warnings = ['autosectionlabel'] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] +source_suffix = { + '.rst': 'restructuredtext', + '.md': 'markdown', +} + +linkcheck_ignore = [ + 'Redis::set_model_multigpu', +] + +# The path to the MathJax.js file that Sphinx will use to render math expressions +mathjax_path = 'https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', "**.ipynb_checkpoints"] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', "**.ipynb_checkpoints", "tutorials/ml_training/surrogate/README.md", "tutorials/online_analysis/lattice/README.md"] breathe_projects = { "c_client":"../smartredis/doc/c_client/xml", @@ -82,6 +107,12 @@ # a list of builtin themes. 
html_theme = "sphinx_book_theme" +# Check if the environment variable is set to 'True' +if os.environ.get('READTHEDOCS') == "True": + # If it is, generate the robots.txt file + with open('./robots.txt', 'w') as f: + f.write("# Disallow crawling of the Read the Docs URL\nUser-agent: *\nDisallow: /en/") + html_extra_path = ['./robots.txt'] # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, @@ -104,8 +135,43 @@ # white background with dark themes. If sphinx-tabs updates its # static/tabs.css, this may need to be updated. html_css_files = ['custom_tab_style.css'] - autoclass_content = 'both' add_module_names = False nbsphinx_execute = 'never' + +from inspect import getsourcefile + +# Get path to directory containing this file, conf.py. +DOCS_DIRECTORY = os.path.dirname(os.path.abspath(getsourcefile(lambda: 0))) + +def ensure_pandoc_installed(_): + import pypandoc + + # Download pandoc if necessary. If pandoc is already installed and on + # the PATH, the installed version will be used. Otherwise, we will + # download a copy of pandoc into docs/bin/ and add that to our PATH. 
+ pandoc_dir = os.path.join(DOCS_DIRECTORY, "bin") + # Add dir containing pandoc binary to the PATH environment variable + if pandoc_dir not in os.environ["PATH"].split(os.pathsep): + os.environ["PATH"] += os.pathsep + pandoc_dir + pypandoc.ensure_pandoc_installed( + targetfolder=pandoc_dir, + delete_installer=True, + ) + + +def setup(app): + app.connect("builder-inited", ensure_pandoc_installed) + + # Below code from https://github.com/sphinx-doc/sphinx/issues/10219 + def _is_sphinx_logger_adapter(obj): + return isinstance(obj, SphinxLoggerAdapter) + class ForwardReferenceFilter(logging.Filter): + def filter(self, record): + # Suppress the warning related to forward references + return "Cannot resolve forward reference in type annotations" not in record.getMessage() + + members = inspect.getmembers(app.extensions['sphinx_autodoc_typehints'].module, _is_sphinx_logger_adapter) + for _, adapter in members: + adapter.logger.addFilter(ForwardReferenceFilter()) diff --git a/doc/dragon.rst b/doc/dragon.rst new file mode 100644 index 000000000..0bf6a8ea3 --- /dev/null +++ b/doc/dragon.rst @@ -0,0 +1,169 @@ +****** +Dragon +****** + +======== +Overview +======== + +Dragon is a composable distributed run-time targeting HPC workflows. In SmartSim, +Dragon can be used as a launcher, within a Slurm or PBS allocation or batch job. +The SmartSim team collaborates with the Dragon team to develop an efficient +launcher which will enable fast, interactive, and customized execution of +complex workflows on large HPC systems. As Dragon is scheduler-agnostic, +the same SmartSim script using Dragon as a launcher can be run indifferently +on a Slurm or PBS system. Support for additional schedulers is coming soon. + +.. warning:: + The Dragon launcher is currently in its early development stage and should be treated as + a prototype implementation. Your assistance is invaluable in identifying any issues + encountered during usage and suggesting missing features for implementation. 
Please + provide feedback in the form of a created issue on the + `SmartSim issues GitHub page `_. + The :ref:`Known Issues section` is also a good starting + point when troubleshooting workflows run using the Dragon launcher. + +===== +Usage +===== +To use Dragon, you need to install it in your current Python environment. This can +be accomplished by providing the ``--dragon`` flag to the ``smart build`` command, as +detailed in the :ref:`Dragon Install `. Note that specifying the device +configuration is also required for a proper build. + +After installation, specify Dragon as the launcher when creating an ``Experiment``: + +.. code-block:: python + + exp = Experiment(name="dragon-example", launcher="dragon") + +Dragon introduces its own run settings class, ``DragonRunSettings``, which allows users to +specify nodes and tasks per node for a ``Model``. For instance, continuing from the previous +example: + +.. code-block:: python + + # Because "dragon" was specified as the launcher during Experiment initialization, + # create_run_settings will return a DragonRunSettings object + rs = exp.create_run_settings(exe="mpi_app", + exe_args=["--option", "value"], + env_vars={"MYVAR": "VALUE"}) + # Above we specify the executable (exe), executable arguments (exe_args) + # and environment variables (env_vars) + + # Sets the number of nodes for this job + rs.set_nodes(4) + # Set the tasks per node for this job + rs.set_tasks_per_node(3) + # Initialize the Model and pass in the DragonRunSettings object + mpi_app = exp.create_model("MPI_APP", run_settings=rs) + # Start the Model + exp.start(mpi_app) + +SmartSim supports ``DragonRunSettings`` with ``Model``, ``Ensemble`` and ``Orchestrator`` entities. +In the next sections, we detail how Dragon is integrated into SmartSim. + +For more information on HPC launchers, visit the :ref:`Run Settings` page. 
+ +================= +The Dragon Server +================= + +Dragon can initiate processes on any available resource within an allocation. To facilitate +this, SmartSim initializes the Dragon infrastructure whenever a ``Model`` is launched and maintains +it until the parent ``Experiment`` concludes. To facilitate interaction with processes managed by +Dragon, SmartSim establishes a command server within the Dragon infrastructure. This server, +known as the `Dragon Server`, is responsible for executing commands to start or stop processes +and to query their status. + +Sharing the Dragon Server across Experiments +============================================ + +Currently, SmartSim supports only one Dragon server per allocation. Consequently, +if multiple Experiments need to run within the same allocation, the Dragon server +must be shared among them. By default, the server starts from a subdirectory +of the ``Experiment`` path, where it creates a configuration file. +To enable server sharing, users can specify a custom path +from which the server should be launched. This can be achieved by setting the +environment variable ``SMARTSIM_DRAGON_SERVER_PATH`` to an existing absolute path. +Each ``Experiment`` will then search for the configuration file in the specified path +and initiate a new server instance only if the file is not found. + +Dragon's High-Speed Transport Agents +==================================== + +On systems equipped with the HPE Slingshot interconnect, Dragon utilizes High-Speed +Transport Agents (HSTA) by default for internal messaging within the infrastructure +launched by SmartSim. On systems without the HPE Slingshot interconnect, +TCP agents are employed. To specify the use of TCP agents, users must set the environment +variable ``SMARTSIM_DRAGON_TRANSPORT`` to ``tcp`` prior to executing the Experiment. +To specify HSTA, ``SMARTSIM_DRAGON_TRANSPORT`` can be set to ``hsta`` or left unset. 
+ +============= +Communication +============= + +SmartSim and the Dragon Server communicate using `ZeroMQ `_. + +Similar to other communication protocols, defining timeouts for send and receive operations +is crucial in SmartSim. SmartSim configures default timeouts that have been tested on various +systems, such as Polaris, Perlmutter, and other HPE Cray EX and Apollo systems. +However, if you encounter failed communication attempts, adjusting the timeouts may +be necessary. You can adjust these timeouts by setting the corresponding environment variables: + +- **Server Start-up Timeout**: This timeout specifies the duration the SmartSim ``Experiment`` + waits when the server is initially started. It must accommodate the time required for + Dragon to set up the infrastructure, which varies based on the system's workload manager + response time. The default timeout is `"300000"` milliseconds (i.e., five minutes), and you can override + it using the ``SMARTSIM_DRAGON_STARTUP_TIMEOUT`` environment variable. + +- **Server Send and Receive Timeout**: This timeout dictates how long SmartSim and the Dragon + server wait to send or receive a message. The default timeout is `"30000"` milliseconds (i.e., 30 seconds), + and you can modify it using the ``SMARTSIM_DRAGON_TIMEOUT`` environment variable. + +Setting any timeout to "-1" will result in an infinite waiting time, causing the execution to +block until the communication is completed, potentially hanging indefinitely if issues occur. + +It's important to note that all communications are secured with `elliptic curve cryptography `_. +SmartSim generates the necessary key-pairs and stores them in the user's home directory by +default. However, you can specify an alternative absolute path using the ``SMARTSIM_KEY_PATH`` +environment variable. + +.. 
_dragon_known_issues: + +============ +Known issues +============ + +As previously noted, the integration of SmartSim with Dragon is still in its early +development stage, and there are known issues that may result in unexpected behavior +during runs: + +- **Incomplete cleanup of Dragon resources**: When SmartSim exits, it attempts to properly + shut down the Dragon infrastructure to clean up associated resources, such as shared memory + segments, and terminate all processes. However, in rare cases, if the execution is + abruptly interrupted (e.g., by terminating SmartSim with ``SIGKILL``), the cleanup process + may be incomplete, leaving processes like the Dragon overlay network active on the node + where SmartSim was executed (which could be a login node, particularly on Slurm systems). + If this occurs, you can use the following command to address the issue: + + .. code-block:: + + smart teardown --dragon + + This command will terminate all Dragon-related processes, release shared memory segments, + but also terminate all Python processes associated with your username. + +- **Dragon server not starting**: This issue may arise due to two main reasons: + + 1. *HSTA not available on the system*: Try setting the environment variable + ``SMARTSIM_DRAGON_TRANSPORT`` to ``tcp``. + 2. *System or Workload Manager too busy*: Attempt to mitigate this by setting the environment + variable ``SMARTSIM_DRAGON_STARTUP_TIMEOUT`` to a larger value or ``"-1"``. + +- **MPI-based applications hanging**: To run MPI-based applications on Dragon, Cray PMI or + Cray PALS must be available on the system. This limitation is currently being addressed. + + +Interested users can learn more about the Dragon project at the external +`Dragon documentation page `_. \ No newline at end of file diff --git a/doc/ensemble.rst b/doc/ensemble.rst new file mode 100644 index 000000000..93019d18d --- /dev/null +++ b/doc/ensemble.rst @@ -0,0 +1,1214 @@ +.. 
_ensemble_doc: + +******** +Ensemble +******** +======== +Overview +======== +A SmartSim ``Ensemble`` enables users to run a **group** of computational tasks together in an +``Experiment`` workflow. An ``Ensemble`` is comprised of multiple ``Model`` objects, +where each ``Ensemble`` member (SmartSim ``Model``) represents an individual application. +An ``Ensemble`` can be managed as a single entity and +launched with other :ref:`Model's` and :ref:`Orchestrators` to construct AI-enabled workflows. + +The :ref:`Ensemble API` offers key features, including methods to: + +- :ref:`Attach Configuration Files` for use at ``Ensemble`` runtime. +- :ref:`Load AI Models` (TF, TF-lite, PT, or ONNX) into the ``Orchestrator`` at ``Ensemble`` runtime. +- :ref:`Load TorchScripts` into the ``Orchestrator`` at ``Ensemble`` runtime. +- :ref:`Prevent Data Collisions` within the ``Ensemble``, which allows for reuse of application code. + +To create a SmartSim ``Ensemble``, use the ``Experiment.create_ensemble`` API function. When +initializing an ``Ensemble``, consider one of the **three** creation strategies explained +in the :ref:`Initialization` section. + +SmartSim manages ``Ensemble`` instances through the :ref:`Experiment API` by providing functions to +launch, monitor, and stop applications. + +.. _init_ensemble_strategies: + +============== +Initialization +============== +Overview +======== +The :ref:`Experiment API` is responsible for initializing all workflow entities. +An ``Ensemble`` is created using the ``Experiment.create_ensemble`` factory method, and users can customize the +``Ensemble`` creation via the factory method parameters. + +The factory method arguments for ``Ensemble`` creation can be found in the :ref:`Experiment API` +under the ``create_ensemble`` docstring. + +By using specific combinations of the factory method arguments, users can tailor +the creation of an ``Ensemble`` to align with one of the following creation strategies: + +1. 
:ref:`Parameter Expansion`: Generate a variable-sized set of unique simulation instances + configured with user-defined input parameters. +2. :ref:`Replica Creation`: Generate a specified number of ``Model`` replicas. +3. :ref:`Manually`: Attach pre-configured ``Model``'s to an ``Ensemble`` to manage as a single unit. + +.. _param_expansion_init: + +Parameter Expansion +=================== +Parameter expansion is a technique that allows users to set parameter values per ``Ensemble`` member. +This is done by specifying input to the `params` and `perm_strategy` factory method arguments during +``Ensemble`` creation (``Experiment.create_ensemble``). Users may control how the `params` values +are applied to the ``Ensemble`` through the `perm_strategy` argument. The `perm_strategy` argument +accepts three values listed below. + +**Parameter Expansion Strategy Options:** + +- `"all_perm"`: Generate all possible parameter permutations for an exhaustive exploration. This + means that every possible combination of parameters will be used in the ``Ensemble``. +- `"step"`: Create parameter sets by collecting identically indexed values across parameter lists. + This allows for discrete combinations of parameters for ``Model``'s. +- `"random"`: Enable random selection from predefined parameter spaces, offering a stochastic approach. + This means that the parameters will be chosen randomly for each ``Model``, which can be useful + for exploring a wide range of possibilities. + +-------- +Examples +-------- +This subsection contains two examples of ``Ensemble`` parameter expansion. The +:ref:`first example` illustrates parameter expansion using two parameters +while the :ref:`second example` demonstrates parameter expansion with two +parameters along with the launch of the ``Ensemble`` as a batch workload. + +.. 
_param_first_ex: + +Example 1 : Parameter Expansion Using `all_perm` Strategy + + In this example an ``Ensemble`` of four ``Model`` entities is created by expanding two parameters + using the `all_perm` strategy. All of the ``Model``'s in the ``Ensemble`` share the same ``RunSettings`` + and only differ in the value of the `params` assigned to each member. The source code example + is available in the dropdown below for convenient execution and customization. + + .. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/param_expansion_1.py + + Begin by initializing a ``RunSettings`` object to apply to + all ``Ensemble`` members: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/param_expansion_1.py + :language: python + :linenos: + :lines: 6-7 + + Next, define the parameters that will be applied to the ``Ensemble``: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/param_expansion_1.py + :language: python + :linenos: + :lines: 9-13 + + Finally, initialize an ``Ensemble`` by specifying the ``RunSettings``, `params` and `perm_strategy="all_perm"`: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/param_expansion_1.py + :language: python + :linenos: + :lines: 15-16 + + By specifying `perm_strategy="all_perm"`, all permutations of the `params` will + be calculated and distributed across ``Ensemble`` members. Here there are four permutations of the `params` values: + + .. code-block:: bash + + ensemble member 1: ["Ellie", 2] + ensemble member 2: ["Ellie", 11] + ensemble member 3: ["John", 2] + ensemble member 4: ["John", 11] + +.. _param_second_ex: + +Example 2 : Parameter Expansion Using `step` Strategy with the ``Ensemble`` Configured For Batch Launching + + In this example an ``Ensemble`` of two ``Model`` entities is created by expanding two parameters + using the `step` strategy. 
All of the ``Model``'s in the ``Ensemble`` share the same ``RunSettings`` + and only differ in the value of the `params` assigned to each member. Lastly, the ``Ensemble`` is + submitted as a batch workload. The source code example is available in the dropdown below for + convenient execution and customization. + + .. dropdown:: Example Driver Script source code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/param_expansion_2.py + + Begin by initializing and configuring a ``BatchSettings`` object to + run the ``Ensemble`` instance: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/param_expansion_2.py + :language: python + :linenos: + :lines: 6-8 + + The above ``BatchSettings`` object will instruct SmartSim to run the ``Ensemble`` on two + nodes with a timeout of `10 hours`. + + Next initialize a ``RunSettings`` object to apply to all ``Ensemble`` members: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/param_expansion_2.py + :language: python + :linenos: + :lines: 10-12 + + Next, define the parameters to include in ``Ensemble``: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/param_expansion_2.py + :language: python + :linenos: + :lines: 14-18 + + Finally, initialize an ``Ensemble`` by passing in the ``RunSettings``, `params` and `perm_strategy="step"`: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/param_expansion_2.py + :language: python + :linenos: + :lines: 20-21 + + When specifying `perm_strategy="step"`, the `params` sets are created by collecting identically + indexed values across the `param` value lists. + + .. code-block:: bash + + ensemble member 1: ["Ellie", 2] + ensemble member 2: ["John", 11] + +.. _replicas_init: + +Replicas +======== +A replica strategy involves the creation of identical ``Model``'s within an ``Ensemble``. +This strategy is particularly useful for applications that have some inherent randomness. 
+Users may use the `replicas` factory method argument to create a specified number of identical +``Model`` members during ``Ensemble`` creation (``Experiment.create_ensemble``). + +-------- +Examples +-------- +This subsection contains two examples of using the replicas creation strategy. The +:ref:`first example` illustrates creating four ``Ensemble`` member clones +while the :ref:`second example` demonstrates creating four ``Ensemble`` +member clones along with the launch of the ``Ensemble`` as a batch workload. + +.. _replicas_first_ex: + +Example 1 : ``Ensemble`` creation with replicas strategy + + In this example an ``Ensemble`` of four identical ``Model`` members is created by + specifying the number of clones to create via the `replicas` argument. + All of the ``Model``'s in the ``Ensemble`` share the same ``RunSettings``. + The source code example is available in the dropdown below for convenient execution + and customization. + + .. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/replicas_1.py + + To create an ``Ensemble`` of identical ``Model``'s, begin by initializing a ``RunSettings`` + object: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/replicas_1.py + :language: python + :linenos: + :lines: 6-7 + + Initialize the ``Ensemble`` by specifying the ``RunSettings`` object and number of clones to `replicas`: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/replicas_1.py + :language: python + :linenos: + :lines: 9-10 + + By passing in `replicas=4`, four identical ``Ensemble`` members will be initialized. + +.. _replicas_second_ex: + +Example 2 : ``Ensemble`` Creation with Replicas Strategy and ``Ensemble`` Batch Launching + + In this example an ``Ensemble`` of four ``Model`` entities is created by specifying + the number of clones to create via the `replicas` argument. 
All of the ``Model``'s in + the ``Ensemble`` share the same ``RunSettings`` and the ``Ensemble`` is + submitted as a batch workload. The source code example is available in the dropdown below for + convenient execution and customization. + + .. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/replicas_2.py + + To launch the ``Ensemble`` of identical ``Model``'s as a batch job, begin by initializing a ``BatchSettings`` + object: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/replicas_2.py + :language: python + :linenos: + :lines: 6-9 + + The above ``BatchSettings`` object will instruct SmartSim to run the ``Ensemble`` on four + nodes with a timeout of `10 hours`. + + Next, create a ``RunSettings`` object to apply to all ``Model`` replicas: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/replicas_2.py + :language: python + :linenos: + :lines: 10-12 + + Initialize the ``Ensemble`` by specifying the ``RunSettings`` object, ``BatchSettings`` object + and number of clones to `replicas`: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/replicas_2.py + :language: python + :linenos: + :lines: 14-15 + + By passing in `replicas=4`, four identical ``Ensemble`` members will be initialized. + +.. _append_init: + +Manually Append +=============== +Manually appending ``Model``'s to an ``Ensemble`` offers an in-depth level of customization in ``Ensemble`` design. +This approach is favorable when users have distinct requirements for individual ``Model``'s, such as variations +in parameters, run settings, or different types of simulations. + +-------- +Examples +-------- +This subsection contains an example of creating an ``Ensemble`` by manually appending ``Model``'s. +The example illustrates attaching two SmartSim ``Model``'s to the ``Ensemble``. +The ``Ensemble`` is submitted as a batch workload. 
+ +Example 1 : Append ``Model``'s to an ``Ensemble`` and Launch as a Batch Job + + In this example, we append ``Model``'s to an ``Ensemble`` for batch job execution. To do + this, we first initialize an Ensemble with a ``BatchSettings`` object. Then, manually + create ``Model``'s and add each to the ``Ensemble`` using the ``Ensemble.add_model`` function. + The source code example is available in the dropdown below for convenient execution and customization. + + .. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/manual_append_ensemble.py + + To create an empty ``Ensemble`` to append ``Model``'s, initialize the ``Ensemble`` with + a batch settings object: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/manual_append_ensemble.py + :language: python + :linenos: + :lines: 6-11 + + Next, create the ``Model``'s to append to the ``Ensemble``: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/manual_append_ensemble.py + :language: python + :linenos: + :lines: 13-20 + + Finally, append the ``Model`` objects to the ``Ensemble``: + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/manual_append_ensemble.py + :language: python + :linenos: + :lines: 22-25 + + The new ``Ensemble`` is comprised of two appended ``Model`` members. + +.. _attach_files_ensemble: + +===== +Files +===== +Overview +======== +``Ensemble`` members often depend on external files (e.g. training datasets, evaluation datasets, etc) +to operate as intended. Users can instruct SmartSim to copy, symlink, or manipulate external files +prior to an ``Ensemble`` launch via the ``Ensemble.attach_generator_files`` function. Attached files +will be applied to all ``Ensemble`` members. + +.. note:: + Multiple calls to ``Ensemble.attach_generator_files`` will overwrite previous file configurations + on the ``Ensemble``. 
+ +To attach a file to an ``Ensemble`` for use at runtime, provide one of the following arguments to the +``Ensemble.attach_generator_files`` function: + +* `to_copy` (t.Optional[t.List[str]] = None): Files that are copied into the path of the ``Ensemble`` members. +* `to_symlink` (t.Optional[t.List[str]] = None): Files that are symlinked into the path of the ``Ensemble`` members. + A symlink, or symbolic link, is a file that points to another file or directory, allowing you to access that file + as if it were located in the same directory as the symlink. + +To specify a template file in order to programmatically replace specified parameters during generation +of ``Ensemble`` member directories, pass the following value to the ``Ensemble.attach_generator_files`` function: + +* `to_configure` (t.Optional[t.List[str]] = None): This parameter is designed for text-based ``Ensemble`` + member input files. During directory generation for ``Ensemble`` members, the linked files are parsed and replaced with + the `params` values applied to each ``Ensemble`` member. To further explain, the ``Ensemble`` + creation strategy is considered when replacing the tagged parameters in the input files. + These tagged parameters are placeholders in the text that are replaced with the actual + parameter values during the directory generation process. The default tag is a semicolon + (e.g., THERMO = ;THERMO;). + +In the :ref:`Example` subsection, we provide an example using the value `to_configure` +within ``Ensemble.attach_generator_files``. + +.. seealso:: + To add a file to a single ``Model`` that will be appended to an ``Ensemble``, refer to the :ref:`Files` + section of the ``Model`` documentation. + +.. _files_example_doc_ensem: + +Example +======= +This example demonstrates how to attach a text file to an ``Ensemble`` for parameter replacement. 
+This is accomplished using the `params` function parameter in +the ``Experiment.create_ensemble`` factory function and the `to_configure` function parameter +in ``Ensemble.attach_generator_files``. The source code example is available in the dropdown below for +convenient execution and customization. + +.. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/file_attach.py + +In this example, we have a text file named `params_inputs.txt`. Within the text, is the parameter `THERMO` +that is required by each ``Ensemble`` member at runtime: + +.. code-block:: bash + + THERMO = ;THERMO; + +In order to have the tagged parameter `;THERMO;` replaced with a usable value at runtime, two steps are required: + +1. The `THERMO` variable must be included in ``Experiment.create_ensemble`` factory method as + part of the `params` parameter. +2. The file containing the tagged parameter `;THERMO;`, `params_inputs.txt`, must be attached to the ``Ensemble`` + via the ``Ensemble.attach_generator_files`` method as part of the `to_configure` parameter. + +To encapsulate our application within an ``Ensemble``, we must create an ``Experiment`` instance +to gain access to the ``Experiment`` factory method that creates the ``Ensemble``. +Begin by importing the ``Experiment`` module and initializing an ``Experiment``: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/file_attach.py + :language: python + :linenos: + :lines: 1-4 + +To create our ``Ensemble``, we are using the `replicas` initialization strategy. +Begin by creating a simple ``RunSettings`` object to specify the path to +the executable simulation as an executable: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/file_attach.py + :language: python + :linenos: + :lines: 6-7 + +Next, initialize an ``Ensemble`` object with ``Experiment.create_ensemble`` +by passing in `ensemble_settings`, `params={"THERMO":1}` and `replicas=2`: + +.. 
literalinclude:: tutorials/doc_examples/ensemble_doc_examples/file_attach.py + :language: python + :linenos: + :lines: 9-10 + +We now have an ``Ensemble`` instance named `example_ensemble`. Attach the above text file +to the ``Ensemble`` for use at entity runtime. To do so, we use the +``Ensemble.attach_generator_files`` function and specify the `to_configure` +parameter with the path to the text file, `params_inputs.txt`: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/file_attach.py + :language: python + :linenos: + :lines: 12-13 + +To create an isolated directory for the ``Ensemble`` member outputs and configuration files, invoke ``Experiment.generate`` via the +``Experiment`` instance `exp` with `example_ensemble` as an input parameter: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/file_attach.py + :language: python + :linenos: + :lines: 15-16 + +After invoking ``Experiment.generate``, the attached generator files will be available for the +application when ``exp.start(example_ensemble)`` is called. + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/file_attach.py + :language: python + :linenos: + :lines: 18-19 + +The contents of `params_inputs.txt` after ``Ensemble`` completion are: + +.. code-block:: bash + + THERMO = 1 + +.. _ensemble_ml_model_script: + +===================== +ML Models and Scripts +===================== +Overview +======== +SmartSim users have the capability to load ML models and TorchScripts into an ``Orchestrator`` +within the ``Experiment`` script for use within ``Ensemble`` members. Functions +accessible through an ``Ensemble`` object support loading ML models (TensorFlow, TensorFlow-lite, +PyTorch, and ONNX) and TorchScripts into standalone or colocated ``Orchestrators`` before +application runtime. + +.. 
seealso:: + To add an ML model or TorchScript to a single ``Model`` that will be appended to an + ``Ensemble``, refer to the :ref:`ML Models and Scripts` + section of the ``Model`` documentation. + +Depending on the planned storage method of the **ML model**, there are **two** distinct +approaches to load it into the ``Orchestrator``: + +- :ref:`From Memory` +- :ref:`From File` + +.. warning:: + Uploading an ML model :ref:`from memory` is solely supported for + standalone ``Orchestrators``. To upload an ML model to a colocated ``Orchestrator``, users + must save the ML model to disk and upload :ref:`from file`. + +Depending on the planned storage method of the **TorchScript**, there are **three** distinct +approaches to load it into the ``Orchestrator``: + +- :ref:`From Memory` +- :ref:`From File` +- :ref:`From String` + +.. warning:: + Uploading a TorchScript :ref:`from memory` is solely supported for + standalone ``Orchestrators``. To upload a TorchScript to a colocated ``Orchestrator``, users + upload :ref:`from file` or :ref:`from string`. + +Once a ML model or TorchScript is loaded into the ``Orchestrator``, ``Ensemble`` members can +leverage ML capabilities by utilizing the SmartSim client (:ref:`SmartRedis`) +to execute the stored ML models or TorchScripts. + +.. _ai_model_ensemble_doc: + +AI Models +========= +When configuring an ``Ensemble``, users can instruct SmartSim to load +Machine Learning (ML) models dynamically to the ``Orchestrator`` (colocated or standalone). ML models added +are loaded into the ``Orchestrator`` prior to the execution of the ``Ensemble``. To load an ML model +to the ``Orchestrator``, SmartSim users can serialize and provide the ML model **in-memory** or specify the **file path** +via the ``Ensemble.add_ml_model`` function. The supported ML frameworks are TensorFlow, +TensorFlow-lite, PyTorch, and ONNX. + +Users must **serialize TensorFlow ML models** before sending to an ``Orchestrator`` from memory +or from file. 
To save a TensorFlow model to memory, SmartSim offers the ``serialize_model`` +function. This function returns the TF model as a byte string with the names of the +input and output layers, which will be required upon uploading. To save a TF model to disk, +SmartSim offers the ``freeze_model`` function which returns the path to the serialized +TF model file with the names of the input and output layers. Additional TF model serialization +information and examples can be found in the :ref:`ML Features` section of SmartSim. + +.. note:: + Uploading an ML model from memory is only supported for standalone ``Orchestrators``. + +When attaching an ML model using ``Ensemble.add_ml_model``, the +following arguments are offered to customize storage and execution: + +- `name` (str): name to reference the ML model in the ``Orchestrator``. +- `backend` (str): name of the backend (TORCH, TF, TFLITE, ONNX). +- `model` (t.Optional[str] = None): An ML model in memory (only supported for non-colocated ``Orchestrators``). +- `model_path` (t.Optional[str] = None): serialized ML model. +- `device` (t.Literal["CPU", "GPU"] = "CPU"): name of device for execution, defaults to “CPU”. +- `devices_per_node` (int = 1): The number of GPU devices available on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. +- `first_device` (int = 0): The first GPU device to use on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. +- `batch_size` (int = 0): batch size for execution, defaults to 0. +- `min_batch_size` (int = 0): minimum batch size for ML model execution, defaults to 0. +- `min_batch_timeout` (int = 0): time to wait for minimum batch size, defaults to 0. +- `tag` (str = ""): additional tag for ML model information, defaults to “”. +- `inputs` (t.Optional[t.List[str]] = None): ML model inputs (TF only), defaults to None. 
+- `outputs` (t.Optional[t.List[str]] = None): ML model outputs (TF only), defaults to None. + +.. seealso:: + To add an ML model to a single ``Model`` that will be appended to an + ``Ensemble``, refer to the :ref:`AI Models` + section of the ``Model`` documentation. + +.. _in_mem_ML_model_ensemble_ex: + +------------------------------------- +Example: Attach an In-Memory ML Model +------------------------------------- +This example demonstrates how to attach an in-memory ML model to a SmartSim ``Ensemble`` +to load into an ``Orchestrator`` at ``Ensemble`` runtime. The source code example is +available in the dropdown below for convenient execution and customization. + +.. dropdown:: Experiment Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/ensemble_ml_model_mem.py + +.. note:: + This example assumes: + + - an ``Orchestrator`` is launched prior to the ``Ensemble`` execution + - an initialized ``Ensemble`` named `ensemble_instance` exists within the ``Experiment`` workflow + - a Tensorflow-based ML model was serialized using ``serialize_model`` which returns the + ML model as a byte string with the names of the input and output layers + +**Attach the ML Model to a SmartSim Ensemble** + +In this example, we have a serialized Tensorflow-based ML model that was saved to a byte string stored under `model`. +Additionally, the ``serialize_model`` function returned the names of the input and output layers stored under +`inputs` and `outputs`. Assuming an initialized ``Ensemble`` named `ensemble_instance` exists, we add the byte string TensorFlow model using +``Ensemble.add_ml_model``: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/ensemble_ml_model_mem.py + :language: python + :linenos: + :lines: 39-40 + +In the above ``ensemble_instance.add_ml_model`` code snippet, we offer the following arguments: + +- `name` ("cnn"): A name to reference the ML model in the ``Orchestrator``. 
+- `backend` ("TF"): Indicating that the ML model is a TensorFlow model. +- `model` (model): The in-memory representation of the TensorFlow model. +- `device` ("GPU"): Specifying the device for ML model execution. +- `devices_per_node` (2): Use two GPUs per node. +- `first_device` (0): Start with 0 index GPU. +- `inputs` (inputs): The name of the ML model input nodes (TensorFlow only). +- `outputs` (outputs): The name of the ML model output nodes (TensorFlow only). + +.. warning:: + Calling `exp.start(ensemble_instance)` prior to the launch of an ``Orchestrator`` will result in + a failed attempt to load the ML model to a non-existent standalone ``Orchestrator``. + +When the ``Ensemble`` is started via ``Experiment.start``, the ML model will be loaded to the +launched standalone ``Orchestrator``. The ML model can then be executed on the ``Orchestrator`` via a SmartSim +client (:ref:`SmartRedis`) within the application code. + +.. _from_file_ML_model_ensemble_ex: + +------------------------------------- +Example: Attach an ML Model From File +------------------------------------- +This example demonstrates how to attach a ML model from file to a SmartSim ``Ensemble`` +to load into an ``Orchestrator`` at ``Ensemble`` runtime. The source code example is +available in the dropdown below for convenient execution and customization. + +.. dropdown:: Experiment Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/ensemble_ml_model_file.py + +.. 
note:: + This example assumes: + + - a standalone ``Orchestrator`` is launched prior to ``Ensemble`` execution + - an initialized ``Ensemble`` named `ensemble_instance` exists within the ``Experiment`` workflow + - a Tensorflow-based ML model was serialized using ``freeze_model`` which returns the + path to the serialized model file and the names of the input and output layers + +**Attach the ML Model to a SmartSim Ensemble** + +In this example, we have a serialized Tensorflow-based ML model that was saved to disk and stored under `model`. +Additionally, the ``freeze_model`` function returned the names of the input and output layers stored under +`inputs` and `outputs`. Assuming an initialized ``Ensemble`` named `ensemble_instance` exists, we add a TensorFlow model using +the ``Ensemble.add_ml_model`` function and specify the ML model path to the parameter `model_path`: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/ensemble_ml_model_file.py + :language: python + :linenos: + :lines: 39-40 + +In the above ``ensemble_instance.add_ml_model`` code snippet, we offer the following arguments: + +- `name` ("cnn"): A name to reference the ML model in the ``Orchestrator``. +- `backend` ("TF"): Indicating that the ML model is a TensorFlow model. +- `model_path` (model_file): The path to the serialized ML model file. +- `device` ("GPU"): Specifying the device for ML model execution. +- `devices_per_node` (2): Use two GPUs per node. +- `first_device` (0): Start with 0 index GPU. +- `inputs` (inputs): The name of the ML model input nodes (TensorFlow only). +- `outputs` (outputs): The name of the ML model output nodes (TensorFlow only). + +.. warning:: + Calling `exp.start(ensemble_instance)` prior to instantiation of an ``Orchestrator`` will result in + a failed attempt to load the ML model to a non-existent ``Orchestrator``. + +When the ``Ensemble`` is started via ``Experiment.start``, the ML model will be loaded to the +launched ``Orchestrator``. 
The ML model can then be executed on the ``Orchestrator`` via a SmartSim +client (:ref:`SmartRedis`) within the application executable. + +.. _TS_ensemble_doc: + +TorchScripts +============ +When configuring an ``Ensemble``, users can instruct SmartSim to load TorchScripts dynamically +to the ``Orchestrator``. The TorchScripts become available for each ``Ensemble`` member upon being loaded +into the ``Orchestrator`` prior to the execution of the ``Ensemble``. SmartSim users may upload +a single TorchScript function via ``Ensemble.add_function`` or alternatively upload a script +containing multiple functions via ``Ensemble.add_script``. To load a TorchScript to the +``Orchestrator``, SmartSim users can follow one of the following processes: + +- :ref:`Define a TorchScript Function In-Memory` + Use the ``Ensemble.add_function`` to instruct SmartSim to load an in-memory TorchScript to the ``Orchestrator``. +- :ref:`Define Multiple TorchScript Functions From File` + Provide file path to ``Ensemble.add_script`` to instruct SmartSim to load the TorchScript from file to the ``Orchestrator``. +- :ref:`Define a TorchScript Function as String` + Provide function string to ``Ensemble.add_script`` to instruct SmartSim to load a raw string as a TorchScript function to the ``Orchestrator``. + +.. note:: + Uploading a TorchScript :ref:`from memory` using ``Ensemble.add_function`` + is only supported for standalone ``Orchestrators``. Users uploading + TorchScripts to colocated ``Orchestrators`` should instead use the function ``Ensemble.add_script`` + to upload :ref:`from file` or as a :ref:`string`. + +Each function also provides flexible device selection, allowing users to choose between which device the TorchScript is executed on, `"GPU"` or `"CPU"`. +In environments with multiple devices, specific device numbers can be specified using the +`devices_per_node` parameter. + +.. 
note:: + If `device=GPU` is specified when attaching a TorchScript function to an ``Ensemble``, this instructs + SmartSim to execute the TorchScript on GPU nodes. However, TorchScripts loaded to an ``Orchestrator`` are + executed on the ``Orchestrator`` compute resources. Therefore, users must make sure that the device + specified is included in the ``Orchestrator`` compute resources. To further explain, if a user + specifies `device=GPU`, however, initializes ``Orchestrator`` on only CPU nodes, + the TorchScript will not run on GPU nodes as advised. + +Continue or select the respective process link to learn more on how each function (``Ensemble.add_script`` and ``Ensemble.add_function``) +dynamically loads TorchScripts to the ``Orchestrator``. + +.. seealso:: + To add a TorchScript to a single ``Model`` that will be appended to an + ``Ensemble``, refer to the :ref:`TorchScripts` + section of the ``Model`` documentation. + +.. _in_mem_TF_ensemble_doc: + +------------------------------- +Attach an In-Memory TorchScript +------------------------------- +Users can define TorchScript functions within the ``Experiment`` driver script +to attach to an ``Ensemble``. This feature is supported by ``Ensemble.add_function``. + +.. warning:: + ``Ensemble.add_function`` does **not** support loading in-memory TorchScript functions to a colocated ``Orchestrator``. + If you would like to load a TorchScript function to a colocated ``Orchestrator``, define the function + as a :ref:`raw string` or :ref:`load from file`. + +When specifying an in-memory TF function using ``Ensemble.add_function``, the +following arguments are offered: + +- `name` (str): reference name for the script inside of the ``Orchestrator``. +- `function` (t.Optional[str] = None): TorchScript function code. +- `device` (t.Literal["CPU", "GPU"] = "CPU"): device for script execution, defaults to “CPU”. +- `devices_per_node` (int = 1): The number of GPU devices available on the host. 
This parameter only applies to GPU devices and will be ignored if device is specified as CPU. +- `first_device` (int = 0): The first GPU device to use on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. + +.. _in_mem_TF_ex: + +Example: Load an In-Memory TorchScript Function +----------------------------------------------- +This example walks through the steps of instructing SmartSim to load an in-memory TorchScript function +to a standalone ``Orchestrator``. The source code example is available in the dropdown below for +convenient execution and customization. + +.. dropdown:: Experiment Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_mem.py + +.. note:: + The example assumes: + + - a standalone ``Orchestrator`` is launched prior to ``Ensemble`` execution + - an initialized ``Ensemble`` named `ensemble_instance` exists within the ``Experiment`` workflow + +**Define an In-Memory TF Function** + +To begin, define an in-memory TorchScript function within the Python driver script. +For the purpose of the example, we add a simple TorchScript function, `timestwo`: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_mem.py + :language: python + :linenos: + :lines: 3-4 + +**Attach the In-Memory TorchScript Function to a SmartSim Ensemble** + +We use the ``Ensemble.add_function`` function to instruct SmartSim to load the TorchScript function `timestwo` +onto the launched standalone ``Orchestrator``. Specify the function `timestwo` to the `function` +parameter: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_mem.py + :language: python + :linenos: + :lines: 15-16 + +In the above ``ensemble_instance.add_function`` code snippet, we offer the following arguments: + +- `name` ("example_func"): A name to uniquely identify the TorchScript within the ``Orchestrator``. 
+- `function` (timestwo): Name of the TorchScript function defined in the Python driver script. +- `device` ("GPU"): Specifying the device for TorchScript execution. +- `devices_per_node` (2): Use two GPUs per node. +- `first_device` (0): Start with 0 index GPU. + +.. warning:: + Calling `exp.start(ensemble_instance)` prior to instantiation of an ``Orchestrator`` will result in + a failed attempt to load the TorchScript to a non-existent ``Orchestrator``. + +When the ``Ensemble`` is started via ``Experiment.start``, the TF function will be loaded to the +standalone ``Orchestrator``. The function can then be executed on the ``Orchestrator`` via a SmartSim +client (:ref:`SmartRedis`) within the application code. + +.. _TS_from_file_ensemble: + +------------------------------ +Attach a TorchScript From File +------------------------------ +Users can attach TorchScript functions from a file to an ``Ensemble`` and upload them to a +colocated or standalone ``Orchestrator``. This functionality is supported by the ``Ensemble.add_script`` +function's `script_path` parameter. + +When specifying a TorchScript using ``Ensemble.add_script``, the +following arguments are offered: + +- `name` (str): Reference name for the script inside of the ``Orchestrator``. +- `script` (t.Optional[str] = None): TorchScript code (only supported for non-colocated ``Orchestrators``). +- `script_path` (t.Optional[str] = None): path to TorchScript code. +- `device` (t.Literal["CPU", "GPU"] = "CPU"): device for script execution, defaults to “CPU”. +- `devices_per_node` (int = 1): The number of GPU devices available on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. +- `first_device` (int = 0): The first GPU device to use on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. 
+ +Example: Loading a TorchScript From File +---------------------------------------- +This example walks through the steps of instructing SmartSim to load a TorchScript from file +to an ``Orchestrator``. The source code example is available in the dropdown below for +convenient execution and customization. + +.. dropdown:: Experiment Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_file.py + +.. note:: + This example assumes: + + - an ``Orchestrator`` is launched prior to ``Ensemble`` execution + - an initialized ``Ensemble`` named `ensemble_instance` exists within the ``Experiment`` workflow + +**Define a TorchScript Script** + +For the example, we create the Python script `torchscript.py`. The file contains multiple +simple torch functions, shown below: + +.. code-block:: python + + def negate(x): + return torch.neg(x) + + def random(x, y): + return torch.randn(x, y) + + def pos(z): + return torch.positive(z) + +**Attach the TorchScript Script to a SmartSim Ensemble** + +Assuming an initialized ``Ensemble`` named `ensemble_instance` exists, we add a TorchScript script using +the ``Ensemble.add_script`` function and specify the script path to the parameter `script_path`: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_file.py + :language: python + :linenos: + :lines: 12-13 + +In the above ``ensemble_instance.add_script`` code snippet, we offer the following arguments: + +- `name` ("example_script"): Reference name for the script inside of the ``Orchestrator``. +- `script_path` ("path/to/torchscript.py"): Path to the script file. +- `device` ("GPU"): device for script execution. +- `devices_per_node` (2): Use two GPUs per node. +- `first_device` (0): Start with 0 index GPU. + +.. warning:: + Calling `exp.start(ensemble_instance)` prior to instantiation of an ``Orchestrator`` will result in + a failed attempt to load the TorchScript to a non-existent ``Orchestrator``. 
+ +When `ensemble_instance` is started via ``Experiment.start``, the TorchScript will be loaded from file to the +``Orchestrator`` that is launched prior to the start of `ensemble_instance`. + +.. _TS_raw_string_ensemble: + +--------------------------------- +Define TorchScripts as Raw String +--------------------------------- +Users can upload TorchScript functions from string to send to a colocated or +standalone ``Orchestrator``. This feature is supported by the +``Ensemble.add_script`` function's `script` parameter. + +When specifying a TorchScript using ``Ensemble.add_script``, the +following arguments are offered: + +- `name` (str): Reference name for the script inside of the ``Orchestrator``. +- `script` (t.Optional[str] = None): String of function code (e.g. TorchScript code string). +- `script_path` (t.Optional[str] = None): path to TorchScript code. +- `device` (t.Literal["CPU", "GPU"] = "CPU"): device for script execution, defaults to “CPU”. +- `devices_per_node` (int = 1): The number of GPU devices available on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. +- `first_device` (int = 0): The first GPU device to use on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. + +Example: Load a TorchScript From String +--------------------------------------- +This example walks through the steps of instructing SmartSim to load a TorchScript function +from string to an ``Orchestrator`` before the execution of the associated ``Ensemble``. +The source code example is available in the dropdown below for convenient execution and customization. + +.. dropdown:: Experiment Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_string.py + +.. 
note:: + This example assumes: + + - an ``Orchestrator`` is launched prior to ``Ensemble`` execution + - an initialized ``Ensemble`` named `ensemble_instance` exists within the ``Experiment`` workflow + +**Define a String TorchScript** + +Define the TorchScript code as a variable in the Python driver script: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_string.py + :language: python + :linenos: + :lines: 12-13 + +**Attach the TorchScript Function to a SmartSim Ensemble** + +Assuming an initialized ``Ensemble`` named `ensemble_instance` exists, we add a TorchScript using +the ``Ensemble.add_script`` function and specify the variable `torch_script_str` to the parameter +`script`: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_string.py + :language: python + :linenos: + :lines: 15-16 + +In the above ``ensemble_instance.add_script`` code snippet, we offer the following arguments: + +- `name` ("example_script"): key to store script under. +- `script` (torch_script_str): TorchScript code. +- `device` ("GPU"): device for script execution. +- `devices_per_node` (2): Use two GPUs per node. +- `first_device` (0): Start with 0 index GPU. + +.. warning:: + Calling `exp.start(ensemble_instance)` prior to instantiation of an ``Orchestrator`` will result in + a failed attempt to load the ML model to a non-existent ``Orchestrator``. + +When the ``Ensemble`` is started via ``Experiment.start``, the TorchScript will be loaded to the +``Orchestrator`` that is launched prior to the start of the ``Ensemble``. + +.. _prefix_ensemble: + +========================= +Data Collision Prevention +========================= +Overview +======== +When multiple ``Ensemble`` members use the same code to send and access their respective data +in the ``Orchestrator``, key overlapping can occur, leading to inadvertent data access +between ``Ensemble`` members. 
To address this, SmartSim supports key prefixing +through ``Ensemble.enable_key_prefixing`` which enables key prefixing for all +``Ensemble`` members. For example, during an ``Ensemble`` simulation with prefixing enabled, SmartSim will add +the ``Ensemble`` member `name` as a prefix to the keys sent to the ``Orchestrator``. +Enabling key prefixing eliminates issues related to key overlapping, allowing ``Ensemble`` +members to use the same code without issue. + +The key components of SmartSim ``Ensemble`` prefixing functionality include: + +1. **Sending Data to the Orchestrator**: Users can send data to an ``Orchestrator`` + with the ``Ensemble`` member name prepended to the data name by utilizing SmartSim :ref:`Ensemble functions`. +2. **Retrieving Data From the Orchestrator**: Users can instruct a ``Client`` to prepend an + ``Ensemble`` member name to a key during data retrieval, polling, or check for existence on the ``Orchestrator`` + through SmartRedis :ref:`Client functions`. However, entity interaction + must be registered using :ref:`Ensemble` or :ref:`Model` functions. + +.. seealso:: + For information on prefixing ``Client`` functions, visit the :ref:`Client functions` page of the ``Model`` + documentation. + +For example, assume you have an ``Ensemble`` that was initialized using the :ref:`replicas` creation strategy. +Two identical ``Model`` objects were created named `ensemble_0` and `ensemble_1` that use the same executable application +within an ``Ensemble`` named `ensemble`. In the application code you use the function ``Client.put_tensor("tensor_0", data)``. +Without key prefixing enabled, the slower member will overwrite the data from the faster simulation. +With ``Ensemble`` key prefixing turned on, `ensemble_0` and `ensemble_1` can access +their tensor `"tensor_0"` by name without overwriting or accessing the other ``Model``'s `"tensor_0"` tensor. 
+In this scenario, the two tensors placed in the ``Orchestrator`` are named `ensemble_0.tensor_0` and `ensemble_1.tensor_0`. + +.. _model_prefix_func_ensemble: + +------------------ +Ensemble Functions +------------------ +An ``Ensemble`` object supports two prefixing functions: ``Ensemble.enable_key_prefixing`` and +``Ensemble.register_incoming_entity``. For more information on each function, reference the +:ref:`Ensemble API docs`. + +To enable prefixing on an ``Ensemble``, users must use the ``Ensemble.enable_key_prefixing`` +function in the ``Experiment`` driver script. This function activates prefixing for tensors, +``Datasets``, and lists sent to an ``Orchestrator`` for all ``Ensemble`` members. This function +also enables access to prefixing ``Client`` functions within the ``Ensemble`` members. This excludes +the ``Client.set_data_source`` function, where ``enable_key_prefixing`` is not required for access. + +.. note:: + ML model and script prefixing is not automatically enabled through ``Ensemble.enable_key_prefixing``. + Prefixing must be enabled within the ``Ensemble`` by calling the ``use_model_ensemble_prefix`` method + on the ``Client`` embedded within the member application. + +Users can enable the SmartRedis ``Client`` to interact with prefixed data, ML models and TorchScripts +using the ``Client.set_data_source``. However, for SmartSim to recognize the producer entity name +passed to the function within an application, the producer entity must be registered on the consumer +entity using ``Ensemble.register_incoming_entity``. + +If a consumer ``Ensemble`` member requests data sent to the ``Orchestrator`` by other ``Ensemble`` members, the producer members must be +registered on the consumer member. To access ``Ensemble`` members, SmartSim offers the attribute ``Ensemble.models`` that returns +a list of ``Ensemble`` members. Below we demonstrate registering producer members on a consumer member: + +.. 
code-block:: python + + # list of producer Ensemble members + list_of_ensemble_names = ["producer_0", "producer_1", "producer_2"] + + # Map each Ensemble member name to the member object + # (``Ensemble.models`` returns a list of members) + members_by_name = {member.name: member for member in ensemble.models} + + # Grab the consumer Ensemble member + ensemble_member = members_by_name["producer_3"] + # Register the producer members on the consumer member + for name in list_of_ensemble_names: + ensemble_member.register_incoming_entity(members_by_name[name]) + +For examples demonstrating how to retrieve data within the entity application that produced +the data, visit the ``Model`` :ref:`Copy/Rename/Delete Operations` subsection. + +Example: Ensemble Key Prefixing +=============================== +In this example, we create an ``Ensemble`` comprised of two ``Model`` objects that use identical code +to send data to a standalone ``Orchestrator``. To prevent key collisions and ensure data +integrity, we enable key prefixing on the ``Ensemble`` which automatically +appends the ``Ensemble`` member `name` to the data sent to the ``Orchestrator``. After the +``Ensemble`` completes, we launch a consumer ``Model`` within the ``Experiment`` driver script +to demonstrate accessing prefixed data sent to the ``Orchestrator`` by ``Ensemble`` members. + +This example consists of **three** Python scripts: + +1. :ref:`Application Producer Script`: This script is encapsulated + in a SmartSim ``Ensemble`` within the ``Experiment`` driver script. Prefixing is enabled + on the ``Ensemble``. The producer script puts NumPy tensors on an ``Orchestrator`` + launched in the ``Experiment`` driver script. The ``Ensemble`` creates two + identical ``Ensemble`` members. The producer script is executed + in both ``Ensemble`` members to send two prefixed tensors to the ``Orchestrator``. + The source code example is available in the dropdown below for convenient customization. + +.. dropdown:: Application Producer Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/application_producer_script.py + +2. 
:ref:`Application Consumer Script`: This script is encapsulated + within a SmartSim ``Model`` in the ``Experiment`` driver script. The script requests the + prefixed tensors placed by the producer script. The source code example is available in + the dropdown below for convenient customization. + +.. dropdown:: Application Consumer Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/application_consumer_script.py + +3. :ref:`Experiment Driver Script`: The driver script launches the + ``Orchestrator``, the ``Ensemble`` (which sends prefixed keys to the ``Orchestrator``), + and the ``Model`` (which requests prefixed keys from the ``Orchestrator``). The + ``Experiment`` driver script is the centralized spot that controls the workflow. + The source code example is available in the dropdown below for convenient execution and + customization. + +.. dropdown:: Experiment Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py + +.. _app_prod_prefix_ensemble: + +------------------------------- +The Application Producer Script +------------------------------- +In the ``Experiment`` driver script, we instruct SmartSim to create an ``Ensemble`` comprised of +two duplicate members that execute this producer script. In the producer script, a SmartRedis ``Client`` sends a +tensor to the ``Orchestrator``. Since the ``Ensemble`` members are identical and therefore use the same +application code, two tensors are sent to the ``Orchestrator``. Without prefixing enabled on the ``Ensemble``, +the keys can be overwritten. To prevent this, we enable key prefixing on the ``Ensemble`` in the driver script +via ``Ensemble.enable_key_prefixing``. When the producer script is executed by each ``Ensemble`` member, a +tensor is sent to the ``Orchestrator`` with the ``Ensemble`` member `name` prepended to the tensor `name`. 
+ +Here we provide the producer script that is applied to the ``Ensemble`` members: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/application_producer_script.py + :language: python + :linenos: + +After the completion of ``Ensemble`` members `producer_0` and `producer_1`, the contents of the ``Orchestrator`` are: + +.. code-block:: bash + + 1) "producer_0.tensor" + 2) "producer_1.tensor" + +.. _app_con_prefix_ensemble: + +------------------------------- +The Application Consumer Script +------------------------------- +In the ``Experiment`` driver script, we initialize a consumer ``Model`` that encapsulates +the consumer application to request the tensors produced from the ``Ensemble``. To do +so, we use SmartRedis key prefixing functionality to instruct the SmartRedis ``Client`` +to append the name of an ``Ensemble`` member to the key `name`. + +.. seealso:: + For more information on ``Client`` prefixing functions, visit the :ref:`Client functions` + subsection of the ``Model`` documentation. + +To begin, specify the imports and initialize a SmartRedis ``Client``: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/application_consumer_script.py + :language: python + :linenos: + :lines: 1-4 + +To retrieve the tensor from the first ``Ensemble`` member named `producer_0`, use +``Client.set_data_source``. Specify the name of the first ``Ensemble`` member +as an argument to the function. This instructs SmartSim to append the ``Ensemble`` member name to the data +search on the ``Orchestrator``. When ``Client.poll_tensor`` is executed, +the SmartRedis `client` will poll for key, `producer_0.tensor`: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/application_consumer_script.py + :language: python + :linenos: + :lines: 6-9 + +Follow the same steps above, however, change the data source `name` to the `name` +of the second ``Ensemble`` member (`producer_1`): + +.. 
literalinclude:: tutorials/doc_examples/ensemble_doc_examples/application_consumer_script.py + :language: python + :linenos: + :lines: 11-14 + +We print the boolean return to verify that the tensors were found: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/application_consumer_script.py + :language: python + :linenos: + :lines: 16-17 + +When the ``Experiment`` driver script is executed, the following output will appear in `consumer.out`: + +.. code-block:: bash + + Default@11-46-05:producer_0.tensor was found: True + Default@11-46-05:producer_1.tensor was found: True + +.. warning:: + For SmartSim to recognize the ``Ensemble`` member names as a valid data source + to ``Client.set_data_source``, you must register each ``Ensemble`` member + on the consumer ``Model`` in the driver script via ``Model.register_incoming_entity``. + We demonstrate this in the ``Experiment`` driver script section of the example. + +.. _exp_prefix_ensemble: + +--------------------- +The Experiment Script +--------------------- +The ``Experiment`` driver script manages all workflow components and utilizes the producer and consumer +application scripts. In the example, the ``Experiment``: + +- launches standalone ``Orchestrator`` +- launches an ``Ensemble`` via the replicas initialization strategy +- launches a consumer ``Model`` +- clobbers the ``Orchestrator`` + +To begin, add the necessary imports, initialize an ``Experiment`` instance and initialize the +standalone ``Orchestrator``: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py + :language: python + :linenos: + :lines: 1-9 + +We are now setup to discuss key prefixing within the ``Experiment`` driver script. +To create an ``Ensemble`` using the replicas strategy, begin by initializing a ``RunSettings`` +object to apply to all ``Ensemble`` members. Specify the path to the application +producer script: + +.. 
literalinclude:: tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py + :language: python + :linenos: + :lines: 11-12 + +Next, initialize an ``Ensemble`` by specifying `ensemble_settings` and the number of ``Model`` `replicas` to create: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py + :language: python + :linenos: + :lines: 14-15 + +Instruct SmartSim to prefix all tensors sent to the ``Orchestrator`` from the ``Ensemble`` via ``Ensemble.enable_key_prefixing``: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py + :language: python + :linenos: + :lines: 17-18 + +Next, initialize the consumer ``Model``. The consumer ``Model`` application requests +the prefixed tensors produced by the ``Ensemble``: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py + :language: python + :linenos: + :lines: 20-23 + +Next, organize the SmartSim entity output files into a single ``Experiment`` folder: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py + :language: python + :linenos: + :lines: 25-26 + +Launch the ``Orchestrator``: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py + :language: python + :linenos: + :lines: 28-29 + +Launch the ``Ensemble``: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py + :language: python + :linenos: + :lines: 31-32 + +Set `block=True` so that ``Experiment.start`` waits until the last ``Ensemble`` member has finished before continuing. + +The consumer ``Model`` application script uses ``Client.set_data_source`` which +accepts the ``Ensemble`` member names when searching for prefixed +keys in the ``Orchestrator``. In order for SmartSim to recognize the ``Ensemble`` +member names as a valid data source in the consumer ``Model``, we must register +the entity interaction: + +.. 
literalinclude:: tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py + :language: python + :linenos: + :lines: 34-36 + +Launch the consumer ``Model``: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py + :language: python + :linenos: + :lines: 38-39 + +To finish, tear down the standalone ``Orchestrator``: + +.. literalinclude:: tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py + :language: python + :linenos: + :lines: 41-42 \ No newline at end of file diff --git a/doc/experiment.rst b/doc/experiment.rst index 986db4cad..716df1228 100644 --- a/doc/experiment.rst +++ b/doc/experiment.rst @@ -1,326 +1,534 @@ - *********** Experiments *********** +======== +Overview +======== +SmartSim helps automate the deployment of AI-enabled workflows on HPC systems. With SmartSim, users +can describe and launch combinations of applications and AI/ML infrastructure to produce novel and +scalable workflows. SmartSim supports launching these workflows on a diverse set of systems, including +local environments such as Mac or Linux, as well as HPC job schedulers (e.g. Slurm, PBS Pro, and LSF). -The Experiment acts as both a factory class for constructing the stages of an -experiment (``Model``, ``Ensemble``, ``Orchestrator``, etc.) as well as an -interface to interact with the entities created by the experiment. - -Users can initialize an :ref:`Experiment ` at the beginning of a -Jupyter notebook, interactive python session, or Python file and use the -``Experiment`` to iteratively create, configure and launch computational kernels -on the system through the specified launcher. - -.. |SmartSim Architecture| image:: images/ss-arch-overview.png - :width: 700 - :alt: Alternative text - -|SmartSim Architecture| - +The ``Experiment`` API is SmartSim's top level API that provides users with methods for creating, combining, +configuring, launching and monitoring :ref:`entities` in an AI-enabled workflow. 
More specifically, the +``Experiment`` API offers three customizable workflow components that are created and initialized via factory +methods: -The interface was designed to be simple, with as little complexity as possible, -and agnostic to the backend launching mechanism (local, Slurm, PBSPro, etc.). +* :ref:`Orchestrator` +* :ref:`Model` +* :ref:`Ensemble` -Model -===== +Settings are given to ``Model`` and ``Ensemble`` objects to provide parameters for how the job should be executed. The +:ref:`Experiment API` offers two customizable Settings objects that are created via the factory methods: -``Model(s)`` are subclasses of ``SmartSimEntity(s)`` and are created through the -Experiment API. Models represent any computational kernel. Models are flexible -enough to support many different applications, however, to be used with our -clients (SmartRedis) the application will have to be written in Python, C, C++, -or Fortran. +* :ref:`RunSettings` +* :ref:`BatchSettings` -Models are given :ref:`RunSettings ` objects that specify how a kernel -should be executed with regard to the workload manager (e.g. Slurm) and the -available compute resources on the system. +Once a workflow component is initialized (e.g. ``Orchestrator``, ``Model`` or ``Ensemble``), a user has access +to the associated entity API which supports configuring and retrieving the entities' information: -Each launcher supports specific types of ``RunSettings``. +* :ref:`Orchestrator API` +* :ref:`Model API` +* :ref:`Ensemble API` - - :ref:`SrunSettings ` for Slurm - - :ref:`AprunSettings ` for PBSPro - - :ref:`MpirunSettings ` for OpenMPI with `mpirun` on PBSPro, LSF, and Slurm - - :ref:`JsrunSettings ` for LSF +There is no limit to the number of SmartSim entities a user can +initialize within an ``Experiment``. -These settings can be manually specified by the user, or auto-detected by the -SmartSim Experiment through the ``Experiment.create_run_settings`` method. +.. 
figure:: images/Experiment.png -A simple example of using the Experiment API to create a model and run it -locally: + Sample ``Experiment`` showing a user application leveraging + machine learning infrastructure launched by SmartSim and connected + to online analysis and visualization via the in-memory ``Orchestrator``. -.. code-block:: Python +Find an example of the ``Experiment`` class and factory methods used within a +workflow in the :ref:`Example` section of this page. - from smartsim import Experiment +.. _launcher_exp_docs: - exp = Experiment("simple", launcher="local") +========= +Launchers +========= +SmartSim supports launching AI-enabled workflows on a wide variety of systems, including locally on a Mac or +Linux machine or on HPC machines with a job scheduler (e.g. Slurm, PBS Pro, and LSF). When creating a SmartSim +``Experiment``, the user has the opportunity to specify the `launcher` type or defer to automatic `launcher` selection. +`Launcher` selection determines how SmartSim translates entity configurations into system calls to launch, +manage, and monitor. Currently, SmartSim supports 7 `launcher` options: - settings = exp.create_run_settings("echo", exe_args="Hello World") - model = exp.create_model("hello_world", settings) +1. ``local`` **[default]**: for single-node, workstation, or laptop +2. ``slurm``: for systems using the Slurm scheduler +3. ``pbs``: for systems using the PBS Pro scheduler +4. ``pals``: for systems using the PALS scheduler +5. ``lsf``: for systems using the LSF scheduler +6. ``dragon``: if Dragon is installed in the current Python environment, see :ref:`Dragon Install ` +7. ``auto``: have SmartSim auto-detect the launcher to use (will not detect ``dragon``) - exp.start(model, block=True) - print(exp.get_status(model)) +The :ref:`Dragon-based launcher ` can be run on PBS- or Slurm-based systems +(MPI applications are supported only when Cray PMI or Cray PALS are available). 
-If the launcher has been specified, or auto-detected through setting -``launcher=auto`` in the Experiment initialization, the ``create_run_settings`` -method will automatically create the appropriate ``RunSettings`` object and -return it. +If the system's `launcher` cannot be found or no `launcher` argument is provided, the default value of +`"local"` will be assigned which will start all ``Experiment`` launched entities on the +localhost. -For example with Slurm +For examples specifying a `launcher` during ``Experiment`` initialization, navigate to the +``Experiment`` :ref:`__init__ special method` in the ``Experiment`` API docstring. -.. code-block:: Python +.. _entities_exp_docs: - from smartsim import Experiment - - exp = Experiment("hello_world_exp", launcher="slurm") - srun = exp.create_run_settings(exe="echo", exe_args="Hello World!") - - # helper methods for configuring run settings are available in - # each of the implementations of RunSettings - srun.set_nodes(1) - srun.set_tasks(32) +======== +Entities +======== +Entities are SmartSim API objects that can be launched and +managed on the compute system through the ``Experiment`` API. +The SmartSim entities include: + +* ``Orchestrator`` +* ``Model`` +* ``Ensemble`` + +While the ``Experiment`` object is intended to be instantiated once in the +Python driver script, there is no limit to the number of SmartSim entities +within the ``Experiment``. In the following subsections, we define the +general purpose of the three entities that can be created through the +``Experiment``. + +To create a reference to a newly instantiated entity object, use the +associated ``Experiment.create_...`` factory method shown below. + +.. 
 list-table:: Experiment API Entity Creation + :widths: 20 65 25 + :header-rows: 1 + + * - Factory Method + - Example + - Return Type + * - ``create_database`` + - ``orch = exp.create_database([port, db_nodes, ...])`` + - :ref:`Orchestrator ` + * - ``create_model`` + - ``model = exp.create_model(name, run_settings)`` + - :ref:`Model ` + * - ``create_ensemble`` + - ``ensemble = exp.create_ensemble(name[, params, ...])`` + - :ref:`Ensemble ` + +After initialization, each entity can be started, monitored, and stopped using +the ``Experiment`` post-creation methods. + +.. list-table:: Interact with Entities During the Experiment + :widths: 25 55 25 + :header-rows: 1 + + * - Method + - Example + - Description + * - ``start`` + - ``exp.start(*args[, block, summary, ...])`` + - Launch an Entity + * - ``stop`` + - ``exp.stop(*args)`` + - Stop an Entity + * - ``get_status`` + - ``exp.get_status(*args)`` + - Retrieve Entity Status + * - ``preview`` + - ``exp.preview(*args, ...)`` + - Preview an Entity + +.. _orchestrator_exp_docs: + +Orchestrator +============ +The :ref:`Orchestrator` is an in-memory database built for +a wide variety of AI-enabled workflows. The ``Orchestrator`` can be thought of as a general +feature store for numerical data, ML models, and scripts. The ``Orchestrator`` is capable +of performing inference and script evaluation using data in the feature store. +Any SmartSim ``Model`` or ``Ensemble`` member can connect to the +``Orchestrator`` via the :ref:`SmartRedis` +``Client`` library to transmit data, execute ML models, and execute scripts. + +**SmartSim Offers Two Types of Orchestrator Deployments:** + +* :ref:`Standalone Orchestrator Deployment` +* :ref:`Colocated Orchestrator Deployment` + +To create a standalone ``Orchestrator`` that does not share compute resources with other +SmartSim entities, use the ``Experiment.create_database`` factory method which +returns an ``Orchestrator`` object. 
To create a colocated ``Orchestrator`` that +shares compute resources with a ``Model``, use the ``Model.colocate_db_tcp`` +or ``Model.colocate_db_uds`` member functions accessible after a +``Model`` object has been initialized. The functions instruct +SmartSim to launch an ``Orchestrator`` on the application compute nodes. An ``Orchestrator`` object is not +returned from a ``Model.colocate_db`` instruction, and subsequent interactions with the +colocated ``Orchestrator`` are handled through the :ref:`Model API`. + +SmartSim supports :ref:`multi-database` functionality, enabling an ``Experiment`` to have +several concurrently launched ``Orchestrator(s)``. If there is a need to launch more than +one ``Orchestrator``, the ``Experiment.create_database`` and ``Model.colocate..`` +functions mandate the specification of a unique ``Orchestrator`` identifier, denoted +by the `db_identifier` argument for each ``Orchestrator``. The `db_identifier` is used +in an application script by a SmartRedis ``Client`` to connect to a specific ``Orchestrator``. + +.. _model_exp_docs: - model = exp.create_model("hello_world", srun) - exp.start(model, block=True, summary=True) +Model +===== +:ref:`Model(s)` represent a simulation model or any computational kernel, +including applications, scripts, or generally, a program. They can +interact with other SmartSim entities via data transmitted to/from +SmartSim ``Orchestrator(s)`` using a SmartRedis ``Client``. - print(exp.get_status(model)) +A ``Model`` is created through the factory method: ``Experiment.create_model``. +``Model(s)`` are initialized with ``RunSettings`` objects that specify +how a ``Model`` should be launched by a workload manager +(e.g., Slurm) and the compute resources required. +Optionally, the user may also specify a ``BatchSettings`` object if +the ``Model`` should be launched as a batch job on the WLM system. 
+The ``create_model`` factory method returns an initialized ``Model`` object that +gives you access to functions associated with the :ref:`Model API`. -The above will run ``srun -n 32 -N 1 echo Hello World!``, monitor its -execution, and inform the user when it is completed. This driver script can be -executed in an interactive allocation, or placed into a batch script as follows: +A ``Model`` supports key features, including methods to: -.. code-block:: bash +- :ref:`Attach configuration files` for use at ``Model`` runtime. +- :ref:`Colocate an Orchestrator` to a SmartSim ``Model``. +- :ref:`Load an ML model` into the ``Orchestrator`` at ``Model`` runtime. +- :ref:`Load a TorchScript function` into the ``Orchestrator`` at ``Model`` runtime. +- :ref:`Enable data collision prevention` which allows + for reuse of key names in different ``Model`` applications. - #!/bin/bash - #SBATCH --exclusive - #SBATCH --nodes=1 - #SBATCH --ntasks-per-node=32 - #SBATCH --time=00:10:00 +Visit the respective links for more information on each topic. - python /path/to/script.py +.. _ensemble_exp_docs: Ensemble ======== - -In addition to a single model, SmartSim has the ability to launch an -``Ensemble`` of ``Model`` applications simultaneously. - -An ``Ensemble`` can be constructed in three ways: - 1. Parameter expansion (by specifying ``params`` and ``perm_strat`` argument) - 2. Replica creation (by specifying ``replicas`` argument) - 3. Manually (by adding created ``Model`` objects) if launching as a batch job - -Ensembles can be given parameters and permutation strategies that define how the -``Ensemble`` will create the underlying model objects. - -Three strategies are built in: - 1. ``all_perm``: for generating all permutations of model parameters - 2. ``step``: for creating one set of parameters for each element in `n` arrays - 3. 
``random``: for random selection from predefined parameter spaces - -Here is an example that uses the ``random`` strategy to intialize four models -with random parameters within a set range. We use the ``params_as_args`` field -to specify that the randomly selected learning rate parameter should be passed -to the created models as a executable argument. - -.. code-block:: bash - - import numpy as np - from smartsim import Experiment - - exp = Experiment("Training-Run", launcher="auto") - - # setup ensemble parameter space - learning_rate = list(np.linspace(.01, .5)) - train_params = {"LR": learning_rate} - - # define how each member should run - run = exp.create_run_settings(exe="python", - exe_args="./train-model.py") - - ensemble = exp.create_ensemble("Training-Ensemble", - params=train_params, - params_as_args=["LR"], - run_settings=run, - perm_strategy="random", - n_models=4) - exp.start(ensemble, summary=True) - - -A callable function can also be supplied for custom permutation strategies. The -function should take two arguments: a list of parameter names, and a list of -lists of potential parameter values. The function should return a list of -dictionaries that will be supplied as model parameters. The length of the list -returned will determine how many ``Model`` instances are created. - -For example, the following is the built-in strategy ``all_perm``: +In addition to a single ``Model``, SmartSim allows users to create, +configure, and launch an :ref:`Ensemble` of ``Model`` objects. +``Ensemble(s)`` can be given parameters and a permutation strategy that define how the +``Ensemble`` will create the underlying ``Model`` objects. Users may also +manually create and append ``Model(s)`` to an ``Ensemble``. For information +and examples on ``Ensemble`` creation strategies, visit the :ref:`Initialization` +section within the ``Ensemble`` documentation. 
+ +An ``Ensemble`` supports key features, including methods to: + +- :ref:`Attach configuration files` for use at ``Ensemble`` runtime. +- :ref:`Load an ML model` (TF, TF-lite, PT, or ONNX) into the ``Orchestrator`` at ``Ensemble`` runtime. +- :ref:`Load a TorchScript function` into the ``Orchestrator`` at ``Ensemble`` runtime. +- :ref:`Prevent data collisions` within the ``Ensemble``, which allows for reuse of application code. + +Visit the respective links for more information on each topic. + +============== +File Structure +============== +When a user executes an ``Experiment`` script, it generates output folders in the system's directory. +By default, SmartSim creates a predefined file structure and assigns a path to each entity initialized. +However, users have the flexibility to customize this according to workflow needs. Please refer +to the respective :ref:`default` and :ref:`configure` sections below +for more details. + +.. note:: + Files added for symlinking, copying, or configuration will not be organized into the generated + directories unless ``Experiment.generate`` is invoked on the designated entity. + +.. _default_folder: + +Default +======= +By default, an ``Experiment`` folder is created in your current working directory, using the +specified `name` parameter during ``Experiment`` initialization. Each entity created by the +``Experiment`` generates an output folder under the ``Experiment`` directory, named after the +entity. These folders hold `.err` and `.out` files, containing execution-related information. + +For instance, consider the following Python script: .. 
 code-block:: python - from itertools import product - - def create_all_permutations(param_names, param_values): - perms = list(product(*param_values)) - all_permutations = [] - for p in perms: - temp_model = dict(zip(param_names, p)) - all_permutations.append(temp_model) - return all_permutations - - -After ``Ensemble`` initialization, ``Ensemble`` instances can be -passed as arguments to ``Experiment.generate()`` to write assigned -parameter values into attached and tagged configuration files. - -Launching Ensembles -------------------- - -Ensembles can be launched in previously obtained interactive allocations -and as a batch. Similar to ``RunSettings``, ``BatchSettings`` specify how -an application(s) in a batch job should be executed with regards to the system -workload manager and available compute resources. + from smartsim import Experiment + + exp = Experiment(name="experiment-example") + database = exp.create_database(port=6379, interface="ib0") + exp.start(database) + settings = exp.create_run_settings(exe="echo", exe_args="hello world") + model = exp.create_model(name="model-name", run_settings=settings) + ensemble = exp.create_ensemble(name="ensemble-name", run_settings=settings, replicas=2) + exp.start(model, ensemble) + exp.stop(database) + +When executed, this script creates the following directory structure in your +working directory: + +:: + + experiment-example + ├── orchestrator + │ ├── orchestrator_0.err + │ └── orchestrator_0.out + ├── model-name + │ ├── model-name.err + │ └── model-name.out + └── ensemble-name + ├── ensemble-name_0 + │ ├── ensemble-name_0.err + │ └── ensemble-name_0.out + ├── ensemble-name_1 + │ ├── ensemble-name_1.err + │ └── ensemble-name_1.out + +.. _config_folder: + +Configure +========= +Customizing the path of the ``Experiment`` and entity folders is possible by providing +either an absolute or relative path to the `path` argument during initialization. 
When +a relative path is provided, SmartSim executes the entity relative to the current working +directory. + +For instance, consider the following Python script: - - :ref:`SbatchSettings ` for Slurm - - :ref:`QsubBatchSettings ` for PBSPro - - :ref:`BsubBatchSettings ` for LSF - -If it only passed ``RunSettings``, ``Ensemble``, objects will require either -a ``replicas`` argument or a ``params`` argument to expand parameters -into ``Model`` instances. At launch, the ``Ensemble`` will look for -interactive allocations to launch models in. - -If it passed ``BatchSettings`` without other arguments, an empty ``Ensemble`` -will be created that ``Model`` objects can be added to manually. All ``Model`` -objects added to the ``Ensemble`` will be launched in a single batch. - -If it passed ``BatchSettings`` and ``RunSettings``, the ``BatchSettings`` will -determine the allocation settings for the entire batch, and the ``RunSettings`` -will determine how each individual ``Model`` instance is executed within -that batch. - -This is the same example as above, but tailored towards a running as a batch job -on a slurm system: - -.. 
code-block:: bash - - import numpy as np - from smartsim import Experiment - - exp = Experiment("Training-Run", launcher="slurm") - - # setup ensemble parameter space - learning_rate = list(np.linspace(.01, .5)) - train_params = {"LR": learning_rate} - - # define resources for all ensemble members - sbatch = exp.create_batch_settings(nodes=4, - time="01:00:00", - account="12345-Cray", - queue="gpu") - - # define how each member should run - srun = exp.create_run_settings(exe="python", - exe_args="./train-model.py") - srun.set_nodes(1) - srun.set_tasks(24) - - ensemble = exp.create_ensemble("Training-Ensemble", - params=train_params, - params_as_args=["LR"], - batch_settings=sbatch, - run_settings=srun, - perm_strategy="random", - n_models=4) - exp.start(ensemble, summary=True) - - -This will generate and execute a batch script that looks something like -the following: - -.. code-block:: bash - - # GENERATED - - #!/bin/bash - - #SBATCH --output=/lus/smartsim/Training-Ensemble.out - #SBATCH --error=/lus/smartsim/Training-Ensemble.err - #SBATCH --job-name=Training-Ensemble-CHTN0UI2DORX - #SBATCH --nodes=4 - #SBATCH --time=01:00:00 - #SBATCH --partition=gpu - #SBATCH --account=12345-Cray - - cd /scratch/smartsim/Training-Run ; /usr/bin/srun --output /scratch/smartsim/Training-Run/Training-Ensemble_0.out --error /scratch/smartsim/Training-Ensemble_0.err --job-name Training-Ensemble_0-CHTN0UI2E5DX --nodes=1 --ntasks=24 /scratch/pyenvs/smartsim/bin/python ./train-model.py --LR=0.17 & - - cd /scratch/smartsim/Training-Run ; /usr/bin/srun --output /scratch/smartsim/Training-Run/Training-Ensemble_1.out --error /scratch/smartsim/Training-Ensemble_1.err --job-name Training-Ensemble_1-CHTN0UI2JQR5 --nodes=1 --ntasks=24 /scratch/pyenvs/smartsim/bin/python ./train-model.py --LR=0.32 & - - cd /scratch/smartsim/Training-Run ; /usr/bin/srun --output /scratch/smartsim/Training-Run/Training-Ensemble_2.out --error /scratch/smartsim/Training-Ensemble_2.err --job-name 
Training-Ensemble_2-CHTN0UI2P2AR --nodes=1 --ntasks=24 /scratch/pyenvs/smartsim/bin/python ./train-model.py --LR=0.060000000000000005 & - - cd /scratch/smartsim/Training-Run ; /usr/bin/srun --output /scratch/smartsim/Training-Run/Training-Ensemble_3.out --error /scratch/smartsim/Training-Ensemble_3.err --job-name Training-Ensemble_3-CHTN0UI2TRE7 --nodes=1 --ntasks=24 /scratch/pyenvs/smartsim/bin/python ./train-model.py --LR=0.35000000000000003 & - - wait - -Prefixing Keys in the Orchestrator ----------------------------------- - -If each of multiple ensemble members attempt to use the same code to access their respective models -in the Orchestrator, the keys by which they do this will overlap and they can end up accessing each -others' data inadvertently. To prevent this situation, the SmartSim Entity object supports key -prefixing, which automatically prepends the name of the model to the keys by which it is accessed. -With this enabled, key overlapping is no longer an issue and ensemble members can use the same code. - -Under the hood, calling ensemble.enable_key_prefixing() causes the SSKEYOUT environment variable to -be set, which in turn causes all keys generated by an ensemble member to be prefixed with its model -name. Similarly, if the model for the ensemble member has incoming entities (such as those set via -model.register_incoming_entity() or ensemble.register_incoming_entity()), the SSKEYIN environment -variable will be set and the keys associated with those inputs will be automatically prefixed. Note -that entities must register themselves as this is not done by default. - -Finally, please note that while prefixing is enabled by default for tensors, datasets, and aggregated -lists of datasets, a SmartRedis client must manually call Client.use_model_ensemble_prefix() to -ensure that prefixes are used with models and scripts. - -We modify the example above to enable key prefixing as follows: - -.. 
 code-block:: bash - - import numpy as np - from smartsim import Experiment - - exp = Experiment("Training-Run", launcher="slurm") - - # setup ensemble parameter space - learning_rate = list(np.linspace(.01, .5)) - train_params = {"LR": learning_rate} +.. code-block:: python - # define resources for all ensemble members - sbatch = exp.create_batch_settings(nodes=4, - time="01:00:00", - account="12345-Cray", - queue="gpu") + from smartsim import Experiment + + exp = Experiment(name="experiment-example", exp_path="absolute/path/to/experiment-folder") + database = exp.create_database(port=6379, interface="ib0") + exp.start(database) + settings = exp.create_run_settings(exe="echo", exe_args="hello world") + model = exp.create_model(name="model-name", run_settings=settings, path="./model-folder") + ensemble = exp.create_ensemble(name="ensemble-name", run_settings=settings, replicas=2, path="./ensemble-folder") + exp.start(model, ensemble) + exp.stop(database) + +When executed, this script creates the following directory structure in your +working directory: + +:: + + ├── experiment-folder + | ├── orchestrator + | │ ├── orchestrator_0.err + | │ └── orchestrator_0.out + ├── model-folder + │ ├── model-name.err + │ └── model-name.out + └── ensemble-folder + ├── ensemble-name_0 + │ ├── ensemble-name_0.err + │ └── ensemble-name_0.out + ├── ensemble-name_1 + │ ├── ensemble-name_1.err + │ └── ensemble-name_1.out + +.. _exp_example: + +======= +Example +======= +.. compound:: + In the following section, we provide an example of using SmartSim to automate the + deployment of an HPC workflow consisting of a ``Model`` and standalone ``Orchestrator``. 
 + The example demonstrates: + + *Initializing* + - a workflow (``Experiment``) + - an in-memory database (standalone ``Orchestrator``) + - an application (``Model``) + *Generating* + - the ``Orchestrator`` output directory + - the ``Model`` output directory + *Previewing* + - the ``Orchestrator`` contents + - the ``Model`` contents + *Starting* + - an in-memory database (standalone ``Orchestrator``) + - an application (``Model``) + *Stopping* + - an in-memory database (standalone ``Orchestrator``) + + The example source code is available in the dropdown below for convenient execution + and customization. + + .. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/experiment_doc_examples/exp.py + +Initializing +============ +.. compound:: + To create a workflow, *initialize* an ``Experiment`` object + at the start of the Python driver script. This involves specifying + a name and the system launcher that will execute all entities. + Set the `launcher` argument to `auto` to instruct SmartSim to attempt + to find the machine's WLM. + + .. literalinclude:: tutorials/doc_examples/experiment_doc_examples/exp.py + :language: python + :linenos: + :lines: 1-8 + + We also initialize a SmartSim :ref:`logger`. We will use the logger to log the ``Experiment`` + summary. + +.. compound:: + Next, launch an in-memory database, referred to as an ``Orchestrator``. + To *initialize* an ``Orchestrator`` object, use the ``Experiment.create_database`` + factory method. Create a multi-sharded ``Orchestrator`` by setting the argument `db_nodes` to three. + SmartSim will assign a `port` to the ``Orchestrator`` and attempt to detect your machine's + network interface if not provided. + + .. literalinclude:: tutorials/doc_examples/experiment_doc_examples/exp.py + :language: python + :linenos: + :lines: 10-11 + +.. compound:: + Before invoking the factory method to create a ``Model``, + first create a ``RunSettings`` object. 
 ``RunSettings`` hold the + information needed to execute the ``Model`` on the machine. The ``RunSettings`` + object is initialized using the ``Experiment.create_run_settings`` method. + Specify the executable to run and arguments to pass to the executable. + + The example ``Model`` is a simple `Hello World` program + that echoes `Hello World` to stdout. + + .. literalinclude:: tutorials/doc_examples/experiment_doc_examples/exp.py + :language: python + :linenos: + :lines: 13-14 + + After creating the ``RunSettings`` object, initialize the ``Model`` object by passing the `name` + and `settings` to ``create_model``. + + .. literalinclude:: tutorials/doc_examples/experiment_doc_examples/exp.py + :language: python + :linenos: + :lines: 15-16 + +Generating +========== +.. compound:: + Next we generate the file structure for the ``Experiment``. A call to ``Experiment.generate`` + instructs SmartSim to create directories within the ``Experiment`` folder for each instance passed in. + We organize the ``Orchestrator`` and ``Model`` output files within the ``Experiment`` folder by + passing the ``Orchestrator`` and ``Model`` instances to ``exp.generate``: + + .. literalinclude:: tutorials/doc_examples/experiment_doc_examples/exp.py + :language: python + :linenos: + :lines: 18-19 + + `Overwrite=True` instructs SmartSim to overwrite entity contents if files and subdirectories + already exist within the ``Experiment`` directory. + + .. note:: + If files or folders are attached to a ``Model`` or ``Ensemble`` members through ``Model.attach_generator_files`` + or ``Ensemble.attach_generator_files``, the attached files or directories will be symlinked, copied, or configured and + written into the created directory for that instance. + + The ``Experiment.generate`` call places the `.err` and `.out` log files in the entity + subdirectories within the main ``Experiment`` directory. + +Previewing +========== +.. compound:: + Optionally, users can preview an ``Experiment`` entity. 
 The ``Experiment.preview`` method displays the entity summaries during runtime + to offer additional insight into the launch details. Any instance of a ``Model``, ``Ensemble``, or ``Orchestrator`` created by the + ``Experiment`` can be passed as an argument to the preview method. Additionally, users may specify the name of a file to write preview data to + via the ``output_filename`` argument, as well as the text format through the ``output_format`` argument. Users can also specify how verbose + the preview is via the ``verbosity_level`` argument. + + The following options are available when configuring preview: + + * `verbosity_level="info"` instructs preview to display user-defined fields and entities. + * `verbosity_level="debug"` instructs preview to display user-defined fields and entities and auto-generated fields. + * `verbosity_level="developer"` instructs preview to display user-defined fields and entities, auto-generated fields, and run commands. + * `output_format="plain_text"` sets the output format. The only accepted output format is 'plain_text'. + * `output_filename="test_name.txt"` specifies name of file and extension to write preview data to. If no output filename is set, the preview will be output to stdout. + + In the example below, we preview the ``Orchestrator`` and ``Model`` entities by passing their instances to ``Experiment.preview``: + + .. 
literalinclude:: tutorials/doc_examples/experiment_doc_examples/exp.py + :language: python + :linenos: + :lines: 21-22 + +When executed, the preview logs the following in stdout: + +:: + + === Experiment Overview === + + Experiment Name: example-experiment + Experiment Path: absolute/path/to/SmartSim/example-experiment + Launcher: local + + === Entity Preview === + + == Orchestrators == + + = Database Identifier: orchestrator = + Path: absolute/path/to/SmartSim/example-experiment/orchestrator + Shards: 1 + TCP/IP Port(s): + 6379 + Network Interface: ib0 + Type: redis + Executable: absolute/path/to/SmartSim/smartsim/_core/bin/redis-server + + == Models == + + = Model Name: hello_world = + Path: absolute/path/to/SmartSim/example-experiment/hello_world + Executable: /bin/echo + Executable Arguments: + Hello + World + Client Configuration: + Database Identifier: orchestrator + Database Backend: redis + TCP/IP Port(s): + 6379 + Type: Standalone + Outgoing Key Collision Prevention (Key Prefixing): + Tensors: Off + Datasets: Off + ML Models/Torch Scripts: Off + Aggregation Lists: Off + +Starting +======== +.. compound:: + Next launch the components of the ``Experiment`` (``Orchestrator`` and ``Model``). + To do so, use the ``Experiment.start`` factory method and pass in the previous + ``Orchestrator`` and ``Model`` instances. - # define how each member should run - srun = exp.create_run_settings(exe="python", - exe_args="./train-model.py") - srun.set_nodes(1) - srun.set_tasks(24) + .. literalinclude:: tutorials/doc_examples/experiment_doc_examples/exp.py + :language: python + :linenos: + :lines: 24-25 - ensemble = exp.create_ensemble("Training-Ensemble", - params=train_params, - params_as_args=["LR"], - batch_settings=sbatch, - run_settings=srun, - perm_strategy="random", - n_models=4) +Stopping +======== +.. compound:: + Lastly, to clean up the ``Experiment``, tear down the launched ``Orchestrator`` + using the ``Experiment.stop`` factory method. 
- # Enable key prefixing -- note that this should be done - # before starting the experiment - ensemble.enable_key_prefixing() + .. literalinclude:: tutorials/doc_examples/experiment_doc_examples/exp.py + :language: python + :linenos: + :lines: 27-28 - exp.start(ensemble, summary=True) + Notice that we use the ``Experiment.summary`` function to print + the summary of the workflow. +When you run the experiment, the following output will appear:: -Further Information -------------------- + | | Name | Entity-Type | JobID | RunID | Time | Status | Returncode | + |----|----------------|---------------|-------------|---------|---------|-----------|--------------| + | 0 | hello_world | Model | 1778304.4 | 0 | 10.0657 | Completed | 0 | + | 1 | orchestrator_0 | DBNode | 1778304.3+2 | 0 | 43.4797 | Cancelled | 0 | -For more informtion about Ensembles, please refer to the :ref:`Ensemble API documentation `. \ No newline at end of file +.. note:: + Failure to tear down the ``Orchestrator`` at the end of an ``Experiment`` + may lead to ``Orchestrator`` launch failures if another ``Experiment`` is + started on the same node. 
diff --git a/doc/images/Experiment.png b/doc/images/Experiment.png new file mode 100644 index 000000000..a103dd6dd Binary files /dev/null and b/doc/images/Experiment.png differ diff --git a/doc/images/clustered_orchestrator-1.png b/doc/images/clustered_orchestrator-1.png new file mode 100644 index 000000000..996d55e85 Binary files /dev/null and b/doc/images/clustered_orchestrator-1.png differ diff --git a/doc/images/colocated_orchestrator-1.png b/doc/images/colocated_orchestrator-1.png new file mode 100644 index 000000000..0da5d0609 Binary files /dev/null and b/doc/images/colocated_orchestrator-1.png differ diff --git a/doc/index.rst b/doc/index.rst index 91a7ee1ba..4c64712b2 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -30,9 +30,14 @@ :caption: SmartSim experiment + run_settings + batch_settings + model + ensemble orchestrator - launchers + ss_logger ml_features + dragon api/smartsim_api .. toctree:: diff --git a/doc/installation_instructions/basic.rst b/doc/installation_instructions/basic.rst index 2f43db50f..02c17e1fd 100644 --- a/doc/installation_instructions/basic.rst +++ b/doc/installation_instructions/basic.rst @@ -1,3 +1,5 @@ +.. _basic_install_SS: + ****************** Basic Installation ****************** @@ -18,7 +20,7 @@ Basic The base prerequisites to install SmartSim and SmartRedis are: - - Python 3.8-3.11 + - Python 3.9-3.11 - Pip - Cmake 3.13.x (or later) - C compiler @@ -27,7 +29,7 @@ The base prerequisites to install SmartSim and SmartRedis are: - git - `git-lfs`_ -.. _git-lfs: https://github.com/git-lfs/git-lfs?utm_source=gitlfs_site&utm_medium=installation_link&utm_campaign=gitlfs#installing +.. _git-lfs: https://github.com/git-lfs/git-lfs?utm_source=gitlfs_site&utm_medium=installation_link&utm_campaign=gitlfs .. 
note:: @@ -48,7 +50,7 @@ The machine-learning backends have additional requirements in order to use GPUs for inference - `CUDA Toolkit 11 (tested with 11.8) `_ - - `cuDNN 8 (tested with 8.9.1) `_ + - `cuDNN 8 (tested with 8.9.1) `_ - OS: Linux - GPU: Nvidia @@ -72,11 +74,11 @@ Supported Versions * - MacOS - x86_64, aarch64 - Not supported - - 3.8 - 3.11 + - 3.9 - 3.11 * - Linux - x86_64 - Nvidia - - 3.8 - 3.11 + - 3.9 - 3.11 .. note:: @@ -235,6 +237,28 @@ to building SmartSim with GPU support is to specify a different ``device`` backends look for the CUDA Toolkit and cuDNN libraries. Please see the :ref:`Platform Installation Section ` section for guidance. + +.. _dragon_install: + +Dragon Install +-------------- + +`Dragon `_ is +an HPC-native library for distributed computing. SmartSim can use Dragon as a +launcher on systems with Slurm or PBS as schedulers. To install the correct +version of Dragon, you can add the ``--dragon`` option to ``smart build``. +For example, to install dragon alongside the RedisAI CPU backends, you can run + +.. code-block:: bash + + # run one of the following + smart build --device cpu --dragon # install Dragon, PT and TF for cpu + smart build --device cpu --onnx --dragon # install Dragon and all backends (PT, TF, ONNX) on cpu + +.. note:: + Dragon is only supported on Linux systems. For further information, you + can read :ref:`the dedicated documentation page `. + ========== SmartRedis ========== @@ -254,9 +278,9 @@ SmartSim does. * - Platform - Python Versions * - MacOS - - 3.8 - 3.11 + - 3.9 - 3.11 * - Linux - - 3.8 - 3.11 + - 3.9 - 3.11 The Python client for SmartRedis is installed through ``pip`` as follows: @@ -298,7 +322,7 @@ source remains at the site of the clone instead of in site-packages. pip install -e .[dev,ml] # for bash users pip install -e .\[dev,ml\] # for zsh users -Use the now installed ``smart`` cli to install the machine learning runtimes. 
+Use the now installed ``smart`` cli to install the machine learning runtimes and dragon. .. tabs:: @@ -307,8 +331,8 @@ Use the now installed ``smart`` cli to install the machine learning runtimes. .. code-block:: bash # run one of the following - smart build --device cpu --onnx # install with cpu-only support - smart build --device gpu --onnx # install with both cpu and gpu support + smart build --device cpu --onnx --dragon # install with cpu-only support + smart build --device gpu --onnx --dragon # install with both cpu and gpu support .. tab:: MacOS (Intel x64) diff --git a/doc/installation_instructions/platform/nonroot-linux.rst b/doc/installation_instructions/platform/nonroot-linux.rst index 2c8f7933a..3070a871a 100644 --- a/doc/installation_instructions/platform/nonroot-linux.rst +++ b/doc/installation_instructions/platform/nonroot-linux.rst @@ -13,6 +13,6 @@ a user is possible. ./cuda_11.4.4_470.82.01_linux.run --toolkit --silent --toolkitpath=/path/to/install/location/ For cuDNN, follow `Nvidia's instructions -`_, +`_, and copy the cuDNN libraries to the `lib64` directory at the CUDA Toolkit location specified above. \ No newline at end of file diff --git a/doc/installation_instructions/platform/olcf-summit.rst b/doc/installation_instructions/platform/olcf-summit.rst index 5727ae8fe..236d15054 100644 --- a/doc/installation_instructions/platform/olcf-summit.rst +++ b/doc/installation_instructions/platform/olcf-summit.rst @@ -6,7 +6,7 @@ Since SmartSim does not have a built PowerPC build, the build steps for an IBM system are slightly different than other systems. Luckily for us, a conda channel with all relevant packages is maintained as part -of the `OpenCE `_ initiative. Users can follow these +of the `OpenCE `_ initiative. Users can follow these instructions to get a working SmartSim build with PyTorch and TensorFlow for GPU on Summit. Note that SmartSim and SmartRedis will be downloaded to the working directory from which these instructions are executed. 
@@ -19,7 +19,7 @@ into problems. .. code-block:: bash # setup Python and build environment - export ENV_NAME=smartsim-0.6.2 + export ENV_NAME=smartsim-0.7.0 git clone https://github.com/CrayLabs/SmartRedis.git smartredis git clone https://github.com/CrayLabs/SmartSim.git smartsim conda config --prepend channels https://ftp.osuosl.org/pub/open-ce/1.6.1/ diff --git a/doc/launchers.rst b/doc/launchers.rst deleted file mode 100644 index 22425071e..000000000 --- a/doc/launchers.rst +++ /dev/null @@ -1,248 +0,0 @@ - -********* -Launchers -********* - -SmartSim interfaces with a number of backends called `launchers` that -are responsible for constructing jobs based on run parameters and -launching them onto a system. - -The `launchers` allow SmartSim users to interact with their system -programmatically through a python interface. -Because of this, SmartSim users do not have to leave the Jupyter Notebook, -Python REPL, or Python script to launch, query, and interact with their jobs. - -SmartSim currently supports 5 `launchers`: - 1. ``local``: for single-node, workstation, or laptop - 2. ``slurm``: for systems using the Slurm scheduler - 3. ``pbs``: for systems using the PBSpro scheduler - 4. ``lsf``: for systems using the LSF scheduler - 5. ``auto``: have SmartSim auto-detect the launcher to use. - -To specify a specific launcher, one argument needs to be provided -to the ``Experiment`` initialization. - -.. 
code-block:: python - - from smartsim import Experiment - - exp = Experiment("name-of-experiment", launcher="local") # local launcher - exp = Experiment("name-of-experiment", launcher="slurm") # Slurm launcher - exp = Experiment("name-of-experiment", launcher="pbs") # PBSpro launcher - exp = Experiment("name-of-experiment", launcher="lsf") # LSF launcher - exp = Experiment("name-of-experiment", launcher="auto") # auto-detect launcher - -------------------------------------------------------------------------- - -Local -===== - - -The local launcher can be used on laptops, workstations and single -nodes of supercomputer and cluster systems. Through -launching locally, users can prototype workflows and quickly scale -them to larger systems with minimal changes. - -As with all launchers in SmartSim, the local launcher supports -asynchronous execution meaning once entities have been launched -the main thread of execution is not blocked. Daemon threads -that manage currently running jobs will be created when active -jobs are present within SmartSim. - -.. _psutil: https://github.com/giampaolo/psutil - -The local launcher uses the `psutil`_ library to execute and monitor -user-created jobs. - - -Running Locally ---------------- - -The local launcher supports the base :ref:`RunSettings API ` -which can be used to run executables as well as run executables -with arbitrary launch binaries like `mpiexec`. - -The local launcher is the default launcher for all ``Experiment`` -instances. - -The local launcher does not support batch launching. Ensembles -are always executed in parallel but launched sequentially. - ----------------------------------------------------------------------- - -Slurm -===== - -The Slurm launcher works directly with the Slurm scheduler to launch, query, -monitor and stop applications. During the course of an ``Experiment``, -launched entities can be queried for status, completion, and errors. 
- -The amount of communication between SmartSim and Slurm can be tuned -for specific guidelines of different sites by setting the -value for ``jm_interval`` in the SmartSim configuration file. - -To use the Slurm launcher, specify at ``Experiment`` initialization: - -.. code-block:: python - - from smartsim import Experiment - - exp = Experiment("NAMD-worklfow", launcher="slurm") - - -Running on Slurm ----------------- - -The Slurm launcher supports three types of ``RunSettings``: - 1. :ref:`SrunSettings ` - 2. :ref:`MpirunSettings ` - 3. :ref:`MpiexecSettings ` - -As well as batch settings for ``sbatch`` through: - 1. :ref:`SbatchSettings ` - - -Both supported ``RunSettings`` types above can be added -to a ``SbatchSettings`` batch workload through ``Ensemble`` -creation. - - -Getting Allocations -------------------- - -Slurm supports a number of user facing features that other schedulers -do not. For this reason, an extra module :ref:`smartsim.slurm ` can be -used to obtain allocations to launch on and release them after -``Experiment`` completion. - -.. code-block:: python - - from smartsim.wlm import slurm - alloc = slurm.get_allocation(nodes=1) - -The ID of the allocation is returned as a string to the user so that -they can specify what entities should run on which allocations -obtained by SmartSim. - -Additional arguments that would have been passed to the ``salloc`` -command can be passed through the ``options`` argument in a dictionary. - -Anything passed to the options will be processed as a Slurm -argument and appended to the salloc command with the appropriate -prefix (e.g. `-` or `--`). - -For arguments without a value, pass None as the value: - - `exclusive=None` - -.. 
code-block:: python - - from smartsim.wlm import slurm - salloc_options = { - "C": "haswell", - "partition": "debug", - "exclusive": None - } - alloc_id = slurm.get_slurm_allocation(nodes=128, - time="10:00:00", - options=salloc_options) - -The above code would generate a ``salloc`` command like: - -.. code-block:: bash - - salloc -N 5 -C haswell --partition debug --time 10:00:00 --exclusive - - - -Releasing Allocations ---------------------- - -The :ref:`smartsim.slurm ` interface -also supports releasing allocations obtained in an experiment. - -The example below releases the allocation in the example above. - -.. code-block:: python - - from smartsim.wlm import slurm - salloc_options = { - "C": "haswell", - "partition": "debug", - "exclusive": None - } - alloc_id = slurm.get_slurm_allocation(nodes=128, - time="10:00:00", - options=salloc_options) - - # - - slurm.release_slurm_allocation(alloc_id) - -------------------------------------------------------------------- - -PBSPro -====== - -Like the Slurm launcher, the PBSPro launcher works directly with the PBSPro -scheduler to launch, query, monitor and stop applications. - -The amount of communication between SmartSim and PBSPro can be tuned -for specific guidelines of different sites by setting the -value for ``jm_interval`` in the SmartSim configuration file. - -To use the PBSpro launcher, specify at ``Experiment`` initialization: - -.. code-block:: python - - from smartsim import Experiment - - exp = Experiment("LAMMPS-melt", launcher="pbs") - - - -Running on PBSpro ------------------ - -The PBSpro launcher supports three types of ``RunSettings``: - 1. :ref:`AprunSettings ` - 2. :ref:`MpirunSettings ` - 3. :ref:`MpiexecSettings ` - -As well as batch settings for ``qsub`` through: - 1. :ref:`QsubBatchSettings ` - -Both supported ``RunSettings`` types above can be added -to a ``QsubBatchSettings`` batch workload through ``Ensemble`` -creation. 
- ---------------------------------------------------------------------- - -LSF -=== - -The LSF Launcher works like the PBSPro launcher and -is compatible with LSF and OpenMPI workloads. - -To use the LSF launcher, specify at ``Experiment`` initialization: - -.. code-block:: python - - from smartsim import Experiment - - exp = Experiment("MOM6-double-gyre", launcher="lsf") - - -Running on LSF --------------- - -The LSF launcher supports three types of ``RunSettings``: - 1. :ref:`JsrunSettings ` - 2. :ref:`MpirunSettings ` - 3. :ref:`MpiexecSettings ` - -As well as batch settings for ``bsub`` through: - 1. :ref:`BsubBatchSettings ` - -Both supported ``RunSettings`` types above can be added -to a ``BsubBatchSettings`` batch workload through ``Ensemble`` -creation. diff --git a/doc/ml_features.rst b/doc/ml_features.rst index 6096f005e..4e0919a08 100644 --- a/doc/ml_features.rst +++ b/doc/ml_features.rst @@ -1,3 +1,5 @@ +.. _ml_features_docs: + ########### ML Features ########### @@ -303,7 +305,7 @@ with TensorFlow or PyTorch backends. .. code-block:: python - client.run_model(model_key, inputs=["mnist_imagse"], outputs=["mnist_output"]) + client.run_model(model_key, inputs=["mnist_images"], outputs=["mnist_output"]) output = client.get_tensor("mnist_output") diff --git a/doc/model.rst b/doc/model.rst new file mode 100644 index 000000000..52e1ce1c0 --- /dev/null +++ b/doc/model.rst @@ -0,0 +1,2343 @@ +.. _model_object_doc: + +***** +Model +***** +======== +Overview +======== +SmartSim ``Model`` objects enable users to execute computational tasks in an +``Experiment`` workflow, such as launching compiled applications, +running scripts, or performing general computational operations. A ``Model`` can be launched with +other SmartSim ``Model(s)`` and ``Orchestrator(s)`` to build AI-enabled workflows. 
+With the SmartSim ``Client`` (:ref:`SmartRedis`), data can be transferred from a ``Model`` +to the ``Orchestrator`` for use in an ML model (TF, TF-lite, PyTorch, or ONNX), online +training process, or additional ``Model`` applications. SmartSim ``Clients`` (SmartRedis) are available in +Python, C, C++, or Fortran. + +To initialize a SmartSim ``Model``, use the ``Experiment.create_model`` factory method. +When creating a ``Model``, a :ref:`RunSettings` object must be provided. A ``RunSettings`` +object specifies the ``Model`` executable (e.g. the full path to a compiled binary) as well as +executable arguments and launch parameters. These specifications include launch commands (e.g. `srun`, `aprun`, `mpiexec`, etc), +compute resource requirements, and application command-line arguments. + +Once a ``Model`` instance has been initialized, users have access to +the :ref:`Model API` functions to further configure the ``Model``. +The Model API functions provide users with the following capabilities: + +- :ref:`Attach Files to a SmartSim Model` +- :ref:`Colocate an Orchestrator to a SmartSim Model` +- :ref:`Attach a ML Model to the SmartSim Model` +- :ref:`Attach a TorchScript Function to the SmartSim Model` +- :ref:`Enable SmartSim Model Data Collision Prevention` + +Once the ``Model`` has been configured and launched, a user can leverage an ``Orchestrator`` within a ``Model`` +through **two** strategies: + +- :ref:`Connect to a Standalone Orchestrator` + When a ``Model`` is launched, it does not use or share compute + resources on the same host (computer/server) where a SmartSim ``Orchestrator`` is running. + Instead, it is launched on its own compute resources specified by the ``RunSettings`` object. + The ``Model`` can connect via a SmartRedis ``Client`` to a launched standalone ``Orchestrator``. 
+ +- :ref:`Connect to a Colocated Orchestrator` + When the colocated ``Model`` is started, SmartSim launches an ``Orchestrator`` on the ``Model`` compute + nodes prior to the ``Model`` execution. The ``Model`` can then connect to the colocated ``Orchestrator`` + via a SmartRedis ``Client``. + +.. note:: + For the ``Client`` connection to be successful from within the ``Model`` application, + the SmartSim ``Orchestrator`` must be launched prior to the start of the ``Model``. + +.. note:: + A ``Model`` can be launched without an ``Orchestrator`` if data transfer and ML capabilities are not + required. + +SmartSim manages ``Model`` instances through the :ref:`Experiment API` by providing functions to +launch, monitor, and stop applications. Additionally, a ``Model`` can be launched individually +or as a group via an :ref:`Ensemble`. + +============== +Initialization +============== +Overview +======== +The ``Experiment`` is responsible for initializing all SmartSim entities. +A ``Model`` is created using the ``Experiment.create_model`` factory method, and users can customize the +``Model`` via the factory method parameters. + +The key initializer arguments for ``Model`` creation can be found in the :ref:`Experiment API` +under the ``create_model`` docstring. + +A `name` and :ref:`RunSettings` reference are required to initialize a ``Model``. +Optionally, include a :ref:`BatchSettings` object to specify workload manager batch launching. + +.. note:: + ``BatchSettings`` attached to a ``Model`` are ignored when the ``Model`` is executed as part of an ``Ensemble``. + +The `params` factory method parameter for ``Model`` creation allows a user to define simulation parameters and +values through a dictionary. Using ``Model`` :ref:`file functions`, users can write these parameters to +a file in the ``Model`` working directory. 
+ +When a ``Model`` instance is passed to ``Experiment.generate``, a +directory within the Experiment directory +is created to store input and output files from the ``Model``. + +.. note:: + It is strongly recommended to invoke ``Experiment.generate`` on the ``Model`` + instance before launching the ``Model``. If a path is not specified during + ``Experiment.create_model``, calling ``Experiment.generate`` with the ``Model`` + instance will result in SmartSim generating a ``Model`` directory within the + ``Experiment`` directory. This directory will be used to store the ``Model`` outputs + and attached files. + +.. _std_model_doc: + +Example +======= +In this example, we provide a demonstration of how to initialize and launch a ``Model`` +within an ``Experiment`` workflow. The source code example is available in the dropdown below for +convenient execution and customization. + +.. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/model_doc_examples/model_init.py + +All workflow entities are initialized through the :ref:`Experiment API`. +Consequently, initializing a SmartSim ``Experiment`` is a prerequisite for ``Model`` +initialization. + +To initialize an instance of the ``Experiment`` class, import the SmartSim +``Experiment`` module and invoke the ``Experiment`` constructor +with a `name` and `launcher`: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/model_init.py + :language: python + :linenos: + :lines: 1-4 + +A ``Model`` requires ``RunSettings`` objects to specify how the ``Model`` should be +executed within the workflow. We use the ``Experiment`` instance `exp` to +call the factory method ``Experiment.create_run_settings`` to initialize a ``RunSettings`` +object. Finally, we specify the executable `"echo"` to run the executable argument `"Hello World"`: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/model_init.py + :language: python + :linenos: + :lines: 6-7 + +.. 
seealso:: + For more information on ``RunSettings`` objects, reference the :ref:`RunSettings` documentation. + +We now have a ``RunSettings`` instance named `model_settings` that contains all of the +information required to launch our application. Pass a `name` and the run settings instance +to the ``create_model`` factory method: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/model_init.py + :language: python + :linenos: + :lines: 9-10 + +To create an isolated output directory for the ``Model``, invoke ``Experiment.generate`` on the +``Model`` `model_instance`: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/model_init.py + :language: python + :linenos: + :lines: 12-13 + +.. note:: + The ``Experiment.generate`` step is optional; however, this step organizes the ``Experiment`` + entity output files into individual entity folders within the ``Experiment`` folder. Continue + in the example for information on ``Model`` output generation or visit the + :ref:`Output and Error Files` section. + +All entities are launched, monitored and stopped by the ``Experiment`` instance. +To start the ``Model``, invoke ``Experiment.start`` on `model_instance`: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/model_init.py + :language: python + :linenos: + :lines: 15-16 + +When the ``Experiment`` driver script is executed, two files from the `model_instance` will be created +in the generated ``Model`` subdirectory: + +1. `model_instance.out` : this file will hold outputs produced by the `model_instance` workload. +2. `model_instance.err` : this file will hold any errors that occurred during `model_instance` execution. + +.. _colo_model_doc: + +====================== +Colocated Orchestrator +====================== +A SmartSim ``Model`` has the capability to share compute node(s) with a SmartSim ``Orchestrator`` in +a deployment known as a colocated ``Orchestrator``. 
In this scenario, the ``Orchestrator`` and ``Model`` share +compute resources. To achieve this, users need to initialize a ``Model`` instance using the +``Experiment.create_model`` function and then utilize one of the three functions listed below to +colocate an ``Orchestrator`` with the ``Model``. This instructs SmartSim to launch an ``Orchestrator`` +on the application compute node(s) before the ``Model`` execution. + +There are **three** different Model API functions to colocate a ``Model``: + +- ``Model.colocate_db_tcp``: Colocate an ``Orchestrator`` instance and establish client communication using TCP/IP. +- ``Model.colocate_db_uds``: Colocate an ``Orchestrator`` instance and establish client communication using Unix domain sockets (UDS). +- ``Model.colocate_db``: (deprecated) An alias for `Model.colocate_db_tcp`. + +Each function initializes an unsharded ``Orchestrator`` accessible only to the ``Model`` processes on the same compute node. When the ``Model`` +is started, the ``Orchestrator`` will be launched on the same compute resource as the ``Model``. Only the colocated ``Model`` +may communicate with the ``Orchestrator`` via a SmartRedis ``Client`` by using the loopback TCP interface or +Unix Domain sockets. Extra parameters for the ``Orchestrator`` can be passed into the colocate functions above +via `kwargs`. + +.. code-block:: python + + example_kwargs = { + "maxclients": 100000, + "threads_per_queue": 1, + "inter_op_threads": 1, + "intra_op_threads": 1 + } + +For a walkthrough of how to colocate a ``Model``, navigate to the +:ref:`Colocated Orchestrator` for instructions. + +For users aiming to **optimize performance**, SmartSim offers the flexibility to specify +processor IDs to which the colocated ``Orchestrator`` should be pinned. This can be achieved using +the `custom_pinning` argument, which is recognized by both ``Model.colocate_db_uds`` and +``Model.colocate_db_tcp``. 
In systems where specific processors support ML model and +TorchScript execution, users can employ the `custom_pinning` argument to designate +these processor IDs. This ensures that the specified processors are available +when executing ML models or TorchScripts on the colocated ``Orchestrator``. +Additionally, users may use the `custom_pinning` argument to avoid reserved processors +by specifying an available processor ID or a list of available processor IDs. + +.. _files_doc: + +===== +Files +===== +Overview +======== +Applications often depend on external files (e.g. training datasets, evaluation datasets, etc) +to operate as intended. Users can instruct SmartSim to copy, symlink, or manipulate external files +prior to a ``Model`` launch via the ``Model.attach_generator_files`` function. + +.. note:: + Multiple calls to ``Model.attach_generator_files`` will overwrite previous file configurations + in the ``Model``. + +To setup the run directory for the ``Model``, users should pass the list of files to +``Model.attach_generator_files`` using the following arguments: + +* `to_copy` (t.Optional[t.List[str]] = None): Files that are copied into the path of the ``Model``. +* `to_symlink` (t.Optional[t.List[str]] = None): Files that are symlinked into the path of the ``Model``. + +User-formatted files can be attached using the `to_configure` argument. These files will be modified +during ``Model`` generation to replace tagged sections in the user-formatted files with +values from the `params` initializer argument used during ``Model`` creation: + +* `to_configure` (t.Optional[t.List[str]] = None): Designed for text-based ``Model`` input files, + `"to_configure"` is exclusive to the ``Model``. During ``Model`` directory generation, the attached + files are parsed and specified tagged parameters are replaced with the `params` values that were + specified in the ``Experiment.create_model`` factory method of the ``Model``. 
The default tag is a semicolon + (e.g., THERMO = ;THERMO;). + +In the :ref:`Example` subsection, we provide an example using the value `to_configure` +within ``attach_generator_files``. + +.. _files_example_doc: + +Example +======= +This example demonstrates how to attach a file to a ``Model`` for parameter replacement at the time +of ``Model`` directory generation. This is accomplished using the `params` function parameter in +``Experiment.create_model`` and the `to_configure` function parameter +in ``Model.attach_generator_files``. The source code example is available in the dropdown below for +convenient execution and customization. + +.. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/model_doc_examples/model_file.py + +In this example, we have a text file named `params_inputs.txt`. Within the text file, is the parameter `THERMO` +that is required by the ``Model`` application at runtime: + +.. code-block:: bash + + THERMO = ;THERMO; + +In order to have the tagged parameter `;THERMO;` replaced with a usable value at runtime, two steps are required: + +1. The `THERMO` variable must be included in ``Experiment.create_model`` factory method as + part of the `params` initializer argument. +2. The file containing the tagged parameter `;THERMO;`, `params_inputs.txt`, must be attached to the ``Model`` + via the ``Model.attach_generator_files`` method as part of the `to_configure` function parameter. + +To encapsulate our application within a ``Model``, we must first create an ``Experiment`` instance. +Begin by importing the ``Experiment`` module and initializing an ``Experiment``: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/model_file.py + :language: python + :linenos: + :lines: 1-4 + +A SmartSim ``Model`` requires a ``RunSettings`` object to +specify the ``Model`` executable (e.g. the full path to a compiled binary) as well as +executable arguments and launch parameters. 
Create a simple ``RunSettings`` object +and specify the path to the executable script as an executable argument (`exe_args`): + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/model_file.py + :language: python + :linenos: + :lines: 6-7 + +.. seealso:: + To read more on SmartSim ``RunSettings`` objects, reference the :ref:`RunSettings` documentation. + +Next, initialize a ``Model`` object via ``Experiment.create_model``. Pass in the `model_settings` instance +and the `params` value: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/model_file.py + :language: python + :linenos: + :lines: 9-10 + +We now have a ``Model`` instance named `model_instance`. Attach the text file, `params_inputs.txt`, +to the ``Model`` for use at entity runtime. To do so, use the +``Model.attach_generator_files`` function and specify the `to_configure` +parameter with the path to the text file, `params_inputs.txt`: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/model_file.py + :language: python + :linenos: + :lines: 12-13 + +To create an isolated directory for the ``Model`` outputs and configuration files, invoke ``Experiment.generate`` +on `model_instance` as an input parameter: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/model_file.py + :language: python + :linenos: + :lines: 15-16 + +The contents of `getting-started/model_name/params_inputs.txt` at runtime are: + +.. code-block:: bash + +    THERMO = 1 + +.. _model_output_files: + +====================== +Output and Error Files +====================== +By default, SmartSim stores the standard output and error of the ``Model`` in two files: + +* `.out` +* `.err` + +The files are created in the working directory of the ``Model``, and the filenames directly match the +``Model`` name. The `.out` file logs standard outputs and the +`.err` logs errors for debugging. + +.. 
note:: + Invoking ``Experiment.generate(model)`` will create a directory `model_name/` and will store + the two files within that directory. You can also specify a path for these files using the + `path` parameter when invoking ``Experiment.create_model``. + +.. _ml_script_model_doc: + +===================== +ML Models and Scripts +===================== +Overview +======== +SmartSim users have the capability to load ML models and TorchScripts into an ``Orchestrator`` +within the ``Experiment`` script for use within a ``Model``. Functions accessible through +a ``Model`` object support loading ML models (TensorFlow, TensorFlow-lite, PyTorch, and ONNX) and +TorchScripts into standalone or colocated ``Orchestrator(s)`` before application runtime. + +Users can follow **two** processes to load an ML model to the ``Orchestrator``: + +- :ref:`From Memory` +- :ref:`From File` + +.. warning:: + Uploading an ML model :ref:`from memory` is solely supported for + standalone ``Orchestrator(s)``. To upload an ML model to a colocated ``Orchestrator``, users + must save the ML model to disk and upload :ref:`from file`. + +Users can follow **three** processes to load a TorchScript to the ``Orchestrator``: + +- :ref:`From Memory` +- :ref:`From File` +- :ref:`From String` + +.. warning:: + Uploading a TorchScript :ref:`from memory` is solely supported for + standalone ``Orchestrator(s)``. To upload a TorchScript to a colocated ``Orchestrator``, users + upload :ref:`from file` or :ref:`from string`. + +Once an ML model or TorchScript is loaded into the ``Orchestrator``, ``Model`` objects can +leverage ML capabilities by utilizing the SmartSim ``Client`` (:ref:`SmartRedis`) +to execute the stored ML models and TorchScripts. + +.. _ai_model_doc: + +AI Models +========= +When configuring a ``Model``, users can instruct SmartSim to load +Machine Learning (ML) models to the ``Orchestrator``. ML models added +are loaded into the ``Orchestrator`` prior to the execution of the ``Model``. 
To load an ML model +to the ``Orchestrator``, SmartSim users can provide the ML model **in-memory** or specify the **file path** +when using the ``Model.add_ml_model`` function. SmartSim solely supports loading an ML model from memory +for use within standalone ``Orchestrator(s)``. The supported ML frameworks are TensorFlow, +TensorFlow-lite, PyTorch, and ONNX. + +The arguments that customize the storage and execution of an ML model can be found in the +:ref:`Model API` under the ``add_ml_model`` docstring. + +.. _in_mem_ML_model_ex: + +------------------------------------- +Example: Attach an In-Memory ML Model +------------------------------------- +This example demonstrates how to attach an in-memory ML model to a SmartSim ``Model`` +to load into an ``Orchestrator`` at ``Model`` runtime. The source code example is available in the dropdown below for +convenient execution and customization. + +.. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/model_doc_examples/in_mem_ml_model.py + +.. note:: + This example assumes: + + - an ``Orchestrator`` is launched prior to the ``Model`` execution (colocated or standalone) + - an initialized ``Model`` named `smartsim_model` exists within the ``Experiment`` workflow + - a Tensorflow-based ML model was serialized using ``serialize_model`` which returns + the ML model as a byte string with the names of the input and output layers + +**Attach the ML Model to a SmartSim Model** + +In this example, we have a serialized Tensorflow-based ML model that was saved to a byte string stored under `model`. +Additionally, the ``serialize_model`` function returned the names of the input and output layers stored under +`inputs` and `outputs`. Assuming an initialized ``Model`` named `smartsim_model` exists, we add the in-memory TensorFlow model using +the ``Model.add_ml_model`` function and specify the in-memory ML model to the function parameter `model`: + +.. 
literalinclude:: tutorials/doc_examples/model_doc_examples/in_mem_ml_model.py + :language: python + :linenos: + :lines: 39-40 + +In the above ``smartsim_model.add_ml_model`` code snippet, we pass in the following arguments: + +- `name` ("cnn"): A name to reference the ML model in the ``Orchestrator``. +- `backend` ("TF"): Indicating that the ML model is a TensorFlow model. +- `model` (model): The in-memory representation of the TensorFlow model. +- `device` ("GPU"): Specifying the device for ML model execution. +- `devices_per_node` (2): Use two GPUs per node. +- `first_device` (0): Start with 0 index GPU. +- `inputs` (inputs): The name of the ML model input nodes (TensorFlow only). +- `outputs` (outputs): The name of the ML model output nodes (TensorFlow only). + +.. warning:: + Calling `exp.start(smartsim_model)` prior to instantiation of an ``Orchestrator`` will result in + a failed attempt to load the ML model to a non-existent ``Orchestrator``. + +When the ``Model`` is started via ``Experiment.start``, the ML model will be loaded to the +launched ``Orchestrator``. The ML model can then be executed on the ``Orchestrator`` via a SmartSim +``Client`` (:ref:`SmartRedis`) within the application code. + +.. _from_file_ML_model_ex: + +------------------------------------- +Example: Attach an ML Model From File +------------------------------------- +This example demonstrates how to attach a ML model from file to a SmartSim ``Model`` +to load into an ``Orchestrator`` at ``Model`` runtime. +The source code example is available in the dropdown below for +convenient execution and customization. + +.. note:: + SmartSim supports loading ML models from file to standalone ``Orchestrator(s)``. + This feature is **not** supported for colocated ``Orchestrator(s)``. + +.. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/model_doc_examples/from_file_ml_model.py + +.. 
note:: + This example assumes: + + - a standalone ``Orchestrator`` is launched prior to the ``Model`` execution + - an initialized ``Model`` named `smartsim_model` exists within the ``Experiment`` workflow + - a Tensorflow-based ML model was serialized using ``freeze_model`` which returns + the path to the serialized model file and the names of the input and output layers + +**Attach the ML Model to a SmartSim Model** + +In this example, we have a serialized Tensorflow-based ML model that was saved to disk and stored under `model_file`. +Additionally, the ``freeze_model`` function returned the names of the input and output layers stored under +`inputs` and `outputs`. Assuming an initialized ``Model`` named `smartsim_model` exists, we add the TensorFlow model using +the ``Model.add_ml_model`` function and specify the TensorFlow model path to the parameter `model_path`: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/from_file_ml_model.py + :language: python + :linenos: + :lines: 39-40 + +In the above ``smartsim_model.add_ml_model`` code snippet, we pass in the following arguments: + +- `name` ("cnn"): A name to reference the ML model in the ``Orchestrator``. +- `backend` ("TF"): Indicating that the ML model is a TensorFlow model. +- `model_path` (model_file): The path to the ML model script. +- `device` ("GPU"): Specifying the device for ML model execution. +- `devices_per_node` (2): Use two GPUs per node. +- `first_device` (0): Start with 0 index GPU. +- `inputs` (inputs): The name of the ML model input nodes (TensorFlow only). +- `outputs` (outputs): The name of the ML model output nodes (TensorFlow only). + +.. warning:: + Calling `exp.start(smartsim_model)` prior to instantiation of an ``Orchestrator`` will result in + a failed attempt to load the ML model to a non-existent ``Orchestrator``. + +When the ``Model`` is started via ``Experiment.start``, the ML model will be loaded to the +launched standalone ``Orchestrator``. 
The ML model can then be executed on the ``Orchestrator`` +via a SmartRedis ``Client`` (:ref:`SmartRedis`) within the application code. + +.. _TS_doc: + +TorchScripts +============ +When configuring a ``Model``, users can instruct SmartSim to load TorchScripts +to the ``Orchestrator``. TorchScripts added are loaded into the ``Orchestrator`` prior to +the execution of the ``Model``. To load a TorchScript to the ``Orchestrator``, SmartSim users +can follow one of the processes: + +- :ref:`Define a TorchScript Function In-Memory` + Use the ``Model.add_function`` to instruct SmartSim to load an in-memory TorchScript to the ``Orchestrator``. +- :ref:`Define a TorchScript Function From File` + Provide file path to ``Model.add_script`` to instruct SmartSim to load the TorchScript from file to the ``Orchestrator``. +- :ref:`Define a TorchScript Function as String` + Provide function string to ``Model.add_script`` to instruct SmartSim to load a raw string as a TorchScript function to the ``Orchestrator``. + +.. note:: + SmartSim does **not** support loading in-memory TorchScript functions to colocated ``Orchestrator(s)``. + Users should instead load TorchScripts to a colocated ``Orchestrator`` from file or as a raw string. + +Continue or select a process link to learn more on how each function (``Model.add_script`` and ``Model.add_function``) +load TorchScripts to launched ``Orchestrator(s)``. + +.. _in_mem_TF_doc: + +------------------------------- +Attach an In-Memory TorchScript +------------------------------- +Users can define TorchScript functions within the Python driver script +to attach to a ``Model``. This feature is supported by ``Model.add_function`` which provides flexible +device selection, allowing users to choose between which device the TorchScript is executed on, `"GPU"` or `"CPU"`. +In environments with multiple devices, specific device numbers can be specified using the +`devices_per_node` function parameter. + +.. 
warning:: + ``Model.add_function`` does **not** support loading in-memory TorchScript functions to a colocated ``Orchestrator``. + If you would like to load a TorchScript function to a colocated ``Orchestrator``, define the function + as a :ref:`raw string` or :ref:`load from file`. + +The arguments that customize the execution of an in-memory TorchScript function can be found in the +:ref:`Model API` under the ``add_function`` docstring. + +Example: Load a In-Memory TorchScript Function +---------------------------------------------- +This example walks through the steps of instructing SmartSim to load an in-memory TorchScript function +to a standalone ``Orchestrator``. The source code example is available in the dropdown below for +convenient execution and customization. + +.. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/model_doc_examples/in_mem_script.py + +.. note:: + The example assumes: + + - a standalone ``Orchestrator`` is launched prior to the ``Model`` execution + - an initialized ``Model`` named `smartsim_model` exists within the ``Experiment`` workflow + +**Define an In-Memory TF Function** + +To begin, define an in-memory TorchScript function within the ``Experiment`` driver script. +For the purpose of the example, we add a simple TorchScript function named `timestwo`: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/in_mem_script.py + :language: python + :linenos: + :lines: 3-4 + +**Attach the In-Memory TorchScript Function to a SmartSim Model** + +We use the ``Model.add_function`` function to instruct SmartSim to load the TorchScript function `timestwo` +onto the launched standalone ``Orchestrator``. Specify the function `timestwo` to the `function` +parameter: + +.. 
literalinclude:: tutorials/doc_examples/model_doc_examples/in_mem_script.py + :language: python + :linenos: + :lines: 15-16 + +In the above ``smartsim_model.add_function`` code snippet, we input the following arguments: + +- `name` ("example_func"): A name to uniquely identify the TorchScript within the ``Orchestrator``. +- `function` (timestwo): Name of the TorchScript function defined in the Python driver script. +- `device` ("CPU"): Specifying the device for TorchScript execution. +- `devices_per_node` (2): Use two GPUs per node. +- `first_device` (0): Start with 0 index GPU. + +.. warning:: + Calling `exp.start(smartsim_model)` prior to instantiation of an ``Orchestrator`` will result in + a failed attempt to load the TorchScript to a non-existent ``Orchestrator``. + +When the ``Model`` is started via ``Experiment.start``, the TF function will be loaded to the +standalone ``Orchestrator``. The function can then be executed on the ``Orchestrator`` via a SmartRedis +``Client`` (:ref:`SmartRedis`) within the application code. + +.. _TS_from_file: + +------------------------------ +Attach a TorchScript From File +------------------------------ +Users can attach TorchScript functions from a file to a ``Model`` and upload them to a +colocated or standalone ``Orchestrator``. This functionality is supported by the ``Model.add_script`` +function's `script_path` parameter. The function supports +flexible device selection, allowing users to choose between `"GPU"` or `"CPU"` via the `device` parameter. +In environments with multiple devices, specific device numbers can be specified using the +`devices_per_node` parameter. + +The arguments that customize the storage and execution of a TorchScript script can be found in the +:ref:`Model API` under the ``add_script`` docstring. 
+ +Example: Load a TorchScript From File +------------------------------------- +This example walks through the steps of instructing SmartSim to load a TorchScript from file +to a launched ``Orchestrator``. The source code example is available in the dropdown below for +convenient execution and customization. + +.. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/model_doc_examples/from_file_script.py + +.. note:: + This example assumes: + + - a ``Orchestrator`` is launched prior to the ``Model`` execution (Colocated or standalone) + - an initialized ``Model`` named `smartsim_model` exists within the ``Experiment`` workflow + +**Define a TorchScript Script** + +For the example, we create the Python script `torchscript.py`. The file contains a +simple torch function shown below: + +.. code-block:: python + + def negate(x): + return torch.neg(x) + +**Attach the TorchScript Script to a SmartSim Model** + +Assuming an initialized ``Model`` named `smartsim_model` exists, we add the TorchScript script using +``Model.add_script`` by specifying the script path to the `script_path` parameter: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/from_file_script.py + :language: python + :linenos: + :lines: 13-14 + +In the above ``smartsim_model.add_script`` code snippet, we include the following arguments: + +- `name` ("example_script"): Reference name for the script inside of the ``Orchestrator``. +- `script_path` ("path/to/torchscript.py"): Path to the script file. +- `device` ("CPU"): device for script execution. +- `devices_per_node` (2): Use two GPUs per node. +- `first_device` (0): Start with 0 index GPU. + +.. warning:: + Calling `exp.start(smartsim_model)` prior to instantiation of an ``Orchestrator`` will result in + a failed attempt to load the ML model to a non-existent ``Orchestrator``. 
+ +When `smartsim_model` is started via ``Experiment.start``, the TorchScript will be loaded from file to the +``Orchestrator`` that is launched prior to the start of `smartsim_model`. The function can then be executed +on the ``Orchestrator`` via a SmartRedis ``Client`` (:ref:`SmartRedis`) within the application code. + +.. _TS_raw_string: + +--------------------------------- +Define TorchScripts as Raw String +--------------------------------- +Users can upload TorchScript functions from string to colocated or +standalone ``Orchestrator(s)``. This feature is supported by the +``Model.add_script`` function's `script` parameter. The function supports +flexible device selection, allowing users to choose between `"GPU"` or `"CPU"` via the `device` parameter. +In environments with multiple devices, specific device numbers can be specified using the +`devices_per_node` parameter. + +The arguments that customize the storage and execution of a TorchScript script can be found in the +:ref:`Model API` under the ``add_script`` docstring. + +Example: Load a TorchScript From String +--------------------------------------- +This example walks through the steps of instructing SmartSim to load a TorchScript +from string to a ``Orchestrator``. The source code example is available in the dropdown below for +convenient execution and customization. + +.. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/model_doc_examples/string_script.py + +.. note:: + This example assumes: + + - a ``Orchestrator`` is launched prior to the ``Model`` execution (standalone or colocated) + - an initialized ``Model`` named `smartsim_model` exists within the ``Experiment`` workflow + +**Define a String TorchScript** + +Define the TorchScript code as a variable in the ``Experiment`` driver script: + +.. 
literalinclude:: tutorials/doc_examples/model_doc_examples/string_script.py + :language: python + :linenos: + :lines: 12-13 + +**Attach the TorchScript Function to a SmartSim Model** + +Assuming an initialized ``Model`` named `smartsim_model` exists, we add the TorchScript using +the ``Model.add_script`` function and specify the variable `torch_script_str` to the parameter +`script`: + +.. literalinclude:: tutorials/doc_examples/model_doc_examples/string_script.py + :language: python + :linenos: + :lines: 15-16 + +In the above ``smartsim_model.add_script`` code snippet, we offer the following arguments: + +- `name` ("example_script"): key to store script under. +- `script` (torch_script_str): TorchScript code. +- `device` ("CPU"): device for script execution. +- `devices_per_node` (2): Use two GPUs per node. +- `first_device` (0): Start with 0 index GPU. + +.. warning:: + Calling `exp.start(smartsim_model)` prior to instantiation of an ``Orchestrator`` will result in + a failed attempt to load the TorchScript to a non-existent ``Orchestrator``. + +When the ``Model`` is started via ``Experiment.start``, the TorchScript will be loaded to the +``Orchestrator`` that is launched prior to the start of the ``Model``. + +.. _model_key_collision: + +========================= +Data Collision Prevention +========================= +Overview +======== +If an ``Experiment`` consists of multiple ``Model(s)`` that use the same key names to reference +information in the ``Orchestrator``, the names used to reference data, ML models, and scripts will be +identical, and without the use of SmartSim and SmartRedis prefix methods, ``Model(s)`` +will end up inadvertently accessing or overwriting each other’s data. To prevent this +situation, the SmartSim ``Model`` object supports key prefixing, which prepends +the name of the ``Model`` to the keys sent to the ``Orchestrator`` to create unique key names. 
+With this enabled, collision is avoided and ``Model(s)`` can use the same key names within their applications. + +The key components of SmartSim ``Model`` prefixing functionality include: + +1. **Sending Data to the Orchestrator**: Users can send data to an ``Orchestrator`` + with the ``Model`` name prepended to the data name through SmartSim :ref:`Model functions` and + SmartRedis :ref:`Client functions`. +2. **Retrieving Data from the Orchestrator**: Users can instruct a ``Client`` to prepend a + ``Model`` name to a key during data retrieval, polling, or check for existence on the ``Orchestrator`` + through SmartRedis :ref:`Client functions`. + +For example, assume you have two ``Model(s)`` in an ``Experiment``, named `model_0` and `model_1`. In each +application code you use the function ``Client.put_tensor("tensor_0", data)`` to send a tensor named `"tensor_0"` +to the same ``Orchestrator``. With ``Model`` key prefixing turned on, the `model_0` and `model_1` +applications can access their respective tensor `"tensor_0"` by name without overwriting or accessing +the other ``Model(s)`` `"tensor_0"` tensor. In this scenario, the two tensors placed in the +``Orchestrator`` are `model_0.tensor_0` and `model_1.tensor_0`. + +Enabling and Disabling +====================== +SmartSim provides support for toggling prefixing on a ``Model`` for tensors, ``Datasets``, +lists, ML models, and scripts. Prefixing functions from the SmartSim :ref:`Model API` and SmartRedis :ref:`Client API` rely on +each other to fully support SmartSim key prefixing. For example, to use the ``Client`` prefixing +functions, a user must enable prefixing on the ``Model`` through ``Model.enable_key_prefixing``. +This function enables and activates prefixing for tensors, ``Datasets`` and lists placed in an ``Orchestrator`` +by the ``Model``. 
This configuration can be toggled within the ``Model`` application through +``Client`` functions, such as disabling tensor prefixing via ``Client.use_tensor_ensemble_prefix(False)``. + +The interaction between the prefix SmartSim `Model Functions` and SmartRedis +`Client Functions` is documented below. + +.. _model_prefix_func: + +--------------- +Model Functions +--------------- +A ``Model`` object supports two prefixing functions: ``Model.enable_key_prefixing`` and +``Model.register_incoming_entity``. + +To enable prefixing on a ``Model``, users must use the ``Model.enable_key_prefixing`` +function in the ``Experiment`` driver script. The key components of this function include: + +- Activates prefixing for tensors, ``Datasets``, and lists sent to an ``Orchestrator`` from within + the ``Model`` application. +- Enables access to prefixing ``Client`` functions within the ``Model`` application. This excludes + the ``Client.set_data_source`` function, where ``enable_key_prefixing`` is not required for access. + +.. note:: + ML model and script prefixing is not automatically enabled through ``Model.enable_key_prefixing`` + and rather must be enabled within the ``Model`` application using ``Client.use_model_ensemble_prefix``. + +Users can enable a SmartRedis ``Client`` to interact with prefixed data, ML models and TorchScripts +within a ``Model`` application by specifying the producer entity name to ``Client.set_data_source``. +However, for SmartSim to recognize the entity name within the application, the producer +entity must be registered on the consumer entity using ``Model.register_incoming_entity``. +This also applies to scenarios where the ``Model`` attempts to access data placed by itself. +For more information on ``Client.set_data_source``, visit the +:ref:`Client functions` section. + +.. 
_client_prefix_func: + +---------------- +Client Functions +---------------- +A ``Client`` object supports five prefixing functions: ``Client.use_tensor_ensemble_prefix``, +``Client.use_dataset_ensemble_prefix``, ``Client.use_list_ensemble_prefix``, +``Client.use_model_ensemble_prefix`` and ``Client.set_data_source``. + +To enable or disable SmartRedis data structure prefixing for tensors, ``Datasets``, aggregation lists, ML models +and scripts, SmartRedis ``Client`` offers functions per data structure: + +- Tensor: ``Client.use_tensor_ensemble_prefix`` +- ``Dataset``: ``Client.use_dataset_ensemble_prefix`` +- Aggregation lists: ``Client.use_list_ensemble_prefix`` +- ML models/scripts: ``Client.use_model_ensemble_prefix`` + +.. warning:: + To access the ``Client`` prefixing functions, prefixing must be enabled on the + ``Model`` through ``Model.enable_key_prefixing``. This function activates prefixing + for tensors, ``Datasets`` and lists. + +Examples are provided below that show the use of these ``Client`` methods in conjunction +with the SmartSim key prefixing ``Model`` API functions. + +Users can enable the SmartSim ``Client`` to interact with prefixed data, ML models and TorchScripts +using the ``Client.set_data_source`` function. To leverage this capability: + +1. Use ``Model.register_incoming_entity`` on the ``Model`` intending to interact with prefixed data in the ``Orchestrator`` + placed by a separate ``Model``. +2. Pass the SmartSim entity (e.g., another ``Model``) to ``Model.register_incoming_entity`` in order to + reference the ``Model`` prefix in the application code. +3. In the ``Model`` application, instruct the ``Client`` to prepend the specified ``Model`` name during key searches + using ``Client.set_data_source("model_name")``. + +Examples are provided below that show the use of these ``Client`` methods in conjunction +with the SmartSim key prefixing ``Model`` API functions. + +.. 
_put_set_prefix: + +Put/Set Operations +================== +In the following tabs we provide snippets of driver script and application code to demonstrate +activating and deactivating prefixing for tensors, ``Datasets``, lists, ML models and scripts using +SmartRedis put/get semantics. + +.. tabs:: + + .. group-tab:: Tensor + **Activate Tensor Prefixing in the Driver Script** + + To activate prefixing on a ``Model`` in the driver script, a user must use the function + ``Model.enable_key_prefixing``. This functionality ensures that the ``Model`` name + is prepended to each tensor name sent to the ``Orchestrator`` from within the ``Model`` + executable code. The source code example is available in the dropdown below for + convenient execution and customization. + + .. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/model_doc_examples/prefix_data.py + + In the driver script snippet below, we take an initialized ``Model`` and activate tensor + prefixing through the ``enable_key_prefixing`` function: + + .. literalinclude:: tutorials/doc_examples/model_doc_examples/prefix_data.py + :language: python + :linenos: + :lines: 6-12 + + In the `model` application, two tensors named `tensor_1` and `tensor_2` are sent to a launched ``Orchestrator``. + The contents of the ``Orchestrator`` after ``Model`` completion are: + + .. code-block:: bash + + 1) "model_name.tensor_1" + 2) "model_name.tensor_2" + + You will notice that the ``Model`` name `model_name` has been prepended to each tensor name + and stored in the ``Orchestrator``. + + **Activate Tensor Prefixing in the Application** + + Users can further configure tensor prefixing in the application by using + the ``Client`` function ``use_tensor_ensemble_prefix``. By specifying a boolean + value to the function, users can turn prefixing on and off. + + .. 
note:: + To have access to ``Client.use_tensor_ensemble_prefix``, prefixing must be enabled + on the ``Model`` in the driver script via ``Model.enable_key_prefixing``. + + In the application snippet below, we demonstrate enabling and disabling tensor prefixing: + + .. code-block:: python + + # Disable key prefixing + client.use_tensor_ensemble_prefix(False) + # Place a tensor in the Orchestrator + client.put_tensor("tensor_1", np.array([1, 2, 3, 4])) + # Enable key prefixing + client.use_tensor_ensemble_prefix(True) + # Place a tensor in the Orchestrator + client.put_tensor("tensor_2", np.array([5, 6, 7, 8])) + + In the application, two tensors named `tensor_1` and `tensor_2` are sent to a launched ``Orchestrator``. + The contents of the ``Orchestrator`` after ``Model`` completion are: + + .. code-block:: bash + + 1) "tensor_1" + 2) "model_name.tensor_2" + + You will notice that the ``Model`` name `model_name` is **not** prefixed to `tensor_1` since + we disabled tensor prefixing before sending the tensor to the ``Orchestrator``. However, + when we enabled tensor prefixing and sent the second tensor, the ``Model`` name was prefixed + to `tensor_2`. + + .. group-tab:: Dataset + **Activate Dataset Prefixing in the Driver Script** + + To activate prefixing on a ``Model`` in the driver script, a user must use the function + ``Model.enable_key_prefixing``. This functionality ensures that the ``Model`` name + is prepended to each ``Dataset`` name sent to the ``Orchestrator`` from within the ``Model``. + The source code example is available in the dropdown below for + convenient execution and customization. + + .. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/model_doc_examples/prefix_data.py + + In the driver script snippet below, we take an initialized ``Model`` and activate ``Dataset`` + prefixing through the ``enable_key_prefixing`` function: + + .. 
literalinclude:: tutorials/doc_examples/model_doc_examples/prefix_data.py + :language: python + :linenos: + :lines: 6-12 + + In the `model` application, two Datasets named `dataset_1` and `dataset_2` are sent to a launched ``Orchestrator``. + The contents of the ``Orchestrator`` after ``Model`` completion are: + + .. code-block:: bash + + 1) "model_name.{dataset_1}.dataset_tensor_1" + 2) "model_name.{dataset_1}.meta" + 3) "model_name.{dataset_2}.dataset_tensor_2" + 4) "model_name.{dataset_2}.meta" + + You will notice that the ``Model`` name `model_name` has been prefixed to each ``Dataset`` name + and stored in the ``Orchestrator``. + + **Activate Dataset Prefixing in the Application** + + Users can further configure ``Dataset`` prefixing in the application by using + the ``Client`` function ``use_dataset_ensemble_prefix``. By specifying a boolean + value to the function, users can turn prefixing on and off. + + .. note:: + To have access to ``Client.use_dataset_ensemble_prefix``, prefixing must be enabled + on the ``Model`` in the driver script via ``Model.enable_key_prefixing``. + + In the application snippet below, we demonstrate enabling and disabling ``Dataset`` prefixing: + + .. code-block:: python + + # Disable key prefixing + client.use_dataset_ensemble_prefix(False) + # Place a Dataset in the Orchestrator + client.put_dataset(dataset_1) + # Enable key prefixing + client.use_dataset_ensemble_prefix(True) + # Place a Dataset in the Orchestrator + client.put_dataset(dataset_2) + + In the application, we have two ``Datasets`` named `dataset_1` and `dataset_2`. + We then send them to a launched ``Orchestrator``. The contents of the ``Orchestrator`` after ``Model`` completion are: + + .. 
code-block:: bash + + 1) "{dataset_1}.dataset_tensor_1" + 2) "{dataset_1}.meta" + 3) "model_name.{dataset_2}.dataset_tensor_1" + 4) "model_name.{dataset_2}.meta" + + You will notice that the ``Model`` name `model_name` is **not** prefixed to `dataset_1` since + we disabled ``Dataset`` prefixing before sending the ``Dataset`` to the ``Orchestrator``. However, + when we enabled ``Dataset`` prefixing and sent the second ``Dataset``, the ``Model`` name was prefixed + to `dataset_2`. + + .. group-tab:: Aggregation List + **Activate Aggregation List Prefixing in the Driver Script** + + To activate prefixing on a ``Model`` in the driver script, a user must use the function + ``Model.enable_key_prefixing``. This functionality ensures that the ``Model`` name + is prepended to each list name sent to the ``Orchestrator`` from within the ``Model``. + The source code example is available in the dropdown below for + convenient execution and customization. + + .. dropdown:: Example Driver Script Source Code + + .. literalinclude:: tutorials/doc_examples/model_doc_examples/prefix_data.py + + In the driver script snippet below, we take an initialized ``Model`` and activate list + prefixing through the ``enable_key_prefixing`` function: + + .. literalinclude:: tutorials/doc_examples/model_doc_examples/prefix_data.py + :language: python + :linenos: + :lines: 6-12 + + In the `model` application, a list named `dataset_list` is sent to a launched ``Orchestrator``. + The contents of the ``Orchestrator`` after ``Model`` completion are: + + .. code-block:: bash + + 1) "model_name.dataset_list" + + You will notice that the ``Model`` name `model_name` has been prefixed to the list name + and stored in the ``Orchestrator``. + + **Activate Aggregation List Prefixing in the Application** + + Users can further configure list prefixing in the application by using + the ``Client`` function ``use_list_ensemble_prefix``. 
By specifying a boolean + value to the function, users can turn prefixing on and off. + + .. note:: + To have access to ``Client.use_list_ensemble_prefix``, prefixing must be enabled + on the ``Model`` in the driver script via ``Model.enable_key_prefixing``. + + In the application snippet below, we demonstrate enabling and disabling list prefixing: + + .. code-block:: python + + # Disable key prefixing + client.use_list_ensemble_prefix(False) + # Place a Dataset in the Orchestrator + client.put_dataset(dataset_1) + # Place a list in the Orchestrator + client.append_to_list("list_1", dataset_1) + # Enable key prefixing + client.use_list_ensemble_prefix(True) + # Place a Dataset in the Orchestrator + client.put_dataset(dataset_2) + # Append Dataset to list in the Orchestrator + client.append_to_list("list_2", dataset_2) + + In the application, two lists named `list_1` and `list_2` are sent to the ``Orchestrator``. + The contents of the ``Orchestrator`` after ``Model`` completion are: + + .. code-block:: bash + + 1) "list_1" + 2) "model_name.{dataset_1}.meta" + 3) "model_name.{dataset_1}.dataset_tensor_1" + 4) "model_name.list_2" + 5) "model_name.{dataset_2}.meta" + 6) "model_name.{dataset_2}.dataset_tensor_2" + + You will notice that the ``Model`` name `model_name` is **not** prefixed to `list_1` since + we disabled list prefixing before sending the list to the ``Orchestrator``. However, + when we enabled list prefixing and sent the second list, the ``Model`` name was prefixed + to `list_2` as well as the list ``Dataset`` members. + + .. note:: + The ``Datasets`` sent to the ``Orchestrator`` are all prefixed. This is because + ``Model.enable_key_prefixing`` turns on prefixing for tensors, ``Datasets`` and lists. + + .. group-tab:: ML Model + **Activate ML Model Prefixing in the Application** + + Users can configure ML model prefixing in the application by using + the ``Client`` function ``use_model_ensemble_prefix``. 
By specifying a boolean + value to the function, users can turn prefixing on and off. + + .. note:: + To have access to ``Client.use_model_ensemble_prefix``, prefixing must be enabled + on the ``Model`` in the driver script via ``Model.enable_key_prefixing``. + + In the application snippet below, we demonstrate enabling and disabling ML model prefixing: + + .. code-block:: python + + # Disable ML model prefixing + client.use_model_ensemble_prefix(False) + # Send ML model to the Orchestrator + client.set_model( + "ml_model_1", serialized_model_1, "TF", device="CPU", inputs=inputs, outputs=outputs + ) + # Enable ML model prefixing + client.use_model_ensemble_prefix(True) + # Send prefixed ML model to the Orchestrator + client.set_model( + "ml_model_2", serialized_model_2, "TF", device="CPU", inputs=inputs, outputs=outputs + ) + + In the application, two ML models named `ml_model_1` and `ml_model_2` are sent + to a launched ``Orchestrator``. The contents of the ``Orchestrator`` after ``Model`` completion are: + + .. code-block:: bash + + 1) "ml_model_1" + 2) "model_name.ml_model_2" + + You will notice that the ``Model`` name `model_name` is **not** prefixed to `ml_model_1` since + we disabled ML model prefixing before sending the ML model to the ``Orchestrator``. However, + when we enabled ML model prefixing and sent the second ML model, the ``Model`` name was prefixed + to `ml_model_2`. + + .. group-tab:: Script + **Activate Script Prefixing in the Application** + + Users can configure script prefixing in the application by using + the ``Client`` function ``use_model_ensemble_prefix``. By specifying a boolean + value to the function, users can turn prefixing on and off. + + .. note:: + To have access to ``Client.use_model_ensemble_prefix``, prefixing must be enabled + on the ``Model`` in the driver script via ``Model.enable_key_prefixing``. + + In the application snippet below, we demonstrate enabling and disabling script prefixing: + + .. 
code-block:: python + + # Disable script prefixing + client.use_model_ensemble_prefix(False) + # Store a script in the Orchestrator + client.set_function("script_1", script_1) + # Enable script prefixing + client.use_model_ensemble_prefix(True) + # Store a prefixed script in the Orchestrator + client.set_function("script_2", script_2) + + In the application, two ML models named `script_1` and `script_2` are sent + to a launched ``Orchestrator``. The contents of the ``Orchestrator`` after ``Model`` completion are: + + .. code-block:: bash + + 1) "script_1" + 2) "model_name.script_2" + + You will notice that the ``Model`` name `model_name` is **not** prefixed to `script_1` since + we disabled script prefixing before sending the script to the ``Orchestrator``. However, + when we enabled script prefixing and sent the second script, the ``Model`` name was prefixed + to `script_2`. + +.. _get_prefix: + +Get Operations +============== +In the following sections, we walk through snippets of application code to demonstrate the retrieval +of prefixed tensors, ``Datasets``, lists, ML models, and scripts using SmartRedis put/get +semantics. The examples demonstrate retrieval within the same application where the data +structures were placed, as well as scenarios where data structures are placed by separate +applications. + +.. tabs:: + + .. group-tab:: Tensor + **Retrieve a Tensor Placed by the Same Application** + + SmartSim supports retrieving prefixed tensors sent to the ``Orchestrator`` from within the + same application where the tensor was placed. To achieve this, users must + provide the ``Model`` name that stored the tensor to ``Client.set_data_source``. This action + instructs the ``Client`` to prepend the ``Model`` name to all key searches. For SmartSim to + recognize the ``Model`` name as a data source, users must execute the + ``Model.register_incoming_entity`` function on the ``Model`` and pass the self ``Model`` name + in the driver script. 
+ + As an example, we placed a prefixed tensor on the ``Orchestrator`` within a ``Model`` named + `model_1`. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.tensor_name" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_1`, + we execute ``model_1.register_incoming_entity(model_1)``. By passing the ``Model`` + instance to itself, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + In the application snippet below, we demonstrate retrieving the tensor: + + .. code-block:: python + + # Set the name to prepend to key searches + client.set_data_source("model_1") + # Retrieve the prefixed tensor + tensor_data = client.get_tensor("tensor_name") + # Log the tensor data + client.log_data(LLInfo, f"The tensor value is: {tensor_data}") + + In the `model.out` file, the ``Client`` will log the message:: + Default@00-00-00:The tensor value is: [1 2 3 4] + + **Retrieve a Tensor Placed by an External Application** + + SmartSim supports retrieving prefixed tensors sent to the ``Orchestrator`` by separate + ``Model(s)``. To achieve this, users need to provide the ``Model`` name that stored the tensor + to ``Client.set_data_source``. This action instructs the ``Client`` to prepend the ``Model`` + name to all key searches. For SmartSim to recognize the ``Model`` name as a data source, + users must execute the ``Model.register_incoming_entity`` function on the ``Model`` + responsible for the search and pass the ``Model`` instance that stored the data in the + driver script. + + In the example, a ``Model`` named `model_1` has placed a tensor in a standalone + ``Orchestrator`` with prefixing enabled on the ``Model``. The contents of the ``Orchestrator`` + are as follows: + + .. code-block:: bash + + 1) "model_1.tensor_name" + + We create a separate ``Model``, named `model_2`, with the executable application code below. + + .. 
note:: + In the driver script, after initializing the ``Model`` instance named `model_2`, + we execute ``model_2.register_incoming_entity(model_1)``. By passing the producer ``Model`` + instance to the consumer ``Model``, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + Here we retrieve the stored tensor named `tensor_name`: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Retrieve the prefixed tensor + tensor_data = client.get_tensor("tensor_name") + # Log the tensor data + client.log_data(LLInfo, f"The tensor value is: {tensor_data}") + + In the `model.out` file, the ``Client`` will log the message:: + Default@00-00-00:The tensor value is: [1 2 3 4] + + .. group-tab:: Dataset + **Retrieve a Dataset Placed by the Same Application** + + SmartSim supports retrieving prefixed ``Datasets`` sent to the ``Orchestrator`` from within the + same application where the ``Dataset`` was placed. To achieve this, users must + provide the ``Model`` name that stored the ``Dataset`` to ``Client.set_data_source``. This action + instructs the ``Client`` to prepend the ``Model`` name to all key searches. For SmartSim to + recognize the ``Model`` name as a data source, users must execute the + ``Model.register_incoming_entity`` function on the ``Model`` and pass the self ``Model`` name. + + As an example, we placed a prefixed ``Dataset`` on the ``Orchestrator`` within a ``Model`` named + `model_1`. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.{dataset_name}.dataset_tensor" + 2) "model_1.{dataset_name}.meta" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_1`, + we execute ``model_1.register_incoming_entity(model_1)``. 
By passing the ``Model`` + instance to itself, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + In the application snippet below, we demonstrate retrieving the ``Dataset``: + + .. code-block:: python + + # Set the name to prepend to key searches + client.set_data_source("model_1") + # Retrieve the prefixed Dataset + dataset_data = client.get_dataset("dataset_name") + # Log the Dataset data + client.log_data(LLInfo, f"The Dataset value is: {dataset_data}") + + In the `model.out` file, the ``Client`` will log the message: + + .. code-block:: bash + + Default@00-00-00:Default@00-00-00:The dataset value is: + + DataSet (dataset_name): + Tensors: + dataset_tensor: + type: 16 bit unsigned integer + dimensions: [4] + elements: 4 + Metadata: + none + + **Retrieve a Dataset Placed by an External Application** + + SmartSim supports retrieving prefixed ``Datasets`` sent to the ``Orchestrator`` by separate + ``Model(s)``. To achieve this, users need to provide the ``Model`` name that stored the ``Dataset`` + to ``Client.set_data_source``. This action instructs the ``Client`` to prepend the ``Model`` + name to all key searches. For SmartSim to recognize the ``Model`` name as a data source, + users must execute the ``Model.register_incoming_entity`` function on the ``Model`` + responsible for the search and pass the ``Model`` instance that stored the data. + + In the example, a ``Model`` named `model_1` has placed a ``Dataset`` in a standalone + ``Orchestrator`` with prefixing enabled on the ``Model``. The contents of the ``Orchestrator`` + are as follows: + + .. code-block:: bash + + 1) "model_1.{dataset_name}.dataset_tensor" + 2) "model_1.{dataset_name}.meta" + + We create a separate ``Model``, named `model_2`, with the executable application code below. + + .. 
note:: + In the driver script, after initializing the ``Model`` instance named `model_2`, + we execute ``model_2.register_incoming_entity(model_1)``. By passing the producer ``Model`` + instance to the consumer ``Model``, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + Here we retrieve the stored ``Dataset`` named `dataset_name`: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Retrieve the prefixed Dataset + dataset_data = client.get_dataset("dataset_name") + # Log the Dataset data + client.log_data(LLInfo, f"The Dataset value is: {dataset_data}") + + In the `model.out` file, the ``Client`` will log the message: + + .. code-block:: bash + + Default@00-00-00:Default@00-00-00:The Dataset value is: + + DataSet (dataset_name): + Tensors: + dataset_tensor: + type: 16 bit unsigned integer + dimensions: [4] + elements: 4 + Metadata: + none + + .. group-tab:: Aggregation List + **Retrieve a Aggregation List Placed by the Same Application** + + SmartSim supports retrieving prefixed lists sent to the ``Orchestrator`` from within the + same application where the list was placed. To achieve this, users must + provide the ``Model`` name that stored the list to ``Client.set_data_source``. This action + instructs the ``Client`` to prepend the ``Model`` name to all key searches. For SmartSim to + recognize the ``Model`` name as a data source, users must execute the + ``Model.register_incoming_entity`` function on the ``Model`` and pass the self ``Model`` name. + + As an example, we placed a prefixed list on the ``Orchestrator`` within a ``Model`` named + `model_1`. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.dataset_list" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_1`, + we execute ``model_1.register_incoming_entity(model_1)``. 
By passing the ``Model`` + instance to itself, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + In the application snippet below, we demonstrate checking the length of the list: + + .. code-block:: python + + # Set the name to prepend to key searches + client.set_data_source("model_1") + # Retrieve the prefixed list + list_data = client.get_datasets_from_list("dataset_list") + # Log the list data + client.log_data(LLInfo, f"The length of the list is: {len(list_data)}") + + In the `model.out` file, the ``Client`` will log the message:: + The length of the list is: 1 + + **Retrieve a Aggregation List Placed by an External Application** + + SmartSim supports retrieving prefixed lists sent to the ``Orchestrator`` by separate + ``Model(s)``. To achieve this, users need to provide the ``Model`` name that stored the list + to ``Client.set_data_source``. This action instructs the ``Client`` to prepend the ``Model`` + name to all key searches. For SmartSim to recognize the ``Model`` name as a data source, + users must execute the ``Model.register_incoming_entity`` function on the ``Model`` + responsible for the search and pass the ``Model`` instance that stored the data. + + In the example, a ``Model`` named `model_1` has placed a list in a standalone + ``Orchestrator`` with prefixing enabled on the ``Model``. The contents of the ``Orchestrator`` + are as follows: + + .. code-block:: bash + + 1) "model_name.dataset_list" + + We create a separate ``Model``, named `model_2`, with the executable application code below. + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_2`, + we execute ``model_2.register_incoming_entity(model_1)``. By passing the producer ``Model`` + instance to the consumer ``Model``, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. 
+ + Here we check the length of the list named `dataset_list`: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Retrieve the prefixed list + list_data = client.get_datasets_from_list("dataset_list") + # Log the list data + client.log_data(LLInfo, f"The length of the list is: {len(list_data)}") + + In the `model.out` file, the ``Client`` will log the message:: + The length of the list is: 1 + + .. group-tab:: ML Model + **Retrieve a ML Model Placed by the Same Application** + + SmartSim supports retrieving prefixed ML models sent to the ``Orchestrator`` from within the + same application where the ML model was placed. To achieve this, users must + provide the ``Model`` name that stored the ML model to ``Client.set_data_source``. This action + instructs the ``Client`` to prepend the ``Model`` name to all key searches. For SmartSim to + recognize the ``Model`` name as a data source, users must execute the + ``Model.register_incoming_entity`` function on the ``Model`` and pass the self ``Model`` name. + + As an example, we placed a prefixed ML model on the ``Orchestrator`` within a ``Model`` named + `model_1`. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.mnist_cnn" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_1`, + we execute ``model_1.register_incoming_entity(model_1)``. By passing the ``Model`` + instance to itself, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + In the application snippet below, we demonstrate retrieving the ML model: + + .. 
code-block:: python + + # Set the name to prepend to key searches + client.set_data_source("model_1") + # Retrieve the prefixed ML model + model_data = client.get_model("mnist_cnn") + + **Retrieve a ML Model Placed by an External Application** + + SmartSim supports retrieving prefixed ML model sent to the ``Orchestrator`` by separate + ``Model(s)``. To achieve this, users need to provide the ``Model`` name that stored the ML model + to ``Client.set_data_source``. This action instructs the ``Client`` to prepend the ``Model`` + name to all key searches. For SmartSim to recognize the ``Model`` name as a data source, + users must execute the ``Model.register_incoming_entity`` function on the ``Model`` + responsible for the search and pass the ``Model`` instance that stored the data. + + In the example, a ``Model`` named `model_1` has placed a ML model in a standalone + ``Orchestrator`` with prefixing enabled on the ``Model``. The contents of the ``Orchestrator`` + are as follows: + + .. code-block:: bash + + 1) "model_1.mnist_cnn" + + We create a separate ``Model``, named `model_2`, with the executable application code below. + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_2`, + we execute ``model_2.register_incoming_entity(model_1)``. By passing the producer ``Model`` + instance to the consumer ``Model``, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + Here we retrieve the stored ML model named `mnist_cnn`: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Retrieve the prefixed model + model_data = client.get_model("mnist_cnn") + + .. group-tab:: Script + **Retrieve a Script Placed by the Same Application** + + SmartSim supports retrieving prefixed scripts sent to the ``Orchestrator`` from within the + same application where the script was placed. 
To achieve this, users must + provide the ``Model`` name that stored the script to ``Client.set_data_source``. This action + instructs the ``Client`` to prepend the ``Model`` name to all key searches. For SmartSim to + recognize the ``Model`` name as a data source, users must execute the + ``Model.register_incoming_entity`` function on the ``Model`` and pass the self ``Model`` name. + + As an example, we placed a prefixed script on the ``Orchestrator`` within a ``Model`` named + `model_1`. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.normalizer" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_1`, + we execute ``model_1.register_incoming_entity(model_1)``. By passing the ``Model`` + instance to itself, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + In the application snippet below, we demonstrate retrieving the script: + + .. code-block:: python + + # Set the name to prepend to key searches + client.set_data_source("model_1") + # Retrieve the prefixed script + script_data = client.get_script("normalizer") + # Log the script data + client.log_data(LLInfo, f"The script data is: {script_data}") + + In the `model.out` file, the ``Client`` will log the message: + + .. code-block:: bash + + The script data is: def normalize(X): + """Simple function to normalize a tensor""" + mean = X.mean + std = X.std + + return (X-mean)/std + + **Retrieve a Script Placed by an External Application** + + SmartSim supports retrieving prefixed scripts sent to the ``Orchestrator`` by separate + ``Model(s)``. To achieve this, users need to provide the ``Model`` name that stored the script + to ``Client.set_data_source``. This action instructs the ``Client`` to prepend the ``Model`` + name to all key searches. 
For SmartSim to recognize the ``Model`` name as a data source, + users must execute the ``Model.register_incoming_entity`` function on the ``Model`` + responsible for the search and pass the ``Model`` instance that stored the data. + + In the example, a ``Model`` named `model_1` has placed a script in a standalone + ``Orchestrator`` with prefixing enabled on the ``Model``. The contents of the ``Orchestrator`` + are as follows: + + .. code-block:: bash + + 1) "model_1.normalizer" + + We create a separate ``Model``, named `model_2`, with the executable application code below. + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_2`, + we execute ``model_2.register_incoming_entity(model_1)``. By passing the producer ``Model`` + instance to the consumer ``Model``, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + Here we retrieve the stored script named `normalizer`: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Retrieve the prefixed script + script_data = client.get_script("model_1.normalizer") + # Log the script data + client.log_data(LLInfo, f"The script data is: {script_data}") + + In the `model.out` file, the ``Client`` will log the message: + + .. code-block:: bash + + The script data is: def normalize(X): + """Simple function to normalize a tensor""" + mean = X.mean + std = X.std + + return (X-mean)/std + +.. _run_prefix: + +Run Operations +============== +In the following sections, we walk through snippets of application code to demonstrate executing +prefixed ML models and scripts using SmartRedis run semantics. The examples demonstrate +executing within the same application where the ML Model and Script were placed, as well as scenarios +where ML Model and Script are placed by separate applications. + +.. tabs:: + + .. 
group-tab:: ML Model + **Access ML Models From within the Application** + + SmartSim supports executing prefixed ML models with prefixed tensors sent to the ``Orchestrator`` from within + the same application that the ML model was placed. To achieve this, users must + provide the ``Model`` name that stored the ML model and input tensors to ``Client.set_data_source``. This action + instructs the ``Client`` to prepend the ``Model`` name to all key names. For SmartSim to + recognize the ``Model`` name as a data source, users must execute the + ``Model.register_incoming_entity`` function on the ``Model`` and pass the self ``Model`` name. + + As an example, we placed a prefixed ML model and tensor on the ``Orchestrator`` within a ``Model`` named + `model_1`. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.mnist_cnn" + 2) "model_1.mnist_images" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_1`, + we execute ``model_1.register_incoming_entity(model_1)``. By passing the ``Model`` + instance to itself, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + In the application snippet below, we demonstrate running the ML model: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Run the ML model + client.run_model(name="mnist_cnn", inputs=["mnist_images"], outputs=["Identity"]) + + The ``Orchestrator`` now contains prefixed output tensors: + + .. code-block:: bash + + 1) "model_1.Identity" + 2) "model_1.mnist_cnn" + 3) "model_1.mnist_images" + + .. note:: + The output tensors are prefixed because we executed ``model_1.enable_key_prefixing`` + in the driver script which enables and activates prefixing for tensors, ``Datasets`` + and lists. 
+ + **Access ML Models Loaded From an External Application** + + SmartSim supports executing prefixed ML models with prefixed tensors sent to the ``Orchestrator`` by separate + ``Model(s)``. To achieve this, users need to provide the ``Model`` name that stored the ML model and tensor + to ``Client.set_data_source``. This action instructs the ``Client`` to prepend the ``Model`` + name to all key searches. For SmartSim to recognize the ``Model`` name as a data source, + users must execute the ``Model.register_incoming_entity`` function on the ``Model`` + responsible for the search and pass the ``Model`` instance that stored the data. + + In the example, a ``Model`` named `model_1` has placed a ML model and tensor in a standalone + ``Orchestrator`` with prefixing enabled on the ``Model``. The contents of the ``Orchestrator`` + are as follows: + + .. code-block:: bash + + 1) "model_1.mnist_cnn" + 2) "model_1.mnist_images" + + We create a separate ``Model``, named `model_2`, with the executable application code below. + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_2`, + we execute ``model_2.register_incoming_entity(model_1)``. By passing the producer ``Model`` + instance to the consumer ``Model``, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + In the application snippet below, we demonstrate running the ML model: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Run the ML model + client.run_model(name="mnist_cnn", inputs=["mnist_images"], outputs=["Identity"]) + + The ``Orchestrator`` now contains prefixed output tensors: + + .. code-block:: bash + + 1) "model_2.Identity" + 2) "model_1.mnist_cnn" + 3) "model_1.mnist_images" + + .. 
note:: + The output tensors are prefixed because we executed ``model_2.enable_key_prefixing`` + in the driver script which enables and activates prefixing for tensors, ``Datasets`` + and lists. + + .. group-tab:: Script + + **Access Scripts From within the Application** + + SmartSim supports executing prefixed scripts with prefixed tensors sent to the ``Orchestrator`` from within + the same application that the script was placed. To achieve this, users must + provide the ``Model`` name that stored the script and input tensors to ``Client.set_data_source``. This action + instructs the ``Client`` to prepend the ``Model`` name to all key names. For SmartSim to + recognize the ``Model`` name as a data source, users must execute the + ``Model.register_incoming_entity`` function on the ``Model`` and pass the self ``Model`` name. + + As an example, we placed a prefixed script and tensor on the ``Orchestrator`` within a ``Model`` named + `model_1`. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.normalizer" + 2) "model_1.X_rand" + + To run the script, the prefixed script name `"model_name.normalizer"` and prefixed + input tensors `"model_name.X_rand"` must be provided, as demonstrated below: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Run the script + client.run_script("normalizer", "normalize", inputs=["X_rand"], outputs=["X_norm"]) + + The ``Orchestrator`` now contains prefixed output tensors: + + .. code-block:: bash + + 1) "model_1.normalizer" + 2) "model_1.X_rand" + 3) "model_1.X_norm" + + .. note:: + The output tensors are prefixed because we executed ``model_1.enable_key_prefixing`` + in the driver script which enables and activates prefixing for tensors, ``Datasets`` + and lists. + + **Access Scripts Loaded From an External Application** + + SmartSim supports executing prefixed scripts with prefixed tensors sent to the ``Orchestrator`` by separate + ``Model(s)``. 
To achieve this, users need to provide the ``Model`` name that stored the script and tensor + to ``Client.set_data_source``. This action instructs the ``Client`` to prepend the ``Model`` + name to all key searches. For SmartSim to recognize the ``Model`` name as a data source, + users must execute the ``Model.register_incoming_entity`` function on the ``Model`` + responsible for the search and pass the ``Model`` instance that stored the data. + + In the example, a ``Model`` named `model_1` has placed a script and tensor in a standalone + ``Orchestrator`` with prefixing enabled on the ``Model``. The contents of the ``Orchestrator`` + are as follows: + + .. code-block:: bash + + 1) "model_1.normalizer" + 2) "model_1.X_rand" + + We create a separate ``Model``, named `model_2`, with the executable application code below. + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_2`, + we execute ``model_2.register_incoming_entity(model_1)``. By passing the producer ``Model`` + instance to the consumer ``Model``, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for use in ``Client.set_data_source``. + + In the application snippet below, we demonstrate running the script: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Run the script + client.run_script("normalizer", "normalize", inputs=["X_rand"], outputs=["X_norm"]) + + The ``Orchestrator`` now contains prefixed output tensors: + + .. code-block:: bash + + 1) "model_1.normalizer" + 2) "model_1.X_rand" + 3) "model_2.X_norm" + + .. note:: + The output tensors are prefixed because we executed ``model_2.enable_key_prefixing`` + in the driver script which enables and activates prefixing for tensors, ``Datasets`` + and lists. + +.. 
_copy_rename_del_prefix: + +Copy/Rename/Delete Operations +============================= +In the following sections, we walk through snippets of application code to demonstrate the copy, rename and delete +operations on prefixed tensors, ``Datasets``, lists, ML models, and scripts. The examples +demonstrate these operations within the same script where the data +structures were placed, as well as scenarios where data structures are placed by separate +scripts. + +.. tabs:: + + .. group-tab:: Tensor + **Copy/Rename/Delete Operations on Tensors in The Same Application** + + SmartSim supports copy/rename/delete operations on prefixed tensors sent to the ``Orchestrator`` from within + the same application that the tensor was placed. To achieve this, users must + provide the ``Model`` name that stored the tensor to ``Client.set_data_source``. This action + instructs the ``Client`` to prepend the ``Model`` name to all key names. For SmartSim to + recognize the ``Model`` name as a data source, users must execute the + ``Model.register_incoming_entity`` function on the ``Model`` and pass the self ``Model`` name. + + As an example, we placed a prefixed tensor on the ``Orchestrator`` within a ``Model`` named + `model_1`. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.tensor" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_1`, + we execute ``model_1.register_incoming_entity(model_1)``. By passing the ``Model`` + instance to itself, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + To rename the tensor in the ``Orchestrator``, we provide self ``Model`` name + to ``Client.set_data_source`` then execute the function ``rename_tensor``: + + .. 
code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Rename the tensor + client.rename_tensor("tensor", "renamed_tensor") + + Because prefixing is enabled on the ``Model`` via ``enable_key_prefixing`` in the driver script, + SmartSim will keep the prefix on the tensor but replace the tensor name as shown in the ``Orchestrator``: + + .. code-block:: bash + + 1) "model_1.renamed_tensor" + + Next, we copy the prefixed tensor to a new destination: + + .. code-block:: python + + client.copy_tensor("renamed_tensor", "copied_tensor") + + Since tensor prefixing is enabled on the ``Client``, the `copied_tensor` is prefixed: + + .. code-block:: bash + + 1) "model_1.renamed_tensor" + 2) "model_1.copied_tensor" + + Next, delete `renamed_tensor`: + + .. code-block:: python + + client.delete_tensor("renamed_tensor") + + The contents of the ``Orchestrator`` are: + + .. code-block:: bash + + 1) "model_1.copied_tensor" + + **Copy/Rename/Delete Operations on Tensors Placed by an External Application** + + SmartSim supports copy/rename/delete operations on prefixed tensors sent to the ``Orchestrator`` by separate + ``Model(s)``. To achieve this, users need to provide the ``Model`` name that stored the tensor + to ``Client.set_data_source``. This action instructs the ``Client`` to prepend the ``Model`` + name to all key searches. For SmartSim to recognize the ``Model`` name as a data source, + users must execute the ``Model.register_incoming_entity`` function on the ``Model`` + responsible for the search and pass the ``Model`` instance that stored the data. + + In the example, a ``Model`` named `model_1` has placed a tensor in a standalone ``Orchestrator`` with prefixing enabled + on the ``Client``. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.tensor" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_2`, + we execute ``model_2.register_incoming_entity(model_1)``. 
By passing the producer ``Model`` + instance to the consumer ``Model``, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + From within a separate ``Model`` named `model_2`, we perform basic copy/rename/delete operations. + To instruct the ``Client`` to prepend a ``Model`` name to all key searches, use the + ``Client.set_data_source`` function. Specify the ``Model`` name `model_1` + that placed the tensor in the ``Orchestrator``: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + + To rename the tensor in the ``Orchestrator``, we provide the tensor name: + + .. code-block:: python + + client.rename_tensor("tensor", "renamed_tensor") + + SmartSim will replace the prefix with the current ``Model`` name since prefixing is enabled + on the current ``Model``. The contents of the ``Orchestrator`` are: + + .. code-block:: bash + + 1) "model_2.renamed_tensor" + + .. note:: + In the driver script, we also register `model_2` as an entity on itself via ``model_2.register_incoming_entity(model_2)``. + This way we can use ``Client.set_data_source`` to interact with prefixed data placed by `model_2`. + + Next, we copy the prefixed tensor to a new destination: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_2") + # Copy the tensor data + client.copy_tensor("renamed_tensor", "copied_tensor") + + The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_2.renamed_tensor" + 2) "model_2.copied_tensor" + + Next, delete `copied_tensor` by specifying the name: + + .. code-block:: python + + client.delete_tensor("copied_tensor") + + The contents of the ``Orchestrator`` are: + + .. code-block:: bash + + 1) "model_2.renamed_tensor" + + .. 
group-tab:: Dataset + **Copy/Rename/Delete Operations on a Dataset in The Same Application** + + SmartSim supports copy/rename/delete operations on prefixed ``Datasets`` sent to the ``Orchestrator`` from within + the same application that the ``Dataset`` was placed. To achieve this, users must + provide the ``Model`` name that stored the ``Dataset`` to ``Client.set_data_source``. This action + instructs the ``Client`` to prepend the ``Model`` name to all key names. For SmartSim to + recognize the ``Model`` name as a data source, users must execute the + ``Model.register_incoming_entity`` function on the ``Model`` and pass the self ``Model`` name. + + As an example, we placed a prefixed ``Dataset`` on the ``Orchestrator`` within a ``Model`` named + `model_1`. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.{dataset}.dataset_tensor" + 2) "model_1.{dataset}.meta" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_1`, + we execute ``model_1.register_incoming_entity(model_1)``. By passing the ``Model`` + instance to itself, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + To rename the ``Dataset`` in the ``Orchestrator``, we provide self ``Model`` name + to ``Client.set_data_source`` then execute the function ``rename_dataset``: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Rename the Dataset + client.rename_dataset("dataset", "renamed_dataset") + + Because prefixing is enabled on the ``Model`` via ``enable_key_prefixing`` in the driver script, + SmartSim will keep the prefix on the ``Dataset`` but replace the ``Dataset`` name as shown in the ``Orchestrator``: + + .. code-block:: bash + + 1) "model_1.{renamed_dataset}.dataset_tensor" + 2) "model_1.{renamed_dataset}.meta" + + Next, we copy the prefixed ``Dataset`` to a new destination: + + ..
code-block:: python + + client.copy_dataset("renamed_dataset", "copied_dataset") + + Since ``Dataset`` prefixing is enabled on the ``Client``, the `copied_dataset` is prefixed: + + .. code-block:: bash + + 1) "model_1.{renamed_dataset}.dataset_tensor" + 2) "model_1.{renamed_dataset}.meta" + 3) "model_1.{copied_dataset}.dataset_tensor" + 4) "model_1.{copied_dataset}.meta" + + Next, delete `copied_dataset`: + + .. code-block:: python + + client.delete_dataset("copied_dataset") + + The contents of the ``Orchestrator`` are: + + .. code-block:: bash + + 1) "model_1.{renamed_dataset}.dataset_tensor" + 2) "model_1.{renamed_dataset}.meta" + + **Copy/Rename/Delete Operations on Datasets Placed by an External Application** + + SmartSim supports copy/rename/delete operations on prefixed ``Datasets`` sent to the ``Orchestrator`` by separate + ``Model(s)``. To achieve this, users need to provide the ``Model`` name that stored the ``Dataset`` + to ``Client.set_data_source``. This action instructs the ``Client`` to prepend the ``Model`` + name to all key searches. For SmartSim to recognize the ``Model`` name as a data source, + users must execute the ``Model.register_incoming_entity`` function on the ``Model`` + responsible for the search and pass the ``Model`` instance that stored the data. + + In the example, a ``Model`` named `model_1` has placed a ``Dataset`` in a standalone ``Orchestrator`` with prefixing enabled + on the ``Client``. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.{dataset}.dataset_tensor" + 2) "model_1.{dataset}.meta" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_2`, + we execute ``model_2.register_incoming_entity(model_1)``. By passing the producer ``Model`` + instance to the consumer ``Model``, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``.
+ + From within a separate ``Model`` named `model_2`, we perform basic copy/rename/delete operations. + To instruct the ``Client`` to prepend a ``Model`` name to all key searches, use the + ``Client.set_data_source`` function. Specify the ``Model`` name `model_1` + that placed the ``Dataset`` in the ``Orchestrator``: + + .. code-block:: python + + client.set_data_source("model_1") + + To rename the ``Dataset`` in the ``Orchestrator``, we provide the ``Dataset`` `name`: + + .. code-block:: python + + client.rename_dataset("dataset", "renamed_dataset") + + SmartSim will replace the prefix with the current ``Model`` name since prefixing is enabled + on the current ``Model`` via ``Model.enable_key_prefixing`` in the driver script. + The contents of the ``Orchestrator`` are: + + .. code-block:: bash + + 1) "model_2.{renamed_dataset}.dataset_tensor" + 2) "model_2.{renamed_dataset}.meta" + + .. note:: + In the driver script, we also register `model_2` as an entity on itself via ``model_2.register_incoming_entity(model_2)``. + This way we can use ``Client.set_data_source`` to interact with prefixed data placed by `model_2`. + + Next, we copy the prefixed ``Dataset`` to a new destination: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_2") + # Copy the Dataset + client.copy_dataset("renamed_dataset", "copied_dataset") + + The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_2.{renamed_dataset}.dataset_tensor" + 2) "model_2.{renamed_dataset}.meta" + 3) "model_2.{copied_dataset}.dataset_tensor" + 4) "model_2.{copied_dataset}.meta" + + Next, delete `copied_dataset` by specifying the name: + + .. code-block:: python + + client.delete_dataset("copied_dataset") + + The contents of the ``Orchestrator`` are: + + .. code-block:: bash + + 1) "model_2.{renamed_dataset}.dataset_tensor" + 2) "model_2.{renamed_dataset}.meta" + + ..
group-tab:: Aggregation List + **Copy/Rename/Delete Operations on an Aggregation List in The Same Application** + + SmartSim supports copy/rename/delete operations on prefixed lists sent to the ``Orchestrator`` from within + the same application that the list was placed. To achieve this, users must + provide the ``Model`` name that stored the list to ``Client.set_data_source``. This action + instructs the ``Client`` to prepend the ``Model`` name to all key names. For SmartSim to + recognize the ``Model`` name as a data source, users must execute the + ``Model.register_incoming_entity`` function on the ``Model`` and pass the self ``Model`` name. + + As an example, we placed a prefixed list on the ``Orchestrator`` within a ``Model`` named + `model_1`. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.list_of_datasets" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_1`, + we execute ``model_1.register_incoming_entity(model_1)``. By passing the ``Model`` + instance to itself, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + To rename the list in the ``Orchestrator``, we provide self ``Model`` name + to ``Client.set_data_source`` then execute the function ``rename_list``: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Rename the list + client.rename_list("list_of_datasets", "renamed_list") + + Because prefixing is enabled on the ``Model`` via ``enable_key_prefixing`` in the driver script, + SmartSim will keep the prefix on the list but replace the list name as shown in the ``Orchestrator``: + + .. code-block:: bash + + 1) "model_1.renamed_list" + + Next, we copy the prefixed list to a new destination: + + ..
code-block:: python + + client.copy_list("renamed_list", "copied_list") + + Since list prefixing is enabled on the ``Client``, the `copied_list` is prefixed: + + .. code-block:: bash + + 1) "model_1.renamed_list" + 2) "model_1.copied_list" + + Next, delete `copied_list`: + + .. code-block:: python + + client.delete_list("copied_list") + + The contents of the ``Orchestrator`` are: + + .. code-block:: bash + + 1) "model_1.renamed_list" + + **Copy/Rename/Delete Operations on Aggregation Lists Placed by an External Application** + + SmartSim supports copy/rename/delete operations on prefixed lists sent to the ``Orchestrator`` by separate + ``Model(s)``. To achieve this, users need to provide the ``Model`` name that stored the list + to ``Client.set_data_source``. This action instructs the ``Client`` to prepend the ``Model`` + name to all key searches. For SmartSim to recognize the ``Model`` name as a data source, + users must execute the ``Model.register_incoming_entity`` function on the ``Model`` + responsible for the search and pass the ``Model`` instance that stored the data. + + In the example, a ``Model`` named `model_1` has placed a list in a standalone ``Orchestrator`` with prefixing enabled + on the ``Client``. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.list_of_datasets" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_2`, + we execute ``model_2.register_incoming_entity(model_1)``. By passing the producer ``Model`` + instance to the consumer ``Model``, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + From within a separate ``Model`` named `model_2`, we perform basic copy/rename/delete operations. + To instruct the ``Client`` to prepend a ``Model`` name to all key searches, use the + ``Client.set_data_source`` function. 
Specify the ``Model`` name `model_1` + that placed the list in the ``Orchestrator``: + + .. code-block:: python + + client.set_data_source("model_1") + + To rename the list in the ``Orchestrator``, we provide the list name: + + .. code-block:: python + + client.rename_list("list_of_datasets", "renamed_list") + + SmartSim will replace the prefix with the current ``Model`` name since prefixing is enabled + on the current ``Model``. The contents of the ``Orchestrator`` are: + + .. code-block:: bash + + 1) "model_2.renamed_list" + + .. note:: + In the driver script, we also register `model_2` as an entity on itself via ``model_2.register_incoming_entity(model_2)``. + This way we can use ``Client.set_data_source`` to interact with prefixed data placed by `model_2`. + + Next, we copy the prefixed list to a new destination: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_2") + # Copy the list data + client.copy_list("renamed_list", "copied_list") + + The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_2.renamed_list" + 2) "model_2.copied_list" + + Next, delete `copied_list` by specifying the name: + + .. code-block:: python + + client.delete_list("copied_list") + + The contents of the ``Orchestrator`` are: + + .. code-block:: bash + + 1) "model_2.renamed_list" + + .. group-tab:: ML Model + **Delete ML Models From within the Application** + + SmartSim supports delete operations on prefixed ML models sent to the ``Orchestrator`` from within + the same application that the ML model was placed. To achieve this, users must + provide the ``Model`` name that stored the ML model to ``Client.set_data_source``. This action + instructs the ``Client`` to prepend the ``Model`` name to all key names. For SmartSim to + recognize the ``Model`` name as a data source, users must execute the + ``Model.register_incoming_entity`` function on the ``Model`` and pass the self ``Model`` name.
+ + As an example, we placed a prefixed ML model on the ``Orchestrator`` within a ``Model`` named + `model_1`. The ``Orchestrator`` contents are: + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_1`, + we execute ``model_1.register_incoming_entity(model_1)``. By passing the ``Model`` + instance to itself, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + .. code-block:: bash + + 1) "model_1.ml_model" + + To delete the ML model in the ``Orchestrator``, we provide self ``Model`` name + to ``Client.set_data_source`` then execute the function ``delete_model``: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Delete the ML model + client.delete_model("ml_model") + + **Delete an ML Model Placed by an External Application** + + SmartSim supports delete operations on prefixed ML models sent to the ``Orchestrator`` by separate ``Model(s)``. + To do so, users must provide the ``Model`` name that stored the ML model to ``Client.set_data_source``. + This will instruct the ``Client`` to prepend the ``Model`` name input to all key searches. + + In the example, a ``Model`` named `model_1` has placed an ML model in a standalone ``Orchestrator`` with prefixing enabled + on the ``Client``. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.ml_model" + + From within a separate ``Model`` named `model_2`, we perform a basic delete operation. + To instruct the ``Client`` to prepend a ``Model`` name to all key searches, use the + ``Client.set_data_source`` function. Specify the ``Model`` name `model_1` + that placed the ML model in the ``Orchestrator``: + + .. code-block:: python + + client.set_data_source("model_1") + + To delete the ML model in the ``Orchestrator``, we provide the ML model name: + + .. code-block:: python + + client.delete_model("ml_model") + + ..
group-tab:: Script + + **Delete Scripts From within the Application** + + SmartSim supports delete operations on prefixed scripts sent to the ``Orchestrator`` from within + the same application that the script was placed. To achieve this, users must + provide the ``Model`` name that stored the script to ``Client.set_data_source``. This action + instructs the ``Client`` to prepend the ``Model`` name to all key names. For SmartSim to + recognize the ``Model`` name as a data source, users must execute the + ``Model.register_incoming_entity`` function on the ``Model`` and pass the self ``Model`` name. + + As an example, we placed a prefixed script on the ``Orchestrator`` within a ``Model`` named + `model_1`. The ``Orchestrator`` contents are: + + .. code-block:: bash + + 1) "model_1.script" + + .. note:: + In the driver script, after initializing the ``Model`` instance named `model_1`, + we execute ``model_1.register_incoming_entity(model_1)``. By passing the ``Model`` + instance to itself, we instruct SmartSim to recognize the name of `model_1` as a valid data + source for subsequent use in ``Client.set_data_source``. + + To delete the script in the ``Orchestrator``, we provide the script name: + + .. code-block:: python + + # Set the Model source name + client.set_data_source("model_1") + # Delete the script + client.delete_script("script") + + **Delete a Script Placed by an External Application** + + SmartSim supports delete operations on prefixed scripts sent to the ``Orchestrator`` by separate ``Model(s)``. + To do so, users must provide the ``Model`` name that stored the script to ``Client.set_data_source``. + This will instruct the ``Client`` to prepend the ``Model`` name input to all key searches. + + In the example, a ``Model`` named `model_1` has placed a script in a standalone ``Orchestrator`` with prefixing enabled + on the ``Client``. The ``Orchestrator`` contents are: + + ..
code-block:: bash + + 1) "model_1.script" + + From within a separate ``Model`` named `model_2`, we perform a basic delete operation. + To instruct the ``Client`` to prepend a ``Model`` name to all key searches, use the + ``Client.set_data_source`` function. Specify the ``Model`` name `model_1` + that placed the script in the ``Orchestrator``: + + .. code-block:: python + + client.set_data_source("model_1") + + To delete the script in the ``Orchestrator``, we provide the script name: + + .. code-block:: python + + client.delete_script("script") \ No newline at end of file diff --git a/doc/orchestrator.rst b/doc/orchestrator.rst index 456d9a814..6ccc7c1e1 100644 --- a/doc/orchestrator.rst +++ b/doc/orchestrator.rst @@ -1,208 +1,688 @@ +.. _orch_docs: + ************ Orchestrator ************ +======== +Overview +======== +The ``Orchestrator`` is an in-memory database with features built for +AI-enabled workflows including online training, low-latency inference, cross-application data +exchange, online interactive visualization, online data analysis, computational steering, and more. + +An ``Orchestrator`` can be thought of as a general feature store +capable of storing numerical data (tensors and ``Datasets``), AI models (TF, TF-lite, PyTorch, or ONNX), +and scripts (TorchScripts). In addition to storing data, the ``Orchestrator`` is capable of +executing AI models and TorchScripts on the stored data using CPUs or GPUs. + +.. figure:: images/smartsim-arch.png + + Sample ``Experiment`` showing a user application leveraging + machine learning infrastructure launched by SmartSim and connected + to an online analysis and visualization simulation via the ``Orchestrator``. + +Users can establish a connection to the ``Orchestrator`` from within ``Model`` executable code, ``Ensemble`` +member executable code, or ``Experiment`` driver scripts by using the +:ref:`SmartRedis` ``Client`` library.
+ +SmartSim offers **two** types of ``Orchestrator`` deployments: + +- :ref:`Standalone Deployment` + A standalone ``Orchestrator`` is ideal for systems that have heterogeneous node types + (i.e. a mix of CPU-only and GPU-enabled compute nodes) where + ML model and TorchScript evaluation is more efficiently performed off-node. This + deployment is also ideal for workflows relying on data exchange between multiple + applications (e.g. online analysis, visualization, computational steering, or + producer/consumer application couplings). Standalone deployment is also optimal for + high data throughput scenarios where ``Orchestrators`` require large amounts of compute resources. + +- :ref:`Colocated Deployment` + A colocated ``Orchestrator`` is ideal when the data and hardware accelerator are located on the same compute node. + This setup helps reduce latency in ML inference and TorchScript evaluation by eliminating off-node communication. +.. warning:: + Colocated ``Orchestrators`` cannot share data across compute nodes. + Communication is only supported between a ``Model`` and colocated ``Orchestrator`` pair. + +SmartSim allows users to launch :ref:`multiple Orchestrators` of either type during +the course of an ``Experiment``. If a workflow requires a multiple ``Orchestrator`` environment, a +`db_identifier` argument must be specified during ``Orchestrator`` initialization. Users can connect to +``Orchestrators`` in a multiple ``Orchestrator`` workflow by specifying the respective `db_identifier` argument +within a :ref:`ConfigOptions` object that is passed into the SmartRedis ``Client`` constructor. + +.. _standalone_orch_doc: + +===================== +Standalone Deployment +===================== +-------- +Overview +-------- +During standalone ``Orchestrator`` deployment, a SmartSim ``Orchestrator`` (the database) runs on separate +compute node(s) from the SmartSim ``Model`` node(s). 
A standalone ``Orchestrator`` can be deployed on a single +node (single-sharded) or distributed (sharded) over multiple nodes. With a multi-node ``Orchestrator``, users can +scale the number of database nodes for inference and script evaluation, enabling +increased in-memory capacity for data storage in large-scale workflows. Single-node +``Orchestrators`` are effective for small-scale workflows and offer lower latency for ``Client`` API calls +that involve data appending or processing (e.g. ``Client.append_to_list``, ``Client.run_model``, etc). + +When connecting to a standalone ``Orchestrator`` from within a ``Model`` application, the user has +several options to connect a SmartRedis ``Client``: + +- In an ``Experiment`` with a single deployed ``Orchestrator``, users can rely on SmartRedis + to detect the ``Orchestrator`` address through runtime configuration of the SmartSim ``Model`` environment. + A default ``Client`` constructor, with no user-specified parameters, is sufficient to + connect to the ``Orchestrator``. The only exception is for the Python ``Client``, which requires + the `cluster` constructor parameter to differentiate between standalone deployment and colocated + deployment. +- In an ``Experiment`` with multiple ``Orchestrators``, users can connect to a specific ``Orchestrator`` by + first specifying the `db_identifier` in the ``ConfigOptions`` constructor within the executable application. + Subsequently, users should pass the ``ConfigOptions`` instance to the ``Client`` constructor. +- Users can specify or override automatically configured connection options by providing the + ``Orchestrator`` address in the ``ConfigOptions`` object. Subsequently, users should pass the ``ConfigOptions`` + instance to the ``Client`` constructor. + +If connecting to a standalone ``Orchestrator`` from a ``Experiment`` driver script, the user must specify +the address of the ``Orchestrator`` to the ``Client`` constructor. 
SmartSim does not automatically +configure the environment of the ``Experiment`` driver script to connect to an ``Orchestrator``. Users +can access an ``Orchestrators`` address through ``Orchestrator.get_address``. -The ``Orchestrator`` is an in-memory database that is launched prior to all other -entities within an ``Experiment``. The ``Orchestrator`` can be used to store and retrieve -data during the course of an experiment and across multiple entities. In order to -stream data into or receive data from the ``Orchestrator``, one of the SmartSim clients -(SmartRedis) has to be used within a Model. +.. note:: + In SmartSim ``Model`` applications, it is advisable to **avoid** specifying addresses directly to the ``Client`` constructor. + Utilizing the SmartSim environment configuration for SmartRedis ``Client`` connections + allows the SmartSim ``Model`` application code to remain unchanged even as ``Orchestrator`` deployment + options vary. -.. |orchestrator| image:: images/Orchestrator.png - :width: 700 - :alt: Alternative text +The following image illustrates +communication between a standalone ``Orchestrator`` and a +SmartSim ``Model``. In the diagram, the application is running on multiple compute nodes, +separate from the ``Orchestrator`` compute nodes. Communication is established between the +``Model`` application and the sharded ``Orchestrator`` using the :ref:`SmartRedis client`. -|orchestrator| +.. figure:: images/clustered_orchestrator-1.png -Combined with the SmartRedis clients, the ``Orchestrator`` is capable of hosting and executing -AI models written in Python on CPU or GPU. The ``Orchestrator`` supports models written with -TensorFlow, Pytorch, TensorFlow-Lite, or models saved in an ONNX format (e.g. sci-kit learn). + Sample Standalone ``Orchestrator`` Deployment +.. 
note:: + Users do not need to know how the data is stored in a standalone configuration and + can address the cluster with the SmartRedis ``Client`` like a single block of memory + using simple put/get semantics in SmartRedis. + +In scenarios where data needs to be shared amongst ``Experiment`` entities, +such as online analysis, training, and processing, a standalone ``Orchestrator`` +is optimal. The data produced by multiple processes in a ``Model`` is stored in the standalone +``Orchestrator`` and is available for consumption by other ``Model``'s. + +If a workflow requires an application to leverage multiple standalone deployments, +multiple ``Clients`` can be instantiated within an application, +with each ``Client`` connected to a unique ``Orchestrator``. This is accomplished through the use of the +`db-identifier` and :ref:`ConfigOptions` object specified at ``Orchestrator`` initialization time. +For more information on a multiple database ``Experiment``, visit the :ref:`Multiple Orchestrators` section on +this page. + +------- +Example +------- +In the following example, we demonstrate deploying a standalone ``Orchestrator`` on an HPC system. +Once the standalone ``Orchestrator`` is launched from the ``Experiment`` driver script, we walk through +connecting a SmartRedis ``Client`` to the ``Orchestrator`` from within the ``Model`` +application to transmit and poll for data. -Cluster Orchestrator -==================== +The example is comprised of two script files: + +- :ref:`Application Script` + The application script is a Python file that contains instructions to create a SmartRedis + ``Client`` connection to the standalone ``Orchestrator``. To demonstrate the ability of + workflow components to access data from other entities, we retrieve the tensors set by + the driver script using a SmartRedis ``Client`` in the application script. We then instruct + the ``Client`` to send and retrieve data from within the application script. 
The example source + code is available in the dropdown below for convenient execution and customization. + + .. dropdown:: Example Application Script source code + + .. literalinclude:: tutorials/doc_examples/orch_examples/std_app.py -The ``Orchestrator`` supports single node and distributed memory settings. This means -that a single compute host can be used for the database or multiple by specifying -``db_nodes`` to be greater than 1. +- :ref:`Experiment Driver Script` + The ``Experiment`` driver script is responsible for launching and managing SmartSim entities. Within this script, + we use the ``Experiment`` API to create and launch a standalone ``Orchestrator``. To demonstrate the capability of + a ``Model`` application to access ``Orchestrator`` data sent from other sources, we employ the SmartRedis ``Client`` in + the driver script to store a tensor in the ``Orchestrator``, which is later retrieved by the ``Model`` application. + To employ the application script, we initialize a ``Model`` object with the application script as the executable, + launch the ``Orchestrator``, and then launch the ``Model``. -.. |cluster-orc| image:: images/clustered-orc-diagram.png - :width: 700 - :alt: Alternative text + To further demonstrate the ability of workflow components to access data from + other entities, we retrieve the tensors stored by the completed ``Model`` using a SmartRedis ``Client`` in + the driver script. Lastly, we tear down the ``Orchestrator``. The example source code is available in the dropdown below for + convenient execution and customization. -|cluster-orc| + .. dropdown:: Example Experiment Driver Script Source Code + .. literalinclude:: tutorials/doc_examples/orch_examples/std_driver.py -With a clustered ``Orchestrator``, multiple compute hosts memory can be used together -to store data. As well, the CPU or GPU(s) where the ``Orchestrator`` is running can -be used to execute the AI models, and Torchscript code on data stored within it. +.. 
_standalone_orch_app_script: -Users do not need to know how the data is stored in a clustered configuration and -can address the cluster with the SmartRedis clients like a single block of memory -using simple put/get semantics in SmartRedis. SmartRedis will ensure that data -is evenly distributed amongst all nodes in the cluster. +Application Script +================== +To begin writing the application script, import the necessary SmartRedis packages: -The cluster deployment is optimal for high data throughput scenarios such as -online analysis, training and processing. +.. literalinclude:: tutorials/doc_examples/orch_examples/std_app.py + :language: python + :linenos: + :lines: 1-2 +Client Initialization +--------------------- +To establish a connection with the ``Orchestrator``, we need to initialize a new SmartRedis ``Client``. +Because the ``Orchestrator`` launched in the driver script is sharded, we specify the +constructor argument `cluster` as `True`. -Colocated Orchestrator -======================= +.. literalinclude:: tutorials/doc_examples/orch_examples/std_app.py + :language: python + :linenos: + :lines: 4-5 -A colocated Orchestrator is a special type of Orchestrator that is deployed on -the same compute hosts an a ``Model`` instance defined by the user. In this -deployment, the database is *not* connected together in a cluster and each -shard of the database is addressed individually by the processes running -on that compute host. +.. note:: + Note that the C/C++/Fortran SmartRedis ``Clients`` are capable of reading cluster configurations + from the SmartSim ``Model`` environment and the `cluster` constructor argument does not need to be specified + in those ``Client`` languages. -.. 
|colo-orc| image:: images/co-located-orc-diagram.png - :width: 700 - :alt: Alternative text +Since there is only one ``Orchestrator`` launched in the ``Experiment`` +(the standalone ``Orchestrator``), specifying an ``Orchestrator`` `db_identifier` +is **not** required when initializing the SmartRedis ``Client``. +SmartRedis will handle the connection configuration. +.. note:: + To create a SmartRedis ``Client`` connection to the standalone ``Orchestrator``, the ``Orchestrator`` must be launched + from within the driver script prior to the start of the ``Model``. -|colo-orc| +Data Retrieval +-------------- +To confirm a successful connection to the ``Orchestrator``, we retrieve the tensor set from the ``Experiment`` script. +Use the ``Client.get_tensor`` method to retrieve the tensor named `tensor_1` placed by the driver script: -This deployment is designed for highly performant online inference scenarios where -a distributed process (likely MPI processes) are performing inference with -data local to each process. +.. literalinclude:: tutorials/doc_examples/orch_examples/std_app.py + :language: python + :linenos: + :lines: 7-10 -This method is deemed ``locality based inference`` since data is local to each -process and the ``Orchestrator`` is deployed locally on each compute host where -the distributed application is running. +After the ``Model`` is launched by the driver script, the following output will appear in +`getting-started/model/model.out`:: + Default@17-11-48:The multi-sharded db tensor is: [1 2 3 4] -To create a colocated model, first, create a ``Model`` instance and then call -the ``Model.colocate_db_tcp`` or ``Model.colocate_db_uds`` function. +Data Storage +------------ +Next, create a NumPy tensor to send to the standalone ``Orchestrator`` using +``Client.put_tensor(name, data)``: -.. currentmodule:: smartsim.entity.model +.. literalinclude:: tutorials/doc_examples/orch_examples/std_app.py + :language: python + :linenos: + :lines: 12-15 -.. 
automethod:: Model.colocate_db_tcp - :noindex: +We retrieve `"tensor_2"` in the ``Experiment`` driver script. -.. automethod:: Model.colocate_db_uds - :noindex: +.. _standalone_orch_driver_script: -Here is an example of creating a simple model that is colocated with an -``Orchestrator`` deployment using Unix Domain Sockets +Experiment Driver Script +======================== +To run the previous application script, we define a ``Model`` and ``Orchestrator`` within the +``Experiment`` driver script. Configuring and launching workflow entities (``Model`` and ``Orchestrator``) requires the utilization of +``Experiment`` class methods. The ``Experiment`` object is intended to be instantiated +once and utilized throughout the workflow runtime. -.. code-block:: python +In this example, we instantiate an ``Experiment`` object with the name `getting-started` +and the `launcher` set to `auto`. When using `launcher=auto`, SmartSim attempts to find a launcher on the machine. +For example, if this script were run on a Slurm-based system, SmartSim will automatically set the launcher to `slurm`. +We also setup the SmartSim `logger` to output information from the ``Experiment`` at runtime: + +.. literalinclude:: tutorials/doc_examples/orch_examples/std_driver.py + :language: python + :linenos: + :lines: 1-9 + +Orchestrator Initialization +--------------------------- +In the next stage of the ``Experiment``, we create a standalone ``Orchestrator``. + +To create a standalone ``Orchestrator``, utilize the ``Experiment.create_database`` function: + +.. literalinclude:: tutorials/doc_examples/orch_examples/std_driver.py + :language: python + :linenos: + :lines: 11-12 + +Client Initialization +--------------------- +The SmartRedis ``Client`` object contains functions that manipulate, send, and retrieve +data on the ``Orchestrator``. Begin by initializing a SmartRedis ``Client`` object for the standalone ``Orchestrator``. 
+ +SmartRedis ``Clients`` in driver scripts do not have the ability to use a `db-identifier` or +rely on automatic configurations to connect to ``Orchestrators``. Therefore, when creating a SmartRedis ``Client`` +connection from within a driver script, specify the address of the ``Orchestrator`` you would like to connect to. +You can easily retrieve the ``Orchestrator`` address using the ``Orchestrator.get_address`` function: + +.. literalinclude:: tutorials/doc_examples/orch_examples/std_driver.py + :language: python + :linenos: + :lines: 14-15 + +Data Storage +------------ +In the application script, we retrieved a NumPy tensor stored from within the driver script. +To support the application functionality, we create a +NumPy array in the ``Experiment`` driver script to send to the ``Orchestrator``. To +send a tensor to the ``Orchestrator``, use the function ``Client.put_tensor(name, data)``: + +.. literalinclude:: tutorials/doc_examples/orch_examples/std_driver.py + :language: python + :linenos: + :lines: 17-20 + +Model Initialization +-------------------- +In the next stage of the ``Experiment``, we configure and create +a SmartSim ``Model`` and specify the executable path during ``Model`` creation: + +.. literalinclude:: tutorials/doc_examples/orch_examples/std_driver.py + :language: python + :linenos: + :lines: 22-27 + +File Generation +--------------- +To create an isolated output directory for the ``Orchestrator`` and ``Model``, invoke ``Experiment.generate`` on the +``Experiment`` instance `exp` with `standalone_orchestrator` and `model` as input parameters: + +.. literalinclude:: tutorials/doc_examples/orch_examples/std_driver.py + :language: python + :linenos: + :lines: 29-30 + +Invoking ``Experiment.generate(standalone_orchestrator, model)`` will create two directories: +`standalone_orchestrator/` and `model/`. Each of these directories will store +two output files: a `.out` file and a `.err` file. 
- from smartsim import Experiment - exp = Experiment("colo-test", launcher="auto") +.. note:: + It is important to invoke ``Experiment.generate`` with all ``Experiment`` entity instances + before launching. This will ensure that the output files are organized in the main ``experiment-name/`` + folder. In this example, the ``Experiment`` folder is named `getting-started/`. - colo_settings = exp.create_run_settings(exe="./some_mpi_app") +Entity Deployment +----------------- +In the next stage of the ``Experiment``, we launch the ``Orchestrator``, then launch the ``Model``. - colo_model = exp.create_model("colocated_model", colo_settings) - colo_model.colocate_db_uds( - db_cpus=1, # cpus given to the database on each node - debug=False # include debug information (will be slower) - ifname=network_interface # specify network interface(s) to use (i.e. "ib0" or ["ib0", "lo"]) - ) - exp.start(colo_model) +Step 1: Start Orchestrator +'''''''''''''''''''''''''' +In the context of this ``Experiment``, it's essential to create and launch +the ``Orchestrator`` as a preliminary step before any other workflow entities. This is important +because the application requests and sends tensors to a launched ``Orchestrator``. +To launch the ``Orchestrator``, pass the ``Orchestrator`` instance to ``Experiment.start``. -By default, SmartSim will pin the database to the first _N_ CPUs according to ``db_cpus``. By -specifying the optional argument ``custom_pinning``, an alternative pinning can be specified -by sending in a list of CPU ids (e.g [0,2,range(5,8)]). For optimal performance, most users -will want to also modify the RunSettings for the model to pin their application to cores not -occupied by the database. +.. literalinclude:: tutorials/doc_examples/orch_examples/std_driver.py + :language: python + :linenos: + :lines: 32-33 -.. warning:: +The ``Experiment.start`` function launches the ``Orchestrator`` for use within the workflow. 
+In other words, the function deploys the ``Orchestrator`` on the allocated compute resources. + +Step 2: Start Model +''''''''''''''''''' +Next, launch the `model` instance using the ``Experiment.start`` function: + +.. literalinclude:: tutorials/doc_examples/orch_examples/std_driver.py + :language: python + :linenos: + :lines: 35-36 + +In the next subsection, we request tensors placed by the ``Model`` application. +We specify `block=True` to ``exp.start`` to require the ``Model`` to finish before +the ``Experiment`` continues. + +Data Polling +------------ +Next, check if the tensor exists in the standalone ``Orchestrator`` using ``Client.poll_tensor``. +This function queries for data in the ``Orchestrator``. The function requires the tensor name (`name`), +how many milliseconds to wait in between queries (`poll_frequency_ms`), +and the total number of times to query (`num_tries`). Check if the data exists in the ``Orchestrator`` by +polling every 100 milliseconds until 10 attempts have completed: + +.. literalinclude:: tutorials/doc_examples/orch_examples/std_driver.py + :language: python + :linenos: + :lines: 38-41 + +When you execute the driver script, the output will be as follows:: + + 23:45:46 system.host.com SmartSim[87400] INFO The tensor exists: True + +Cleanup +------- +Finally, use the ``Experiment.stop`` function to stop the ``Orchestrator`` instance. Print the +workflow summary with ``Experiment.summary``: + +.. literalinclude:: tutorials/doc_examples/orch_examples/std_driver.py + :language: python + :linenos: + :lines: 43-46 + +When you run the ``Experiment``, the following output will appear:: + + | | Name | Entity-Type | JobID | RunID | Time | Status | Returncode | + |----|----------------|---------------|-------------|---------|---------|-----------|--------------| + | 0 | model | Model | 1658679.3 | 0 | 1.3342 | Completed | 0 | + | 1 | orchestrator_0 | DBNode | 1658679.2+2 | 0 | 42.8742 | Cancelled | 0 | + +.. 
_colocated_orch_doc: + +==================== +Colocated Deployment +==================== +-------- +Overview +-------- +During colocated ``Orchestrator`` deployment, a SmartSim ``Orchestrator`` (the database) runs on +the ``Model``'s compute node(s). Colocated ``Orchestrators`` can only be deployed as isolated instances +on each compute node and cannot be clustered over multiple nodes. The ``Orchestrator`` on each application node is +utilized by SmartRedis ``Clients`` on the same node. With a colocated ``Orchestrator``, all interactions +with the database occur on the same node, thus resulting in lower latency compared to the standard ``Orchestrator``. +A colocated ``Orchestrator`` is ideal when the data and hardware accelerator are located on the +same compute node. + +Communication between a colocated ``Orchestrator`` and ``Model`` is initiated in the application through a +SmartRedis ``Client``. Since a colocated ``Orchestrator`` is launched when the ``Model`` +is started by the ``Experiment``, connecting a SmartRedis ``Client`` to a colocated ``Orchestrator`` is only possible from within +the associated ``Model`` application. + +There are **three** methods for connecting the SmartRedis ``Client`` to the colocated ``Orchestrator``: + +- In an ``Experiment`` with a single deployed ``Orchestrator``, users can rely on SmartRedis + to detect the ``Orchestrator`` address through runtime configuration of the SmartSim ``Model`` environment. + A default ``Client`` constructor, with no user-specified parameters, is sufficient to + connect to the ``Orchestrator``. The only exception is for the Python ``Client``, which requires + the `cluster=False` constructor parameter for the colocated ``Orchestrator``. +- In an ``Experiment`` with multiple ``Orchestrators``, users can connect to a specific ``Orchestrator`` by + first specifying the `db_identifier` in the ``ConfigOptions`` constructor. 
Subsequently, users should pass the + ``ConfigOptions`` instance to the ``Client`` constructor. +- Users can specify or override automatically configured connection options by providing the + ``Orchestrator`` address in the ``ConfigOptions`` object. Subsequently, users should pass the ``ConfigOptions`` + instance to the ``Client`` constructor. + +Below is an image illustrating communication within a colocated ``Model`` spanning multiple compute nodes. +As demonstrated in the diagram, each process of the application creates its own SmartRedis ``Client`` +connection to the ``Orchestrator`` running on the same host. + +.. figure:: images/colocated_orchestrator-1.png + + Sample Colocated ``Orchestrator`` Deployment + +Colocated deployment is ideal for highly performant online inference scenarios where +a distributed application (likely an MPI application) is performing inference with +data local to each process. With colocated deployment, data does not need to travel +off-node to be used to evaluate a ML model, and the results of the ML model evaluation +are stored on-node. + +If a workflow requires an application to both leverage colocated +deployment and standalone deployment, multiple ``Clients`` can be instantiated within an application, +with each ``Client`` connected to a unique deployment. This is accomplished through the use of the +`db-identifier` specified at ``Orchestrator`` initialization time. + +------- +Example +------- +In the following example, we demonstrate deploying a colocated ``Orchestrator`` on an HPC system. +Once the ``Orchestrator`` is launched, we walk through connecting a SmartRedis ``Client`` +from within the application script to transmit and poll for data on the ``Orchestrator``. + +The example is comprised of two script files: + +- :ref:`Application Script` + The application script is a Python script that connects a SmartRedis + ``Client`` to the colocated ``Orchestrator``. 
From within the application script, + the ``Client`` is utilized to both send and retrieve data. The source code example + is available in the dropdown below for convenient execution and customization. + + .. dropdown:: Example Application Script Source Code + + .. literalinclude:: tutorials/doc_examples/orch_examples/colo_app.py - Pinning is not supported on MacOS X. Setting ``custom_pinning`` to anything - other than ``None`` will raise a warning and the input will be ignored. +- :ref:`Experiment Driver Script` + The ``Experiment`` driver script launches and manages + the example entities through the ``Experiment`` API. + In the driver script, we use the ``Experiment`` API + to create and launch a colocated ``Model``. The source code example is available + in the dropdown below for convenient execution and customization. + + .. dropdown:: Example Experiment Driver source code + + .. literalinclude:: tutorials/doc_examples/orch_examples/colo_driver.py + +.. _colocated_orch_app_script: + +Application Script +================== +To begin writing the application script, import the necessary SmartRedis packages: + +.. literalinclude:: tutorials/doc_examples/orch_examples/colo_app.py + :language: python + :linenos: + :lines: 1-2 + +Client Initialization +--------------------- +To establish a connection with the colocated ``Orchestrator``, we need to initialize a +new SmartRedis ``Client`` and specify `cluster=False` since colocated deployments are never +clustered but only single-sharded. + +.. literalinclude:: tutorials/doc_examples/orch_examples/colo_app.py + :language: python + :linenos: + :lines: 4-5 + +.. note:: + Note that the C/C++/Fortran SmartRedis ``Clients`` are capable of reading cluster configurations + from the ``Model`` environment and the `cluster` constructor argument does not need to be specified + in those ``Client`` languages. .. 
note::
+    Since there is only one ``Orchestrator`` launched in the ``Experiment``
+    (the colocated ``Orchestrator``), specifying an ``Orchestrator`` `db_identifier`
+    is not required when initializing the ``Client``. SmartRedis will handle the
+    connection configuration.
- Pinning _only_ affects the co-located deployment because both the application and the database
- are sharing the same compute node. For the clustered deployment, a shard occupies the entirety
- of the node.
+.. note::
+    To create a ``Client`` connection to the colocated ``Orchestrator``, the colocated ``Model`` must be launched
+    from within the driver script. You must execute the Python driver script, otherwise, there will
+    be no ``Orchestrator`` to connect the ``Client`` to.
+
+Data Storage
+------------
+Next, using the SmartRedis ``Client`` instance, we create and store a NumPy tensor through
+``Client.put_tensor(name, data)``:
+
+.. literalinclude:: tutorials/doc_examples/orch_examples/colo_app.py
+   :language: python
+   :linenos:
+   :lines: 7-10
+
+We will retrieve `"tensor_1"` in the following section.
+
+Data Retrieval
+--------------
+To confirm a successful connection to the ``Orchestrator``, we retrieve the tensor we stored.
+Use the ``Client.get_tensor`` method to retrieve the tensor by specifying the name
+`"tensor_1"`:
+
+.. literalinclude:: tutorials/doc_examples/orch_examples/colo_app.py
+   :language: python
+   :linenos:
+   :lines: 12-15
+
+When the ``Experiment`` completes, you can find the following log message in `colo_model.out`::
+
+    Default@21-48-01:The colocated db tensor is: [1 2 3 4]
+
+.. _colocated_orch_driver_script:
+
+Experiment Driver Script
+========================
+To run the previous application script, a ``Model`` object must be configured and launched within the
+``Experiment`` driver script. Configuring and launching workflow entities (``Model``)
+requires the utilization of ``Experiment`` class methods.
The ``Experiment`` object is intended to +be instantiated once and utilized throughout the workflow runtime. + +In this example, we instantiate an ``Experiment`` object with the name `getting-started` +and the `launcher` set to `auto`. When using `launcher=auto`, SmartSim attempts to find a launcher on the machine. +In this case, since we are running the example on a Slurm-based machine, +SmartSim will automatically set the launcher to `slurm`. We set up the SmartSim `logger` +to output information from the ``Experiment`` at runtime: + +.. literalinclude:: tutorials/doc_examples/orch_examples/colo_driver.py + :language: python + :linenos: + :lines: 1-9 + +Colocated Model Initialization +------------------------------ +In the next stage of the ``Experiment``, we create and launch a colocated ``Model`` that +runs the application script with a ``Orchestrator`` on the same compute node. -Redis -===== +Step 1: Configure +''''''''''''''''' +In this example ``Experiment``, the ``Model`` application is a Python script as defined in section: +:ref:`Application Script`. Before initializing the ``Model`` object, we must use +``Experiment.create_run_settings`` to create a ``RunSettings`` object that defines how to execute +the ``Model``. To launch the Python script in this example workflow, we specify the path to the application +file `application_script.py` as the `exe_args` parameter and the executable `exe_ex` (the Python +executable on this system) as `exe` parameter. The ``Experiment.create_run_settings`` function +will return a ``RunSettings`` object that can then be used to initialize the ``Model`` object. -.. _Redis: https://github.com/redis/redis -.. _RedisAI: https://github.com/RedisAI/RedisAI +.. note:: + Change the `exe_args` argument to the path of the application script + on your file system to run the example. -The ``Orchestrator`` is built on `Redis`_. 
Largely, the job of the ``Orchestrator`` is to
-create a Python reference to a Redis deployment so that users can launch, monitor
-and stop a Redis deployment on workstations and HPC systems.
+Use the ``RunSettings`` helper functions to
+configure the distribution of computational tasks (``RunSettings.set_nodes``). In this
+example, we specify to SmartSim that we intend the ``Model`` to run on a single compute node.
-Redis was chosen for the Orchestrator because it resides in-memory, can be distributed on-node
-as well as across nodes, and provides low latency data access to many clients in parallel. The
-Redis ecosystem was a primary driver as the Redis module system provides APIs for languages,
-libraries, and techniques used in Data Science. In particular, the ``Orchestrator``
-relies on `RedisAI`_ to provide access to Machine Learning runtimes.
+.. literalinclude:: tutorials/doc_examples/orch_examples/colo_driver.py
+   :language: python
+   :linenos:
+   :lines: 11-14
-At its core, Redis is a key-value store. This means that put/get semantics are used to send
-messages to and from the database. SmartRedis clients use a specific hashing algorithm, CRC16, to ensure
-that data is evenly distributed amongst all database nodes. Notably, a user is not required to
-know where (which database node) data or Datasets (see Dataset API) are stored as the
-SmartRedis clients will infer their location for the user.
+Step 2: Initialize
+''''''''''''''''''
+Next, create a ``Model`` instance using the ``Experiment.create_model`` factory method.
+Pass the ``model_settings`` object as input to the method and
+assign the returned ``Model`` instance to the variable `model`:
+
+.. literalinclude:: tutorials/doc_examples/orch_examples/colo_driver.py
+   :language: python
+   :linenos:
+   :lines: 16-17
+
+Step 3: Colocate
+''''''''''''''''
+To colocate an ``Orchestrator`` with a ``Model``, use the ``Model.colocate_db_uds`` function.
+This function will colocate an ``Orchestrator`` instance with this ``Model`` over +a Unix domain socket connection. +.. literalinclude:: tutorials/doc_examples/orch_examples/colo_driver.py + :language: python + :linenos: + :lines: 19-20 -KeyDB -===== +Step 4: Generate Files +'''''''''''''''''''''' +Next, generate the ``Experiment`` entity directories by passing the ``Model`` instance to +``Experiment.generate``: -.. _KeyDB: https://github.com/EQ-Alpha/KeyDB +.. literalinclude:: tutorials/doc_examples/orch_examples/colo_driver.py + :language: python + :linenos: + :lines: 22-23 -`KeyDB`_ is a multi-threaded fork of Redis that can be swapped in as the database for -the ``Orchestrator`` in SmartSim. KeyDB can be swapped in for Redis by setting the -``REDIS_PATH`` environment variable to point to the ``keydb-server`` binary. +Step 5: Start +''''''''''''' +Next, launch the colocated ``Model`` instance using the ``Experiment.start`` function. -A full example of configuring KeyDB to run in SmartSim is shown below +.. literalinclude:: tutorials/doc_examples/orch_examples/colo_driver.py + :language: python + :linenos: + :lines: 25-26 -.. code-block:: bash +Cleanup +------- +.. note:: + Since the colocated ``Orchestrator`` is automatically torn down by SmartSim once the colocated ``Model`` + has finished, we do not need to `stop` the ``Orchestrator``. + +.. 
literalinclude:: tutorials/doc_examples/orch_examples/colo_driver.py + :language: python + :linenos: + :lines: 28-29 + +When you run the experiment, the following output will appear:: - # build KeyDB - # see https://github.com/EQ-Alpha/KeyDB + | | Name | Entity-Type | JobID | RunID | Time | Status | Returncode | + |----|--------|---------------|-----------|---------|---------|-----------|--------------| + | 0 | model | Model | 1592652.0 | 0 | 10.1039 | Completed | 0 | - # get KeyDB configuration file - wget https://github.com/CrayLabs/SmartSim/blob/d3d252b611c9ce9d9429ba6eeb71c15471a78f08/smartsim/_core/config/keydb.conf +.. _mutli_orch_doc: - export REDIS_PATH=/path/to/keydb-server - export REDIS_CONF=/path/to/keydb.conf +====================== +Multiple Orchestrators +====================== +SmartSim supports automating the deployment of multiple ``Orchestrators`` +from within an ``Experiment``. Communication with the ``Orchestrator`` via a SmartRedis ``Client`` is possible with the +`db_identifier` argument that is required when initializing an ``Orchestrator`` or +colocated ``Model`` during a multiple ``Orchestrator`` ``Experiment``. When initializing a SmartRedis +``Client`` during the ``Experiment``, create a ``ConfigOptions`` object to specify the `db_identifier` +argument used when creating the ``Orchestrator``. Pass the ``ConfigOptions`` object to +the ``Client`` init call. - # run smartsim workload +.. _mutli_orch: +----------------------------- Multiple Orchestrator Example -============================= +----------------------------- SmartSim offers functionality to automate the deployment of multiple databases, supporting workloads that require multiple ``Orchestrators`` for a ``Experiment``. For instance, a workload may consist of a simulation with high inference performance demands (necessitating a co-located deployment), -along with an analysis and -visualization workflow connected to the simulation (requiring a standard orchestrator). 
-In the following example, we simulate a simple version of this use case. +along with an analysis and visualization workflow connected to the simulation +(requiring a standalone ``Orchestrator``). In the following example, we simulate a +simple version of this use case. The example is comprised of two script files: -* The :ref:`Application Script` -* The :ref:`Experiment Driver Script` +* The Application Script +* The ``Experiment`` Driver Script **The Application Script Overview:** In this example, the application script is a python file that contains instructions to complete computational tasks. Applications are not limited to Python and can also be written in C, C++ and Fortran. -This script specifies creating a Python SmartRedis client for each -standard orchestrator and a colocated orchestrator. We use the -clients to request data from both standard databases, then -transfer the data to the colocated database. The application -file is launched by the experiment driver script +This script specifies creating a Python SmartRedis ``Client`` for each +standalone ``Orchestrator`` and a colocated ``Orchestrator``. We use the +``Clients`` to request data from both standalone ``Orchestrators``, then +transfer the data to the colocated ``Orchestrator``. The application +file is launched by the ``Experiment`` driver script through a ``Model`` stage. **The Application Script Contents:** -1. Connecting SmartRedis clients within the application to retrieve tensors - from the standard databases to store in a colocated database. Details in section: - :ref:`Initialize the Clients`. +1. Connecting SmartRedis ``Clients`` within the application to retrieve tensors + from the standalone ``Orchestrators`` to store in a colocated ``Orchestrator``. Details in section: + :ref:`Initialize the Clients`. 
**The Experiment Driver Script Overview:** -The experiment driver script holds the stages of the workflow +The ``Experiment`` driver script holds the stages of the workflow and manages their execution through the ``Experiment`` API. -We initialize an Experiment +We initialize an ``Experiment`` at the beginning of the Python file and use the ``Experiment`` to iteratively create, configure and launch computational kernels on the system through the `slurm` launcher. @@ -211,143 +691,146 @@ runs the application. **The Experiment Driver Script Contents:** -1. Launching two standard Orchestrators with unique identifiers. Details in section: - :ref:`Launch Multiple Orchestrators`. -2. Launching the application script with a co-located database. Details in section: - :ref:`Initialize a Colocated Model`. -3. Connecting SmartRedis clients within the driver script to send tensors to standard Orchestrators +1. Launching two standalone ``Orchestrators`` with unique identifiers. Details in section: + :ref:`Launch Multiple Orchestrators`. +2. Launching the application script with a colocated ``Orchestrator``. Details in section: + :ref:`Initialize a Colocated Model`. +3. Connecting SmartRedis ``Clients`` within the driver script to send tensors to standalone ``Orchestrators`` for retrieval within the application. Details in section: - :ref:`Create Client Connections to Orchestrators`. + :ref:`Create Client Connections to Orchestrators`. -Setup and run instructions can be found :ref:`here` +Setup and run instructions can be found :ref:`here` + +.. _app_script_multi_db: The Application Script ----------------------- -Applications interact with the databases -through a SmartRedis client. +====================== +Applications interact with the ``Orchestrators`` +through a SmartRedis ``Client``. In this section, we write an application script to demonstrate how to connect SmartRedis -clients in the context of multiple -launched databases. 
Using the clients, we retrieve tensors -from two databases launched in the driver script, then store -the tensors in the colocated database. +``Clients`` in the context of multiple +launched ``Orchestrators``. Using the ``Clients``, we retrieve tensors +from two ``Orchestrators`` launched in the driver script, then store +the tensors in the colocated ``Orchestrators``. .. note:: - The Experiment must be started to use the Orchestrators within the + The ``Experiment`` must be started to use the ``Orchestrators`` within the application script. Otherwise, it will fail to connect. - Find the instructions on how to launch :ref:`here` + Find the instructions on how to launch :ref:`here` To begin, import the necessary packages: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/application_script.py +.. literalinclude:: tutorials/getting_started/multi_db_example/application_script.py :language: python :linenos: :lines: 1-3 +.. _init_model_client: + Initialize the Clients -^^^^^^^^^^^^^^^^^^^^^^ -To establish a connection with each database, -we need to initialize a new SmartRedis client for each -``Orchestrator``. +---------------------- +To establish a connection with each ``Orchestrators``, +we need to initialize a new SmartRedis ``Client`` for each. Step 1: Initialize ConfigOptions -"""""""""""""""""""""""""""""""" -Since we are launching multiple databases within the experiment, +'''''''''''''''''''''''''''''''' +Since we are launching multiple ``Orchestrators`` within the ``Experiment``, the SmartRedis ``ConfigOptions`` object is required when initializing -a client in the application. -We use the ``ConfigOptions.create_from_environment()`` +a ``Client`` in the application. +We use the ``ConfigOptions.create_from_environment`` function to create three instances of ``ConfigOptions``, with one instance associated with each launched ``Orchestrator``. 
-Most importantly, to associate each launched Orchestrator to a ConfigOptions object, -the ``create_from_environment()`` function requires specifying the unique database identifier +Most importantly, to associate each launched ``Orchestrator`` to a ``ConfigOptions`` object, +the ``create_from_environment`` function requires specifying the unique ``Orchestrator`` identifier argument named `db_identifier`. -For the single-sharded database: +For the single-sharded ``Orchestrator``: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/application_script.py +.. literalinclude:: tutorials/getting_started/multi_db_example/application_script.py :language: python :linenos: :lines: 5-6 -For the multi-sharded database: +For the multi-sharded ``Orchestrator``: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/application_script.py +.. literalinclude:: tutorials/getting_started/multi_db_example/application_script.py :language: python :linenos: :lines: 10-11 -For the colocated database: +For the colocated ``Orchestrator``: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/application_script.py +.. literalinclude:: tutorials/getting_started/multi_db_example/application_script.py :language: python :linenos: :lines: 15-16 Step 2: Initialize the Client Connections -""""""""""""""""""""""""""""""""""""""""" +''''''''''''''''''''''''''''''''''''''''' Now that we have three ``ConfigOptions`` objects, we have the -tools necessary to initialize three SmartRedis clients and -establish a connection with the three databases. -We use the SmartRedis ``Client`` API to create the client instances by passing in +tools necessary to initialize three SmartRedis ``Clients`` and +establish a connection with the three ``Orchestrators``. +We use the SmartRedis ``Client`` API to create the ``Client`` instances by passing in the ``ConfigOptions`` objects and assigning a `logger_name` argument. -Single-sharded database: +Single-sharded ``Orchestrator``: -.. 
literalinclude:: ../tutorials/getting_started/multi_db_example/application_script.py +.. literalinclude:: tutorials/getting_started/multi_db_example/application_script.py :language: python :linenos: :lines: 7-8 -Multi-sharded database: +Multi-sharded ``Orchestrator``: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/application_script.py +.. literalinclude:: tutorials/getting_started/multi_db_example/application_script.py :language: python :linenos: :lines: 12-13 -Colocated database: +Colocated ``Orchestrator``: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/application_script.py +.. literalinclude:: tutorials/getting_started/multi_db_example/application_script.py :language: python :linenos: :lines: 17-18 Retrieve Data and Store Using SmartRedis Client Objects -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -To confirm a successful connection to each database, we will retrieve the tensors +------------------------------------------------------- +To confirm a successful connection to each ``Orchestrator``, we will retrieve the tensors that we plan to store in the python driver script. After retrieving, we -store both tensors in the colocated database. -The ``Client.get_tensor()`` method allows +store both tensors in the colocated ``Orchestrator``. +The ``Client.get_tensor`` method allows retrieval of a tensor. It requires the `name` of the tensor assigned -when sent to the database via ``Client.put_tensor()``. +when sent to the ``Orchestrator`` via ``Client.put_tensor``. -.. literalinclude:: ../tutorials/getting_started/multi_db_example/application_script.py +.. 
literalinclude:: tutorials/getting_started/multi_db_example/application_script.py :language: python :linenos: :lines: 20-26 -Later, when you run the experiment driver script the following output will appear in ``tutorial_model.out`` +Later, when you run the ``Experiment`` driver script the following output will appear in ``tutorial_model.out`` located in ``getting-started-multidb/tutorial_model/``:: Model: single shard logger@00-00-00:The single sharded db tensor is: [1 2 3 4] Model: multi shard logger@00-00-00:The multi sharded db tensor is: [5 6 7 8] -This output showcases that we have established a connection with multiple Orchestrators. +This output showcases that we have established a connection with multiple ``Orchestrators``. -Next, take the tensors retrieved from the standard deployment databases and -store them in the colocated database using ``Client.put_tensor(name, data)``. +Next, take the tensors retrieved from the standalone deployment ``Orchestrators`` and +store them in the colocated ``Orchestrator`` using ``Client.put_tensor(name, data)``. -.. literalinclude:: ../tutorials/getting_started/multi_db_example/application_script.py +.. literalinclude:: tutorials/getting_started/multi_db_example/application_script.py :language: python :linenos: :lines: 28-30 -Next, check if the tensors exist in the colocated database using ``Client.poll_tensor()``. -This function queries for data in the database. The function requires the tensor name (`name`), +Next, check if the tensors exist in the colocated ``Orchestrator`` using ``Client.poll_tensor``. +This function queries for data in the ``Orchestrator``. The function requires the tensor name (`name`), how many milliseconds to wait in between queries (`poll_frequency_ms`), and the total number of times to query (`num_tries`): -.. literalinclude:: ../tutorials/getting_started/multi_db_example/application_script.py +.. 
literalinclude:: tutorials/getting_started/multi_db_example/application_script.py :language: python :linenos: :lines: 32-37 @@ -358,156 +841,162 @@ The output will be as follows:: Model: colo logger@00-00-00:The colocated db has tensor_2: True The Experiment Driver Script ----------------------------- +============================ To run the previous application, we must define workflow stages within a workload. Defining workflow stages requires the utilization of functions associated -with the ``Experiment`` object. The Experiment object is intended to be instantiated +with the ``Experiment`` object. The ``Experiment`` object is intended to be instantiated once and utilized throughout the workflow runtime. In this example, we instantiate an ``Experiment`` object with the name ``getting-started-multidb``. We setup the SmartSim ``logger`` to output information from the Experiment. -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 1-10 +.. _launch_multiple_orch: + Launch Multiple Orchestrators -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +----------------------------- In the context of this ``Experiment``, it's essential to create and launch -the databases as a preliminary step before any other components since -the application script requests tensors from the launched databases. +the ``Orchestrators`` as a preliminary step before any other components since +the application script requests tensors from the launched ``Orchestrators``. -We aim to showcase the multi-database automation capabilities of SmartSim, so we -create two databases in the workflow: a single-sharded database and a -multi-sharded database. +We aim to showcase the multi-Orchestrator automation capabilities of SmartSim, so we +create two ``Orchestrators`` in the workflow: a single-sharded ``Orchestrator`` and a +multi-sharded ``Orchestrator``. 
Step 1: Initialize Orchestrators -"""""""""""""""""""""""""""""""" -To create an database, utilize the ``Experiment.create_database()`` function. +'''''''''''''''''''''''''''''''' +To create an ``Orchestrator``, utilize the ``Experiment.create_database`` function. The function requires specifying a unique -database identifier argument named `db_identifier` to launch multiple databases. -This step is necessary to connect to databases outside of the driver script. +``Orchestrator`` identifier argument named `db_identifier` to launch multiple ``Orchestrators``. +This step is necessary to connect to ``Orchestrators`` outside of the driver script. We will use the `db_identifier` names we specified in the application script. -For the single-sharded database: +For the single-sharded ``Orchestrator``: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 12-14 -For the multi-sharded database: +For the multi-sharded ``Orchestrator``: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 16-18 .. note:: - Calling ``exp.generate()`` will create two subfolders - (one for each Orchestrator created in the previous step) - whose names are based on the db_identifier of that Orchestrator. + Calling ``exp.generate`` will create two subfolders + (one for each ``Orchestrator`` created in the previous step) + whose names are based on the `db_identifier` of that ``Orchestrator``. In this example, the Experiment folder is - named ``getting-started-multidb/``. Within this folder, two Orchestrator subfolders will + named ``getting-started-multidb/``. Within this folder, two ``Orchestrator`` subfolders will be created, namely ``single_shard_db_identifier/`` and ``multi_shard_db_identifier/``. 
-Step 2: Start Databases -""""""""""""""""""""""" -Next, to launch the databases, -pass the database instances to ``Experiment.start()``. +Step 2: Start +''''''''''''' +Next, to launch the ``Orchestrators``, +pass the ``Orchestrator`` instances to ``Experiment.start``. -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 20-21 -The ``Experiment.start()`` function launches the ``Orchestrators`` for use within the workflow. In other words, the function -deploys the databases on the allocated compute resources. +The ``Experiment.start`` function launches the ``Orchestrators`` for use within the workflow. In other words, the function +deploys the ``Orchestrators`` on the allocated compute resources. .. note:: By setting `summary=True`, SmartSim will print a summary of the - experiment before it is launched. After printing the experiment summary, - the experiment is paused for 10 seconds giving the user time to - briefly scan the summary contents. If we set `summary=False`, then the experiment + ``Experiment`` before it is launched. After printing the ``Experiment`` summary, + the ``Experiment`` is paused for 10 seconds giving the user time to + briefly scan the summary contents. If we set `summary=False`, then the ``Experiment`` would be launched immediately with no summary. +.. _client_connect_orch: + Create Client Connections to Orchestrators -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +------------------------------------------ The SmartRedis ``Client`` object contains functions that manipulate, send, and receive -data within the database. Each database has a single, dedicated SmartRedis ``Client``. -Begin by initializing a SmartRedis ``Client`` object per launched database. +data within the ``Orchestrator``. Each ``Orchestrator`` has a single, dedicated SmartRedis ``Client``. 
+Begin by initializing a SmartRedis ``Client`` object per launched ``Orchestrator``. To create a designated SmartRedis ``Client``, you need to specify the address of the target -running database. You can easily retrieve this address using the ``Orchestrator.get_address()`` function. +running ``Orchestrator``. You can easily retrieve this address using the ``Orchestrator.get_address`` function. -For the single-sharded database: +For the single-sharded ``Orchestrator``: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 23-24 -For the multi-sharded database: +For the multi-sharded ``Orchestrator``: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 25-26 Store Data Using Clients -^^^^^^^^^^^^^^^^^^^^^^^^ +------------------------ In the application script, we retrieved two NumPy tensors. To support the apps functionality, we will create two -NumPy arrays in the python driver script and send them to the a database. To -accomplish this, we use the ``Client.put_tensor()`` function with the respective -database client instances. +NumPy arrays in the python driver script and send them to an ``Orchestrator``. To +accomplish this, we use the ``Client.put_tensor`` function with the respective +``Orchestrator`` `client` instances. -For the single-sharded database: +For the single-sharded ``Orchestrator``: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 28-31 -For the multi-sharded database: +For the multi-sharded ``Orchestrator``: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +..
literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 33-36 -Lets check to make sure the database tensors do not exist in the incorrect databases: +Let's check to make sure the ``Orchestrator`` tensors do not exist in the incorrect ``Orchestrators``: -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 38-42 -When you run the experiment, the following output will appear:: +When you run the ``Experiment``, the following output will appear:: 00:00:00 system.host.com SmartSim[#####] INFO The multi shard array key exists in the incorrect database: False 00:00:00 system.host.com SmartSim[#####] INFO The single shard array key exists in the incorrect database: False +.. _init_colocated_model: + Initialize a Colocated Model -^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -In the next stage of the experiment, we -launch the application script with a co-located database +---------------------------- +In the next stage of the ``Experiment``, we +launch the application script with a co-located ``Orchestrator`` by configuring and creating a SmartSim colocated ``Model``. Step 1: Configure -""""""""""""""""" -You can specify the run settings of a model. -In this experiment, we invoke the Python interpreter to run -the python script defined in section: :ref:`The Application Script`. -To configure this into a ``Model``, we use the ``Experiment.create_run_settings()`` function. +''''''''''''''''' +You can specify the run settings of a ``Model``. +In this ``Experiment``, we invoke the Python interpreter to run +the python script defined in section: :ref:`The Application Script`. +To configure this into a SmartSim ``Model``, we use the ``Experiment.create_run_settings`` function. The function returns a ``RunSettings`` object.
When initializing the RunSettings object, we specify the path to the application file, `application_script.py`, for ``exe_args``, and the run command for ``exe``. -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 44-45 @@ -517,62 +1006,66 @@ we specify the path to the application file, on your machine to run the example. With the ``RunSettings`` instance, -configure the the distribution of computational tasks (``RunSettings.set_nodes()``) and the number of instances -the script is execute on each node (``RunSettings.set_tasks_per_node()``). In this +configure the distribution of computational tasks (``RunSettings.set_nodes``) and the number of instances +the script is executed on each node (``RunSettings.set_tasks_per_node``). In this example, we specify to SmartSim that we intend to execute the script once on a single node. -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 46-48 Step 2: Initialize -"""""""""""""""""" -Next, create a ``Model`` instance using the ``Experiment.create_model()``. +'''''''''''''''''' +Next, create a ``Model`` instance using the ``Experiment.create_model`` function. Pass the ``model_settings`` object as an argument -to the ``create_model()`` function and assign to the variable ``model``. +to the ``create_model`` function and assign to the variable ``model``. -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +..
literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 49-50 Step 2: Colocate -"""""""""""""""" -To colocate the model, use the ``Model.colocate_db_uds()`` function to -Colocate an Orchestrator instance with this Model over +'''''''''''''''' +To colocate the ``Model``, use the ``Model.colocate_db_uds`` function to +Colocate an ``Orchestrator`` instance with this ``Model`` over a Unix domain socket connection. -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 51-52 This method will initialize settings which add an unsharded -database to this Model instance. Only this Model will be able -to communicate with this colocated database by using the loopback TCP interface. +``Orchestrator`` to this ``Model`` instance. Only this ``Model`` will be able +to communicate with this colocated ``Orchestrator`` by using the loopback TCP interface. Step 3: Start -""""""""""""" -Next, launch the colocated model instance using the ``Experiment.start()`` function. +''''''''''''' +Next, launch the colocated ``Model`` instance using the ``Experiment.start`` function. -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 53-54 .. note:: We set `block=True`, - so that ``Experiment.start()`` waits until the last Model has finished + so that ``Experiment.start`` waits until the last ``Model`` has finished before returning: it will act like a job monitor, letting us know if processes run, complete, or fail. Cleanup Experiment -^^^^^^^^^^^^^^^^^^ -Finally, use the ``Experiment.stop()`` function to stop the database instances. Print the -workflow summary with ``Experiment.summary()``. 
+------------------ +Finally, use the ``Experiment.stop`` function to stop the standard ``Orchestrator`` instances. + +.. note:: + Co-located ``Orchestrators`` are stopped when their associated ``Models`` are stopped. -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +Print the workflow summary with ``Experiment.summary``. + +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: :lines: 56-59 @@ -586,16 +1079,18 @@ When you run the experiment, the following output will appear:: | 1 | single_shard_db_identifier_0 | DBNode | 1556529.3 | 0 | 68.8732 | Cancelled | 0 | | 2 | multi_shard_db_identifier_0 | DBNode | 1556529.4+2 | 0 | 45.5139 | Cancelled | 0 | +.. _run_ex_instruct: + How to Run the Example ----------------------- -Below are the steps to run the experiment. Find the -:ref:`experiment source code` -and :ref:`application source code` +====================== +Below are the steps to run the ``Experiment``. Find the +:ref:`experiment source code` +and :ref:`application source code` below in the respective subsections. .. note:: The example assumes that you have already installed and built - SmartSim and SmartRedis. Please refer to Section :ref:`Basic Installation` + SmartSim and SmartRedis. Please refer to Section :ref:`Basic Installation` for further details. For simplicity, we assume that you are running on a SLURM-based HPC-platform. Refer to the steps below for more details. @@ -609,7 +1104,7 @@ Step 1 : Setup your directory tree application_script.py experiment_script.py - You can find the application and experiment source code in subsections below. + You can find the application and ``Experiment`` source code in subsections below.
Step 2 : Install and Build SmartSim This example assumes you have installed SmartSim and SmartRedis in your @@ -619,21 +1114,25 @@ Step 2 : Install and Build SmartSim Step 3 : Change the `exe_args` file path When configuring the colocated model in `experiment_script.py`, we pass the file path of `application_script.py` to the `exe_args` argument - on line 33 in :ref:`experiment_script.py`. + on line 33 in :ref:`experiment_script.py`. Edit this argument to the file path of your `application_script.py` -Step 4 : Run the Experiment - Finally, run the experiment with ``python experiment_script.py``. +Step 4 : Run the ``Experiment`` + Finally, run the ``Experiment`` with ``python experiment_script.py``. + +.. _multi_app_source_code: Application Source Code -^^^^^^^^^^^^^^^^^^^^^^^ -.. literalinclude:: ../tutorials/getting_started/multi_db_example/application_script.py +----------------------- +.. literalinclude:: tutorials/getting_started/multi_db_example/application_script.py :language: python :linenos: +.. _multi_exp_source_code: + Experiment Source Code -^^^^^^^^^^^^^^^^^^^^^^ -.. literalinclude:: ../tutorials/getting_started/multi_db_example/multidb_driver.py +---------------------- +.. literalinclude:: tutorials/getting_started/multi_db_example/multidb_driver.py :language: python :linenos: \ No newline at end of file diff --git a/doc/requirements-doc.txt b/doc/requirements-doc.txt index e883a2805..696881bef 100644 --- a/doc/requirements-doc.txt +++ b/doc/requirements-doc.txt @@ -12,3 +12,7 @@ ipython jinja2==3.1.2 protobuf numpy +sphinx-design +pypandoc +sphinx-autodoc-typehints +myst_parser diff --git a/doc/run_settings.rst b/doc/run_settings.rst new file mode 100644 index 000000000..ed12df8cb --- /dev/null +++ b/doc/run_settings.rst @@ -0,0 +1,334 @@ +.. _run_settings_doc: + +************ +Run Settings +************ +======== +Overview +======== +``RunSettings`` are used in the SmartSim API to define how ``Model`` and ``Ensemble`` jobs +should be executed. 
+ +In general, ``RunSettings`` define: + +- the executable +- the arguments to pass to the executable +- necessary environment variables at runtime +- the required compute resources + +The base ``RunSettings`` class is utilized for local task launches, +while its derived child classes offer specialized functionality for HPC workload managers (WLMs). +Each SmartSim `launcher` interfaces with a specific ``RunSettings`` subclass tailored to an HPC job scheduler. + +- Navigate to the :ref:`Local` section to configure run settings locally +- Navigate to the :ref:`HPC Systems` section to configure run settings for HPC + +A ``RunSettings`` object is initialized through the ``Experiment.create_run_settings`` function. +This function accepts a `run_command` argument: the command to run the executable. + +If `run_command` is set to `"auto"`, SmartSim will attempt to match a run command on the +system with a ``RunSettings`` class. If found, the class corresponding to +that `run_command` will be created and returned. + +If the `run_command` is passed a recognized run command (e.g. `"srun"`) the ``RunSettings`` +instance will be a child class such as ``SrunSettings``. You may also specify `"mpirun"`, +`"mpiexec"`, `"aprun"`, `"jsrun"` or `"orterun"` to the `run_command` argument. +This will return the associated child class. + +If the run command is not supported by SmartSim, the base ``RunSettings`` class will be created and returned +with the specified `run_command` and `run_args` evaluated literally. + +After creating a ``RunSettings`` instance, users gain access to the attributes and methods +of the associated child class, providing them with the ability to further configure the run +settings for jobs. + +======== +Examples +======== +.. _run_settings_local_ex: + +Local +===== +When running SmartSim on laptops and single node workstations via the `"local"` +`launcher`, job execution is configured with the base ``RunSettings`` object. 
+For local launches, ``RunSettings`` accepts a `run_command` parameter to allow +the use of parallel launch binaries like `"mpirun"`, `"mpiexec"`, and others. + +If no `run_command` is specified and the ``Experiment`` `launcher` is set to `"local"`, +the executable is launched locally. When utilizing the `"local"` launcher and configuring +the `run_command` parameter to `"auto"` in the ``Experiment.create_run_settings`` factory +method, SmartSim defaults to omitting any run command prefix before the executable. + +Once the ``RunSettings`` object is initialized using the ``Experiment.create_run_settings`` factory +method, the :ref:`RunSettings API` can be used to further configure the +``RunSettings`` object prior to execution. + +.. note:: + The local `launcher` is the default `launcher` for all ``Experiment`` instances. + +When the user initializes the ``Experiment`` at the beginning of the Python driver script, +a `launcher` argument may be specified. SmartSim will register or detect the +`launcher` and return the supported class upon a call to ``Experiment.create_run_settings``. +Below we demonstrate creating and configuring the base ``RunSettings`` +object for local launches by specifying the `"local"` launcher during ``Experiment`` creation. +We also demonstrate specifying `run_command="mpirun"` locally. + +**Initialize and Configure a RunSettings Object with No Run Command Specified:** + +.. code-block:: python + + from smartsim import Experiment + + # Initialize the experiment and provide launcher local + exp = Experiment("name-of-experiment", launcher="local") + + + # Initialize a RunSettings object + run_settings = exp.create_run_settings(exe="echo", exe_args="Hello World", run_command=None) + +**Initialize and Configure a RunSettings Object with the `mpirun` Run Command Specified:** + +.. note:: + Please note that to run this example you need to have an MPI implementation + (e.g. OpenMPI or MPICH) installed. + +.. 
code-block:: python + + from smartsim import Experiment + + # Initialize the experiment and provide launcher local + exp = Experiment("name-of-experiment", launcher="local") + + # Initialize a RunSettings object + run_settings = exp.create_run_settings(exe="echo", exe_args="Hello World", run_command="mpirun") + +Users may replace `mpirun` with `mpiexec`. + +.. _run_settings_hpc_ex: + +HPC System +========== +To configure an entity for launch on an HPC system, SmartSim offers ``RunSettings`` child classes. +Each WLM `launcher` supports different ``RunSettings`` child classes. +When the user initializes the ``Experiment`` at the beginning of the Python driver script, +a `launcher` argument may be specified. The specified `launcher` will be used by SmartSim to +return the correct ``RunSettings`` child class that matches with the specified (or auto-detected) +`run_command` upon a call to ``Experiment.create_run_settings``. Below we demonstrate +creating and configuring the base ``RunSettings`` object for HPC launches +by specifying the launcher during ``Experiment`` creation. We show examples +for each job scheduler. + +.. tabs:: + + .. group-tab:: Slurm + + The Slurm `launcher` supports the :ref:`SrunSettings API ` as well as the :ref:`MpirunSettings API `, + :ref:`MpiexecSettings API ` and :ref:`OrterunSettings API ` that each can be used to run executables + with launch binaries like `"srun"`, `"mpirun"`, `"mpiexec"` and `"orterun"`. Below we step through initializing a ``SrunSettings`` and ``MpirunSettings`` + instance on a Slurm based machine using the associated `run_command`. + + **SrunSettings** + + Run a job with the `srun` command on a Slurm based system. Any arguments passed in + the `run_args` dict will be converted into `srun` arguments and prefixed with `"--"`. + Values of `None` can be provided for arguments that do not have values. + + .. 
code-block:: python + + from smartsim import Experiment + + # Initialize the Experiment and provide launcher Slurm + exp = Experiment("name-of-experiment", launcher="slurm") + + # Initialize a SrunSettings object + run_settings = exp.create_run_settings(exe="echo", exe_args="Hello World", run_command="srun") + # Set the number of nodes + run_settings.set_nodes(4) + # Set the number of cpus to use per task + run_settings.set_cpus_per_task(2) + # Set the number of tasks for this job + run_settings.set_tasks(100) + # Set the number of tasks for this job + run_settings.set_tasks_per_node(25) + + **MpirunSettings** + + Run a job with the `mpirun` command (MPI-standard) on a Slurm based system. Any + arguments passed in the `run_args` dict will be converted into `mpirun` arguments + and prefixed with `"--"`. Values of `None` can be provided for arguments that do + not have values. + + .. code-block:: python + + from smartsim import Experiment + + # Initialize the Experiment and provide launcher Slurm + exp = Experiment("name-of-experiment", launcher="slurm") + + # Initialize a MpirunSettings object + run_settings = exp.create_run_settings(exe="echo", exe_args="Hello World", run_command="mpirun") + # Set the number of cpus to use per task + run_settings.set_cpus_per_task(2) + # Set the number of tasks for this job + run_settings.set_tasks(100) + # Set the number of tasks for this job + run_settings.set_tasks_per_node(25) + + Users may replace `mpirun` with `mpiexec` or `orterun`. + + + .. note:: + SmartSim will look for an allocation by accessing the associated WLM job ID environment variable. If an allocation + is present, the entity will be launched on the reserved compute resources. A user may also specify the allocation ID + when initializing a run settings object via the `alloc` argument. If an allocation is specified, the entity receiving + these run parameters will launch on that allocation. + + .. 
group-tab:: PBS Pro + The PBS Pro `launcher` supports the :ref:`AprunSettings API ` as well as the :ref:`MpirunSettings API `, + :ref:`MpiexecSettings API ` and :ref:`OrterunSettings API ` that each can be used to run executables + with launch binaries like `"aprun"`, `"mpirun"`, `"mpiexec"` and `"orterun"`. Below we step through initializing a ``AprunSettings`` and ``MpirunSettings`` + instance on a PBS Pro based machine using the associated `run_command`. + + **AprunSettings** + + Run a job with `aprun` command on a PBS Pro based system. Any arguments passed in + the `run_args` dict will be converted into `aprun` arguments and prefixed with `--`. + Values of `None` can be provided for arguments that do not have values. + + .. code-block:: python + + from smartsim import Experiment + + # Initialize the experiment and provide launcher PBS Pro + exp = Experiment("name-of-experiment", launcher="pbs") + + # Initialize a AprunSettings object + run_settings = exp.create_run_settings(exe="echo", exe_args="Hello World", run_command="aprun") + # Set the number of cpus to use per task + run_settings.set_cpus_per_task(2) + # Set the number of tasks for this job + run_settings.set_tasks(100) + # Set the number of tasks for this job + run_settings.set_tasks_per_node(25) + + **MpirunSettings** + + Run a job with `mpirun` command on a PBS Pro based system. Any arguments passed + in the `run_args` dict will be converted into `mpirun` arguments and prefixed with `--`. + Values of `None` can be provided for arguments that do not have values. + + .. 
code-block:: python + + from smartsim import Experiment + + # Initialize the experiment and provide launcher PBS Pro + exp = Experiment("name-of-experiment", launcher="pbs") + + # Initialize a MpirunSettings object + run_settings = exp.create_run_settings(exe="echo", exe_args="Hello World", run_command="mpirun") + # Set the number of cpus to use per task + run_settings.set_cpus_per_task(2) + # Set the number of tasks for this job + run_settings.set_tasks(100) + # Set the number of tasks for this job + run_settings.set_tasks_per_node(25) + + Users may replace `mpirun` with `mpiexec` or `orterun`. + + .. group-tab:: PALS + The PALS `launcher` supports the :ref:`MpiexecSettings API ` that can be used to run executables + with the `mpiexec` launch binary. Below we step through initializing a ``MpiexecSettings`` instance on a PALS + based machine using the associated `run_command`. + + **MpiexecSettings** + + Run a job with `mpiexec` command on a PALS based system. Any arguments passed in the `run_args` dict will be converted into `mpiexec` arguments and prefixed with `--`. + Values of `None` can be provided for arguments that do not have values. + + .. code-block:: python + + from smartsim import Experiment + + # Initialize the experiment and provide launcher PALS + exp = Experiment("name-of-experiment", launcher="pals") + + # Initialize a MpiexecSettings object + run_settings = exp.create_run_settings(exe="echo", exe_args="Hello World", run_command="mpiexec") + # Set the number of tasks for this job + run_settings.set_tasks(100) + # Set the number of tasks for this job + run_settings.set_tasks_per_node(25) + + .. group-tab:: LSF + The LSF `launcher` supports the :ref:`JsrunSettings API ` as well as the :ref:`MpirunSettings API `, + :ref:`MpiexecSettings API ` and :ref:`OrterunSettings API ` that each can be used to run executables + with launch binaries like `"jsrun"`, `"mpirun"`, `"mpiexec"` and `"orterun"`. 
Below we step through initializing a ``JsrunSettings`` and ``MpirunSettings`` + instance on a LSF based machine using the associated `run_command`. + + **JsrunSettings** + + Run a job with `jsrun` command on a LSF based system. Any arguments passed in the + `run_args` dict will be converted into `jsrun` arguments and prefixed with `--`. + Values of `None` can be provided for arguments that do not have values. + + .. code-block:: python + + from smartsim import Experiment + + # Initialize the experiment and provide launcher LSF + exp = Experiment("name-of-experiment", launcher="lsf") + + # Initialize a JsrunSettings object + run_settings = exp.create_run_settings(exe="echo", exe_args="Hello World", run_command="jsrun") + # Set the number of cpus to use per task + run_settings.set_cpus_per_task(2) + # Set the number of tasks for this job + run_settings.set_tasks(100) + # Set the number of tasks for this job + run_settings.set_tasks_per_node(25) + + **MpirunSettings** + + Run a job with `mpirun` command on a LSF based system. Any arguments passed in the + `run_args` dict will be converted into `mpirun` arguments and prefixed with `--`. + Values of `None` can be provided for arguments that do not have values. + + .. code-block:: python + + from smartsim import Experiment + + # Initialize the experiment and provide launcher LSF + exp = Experiment("name-of-experiment", launcher="lsf") + + # Initialize a MpirunSettings object + run_settings = exp.create_run_settings(exe="echo", exe_args="Hello World", run_command="mpirun") + # Set the number of cpus to use per task + run_settings.set_cpus_per_task(2) + # Set the number of tasks for this job + run_settings.set_tasks(100) + # Set the number of tasks for this job + run_settings.set_tasks_per_node(25) + + Users may replace `mpirun` with `mpiexec` or `orterun`. + + .. group-tab:: Dragon + The Dragon `launcher` does not need any launch binary. 
Below we step through initializing a ``DragonRunSettings`` instance on a Slurm- + or PBS-based machine. + + **DragonRunSettings** + + Run a job with the `dragon` launcher. + + .. code-block:: python + + from smartsim import Experiment + + # Initialize the experiment and provide launcher dragon + exp = Experiment("name-of-experiment", launcher="dragon") + + # Initialize a DragonRunSettings object + run_settings = exp.create_run_settings(exe="echo", exe_args="Hello World") + # Set the number of nodes for this job + run_settings.set_nodes(4) + # Set the number of tasks per node for this job + run_settings.set_tasks_per_node(10) diff --git a/doc/sr_advanced_topics.rst b/doc/sr_advanced_topics.rst index 30da2c578..763a7fbe7 100644 --- a/doc/sr_advanced_topics.rst +++ b/doc/sr_advanced_topics.rst @@ -1,2 +1,2 @@ - +.. _config_options_explain: .. include:: ../smartredis/doc/advanced_topics.rst \ No newline at end of file diff --git a/doc/ss_logger.rst b/doc/ss_logger.rst new file mode 100644 index 000000000..186e28a89 --- /dev/null +++ b/doc/ss_logger.rst @@ -0,0 +1,221 @@ +****** +Logger +****** + +.. _ss_logger: + +======== +Overview +======== +SmartSim supports logging experiment activity through a logging API accessible via +the SmartSim `log` module. The SmartSim logger, backed by Python logging, enables +real-time logging of experiment activity **to stdout** and/or **to file**, with +multiple verbosity levels for categorizing log messages. + +Users may instruct SmartSim to log certain verbosity level log messages +and omit others through the `SMARTSIM_LOG_LEVEL` environment variable. The `SMARTSIM_LOG_LEVEL` +environment variable may be overridden when logging to file by specifying a log level to +the ``log_to_file`` function. Examples walking through logging :ref:`to stdout` +and :ref:`to file` are provided below. + +SmartSim offers **four** log functions to use within the Python driver script. 
The +below functions accept string messages: + +- ``logger.error`` +- ``logger.warning`` +- ``logger.info`` +- ``logger.debug`` + +The `SMARTSIM_LOG_LEVEL` environment variable accepts **four** log levels: `quiet`, +`info`, `debug` and `developer`. Setting the log level in the environment (or via the override function) +controls the log messages that are output at runtime. The log levels are listed below from +least verbose to most verbose: + +- level: `quiet` + - The `quiet` log level instructs SmartSim to print ``error`` and ``warning`` messages. +- level: `info` + - The `info` log level instructs SmartSim to print ``info``, ``error`` and ``warning`` messages. +- level: `debug` + - The `debug` log level instructs SmartSim to print ``debug``, ``info``, ``error`` and ``warning`` messages. +- level: `developer` + - The `developer` log level instructs SmartSim to print ``debug``, ``info``, ``error`` and ``warning`` messages. + +.. note:: + Levels `developer` and `debug` print the same log messages. The `developer` log level is intended for use + during code development and signifies highly detailed and verbose logging. + +.. note:: + `SMARTSIM_LOG_LEVEL` defaults to log level `info`. For SmartSim log API examples, continue to the :ref:`Examples` section. + +.. _log_ex: + +======== +Examples +======== +.. _log_to_stdout: + +------------- +Log to stdout +------------- +The ``get_logger`` function in SmartSim enables users to initialize a logger instance. +Once initialized, a user may use the instance to log a message using one of the four +logging functions. + +To use the SmartSim logger within a Python script, import the required `get_logger` +function from the `log` module: + +.. code-block:: python + + from smartsim.log import get_logger + +Next, initialize an instance of the logger and provide a logger `name`: + +.. 
code-block:: python + + logger = get_logger("SmartSim") + +To demonstrate full functionality of the SmartSim logger, we include all log +functions in the Python driver script with log messages: + +.. code-block:: python + + logger.info("This is a message") + logger.debug("This is a debug message") + logger.error("This is an error message") + logger.warning("This is a warning message") + +Execute the script *without* setting the `SMARTSIM_LOG_LEVEL`. Remember that `SMARTSIM_LOG_LEVEL` +defaults to `info`. When we execute the script, the following messages will print to stdout: + +.. code-block:: bash + + 11:15:00 system.host.com SmartSim[130033] INFO This is a message + 11:15:00 system.host.com SmartSim[130033] ERROR This is an error message + 11:15:00 system.host.com SmartSim[130033] WARNING This is a warning message + +Notice that the `debug` function message was filtered. This is because by using +a lower verbosity level (`info`), we instruct SmartSim to omit the higher verbosity level messages (`debug` and `developer`). + +Next, set `SMARTSIM_LOG_LEVEL` to `debug`: + +.. code-block:: bash + + export SMARTSIM_LOG_LEVEL=debug + +When we execute the script again, +the following messages will print to stdout: + +.. code-block:: bash + + 11:15:00 system.host.com SmartSim[65385] INFO This is a message + 11:15:00 system.host.com SmartSim[65385] DEBUG This is a debug message + 11:15:00 system.host.com SmartSim[65385] ERROR This is an error message + 11:15:00 system.host.com SmartSim[65385] WARNING This is a warning message + +Notice that all log messages print to stdout. By using a higher verbosity level (`debug`), +we instruct SmartSim to print all log functions at and above the level. + +Next, set `SMARTSIM_LOG_LEVEL` to `quiet` in terminal: + +.. code-block:: bash + + export SMARTSIM_LOG_LEVEL=quiet + +When we run the program once again, the following output is printed +to stdout: + +.. 
code-block:: bash + + 11:15:00 system.host.com SmartSim[65385] ERROR This is an error message + 11:15:00 system.host.com SmartSim[65385] WARNING This is a warning message + +Notice that the `info` and `debug` log functions were filtered. This is because by using +the least verbose level (`quiet`), we instruct SmartSim to omit messages at higher verbosity levels +(`info`, `debug` and `developer`). + +To finish the example, set `SMARTSIM_LOG_LEVEL` to `info` in terminal: + +.. code-block:: bash + + export SMARTSIM_LOG_LEVEL=info + +When we execute the script, the following messages will print +to stdout: + +.. code-block:: bash + + 11:15:00 system.host.com SmartSim[130033] INFO This is a message + 11:15:00 system.host.com SmartSim[130033] ERROR This is an error message + 11:15:00 system.host.com SmartSim[130033] WARNING This is a warning message + +Notice that the same messages were logged to stdout as when we ran the script with the default value `info`. +SmartSim omits messages at higher verbosity levels (`debug` and `developer`). + +.. _log_to_file: + +--------------- +Logging to File +--------------- +The ``log_to_file`` function in SmartSim allows users to log messages +to a specified file by providing a file name or relative file path. If the file name +passed in does not exist, SmartSim will create the file. If the program is re-executed with the same +file name, the file contents will be overwritten. + +To demonstrate, begin by importing the functions `get_logger` and `log_to_file` from the `log` module: + +.. code-block:: python + + from smartsim.log import get_logger, log_to_file + +Initialize a logger for use within the Python driver script: + +.. code-block:: python + + logger = get_logger("SmartSim") + +Invoke the ``log_to_file`` function to instruct SmartSim to create a file named `logger.out` +to write log messages to: + +.. code-block:: python + + log_to_file("logger.out") + +For the example, we add all log functions to the script: + +.. 
code-block:: python + + logger.info("This is a message") + logger.debug("This is a debug message") + logger.error("This is an error message") + logger.warning("This is a warning message") + +Remember that the default value for the `SMARTSIM_LOG_LEVEL` variable is `info`. +Therefore, we will not set the environment variable and instead rely on the +default. + +When we execute the Python script, a file named `logger.out` is created in our working +directory with the listed contents: + +.. code-block:: bash + + 11:15:00 system.host.com SmartSim[10950] INFO This is a message + 11:15:00 system.host.com SmartSim[10950] ERROR This is an error message + 11:15:00 system.host.com SmartSim[10950] WARNING This is a warning message + +Notice that the `debug` function message was filtered. This is because by using +a lower verbosity level (`info`), we instruct SmartSim to omit higher verbosity messages (`debug` and `developer`). + +In the same Python script, add a log level to the ``log_to_file`` as a input argument: + +.. code-block:: python + + log_to_file("logger.out", "quiet") + +When we execute the Python script once again, SmartSim will override the `SMARTSIM_LOG_LEVEL` +variable to output messages of log level `quiet`. SmartSim will overwrite the contents +of `logger.out` with: + +.. code-block:: bash + + 11:15:00 system.host.com SmartSim[10950] ERROR This is an error message + 11:15:00 system.host.com SmartSim[10950] WARNING This is a warning message \ No newline at end of file diff --git a/doc/testing.rst b/doc/testing.rst index ccb2db3c2..08cce5d36 100644 --- a/doc/testing.rst +++ b/doc/testing.rst @@ -66,20 +66,20 @@ of the tests located within the ``on_wlm`` directory. To run the ``on_wlm`` test suite, users will have to be on a system with one of the supported workload managers. Additionally, users will -need to obtain an allocation of **at least 4 nodes**. +need to obtain an allocation of **at least 8 nodes**. 
Examples of how to obtain allocations on systems with the launchers: .. code:: bash # for slurm (with srun) - salloc -N 4 -A account --exclusive -t 00:10:00 + salloc -N 8 -A account --exclusive -t 00:10:00 # for PBSPro (with aprun) - qsub -l select=4 -l place=scatter -l walltime=00:10:00 -q queue + qsub -l select=8 -l place=scatter -l walltime=00:10:00 -q queue # for LSF (with jsrun) - bsub -Is -W 00:30 -nnodes 4 -P project $SHELL + bsub -Is -W 00:30 -nnodes 8 -P project $SHELL Values for queue, account, or project should be substituted appropriately. @@ -119,7 +119,7 @@ A full example on an internal SLURM system .. code:: bash - salloc -N 4 -A account --exclusive -t 03:00:00 + salloc -N 8 -A account --exclusive -t 03:00:00 export SMARTSIM_TEST_LAUNCHER=slurm export SMARTSIM_TEST_INTERFACE=ipogif0 export SMARTSIM_TEST_DEVICE=gpu diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/application_consumer_script.py b/doc/tutorials/doc_examples/ensemble_doc_examples/application_consumer_script.py new file mode 100644 index 000000000..57d720163 --- /dev/null +++ b/doc/tutorials/doc_examples/ensemble_doc_examples/application_consumer_script.py @@ -0,0 +1,17 @@ +from smartredis import Client, LLInfo + +# Initialize a Client +client = Client(cluster=False) + +# Set the data source +client.set_data_source("producer_0") +# Check if the tensor exists +tensor_1 = client.poll_tensor("tensor", 100, 100) + +# Set the data source +client.set_data_source("producer_1") +# Check if the tensor exists +tensor_2 = client.poll_tensor("tensor", 100, 100) + +client.log_data(LLInfo, f"producer_0.tensor was found: {tensor_1}") +client.log_data(LLInfo, f"producer_1.tensor was found: {tensor_2}") \ No newline at end of file diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/application_producer_script.py b/doc/tutorials/doc_examples/ensemble_doc_examples/application_producer_script.py new file mode 100644 index 000000000..619a56e05 --- /dev/null +++ 
b/doc/tutorials/doc_examples/ensemble_doc_examples/application_producer_script.py @@ -0,0 +1,10 @@ +from smartredis import Client +import numpy as np + +# Initialize a Client +client = Client(cluster=False) + +# Create NumPy array +array = np.array([1, 2, 3, 4]) +# Use SmartRedis Client to place tensor in standalone Orchestrator +client.put_tensor("tensor", array) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_ml_model_file.py b/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_ml_model_file.py new file mode 100644 index 000000000..a2fa206f5 --- /dev/null +++ b/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_ml_model_file.py @@ -0,0 +1,40 @@ +from smartsim import Experiment +from tensorflow import keras +from tensorflow.keras.layers import Conv2D, Input + +class Net(keras.Model): + def __init__(self): + super(Net, self).__init__(name="cnn") + self.conv = Conv2D(1, 3, 1) + + def call(self, x): + y = self.conv(x) + return y + +def save_tf_cnn(path, file_name): + """Create a Keras CNN and save to file for example purposes""" + from smartsim.ml.tf import freeze_model + + n = Net() + input_shape = (3, 3, 1) + n.build(input_shape=(None, *input_shape)) + inputs = Input(input_shape) + outputs = n(inputs) + model = keras.Model(inputs=inputs, outputs=outputs, name=n.name) + + return freeze_model(model, path, file_name) + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +ensemble_settings = exp.create_run_settings(exe="path/to/example_simulation_program") + +# Initialize a Model object +ensemble_instance = exp.create_ensemble("ensemble_name", ensemble_settings) + +# Serialize and save TF model to file +model_file, inputs, outputs = save_tf_cnn(ensemble_instance.path, "model.pb") + +# Attach ML model file to Ensemble +ensemble_instance.add_ml_model(name="cnn", backend="TF", model_path=model_file, device="GPU", 
devices_per_node=2, first_device=0, inputs=inputs, outputs=outputs) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_ml_model_mem.py b/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_ml_model_mem.py new file mode 100644 index 000000000..98974fdc2 --- /dev/null +++ b/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_ml_model_mem.py @@ -0,0 +1,40 @@ +from smartsim import Experiment +from tensorflow import keras +from tensorflow.keras.layers import Conv2D, Input + +class Net(keras.Model): + def __init__(self): + super(Net, self).__init__(name="cnn") + self.conv = Conv2D(1, 3, 1) + + def call(self, x): + y = self.conv(x) + return y + +def create_tf_cnn(): + """Create an in-memory Keras CNN for example purposes + + """ + from smartsim.ml.tf import serialize_model + n = Net() + input_shape = (3,3,1) + inputs = Input(input_shape) + outputs = n(inputs) + model = keras.Model(inputs=inputs, outputs=outputs, name=n.name) + + return serialize_model(model) + +# Serialize and save TF model +model, inputs, outputs = create_tf_cnn() + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +ensemble_settings = exp.create_run_settings(exe="path/to/example_simulation_program") + +# Initialize a Model object +ensemble_instance = exp.create_ensemble("ensemble_name", ensemble_settings) + +# Attach the in-memory ML model to the SmartSim Ensemble +ensemble_instance.add_ml_model(name="cnn", backend="TF", model=model, device="GPU", devices_per_node=2, first_device=0, inputs=inputs, outputs=outputs) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_file.py b/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_file.py new file mode 100644 index 000000000..819ed814f --- /dev/null +++ 
b/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_file.py @@ -0,0 +1,13 @@ +from smartsim import Experiment + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +ensemble_settings = exp.create_run_settings(exe="path/to/example_simulation_program") + +# Initialize a Model object +ensemble_instance = exp.create_ensemble("ensemble_name", ensemble_settings) + +# Attach TorchScript to Ensemble +ensemble_instance.add_script(name="example_script", script_path="path/to/torchscript.py", device="GPU", devices_per_node=2, first_device=0) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_mem.py b/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_mem.py new file mode 100644 index 000000000..3e68bfd5a --- /dev/null +++ b/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_mem.py @@ -0,0 +1,16 @@ +from smartsim import Experiment + +def timestwo(x): + return 2*x + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +ensemble_settings = exp.create_run_settings(exe="path/to/example_simulation_program") + +# Initialize a Ensemble object +ensemble_instance = exp.create_ensemble("ensemble_name", ensemble_settings) + +# Attach TorchScript to Ensemble +ensemble_instance.add_function(name="example_func", function=timestwo, device="GPU", devices_per_node=2, first_device=0) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_string.py b/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_string.py new file mode 100644 index 000000000..b8f907e9a --- /dev/null +++ b/doc/tutorials/doc_examples/ensemble_doc_examples/ensemble_torchscript_string.py @@ -0,0 +1,16 @@ +from smartsim import Experiment + +# 
Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +ensemble_settings = exp.create_run_settings(exe="path/to/executable/simulation") + +# Initialize a Model object +ensemble_instance = exp.create_ensemble("ensemble_name", ensemble_settings) + +# TorchScript string +torch_script_str = "def negate(x):\n\treturn torch.neg(x)\n" + +# Attach TorchScript to Ensemble +ensemble_instance.add_script(name="example_script", script=torch_script_str, device="GPU", devices_per_node=2, first_device=0) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py b/doc/tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py new file mode 100644 index 000000000..1a1db58e4 --- /dev/null +++ b/doc/tutorials/doc_examples/ensemble_doc_examples/experiment_driver.py @@ -0,0 +1,42 @@ +from smartsim import Experiment +from smartsim.log import get_logger + +logger = get_logger("Experiment Log") +# Initialize the Experiment +exp = Experiment("getting-started", launcher="auto") + +# Initialize a standalone Orchestrator +standalone_orch = exp.create_database(db_nodes=1) + +# Initialize a RunSettings object for Ensemble +ensemble_settings = exp.create_run_settings(exe="/path/to/executable_producer_simulation") + +# Initialize Ensemble +producer_ensemble = exp.create_ensemble("producer", run_settings=ensemble_settings, replicas=2) + +# Enable key prefixing for Ensemble members +producer_ensemble.enable_key_prefixing() + +# Initialize a RunSettings object for Model +model_settings = exp.create_run_settings(exe="/path/to/executable_consumer_simulation") +# Initialize Model +consumer_model = exp.create_model("consumer", model_settings) + +# Generate SmartSim entity folder tree +exp.generate(standalone_orch, producer_ensemble, consumer_model, overwrite=True) + +# Launch Orchestrator +exp.start(standalone_orch, summary=True) + +# Launch Ensemble 
+exp.start(producer_ensemble, block=True, summary=True) + +# Register Ensemble members on consumer Model +for model in producer_ensemble: + consumer_model.register_incoming_entity(model) + +# Launch consumer Model +exp.start(consumer_model, block=True, summary=True) + +# Clobber Orchestrator +exp.stop(standalone_orch) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/file_attach.py b/doc/tutorials/doc_examples/ensemble_doc_examples/file_attach.py new file mode 100644 index 000000000..68f233342 --- /dev/null +++ b/doc/tutorials/doc_examples/ensemble_doc_examples/file_attach.py @@ -0,0 +1,20 @@ +from smartsim import Experiment + +# Initialize the Experiment +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +ensemble_settings = exp.create_run_settings(exe="python", exe_args="/path/to/application.py") + +# Initialize an Ensemble object via replicas strategy +example_ensemble = exp.create_ensemble("ensemble", ensemble_settings, replicas=2, params={"THERMO":1}) + +# Attach the file to the Ensemble instance +example_ensemble.attach_generator_files(to_configure="path/to/params_inputs.txt") + +# Generate the Ensemble directory +exp.generate(example_ensemble) + +# Launch the Ensemble +exp.start(example_ensemble) + diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/manual_append_ensemble.py b/doc/tutorials/doc_examples/ensemble_doc_examples/manual_append_ensemble.py new file mode 100644 index 000000000..89c9ea27e --- /dev/null +++ b/doc/tutorials/doc_examples/ensemble_doc_examples/manual_append_ensemble.py @@ -0,0 +1,25 @@ +from smartsim import Experiment + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize BatchSettings +bs = exp.create_batch_settings(nodes=10, + time="01:00:00") + +# Initialize Ensemble +ensemble = exp.create_ensemble("ensemble-append", batch_settings=bs) + +# Initialize RunSettings for 
Model 1 +srun_settings_1 = exp.create_run_settings(exe=exe, exe_args="path/to/application_script_1.py") +# Initialize RunSettings for Model 2 +srun_settings_2 = exp.create_run_settings(exe=exe, exe_args="path/to/application_script_2.py") +# Initialize Model 1 with RunSettings 1 +model_1 = exp.create_model(name="model_1", run_settings=srun_settings_1) +# Initialize Model 2 with RunSettings 2 +model_2 = exp.create_model(name="model_2", run_settings=srun_settings_2) + +# Add Model member to Ensemble +ensemble.add_model(model_1) +# Add Model member to Ensemble +ensemble.add_model(model_2) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/param_expansion_1.py b/doc/tutorials/doc_examples/ensemble_doc_examples/param_expansion_1.py new file mode 100644 index 000000000..6ccbce397 --- /dev/null +++ b/doc/tutorials/doc_examples/ensemble_doc_examples/param_expansion_1.py @@ -0,0 +1,16 @@ +from smartsim import Experiment + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings +rs = exp.create_run_settings(exe="path/to/example_simulation_program") + +#Create the parameters to expand to the Ensemble members +params = { + "name": ["Ellie", "John"], + "parameter": [2, 11] + } + +# Initialize the Ensemble by specifying RunSettings, the params and "all_perm" +ensemble = exp.create_ensemble("model_member", run_settings=rs, params=params, perm_strategy="all_perm") diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/param_expansion_2.py b/doc/tutorials/doc_examples/ensemble_doc_examples/param_expansion_2.py new file mode 100644 index 000000000..f6fb30967 --- /dev/null +++ b/doc/tutorials/doc_examples/ensemble_doc_examples/param_expansion_2.py @@ -0,0 +1,21 @@ +from smartsim import Experiment + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a BatchSettings +bs = 
exp.create_batch_settings(nodes=2, + time="10:00:00") + +# Initialize and configure RunSettings +rs = exp.create_run_settings(exe="python", exe_args="path/to/application_script.py") +rs.set_nodes(1) + +#Create the parameters to expand to the Ensemble members +params = { + "name": ["Ellie", "John"], + "parameter": [2, 11] + } + +# Initialize the Ensemble by specifying RunSettings, BatchSettings, the params and "step" +ensemble = exp.create_ensemble("ensemble", run_settings=rs, batch_settings=bs, params=params, perm_strategy="step") \ No newline at end of file diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/replicas_1.py b/doc/tutorials/doc_examples/ensemble_doc_examples/replicas_1.py new file mode 100644 index 000000000..0dd5d16f5 --- /dev/null +++ b/doc/tutorials/doc_examples/ensemble_doc_examples/replicas_1.py @@ -0,0 +1,10 @@ +from smartsim import Experiment + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +rs = exp.create_run_settings(exe="python", exe_args="path/to/application_script.py") + +# Initialize the Ensemble by specifying the number of replicas and RunSettings +ensemble = exp.create_ensemble("ensemble-replica", replicas=4, run_settings=rs) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/ensemble_doc_examples/replicas_2.py b/doc/tutorials/doc_examples/ensemble_doc_examples/replicas_2.py new file mode 100644 index 000000000..e2363a5be --- /dev/null +++ b/doc/tutorials/doc_examples/ensemble_doc_examples/replicas_2.py @@ -0,0 +1,15 @@ +from smartsim import Experiment + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a BatchSettings object +bs = exp.create_batch_settings(nodes=4, + time="10:00:00") + +# Initialize and configure a RunSettings object +rs = exp.create_run_settings(exe="python", exe_args="path/to/application_script.py") 
+rs.set_nodes(4) + +# Initialize an Ensemble +ensemble = exp.create_ensemble("ensemble-replica", replicas=4, run_settings=rs, batch_settings=bs) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/experiment_doc_examples/exp.py b/doc/tutorials/doc_examples/experiment_doc_examples/exp.py new file mode 100644 index 000000000..b5374e7bd --- /dev/null +++ b/doc/tutorials/doc_examples/experiment_doc_examples/exp.py @@ -0,0 +1,30 @@ +from smartsim import Experiment +from smartsim._core.control.previewrenderer import Verbosity +from smartsim.log import get_logger + +# Initialize an Experiment +exp = Experiment("example-experiment", launcher="auto") +# Initialize a SmartSim logger +smartsim_logger = get_logger("logger") + +# Initialize an Orchestrator +standalone_database = exp.create_database(db_nodes=3, port=6379, interface="ib0") + +# Initialize the Model RunSettings +settings = exp.create_run_settings("echo", exe_args="Hello World") +# Initialize the Model +model = exp.create_model("hello_world", settings) + +# Generate the output directory +exp.generate(standalone_database, model, overwrite=True) + +# Preview the experiment +exp.preview(standalone_database, model, verbosity_level=Verbosity.DEBUG) + +# Launch the Orchestrator then Model instance +exp.start(standalone_database, model) + +# Clobber the Orchestrator +exp.stop(standalone_database) +# Log the summary of the Experiment +smartsim_logger.info(exp.summary()) diff --git a/doc/tutorials/doc_examples/model_doc_examples/from_file_ml_model.py b/doc/tutorials/doc_examples/model_doc_examples/from_file_ml_model.py new file mode 100644 index 000000000..329d08edc --- /dev/null +++ b/doc/tutorials/doc_examples/model_doc_examples/from_file_ml_model.py @@ -0,0 +1,40 @@ +from smartsim import Experiment +from tensorflow import keras +from tensorflow.keras.layers import Conv2D, Input + +class Net(keras.Model): + def __init__(self): + super(Net, self).__init__(name="cnn") + self.conv = Conv2D(1, 3, 1) + + def 
call(self, x): + y = self.conv(x) + return y + +def save_tf_cnn(path, file_name): + """Create a Keras CNN and save to file for example purposes""" + from smartsim.ml.tf import freeze_model + + n = Net() + input_shape = (3, 3, 1) + n.build(input_shape=(None, *input_shape)) + inputs = Input(input_shape) + outputs = n(inputs) + model = keras.Model(inputs=inputs, outputs=outputs, name=n.name) + + return freeze_model(model, path, file_name) + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +model_settings = exp.create_run_settings(exe="path/to/example_simulation_program") + +# Initialize a Model object +model_instance = exp.create_model("model_name", model_settings) + +# Get and save TF model +model_file, inputs, outputs = save_tf_cnn(model_instance.path, "model.pb") + +# Attach the from file ML model to the SmartSim Model +model_instance.add_ml_model(name="cnn", backend="TF", model_path=model_file, device="GPU", devices_per_node=2, first_device=0, inputs=inputs, outputs=outputs) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/model_doc_examples/from_file_script.py b/doc/tutorials/doc_examples/model_doc_examples/from_file_script.py new file mode 100644 index 000000000..ca6dcaea1 --- /dev/null +++ b/doc/tutorials/doc_examples/model_doc_examples/from_file_script.py @@ -0,0 +1,14 @@ + +from smartsim import Experiment + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +model_settings = exp.create_run_settings(exe="path/to/example_simulation_program") + +# Initialize a Model object +model_instance = exp.create_model("model_name", model_settings) + +# Attach TorchScript to Model +model_instance.add_script(name="example_script", script_path="path/to/torchscript.py", device="GPU", devices_per_node=2, first_device=0) \ No newline at end of file diff --git 
a/doc/tutorials/doc_examples/model_doc_examples/in_mem_ml_model.py b/doc/tutorials/doc_examples/model_doc_examples/in_mem_ml_model.py new file mode 100644 index 000000000..a34cceb4a --- /dev/null +++ b/doc/tutorials/doc_examples/model_doc_examples/in_mem_ml_model.py @@ -0,0 +1,40 @@ +from smartsim import Experiment +from tensorflow import keras +from tensorflow.keras.layers import Conv2D, Input + +class Net(keras.Model): + def __init__(self): + super(Net, self).__init__(name="cnn") + self.conv = Conv2D(1, 3, 1) + + def call(self, x): + y = self.conv(x) + return y + +def create_tf_cnn(): + """Create an in-memory Keras CNN for example purposes + + """ + from smartsim.ml.tf import serialize_model + n = Net() + input_shape = (3,3,1) + inputs = Input(input_shape) + outputs = n(inputs) + model = keras.Model(inputs=inputs, outputs=outputs, name=n.name) + + return serialize_model(model) + +# Serialize and save TF model +model, inputs, outputs = create_tf_cnn() + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +model_settings = exp.create_run_settings(exe="path/to/example_simulation_program") + +# Initialize a Model object +model_instance = exp.create_model("model_name", model_settings) + +# Attach the in-memory ML model to the SmartSim Model +model_instance.add_ml_model(name="cnn", backend="TF", model=model, device="GPU", devices_per_node=2, first_device=0, inputs=inputs, outputs=outputs) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/model_doc_examples/in_mem_script.py b/doc/tutorials/doc_examples/model_doc_examples/in_mem_script.py new file mode 100644 index 000000000..634746085 --- /dev/null +++ b/doc/tutorials/doc_examples/model_doc_examples/in_mem_script.py @@ -0,0 +1,16 @@ +from smartsim import Experiment + +def timestwo(x): + return 2*x + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", 
launcher="auto") + +# Initialize a RunSettings object +model_settings = exp.create_run_settings(exe="path/to/example_simulation_program") + +# Initialize a Model object +model_instance = exp.create_model("model_name", model_settings) + +# Append TorchScript function to Model +model_instance.add_function(name="example_func", function=timestwo, device="GPU", devices_per_node=2, first_device=0) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/model_doc_examples/model_file.py b/doc/tutorials/doc_examples/model_doc_examples/model_file.py new file mode 100644 index 000000000..8961d50a8 --- /dev/null +++ b/doc/tutorials/doc_examples/model_doc_examples/model_file.py @@ -0,0 +1,19 @@ +from smartsim import Experiment + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +model_settings = exp.create_run_settings(exe="path/to/executable/simulation") + +# Initialize a Model object +model_instance = exp.create_model("model_name", model_settings, params={"THERMO":1}) + +# Attach the file to the Model instance +model_instance.attach_generator_files(to_configure="path/to/params_inputs.txt") + +# Store model_instance outputs within the Experiment directory named getting-started +exp.generate(model_instance) + +# Launch the Model +exp.start(model_instance) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/model_doc_examples/model_init.py b/doc/tutorials/doc_examples/model_doc_examples/model_init.py new file mode 100644 index 000000000..b1bb090f4 --- /dev/null +++ b/doc/tutorials/doc_examples/model_doc_examples/model_init.py @@ -0,0 +1,16 @@ +from smartsim import Experiment + +# Init Experiment and specify to launch locally in this example +exp = Experiment(name="getting-started", launcher="local") + +# Initialize RunSettings +model_settings = exp.create_run_settings(exe="echo", exe_args="Hello World") + +# Initialize Model instance +model_instance = 
exp.create_model(name="example-model", run_settings=model_settings) + +# Generate Model directory +exp.generate(model_instance) + +# Launch Model +exp.start(model_instance) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/model_doc_examples/prefix_data.py b/doc/tutorials/doc_examples/model_doc_examples/prefix_data.py new file mode 100644 index 000000000..da4034d82 --- /dev/null +++ b/doc/tutorials/doc_examples/model_doc_examples/prefix_data.py @@ -0,0 +1,12 @@ +from smartsim import Experiment + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Create the run settings for the Model +model_settings = exp.create_run_settings(exe="path/to/executable/simulation") + +# Create a Model instance named 'model' +model = exp.create_model("model_name", model_settings) +# Enable tensor, Dataset and list prefixing on the 'model' instance +model.enable_key_prefixing() \ No newline at end of file diff --git a/doc/tutorials/doc_examples/model_doc_examples/string_script.py b/doc/tutorials/doc_examples/model_doc_examples/string_script.py new file mode 100644 index 000000000..52495ab47 --- /dev/null +++ b/doc/tutorials/doc_examples/model_doc_examples/string_script.py @@ -0,0 +1,16 @@ +from smartsim import Experiment + +# Initialize the Experiment and set the launcher to auto +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +model_settings = exp.create_run_settings(exe="path/to/executable/simulation") + +# Initialize a Model object +model_instance = exp.create_model("model_name", model_settings) + +# TorchScript string +torch_script_str = "def negate(x):\n\treturn torch.neg(x)\n" + +# Attach TorchScript to Model +model_instance.add_script(name="example_script", script=torch_script_str, device="GPU", devices_per_node=2, first_device=0) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/orch_examples/colo_app.py 
b/doc/tutorials/doc_examples/orch_examples/colo_app.py new file mode 100644 index 000000000..930789fab --- /dev/null +++ b/doc/tutorials/doc_examples/orch_examples/colo_app.py @@ -0,0 +1,15 @@ +from smartredis import Client, LLInfo +import numpy as np + +# Initialize a Client +colo_client = Client(cluster=False) + +# Create NumPy array +local_array = np.array([1, 2, 3, 4]) +# Store the NumPy tensor +colo_client.put_tensor("tensor_1", local_array) + +# Retrieve tensor from driver script +local_tensor = colo_client.get_tensor("tensor_1") +# Log tensor +colo_client.log_data(LLInfo, f"The colocated db tensor is: {local_tensor}") \ No newline at end of file diff --git a/doc/tutorials/doc_examples/orch_examples/colo_driver.py b/doc/tutorials/doc_examples/orch_examples/colo_driver.py new file mode 100644 index 000000000..fde06e9b7 --- /dev/null +++ b/doc/tutorials/doc_examples/orch_examples/colo_driver.py @@ -0,0 +1,29 @@ +import numpy as np +from smartredis import Client +from smartsim import Experiment +from smartsim.log import get_logger + +# Initialize a logger object +logger = get_logger("Example Experiment Log") +# Initialize the Experiment +exp = Experiment("getting-started", launcher="auto") + +# Initialize a RunSettings object +model_settings = exp.create_run_settings(exe="path/to/executable_simulation") +# Configure RunSettings object +model_settings.set_nodes(1) + +# Initialize a SmartSim Model +model = exp.create_model("colo_model", model_settings) + +# Colocate the Model +model.colocate_db_uds() + +# Generate output files +exp.generate(model) + +# Launch the colocated Model +exp.start(model, block=True, summary=True) + +# Log the Experiment summary +logger.info(exp.summary()) \ No newline at end of file diff --git a/doc/tutorials/doc_examples/orch_examples/std_app.py b/doc/tutorials/doc_examples/orch_examples/std_app.py new file mode 100644 index 000000000..67129fbf4 --- /dev/null +++ b/doc/tutorials/doc_examples/orch_examples/std_app.py @@ -0,0 +1,15 @@ 
+from smartredis import Client, LLInfo +import numpy as np + +# Initialize a SmartRedis Client +application_client = Client(cluster=True) + +# Retrieve the driver script tensor from Orchestrator +driver_script_tensor = application_client.get_tensor("tensor_1") +# Log the tensor +application_client.log_data(LLInfo, f"The multi-sharded db tensor is: {driver_script_tensor}") + +# Create a NumPy array +local_array = np.array([5, 6, 7, 8]) +# Use SmartRedis client to place tensor in multi-sharded db +application_client.put_tensor("tensor_2", local_array) diff --git a/doc/tutorials/doc_examples/orch_examples/std_driver.py b/doc/tutorials/doc_examples/orch_examples/std_driver.py new file mode 100644 index 000000000..cf425125b --- /dev/null +++ b/doc/tutorials/doc_examples/orch_examples/std_driver.py @@ -0,0 +1,46 @@ +import numpy as np +from smartredis import Client +from smartsim import Experiment +from smartsim.log import get_logger + +# Initialize the logger +logger = get_logger("Example Experiment Log") +# Initialize the Experiment +exp = Experiment("getting-started", launcher="auto") + +# Initialize a multi-sharded Orchestrator +standalone_orchestrator = exp.create_database(db_nodes=3) + +# Initialize a SmartRedis client for multi-sharded Orchestrator +driver_client = Client(cluster=True, address=standalone_orchestrator.get_address()[0]) + +# Create NumPy array +local_array = np.array([1, 2, 3, 4]) +# Use the SmartRedis client to place tensor in the standalone Orchestrator +driver_client.put_tensor("tensor_1", local_array) + +# Initialize a RunSettings object +model_settings = exp.create_run_settings(exe="/path/to/executable_simulation") +model_settings.set_nodes(1) + +# Initialize the Model +model = exp.create_model("model", model_settings) + +# Create the output directory +exp.generate(standalone_orchestrator, model) + +# Launch the multi-sharded Orchestrator +exp.start(standalone_orchestrator) + +# Launch the Model +exp.start(model, block=True, summary=True) + +# 
Poll the tensors placed by the Model +app_tensor = driver_client.poll_key("tensor_2", 100, 10) +# Validate that the tensor exists +logger.info(f"The tensor exists: {app_tensor}") + +# Cleanup the Orchestrator +exp.stop(standalone_orchestrator) +# Print the Experiment summary +logger.info(exp.summary()) \ No newline at end of file diff --git a/tutorials/getting_started/consumer.py b/doc/tutorials/getting_started/consumer.py similarity index 100% rename from tutorials/getting_started/consumer.py rename to doc/tutorials/getting_started/consumer.py diff --git a/tutorials/getting_started/getting_started.ipynb b/doc/tutorials/getting_started/getting_started.ipynb similarity index 100% rename from tutorials/getting_started/getting_started.ipynb rename to doc/tutorials/getting_started/getting_started.ipynb diff --git a/tutorials/getting_started/multi_db_example/application_script.py b/doc/tutorials/getting_started/multi_db_example/application_script.py similarity index 100% rename from tutorials/getting_started/multi_db_example/application_script.py rename to doc/tutorials/getting_started/multi_db_example/application_script.py diff --git a/tutorials/getting_started/multi_db_example/multidb_driver.py b/doc/tutorials/getting_started/multi_db_example/multidb_driver.py similarity index 100% rename from tutorials/getting_started/multi_db_example/multidb_driver.py rename to doc/tutorials/getting_started/multi_db_example/multidb_driver.py diff --git a/tutorials/getting_started/output_my_parameter.py b/doc/tutorials/getting_started/output_my_parameter.py similarity index 100% rename from tutorials/getting_started/output_my_parameter.py rename to doc/tutorials/getting_started/output_my_parameter.py diff --git a/tutorials/getting_started/output_my_parameter_new_tag.py b/doc/tutorials/getting_started/output_my_parameter_new_tag.py similarity index 100% rename from tutorials/getting_started/output_my_parameter_new_tag.py rename to 
doc/tutorials/getting_started/output_my_parameter_new_tag.py diff --git a/tutorials/getting_started/producer.py b/doc/tutorials/getting_started/producer.py similarity index 100% rename from tutorials/getting_started/producer.py rename to doc/tutorials/getting_started/producer.py diff --git a/tutorials/ml_inference/Inference-in-SmartSim.ipynb b/doc/tutorials/ml_inference/Inference-in-SmartSim.ipynb similarity index 100% rename from tutorials/ml_inference/Inference-in-SmartSim.ipynb rename to doc/tutorials/ml_inference/Inference-in-SmartSim.ipynb diff --git a/tutorials/ml_inference/colo-db-torch-example.py b/doc/tutorials/ml_inference/colo-db-torch-example.py similarity index 100% rename from tutorials/ml_inference/colo-db-torch-example.py rename to doc/tutorials/ml_inference/colo-db-torch-example.py diff --git a/tutorials/ml_training/surrogate/LICENSE b/doc/tutorials/ml_training/surrogate/LICENSE similarity index 100% rename from tutorials/ml_training/surrogate/LICENSE rename to doc/tutorials/ml_training/surrogate/LICENSE diff --git a/tutorials/ml_training/surrogate/README.md b/doc/tutorials/ml_training/surrogate/README.md similarity index 100% rename from tutorials/ml_training/surrogate/README.md rename to doc/tutorials/ml_training/surrogate/README.md diff --git a/tutorials/ml_training/surrogate/fd_sim.py b/doc/tutorials/ml_training/surrogate/fd_sim.py similarity index 97% rename from tutorials/ml_training/surrogate/fd_sim.py rename to doc/tutorials/ml_training/surrogate/fd_sim.py index 8b128a319..db68b24b2 100644 --- a/tutorials/ml_training/surrogate/fd_sim.py +++ b/doc/tutorials/ml_training/surrogate/fd_sim.py @@ -18,12 +18,9 @@ def augment_batch(samples, targets): following NWHC ordering. 
:param samples: Samples to augment - :type samples: np.ndarray :param targets: Targets to augment - :type targets: np.ndarray :returns: Tuple of augmented samples and targets - :rtype: (np.ndarray, np.ndarray) """ batch_size = samples.shape[0] augmented_samples = np.empty((batch_size*8, *samples.shape[1:])) @@ -83,9 +80,7 @@ def simulate(steps, size): both as tensors and as augmented samples for training. :param steps: Number of simulations to run - :type steps: int :param size: lateral size of the discretized domain - :type size: int """ batch_size = 50 samples = np.zeros((batch_size,size,size,1)).astype(np.single) diff --git a/tutorials/ml_training/surrogate/steady_state.py b/doc/tutorials/ml_training/surrogate/steady_state.py similarity index 100% rename from tutorials/ml_training/surrogate/steady_state.py rename to doc/tutorials/ml_training/surrogate/steady_state.py diff --git a/tutorials/ml_training/surrogate/tf_model.py b/doc/tutorials/ml_training/surrogate/tf_model.py similarity index 100% rename from tutorials/ml_training/surrogate/tf_model.py rename to doc/tutorials/ml_training/surrogate/tf_model.py diff --git a/tutorials/ml_training/surrogate/tf_training.py b/doc/tutorials/ml_training/surrogate/tf_training.py similarity index 100% rename from tutorials/ml_training/surrogate/tf_training.py rename to doc/tutorials/ml_training/surrogate/tf_training.py diff --git a/tutorials/ml_training/surrogate/train_surrogate.ipynb b/doc/tutorials/ml_training/surrogate/train_surrogate.ipynb similarity index 100% rename from tutorials/ml_training/surrogate/train_surrogate.ipynb rename to doc/tutorials/ml_training/surrogate/train_surrogate.ipynb diff --git a/tutorials/ml_training/surrogate/vishelpers.py b/doc/tutorials/ml_training/surrogate/vishelpers.py similarity index 100% rename from tutorials/ml_training/surrogate/vishelpers.py rename to doc/tutorials/ml_training/surrogate/vishelpers.py diff --git a/tutorials/online_analysis/lattice/LICENSE 
b/doc/tutorials/online_analysis/lattice/LICENSE similarity index 100% rename from tutorials/online_analysis/lattice/LICENSE rename to doc/tutorials/online_analysis/lattice/LICENSE diff --git a/tutorials/online_analysis/lattice/README.md b/doc/tutorials/online_analysis/lattice/README.md similarity index 100% rename from tutorials/online_analysis/lattice/README.md rename to doc/tutorials/online_analysis/lattice/README.md diff --git a/tutorials/online_analysis/lattice/driver.py b/doc/tutorials/online_analysis/lattice/driver.py similarity index 100% rename from tutorials/online_analysis/lattice/driver.py rename to doc/tutorials/online_analysis/lattice/driver.py diff --git a/tutorials/online_analysis/lattice/fv_sim.py b/doc/tutorials/online_analysis/lattice/fv_sim.py similarity index 100% rename from tutorials/online_analysis/lattice/fv_sim.py rename to doc/tutorials/online_analysis/lattice/fv_sim.py diff --git a/tutorials/online_analysis/lattice/online_analysis.ipynb b/doc/tutorials/online_analysis/lattice/online_analysis.ipynb similarity index 100% rename from tutorials/online_analysis/lattice/online_analysis.ipynb rename to doc/tutorials/online_analysis/lattice/online_analysis.ipynb diff --git a/tutorials/online_analysis/lattice/probe.script b/doc/tutorials/online_analysis/lattice/probe.script similarity index 100% rename from tutorials/online_analysis/lattice/probe.script rename to doc/tutorials/online_analysis/lattice/probe.script diff --git a/tutorials/online_analysis/lattice/vishelpers.py b/doc/tutorials/online_analysis/lattice/vishelpers.py similarity index 100% rename from tutorials/online_analysis/lattice/vishelpers.py rename to doc/tutorials/online_analysis/lattice/vishelpers.py diff --git a/docker-compose.yml b/docker-compose.yml index f69743f14..f5be4e338 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -18,7 +18,7 @@ services: - "8888:8888" tutorials-prod: - image: smartsim-tutorials:v0.6.1 + image: smartsim-tutorials:v0.7.0 build: context: . 
dockerfile: ./docker/prod/Dockerfile diff --git a/docker/dev/Dockerfile b/docker/dev/Dockerfile index c643787c3..3ab3a37f8 100644 --- a/docker/dev/Dockerfile +++ b/docker/dev/Dockerfile @@ -24,7 +24,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -FROM ubuntu:20.04 +FROM ubuntu:22.04 LABEL maintainer="Cray Labs" @@ -36,9 +36,9 @@ RUN useradd --system --create-home --shell /bin/bash -g root -G sudo craylabs && apt-get update \ && apt-get install --no-install-recommends -y build-essential \ git gcc make git-lfs wget libopenmpi-dev openmpi-bin unzip \ - python3-pip python3 python3-dev cmake \ + python3-pip python3.9 python3.9-dev cmake \ && rm -rf /var/lib/apt/lists/* \ - && ln -s /usr/bin/python3 /usr/bin/python + && ln -s /usr/bin/python3.9 /usr/bin/python WORKDIR /home/craylabs RUN git clone https://github.com/CrayLabs/SmartRedis.git --branch develop --depth=1 smartredis \ diff --git a/docker/docs/dev/Dockerfile b/docker/docs/dev/Dockerfile index eff99de36..e9db9c342 100644 --- a/docker/docs/dev/Dockerfile +++ b/docker/docs/dev/Dockerfile @@ -24,7 +24,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -FROM ubuntu:20.04 +FROM ubuntu:22.04 LABEL maintainer="Cray Labs" @@ -58,9 +58,7 @@ RUN git clone https://github.com/CrayLabs/SmartDashboard.git --branch develop -- RUN python -m pip install -r doc/requirements-doc.txt \ && NO_CHECKS=1 SMARTSIM_SUFFIX=dev python -m pip install . -RUN mkdir -p doc/tutorials/ \ - && cd doc/tutorials/ \ - && rm -rf * \ - && ln -s ../../tutorials/* . +# Note this is needed to ensure that the Sphinx builds. 
Can be removed with newer Tensorflow +RUN python -m pip install typing_extensions==4.6.1 RUN make docs diff --git a/docker/prod/Dockerfile b/docker/prod/Dockerfile index 769378aef..325ace923 100644 --- a/docker/prod/Dockerfile +++ b/docker/prod/Dockerfile @@ -24,7 +24,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -FROM ubuntu:20.04 +FROM ubuntu:22.04 LABEL maintainer="Cray Labs" LABEL org.opencontainers.image.source https://github.com/CrayLabs/SmartSim @@ -36,9 +36,9 @@ RUN useradd --system --create-home --shell /bin/bash -g root -G sudo craylabs && apt-get update \ && apt-get install --no-install-recommends -y build-essential \ git gcc make git-lfs wget libopenmpi-dev openmpi-bin unzip \ - python3-pip python3 python3-dev cmake \ + python3.9 python3.9-dev python3-pip cmake \ && rm -rf /var/lib/apt/lists/* \ - && ln -s /usr/bin/python3 /usr/bin/python + && ln -s /usr/bin/python3.9 /usr/bin/python WORKDIR /home/craylabs COPY --chown=craylabs:root ./tutorials/ /home/craylabs/tutorials/ @@ -46,7 +46,7 @@ COPY --chown=craylabs:root ./tutorials/ /home/craylabs/tutorials/ USER craylabs RUN export PATH=/home/craylabs/.local/bin:$PATH && \ echo "export PATH=/home/craylabs/.local/bin:$PATH" >> /home/craylabs/.bashrc && \ - python -m pip install smartsim[ml]==0.6.2 jupyter jupyterlab matplotlib && \ + python -m pip install smartsim[ml]==0.7.0 jupyter jupyterlab matplotlib && \ smart build --device cpu -v && \ chown craylabs:root -R /home/craylabs/.local && \ rm -rf ~/.cache/pip diff --git a/docker/testing/Dockerfile b/docker/testing/Dockerfile index 9c247c320..285a66023 100644 --- a/docker/testing/Dockerfile +++ b/docker/testing/Dockerfile @@ -26,7 +26,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-FROM ubuntu:21.10 +FROM ubuntu:22.04 ENV DEBIAN_FRONTEND noninteractive RUN apt update && apt install -y python3 python3-pip python-is-python3 cmake git RUN pip install torch==1.9.1 diff --git a/pyproject.toml b/pyproject.toml index 4415c63ca..91164a68b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ build-backend = "setuptools.build_meta" [tool.black] line-length = 88 -target-version = ['py38', 'py39', 'py310'] +target-version = ['py39', 'py310', 'py311'] exclude = ''' ( | \.egg @@ -45,8 +45,15 @@ exclude = ''' | build | dist | setup.py + | .*\.py ) ''' +force-exclude = ''' +( + .*\.dragon/* +) +''' + [tool.pytest.ini_options] log_cli = true @@ -61,6 +68,7 @@ markers = [ # supress circular import warning profile = "black" skip = ["tests/test_configs/circular_config"] +skip_glob="smartsim/_core/.dragon/*" [tool.coverage.run] source = ["smartsim"] @@ -78,7 +86,7 @@ namespace_packages = true files = [ "smartsim" ] -plugins = [] +plugins = ["pydantic.mypy"] ignore_errors = false # Dynamic typing @@ -107,10 +115,12 @@ strict_equality = true # Additional Error Codes enable_error_code = [ # "redundant-expr", - # "possibly-undefined", + "possibly-undefined", # "unused-awaitable", # "ignore-without-code", # "mutable-override", + "truthy-bool", + "truthy-iterable", ] [[tool.mypy.overrides]] @@ -122,6 +132,7 @@ module = [ "torch", "smartsim.ml.torch.*", # must solve/ignore inheritance issues "watchdog", + "dragon.*", ] ignore_missing_imports = true ignore_errors = true diff --git a/setup.cfg b/setup.cfg index 5fdfa82ae..742386d2c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -42,7 +42,6 @@ contact_email = craylabs@hpe.com license = BSD 2-Clause License keywords = scientific, ai, workflow, hpc, analysis classifiers = - Programming Language :: Python :: 3.8 Programming Language :: Python :: 3.9 Programming Language :: Python :: 3.10 Programming Language :: Python :: 3.11 @@ -56,13 +55,14 @@ setup_requires = setuptools>=39.2 cmake>=3.13 include_package_data = 
True -python_requires = >=3.8,<3.12 +python_requires = >=3.9,<3.12 [options.packages.find] include = smartsim* exclude = .third-party + .dragon tests doc smartredis diff --git a/setup.py b/setup.py index bc7cf60d6..6e46ddef9 100644 --- a/setup.py +++ b/setup.py @@ -64,6 +64,7 @@ # # This future is needed to print Python2 EOL message from __future__ import print_function + import sys if sys.version_info < (3,): @@ -71,14 +72,14 @@ sys.exit(-1) -import os import importlib.util +import os from pathlib import Path from setuptools import setup -from setuptools.dist import Distribution -from setuptools.command.install import install from setuptools.command.build_py import build_py +from setuptools.command.install import install +from setuptools.dist import Distribution # Some necessary evils we have to do to be able to use # the _install tools in smartsim/smartsim/_core/_install @@ -107,8 +108,11 @@ # check for compatible python versions if not build_env.is_compatible_python(versions.PYTHON_MIN): - print("You are using Python {}. Python >={} is required.".format(build_env.python_version, - ".".join((versions.PYTHON_MIN)))) + print( + "You are using Python {}. 
Python >={} is required.".format( + build_env.python_version, ".".join((versions.PYTHON_MIN)) + ) + ) sys.exit(-1) if build_env.is_windows(): @@ -120,9 +124,11 @@ # __version__ in smartsim/__init__.py smartsim_version = versions.write_version(setup_path) + class BuildError(Exception): pass + # Hacky workaround for solving CI build "purelib" issue # see https://github.com/google/or-tools/issues/616 class InstallPlatlib(install): @@ -131,15 +137,14 @@ def finalize_options(self): if self.distribution.has_ext_modules(): self.install_lib = self.install_platlib -class SmartSimBuild(build_py): +class SmartSimBuild(build_py): def run(self): - database_builder = builder.DatabaseBuilder(build_env(), - build_env.MALLOC, - build_env.JOBS) + database_builder = builder.DatabaseBuilder( + build_env(), build_env.MALLOC, build_env.JOBS + ) if not database_builder.is_built: - database_builder.build_from_git(versions.REDIS_URL, - versions.REDIS) + database_builder.build_from_git(versions.REDIS_URL, versions.REDIS) database_builder.cleanup() @@ -151,9 +156,10 @@ def run(self): class BinaryDistribution(Distribution): """Distribution which always forces a binary package with platform name - We use this because we want to pre-package Redis for certain - platforms to use. + We use this because we want to pre-package Redis for certain + platforms to use. 
""" + def has_ext_modules(_placeholder): return True @@ -167,7 +173,11 @@ def has_ext_modules(_placeholder): "tqdm>=4.50.2", "filelock>=3.4.2", "protobuf~=3.20", - "watchdog>=3.0.0,<4.0.0", + "jinja2>=3.1.2", + "watchdog>=4.0.0", + "pydantic==1.10.14", + "pyzmq>=25.1.2", + "pygithub>=2.3.0", ] # Add SmartRedis at specific version @@ -181,6 +191,7 @@ def has_ext_modules(_placeholder): "pytest>=6.0.0", "pytest-cov>=2.10.1", "click==8.0.2", + "pytest-asyncio>=0.23.3", ], "mypy": [ "mypy>=1.3.0", @@ -193,7 +204,7 @@ def has_ext_modules(_placeholder): "typing_extensions>=4.1.0", ], # see smartsim/_core/_install/buildenv.py for more details - **versions.ml_extras_required() + **versions.ml_extras_required(), } @@ -212,5 +223,5 @@ def has_ext_modules(_placeholder): "console_scripts": [ "smart = smartsim._core._cli.__main__:main", ] - } + }, ) diff --git a/smartsim/__init__.py b/smartsim/__init__.py index 7c1fa2fe0..5e24097a5 100644 --- a/smartsim/__init__.py +++ b/smartsim/__init__.py @@ -30,8 +30,8 @@ # pylint: disable-next=useless-import-alias from .version import __version__ as __version__ -if sys.version_info < (3, 8): # pragma: no cover - sys.exit("Python 3.8 or greater must be used with SmartSim.") +if sys.version_info < (3, 9): # pragma: no cover + sys.exit("Python 3.9 or greater must be used with SmartSim.") # Main API module # pylint: disable=wrong-import-position diff --git a/smartsim/_core/__init__.py b/smartsim/_core/__init__.py index bbc108f48..490078770 100644 --- a/smartsim/_core/__init__.py +++ b/smartsim/_core/__init__.py @@ -24,7 +24,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from .control import Controller, Manifest +from .control import Controller, Manifest, previewrenderer from .generation import Generator -__all__ = ["Controller", "Manifest", "Generator"] +__all__ = ["Controller", "Manifest", "Generator", "previewrenderer"] diff --git a/smartsim/_core/_cli/build.py b/smartsim/_core/_cli/build.py index 443b916b7..951521f17 100644 --- a/smartsim/_core/_cli/build.py +++ b/smartsim/_core/_cli/build.py @@ -33,6 +33,7 @@ from tabulate import tabulate +from smartsim._core._cli.scripts.dragon_install import install_dragon from smartsim._core._cli.utils import SMART_LOGGER_FORMAT, color_bool, pip from smartsim._core._install import builder from smartsim._core._install.buildenv import ( @@ -43,7 +44,7 @@ VersionConflictError, Versioner, ) -from smartsim._core._install.builder import BuildError +from smartsim._core._install.builder import BuildError, Device from smartsim._core.config import CONFIG from smartsim._core.utils.helpers import installed_redisai_backends from smartsim.error import SSConfigError @@ -54,8 +55,6 @@ # NOTE: all smartsim modules need full paths as the smart cli # may be installed into a different directory. 
- -_TDeviceStr = t.Literal["cpu", "gpu"] _TPinningStr = t.Literal["==", "!=", ">=", ">", "<=", "<", "~="] @@ -134,16 +133,17 @@ def build_database( def build_redis_ai( build_env: BuildEnv, versions: Versioner, - device: _TDeviceStr, + device: Device, use_torch: bool = True, use_tf: bool = True, use_onnx: bool = False, torch_dir: t.Union[str, Path, None] = None, libtf_dir: t.Union[str, Path, None] = None, verbose: bool = False, + torch_with_mkl: bool = True, ) -> None: # make sure user isn't trying to do something silly on MacOS - if build_env.PLATFORM == "darwin" and device == "gpu": + if build_env.PLATFORM == "darwin" and device == Device.GPU: raise BuildError("SmartSim does not support GPU on MacOS") # decide which runtimes to build @@ -154,7 +154,7 @@ def build_redis_ai( ["ONNX", versions.ONNX, color_bool(use_onnx)], ] print(tabulate(backends_table, tablefmt="fancy_outline"), end="\n\n") - print(f"Building for GPU support: {color_bool(device == 'gpu')}\n") + print(f"Building for GPU support: {color_bool(device == Device.GPU)}\n") if not check_backends_install(): sys.exit(1) @@ -188,6 +188,7 @@ def build_redis_ai( build_tf=use_tf, build_onnx=use_onnx, verbose=verbose, + torch_with_mkl=torch_with_mkl, ) if rai_builder.is_built: @@ -195,7 +196,7 @@ def build_redis_ai( else: # get the build environment, update with CUDNN env vars # if present and building for GPU, otherwise warn the user - if device == "gpu": + if device == Device.GPU: gpu_env = build_env.get_cudnn_env() cudnn_env_vars = [ "CUDNN_LIBRARY", @@ -226,18 +227,16 @@ def build_redis_ai( logger.info("ML Backends and RedisAI build complete!") -def check_py_torch_version(versions: Versioner, device_in: _TDeviceStr = "cpu") -> None: +def check_py_torch_version(versions: Versioner, device: Device = Device.CPU) -> None: """Check Python environment for TensorFlow installation""" - - device = device_in.lower() if BuildEnv.is_macos(): - if device == "gpu": + if device == Device.GPU: raise BuildError("SmartSim does 
not support GPU on MacOS") device_suffix = "" else: # linux - if device == "cpu": + if device == Device.CPU: device_suffix = versions.TORCH_CPU_SUFFIX - elif device == "gpu": + elif device == Device.GPU: device_suffix = versions.TORCH_CUDA_SUFFIX else: raise BuildError("Unrecognized device requested") @@ -261,7 +260,9 @@ def check_py_torch_version(versions: Versioner, device_in: _TDeviceStr = "cpu") "Torch version not found in python environment. " "Attempting to install via `pip`" ) - wheel_device = device if device == "cpu" else device_suffix.replace("+", "") + wheel_device = ( + device.value if device == Device.CPU else device_suffix.replace("+", "") + ) pip( "install", "--extra-index-url", @@ -339,10 +340,10 @@ def _assess_python_env( def _format_incompatible_python_env_message( - missing: t.Iterable[str], conflicting: t.Iterable[str] + missing: t.Collection[str], conflicting: t.Collection[str] ) -> str: indent = "\n\t" - fmt_list: t.Callable[[str, t.Iterable[str]], str] = lambda n, l: ( + fmt_list: t.Callable[[str, t.Collection[str]], str] = lambda n, l: ( f"{n}:{indent}{indent.join(l)}" if l else "" ) missing_str = fmt_list("Missing", missing) @@ -358,13 +359,27 @@ def _format_incompatible_python_env_message( ) +def _configure_keydb_build(versions: Versioner) -> None: + """Configure the redis versions to be used during the build operation""" + versions.REDIS = Version_("6.2.0") + versions.REDIS_URL = "https://github.com/EQ-Alpha/KeyDB" + versions.REDIS_BRANCH = "v6.2.0" + + CONFIG.conf_path = Path(CONFIG.core_path, "config", "keydb.conf") + if not CONFIG.conf_path.resolve().is_file(): + raise SSConfigError( + "Database configuration file at REDIS_CONF could not be found" + ) + + +# pylint: disable-next=too-many-statements def execute( args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, / ) -> int: verbose = args.v keydb = args.keydb - device: _TDeviceStr = args.device - + device = Device(args.device.lower()) + is_dragon_requested = 
args.dragon # torch and tf build by default pt = not args.no_pt # pylint: disable=invalid-name tf = not args.no_tf # pylint: disable=invalid-name @@ -376,7 +391,7 @@ def execute( logger.info("Checking requested versions...") versions = Versioner() - logger.info("Checking for build tools...") + logger.debug("Checking for build tools...") if verbose: logger.info("Build Environment:") @@ -385,14 +400,7 @@ def execute( print(tabulate(env, headers=env_vars, tablefmt="github"), "\n") if keydb: - versions.REDIS = Version_("6.2.0") - versions.REDIS_URL = "https://github.com/EQ-Alpha/KeyDB" - versions.REDIS_BRANCH = "v6.2.0" - CONFIG.conf_path = Path(CONFIG.core_path, "config", "keydb.conf") - if not CONFIG.conf_path.resolve().is_file(): - raise SSConfigError( - "Database configuration file at REDIS_CONF could not be found" - ) + _configure_keydb_build(versions) if verbose: db_name: DbEngine = "KEYDB" if keydb else "REDIS" @@ -401,6 +409,17 @@ def execute( version_names = list(vers.keys()) print(tabulate(vers, headers=version_names, tablefmt="github"), "\n") + if is_dragon_requested: + install_to = CONFIG.core_path / ".dragon" + return_code = install_dragon(install_to) + + if return_code == 0: + logger.info("Dragon installation complete") + elif return_code == 1: + logger.info("Dragon installation not supported on platform") + else: + logger.warning("Dragon installation failed") + try: if not args.only_python_packages: # REDIS/KeyDB @@ -417,6 +436,7 @@ def execute( args.torch_dir, args.libtensorflow_dir, verbose=verbose, + torch_with_mkl=args.torch_with_mkl, ) except (SetupError, BuildError) as e: logger.error(str(e)) @@ -453,10 +473,16 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--device", type=str.lower, - default="cpu", - choices=["cpu", "gpu"], + default=Device.CPU.value, + choices=[device.value for device in Device], help="Device to build ML runtimes for", ) + parser.add_argument( + "--dragon", + action="store_true", + 
default=False, + help="Install the dragon runtime", + ) parser.add_argument( "--only_python_packages", action="store_true", @@ -499,3 +525,9 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: default=False, help="Build KeyDB instead of Redis", ) + parser.add_argument( + "--no_torch_with_mkl", + dest="torch_with_mkl", + action="store_false", + help="Do not build Torch with Intel MKL", + ) diff --git a/smartsim/_core/_cli/cli.py b/smartsim/_core/_cli/cli.py index 3cad573d1..3d5c6e066 100644 --- a/smartsim/_core/_cli/cli.py +++ b/smartsim/_core/_cli/cli.py @@ -39,6 +39,8 @@ from smartsim._core._cli.info import execute as info_execute from smartsim._core._cli.plugin import plugins from smartsim._core._cli.site import execute as site_execute +from smartsim._core._cli.teardown import configure_parser as teardown_parser +from smartsim._core._cli.teardown import execute as teardown_execute from smartsim._core._cli.utils import MenuItemConfig from smartsim._core._cli.validate import configure_parser as validate_parser from smartsim._core._cli.validate import execute as validate_execute @@ -106,7 +108,7 @@ def default_cli() -> SmartCli: menu = [ MenuItemConfig( "build", - "Build SmartSim dependencies (Redis, RedisAI, ML runtimes)", + "Build SmartSim dependencies (Redis, RedisAI, Dragon, ML runtimes)", build_execute, build_parser, ), @@ -142,6 +144,12 @@ def default_cli() -> SmartCli: "Display information about the current SmartSim installation", info_execute, ), + MenuItemConfig( + "teardown", + "Clean up allocated resources after an experiment terminates", + teardown_execute, + teardown_parser, + ), ] return SmartCli(menu) diff --git a/smartsim/_core/_cli/scripts/dragon_install.py b/smartsim/_core/_cli/scripts/dragon_install.py new file mode 100644 index 000000000..466c390bd --- /dev/null +++ b/smartsim/_core/_cli/scripts/dragon_install.py @@ -0,0 +1,232 @@ +import os +import pathlib +import sys +import typing as t + +from github import Github +from 
github.GitReleaseAsset import GitReleaseAsset + +from smartsim._core._cli.utils import pip +from smartsim._core._install.builder import WebTGZ +from smartsim._core.config import CONFIG +from smartsim._core.utils.helpers import check_platform, is_crayex_platform +from smartsim.error.errors import SmartSimCLIActionCancelled +from smartsim.log import get_logger + +logger = get_logger(__name__) + + +def create_dotenv(dragon_root_dir: pathlib.Path) -> None: + """Create a .env file with required environment variables for the Dragon runtime""" + dragon_root = str(dragon_root_dir) + dragon_inc_dir = str(dragon_root_dir / "include") + dragon_lib_dir = str(dragon_root_dir / "lib") + dragon_bin_dir = str(dragon_root_dir / "bin") + + dragon_vars = { + "DRAGON_BASE_DIR": dragon_root, + "DRAGON_ROOT_DIR": dragon_root, # note: same as base_dir + "DRAGON_INCLUDE_DIR": dragon_inc_dir, + "DRAGON_LIB_DIR": dragon_lib_dir, + "DRAGON_VERSION": dragon_pin(), + "PATH": dragon_bin_dir, + "LD_LIBRARY_PATH": dragon_lib_dir, + } + + lines = [f"{k}={v}\n" for k, v in dragon_vars.items()] + + if not CONFIG.dragon_dotenv.parent.exists(): + CONFIG.dragon_dotenv.parent.mkdir(parents=True) + + with CONFIG.dragon_dotenv.open("w", encoding="utf-8") as dotenv: + dotenv.writelines(lines) + + +def python_version() -> str: + """Return a formatted string used to filter release assets + for the current python version""" + return f"py{sys.version_info.major}.{sys.version_info.minor}" + + +def dragon_pin() -> str: + """Return a string indicating the pinned major/minor version of the dragon + package to install""" + return "0.9" + + +def _platform_filter(asset_name: str) -> bool: + """Return True if the asset name matches naming standard for current + platform (Cray or non-Cray). Otherwise, returns False. 
+ + :param asset_name: A value to inspect for keywords indicating a Cray EX asset + :returns: True if supplied value is correct for current platform""" + key = "crayex" + is_cray = key in asset_name.lower() + if is_crayex_platform(): + return is_cray + return not is_cray + + +def _version_filter(asset_name: str) -> bool: + """Return true if the supplied value contains a python version match + + :param asset_name: A value to inspect for keywords indicating a python version + :returns: True if supplied value is correct for current python version""" + return python_version() in asset_name + + +def _pin_filter(asset_name: str) -> bool: + """Return true if the supplied value contains a dragon version pin match + + :param asset_name: A value to inspect for keywords indicating a dragon version + :returns: True if supplied value is correct for current dragon version""" + return f"dragon-{dragon_pin()}" in asset_name + + +def _get_release_assets() -> t.Collection[GitReleaseAsset]: + """Retrieve a collection of available assets for all releases that satisfy + the dragon version pin + + :returns: A collection of release assets""" + git = Github() + + dragon_repo = git.get_repo("DragonHPC/dragon") + + if dragon_repo is None: + raise SmartSimCLIActionCancelled("Unable to locate dragon repo") + + # find any releases matching our pinned version requirement + tags = [tag for tag in dragon_repo.get_tags() if dragon_pin() in tag.name] + # repo.get_latest_release fails if only pre-release results are returned + pin_releases = list(dragon_repo.get_release(tag.name) for tag in tags) + releases = sorted(pin_releases, key=lambda r: r.published_at, reverse=True) + + # take the most recent release for the given pin + assets = releases[0].assets + + return assets + + +def filter_assets(assets: t.Collection[GitReleaseAsset]) -> t.Optional[GitReleaseAsset]: + """Filter the available release assets so that HSTA agents are used + when run on a Cray EX platform + + :param assets: The collection 
of dragon release assets to filter + :returns: An asset meeting platform & version filtering requirements""" + # Expect cray & non-cray assets that require a filter, e.g. + # 'dragon-0.8-py3.9.4.1-bafaa887f.tar.gz', + # 'dragon-0.8-py3.9.4.1-CRAYEX-ac132fe95.tar.gz' + asset = next( + ( + asset + for asset in assets + if _version_filter(asset.name) + and _platform_filter(asset.name) + and _pin_filter(asset.name) + ), + None, + ) + return asset + + +def retrieve_asset_info() -> GitReleaseAsset: + """Find a release asset that meets all necessary filtering criteria + + :param dragon_pin: identify the dragon version to install (e.g. dragon-0.8) + :returns: A GitHub release asset""" + assets = _get_release_assets() + asset = filter_assets(assets) + + platform_result = check_platform() + if not platform_result.is_cray: + logger.warning("Installing Dragon without HSTA support") + for msg in platform_result.failures: + logger.warning(msg) + + if asset is None: + raise SmartSimCLIActionCancelled("No dragon runtime asset available to install") + + logger.debug(f"Retrieved asset metadata: {asset}") + return asset + + +def retrieve_asset(working_dir: pathlib.Path, asset: GitReleaseAsset) -> pathlib.Path: + """Retrieve the physical file associated to a given GitHub release asset + + :param working_dir: location in file system where assets should be written + :param asset: GitHub release asset to retrieve + :returns: path to the downloaded asset""" + if working_dir.exists() and list(working_dir.rglob("*.whl")): + return working_dir + + archive = WebTGZ(asset.browser_download_url) + archive.extract(working_dir) + + logger.debug(f"Retrieved {asset.browser_download_url} to {working_dir}") + return working_dir + + +def install_package(asset_dir: pathlib.Path) -> int: + """Install the package found in `asset_dir` into the current python environment + + :param asset_dir: path to a decompressed archive contents for a release asset""" + wheels = asset_dir.rglob("*.whl") + wheel_path = 
next(wheels, None) + if not wheel_path: + logger.error(f"No wheel found for package in {asset_dir}") + return 1 + + create_dotenv(wheel_path.parent) + + while wheel_path is not None: + logger.info(f"Installing package: {wheel_path.absolute()}") + + try: + pip("install", "--force-reinstall", str(wheel_path)) + wheel_path = next(wheels, None) + except Exception: + logger.error(f"Unable to install from {asset_dir}") + return 1 + + return 0 + + +def cleanup( + archive_path: t.Optional[pathlib.Path] = None, +) -> None: + """Delete the downloaded asset and any files extracted during installation + + :param archive_path: path to a downloaded archive for a release asset""" + if archive_path: + archive_path.unlink(missing_ok=True) + logger.debug(f"Deleted archive: {archive_path}") + + +def install_dragon(extraction_dir: t.Union[str, os.PathLike[str]]) -> int: + """Retrieve a dragon runtime appropriate for the current platform + and install to the current python environment + :param extraction_dir: path for download and extraction of assets + :returns: Integer return code, 0 for success, non-zero on failures""" + if sys.platform == "darwin": + logger.debug(f"Dragon not supported on platform: {sys.platform}") + return 1 + + extraction_dir = pathlib.Path(extraction_dir) + filename: t.Optional[pathlib.Path] = None + asset_dir: t.Optional[pathlib.Path] = None + + try: + asset_info = retrieve_asset_info() + asset_dir = retrieve_asset(extraction_dir, asset_info) + + return install_package(asset_dir) + except Exception as ex: + logger.error("Unable to install dragon runtime", exc_info=ex) + finally: + cleanup(filename) + + return 2 + + +if __name__ == "__main__": + sys.exit(install_dragon(CONFIG.core_path / ".dragon")) diff --git a/smartsim/_core/_cli/teardown.py b/smartsim/_core/_cli/teardown.py new file mode 100644 index 000000000..a3f181145 --- /dev/null +++ b/smartsim/_core/_cli/teardown.py @@ -0,0 +1,74 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard 
Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import argparse +import os +import subprocess +import typing as t + +from smartsim._core.config import CONFIG + + +def configure_parser(parser: argparse.ArgumentParser) -> None: + """Builds the parser for the command""" + parser.add_argument( + "--dragon", + action="store_true", + default=False, + help="Terminate Dragon environment resources if" + "any remain after experiment completion", + ) + + +def _do_dragon_teardown() -> int: + """Run dragon-cleanup script to destroy all remaining dragon resources""" + env = os.environ.copy() + dragon_cleanup = next(CONFIG.core_path.rglob("dragon-cleanup"), None) + if dragon_cleanup is None: + print("dragon-cleanup not found. Skipping cleanup") + return 0 + + # Use popen to avoid `dragon-cleanup` doing a kill on all + # python processes, terminating `smart teardown`, and the + # subprocess handling `dragon-cleanup`. Child processes using + # subprocess.run are killed and cleanup is interrupted + with subprocess.Popen( + [str(dragon_cleanup.absolute())], + env=env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) as process: + process.wait() + return process.returncode + + +def execute( + args: argparse.Namespace, _unparsed_args: t.Optional[t.List[str]] = None, / +) -> int: + if args.dragon: + return _do_dragon_teardown() + + return 0 diff --git a/smartsim/_core/_cli/utils.py b/smartsim/_core/_cli/utils.py index 8bf0984df..9c9b46cab 100644 --- a/smartsim/_core/_cli/utils.py +++ b/smartsim/_core/_cli/utils.py @@ -78,13 +78,17 @@ def clean(core_path: Path, _all: bool = False) -> int: """Remove pre existing installations of ML runtimes :param _all: Remove all non-python dependencies - :type _all: bool, optional """ build_temp = core_path / ".third-party" if build_temp.is_dir(): shutil.rmtree(build_temp, ignore_errors=True) + dragon_temp = core_path / ".dragon" + if dragon_temp.is_dir(): + shutil.rmtree(dragon_temp, ignore_errors=True) + logger.info("Successfully removed dragon installation") + lib_path = core_path / 
"lib" if lib_path.is_dir(): # remove RedisAI diff --git a/smartsim/_core/_cli/validate.py b/smartsim/_core/_cli/validate.py index 8ea40ae00..96d46d6ee 100644 --- a/smartsim/_core/_cli/validate.py +++ b/smartsim/_core/_cli/validate.py @@ -30,7 +30,6 @@ import multiprocessing as mp import os import os.path -import socket import tempfile import typing as t from types import TracebackType @@ -40,7 +39,9 @@ from smartsim import Experiment from smartsim._core._cli.utils import SMART_LOGGER_FORMAT +from smartsim._core._install.builder import Device from smartsim._core.utils.helpers import installed_redisai_backends +from smartsim._core.utils.network import find_free_port from smartsim.log import get_logger logger = get_logger("Smart", fmt=SMART_LOGGER_FORMAT) @@ -61,9 +62,6 @@ _TemporaryDirectory = tempfile.TemporaryDirectory -_TCapitalDeviceStr = t.Literal["CPU", "GPU"] - - class _VerificationTempDir(_TemporaryDirectory): """A Temporary directory to be used as a context manager that will only clean itself up if no error is raised within its context @@ -88,7 +86,8 @@ def execute( simple experiment """ backends = installed_redisai_backends() - device: _TCapitalDeviceStr = args.device.upper() + temp_dir = "" + device = Device(args.device) try: with contextlib.ExitStack() as ctx: temp_dir = ctx.enter_context(_VerificationTempDir(dir=os.getcwd())) @@ -98,7 +97,7 @@ def execute( "SR_LOG_FILE", os.path.join(temp_dir, "smartredis.log") ), } - if device == "GPU": + if device == Device.GPU: validate_env["CUDA_VISIBLE_DEVICES"] = "0" ctx.enter_context(_env_vars_set_to(validate_env)) test_install( @@ -112,10 +111,11 @@ def execute( except Exception as e: logger.error( "SmartSim failed to run a simple experiment!\n" - f"Experiment failed due to the following exception:\n{e}\n\n" - f"Output files are available at `{temp_dir}`", + f"Experiment failed due to the following exception:\n{e}", exc_info=True, ) + if temp_dir: + logger.info(f"Output files are available at `{temp_dir}`") 
return os.EX_SOFTWARE return os.EX_OK @@ -136,8 +136,8 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: parser.add_argument( "--device", type=str.lower, - default="cpu", - choices=["cpu", "gpu"], + default=Device.CPU.value, + choices=[device.value for device in Device], help="Device to test the ML backends against", ) @@ -145,14 +145,15 @@ def configure_parser(parser: argparse.ArgumentParser) -> None: def test_install( location: str, port: t.Optional[int], - device: _TCapitalDeviceStr, + device: Device, with_tf: bool, with_pt: bool, with_onnx: bool, ) -> None: exp = Experiment("ValidationExperiment", exp_path=location, launcher="local") - exp.disable_telemetry() - port = _find_free_port() if port is None else port + exp.telemetry.disable() + port = find_free_port() if port is None else port + with _make_managed_local_orc(exp, port) as client: logger.info("Verifying Tensor Transfer") client.put_tensor("plain-tensor", np.ones((1, 1, 3, 3))) @@ -205,15 +206,7 @@ def _make_managed_local_orc( exp.stop(orc) -def _find_free_port() -> int: - """A 'good enough' way to find an open port to bind to""" - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: - sock.bind(("0.0.0.0", 0)) - _, port = sock.getsockname() - return int(port) - - -def _test_tf_install(client: Client, tmp_dir: str, device: _TCapitalDeviceStr) -> None: +def _test_tf_install(client: Client, tmp_dir: str, device: Device) -> None: recv_conn, send_conn = mp.Pipe(duplex=False) # Build the model in a subproc so that keras does not hog the gpu proc = mp.Process(target=_build_tf_frozen_model, args=(send_conn, tmp_dir)) @@ -235,7 +228,12 @@ def _test_tf_install(client: Client, tmp_dir: str, device: _TCapitalDeviceStr) - ) from e client.set_model_from_file( - "keras-fcn", model_path, "TF", device=device, inputs=inputs, outputs=outputs + "keras-fcn", + model_path, + "TF", + device=device.value.upper(), + inputs=inputs, + outputs=outputs, ) client.put_tensor("keras-input", 
np.random.rand(1, 28, 28).astype(np.float32)) client.run_model("keras-fcn", inputs=["keras-input"], outputs=["keras-output"]) @@ -263,7 +261,7 @@ def _build_tf_frozen_model(conn: "Connection", tmp_dir: str) -> None: conn.send((model_path, inputs, outputs)) -def _test_torch_install(client: Client, device: _TCapitalDeviceStr) -> None: +def _test_torch_install(client: Client, device: Device) -> None: import torch from torch import nn @@ -275,7 +273,7 @@ def __init__(self) -> None: def forward(self, x: torch.Tensor) -> torch.Tensor: return self.conv(x) - if device == "GPU": + if device == Device.GPU: device_ = torch.device("cuda") else: device_ = torch.device("cpu") @@ -291,13 +289,13 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: torch.jit.save(traced, buffer) # type: ignore[no-untyped-call] model = buffer.getvalue() - client.set_model("torch-nn", model, backend="TORCH", device=device) + client.set_model("torch-nn", model, backend="TORCH", device=device.value.upper()) client.put_tensor("torch-in", torch.rand(1, 1, 3, 3).numpy()) client.run_model("torch-nn", inputs=["torch-in"], outputs=["torch-out"]) client.get_tensor("torch-out") -def _test_onnx_install(client: Client, device: _TCapitalDeviceStr) -> None: +def _test_onnx_install(client: Client, device: Device) -> None: from skl2onnx import to_onnx from sklearn.cluster import KMeans @@ -310,7 +308,7 @@ def _test_onnx_install(client: Client, device: _TCapitalDeviceStr) -> None: sample = np.arange(20, dtype=np.float32).reshape(10, 2) client.put_tensor("onnx-input", sample) - client.set_model("onnx-kmeans", model, "ONNX", device=device) + client.set_model("onnx-kmeans", model, "ONNX", device=device.value.upper()) client.run_model( "onnx-kmeans", inputs=["onnx-input"], outputs=["onnx-labels", "onnx-transform"] ) diff --git a/smartsim/_core/_install/buildenv.py b/smartsim/_core/_install/buildenv.py index c100ac80e..e0cf5a522 100644 --- a/smartsim/_core/_install/buildenv.py +++ b/smartsim/_core/_install/buildenv.py 
@@ -267,15 +267,15 @@ class Versioner: """ # compatible Python version - PYTHON_MIN = Version_("3.8.0") + PYTHON_MIN = Version_("3.9.0") # Versions - SMARTSIM = Version_(get_env("SMARTSIM_VERSION", "0.6.2")) - SMARTREDIS = Version_(get_env("SMARTREDIS_VERSION", "0.5.2")) + SMARTSIM = Version_(get_env("SMARTSIM_VERSION", "0.7.0")) + SMARTREDIS = Version_(get_env("SMARTREDIS_VERSION", "0.5.3")) SMARTSIM_SUFFIX = get_env("SMARTSIM_SUFFIX", "") # Redis - REDIS = Version_(get_env("SMARTSIM_REDIS", "7.0.5")) + REDIS = Version_(get_env("SMARTSIM_REDIS", "7.2.4")) REDIS_URL = get_env("SMARTSIM_REDIS_URL", "https://github.com/redis/redis.git/") REDIS_BRANCH = get_env("SMARTSIM_REDIS_BRANCH", REDIS) diff --git a/smartsim/_core/_install/builder.py b/smartsim/_core/_install/builder.py index c098cfd01..fb8ec5b81 100644 --- a/smartsim/_core/_install/builder.py +++ b/smartsim/_core/_install/builder.py @@ -28,6 +28,7 @@ import concurrent.futures import enum +import fileinput import itertools import os import platform @@ -53,8 +54,7 @@ # TODO: check cmake version and use system if possible to avoid conflicts TRedisAIBackendStr = t.Literal["tensorflow", "torch", "onnxruntime", "tflite"] -TDeviceStr = t.Literal["cpu", "gpu"] - +_PathLike = t.Union[str, "os.PathLike[str]"] _T = t.TypeVar("_T") _U = t.TypeVar("_U") @@ -63,7 +63,6 @@ def expand_exe_path(exe: str) -> str: """Takes an executable and returns the full path to that executable :param exe: executable or file - :type exe: str :raises TypeError: if file is not an executable :raises FileNotFoundError: if executable cannot be found """ @@ -96,6 +95,11 @@ def from_str(cls, string: str, /) -> "Architecture": raise BuildError(f"Unrecognized or unsupported architecture: {string}") +class Device(enum.Enum): + CPU = "cpu" + GPU = "gpu" + + class OperatingSystem(enum.Enum): LINUX = ("linux", "linux2") DARWIN = ("darwin",) @@ -173,7 +177,7 @@ def is_built(self) -> bool: raise NotImplementedError def build_from_git( - self, git_url: str, 
branch: str, device: TDeviceStr = "cpu" + self, git_url: str, branch: str, device: Device = Device.CPU ) -> None: raise NotImplementedError @@ -274,13 +278,11 @@ def is_built(self) -> bool: return redis_files.issubset(bin_files) or keydb_files.issubset(bin_files) def build_from_git( - self, git_url: str, branch: str, device: TDeviceStr = "cpu" + self, git_url: str, branch: str, device: Device = Device.CPU ) -> None: """Build Redis from git :param git_url: url from which to retrieve Redis - :type git_url: str :param branch: branch to checkout - :type branch: str """ # pylint: disable=too-many-locals database_name = "keydb" if "KeyDB" in git_url else "redis" @@ -364,7 +366,7 @@ class _RAIBuildDependency(ABC): def __rai_dependency_name__(self) -> str: ... @abstractmethod - def __place_for_rai__(self, target: t.Union[str, "os.PathLike[str]"]) -> Path: ... + def __place_for_rai__(self, target: _PathLike) -> Path: ... @staticmethod @abstractmethod @@ -372,7 +374,7 @@ def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: def _place_rai_dep_at( - target: t.Union[str, "os.PathLike[str]"], verbose: bool + target: _PathLike, verbose: bool ) -> t.Callable[[_RAIBuildDependency], Path]: def _place(dep: _RAIBuildDependency) -> Path: if verbose: @@ -405,6 +407,7 @@ def __init__( build_onnx: bool = False, jobs: int = 1, verbose: bool = False, + torch_with_mkl: bool = True, ) -> None: super().__init__( build_env or {}, @@ -423,6 +426,9 @@ def __init__( self.libtf_dir = libtf_dir self.torch_dir = torch_dir + # extra configuration options + self.torch_with_mkl = torch_with_mkl + # Sanity checks self._validate_platform() @@ -480,7 +486,7 @@ def build_onnx(self) -> bool: def fetch_onnx(self) -> bool: return self.build_onnx - def get_deps_dir_path_for(self, device: TDeviceStr) -> Path: + def get_deps_dir_path_for(self, device: Device) -> Path: def fail_to_format(reason: str) -> BuildError: # pragma: no cover return BuildError(f"Failed to format RedisAI 
dependency path: {reason}") @@ -497,10 +503,10 @@ def fail_to_format(reason: str) -> BuildError: # pragma: no cover arch = "arm64v8" else: # pragma: no cover raise fail_to_format(f"Unknown architecture: {architecture}") - return self.rai_build_path / f"deps/{os_}-{arch}-{device}" + return self.rai_build_path / f"deps/{os_}-{arch}-{device.value}" def _get_deps_to_fetch_for( - self, device: TDeviceStr + self, device: Device ) -> t.Tuple[_RAIBuildDependency, ...]: os_, arch = self._platform # TODO: It would be nice if the backend version numbers were declared @@ -512,8 +518,8 @@ def _get_deps_to_fetch_for( # DLPack is always required fetchable_deps: t.List[_RAIBuildDependency] = [_DLPackRepository("v0.5_RAI")] if self.fetch_torch: - pt_dep = _choose_pt_variant(os_) - fetchable_deps.append(pt_dep(arch, device, "2.0.1")) + pt_dep = _choose_pt_variant(os_)(arch, device, "2.0.1", self.torch_with_mkl) + fetchable_deps.append(pt_dep) if self.fetch_tf: fetchable_deps.append(_TFArchive(os_, arch, device, "2.13.1")) if self.fetch_onnx: @@ -521,14 +527,13 @@ def _get_deps_to_fetch_for( return tuple(fetchable_deps) - def symlink_libtf(self, device: str) -> None: + def symlink_libtf(self, device: Device) -> None: """Add symbolic link to available libtensorflow in RedisAI deps. 
:param device: cpu or gpu - :type device: str """ rai_deps_path = sorted( - self.rai_build_path.glob(os.path.join("deps", f"*{device}*")) + self.rai_build_path.glob(os.path.join("deps", f"*{device.value}*")) ) if not rai_deps_path: raise FileNotFoundError("Could not find RedisAI 'deps' directory") @@ -577,16 +582,13 @@ def symlink_libtf(self, device: str) -> None: os.symlink(src_file, dst_file) def build_from_git( - self, git_url: str, branch: str, device: TDeviceStr = "cpu" + self, git_url: str, branch: str, device: Device = Device.CPU ) -> None: """Build RedisAI from git :param git_url: url from which to retrieve RedisAI - :type git_url: str :param branch: branch to checkout - :type branch: str :param device: cpu or gpu - :type device: str """ # delete previous build dir (should never be there) if self.rai_build_path.is_dir(): @@ -616,14 +618,14 @@ def build_from_git( self.run_command(clone_cmd, out=subprocess.DEVNULL, cwd=self.build_dir) self._fetch_deps_for(device) - if self.libtf_dir and device: + if self.libtf_dir and device.value: self.symlink_libtf(device) build_cmd = self._rai_build_env_prefix( with_pt=self.build_torch, with_tf=self.build_tf, with_ort=self.build_onnx, - extra_env={"GPU": "1" if device == "gpu" else "0"}, + extra_env={"GPU": "1" if device == Device.GPU else "0"}, ) if self.torch_dir: @@ -674,7 +676,7 @@ def _rai_build_env_prefix( *(f"{key}={val}" for key, val in extra_env.items()), ] - def _fetch_deps_for(self, device: TDeviceStr) -> None: + def _fetch_deps_for(self, device: Device) -> None: if not self.rai_build_path.is_dir(): raise BuildError("RedisAI build directory not found") @@ -693,13 +695,12 @@ def _fetch_deps_for(self, device: TDeviceStr) -> None: f"found {len(unique_placed_paths)}" ) - def _install_backends(self, device: str) -> None: + def _install_backends(self, device: Device) -> None: """Move backend libraries to smartsim/_core/lib/ :param device: cpu or cpu - :type device: str """ self.rai_install_path = 
self.rai_build_path.joinpath( - f"install-{device}" + f"install-{device.value}" ).resolve() rai_lib = self.rai_install_path / "redisai.so" rai_backends = self.rai_install_path / "backends" @@ -750,7 +751,7 @@ def url(self) -> str: ... class _WebGitRepository(_WebLocation): def clone( self, - target: t.Union[str, "os.PathLike[str]"], + target: _PathLike, depth: t.Optional[int] = None, branch: t.Optional[str] = None, ) -> None: @@ -780,7 +781,7 @@ def url(self) -> str: def __rai_dependency_name__(self) -> str: return f"dlpack@{self.url}" - def __place_for_rai__(self, target: t.Union[str, "os.PathLike[str]"]) -> Path: + def __place_for_rai__(self, target: _PathLike) -> Path: target = Path(target) / "dlpack" self.clone(target, branch=self.version, depth=1) if not target.is_dir(): @@ -794,7 +795,7 @@ def name(self) -> str: _, name = self.url.rsplit("/", 1) return name - def download(self, target: t.Union[str, "os.PathLike[str]"]) -> Path: + def download(self, target: _PathLike) -> Path: target = Path(target) if target.is_dir(): target = target / self.name @@ -804,37 +805,41 @@ def download(self, target: t.Union[str, "os.PathLike[str]"]) -> Path: class _ExtractableWebArchive(_WebArchive, ABC): @abstractmethod - def _extract_download( - self, download_path: Path, target: t.Union[str, "os.PathLike[str]"] - ) -> None: ... + def _extract_download(self, download_path: Path, target: _PathLike) -> None: ... 
- def extract(self, target: t.Union[str, "os.PathLike[str]"]) -> None: + def extract(self, target: _PathLike) -> None: with tempfile.TemporaryDirectory() as tmp_dir: arch_path = self.download(tmp_dir) self._extract_download(arch_path, target) class _WebTGZ(_ExtractableWebArchive): - def _extract_download( - self, download_path: Path, target: t.Union[str, "os.PathLike[str]"] - ) -> None: + def _extract_download(self, download_path: Path, target: _PathLike) -> None: with tarfile.open(download_path, "r") as tgz_file: tgz_file.extractall(target) class _WebZip(_ExtractableWebArchive): - def _extract_download( - self, download_path: Path, target: t.Union[str, "os.PathLike[str]"] - ) -> None: + def _extract_download(self, download_path: Path, target: _PathLike) -> None: with zipfile.ZipFile(download_path, "r") as zip_file: zip_file.extractall(target) +class WebTGZ(_WebTGZ): + def __init__(self, url: str) -> None: + self._url = url + + @property + def url(self) -> str: + return self._url + + @dataclass(frozen=True) class _PTArchive(_WebZip, _RAIBuildDependency): architecture: Architecture - device: TDeviceStr + device: Device version: str + with_mkl: bool @staticmethod def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: @@ -849,7 +854,20 @@ def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: def __rai_dependency_name__(self) -> str: return f"libtorch@{self.url}" - def __place_for_rai__(self, target: t.Union[str, "os.PathLike[str]"]) -> Path: + @staticmethod + def _patch_out_mkl(libtorch_root: Path) -> None: + _modify_source_files( + libtorch_root / "share/cmake/Caffe2/public/mkl.cmake", + r"find_package\(MKL QUIET\)", + "# find_package(MKL QUIET)", + ) + + def extract(self, target: _PathLike) -> None: + super().extract(target) + if not self.with_mkl: + self._patch_out_mkl(Path(target)) + + def __place_for_rai__(self, target: _PathLike) -> Path: self.extract(target) target = Path(target) / "libtorch" if not 
target.is_dir(): @@ -865,10 +883,10 @@ def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: @property def url(self) -> str: - if self.device == "gpu": + if self.device == Device.GPU: pt_build = "cu117" else: - pt_build = "cpu" + pt_build = Device.CPU.value # pylint: disable-next=line-too-long libtorch_archive = ( f"libtorch-cxx11-abi-shared-without-deps-{self.version}%2B{pt_build}.zip" @@ -887,10 +905,10 @@ def supported_platforms() -> t.Sequence[t.Tuple[OperatingSystem, Architecture]]: @property def url(self) -> str: - if self.device == "gpu": + if self.device == Device.GPU: raise BuildError("RedisAI does not currently support GPU on Mac OSX") if self.architecture == Architecture.X64: - pt_build = "cpu" + pt_build = Device.CPU.value libtorch_archive = f"libtorch-macos-{self.version}.zip" root_url = "https://download.pytorch.org/libtorch" return f"{root_url}/{pt_build}/{libtorch_archive}" @@ -902,7 +920,7 @@ def url(self) -> str: ) return f"{root_url}/{libtorch_archive}" - raise BuildError("Unsupported architecture for Pytorch: {self.architecture}") + raise BuildError(f"Unsupported architecture for Pytorch: {self.architecture}") def _choose_pt_variant( @@ -921,7 +939,7 @@ def _choose_pt_variant( class _TFArchive(_WebTGZ, _RAIBuildDependency): os_: OperatingSystem architecture: Architecture - device: TDeviceStr + device: Device version: str @staticmethod @@ -937,7 +955,7 @@ def url(self) -> str: tf_arch = "x86_64" else: raise BuildError( - "Unexpected Architecture for TF Archive: {self.architecture}" + f"Unexpected Architecture for TF Archive: {self.architecture}" ) if self.os_ == OperatingSystem.LINUX: @@ -945,21 +963,21 @@ def url(self) -> str: tf_device = self.device elif self.os_ == OperatingSystem.DARWIN: tf_os = "darwin" - if self.device == "gpu": + if self.device == Device.GPU: raise BuildError("RedisAI does not currently support GPU on Macos") - tf_device = "cpu" + tf_device = Device.CPU else: - raise BuildError("Unexpected OS for 
TF Archive: {self.os_}") + raise BuildError(f"Unexpected OS for TF Archive: {self.os_}") return ( "https://storage.googleapis.com/tensorflow/libtensorflow/" - f"libtensorflow-{tf_device}-{tf_os}-{tf_arch}-{self.version}.tar.gz" + f"libtensorflow-{tf_device.value}-{tf_os}-{tf_arch}-{self.version}.tar.gz" ) @property def __rai_dependency_name__(self) -> str: return f"libtensorflow@{self.url}" - def __place_for_rai__(self, target: t.Union[str, "os.PathLike[str]"]) -> Path: + def __place_for_rai__(self, target: _PathLike) -> Path: target = Path(target) / "libtensorflow" target.mkdir() self.extract(target) @@ -970,7 +988,7 @@ def __place_for_rai__(self, target: t.Union[str, "os.PathLike[str]"]) -> Path: @dataclass(frozen=True) class _ORTArchive(_WebTGZ, _RAIBuildDependency): os_: OperatingSystem - device: TDeviceStr + device: Device version: str @staticmethod @@ -989,15 +1007,15 @@ def url(self) -> str: if self.os_ == OperatingSystem.LINUX: ort_os = "linux" ort_arch = "x64" - ort_build = "-gpu" if self.device == "gpu" else "" + ort_build = "-gpu" if self.device == Device.GPU else "" elif self.os_ == OperatingSystem.DARWIN: ort_os = "osx" ort_arch = "x86_64" ort_build = "" - if self.device == "gpu": + if self.device == Device.GPU: raise BuildError("RedisAI does not currently support GPU on Macos") else: - raise BuildError("Unexpected OS for TF Archive: {self.os_}") + raise BuildError(f"Unexpected OS for TF Archive: {self.os_}") ort_archive = f"onnxruntime-{ort_os}-{ort_arch}{ort_build}-{self.version}.tgz" return f"{ort_url_base}/{ort_archive}" @@ -1005,7 +1023,7 @@ def url(self) -> str: def __rai_dependency_name__(self) -> str: return f"onnxruntime@{self.url}" - def __place_for_rai__(self, target: t.Union[str, "os.PathLike[str]"]) -> Path: + def __place_for_rai__(self, target: _PathLike) -> Path: target = Path(target).resolve() / "onnxruntime" self.extract(target) try: @@ -1046,3 +1064,13 @@ def config_git_command(plat: Platform, cmd: t.Sequence[str]) -> t.List[str]: + 
cmd[where:] ) return cmd + + +def _modify_source_files( + files: t.Union[_PathLike, t.Iterable[_PathLike]], regex: str, replacement: str +) -> None: + compiled_regex = re.compile(regex) + with fileinput.input(files=files, inplace=True) as handles: + for line in handles: + line = compiled_regex.sub(replacement, line) + print(line, end="") diff --git a/smartsim/_core/config/config.py b/smartsim/_core/config/config.py index 42a548c42..9cf950b21 100644 --- a/smartsim/_core/config/config.py +++ b/smartsim/_core/config/config.py @@ -89,6 +89,7 @@ # - Default: None +# pylint: disable-next=too-many-public-methods class Config: def __init__(self) -> None: # SmartSim/smartsim/_core @@ -99,6 +100,7 @@ def __init__(self) -> None: self.lib_path = Path(dependency_path, "lib").resolve() self.bin_path = Path(dependency_path, "bin").resolve() self.conf_path = Path(dependency_path, "config", "redis.conf") + self.conf_dir = Path(self.core_path, "config") @property def redisai(self) -> str: @@ -152,6 +154,30 @@ def database_file_parse_trials(self) -> int: def database_file_parse_interval(self) -> int: return int(os.getenv("SMARTSIM_DB_FILE_PARSE_INTERVAL", "2")) + @property + def dragon_dotenv(self) -> Path: + """Returns the path to a .env file containing dragon environment variables""" + return self.conf_dir / "dragon" / ".env" + + @property + def dragon_server_path(self) -> t.Optional[str]: + return os.getenv( + "SMARTSIM_DRAGON_SERVER_PATH", + os.getenv("SMARTSIM_DRAGON_SERVER_PATH_EXP", None), + ) + + @property + def dragon_server_timeout(self) -> int: + return int(os.getenv("SMARTSIM_DRAGON_TIMEOUT", "30000")) + + @property + def dragon_server_startup_timeout(self) -> int: + return int(os.getenv("SMARTSIM_DRAGON_STARTUP_TIMEOUT", "300000")) + + @property + def dragon_transport(self) -> str: + return os.getenv("SMARTSIM_DRAGON_TRANSPORT", "hsta") + @property def log_level(self) -> str: return os.environ.get("SMARTSIM_LOG_LEVEL", "info") @@ -177,8 +203,14 @@ def test_num_gpus(self) 
-> int: # pragma: no cover return int(os.environ.get("SMARTSIM_TEST_NUM_GPUS") or 1) @property - def test_port(self) -> int: # pragma: no cover - return int(os.environ.get("SMARTSIM_TEST_PORT", 6780)) + def test_ports(self) -> t.Sequence[int]: # pragma: no cover + min_required_ports = 25 + first_port = int(os.environ.get("SMARTSIM_TEST_PORT", 6780)) + num_ports = max( + int(os.environ.get("SMARTSIM_TEST_NUM_PORTS", min_required_ports)), + min_required_ports, + ) + return range(first_port, first_port + num_ports) @property def test_batch_resources(self) -> t.Dict[t.Any, t.Any]: # pragma: no cover @@ -219,6 +251,11 @@ def test_account(self) -> t.Optional[str]: # pragma: no cover # no account by default return os.environ.get("SMARTSIM_TEST_ACCOUNT", None) + @property + def test_mpi(self) -> bool: # pragma: no cover + # By default, test MPI app if it compiles + return int(os.environ.get("SMARTSIM_TEST_MPI", "1")) > 0 + @property def telemetry_frequency(self) -> int: return int(os.environ.get("SMARTSIM_TELEMETRY_FREQUENCY", 5)) @@ -235,6 +272,29 @@ def telemetry_cooldown(self) -> int: def telemetry_subdir(self) -> str: return ".smartsim/telemetry" + @property + def dragon_default_subdir(self) -> str: + return ".smartsim/dragon" + + @property + def dragon_log_filename(self) -> str: + return "dragon_config.log" + + @property + def smartsim_key_path(self) -> str: + """Path to a root directory used for persistence of key files. Default + value `$HOME/.smartsim/keys`. User-overrideable by setting the environment + variable `SMARTSIM_KEY_PATH`. + + :returns: The configured key path. 
+ """ + default_path = Path.home() / ".smartsim" / "keys" + return os.environ.get("SMARTSIM_KEY_PATH", str(default_path)) + + @property + def dragon_pin(self) -> str: + return "0.9" + @lru_cache(maxsize=128, typed=False) def get_config() -> Config: diff --git a/smartsim/_core/control/controller.py b/smartsim/_core/control/controller.py index 3b673970a..43a218545 100644 --- a/smartsim/_core/control/controller.py +++ b/smartsim/_core/control/controller.py @@ -27,6 +27,7 @@ from __future__ import annotations import itertools +import os import os.path as osp import pathlib import pickle @@ -36,14 +37,17 @@ import threading import time import typing as t -from os import environ from smartredis import Client, ConfigOptions from smartsim._core.utils.network import get_ip_from_host from ..._core.launcher.step import Step -from ..._core.utils.helpers import unpack_colo_db_identifier, unpack_db_identifier +from ..._core.utils.helpers import ( + SignalInterceptionStack, + unpack_colo_db_identifier, + unpack_db_identifier, +) from ..._core.utils.redis import ( db_is_active, set_ml_model, @@ -51,7 +55,7 @@ shutdown_db_node, ) from ...database import Orchestrator -from ...entity import Ensemble, EntityList, EntitySequence, Model, SmartSimEntity +from ...entity import Ensemble, EntitySequence, Model, SmartSimEntity from ...error import ( LauncherError, SmartSimError, @@ -61,16 +65,25 @@ ) from ...log import get_logger from ...servertype import CLUSTERED, STANDALONE -from ...status import STATUS_CANCELLED, STATUS_RUNNING, TERMINAL_STATUSES +from ...status import TERMINAL_STATUSES, SmartSimStatus from ..config import CONFIG -from ..launcher import LocalLauncher, LSFLauncher, PBSLauncher, SlurmLauncher +from ..launcher import ( + DragonLauncher, + LocalLauncher, + LSFLauncher, + PBSLauncher, + SlurmLauncher, +) from ..launcher.launcher import Launcher from ..utils import check_cluster_status, create_cluster, serialize +from .controller_utils import _AnonymousBatchJob, 
_look_up_launched_data from .job import Job from .jobmanager import JobManager from .manifest import LaunchedManifest, LaunchedManifestBuilder, Manifest if t.TYPE_CHECKING: + from types import FrameType + from ..utils.serialize import TStepLaunchMetaData @@ -90,7 +103,6 @@ def __init__(self, launcher: str = "local") -> None: """Initialize a Controller :param launcher: the type of launcher being used - :type launcher: str """ self._jobs = JobManager(JM_LOCK) self.init_launcher(launcher) @@ -112,9 +124,16 @@ def start( The controller will start the job-manager thread upon execution of all jobs. """ + # launch a telemetry monitor to track job progress + if CONFIG.telemetry_enabled: + self._start_telemetry_monitor(exp_path) + self._jobs.kill_on_interrupt = kill_on_interrupt + # register custom signal handler for ^C (SIGINT) - signal.signal(signal.SIGINT, self._jobs.signal_interrupt) + SignalInterceptionStack.get(signal.SIGINT).push_unique( + self._jobs.signal_interrupt + ) launched = self._launch(exp_name, exp_path, manifest) # start the job manager thread if not already started @@ -125,16 +144,17 @@ def start( launched.map(_look_up_launched_data(self._launcher)) ) - # launch a telemetry monitor to track job progress - if CONFIG.telemetry_enabled: - self._start_telemetry_monitor(exp_path) - # block until all non-database jobs are complete if block: # poll handles its own keyboard interrupt as - # it may be called seperately + # it may be called separately self.poll(5, True, kill_on_interrupt=kill_on_interrupt) + @property + def active_orchestrator_jobs(self) -> t.Dict[str, Job]: + """Return active orchestrator jobs.""" + return {**self._jobs.db_jobs} + @property def orchestrator_active(self) -> bool: with JM_LOCK: @@ -148,11 +168,8 @@ def poll( """Poll running jobs and receive logging output of job status :param interval: number of seconds to wait before polling again - :type interval: int :param verbose: set verbosity - :type verbose: bool :param kill_on_interrupt: 
flag for killing jobs when SIGINT is received - :type kill_on_interrupt: bool, optional """ self._jobs.kill_on_interrupt = kill_on_interrupt to_monitor = self._jobs.jobs @@ -172,7 +189,6 @@ def finished( """Return a boolean indicating wether a job has finished or not :param entity: object launched by SmartSim. - :type entity: Entity | EntitySequence :returns: bool :raises ValueError: if entity has not been launched yet """ @@ -202,7 +218,6 @@ def stop_entity( the jobmanager so that the job appears as "cancelled". :param entity: entity to be stopped - :type entity: Entity | EntitySequence """ with JM_LOCK: job = self._jobs[entity.name] @@ -225,8 +240,8 @@ def stop_entity( def stop_db(self, db: Orchestrator) -> None: """Stop an orchestrator + :param db: orchestrator to be stopped - :type db: Orchestrator """ if db.batch: self.stop_entity(db) @@ -243,7 +258,13 @@ def stop_db(self, db: Orchestrator) -> None: continue job = self._jobs[node.name] - job.set_status(STATUS_CANCELLED, "", 0, output=None, error=None) + job.set_status( + SmartSimStatus.STATUS_CANCELLED, + "", + 0, + output=None, + error=None, + ) self._jobs.move_to_completed(job) db.reset_hosts() @@ -252,7 +273,6 @@ def stop_entity_list(self, entity_list: EntitySequence[SmartSimEntity]) -> None: """Stop an instance of an entity list :param entity_list: entity list to be stopped - :type entity_list: EntitySequence """ if entity_list.batch: @@ -271,14 +291,12 @@ def get_jobs(self) -> t.Dict[str, Job]: def get_entity_status( self, entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]] - ) -> str: + ) -> SmartSimStatus: """Get the status of an entity :param entity: entity to get status of - :type entity: SmartSimEntity | EntitySequence :raises TypeError: if not SmartSimEntity | EntitySequence :return: status of entity - :rtype: str """ if not isinstance(entity, (SmartSimEntity, EntitySequence)): raise TypeError( @@ -289,15 +307,13 @@ def get_entity_status( def get_entity_list_status( self, entity_list: 
EntitySequence[SmartSimEntity] - ) -> t.List[str]: + ) -> t.List[SmartSimStatus]: """Get the statuses of an entity list :param entity_list: entity list containing entities to get statuses of - :type entity_list: EntitySequence :raises TypeError: if not EntitySequence - :return: list of str statuses - :rtype: list + :return: list of SmartSimStatus statuses """ if not isinstance(entity_list, EntitySequence): raise TypeError( @@ -316,7 +332,6 @@ def init_launcher(self, launcher: str) -> None: and local launching :param launcher: which launcher to initialize - :type launcher: str :raises SSUnsupportedError: if a string is passed that is not a supported launcher :raises TypeError: if no launcher argument is provided. @@ -327,6 +342,7 @@ def init_launcher(self, launcher: str) -> None: "pals": PBSLauncher, "lsf": LSFLauncher, "local": LocalLauncher, + "dragon": DragonLauncher, } if launcher is not None: @@ -340,6 +356,37 @@ def init_launcher(self, launcher: str) -> None: else: raise TypeError("Must provide a 'launcher' argument") + @staticmethod + def symlink_output_files( + job_step: Step, entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]] + ) -> None: + """Create symlinks for entity output files that point to the output files + under the .smartsim directory + + :param job_step: Job step instance + :param entity: Entity instance + """ + historical_out, historical_err = map(pathlib.Path, job_step.get_output_files()) + entity_out = pathlib.Path(entity.path) / f"{entity.name}.out" + entity_err = pathlib.Path(entity.path) / f"{entity.name}.err" + + # check if there is already a link to a previous run + if entity_out.is_symlink() or entity_err.is_symlink(): + entity_out.unlink() + entity_err.unlink() + + historical_err.touch() + historical_out.touch() + + if historical_err.exists() and historical_out.exists(): + entity_out.symlink_to(historical_out) + entity_err.symlink_to(historical_err) + else: + raise FileNotFoundError( + f"Output files for {entity.name} could 
not be found. " + "Symlinking files failed." + ) + def _launch( self, exp_name: str, exp_path: str, manifest: Manifest ) -> LaunchedManifest[t.Tuple[str, Step]]: @@ -349,15 +396,14 @@ def _launch( address of the database can be given to following entities :param exp_name: The name of the launching experiment - :type exp_name: str :param exp_path: path to location of ``Experiment`` directory if generated - :type exp_path: str :param manifest: Manifest of deployables to launch - :type manifest: Manifest """ manifest_builder = LaunchedManifestBuilder[t.Tuple[str, Step]]( - exp_name=exp_name, exp_path=exp_path, launcher_name=str(self._launcher) + exp_name=exp_name, + exp_path=exp_path, + launcher_name=str(self._launcher), ) # Loop over deployables to launch and launch multiple orchestrators for orchestrator in manifest.dbs: @@ -385,6 +431,11 @@ def _launch( steps: t.List[ t.Tuple[Step, t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]]] ] = [] + + symlink_substeps: t.List[ + t.Tuple[Step, t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]]] + ] = [] + for elist in manifest.ensembles: ens_telem_dir = manifest_builder.run_telemetry_subdirectory / "ensemble" if elist.batch: @@ -392,6 +443,11 @@ def _launch( manifest_builder.add_ensemble( elist, [(batch_step.name, step) for step in substeps] ) + + # symlink substeps to maintain directory structure + for substep, substep_entity in zip(substeps, elist.models): + symlink_substeps.append((substep, substep_entity)) + steps.append((batch_step, elist)) else: # if ensemble is to be run as separate job steps, aka not in a batch @@ -409,19 +465,26 @@ def _launch( model_telem_dir = manifest_builder.run_telemetry_subdirectory / "model" if model.batch_settings: anon_entity_list = _AnonymousBatchJob(model) - batch_step, _ = self._create_batch_job_step( + batch_step, substeps = self._create_batch_job_step( anon_entity_list, model_telem_dir ) manifest_builder.add_model(model, (batch_step.name, batch_step)) + + 
symlink_substeps.append((substeps[0], model)) steps.append((batch_step, model)) else: job_step = self._create_job_step(model, model_telem_dir) manifest_builder.add_model(model, (job_step.name, job_step)) steps.append((job_step, model)) - # launch steps + # launch and symlink steps for step, entity in steps: self._launch_step(step, entity) + self.symlink_output_files(step, entity) + + # symlink substeps to maintain directory structure + for substep, entity in symlink_substeps: + self.symlink_output_files(substep, entity) return manifest_builder.finalize() @@ -437,10 +500,8 @@ def _launch_orchestrator( set them in the JobManager :param orchestrator: orchestrator to launch - :type orchestrator: Orchestrator :param manifest_builder: An `LaunchedManifestBuilder` to record the names and `Step`s of the launched orchestrator - :type manifest_builder: LaunchedManifestBuilder[tuple[str, Step]] """ orchestrator.remove_stale_files() orc_telem_dir = manifest_builder.run_telemetry_subdirectory / "database" @@ -453,7 +514,13 @@ def _launch_orchestrator( manifest_builder.add_database( orchestrator, [(orc_batch_step.name, step) for step in substeps] ) + self._launch_step(orc_batch_step, orchestrator) + self.symlink_output_files(orc_batch_step, orchestrator) + + # symlink substeps to maintain directory structure + for substep, substep_entity in zip(substeps, orchestrator.entities): + self.symlink_output_files(substep, substep_entity) # if orchestrator was run on existing allocation, locally, or in allocation else: @@ -466,6 +533,7 @@ def _launch_orchestrator( ) for db_step in db_steps: self._launch_step(*db_step) + self.symlink_output_files(*db_step) # wait for orchestrator to spin up self._orchestrator_launch_wait(orchestrator) @@ -506,19 +574,43 @@ def _launch_step( """Use the launcher to launch a job step :param job_step: a job step instance - :type job_step: Step :param entity: entity instance - :type entity: SmartSimEntity :raises SmartSimError: if launch fails """ - try: - 
job_id = self._launcher.run(job_step) - except LauncherError as e: - msg = f"An error occurred when launching {entity.name} \n" - msg += "Check error and output files for details.\n" - msg += f"{entity}" - logger.error(msg) - raise SmartSimError(f"Job step {entity.name} failed to launch") from e + # attempt to retrieve entity name in JobManager.completed + completed_job = self._jobs.completed.get(entity.name, None) + + # if completed job DNE and is the entity name is not + # running in JobManager.jobs or JobManager.db_jobs, + # launch the job + if completed_job is None and ( + entity.name not in self._jobs.jobs and entity.name not in self._jobs.db_jobs + ): + try: + job_id = self._launcher.run(job_step) + except LauncherError as e: + msg = f"An error occurred when launching {entity.name} \n" + msg += "Check error and output files for details.\n" + msg += f"{entity}" + logger.error(msg) + raise SmartSimError(f"Job step {entity.name} failed to launch") from e + + # if the completed job does exist and the entity passed in is the same + # that has ran and completed, relaunch the entity. + elif completed_job is not None and completed_job.entity is entity: + try: + job_id = self._launcher.run(job_step) + except LauncherError as e: + msg = f"An error occurred when launching {entity.name} \n" + msg += "Check error and output files for details.\n" + msg += f"{entity}" + logger.error(msg) + raise SmartSimError(f"Job step {entity.name} failed to launch") from e + + # the entity is using a duplicate name of an existing entity in + # the experiment, throw an error + else: + raise SSUnsupportedError("SmartSim entities cannot have duplicate names.") # a job step is a task if it is not managed by a workload manager (i.e. 
Slurm) # but is rather started, monitored, and exited through the Popen interface @@ -540,13 +632,10 @@ def _create_batch_job_step( """Use launcher to create batch job step :param entity_list: EntityList to launch as batch - :type entity_list: EntityList :param telemetry_dir: Path to a directory in which the batch job step may write telemetry events - :type telemetry_dir: pathlib.Path :return: batch job step instance and a list of run steps to be executed within the batch job - :rtype: tuple[Step, list[Step]] """ if not entity_list.batch_settings: raise ValueError( @@ -558,7 +647,7 @@ def _create_batch_job_step( entity_list.name, entity_list.path, entity_list.batch_settings ) batch_step.meta["entity_type"] = str(type(entity_list).__name__).lower() - batch_step.meta["status_dir"] = str(telemetry_dir / entity_list.name) + batch_step.meta["status_dir"] = str(telemetry_dir) substeps = [] for entity in entity_list.entities: @@ -575,12 +664,9 @@ def _create_job_step( """Create job steps for all entities with the launcher :param entity: an entity to create a step for - :type entity: SmartSimEntity :param telemetry_dir: Path to a directory in which the job step may write telemetry events - :type telemetry_dir: pathlib.Path :return: the job step - :rtype: Step """ # get SSDB, SSIN, SSOUT and add to entity run settings if isinstance(entity, Model): @@ -597,7 +683,6 @@ def _prep_entity_client_env(self, entity: Model) -> None: """Retrieve all connections registered to this entity :param entity: The entity to retrieve connections from - :type entity: Model """ client_env: t.Dict[str, t.Union[str, int, float, bool]] = {} @@ -662,17 +747,28 @@ def _save_orchestrator(self, orchestrator: Orchestrator) -> None: to the orchestrator. 
:param orchestrator: Orchestrator configuration to be saved - :type orchestrator: Orchestrator """ - dat_file = "/".join((orchestrator.path, "smartsim_db.dat")) - db_jobs = self._jobs.db_jobs - orc_data = {"db": orchestrator, "db_jobs": db_jobs} - steps = [] - for db_job in db_jobs.values(): - steps.append(self._launcher.step_mapping[db_job.name]) - orc_data["steps"] = steps - with open(dat_file, "wb") as pickle_file: + if not orchestrator.is_active(): + raise Exception("Orchestrator is not running") + + # Extract only the db_jobs associated with this particular orchestrator + if orchestrator.batch: + job_names = [orchestrator.name] + else: + job_names = [dbnode.name for dbnode in orchestrator.entities] + db_jobs = { + name: job for name, job in self._jobs.db_jobs.items() if name in job_names + } + + # Extract the associated steps + steps = [ + self._launcher.step_mapping[db_job.name] for db_job in db_jobs.values() + ] + + orc_data = {"db": orchestrator, "db_jobs": db_jobs, "steps": steps} + + with open(orchestrator.checkpoint_file, "wb") as pickle_file: pickle.dump(orc_data, pickle_file) def _orchestrator_launch_wait(self, orchestrator: Orchestrator) -> None: @@ -684,7 +780,6 @@ def _orchestrator_launch_wait(self, orchestrator: Orchestrator) -> None: be launched with SSDB address :param orchestrator: orchestrator instance - :type orchestrator: Orchestrator :raises SmartSimError: if launch fails or manually stopped by user """ if orchestrator.batch: @@ -702,10 +797,9 @@ def _orchestrator_launch_wait(self, orchestrator: Orchestrator) -> None: # _jobs.get_status acquires JM lock for main thread, no need for locking statuses = self.get_entity_list_status(orchestrator) - if all(stat == STATUS_RUNNING for stat in statuses): + if all(stat == SmartSimStatus.STATUS_RUNNING for stat in statuses): ready = True - # TODO remove in favor of by node status check - time.sleep(CONFIG.jm_interval) + # TODO: Add a node status check elif any(stat in TERMINAL_STATUSES for stat in 
statuses): self.stop_db(orchestrator) msg = "Orchestrator failed during startup" @@ -723,14 +817,14 @@ def _orchestrator_launch_wait(self, orchestrator: Orchestrator) -> None: # launch explicitly raise - def reload_saved_db(self, checkpoint_file: str) -> Orchestrator: + def reload_saved_db( + self, checkpoint_file: t.Union[str, os.PathLike[str]] + ) -> Orchestrator: with JM_LOCK: - if self.orchestrator_active: - raise SmartSimError("Orchestrator exists and is active") if not osp.exists(checkpoint_file): raise FileNotFoundError( - f"The SmartSim database config file {checkpoint_file} " + f"The SmartSim database config file {os.fspath(checkpoint_file)} " "cannot be found." ) @@ -766,7 +860,7 @@ def reload_saved_db(self, checkpoint_file: str) -> Orchestrator: try: for db_job, step in job_steps: self._jobs.db_jobs[db_job.ename] = db_job - self._launcher.step_mapping[db_job.name] = step + self._launcher.add_step_to_mapping_table(db_job.name, step) if step.task_id: self._launcher.task_manager.add_existing(int(step.task_id)) except LauncherError as e: @@ -795,9 +889,9 @@ def _set_dbobjects(self, manifest: Manifest) -> None: if not db_is_active(hosts=hosts, ports=ports, num_shards=len(db_addresses)): raise SSInternalError("Cannot set DB Objects, DB is not running") - environ[f"SSDB{db_name}"] = db_addresses[0] + os.environ[f"SSDB{db_name}"] = db_addresses[0] - environ[f"SR_DB_TYPE{db_name}"] = ( + os.environ[f"SR_DB_TYPE{db_name}"] = ( CLUSTERED if len(db_addresses) > 1 else STANDALONE ) @@ -833,7 +927,6 @@ def _start_telemetry_monitor(self, exp_dir: str) -> None: of the processes launched through this controller. 
:param exp_dir: An experiment directory - :type exp_dir: str """ if ( self._telemetry_monitor is None @@ -859,43 +952,3 @@ def _start_telemetry_monitor(self, exp_dir: str) -> None: cwd=str(pathlib.Path(__file__).parent.parent.parent), shell=False, ) - logger.debug("Telemetry monitor started") - - -class _AnonymousBatchJob(EntityList[Model]): - @staticmethod - def _validate(model: Model) -> None: - if model.batch_settings is None: - msg = "Unable to create _AnonymousBatchJob without batch_settings" - raise SmartSimError(msg) - - def __init__(self, model: Model) -> None: - self._validate(model) - super().__init__(model.name, model.path) - self.entities = [model] - self.batch_settings = model.batch_settings - - def _initialize_entities(self, **kwargs: t.Any) -> None: ... - - -def _look_up_launched_data( - launcher: Launcher, -) -> t.Callable[[t.Tuple[str, Step]], "TStepLaunchMetaData"]: - def _unpack_launched_data(data: t.Tuple[str, Step]) -> "TStepLaunchMetaData": - # NOTE: we cannot assume that the name of the launched step - # ``launched_step_name`` is equal to the name of the step referring to - # the entity ``step.name`` as is the case when an entity list is - # launched as a batch job - launched_step_name, step = data - launched_step_map = launcher.step_mapping[launched_step_name] - out_file, err_file = step.get_output_files() - return ( - launched_step_map.step_id, - launched_step_map.task_id, - launched_step_map.managed, - out_file, - err_file, - pathlib.Path(step.meta.get("status_dir", step.cwd)), - ) - - return _unpack_launched_data diff --git a/smartsim/_core/control/controller_utils.py b/smartsim/_core/control/controller_utils.py new file mode 100644 index 000000000..37ae9aebf --- /dev/null +++ b/smartsim/_core/control/controller_utils.py @@ -0,0 +1,77 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from __future__ import annotations + +import pathlib +import typing as t + +from ..._core.launcher.step import Step +from ...entity import EntityList, Model +from ...error import SmartSimError +from ..launcher.launcher import Launcher + +if t.TYPE_CHECKING: + from ..utils.serialize import TStepLaunchMetaData + + +class _AnonymousBatchJob(EntityList[Model]): + @staticmethod + def _validate(model: Model) -> None: + if model.batch_settings is None: + msg = "Unable to create _AnonymousBatchJob without batch_settings" + raise SmartSimError(msg) + + def __init__(self, model: Model) -> None: + self._validate(model) + super().__init__(model.name, model.path) + self.entities = [model] + self.batch_settings = model.batch_settings + + def _initialize_entities(self, **kwargs: t.Any) -> None: ... + + +def _look_up_launched_data( + launcher: Launcher, +) -> t.Callable[[t.Tuple[str, Step]], "TStepLaunchMetaData"]: + def _unpack_launched_data(data: t.Tuple[str, Step]) -> "TStepLaunchMetaData": + # NOTE: we cannot assume that the name of the launched step + # ``launched_step_name`` is equal to the name of the step referring to + # the entity ``step.name`` as is the case when an entity list is + # launched as a batch job + launched_step_name, step = data + launched_step_map = launcher.step_mapping[launched_step_name] + out_file, err_file = step.get_output_files() + return ( + launched_step_map.step_id, + launched_step_map.task_id, + launched_step_map.managed, + out_file, + err_file, + pathlib.Path(step.meta.get("status_dir", step.cwd)), + ) + + return _unpack_launched_data diff --git a/smartsim/_core/control/job.py b/smartsim/_core/control/job.py index f3bd8cf3a..6941d7607 100644 --- a/smartsim/_core/control/job.py +++ b/smartsim/_core/control/job.py @@ -24,46 +24,170 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import pathlib import time import typing as t from dataclasses import dataclass from ...entity import EntitySequence, SmartSimEntity -from ...status import STATUS_NEW +from ...status import SmartSimStatus @dataclass(frozen=True) class _JobKey: + """A helper class for creating unique lookup keys within the telemetry + monitor. These keys are not guaranteed to be unique across experiments, + only within an experiment (due to process ID re-use by the OS)""" + step_id: str + """The process id of an unmanaged task""" task_id: str + """The task id of a managed task""" class JobEntity: - """API required for a job processed in the JobManager with support for - telemetry monitoring + """An entity containing run-time SmartSimEntity metadata. The run-time metadata + is required to perform telemetry collection. The `JobEntity` satisfies the core + API necessary to use a `JobManager` to manage retrieval of managed step updates. """ def __init__(self) -> None: self.name: str = "" + """The entity name""" self.path: str = "" + """The root path for entity output files""" self.step_id: str = "" + """The process id of an unmanaged task""" self.task_id: str = "" + """The task id of a managed task""" self.type: str = "" + """The type of the associated `SmartSimEntity`""" self.timestamp: int = 0 + """The timestamp when the entity was created""" self.status_dir: str = "" + """The path configured by the experiment for the entities telemetry output""" + self.telemetry_on: bool = False + """"Flag indicating if optional telemetry is enabled for the entity""" + self.collectors: t.Dict[str, str] = {} + """Mapping of collectors enabled for the entity""" + self.config: t.Dict[str, str] = {} + """Telemetry configuration supplied by the experiment""" + self._is_complete: bool = False + """Flag indicating if the entity has completed execution""" @property def is_db(self) -> bool: + """Returns `True` if the entity represents a database or database shard""" return self.type in ["orchestrator", 
"dbnode"] @property def is_managed(self) -> bool: + """Returns `True` if the entity is managed by a workload manager""" return bool(self.step_id) @property def key(self) -> _JobKey: + """Return a `_JobKey` that identifies an entity. + NOTE: not guaranteed to be unique over time due to reused process IDs""" return _JobKey(self.step_id, self.task_id) + @property + def is_complete(self) -> bool: + """Returns `True` if the entity has completed execution""" + return self._is_complete + + def check_completion_status(self) -> None: + """Check for telemetry outputs indicating the entity has completed + TODO: determine correct location to avoid exposing telemetry + implementation details into `JobEntity` + """ + # avoid touching file-system if not necessary + if self._is_complete: + return + + # status telemetry is tracked in JSON files written to disk. look + # for a corresponding `stop` event in the entity status directory + state_file = pathlib.Path(self.status_dir) / "stop.json" + if state_file.exists(): + self._is_complete = True + + @staticmethod + def _map_db_metadata(entity_dict: t.Dict[str, t.Any], entity: "JobEntity") -> None: + """Map DB-specific properties from a runtime manifest onto a `JobEntity` + + :param entity_dict: The raw dictionary deserialized from manifest JSON + :param entity: The entity instance to modify + """ + if entity.is_db: + # add collectors if they're configured to be enabled in the manifest + entity.collectors = { + "client": entity_dict.get("client_file", ""), + "client_count": entity_dict.get("client_count_file", ""), + "memory": entity_dict.get("memory_file", ""), + } + + entity.telemetry_on = any(entity.collectors.values()) + entity.config["host"] = entity_dict.get("hostname", "") + entity.config["port"] = entity_dict.get("port", "") + + @staticmethod + def _map_standard_metadata( + entity_type: str, + entity_dict: t.Dict[str, t.Any], + entity: "JobEntity", + exp_dir: str, + raw_experiment: t.Dict[str, t.Any], + ) -> None: + """Map 
universal properties from a runtime manifest onto a `JobEntity` + + :param entity_type: The type of the associated `SmartSimEntity` + :param entity_dict: The raw dictionary deserialized from manifest JSON + :param entity: The entity instance to modify + :param exp_dir: The path to the experiment working directory + :param raw_experiment: The raw experiment dictionary deserialized from + manifest JSON + """ + metadata = entity_dict["telemetry_metadata"] + status_dir = pathlib.Path(metadata.get("status_dir")) + is_dragon = raw_experiment["launcher"].lower() == "dragon" + + # all entities contain shared properties that identify the task + entity.type = entity_type + entity.name = ( + entity_dict["name"] + if not is_dragon + else entity_dict["telemetry_metadata"]["step_id"] + ) + entity.step_id = str(metadata.get("step_id") or "") + entity.task_id = str(metadata.get("task_id") or "") + entity.timestamp = int(entity_dict.get("timestamp", "0")) + entity.path = str(exp_dir) + entity.status_dir = str(status_dir) + + @classmethod + def from_manifest( + cls, + entity_type: str, + entity_dict: t.Dict[str, t.Any], + exp_dir: str, + raw_experiment: t.Dict[str, t.Any], + ) -> "JobEntity": + """Instantiate a `JobEntity` from the dictionary deserialized from manifest JSON + + :param entity_type: The type of the associated `SmartSimEntity` + :param entity_dict: The raw dictionary deserialized from manifest JSON + :param exp_dir: The path to the experiment working directory + :param raw_experiment: raw experiment deserialized from manifest JSON + """ + entity = JobEntity() + + cls._map_standard_metadata( + entity_type, entity_dict, entity, exp_dir, raw_experiment + ) + cls._map_db_metadata(entity_dict, entity) + + return entity + class Job: """Keep track of various information for the controller. @@ -83,20 +207,15 @@ def __init__( """Initialize a Job. 
:param job_name: Name of the job step - :type job_name: str :param job_id: The id associated with the job - :type job_id: str :param entity: The SmartSim entity(list) associated with the job - :type entity: SmartSimEntity | EntitySequence | JobEntity :param launcher: Launcher job was started with - :type launcher: str :param is_task: process monitored by TaskManager (True) or the WLM (True) - :type is_task: bool """ self.name = job_name self.jid = job_id self.entity = entity - self.status = STATUS_NEW + self.status = SmartSimStatus.STATUS_NEW # status before smartsim status mapping is applied self.raw_status: t.Optional[str] = None self.returncode: t.Optional[int] = None @@ -116,7 +235,7 @@ def ename(self) -> str: def set_status( self, - new_status: str, + new_status: SmartSimStatus, raw_status: str, returncode: t.Optional[int], error: t.Optional[str] = None, @@ -125,9 +244,10 @@ def set_status( """Set the status of a job. :param new_status: The new status of the job - :type new_status: str + :param raw_status: The raw status of the launcher :param returncode: The return code for the job - :type return_code: str + :param error: Content produced by stderr + :param output: Content produced by stdout """ self.status = new_status self.raw_status = raw_status @@ -149,15 +269,12 @@ def reset( """Reset the job in order to be able to restart it. :param new_job_name: name of the new job step - :type new_job_name: str :param new_job_id: new job id to launch under - :type new_job_id: int :param is_task: process monitored by TaskManager (True) or the WLM (True) - :type is_task: bool """ self.name = new_job_name self.jid = new_job_id - self.status = STATUS_NEW + self.status = SmartSimStatus.STATUS_NEW self.returncode = None self.output = None self.error = None @@ -170,7 +287,6 @@ def error_report(self) -> str: """A descriptive error report based on job fields :return: error report for display in terminal - :rtype: str """ warning = f"{self.ename} failed. 
See below for details \n" if self.error: @@ -190,7 +306,6 @@ def __str__(self) -> str: """Return user-readable string of the Job :returns: A user-readable string of the Job - :rtype: str """ if self.jid: job = "{}({}): {}" @@ -208,19 +323,18 @@ class History: def __init__(self, runs: int = 0) -> None: """Init a history object for a job - :param runs: number of runs so far, defaults to 0 - :type runs: int, optional + :param runs: number of runs so far """ self.runs = runs self.jids: t.Dict[int, t.Optional[str]] = {} - self.statuses: t.Dict[int, str] = {} + self.statuses: t.Dict[int, SmartSimStatus] = {} self.returns: t.Dict[int, t.Optional[int]] = {} self.job_times: t.Dict[int, float] = {} def record( self, job_id: t.Optional[str], - status: str, + status: SmartSimStatus, returncode: t.Optional[int], job_time: float, ) -> None: diff --git a/smartsim/_core/control/jobmanager.py b/smartsim/_core/control/jobmanager.py index e482b9951..1bc24cf9a 100644 --- a/smartsim/_core/control/jobmanager.py +++ b/smartsim/_core/control/jobmanager.py @@ -35,7 +35,7 @@ from ...database import Orchestrator from ...entity import DBNode, EntitySequence, SmartSimEntity from ...log import ContextThread, get_logger -from ...status import STATUS_NEVER_STARTED, TERMINAL_STATUSES +from ...status import TERMINAL_STATUSES, SmartSimStatus from ..config import CONFIG from ..launcher import Launcher, LocalLauncher from ..utils.network import get_ip_from_host @@ -61,7 +61,6 @@ def __init__(self, lock: RLock, launcher: t.Optional[Launcher] = None) -> None: """Initialize a Jobmanager :param launcher: a Launcher object to manage jobs - :type: SmartSim.Launcher """ self.monitor: t.Optional[Thread] = None @@ -124,7 +123,6 @@ def move_to_completed(self, job: Job) -> None: actively monitored by the job manager :param job: job instance we are transitioning - :type job: Job """ with self._lock: self.completed[job.ename] = job @@ -141,9 +139,7 @@ def __getitem__(self, entity_name: str) -> Job: from which it 
was created. :param entity_name: The name of the entity of a job - :type entity_name: str :returns: the Job associated with the entity_name - :rtype: Job """ with self._lock: entities = ChainMap(self.db_jobs, self.jobs, self.completed) @@ -153,7 +149,6 @@ def __call__(self) -> t.Dict[str, Job]: """Returns dictionary all jobs for () operator :returns: Dictionary of all jobs - :rtype: dictionary """ all_jobs = {**self.jobs, **self.db_jobs} return all_jobs @@ -175,13 +170,9 @@ def add_job( """Add a job to the job manager which holds specific jobs by type. :param job_name: name of the job step - :type job_name: str :param job_id: job step id created by launcher - :type job_id: str :param entity: entity that was launched on job step - :type entity: SmartSimEntity | EntitySequence :param is_task: process monitored by TaskManager (True) or the WLM (True) - :type is_task: bool """ launcher = str(self._launcher) # all operations here should be atomic @@ -197,9 +188,7 @@ def is_finished(self, entity: SmartSimEntity) -> bool: """Detect if a job has completed :param entity: entity to check - :type entity: SmartSimEntity :return: True if finished - :rtype: bool """ with self._lock: job = self[entity.name] # locked operation @@ -239,12 +228,11 @@ def check_jobs(self) -> None: def get_status( self, entity: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]], - ) -> str: + ) -> SmartSimStatus: """Return the status of a job. 
:param entity: SmartSimEntity or EntitySequence instance - :type entity: SmartSimEntity | EntitySequence - :returns: tuple of status + :returns: a SmartSimStatus status """ with self._lock: if entity.name in self.completed: @@ -254,13 +242,12 @@ def get_status( job: Job = self[entity.name] # locked return job.status - return STATUS_NEVER_STARTED + return SmartSimStatus.STATUS_NEVER_STARTED def set_launcher(self, launcher: Launcher) -> None: """Set the launcher of the job manager to a specific launcher instance :param launcher: child of Launcher - :type launcher: Launcher instance """ self._launcher = launcher @@ -268,9 +255,7 @@ def query_restart(self, entity_name: str) -> bool: """See if the job just started should be restarted or not. :param entity_name: name of entity to check for a job for - :type entity_name: str :return: if job should be restarted instead of started - :rtype: bool """ if entity_name in self.completed: return True @@ -287,13 +272,9 @@ def restart_job( ready to launch again. 
:param job_name: new job step name - :type job_name: str :param job_id: new job id - :type job_id: str :param entity_name: name of the entity of the job - :type entity_name: str :param is_task: process monitored by TaskManager (True) or the WLM (True) - :type is_task: bool """ with self._lock: @@ -311,7 +292,6 @@ def get_db_host_addresses(self) -> t.Dict[str, t.List[str]]: for corresponding database identifiers :return: dictionary of host ip addresses - :rtype: Dict[str, list] """ address_dict: t.Dict[str, t.List[str]] = {} @@ -333,7 +313,6 @@ def set_db_hosts(self, orchestrator: Orchestrator) -> None: """Set the DB hosts in db_jobs so future entities can query this :param orchestrator: orchestrator instance - :type orchestrator: Orchestrator """ # should only be called during launch in the controller @@ -349,9 +328,9 @@ def set_db_hosts(self, orchestrator: Orchestrator) -> None: self.db_jobs[dbnode.name].hosts = dbnode.hosts def signal_interrupt(self, signo: int, _frame: t.Optional[FrameType]) -> None: + """Custom handler for whenever SIGINT is received""" if not signo: logger.warning("Received SIGINT with no signal number") - """Custom handler for whenever SIGINT is received""" if self.actively_monitoring and len(self) > 0: if self.kill_on_interrupt: for _, job in self().items(): diff --git a/smartsim/_core/control/manifest.py b/smartsim/_core/control/manifest.py index 25037540c..fd5770f18 100644 --- a/smartsim/_core/control/manifest.py +++ b/smartsim/_core/control/manifest.py @@ -68,7 +68,6 @@ def dbs(self) -> t.List[Orchestrator]: :raises SmartSimError: if user added to databases to manifest :return: List of orchestrator instances - :rtype: list[Orchestrator] """ dbs = [item for item in self._deployables if isinstance(item, Orchestrator)] return dbs @@ -78,7 +77,6 @@ def models(self) -> t.List[Model]: """Return Model instances in Manifest :return: model instances - :rtype: List[Model] """ _models: t.List[Model] = [ item for item in self._deployables if 
isinstance(item, Model) @@ -90,7 +88,6 @@ def ensembles(self) -> t.List[Ensemble]: """Return Ensemble instances in Manifest :return: list of ensembles - :rtype: List[Ensemble] """ return [e for e in self._deployables if isinstance(e, Ensemble)] @@ -100,7 +97,6 @@ def all_entity_lists(self) -> t.List[EntitySequence[SmartSimEntity]]: exceptional ones like Orchestrator :return: list of entity lists - :rtype: List[EntitySequence[SmartSimEntity]] """ _all_entity_lists: t.List[EntitySequence[SmartSimEntity]] = list(self.ensembles) @@ -109,6 +105,14 @@ def all_entity_lists(self) -> t.List[EntitySequence[SmartSimEntity]]: return _all_entity_lists + @property + def has_deployable(self) -> bool: + """ + Return True if the manifest contains entities that + must be physically deployed + """ + return bool(self._deployables) + @staticmethod def _check_names(deployables: t.List[t.Any]) -> None: used = [] @@ -294,7 +298,10 @@ def _entities_to_data( def finalize(self) -> LaunchedManifest[_T]: return LaunchedManifest( metadata=_LaunchedManifestMetadata( - self.run_id, self.exp_name, self.exp_path, self.launcher_name + self.run_id, + self.exp_name, + self.exp_path, + self.launcher_name, ), models=tuple(self._models), ensembles=tuple(self._ensembles), diff --git a/smartsim/_core/control/previewrenderer.py b/smartsim/_core/control/previewrenderer.py new file mode 100644 index 000000000..857a70397 --- /dev/null +++ b/smartsim/_core/control/previewrenderer.py @@ -0,0 +1,192 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2023, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+import pathlib
+import typing as t
+from enum import Enum
+
+import jinja2
+import jinja2.utils as u
+from jinja2 import pass_eval_context
+
+from ..._core.config import CONFIG
+from ..._core.control import Manifest
+from ...error.errors import PreviewFormatError
+from ...log import get_logger
+from .job import Job
+
+logger = get_logger(__name__)
+
+if t.TYPE_CHECKING:
+    from smartsim import Experiment
+
+
+class Format(str, Enum):
+    PLAINTEXT = "plain_text"
+
+
+class Verbosity(str, Enum):
+    INFO = "info"
+    DEBUG = "debug"
+    DEVELOPER = "developer"
+
+
+@pass_eval_context
+def as_toggle(_eval_ctx: u.F, value: bool) -> str:
+    """Return "On" if value returns True,
+    and "Off" if value returns False. 
+ """ + return "On" if value else "Off" + + +@pass_eval_context +def get_ifname(_eval_ctx: u.F, value: t.List[str]) -> str: + """Extract Network Interface from orchestrator run settings.""" + if value: + for val in value: + if "ifname=" in val: + output = val.split("=")[-1] + return output + return "" + + +@pass_eval_context +def get_dbtype(_eval_ctx: u.F, value: str) -> str: + """Extract data base type.""" + if value: + if "-cli" in value: + db_type, _ = value.split("/")[-1].split("-", 1) + return db_type + return "" + + +@pass_eval_context +def is_list(_eval_ctx: u.F, value: str) -> bool: + """Return True if item is of type list, and False + otherwise, to determine how Jinja template should + render an item. + """ + return isinstance(value, list) + + +def render_to_file(content: str, filename: str) -> None: + """Output preview to a file if an output filename + is specified. + + :param content: The rendered preview. + :param filename: The name of the file to write the preview to. + """ + filename = find_available_filename(filename) + + with open(filename, "w", encoding="utf-8") as prev_file: + prev_file.write(content) + + +def render( + exp: "Experiment", + manifest: t.Optional[Manifest] = None, + verbosity_level: Verbosity = Verbosity.INFO, + output_format: Format = Format.PLAINTEXT, + output_filename: t.Optional[str] = None, + active_dbjobs: t.Optional[t.Dict[str, Job]] = None, +) -> str: + """ + Render the template from the supplied entities. + :param experiment: the experiment to be previewed. + :param manifest: the manifest to be previewed. + :param verbosity_level: the verbosity level + :param output_format: the output format. 
+ """ + + verbosity_level = Verbosity(verbosity_level) + + _check_output_format(output_format) + + loader = jinja2.PackageLoader( + "smartsim.templates.templates.preview", output_format.value + ) + env = jinja2.Environment(loader=loader, autoescape=True) + + env.filters["as_toggle"] = as_toggle + env.filters["get_ifname"] = get_ifname + env.filters["get_dbtype"] = get_dbtype + env.filters["is_list"] = is_list + env.globals["Verbosity"] = Verbosity + + tpl_path = "base.template" + + tpl = env.get_template(tpl_path) + + if verbosity_level == Verbosity.INFO: + logger.warning( + "Only showing user set parameters. Some internal entity " + "fields are truncated. To view truncated fields: use verbosity_level " + "'developer' or 'debug.'" + ) + + rendered_preview = tpl.render( + exp_entity=exp, + active_dbjobs=active_dbjobs, + manifest=manifest, + config=CONFIG, + verbosity_level=verbosity_level, + ) + + if output_filename: + render_to_file( + rendered_preview, + output_filename, + ) + else: + logger.info(rendered_preview) + return rendered_preview + + +def find_available_filename(filename: str) -> str: + """Iterate through potentially unique names until one is found that does + not already exist. Return an unused name variation + + :param filename: The name of the file to write the preview to. + """ + + path = pathlib.Path(filename) + candidate_path = pathlib.Path(filename) + index = 1 + + while candidate_path.exists(): + candidate_path = path.with_name(f"{path.stem}_{index:02}.txt") + index += 1 + return str(candidate_path) + + +def _check_output_format(output_format: Format) -> None: + """ + Check that a valid file output format is given. 
+ """ + if not output_format == Format.PLAINTEXT: + raise PreviewFormatError(f"The only valid output format currently available \ +is {Format.PLAINTEXT.value}") diff --git a/smartsim/_core/entrypoints/colocated.py b/smartsim/_core/entrypoints/colocated.py index 600ae2ff3..508251fe0 100644 --- a/smartsim/_core/entrypoints/colocated.py +++ b/smartsim/_core/entrypoints/colocated.py @@ -32,7 +32,7 @@ import tempfile import typing as t from pathlib import Path -from subprocess import PIPE, STDOUT +from subprocess import STDOUT from types import FrameType import filelock @@ -62,11 +62,8 @@ def launch_db_model(client: Client, db_model: t.List[str]) -> str: """Parse options to launch model on local cluster :param client: SmartRedis client connected to local DB - :type client: Client :param db_model: List of arguments defining the model - :type db_model: List[str] :return: Name of model - :rtype: str """ parser = argparse.ArgumentParser("Set ML model on DB") parser.add_argument("--name", type=str) @@ -129,11 +126,8 @@ def launch_db_script(client: Client, db_script: t.List[str]) -> str: """Parse options to launch script on local cluster :param client: SmartRedis client connected to local DB - :type client: Client :param db_model: List of arguments defining the script - :type db_model: List[str] :return: Name of model - :rtype: str """ parser = argparse.ArgumentParser("Set script on DB") parser.add_argument("--name", type=str) @@ -177,6 +171,7 @@ def main( db_scripts: t.List[t.List[str]], db_identifier: str, ) -> None: + # pylint: disable=too-many-statements global DBPID # pylint: disable=global-statement lo_address = current_ip("lo") @@ -201,8 +196,17 @@ def main( # we generally want to catch all exceptions here as # if this process dies, the application will most likely fail try: - process = psutil.Popen(cmd, stdout=PIPE, stderr=STDOUT) - DBPID = process.pid + hostname = socket.gethostname() + filename = ( + f"colo_orc_{hostname}.log" + if os.getenv("SMARTSIM_LOG_LEVEL") == 
"debug" + else os.devnull + ) + with open(filename, "w", encoding="utf-8") as file: + process = psutil.Popen(cmd, stdout=file.fileno(), stderr=STDOUT) + DBPID = process.pid + # printing to stdout shell file for extraction + print(f"__PID__{DBPID}__PID__", flush=True) except Exception as e: cleanup() @@ -245,12 +249,8 @@ def launch_db_scripts(client: Client, db_scripts: t.List[t.List[str]]) -> None: raise SSInternalError( "Failed to set model or script, could not connect to database" ) from ex - finally: - # Make sure we don't keep this around - del client - - for line in iter(process.stdout.readline, b""): - print(line.decode("utf-8").rstrip(), flush=True) + # Make sure we don't keep this around + del client except Exception as e: cleanup() diff --git a/smartsim/_core/entrypoints/dragon.py b/smartsim/_core/entrypoints/dragon.py new file mode 100644 index 000000000..92ebd735f --- /dev/null +++ b/smartsim/_core/entrypoints/dragon.py @@ -0,0 +1,351 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterpris +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import argparse +import dataclasses +import json +import os +import signal +import socket +import sys +import textwrap +import time +import typing as t +from types import FrameType + +import zmq +import zmq.auth.thread + +from smartsim._core.config import get_config +from smartsim._core.launcher.dragon import dragonSockets +from smartsim._core.launcher.dragon.dragonBackend import DragonBackend +from smartsim._core.schemas import ( + DragonBootstrapRequest, + DragonBootstrapResponse, + DragonShutdownRequest, +) +from smartsim._core.utils.network import get_best_interface_and_address +from smartsim.log import ContextThread, get_logger + +""" +Dragon server entrypoint script +""" + +logger = get_logger("Dragon Server") + +# kill is not catchable +SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM, signal.SIGABRT] + +SHUTDOWN_INITIATED = False + + +@dataclasses.dataclass +class DragonEntrypointArgs: + launching_address: str + interface: str + + +def handle_signal(signo: int, _frame: t.Optional[FrameType] = None) -> None: + if not signo: + logger.info("Received signal with no signo") + else: + logger.info(f"Received signal {signo}") + cleanup() + + +def get_log_path() -> str: + config = get_config() + return config.dragon_log_filename + + +def print_summary(network_interface: str, ip_address: str) -> None: + zmq_config = {"interface": network_interface, "address": ip_address} + + log_path = get_log_path() + with open(log_path, "w", 
encoding="utf-8") as dragon_config_log: + dragon_config_log.write( + textwrap.dedent(f"""\ + -------- Dragon Configuration -------- + IPADDRESS: {ip_address} + NETWORK: {network_interface} + HOSTNAME: {socket.gethostname()} + DRAGON_SERVER_CONFIG: {json.dumps(zmq_config)} + -------------------------------------- + """), + ) + + +def start_updater( + backend: DragonBackend, updater: t.Optional[ContextThread] +) -> ContextThread: + """Start the ``DragonBackend`` updater thread. + + If ``updater`` is not None, then it is first checked and if it + alive, no other thread is started. + + :param backend: The dragon backend for which the thread will be started + :param updater: An existing updater thread that might have to be replaced + :return: Running updater thread + """ + # If the updater was started, check if it completed or died + if updater is not None: + updater.join(0.1) + # If it's alive, there is nothing to do + if updater.is_alive(): + return updater + updater = ContextThread(name="DragonBackend", daemon=True, target=backend.update) + updater.start() + return updater + + +def is_updater_healthy(backend: DragonBackend) -> bool: + """Check if the backend has been updated recently. + + The acceptable delay is defined as the server timeout plus the backend's cooldown + period. If the server timeout is set to `-1`, then the acceptable delay is set to + one minute plus the cooldown period. + + :param backend: The backend for which the updater's health is checked + :return: Whether the backend was updated recently + """ + server_timeout = get_config().dragon_server_timeout / 1000 + acceptable_delay = backend.cooldown_period + ( + 60.0 if server_timeout == -1 else server_timeout + ) + + heartbeat_delay = backend.current_time - backend.last_heartbeat + if heartbeat_delay > acceptable_delay: + logger.debug( + f"Updater inactive for {heartbeat_delay:.2f} seconds, will request restart." 
+ ) + return False + return True + + +def updater_fallback(backend: DragonBackend, updater: ContextThread) -> ContextThread: + """Check if updater has updated the backend recently, if not, check its status + and start a new one if it is not alive. + :param backend: The dragon backend for which the udpater's health must be checked + :param updater: The updater thread which has to be checked and (possibly) replaced + :return: Running updater thread + """ + if is_updater_healthy(backend): + return updater + return start_updater(backend, updater) + + +# pylint: disable-next=too-many-statements +def run( + zmq_context: "zmq.Context[t.Any]", + dragon_head_address: str, + dragon_pid: int, +) -> None: + logger.debug(f"Opening socket {dragon_head_address}") + dragon_head_socket = dragonSockets.get_secure_socket(zmq_context, zmq.REP, True) + dragon_head_socket.bind(dragon_head_address) + dragon_backend = DragonBackend(pid=dragon_pid) + + backend_updater = start_updater(dragon_backend, None) + server = dragonSockets.as_server(dragon_head_socket) + + logger.debug(f"Listening to {dragon_head_address}") + + while not dragon_backend.should_shutdown: + try: + req = server.recv() + logger.debug(f"Received {type(req).__name__} {req}") + except zmq.Again: + backend_updater = updater_fallback(dragon_backend, backend_updater) + continue + + resp = dragon_backend.process_request(req) + + logger.debug(f"Sending {type(resp).__name__} {resp}") + try: + server.send(resp) + except zmq.Again: + logger.error("Could not send response back to launcher.") + backend_updater = updater_fallback(dragon_backend, backend_updater) + + # We can only check the heartbeat if the backend has not shut down + if not dragon_backend.should_shutdown: + logger.debug(f"Listening to {dragon_head_address}") + backend_updater = updater_fallback(dragon_backend, backend_updater) + + if SHUTDOWN_INITIATED: + dragon_backend.process_request(DragonShutdownRequest()) + + logger.info("Backend shutdown has been requested") + + 
if backend_updater.is_alive(): + backend_updater.join(1) + + if not dragon_backend.frontend_shutdown: + logger.info("Frontend will have to be shut down externally") + while True: + logger.info("Waiting for external shutdown") + time.sleep(5) + + +def execute_entrypoint(args: DragonEntrypointArgs) -> int: + if_config = get_best_interface_and_address() + interface = if_config.interface + address = if_config.address + if not interface: + raise ValueError("Net interface could not be determined") + dragon_head_address = f"tcp://{address}" + + smartsim_config = get_config() + if args.launching_address: + zmq_context = zmq.Context() + zmq_context.setsockopt( + zmq.SNDTIMEO, value=smartsim_config.dragon_server_timeout + ) + zmq_context.setsockopt( + zmq.RCVTIMEO, value=smartsim_config.dragon_server_timeout + ) + zmq_context.setsockopt(zmq.REQ_CORRELATE, 1) + zmq_context.setsockopt(zmq.REQ_RELAXED, 1) + + if str(args.launching_address).split(":", maxsplit=1)[0] == dragon_head_address: + address = "localhost" + dragon_head_address = "tcp://localhost:5555" + else: + dragon_head_address += ":5555" + + zmq_authenticator = dragonSockets.get_authenticator(zmq_context, timeout=-1) + + logger.debug("Getting launcher socket") + launcher_socket = dragonSockets.get_secure_socket(zmq_context, zmq.REQ, False) + + logger.debug(f"Connecting launcher socket to: {args.launching_address}") + launcher_socket.connect(args.launching_address) + client = dragonSockets.as_client(launcher_socket) + + logger.debug( + f"Sending bootstrap request to launcher_socket with {dragon_head_address}" + ) + client.send(DragonBootstrapRequest(address=dragon_head_address)) + response = client.recv() + + logger.debug(f"Received bootstrap response: {response}") + if not isinstance(response, DragonBootstrapResponse): + raise ValueError( + "Could not receive connection confirmation from launcher. Aborting." 
+ ) + + print_summary(interface, dragon_head_address) + + try: + logger.debug("Executing event loop") + run( + zmq_context=zmq_context, + dragon_head_address=dragon_head_address, + dragon_pid=response.dragon_pid, + ) + except Exception as e: + logger.error(f"Dragon server failed with {e}", exc_info=True) + return os.EX_SOFTWARE + finally: + if zmq_authenticator is not None and zmq_authenticator.is_alive(): + zmq_authenticator.stop() + + logger.info("Shutting down! Bye bye!") + + return 0 + + +def remove_config_log() -> None: + """Remove the Dragon `config_log` file from the file system. Used to + clean up after a dragon environment is shutdown to eliminate an + unnecessary attempt to connect to a stopped ZMQ server.""" + log_path = get_log_path() + if os.path.exists(log_path): + os.remove(log_path) + + +def cleanup() -> None: + global SHUTDOWN_INITIATED # pylint: disable=global-statement + logger.debug("Cleaning up") + remove_config_log() + SHUTDOWN_INITIATED = True + + +def register_signal_handlers() -> None: + # make sure to register the cleanup before the start + # the process so our signaller will be able to stop + # the database process. 
+ for sig in SIGNALS: + signal.signal(sig, handle_signal) + + +def parse_arguments(args: t.List[str]) -> DragonEntrypointArgs: + parser = argparse.ArgumentParser( + prefix_chars="+", description="SmartSim Dragon Head Process" + ) + parser.add_argument( + "+launching_address", + type=str, + help="Address of launching process if a ZMQ connection can be established", + required=True, + ) + parser.add_argument( + "+interface", + type=str, + help="Network Interface name", + required=False, + ) + args_ = parser.parse_args(args) + + if not args_.launching_address: + raise ValueError("Empty launching address supplied.") + + return DragonEntrypointArgs(args_.launching_address, args_.interface) + + +def main(args_: t.List[str]) -> int: + """Execute the dragon entrypoint as a module""" + os.environ["PYTHONUNBUFFERED"] = "1" + logger.info("Dragon server started") + + args = parse_arguments(args_) + register_signal_handlers() + + try: + return_code = execute_entrypoint(args) + return return_code + except Exception: + logger.error( + "An unexpected error occurred in the Dragon entrypoint.", exc_info=True + ) + finally: + cleanup() + + return -1 + + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/smartsim/_core/entrypoints/dragon_client.py b/smartsim/_core/entrypoints/dragon_client.py new file mode 100644 index 000000000..e998ddce1 --- /dev/null +++ b/smartsim/_core/entrypoints/dragon_client.py @@ -0,0 +1,203 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterpris +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import argparse +import dataclasses +import json +import os +import signal +import sys +import time +import typing as t +from pathlib import Path +from types import FrameType + +import zmq + +from smartsim._core.launcher.dragon.dragonConnector import DragonConnector +from smartsim._core.schemas import ( + DragonHandshakeRequest, + DragonRequest, + DragonShutdownRequest, + request_registry, +) +from smartsim.log import get_logger + +""" +Dragon client entrypoint script, used to start a server, send requests to it +and then shut it down. 
+""" + +logger = get_logger("Dragon Client") + +SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM, signal.SIGABRT] + + +@dataclasses.dataclass +class DragonClientEntrypointArgs: + submit: Path + + +def cleanup() -> None: + """Cleanup resources""" + logger.debug("Cleaning up") + + +def parse_requests(request_filepath: Path) -> t.List[DragonRequest]: + """Parse serialized requests from file + + :param request_filepath: Path to file with serialized requests + :return: Deserialized requests + """ + requests: t.List[DragonRequest] = [] + try: + with open(request_filepath, "r", encoding="utf-8") as request_file: + req_strings = json.load(fp=request_file) + except FileNotFoundError as e: + logger.error( + "Could not find file with run requests," + f"please check whether {request_filepath} exists." + ) + raise e from None + except json.JSONDecodeError as e: + logger.error(f"Could not decode request file {request_filepath}.") + raise e from None + + requests = [request_registry.from_string(req_str) for req_str in req_strings] + + return requests + + +def parse_arguments(args: t.List[str]) -> DragonClientEntrypointArgs: + """Parse arguments used to run entrypoint script + + :param args: Arguments without name of executable + :raises ValueError: If the request file is not specified + :return: Parsed arguments + """ + parser = argparse.ArgumentParser( + prefix_chars="+", + description="SmartSim Dragon Client Process, to be used in batch scripts", + ) + parser.add_argument("+submit", type=str, help="Path to request file", required=True) + args_ = parser.parse_args(args) + + if not args_.submit: + raise ValueError("Request file not provided.") + + return DragonClientEntrypointArgs(submit=Path(args_.submit)) + + +def handle_signal(signo: int, _frame: t.Optional[FrameType] = None) -> None: + """Handle signals sent to this process + + :param signo: Signal number + :param _frame: Frame, defaults to None + """ + if not signo: + logger.info("Received signal with no signo") + 
else: + logger.info(f"Received signal {signo}") + cleanup() + + +def register_signal_handlers() -> None: + """Register signal handlers prior to execution""" + # make sure to register the cleanup before the start + # the process so our signaller will be able to stop + # the server process. + for sig in SIGNALS: + signal.signal(sig, handle_signal) + + +def execute_entrypoint(args: DragonClientEntrypointArgs) -> int: + """Execute the entrypoint with specified arguments + + :param args: Parsed arguments + :return: Return code + """ + + try: + requests = parse_requests(args.submit) + except Exception: + logger.error("Dragon client failed to parse request file", exc_info=True) + return os.EX_OSFILE + + requests.append(DragonShutdownRequest(immediate=False, frontend_shutdown=True)) + + connector = DragonConnector() + + for request in requests: + response = connector.send_request(request) + if response.error_message is not None: + logger.error(response.error_message) + + logger.info("Terminated sending requests, waiting for Dragon Server to complete") + + if not connector.can_monitor: + logger.error( + "Could not get Dragon Server PID and will not be able to monitor it." 
+ ) + return os.EX_IOERR + + while True: + try: + time.sleep(5) + connector.send_request(DragonHandshakeRequest()) + except zmq.error.Again: + logger.debug("Could not reach server, assuming backend has shut down") + break + + logger.info("Client has finished.") + + return os.EX_OK + + +def main(args_: t.List[str]) -> int: + """Execute the dragon client entrypoint as a module""" + + os.environ["PYTHONUNBUFFERED"] = "1" + logger.info("Dragon client started") + + args = parse_arguments(args_) + register_signal_handlers() + + try: + return execute_entrypoint(args) + except Exception: + logger.error( + "An unexpected error occurred in the Dragon client entrypoint", + exc_info=True, + ) + finally: + cleanup() + + return os.EX_SOFTWARE + + +if __name__ == "__main__": + + sys.exit(main(sys.argv[1:])) diff --git a/smartsim/_core/entrypoints/indirect.py b/smartsim/_core/entrypoints/indirect.py index f94ad6e61..1f445ac4a 100644 --- a/smartsim/_core/entrypoints/indirect.py +++ b/smartsim/_core/entrypoints/indirect.py @@ -37,8 +37,8 @@ import psutil import smartsim.log -from smartsim._core.entrypoints.telemetrymonitor import track_event -from smartsim._core.utils.helpers import decode_cmd, get_ts +from smartsim._core.utils.helpers import decode_cmd, get_ts_ms +from smartsim._core.utils.telemetry.telemetry import write_event STEP_PID: t.Optional[int] = None logger = smartsim.log.get_logger(__name__) @@ -49,15 +49,21 @@ def main( cmd: str, - etype: str, + entity_type: str, cwd: str, status_dir: str, ) -> int: - """The main function of the entrypoint. This function takes an encoded step - command and runs it in a subprocess. In the background, this entrypoint - will then monitor the subprocess and write out status events such as when - the subprocess has started or stopped and write these events to a status - directory. + """This function receives an encoded step command from a SmartSim Experiment + and runs it in a subprocess. 
The entrypoint integrates with the telemetry + monitor by writing status update events. It is useful for wrapping + unmanaged tasks - a workload manager can be queried for a managed task + to achieve the same result. + + :param cmd: a base64 encoded cmd to execute + :param entity_type: `SmartSimEntity` entity class. Valid values + include: orchestrator, dbnode, ensemble, model + :param cwd: working directory to execute the cmd from + :param status_dir: path to the output directory for status updates """ global STEP_PID # pylint: disable=global-statement proxy_pid = os.getpid() @@ -94,34 +100,37 @@ def main( cleanup() return 1 finally: - track_event( - get_ts(), + write_event( + get_ts_ms(), proxy_pid, "", # step_id for unmanaged task is always empty - etype, + entity_type, "start", status_path, - logger, detail=start_detail, return_code=start_rc, ) logger.info(f"Waiting for child process {STEP_PID} to complete") - ret_code = process.wait() + + try: + ret_code = process.wait() + except Exception: + logger.error("Failed to complete process", exc_info=True) + ret_code = -1 logger.info( f"Indirect proxy {proxy_pid} child process {STEP_PID} complete." 
f" return code: {ret_code}" ) msg = f"Process {STEP_PID} finished with return code: {ret_code}" - track_event( - get_ts(), + write_event( + get_ts_ms(), proxy_pid, "", # step_id for unmanaged task is always empty - etype, + entity_type, "stop", status_path, - logger, detail=msg, return_code=ret_code, ) @@ -132,11 +141,12 @@ def main( def cleanup() -> None: """Perform cleanup required for clean termination""" - logger.info("Performing cleanup") global STEP_PID # pylint: disable=global-statement if STEP_PID is None: return + logger.info("Performing cleanup") + try: # attempt to stop the subprocess performing step-execution if psutil.pid_exists(STEP_PID): @@ -228,7 +238,7 @@ def get_parser() -> argparse.ArgumentParser: rc = main( cmd=parsed_args.command, - etype=parsed_args.entity_type, + entity_type=parsed_args.entity_type, cwd=parsed_args.working_dir, status_dir=parsed_args.telemetry_dir, ) diff --git a/smartsim/_core/entrypoints/redis.py b/smartsim/_core/entrypoints/redis.py index 018fc26fd..c4d8cbbd6 100644 --- a/smartsim/_core/entrypoints/redis.py +++ b/smartsim/_core/entrypoints/redis.py @@ -37,7 +37,6 @@ from smartsim._core.utils.network import current_ip from smartsim.entity.dbnode import LaunchedShardData -from smartsim.error import SSInternalError from smartsim.log import get_logger logger = get_logger(__name__) @@ -111,6 +110,7 @@ def main(args: argparse.Namespace) -> int: *build_cluster_args(shard_data), *build_bind_args(src_addr, *bind_addrs), ] + print_summary(cmd, args.ifname, shard_data) try: @@ -119,9 +119,10 @@ def main(args: argparse.Namespace) -> int: for line in iter(process.stdout.readline, b""): print(line.decode("utf-8").rstrip(), flush=True) - except Exception as e: + except Exception: cleanup() - raise SSInternalError("Database process starter raised an exception") from e + logger.error("Database process starter raised an exception", exc_info=True) + return 1 return 0 @@ -179,6 +180,7 @@ def cleanup() -> None: action="store_true", 
help="Specify if this orchestrator shard is part of a cluster", ) + args_ = parser.parse_args() # make sure to register the cleanup before the start diff --git a/smartsim/_core/entrypoints/telemetrymonitor.py b/smartsim/_core/entrypoints/telemetrymonitor.py index 115528bf4..5ed1a0c91 100644 --- a/smartsim/_core/entrypoints/telemetrymonitor.py +++ b/smartsim/_core/entrypoints/telemetrymonitor.py @@ -23,667 +23,149 @@ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - import argparse -import json +import asyncio import logging import os +import os.path import pathlib import signal import sys -import threading -import time import typing as t -from dataclasses import dataclass, field from types import FrameType -from watchdog.events import ( - FileCreatedEvent, - FileModifiedEvent, - LoggingEventHandler, - PatternMatchingEventHandler, +import smartsim._core.config as cfg +from smartsim._core.utils.telemetry.telemetry import ( + TelemetryMonitor, + TelemetryMonitorArgs, ) -from watchdog.observers import Observer -from watchdog.observers.api import BaseObserver - -from smartsim._core.config import CONFIG -from smartsim._core.control.job import JobEntity, _JobKey -from smartsim._core.control.jobmanager import JobManager -from smartsim._core.launcher.launcher import Launcher -from smartsim._core.launcher.local.local import LocalLauncher -from smartsim._core.launcher.lsf.lsfLauncher import LSFLauncher -from smartsim._core.launcher.pbs.pbsLauncher import PBSLauncher -from smartsim._core.launcher.slurm.slurmLauncher import SlurmLauncher -from smartsim._core.launcher.stepInfo import StepInfo -from smartsim._core.utils.helpers import get_ts -from smartsim._core.utils.serialize import MANIFEST_FILENAME -from smartsim.error.errors import SmartSimError -from smartsim.status import STATUS_COMPLETED, 
TERMINAL_STATUSES - -"""Telemetry Monitor entrypoint""" - -# kill is not catchable -SIGNALS = [signal.SIGINT, signal.SIGQUIT, signal.SIGTERM, signal.SIGABRT] -_EventClass = t.Literal["start", "stop", "timestep"] -_MAX_MANIFEST_LOAD_ATTEMPTS: t.Final[int] = 6 - - -@dataclass -class Run: - """Model containing entities of an individual start call for an experiment""" - - timestamp: int - models: t.List[JobEntity] - orchestrators: t.List[JobEntity] - ensembles: t.List[JobEntity] - - def flatten( - self, filter_fn: t.Optional[t.Callable[[JobEntity], bool]] = None - ) -> t.List[JobEntity]: - """Flatten runs into a list of SmartSimEntity run events""" - entities = self.models + self.orchestrators + self.ensembles - if filter_fn: - entities = [entity for entity in entities if filter_fn(entity)] - return entities - - -@dataclass -class RuntimeManifest: - """The runtime manifest holds meta information about the experiment entities created - at runtime to satisfy the experiment requirements. - """ - - name: str - path: pathlib.Path - launcher: str - runs: t.List[Run] = field(default_factory=list) - - -def _hydrate_persistable( - persistable_entity: t.Dict[str, t.Any], - entity_type: str, - exp_dir: str, -) -> JobEntity: - """Populate JobEntity instance with supplied metdata and instance details""" - entity = JobEntity() - - metadata = persistable_entity["telemetry_metadata"] - status_dir = pathlib.Path(metadata.get("status_dir")) - - entity.type = entity_type - entity.name = persistable_entity["name"] - entity.step_id = str(metadata.get("step_id") or "") - entity.task_id = str(metadata.get("task_id") or "") - entity.timestamp = int(persistable_entity.get("timestamp", "0")) - entity.path = str(exp_dir) - entity.status_dir = str(status_dir) - - return entity - - -def hydrate_persistable( - entity_type: str, - persistable_entity: t.Dict[str, t.Any], - exp_dir: pathlib.Path, -) -> t.List[JobEntity]: - """Map entity data persisted in a manifest file to an object""" - entities = [] 
- - # an entity w/parent key creates persistables for entities it contains - parent_keys = {"shards", "models"} - parent_keys = parent_keys.intersection(persistable_entity.keys()) - if parent_keys: - container = "shards" if "shards" in parent_keys else "models" - child_type = "orchestrator" if container == "shards" else "model" - for child_entity in persistable_entity[container]: - entity = _hydrate_persistable(child_entity, child_type, str(exp_dir)) - entities.append(entity) - - return entities - - entity = _hydrate_persistable(persistable_entity, entity_type, str(exp_dir)) - entities.append(entity) - return entities - - -def hydrate_persistables( - entity_type: str, - run: t.Dict[str, t.Any], - exp_dir: pathlib.Path, -) -> t.Dict[str, t.List[JobEntity]]: - """Map a collection of entity data persisted in a manifest file to an object""" - persisted: t.Dict[str, t.List[JobEntity]] = { - "model": [], - "orchestrator": [], - } - for item in run[entity_type]: - entities = hydrate_persistable(entity_type, item, exp_dir) - for new_entity in entities: - persisted[new_entity.type].append(new_entity) - - return persisted - - -def hydrate_runs( - persisted_runs: t.List[t.Dict[str, t.Any]], exp_dir: pathlib.Path -) -> t.List[Run]: - """Map run data persisted in a manifest file to an object""" - the_runs: t.List[Run] = [] - for run_instance in persisted_runs: - run_entities: t.Dict[str, t.List[JobEntity]] = { - "model": [], - "orchestrator": [], - "ensemble": [], - } - - for key in run_entities: - _entities = hydrate_persistables(key, run_instance, exp_dir) - for entity_type, new_entities in _entities.items(): - if new_entities: - run_entities[entity_type].extend(new_entities) - - run = Run( - run_instance["timestamp"], - run_entities["model"], - run_entities["orchestrator"], - run_entities["ensemble"], - ) - the_runs.append(run) - - return the_runs - - -def load_manifest(file_path: str) -> t.Optional[RuntimeManifest]: - """Load a persisted manifest and return the content""" - 
manifest_dict: t.Optional[t.Dict[str, t.Any]] = None - try_count = 1 - - while manifest_dict is None and try_count < _MAX_MANIFEST_LOAD_ATTEMPTS: - source = pathlib.Path(file_path) - source = source.resolve() - - try: - if text := source.read_text(encoding="utf-8").strip(): - manifest_dict = json.loads(text) - except json.JSONDecodeError as ex: - print(f"Error loading manifest: {ex}") - # hack/fix: handle issues reading file before it is fully written - time.sleep(0.5 * try_count) - finally: - try_count += 1 - - if not manifest_dict: - return None - - exp = manifest_dict.get("experiment", None) - if not exp: - raise ValueError("Manifest missing required experiment") - - runs = manifest_dict.get("runs", None) - if runs is None: - raise ValueError("Manifest missing required runs") - - exp_dir = pathlib.Path(exp["path"]) - runs = hydrate_runs(runs, exp_dir) - - manifest = RuntimeManifest( - name=exp["name"], - path=exp_dir, - launcher=exp["launcher"], - runs=runs, - ) - return manifest - - -def track_event( - timestamp: int, - task_id: t.Union[int, str], - step_id: str, - etype: str, - action: _EventClass, - status_dir: pathlib.Path, - logger: logging.Logger, - detail: str = "", - return_code: t.Optional[int] = None, -) -> None: - """Persist a tracking event for an entity""" - tgt_path = status_dir / f"{action}.json" - tgt_path.parent.mkdir(parents=True, exist_ok=True) - - try: - task_id = int(task_id) - except ValueError: - pass - - entity_dict = { - "timestamp": timestamp, - "job_id": task_id, - "step_id": step_id, - "type": etype, - "action": action, - } - - if detail is not None: - entity_dict["detail"] = detail - - if return_code is not None: - entity_dict["return_code"] = return_code - - try: - if not tgt_path.exists(): - # Don't overwrite existing tracking files - bytes_written = tgt_path.write_text(json.dumps(entity_dict, indent=2)) - if bytes_written < 1: - logger.warning("event tracking failed to write tracking file.") - except Exception: - 
logger.error("Unable to write tracking file.", exc_info=True) - - -def faux_return_code(step_info: StepInfo) -> t.Optional[int]: - """Create a faux return code for a task run by the WLM. Must not be - called with non-terminal statuses or results may be confusing - """ - if step_info.status not in TERMINAL_STATUSES: - return None - - if step_info.status == STATUS_COMPLETED: - return os.EX_OK - - return 1 - - -class ManifestEventHandler(PatternMatchingEventHandler): - """The ManifestEventHandler monitors an experiment for changes and updates - a telemetry datastore as needed. - - It contains event handlers that are triggered by changes to a runtime experiment - manifest. The runtime manifest differs from a standard manifest. A runtime manifest - may contain multiple experiment executions in a `runs` collection. - - It also contains a long-polling loop that checks experiment entities for updates - at each timestep. - """ - - def __init__( - self, - pattern: str, - logger: logging.Logger, - ignore_patterns: t.Any = None, - ignore_directories: bool = True, - case_sensitive: bool = False, - ) -> None: - super().__init__( - [pattern], ignore_patterns, ignore_directories, case_sensitive - ) # type: ignore - self._logger = logger - self._tracked_runs: t.Dict[int, Run] = {} - self._tracked_jobs: t.Dict[_JobKey, JobEntity] = {} - self._completed_jobs: t.Dict[_JobKey, JobEntity] = {} - self._launcher: t.Optional[Launcher] = None - self.job_manager: JobManager = JobManager(threading.RLock()) - self._launcher_map: t.Dict[str, t.Type[Launcher]] = { - "slurm": SlurmLauncher, - "pbs": PBSLauncher, - "lsf": LSFLauncher, - "local": LocalLauncher, - } - - def init_launcher(self, launcher: str) -> Launcher: - """Initialize the controller with a specific type of launcher. 
- SmartSim currently supports slurm, pbs(pro), lsf, - and local launching - - :param launcher: which launcher to initialize - :type launcher: str - :raises SSUnsupportedError: if a string is passed that is not - a supported launcher - :raises TypeError: if no launcher argument is provided. - """ - if not launcher: - raise TypeError("Must provide a 'launcher' argument") - - if launcher_type := self._launcher_map.get(launcher.lower(), None): - return launcher_type() - - raise ValueError("Launcher type not supported: " + launcher) - - def set_launcher(self, launcher_type: str) -> None: - """Set the launcher for the experiment""" - self._launcher = self.init_launcher(launcher_type) - self.job_manager.set_launcher(self._launcher) - self.job_manager.start() - - def process_manifest(self, manifest_path: str) -> None: - """Read the runtime manifest for the experiment and track new entities - - :param manifest_path: The full path to the manifest file - :type manifest_path: str - """ - try: - manifest = load_manifest(manifest_path) - if not manifest: - return - except json.JSONDecodeError: - self._logger.error(f"Malformed manifest encountered: {manifest_path}") - return - except ValueError: - self._logger.error("Manifest content error", exc_info=True) - return - - if self._launcher is None: - self.set_launcher(manifest.launcher) - - if not self._launcher: - raise SmartSimError(f"Unable to set launcher from {manifest_path}") - - runs = [run for run in manifest.runs if run.timestamp not in self._tracked_runs] - - exp_dir = pathlib.Path(manifest_path).parent.parent.parent - - for run in runs: - for entity in run.flatten( - filter_fn=lambda e: e.key not in self._tracked_jobs and e.is_managed - ): - entity.path = str(exp_dir) - - self._tracked_jobs[entity.key] = entity - track_event( - run.timestamp, - entity.task_id, - entity.step_id, - entity.type, - "start", - pathlib.Path(entity.status_dir), - self._logger, - ) +from smartsim.log import DEFAULT_LOG_FORMAT, HostnameFilter - if 
entity.is_managed: - self.job_manager.add_job( - entity.name, - entity.task_id, - entity, - False, - ) - self._launcher.step_mapping.add( - entity.name, entity.step_id, entity.task_id, True - ) - self._tracked_runs[run.timestamp] = run +"""Telemetry Monitor entrypoint +Starts a long-running, standalone process that hosts a `TelemetryMonitor`""" - def on_modified(self, event: FileModifiedEvent) -> None: - """Event handler for when a file or directory is modified. - :param event: Event representing file/directory modification. - :type event: FileModifiedEvent - """ - super().on_modified(event) # type: ignore - self._logger.info(f"processing manifest modified @ {event.src_path}") - self.process_manifest(event.src_path) +logger = logging.getLogger("TelemetryMonitor") - def on_created(self, event: FileCreatedEvent) -> None: - """Event handler for when a file or directory is created. - :param event: Event representing file/directory creation. - :type event: FileCreatedEvent - """ - super().on_created(event) # type: ignore - self._logger.info(f"processing manifest created @ {event.src_path}") - self.process_manifest(event.src_path) - - def _to_completed( - self, - timestamp: int, - entity: JobEntity, - step_info: StepInfo, - ) -> None: - """Move a monitored entity from the active to completed collection to - stop monitoring for updates during timesteps. 
- - :param timestamp: the current timestamp for event logging - :type timestamp: int - :param entity: the running SmartSim Job - :type entity: JobEntity - :param experiment_dir: the experiement directory to monitor for changes - :type experiment_dir: pathlib.Path - :param entity: the StepInfo received when requesting a Job status update - :type entity: StepInfo - """ - inactive_entity = self._tracked_jobs.pop(entity.key) - if entity.key not in self._completed_jobs: - self._completed_jobs[entity.key] = inactive_entity - - job = self.job_manager[entity.name] - self.job_manager.move_to_completed(job) - - status_clause = f"status: {step_info.status}" - error_clause = f", error: {step_info.error}" if step_info.error else "" - detail = f"{status_clause}{error_clause}" - - if hasattr(job.entity, "status_dir"): - write_path = pathlib.Path(job.entity.status_dir) - - track_event( - timestamp, - entity.task_id, - entity.step_id, - entity.type, - "stop", - write_path, - self._logger, - detail=detail, - return_code=faux_return_code(step_info), - ) - - def on_timestep(self, timestamp: int) -> None: - """Called at polling frequency to request status updates on - monitored entities - - :param timestamp: the current timestamp for event logging - :type timestamp: int - :param experiment_dir: the experiement directory to monitor for changes - :type experiment_dir: pathlib.Path - """ - entity_map = self._tracked_jobs - - if not self._launcher: - return - - # consider not using name to avoid collisions - names = {entity.name: entity for entity in entity_map.values()} - - if names: - step_updates = self._launcher.get_step_update(list(names.keys())) - - for step_name, step_info in step_updates: - if step_info and step_info.status in TERMINAL_STATUSES: - completed_entity = names[step_name] - self._to_completed(timestamp, completed_entity, step_info) - - -def can_shutdown(action_handler: ManifestEventHandler, logger: logging.Logger) -> bool: - jobs = action_handler.job_manager.jobs - 
db_jobs = action_handler.job_manager.db_jobs - - has_jobs = bool(jobs) - has_dbs = bool(db_jobs) - has_running_jobs = has_jobs or has_dbs - - if has_jobs: - logger.debug(f"telemetry monitor is monitoring {len(jobs)} jobs") - if has_dbs: - logger.debug(f"telemetry monitor is monitoring {len(db_jobs)} dbs") - - return not has_running_jobs - - -def event_loop( - observer: BaseObserver, - action_handler: ManifestEventHandler, - frequency: t.Union[int, float], - logger: logging.Logger, - cooldown_duration: int, +def register_signal_handlers( + handle_signal: t.Callable[[int, t.Optional[FrameType]], None] ) -> None: - """Executes all attached timestep handlers every seconds + """Register a signal handling function for all termination events - :param observer: (optional) a preconfigured watchdog Observer to inject - :type observer: t.Optional[BaseObserver] - :param action_handler: The manifest event processor instance - :type action_handler: ManifestEventHandler - :param frequency: frequency (in seconds) of update loop - :type frequency: t.Union[int, float] - :param logger: a preconfigured Logger instance - :type logger: logging.Logger - :param cooldown_duration: number of seconds the telemetry monitor should - poll for new jobs before attempting to shutdown - :type cooldown_duration: int + :param handle_signal: the function to execute when a term signal is received """ - elapsed: int = 0 - last_ts: int = get_ts() - - while observer.is_alive(): - timestamp = get_ts() - logger.debug(f"Telemetry timestep: {timestamp}") - action_handler.on_timestep(timestamp) - - elapsed += timestamp - last_ts - last_ts = timestamp - - if can_shutdown(action_handler, logger): - if elapsed >= cooldown_duration: - logger.info("beginning telemetry manager shutdown") - observer.stop() # type: ignore - else: - # reset cooldown any time there are still jobs running - elapsed = 0 - - time.sleep(frequency) + # NOTE: omitting kill because it is not catchable + term_signals = [signal.SIGINT, 
signal.SIGQUIT, signal.SIGTERM, signal.SIGABRT] + for signal_num in term_signals: + signal.signal(signal_num, handle_signal) -def main( - frequency: t.Union[int, float], - experiment_dir: pathlib.Path, - logger: logging.Logger, - observer: t.Optional[BaseObserver] = None, - cooldown_duration: t.Optional[int] = 0, -) -> int: - """Setup the monitoring entities and start the timer-based loop that - will poll for telemetry data +def get_parser() -> argparse.ArgumentParser: + """Instantiate a parser to process command line arguments - :param frequency: frequency (in seconds) of update loop - :type frequency: t.Union[int, float] - :param experiment_dir: the experiement directory to monitor for changes - :type experiment_dir: pathlib.Path - :param logger: a preconfigured Logger instance - :type logger: logging.Logger - :param observer: (optional) a preconfigured Observer to inject - :type observer: t.Optional[BaseObserver] - :param cooldown_duration: number of seconds the telemetry monitor should - poll for new jobs before attempting to shutdown - :type cooldown_duration: int + :returns: An argument parser ready to accept required telemetry monitor parameters """ - manifest_relpath = pathlib.Path(CONFIG.telemetry_subdir) / MANIFEST_FILENAME - manifest_path = experiment_dir / manifest_relpath - monitor_pattern = str(manifest_relpath) - - logger.info( - f"Executing telemetry monitor with frequency: {frequency}s" - f", on target directory: {experiment_dir}" - f" matching pattern: {monitor_pattern}" - ) - - cooldown_duration = cooldown_duration or CONFIG.telemetry_cooldown - log_handler = LoggingEventHandler(logger) # type: ignore - action_handler = ManifestEventHandler(monitor_pattern, logger) - - if observer is None: - observer = Observer() - - try: - if manifest_path.exists(): - # a manifest may not exist depending on startup timing - action_handler.process_manifest(str(manifest_path)) - - observer.schedule(log_handler, experiment_dir, recursive=True) # type:ignore - 
observer.schedule(action_handler, experiment_dir, recursive=True) # type:ignore - observer.start() # type: ignore - - event_loop(observer, action_handler, frequency, logger, cooldown_duration) - return os.EX_OK - except Exception as ex: - logger.error(ex) - finally: - if observer.is_alive(): - observer.stop() # type: ignore - observer.join() - - return os.EX_SOFTWARE - - -def handle_signal(signo: int, _frame: t.Optional[FrameType]) -> None: - """Helper function to ensure clean process termination""" - if not signo: - logger = logging.getLogger() - logger.warning("Received signal with no signo") - - -def register_signal_handlers() -> None: - """Register a signal handling function for all termination events""" - for sig in SIGNALS: - signal.signal(sig, handle_signal) - - -def get_parser() -> argparse.ArgumentParser: - """Instantiate a parser to process command line arguments""" arg_parser = argparse.ArgumentParser(description="SmartSim Telemetry Monitor") - arg_parser.add_argument( - "-frequency", - type=int, - help="Frequency of telemetry updates (in seconds))", - required=True, - ) arg_parser.add_argument( "-exp_dir", type=str, help="Experiment root directory", required=True, ) + arg_parser.add_argument( + "-frequency", + type=float, + help="Frequency of telemetry updates (in seconds))", + required=True, + ) arg_parser.add_argument( "-cooldown", type=int, help="Default lifetime of telemetry monitor (in seconds) before auto-shutdown", - default=CONFIG.telemetry_cooldown, + default=cfg.CONFIG.telemetry_cooldown, + ) + arg_parser.add_argument( + "-loglevel", + type=int, + help="Logging level", + default=logging.INFO, ) return arg_parser -if __name__ == "__main__": - os.environ["PYTHONUNBUFFERED"] = "1" +def parse_arguments() -> TelemetryMonitorArgs: + """Parse the command line arguments and return an instance + of TelemetryMonitorArgs populated with the CLI inputs + :returns: `TelemetryMonitorArgs` instance populated with command line arguments + """ parser = 
get_parser() - args = parser.parse_args() + parsed_args = parser.parse_args() + return TelemetryMonitorArgs( + parsed_args.exp_dir, + parsed_args.frequency, + parsed_args.cooldown, + parsed_args.loglevel, + ) - log = logging.getLogger(f"{__name__}.TelemetryMonitor") - log.setLevel(logging.DEBUG) - log.propagate = False - log_path = os.path.join( - args.exp_dir, CONFIG.telemetry_subdir, "telemetrymonitor.log" - ) - fh = logging.FileHandler(log_path, "a") - log.addHandler(fh) +def configure_logger(logger_: logging.Logger, log_level_: int, exp_dir: str) -> None: + """Configure the telemetry monitor logger to write logs to the + target output file path passed as an argument to the entrypoint + + :param logger_: logger to configure + :param log_level_: log level to apply to the python logging system + :param exp_dir: root path to experiment outputs + """ + logger_.setLevel(log_level_) + logger_.propagate = False + + # use a standard subdirectory of the experiment output path for logs + telemetry_dir = pathlib.Path(exp_dir) / cfg.CONFIG.telemetry_subdir + + # all telemetry monitor logs are written to file in addition to stdout + log_path = telemetry_dir / "logs/telemetrymonitor.out" + log_path.parent.mkdir(parents=True, exist_ok=True) + file_handler = logging.FileHandler(log_path, "a") + + # HostnameFilter is required to enrich log context to use DEFAULT_LOG_FORMAT + file_handler.addFilter(HostnameFilter()) + + formatter = logging.Formatter(DEFAULT_LOG_FORMAT) + file_handler.setFormatter(formatter) + logger_.addHandler(file_handler) + + +if __name__ == "__main__": + """Prepare the telemetry monitor process using command line arguments. 
+ + Sample usage: + python -m smartsim._core.entrypoints.telemetrymonitor -exp_dir + -frequency 30 -cooldown 90 -loglevel INFO + The experiment id is generated during experiment startup + and can be found in the manifest.json in /.smartsim/telemetry + """ + os.environ["PYTHONUNBUFFERED"] = "1" + + args = parse_arguments() + configure_logger(logger, args.log_level, args.exp_dir) + + telemetry_monitor = TelemetryMonitor(args) # Must register cleanup before the main loop is running - register_signal_handlers() + def cleanup_telemetry_monitor(_signo: int, _frame: t.Optional[FrameType]) -> None: + """Create an enclosure on `manifest_observer` to avoid global variables""" + logger.info("Shutdown signal received by telemetry monitor entrypoint") + telemetry_monitor.cleanup() + + register_signal_handlers(cleanup_telemetry_monitor) try: - main( - int(args.frequency), - pathlib.Path(args.exp_dir), - log, - cooldown_duration=args.cooldown, - ) + asyncio.run(telemetry_monitor.run()) sys.exit(0) except Exception: - log.exception( + logger.exception( "Shutting down telemetry monitor due to unexpected error", exc_info=True ) diff --git a/smartsim/_core/generation/generator.py b/smartsim/_core/generation/generator.py index 502753df7..8706cf568 100644 --- a/smartsim/_core/generation/generator.py +++ b/smartsim/_core/generation/generator.py @@ -64,11 +64,8 @@ def __init__( collision between entities. :param gen_path: Path in which files need to be generated - :type gen_path: str - :param overwrite: toggle entity replacement, defaults to False - :type overwrite: bool, optional + :param overwrite: toggle entity replacement :param verbose: Whether generation information should be logged to std out - :type verbose: bool, optional """ self._writer = ModelWriter() self.gen_path = gen_path @@ -82,7 +79,6 @@ def log_file(self) -> str: of all generated entities. 
:returns: path to file with parameter settings - :rtype: str """ return join(self.gen_path, "smartsim_params.txt") @@ -129,11 +125,7 @@ def set_tag(self, tag: str, regex: t.Optional[str] = None) -> None: :param tag: A string of characters that signify the string to be changed. Defaults to ``;`` - :type tag: str - - :param regex: full regex for the modelwriter to search for, - defaults to None - :type regex: str | None + :param regex: full regex for the modelwriter to search for """ self._writer.set_tag(tag, regex) @@ -148,7 +140,7 @@ def _gen_exp_dir(self) -> None: ) if not path.isdir(self.gen_path): # keep exists ok for race conditions on NFS - pathlib.Path(self.gen_path).mkdir(exist_ok=True) + pathlib.Path(self.gen_path).mkdir(exist_ok=True, parents=True) else: logger.log( level=self.log_level, msg="Working in previously created experiment" @@ -167,7 +159,6 @@ def _gen_orc_dir(self, orchestrator_list: t.List[Orchestrator]) -> None: configuration files for the orchestrator. :param orchestrator: Orchestrator instance - :type orchestrator: Orchestrator | None """ # Loop through orchestrators for orchestrator in orchestrator_list: @@ -177,13 +168,12 @@ def _gen_orc_dir(self, orchestrator_list: t.List[Orchestrator]) -> None: # Always remove orchestrator files if present. 
if path.isdir(orc_path): shutil.rmtree(orc_path, ignore_errors=True) - pathlib.Path(orc_path).mkdir(exist_ok=self.overwrite) + pathlib.Path(orc_path).mkdir(exist_ok=self.overwrite, parents=True) def _gen_entity_list_dir(self, entity_lists: t.List[Ensemble]) -> None: """Generate directories for Ensemble instances :param entity_lists: list of Ensemble instances - :type entity_lists: list """ if not entity_lists: @@ -209,9 +199,7 @@ def _gen_entity_dirs( """Generate directories for Entity instances :param entities: list of Model instances - :type entities: list[Model] - :param entity_list: Ensemble instance, defaults to None - :type entity_list: Ensemble | None + :param entity_list: Ensemble instance :raises EntityExistsError: if a directory already exists for an entity by that name """ @@ -247,7 +235,6 @@ def _write_tagged_entity_files(self, entity: Model) -> None: an Ensemble. :param entity: a Model instance - :type entity: Model """ if entity.files: to_write = [] @@ -258,7 +245,6 @@ def _build_tagged_files(tagged: TaggedFilesHierarchy) -> None: :param tagged: a TaggedFileHierarchy to be built as a directory structure - :type tagged: TaggedFilesHierarchy """ for file in tagged.files: dst_path = path.join(entity.path, tagged.base, path.basename(file)) @@ -291,9 +277,7 @@ def _log_params( and what values were set to the parameters :param entity: the model being generated - :type entity: Model :param files_to_params: a dict connecting each file to its parameter settings - :type files_to_params: t.Dict[str, t.Dict[str, str]] """ used_params: t.Dict[str, str] = {} file_to_tables: t.Dict[str, str] = {} @@ -333,7 +317,6 @@ def _copy_entity_files(entity: Model) -> None: """Copy the entity files and directories attached to this entity. :param entity: Model - :type entity: Model """ if entity.files: for to_copy in entity.files.copy: @@ -348,7 +331,6 @@ def _link_entity_files(entity: Model) -> None: """Symlink the entity files attached to this entity. 
:param entity: Model - :type entity: Model """ if entity.files: for to_link in entity.files.link: diff --git a/smartsim/_core/generation/modelwriter.py b/smartsim/_core/generation/modelwriter.py index 3062ea1db..2998d4e35 100644 --- a/smartsim/_core/generation/modelwriter.py +++ b/smartsim/_core/generation/modelwriter.py @@ -24,6 +24,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import collections import re import typing as t @@ -47,10 +48,8 @@ def set_tag(self, tag: str, regex: t.Optional[str] = None) -> None: :param tag: tag for the modelwriter to search for, defaults to semi-colon e.g. ";" - :type tag: str :param regex: full regex for the modelwriter to search for, defaults to "(;.+;)" - :type regex: str, optional """ if regex: self.regex = regex @@ -68,13 +67,9 @@ def configure_tagged_model_files( instance. :param tagged_files: list of paths to tagged files - :type model: list[str] :param params: model parameters - :type params: dict[str, str] :param make_missing_tags_fatal: raise an error if a tag is missing - :type make_missing_tags_fatal: bool :returns: A dict connecting each file to its parameter settings - :rtype: dict[str,dict[str,str]] """ files_to_tags: t.Dict[str, t.Dict[str, str]] = {} for tagged_file in tagged_files: @@ -89,7 +84,6 @@ def _set_lines(self, file_path: str) -> None: """Set the lines for the modelwrtter to iterate over :param file_path: path to the newly created and tagged file - :type file_path: str :raises ParameterWriterError: if the newly created file cannot be read """ try: @@ -117,43 +111,31 @@ def _replace_tags( model. 
The tag defaults to ";" :param model: The model instance - :type model: Model :param make_fatal: (Optional) Set to True to force a fatal error if a tag is not matched - :type make_fatal: bool :returns: A dict of parameter names and values set for the file - :rtype: dict[str,str] """ edited = [] - unused_tags: t.Dict[str, t.List[int]] = {} + unused_tags: t.DefaultDict[str, t.List[int]] = collections.defaultdict(list) used_params: t.Dict[str, str] = {} - for i, line in enumerate(self.lines): - search = re.search(self.regex, line) - if search: - while search: - tagged_line = search.group(0) - previous_value = self._get_prev_value(tagged_line) - if self._is_ensemble_spec(tagged_line, params): - new_val = str(params[previous_value]) - new_line = re.sub(self.regex, new_val, line, 1) - search = re.search(self.regex, new_line) - used_params[previous_value] = new_val - if not search: - edited.append(new_line) - else: - line = new_line - - # if a tag is found but is not in this model's configurations - # put in placeholder value - else: - tag = tagged_line.split(self.tag)[1] - if tag not in unused_tags: - unused_tags[tag] = [] - unused_tags[tag].append(i + 1) - edited.append(re.sub(self.regex, previous_value, line)) - search = None # Move on to the next tag - else: - edited.append(line) + for i, line in enumerate(self.lines, 1): + while search := re.search(self.regex, line): + tagged_line = search.group(0) + previous_value = self._get_prev_value(tagged_line) + if self._is_ensemble_spec(tagged_line, params): + new_val = str(params[previous_value]) + line = re.sub(self.regex, new_val, line, 1) + used_params[previous_value] = new_val + + # if a tag is found but is not in this model's configurations + # put in placeholder value + else: + tag = tagged_line.split(self.tag)[1] + unused_tags[tag].append(i) + line = re.sub(self.regex, previous_value, line) + break + edited.append(line) + for tag, value in unused_tags.items(): missing_tag_message = f"Unused tag {tag} on line(s): 
{str(value)}" if make_fatal: diff --git a/smartsim/_core/launcher/__init__.py b/smartsim/_core/launcher/__init__.py index 0c4001cd4..d78909641 100644 --- a/smartsim/_core/launcher/__init__.py +++ b/smartsim/_core/launcher/__init__.py @@ -24,6 +24,7 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from .dragon.dragonLauncher import DragonLauncher from .launcher import Launcher from .local.local import LocalLauncher from .lsf.lsfLauncher import LSFLauncher @@ -32,6 +33,7 @@ __all__ = [ "Launcher", + "DragonLauncher", "LocalLauncher", "LSFLauncher", "PBSLauncher", diff --git a/smartsim/_core/launcher/colocated.py b/smartsim/_core/launcher/colocated.py index 11d26b141..c69a9cef1 100644 --- a/smartsim/_core/launcher/colocated.py +++ b/smartsim/_core/launcher/colocated.py @@ -42,11 +42,8 @@ def write_colocated_launch_script( is created for this entity. :param file_name: name of the script to write - :type file_name: str :param db_log: log file for the db - :type db_log: str :param colocated_settings: db settings from entity run_settings - :type colocated_settings: dict[str, Any] """ colocated_cmd = _build_colocated_wrapper_cmd(db_log, **colocated_settings) @@ -67,9 +64,14 @@ def write_colocated_launch_script( # STDOUT of the job if colocated_settings["debug"]: script_file.write("export SMARTSIM_LOG_LEVEL=debug\n") - - script_file.write(f"{colocated_cmd}\n") - script_file.write("DBPID=$!\n\n") + script_file.write(f"db_stdout=$({colocated_cmd})\n") + # extract and set DBPID within the shell script that is + # enclosed between __PID__ and sent to stdout by the colocated + # entrypoints file + script_file.write( + "DBPID=$(echo $db_stdout | sed -n " + "'s/.*__PID__\\([0-9]*\\)__PID__.*/\\1/p')\n" + ) # Write the actual launch command for the app script_file.write("$@\n\n") @@ -88,21 +90,13 @@ def _build_colocated_wrapper_cmd( """Build the command use to run a colocated 
DB application :param db_log: log file for the db - :type db_log: str - :param cpus: db cpus, defaults to 1 - :type cpus: int, optional - :param rai_args: redisai args, defaults to None - :type rai_args: dict[str, str], optional - :param extra_db_args: extra redis args, defaults to None - :type extra_db_args: dict[str, str], optional + :param cpus: db cpus + :param rai_args: redisai args + :param extra_db_args: extra redis args :param port: port to bind DB to - :type port: int :param ifname: network interface(s) to bind DB to - :type ifname: str | list[str], optional :param db_cpu_list: The list of CPUs that the database should be limited to - :type db_cpu_list: str, optional :return: the command to run - :rtype: str """ # pylint: disable=too-many-locals @@ -190,10 +184,8 @@ def _build_colocated_wrapper_cmd( db_script_cmd = _build_db_script_cmd(db_scripts) db_cmd.extend(db_script_cmd) - # run colocated db in the background - db_cmd.append("&") - cmd.extend(db_cmd) + return " ".join(cmd) @@ -235,7 +227,8 @@ def _build_db_script_cmd(db_scripts: t.List[DBScript]) -> t.List[str]: if db_script.func: # Notice that here db_script.func is guaranteed to be a str # because we don't allow the user to pass a serialized function - sanitized_func = db_script.func.replace("\n", "\\n") + func = db_script.func + sanitized_func = func.replace("\n", "\\n") if not ( sanitized_func.startswith("'") and sanitized_func.endswith("'") diff --git a/smartsim/_core/launcher/dragon/__init__.py b/smartsim/_core/launcher/dragon/__init__.py new file mode 100644 index 000000000..efe03908e --- /dev/null +++ b/smartsim/_core/launcher/dragon/__init__.py @@ -0,0 +1,25 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. 
Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/smartsim/_core/launcher/dragon/dragonBackend.py b/smartsim/_core/launcher/dragon/dragonBackend.py new file mode 100644 index 000000000..245660662 --- /dev/null +++ b/smartsim/_core/launcher/dragon/dragonBackend.py @@ -0,0 +1,734 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import collections +import functools +import itertools +import time +import typing as t +from dataclasses import dataclass, field +from enum import Enum +from threading import RLock + +from tabulate import tabulate + +# pylint: disable=import-error +# isort: off +import dragon.infrastructure.connection as dragon_connection +import dragon.infrastructure.policy as dragon_policy +import dragon.native.group_state as dragon_group_state +import dragon.native.process as dragon_process +import dragon.native.process_group as dragon_process_group +import dragon.native.machine as dragon_machine + +# pylint: enable=import-error +# isort: on +from ...._core.config import get_config +from ...._core.schemas import ( + DragonHandshakeRequest, + DragonHandshakeResponse, + DragonRequest, + DragonResponse, + DragonRunRequest, + DragonRunResponse, + DragonShutdownRequest, + DragonShutdownResponse, + DragonStopRequest, + DragonStopResponse, + DragonUpdateStatusRequest, + DragonUpdateStatusResponse, +) +from ...._core.utils.helpers import create_short_id_str +from ....log import get_logger +from ....status import TERMINAL_STATUSES, SmartSimStatus + +logger = get_logger(__name__) + + +class DragonStatus(str, 
Enum): + ERROR = str(dragon_group_state.Error()) + RUNNING = str(dragon_group_state.Running()) + + def __str__(self) -> str: + return self.value + + +@dataclass +class ProcessGroupInfo: + status: SmartSimStatus + """Status of step""" + process_group: t.Optional[dragon_process_group.ProcessGroup] = None + """Internal Process Group object, None for finished or not started steps""" + puids: t.Optional[t.List[t.Optional[int]]] = None # puids can be None + """List of Process UIDS belonging to the ProcessGroup""" + return_codes: t.Optional[t.List[int]] = None + """List of return codes of completed processes""" + hosts: t.List[str] = field(default_factory=list) + """List of hosts on which the Process Group """ + redir_workers: t.Optional[dragon_process_group.ProcessGroup] = None + """Workers used to redirect stdout and stderr to file""" + + @property + def smartsim_info(self) -> t.Tuple[SmartSimStatus, t.Optional[t.List[int]]]: + """Information needed by SmartSim Launcher and Job Manager""" + return (self.status, self.return_codes) + + def __str__(self) -> str: + if self.process_group is not None and self.redir_workers is not None: + msg = [f"Active Group ({self.status})"] + if self.puids is not None: + msg.append(f"Number processes: {len(self.puids)}") + else: + msg = [f"Inactive Group ({self.status})"] + + if self.hosts is not None: + msg.append(f"Hosts: {','.join(self.hosts)}") + if self.return_codes is not None: + msg.append(f"{self.return_codes}") + + return ", ".join(msg) + + +# Thanks to Colin Wahl from HPE HPC Dragon Team +def redir_worker(io_conn: dragon_connection.Connection, file_path: str) -> None: + """Read stdout/stderr from the Dragon connection. 
+ + :param io_conn: Dragon connection to stdout or stderr + :param file_path: path to file to write to + """ + while io_conn is None or not io_conn.readable: + time.sleep(0.1) + try: + with open(file_path, "a", encoding="utf-8") as file_to_write: + while True: + output = io_conn.recv() + print(output, flush=True, file=file_to_write, end="") + except EOFError: + pass + except Exception as e: + print(e) + finally: + try: + io_conn.close() + except Exception as e: + print(e) + + +class DragonBackend: + """The DragonBackend class is the main interface between + SmartSim and Dragon. It is not intended to be user-facing, + and will only be called by the Dragon entry-point script or + by threads spawned by it. + """ + + def __init__(self, pid: int) -> None: + self._pid = pid + """PID of dragon executable which launched this server""" + self._group_infos: t.Dict[str, ProcessGroupInfo] = {} + """ProcessGroup execution state information""" + self._queue_lock = RLock() + """Lock that needs to be acquired to access internal queues""" + self._step_ids = (f"{create_short_id_str()}-{id}" for id in itertools.count()) + """Incremental ID to assign to new steps prior to execution""" + + self._initialize_hosts() + self._queued_steps: "collections.OrderedDict[str, DragonRunRequest]" = ( + collections.OrderedDict() + ) + """Steps waiting for execution""" + self._stop_requests: t.Deque[DragonStopRequest] = collections.deque() + """Stop requests which have not been processed yet""" + self._running_steps: t.List[str] = [] + """List of currently running steps""" + self._completed_steps: t.List[str] = [] + """List of completed steps""" + self._last_beat: float = 0.0 + """Time at which the last heartbeat was set""" + self._heartbeat() + self._last_update_time = self._last_beat + """Time at which the status update was printed the last time""" + self._shutdown_requested = False + """Whether the shutdown was requested to this server""" + self._can_shutdown = False + """Whether the server can 
shut down""" + self._frontend_shutdown: bool = False + """Whether the server frontend should shut down when the backend does""" + self._shutdown_initiation_time: t.Optional[float] = None + """The time at which the server initiated shutdown""" + smartsim_config = get_config() + self._cooldown_period = ( + smartsim_config.telemetry_frequency * 2 + 5 + if smartsim_config.telemetry_enabled + else 5 + ) + """Time in seconds needed to server to complete shutdown""" + + self._view = DragonBackendView(self) + logger.debug(self._view.host_desc) + + @property + def hosts(self) -> list[str]: + with self._queue_lock: + return self._hosts + + @property + def allocated_hosts(self) -> dict[str, str]: + with self._queue_lock: + return self._allocated_hosts + + @property + def free_hosts(self) -> t.Deque[str]: + with self._queue_lock: + return self._free_hosts + + @property + def group_infos(self) -> dict[str, ProcessGroupInfo]: + with self._queue_lock: + return self._group_infos + + def _initialize_hosts(self) -> None: + with self._queue_lock: + self._hosts: t.List[str] = sorted( + dragon_machine.Node(node).hostname + for node in dragon_machine.System().nodes + ) + """List of hosts available in allocation""" + self._free_hosts: t.Deque[str] = collections.deque(self._hosts) + """List of hosts on which steps can be launched""" + self._allocated_hosts: t.Dict[str, str] = {} + """Mapping of hosts on which a step is already running to step ID""" + + def __str__(self) -> str: + return self.status_message + + @property + def status_message(self) -> str: + """Message with status of available nodes and history of launched jobs. 
+ + :returns: Status message + """ + return ( + "Dragon server backend update\n" + f"{self._view.host_table}\n{self._view.step_table}" + ) + + def _heartbeat(self) -> None: + self._last_beat = self.current_time + + @property + def cooldown_period(self) -> int: + """Time (in seconds) the server will wait before shutting down + + when exit conditions are met (see ``should_shutdown()`` for further details). + """ + return self._cooldown_period + + @property + def _has_cooled_down(self) -> bool: + if self._shutdown_initiation_time is None: + logger.debug(f"Starting cooldown period of {self._cooldown_period} seconds") + self._shutdown_initiation_time = self.current_time + return ( + self.current_time - self._shutdown_initiation_time > self._cooldown_period + ) + + @property + def frontend_shutdown(self) -> bool: + """Whether the frontend will have to shutdown once the backend does + + If False, the frontend will wait for an external signal to stop. + """ + return self._frontend_shutdown + + @property + def last_heartbeat(self) -> float: + """Time (in seconds) at which the last heartbeat was set""" + return self._last_beat + + @property + def should_shutdown(self) -> bool: + """Whether the server should shut down + + A server should shut down if a DragonShutdownRequest was received + and it requested immediate shutdown, or if it did not request immediate + shutdown, but all jobs have been executed. + In both cases, a cooldown period may need to be waited before shutdown. + """ + if self._shutdown_requested and self._can_shutdown: + return self._has_cooled_down + return False + + @property + def current_time(self) -> float: + """Current time for DragonBackend object, in seconds since the Epoch""" + return time.time() + + def _can_honor(self, request: DragonRunRequest) -> t.Tuple[bool, t.Optional[str]]: + """Check if request can be honored with resources available in the allocation. 
+ + Currently only checks for total number of nodes, + in the future it will also look at other constraints + such as memory, accelerators, and so on. + """ + if request.nodes > len(self._hosts): + message = f"Cannot satisfy request. Requested {request.nodes} nodes, " + message += f"but only {len(self._hosts)} nodes are available." + return False, message + if self._shutdown_requested: + message = "Cannot satisfy request, server is shutting down." + return False, message + return True, None + + def _allocate_step( + self, step_id: str, request: DragonRunRequest + ) -> t.Optional[t.List[str]]: + + num_hosts: int = request.nodes + with self._queue_lock: + if num_hosts <= 0 or num_hosts > len(self._free_hosts): + return None + to_allocate = [] + for _ in range(num_hosts): + host = self._free_hosts.popleft() + self._allocated_hosts[host] = step_id + to_allocate.append(host) + return to_allocate + + @staticmethod + def _create_redirect_workers( + global_policy: dragon_policy.Policy, + policies: t.List[dragon_policy.Policy], + puids: t.List[int], + out_file: t.Optional[str], + err_file: t.Optional[str], + ) -> dragon_process_group.ProcessGroup: + grp_redir = dragon_process_group.ProcessGroup( + restart=False, policy=global_policy, pmi_enabled=False + ) + for pol, puid in zip(policies, puids): + proc = dragon_process.Process(None, ident=puid) + if out_file: + grp_redir.add_process( + nproc=1, + template=dragon_process.ProcessTemplate( + target=redir_worker, + args=(proc.stdout_conn, out_file), + stdout=dragon_process.Popen.DEVNULL, + policy=pol, + ), + ) + if err_file: + grp_redir.add_process( + nproc=1, + template=dragon_process.ProcessTemplate( + target=redir_worker, + args=(proc.stderr_conn, err_file), + stdout=dragon_process.Popen.DEVNULL, + policy=pol, + ), + ) + + return grp_redir + + def _stop_steps(self) -> None: + self._heartbeat() + with self._queue_lock: + while len(self._stop_requests) > 0: + request = self._stop_requests.popleft() + step_id = request.step_id 
+ if step_id not in self._group_infos: + logger.error(f"Requested to stop non-existing step {step_id}") + continue + + logger.debug(f"Stopping step {step_id}") + if request.step_id in self._queued_steps: + self._queued_steps.pop(step_id) + else: + # Technically we could just terminate, but what if + # the application intercepts that and ignores it? + proc_group = self._group_infos[step_id].process_group + if ( + proc_group is not None + and proc_group.status == DragonStatus.RUNNING + ): + try: + proc_group.kill() + except dragon_process_group.DragonProcessGroupError: + try: + proc_group.stop() + except dragon_process_group.DragonProcessGroupError: + logger.error("Process group already stopped") + redir_group = self._group_infos[step_id].redir_workers + if redir_group is not None: + try: + redir_group.join(0.1) + redir_group = None + except Exception as e: + logger.error(e) + + self._group_infos[step_id].status = SmartSimStatus.STATUS_CANCELLED + self._group_infos[step_id].return_codes = [-9] + + def _start_steps(self) -> None: + self._heartbeat() + with self._queue_lock: + started = [] + for step_id, request in self._queued_steps.items(): + hosts = self._allocate_step(step_id, self._queued_steps[step_id]) + if not hosts: + continue + + logger.debug(f"Step id {step_id} allocated on {hosts}") + + global_policy = dragon_policy.Policy( + placement=dragon_policy.Policy.Placement.HOST_NAME, + host_name=hosts[0], + ) + grp = dragon_process_group.ProcessGroup( + restart=False, pmi_enabled=request.pmi_enabled, policy=global_policy + ) + + policies = [] + for node_name in hosts: + local_policy = dragon_policy.Policy( + placement=dragon_policy.Policy.Placement.HOST_NAME, + host_name=node_name, + ) + policies.extend([local_policy] * request.tasks_per_node) + tmp_proc = dragon_process.ProcessTemplate( + target=request.exe, + args=request.exe_args, + cwd=request.path, + env={**request.current_env, **request.env}, + stdout=dragon_process.Popen.PIPE, + 
stderr=dragon_process.Popen.PIPE, + policy=local_policy, + ) + grp.add_process(nproc=request.tasks_per_node, template=tmp_proc) + + try: + grp.init() + grp.start() + grp_status = SmartSimStatus.STATUS_RUNNING + except Exception as e: + logger.error(e) + grp_status = SmartSimStatus.STATUS_FAILED + + puids = None + try: + puids = list( + set(grp.puids + [puid for puid, retcode in grp.inactive_puids]) + ) + self._group_infos[step_id] = ProcessGroupInfo( + process_group=grp, + puids=puids, + return_codes=[], + status=grp_status, + hosts=hosts, + ) + self._running_steps.append(step_id) + started.append(step_id) + except Exception as e: + logger.error(e) + + if ( + puids is not None + and len(puids) == len(policies) + and grp_status == SmartSimStatus.STATUS_RUNNING + ): + redir_grp = DragonBackend._create_redirect_workers( + global_policy, + policies, + puids, + request.output_file, + request.error_file, + ) + try: + redir_grp.init() + redir_grp.start() + except Exception as e: + raise IOError( + f"Could not redirect stdout and stderr for PUIDS {puids}" + ) from e + self._group_infos[step_id].redir_workers = redir_grp + elif puids is not None and grp_status == SmartSimStatus.STATUS_RUNNING: + logger.error("Cannot redirect workers: some PUIDS are missing") + + if started: + logger.debug(f"{started=}") + + for step_id in started: + try: + self._queued_steps.pop(step_id) + except KeyError: + logger.error( + f"Tried to allocate the same step twice, step id {step_id}" + ) + except Exception as e: + logger.error(e) + + def _refresh_statuses(self) -> None: + self._heartbeat() + with self._queue_lock: + terminated = [] + for step_id in self._running_steps: + group_info = self._group_infos[step_id] + grp = group_info.process_group + if grp is None: + group_info.status = SmartSimStatus.STATUS_FAILED + group_info.return_codes = [-1] + elif group_info.status not in TERMINAL_STATUSES: + if grp.status == str(DragonStatus.RUNNING): + group_info.status = SmartSimStatus.STATUS_RUNNING + 
else: + puids = group_info.puids + if puids is not None and all( + puid is not None for puid in puids + ): + try: + group_info.return_codes = [ + dragon_process.Process(None, ident=puid).returncode + for puid in puids + ] + except (ValueError, TypeError) as e: + logger.error(e) + group_info.return_codes = [-1 for _ in puids] + else: + group_info.return_codes = [0] + if not group_info.status == SmartSimStatus.STATUS_CANCELLED: + group_info.status = ( + SmartSimStatus.STATUS_FAILED + if any(group_info.return_codes) + or grp.status == DragonStatus.ERROR + else SmartSimStatus.STATUS_COMPLETED + ) + + if group_info.status in TERMINAL_STATUSES: + terminated.append(step_id) + + if terminated: + logger.debug(f"{terminated=}") + + for step_id in terminated: + self._running_steps.remove(step_id) + self._completed_steps.append(step_id) + group_info = self._group_infos[step_id] + if group_info is not None: + for host in group_info.hosts: + logger.debug(f"Releasing host {host}") + try: + self._allocated_hosts.pop(host) + except KeyError: + logger.error(f"Tried to free a non-allocated host: {host}") + self._free_hosts.append(host) + group_info.process_group = None + group_info.redir_workers = None + + def _update_shutdown_status(self) -> None: + self._heartbeat() + with self._queue_lock: + self._can_shutdown |= ( + all( + grp_info.status in TERMINAL_STATUSES + and grp_info.process_group is None + and grp_info.redir_workers is None + for grp_info in self._group_infos.values() + ) + and self._shutdown_requested + ) + + def _should_print_status(self) -> bool: + if self.current_time - self._last_update_time > 10: + self._last_update_time = self.current_time + return True + return False + + def _update(self) -> None: + self._stop_steps() + self._start_steps() + self._refresh_statuses() + self._update_shutdown_status() + + def _kill_all_running_jobs(self) -> None: + with self._queue_lock: + for step_id, group_info in self._group_infos.items(): + if group_info.status not in 
TERMINAL_STATUSES: + self._stop_requests.append(DragonStopRequest(step_id=step_id)) + + def update(self) -> None: + """Update internal data structures, queues, and job statuses""" + logger.debug("Dragon Backend update thread started") + while not self.should_shutdown: + try: + self._update() + time.sleep(0.1) + except Exception as e: + logger.error(e) + if self._should_print_status(): + try: + logger.debug(str(self)) + except ValueError as e: + logger.error(e) + + logger.debug("Dragon Backend update thread stopping") + + @functools.singledispatchmethod + # Deliberately suppressing errors so that overloads have the same signature + # pylint: disable-next=no-self-use + def process_request(self, request: DragonRequest) -> DragonResponse: + """Process an incoming DragonRequest""" + raise TypeError(f"Unsure how to process a `{type(request)}` request") + + @process_request.register + def _(self, request: DragonRunRequest) -> DragonRunResponse: + step_id = next(self._step_ids) + with self._queue_lock: + honorable, err = self._can_honor(request) + if not honorable: + self._group_infos[step_id] = ProcessGroupInfo( + status=SmartSimStatus.STATUS_FAILED, return_codes=[-1] + ) + else: + self._queued_steps[step_id] = request + self._group_infos[step_id] = ProcessGroupInfo( + status=SmartSimStatus.STATUS_NEVER_STARTED + ) + return DragonRunResponse(step_id=step_id, error_message=err) + + @process_request.register + def _(self, request: DragonUpdateStatusRequest) -> DragonUpdateStatusResponse: + with self._queue_lock: + return DragonUpdateStatusResponse( + statuses={ + step_id: self._group_infos[step_id].smartsim_info + for step_id in request.step_ids + if step_id in self._group_infos + } + ) + + @process_request.register + def _(self, request: DragonStopRequest) -> DragonStopResponse: + with self._queue_lock: + self._stop_requests.append(request) + return DragonStopResponse() + + @process_request.register + # Deliberately suppressing errors so that overloads have the same 
signature + # pylint: disable-next=no-self-use,unused-argument + def _(self, request: DragonHandshakeRequest) -> DragonHandshakeResponse: + return DragonHandshakeResponse(dragon_pid=self._pid) + + @process_request.register + # Deliberately suppressing errors so that overloads have the same signature + # pylint: disable-next=no-self-use,unused-argument + def _(self, request: DragonShutdownRequest) -> DragonShutdownResponse: + self._shutdown_requested = True + self._update_shutdown_status() + if request.immediate: + self._kill_all_running_jobs() + self._frontend_shutdown = request.frontend_shutdown + return DragonShutdownResponse() + + +class DragonBackendView: + def __init__(self, backend: DragonBackend): + self._backend = backend + + @property + def host_desc(self) -> str: + hosts = self._backend.hosts + num_hosts = len(hosts) + host_string = str(num_hosts) + (" hosts" if num_hosts != 1 else " host") + return f"{host_string} available for execution: {hosts}" + + @staticmethod + def _proc_group_info_table_line( + step_id: str, proc_group_info: ProcessGroupInfo + ) -> t.List[str]: + table_line = [step_id, f"{proc_group_info.status.value}"] + + if proc_group_info.hosts is not None: + table_line.append(f"{','.join(proc_group_info.hosts)}") + else: + table_line.append("") + + if proc_group_info.return_codes is not None: + table_line.append( + f"{','.join(str(ret) for ret in proc_group_info.return_codes)}" + ) + else: + table_line.append("") + + if proc_group_info.puids is not None: + table_line.append(f"{len(proc_group_info.puids)}") + else: + table_line.append("") + + return table_line + + @property + def step_table(self) -> str: + """Table representation of all jobs which have been started on the server.""" + headers = ["Step", "Status", "Hosts", "Return codes", "Num procs"] + + group_infos = self._backend.group_infos + + colalign = ( + ["left", "left", "left", "center", "center"] + if len(group_infos) > 0 + else None + ) + values = [ + 
self._proc_group_info_table_line(step, group_info) + for step, group_info in group_infos.items() + ] + + return tabulate( + values, + headers, + disable_numparse=True, + tablefmt="github", + colalign=colalign, + ) + + @property + def host_table(self) -> str: + """Table representation of current state of nodes available + + in the allocation. + """ + headers = ["Host", "Status"] + hosts = self._backend.hosts + free_hosts = self._backend.free_hosts + + def _host_table_line(host: str) -> list[str]: + return [host, "Free" if host in free_hosts else "Busy"] + + colalign = ["left", "center"] if len(hosts) > 0 else None + values = [_host_table_line(host) for host in hosts] + + return tabulate( + values, headers, disable_numparse=True, tablefmt="github", colalign=colalign + ) diff --git a/smartsim/_core/launcher/dragon/dragonConnector.py b/smartsim/_core/launcher/dragon/dragonConnector.py new file mode 100644 index 000000000..0cd68c24e --- /dev/null +++ b/smartsim/_core/launcher/dragon/dragonConnector.py @@ -0,0 +1,532 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from __future__ import annotations + +import atexit +import fileinput +import itertools +import json +import os +import subprocess +import sys +import typing as t +from collections import defaultdict +from pathlib import Path +from threading import RLock + +import psutil +import zmq +import zmq.auth.thread + +from ...._core.launcher.dragon import dragonSockets +from ....error.errors import SmartSimError +from ....log import get_logger +from ...config import get_config +from ...schemas import ( + DragonBootstrapRequest, + DragonBootstrapResponse, + DragonHandshakeRequest, + DragonHandshakeResponse, + DragonRequest, + DragonResponse, + DragonShutdownRequest, +) +from ...utils.network import find_free_port, get_best_interface_and_address + +logger = get_logger(__name__) + +_SchemaT = t.TypeVar("_SchemaT", bound=t.Union[DragonRequest, DragonResponse]) + +DRG_LOCK = RLock() + + +class DragonConnector: + """This class encapsulates the functionality needed + to start a Dragon server and communicate with it. 
+ """ + + def __init__(self) -> None: + self._context: zmq.Context[t.Any] = zmq.Context.instance() + self._context.setsockopt(zmq.REQ_CORRELATE, 1) + self._context.setsockopt(zmq.REQ_RELAXED, 1) + self._authenticator: t.Optional[zmq.auth.thread.ThreadAuthenticator] = None + config = get_config() + self._reset_timeout(config.dragon_server_timeout) + self._dragon_head_socket: t.Optional[zmq.Socket[t.Any]] = None + self._dragon_head_process: t.Optional[subprocess.Popen[bytes]] = None + # Returned by dragon head, useful if shutdown is to be requested + # but process was started by another connector + self._dragon_head_pid: t.Optional[int] = None + self._dragon_server_path = config.dragon_server_path + logger.debug(f"Dragon Server path was set to {self._dragon_server_path}") + self._env_vars: t.Dict[str, str] = {} + if self._dragon_server_path is None: + raise SmartSimError( + "DragonConnector could not find the dragon server path. " + "This should not happen if the Connector was started by an " + "experiment.\nIf the DragonConnector was started manually, " + "then the environment variable SMARTSIM_DRAGON_SERVER_PATH " + "should be set to an existing directory." 
+ ) + + @property + def is_connected(self) -> bool: + """Whether the Connector established a connection to the server + + :return: True if connected + """ + return self._dragon_head_socket is not None + + @property + def can_monitor(self) -> bool: + """Whether the Connector knows the PID of the dragon server head process + and can monitor its status + + :return: True if the server can be monitored""" + return self._dragon_head_pid is not None + + def _handshake(self, address: str) -> None: + self._dragon_head_socket = dragonSockets.get_secure_socket( + self._context, zmq.REQ, False + ) + self._dragon_head_socket.connect(address) + try: + dragon_handshake = _assert_schema_type( + self.send_request(DragonHandshakeRequest()), DragonHandshakeResponse + ) + self._dragon_head_pid = dragon_handshake.dragon_pid + logger.debug( + f"Successful handshake with Dragon server at address {address}" + ) + except (zmq.ZMQError, zmq.Again) as e: + logger.debug(e) + self._dragon_head_socket.close() + self._dragon_head_socket = None + + raise SmartSimError( + f"Unsuccessful handshake with Dragon server at address {address}" + ) from e + + def _reset_timeout(self, timeout: int = get_config().dragon_server_timeout) -> None: + self._context.setsockopt(zmq.SNDTIMEO, value=timeout) + self._context.setsockopt(zmq.RCVTIMEO, value=timeout) + if self._authenticator is not None and self._authenticator.thread is not None: + try: + self._authenticator.thread.authenticator.zap_socket.setsockopt( + zmq.SNDTIMEO, timeout + ) + self._authenticator.thread.authenticator.zap_socket.setsockopt( + zmq.RCVTIMEO, timeout + ) + except zmq.ZMQError: + pass + + def ensure_connected(self) -> None: + """Ensure that the Connector established a connection to the server + + If the Connector is not connected, attempt to connect and raise an error + on failure. 
+ + :raises SmartSimError: if connection cannot be established + """ + if not self.is_connected: + self.connect_to_dragon() + if not self.is_connected: + raise SmartSimError("Could not connect to Dragon server") + + def _get_new_authenticator( + self, timeout: int = get_config().dragon_server_timeout + ) -> None: + if self._authenticator is not None: + if self._authenticator.thread is not None: + try: + logger.debug("Closing ZAP socket") + self._authenticator.thread.authenticator.zap_socket.close() + except Exception as e: + logger.debug(f"Could not close ZAP socket, {e}") + try: + self._authenticator.stop() + except zmq.Again: + logger.debug("Could not stop authenticator") + try: + self._authenticator = dragonSockets.get_authenticator( + self._context, timeout + ) + return + except RuntimeError as e: + logger.error("Could not get authenticator") + raise e from None + + @staticmethod + def _get_dragon_log_level() -> str: + smartsim_to_dragon = defaultdict(lambda: "NONE") + smartsim_to_dragon["developer"] = "INFO" + return smartsim_to_dragon.get(get_config().log_level, "NONE") + + def _connect_to_existing_server(self, path: Path) -> None: + config = get_config() + dragon_config_log = path / config.dragon_log_filename + + if not dragon_config_log.is_file(): + return + + dragon_confs = self._parse_launched_dragon_server_info_from_files( + [dragon_config_log] + ) + logger.debug(dragon_confs) + + for dragon_conf in dragon_confs: + logger.debug( + "Found dragon server config file. Checking if the server" + f" is still up at address {dragon_conf['address']}." 
+ ) + try: + self._reset_timeout() + self._get_new_authenticator(-1) + self._handshake(dragon_conf["address"]) + except SmartSimError as e: + logger.error(e) + finally: + self._reset_timeout(config.dragon_server_timeout) + if self.is_connected: + logger.debug("Connected to existing Dragon server") + return + + def _start_connector_socket(self, socket_addr: str) -> zmq.Socket[t.Any]: + config = get_config() + connector_socket: t.Optional[zmq.Socket[t.Any]] = None + self._reset_timeout(config.dragon_server_startup_timeout) + self._get_new_authenticator(-1) + connector_socket = dragonSockets.get_secure_socket(self._context, zmq.REP, True) + logger.debug(f"Binding connector to {socket_addr}") + connector_socket.bind(socket_addr) + if connector_socket is None: + raise SmartSimError("Socket failed to initialize") + + return connector_socket + + def load_persisted_env(self) -> t.Dict[str, str]: + """Load key-value pairs from a .env file created during dragon installation + + :return: Key-value pairs stored in .env file""" + if self._env_vars: + # use previously loaded env vars. 
+ return self._env_vars + + config = get_config() + + if not config.dragon_dotenv.exists(): + self._env_vars = {} + return self._env_vars + + with open(config.dragon_dotenv, encoding="utf-8") as dot_env: + for kvp in dot_env.readlines(): + split = kvp.strip().split("=", maxsplit=1) + key, value = split[0], split[-1] + self._env_vars[key] = value + + return self._env_vars + + def merge_persisted_env(self, current_env: t.Dict[str, str]) -> t.Dict[str, str]: + """Combine the current environment variable set with the dragon .env by adding + Dragon-specific values and prepending any new values to existing keys + + :param current_env: Environment which has to be merged with .env variables + :return: Merged environment + """ + # ensure we start w/a complete env from current env state + merged_env: t.Dict[str, str] = {**current_env} + + # copy all the values for dragon straight into merged_env + merged_env.update( + {k: v for k, v in self._env_vars.items() if k.startswith("DRAGON")} + ) + + # prepend dragon env updates into existing env vars + for key, value in self._env_vars.items(): + if not key.startswith("DRAGON"): + if current_value := current_env.get(key, None): + # when a key is not dragon specific, don't overwrite the current + # value. instead, prepend the value dragon needs to/current env + value = f"{value}:{current_value}" + merged_env[key] = value + return merged_env + + def connect_to_dragon(self) -> None: + """Connect to Dragon server + + :raises SmartSimError: If connection cannot be established + """ + config = get_config() + with DRG_LOCK: + # TODO use manager instead + if self.is_connected: + return + if self._dragon_server_path is None: + raise SmartSimError("Path to Dragon server not set.") + + logger.info( + "Establishing connection with Dragon server or starting a new one..." 
+ ) + + path = _resolve_dragon_path(self._dragon_server_path) + + self._connect_to_existing_server(path) + if self.is_connected: + return + + path.mkdir(parents=True, exist_ok=True) + + local_address = get_best_interface_and_address().address + if local_address is None: + # TODO parse output file + raise SmartSimError( + "Could not determine SmartSim's local address, " + "the Dragon server could not be started." + ) + # find first available port >= 5995 + port = find_free_port(start=5995) + socket_addr = f"tcp://{local_address}:{port}" + connector_socket = self._start_connector_socket(socket_addr) + + cmd = [ + "dragon", + "-t", + config.dragon_transport, + "-l", + DragonConnector._get_dragon_log_level(), + sys.executable, + "-m", + "smartsim._core.entrypoints.dragon", + "+launching_address", + socket_addr, + ] + + dragon_out_file = path / "dragon_head.out" + dragon_err_file = path / "dragon_head.err" + + self.load_persisted_env() + merged_env = self.merge_persisted_env(os.environ.copy()) + merged_env.update({"PYTHONUNBUFFERED": "1"}) + + with ( + open(dragon_out_file, "w", encoding="utf-8") as dragon_out, + open(dragon_err_file, "w", encoding="utf-8") as dragon_err, + ): + logger.debug(f"Starting Dragon environment: {' '.join(cmd)}") + + # pylint: disable-next=consider-using-with + self._dragon_head_process = subprocess.Popen( + args=cmd, + bufsize=0, + stderr=dragon_err.fileno(), + stdout=dragon_out.fileno(), + cwd=path, + shell=False, + env=merged_env, + start_new_session=True, + ) + + server = dragonSockets.as_server(connector_socket) + logger.debug(f"Listening to {socket_addr}") + request = _assert_schema_type(server.recv(), DragonBootstrapRequest) + server.send( + DragonBootstrapResponse(dragon_pid=self._dragon_head_process.pid) + ) + connector_socket.close() + logger.debug(f"Connecting to {request.address}") + self._reset_timeout(config.dragon_server_timeout) + self._handshake(request.address) + + # Only the Connector which started the server is + # 
responsible of it, that's why we register the + # cleanup in this code branch. + # The cleanup function should not have references + # to this object to avoid Garbage Collector lockup + server_socket = self._dragon_head_socket + server_process_pid = self._dragon_head_process.pid + + if server_socket is not None and self._dragon_head_process is not None: + atexit.register( + _dragon_cleanup, + server_socket=server_socket, + server_process_pid=server_process_pid, + server_authenticator=self._authenticator, + ) + elif self._dragon_head_process is not None: + self._dragon_head_process.wait(1.0) + if self._dragon_head_process.stdout: + for line in iter(self._dragon_head_process.stdout.readline, b""): + logger.info(line.decode("utf-8").rstrip()) + if self._dragon_head_process.stderr: + for line in iter(self._dragon_head_process.stderr.readline, b""): + logger.warning(line.decode("utf-8").rstrip()) + logger.warning(self._dragon_head_process.returncode) + else: + logger.warning("Could not start Dragon server as subprocess") + + def cleanup(self) -> None: + """Shut down Dragon server and authenticator thread""" + if self._dragon_head_socket is not None and self._dragon_head_pid is not None: + _dragon_cleanup( + server_socket=self._dragon_head_socket, + server_process_pid=self._dragon_head_pid, + server_authenticator=self._authenticator, + ) + self._dragon_head_socket = None + self._dragon_head_pid = None + self._authenticator = None + + def send_request(self, request: DragonRequest, flags: int = 0) -> DragonResponse: + """Send a request to the Dragon server using a secure socket + + :param request: The request to send + :param flags: 0MQ flags, defaults to 0 + :raises SmartSimError: If not connected to Dragon server + :return: Response from server + """ + self.ensure_connected() + if (socket := self._dragon_head_socket) is None: + raise SmartSimError("Not connected to Dragon") + return self._send_req_with_socket(socket, request, flags) + + @staticmethod + def 
_parse_launched_dragon_server_info_from_iterable( + stream: t.Iterable[str], num_dragon_envs: t.Optional[int] = None + ) -> t.List[t.Dict[str, str]]: + lines = (line.strip() for line in stream) + lines = (line for line in lines if line) + tokenized = (line.split(maxsplit=1) for line in lines) + tokenized = (tokens for tokens in tokenized if len(tokens) > 1) + dragon_env_jsons = ( + config_dict + for first, config_dict in tokenized + if "DRAGON_SERVER_CONFIG" in first + ) + dragon_envs = (json.loads(config_dict) for config_dict in dragon_env_jsons) + + dragon_envs = ( + dragon_env for dragon_env in dragon_envs if "address" in dragon_env + ) + + if num_dragon_envs: + sliced_dragon_envs = itertools.islice(dragon_envs, num_dragon_envs) + return list(sliced_dragon_envs) + return list(dragon_envs) + + @classmethod + def _parse_launched_dragon_server_info_from_files( + cls, + file_paths: t.List[t.Union[str, "os.PathLike[str]"]], + num_dragon_envs: t.Optional[int] = None, + ) -> t.List[t.Dict[str, str]]: + with fileinput.FileInput(file_paths) as ifstream: + dragon_envs = cls._parse_launched_dragon_server_info_from_iterable( + ifstream, num_dragon_envs + ) + + return dragon_envs + + @staticmethod + def _send_req_with_socket( + socket: zmq.Socket[t.Any], + request: DragonRequest, + send_flags: int = 0, + recv_flags: int = 0, + ) -> DragonResponse: + client = dragonSockets.as_client(socket) + with DRG_LOCK: + logger.debug(f"Sending {type(request).__name__}: {request}") + client.send(request, send_flags) + response = client.recv(flags=recv_flags) + + logger.debug(f"Received {type(response).__name__}: {response}") + return response + + +def _assert_schema_type(obj: object, typ: t.Type[_SchemaT], /) -> _SchemaT: + if not isinstance(obj, typ): + raise TypeError(f"Expected schema of type `{typ}`, but got {type(obj)}") + return obj + + +def _dragon_cleanup( + server_socket: t.Optional[zmq.Socket[t.Any]] = None, + server_process_pid: t.Optional[int] = 0, + server_authenticator: 
t.Optional[zmq.auth.thread.ThreadAuthenticator] = None, +) -> None: + """Clean up resources used by the launcher. + :param server_socket: (optional) Socket used to connect to dragon environment + :param server_process_pid: (optional) Process ID of the dragon entrypoint + :param server_authenticator: (optional) Authenticator used to secure sockets + """ + try: + if server_socket is not None: + print("Sending shutdown request to dragon environment") + # pylint: disable-next=protected-access + DragonConnector._send_req_with_socket( + server_socket, DragonShutdownRequest(), recv_flags=zmq.NOBLOCK + ) + except zmq.error.ZMQError as e: + # Can't use the logger as I/O file may be closed + if not isinstance(e, zmq.Again): + print("Could not send shutdown request to dragon server") + print(f"ZMQ error: {e}", flush=True) + finally: + print("Sending shutdown request is complete") + + if server_process_pid and psutil.pid_exists(server_process_pid): + try: + _, retcode = os.waitpid(server_process_pid, 0) + print( + f"Dragon server process shutdown is complete, return code {retcode}", + flush=True, + ) + except Exception as e: + logger.debug(e) + + try: + if server_authenticator is not None and server_authenticator.is_alive(): + print("Shutting down ZMQ authenticator") + server_authenticator.stop() + except Exception: + print("Authenticator shutdown error") + else: + print("Authenticator shutdown is complete") + + +def _resolve_dragon_path(fallback: t.Union[str, "os.PathLike[str]"]) -> Path: + dragon_server_path = get_config().dragon_server_path or os.path.join( + fallback, ".smartsim", "dragon" + ) + dragon_server_paths = dragon_server_path.split(":") + if len(dragon_server_paths) > 1: + logger.warning( + "Multiple dragon servers not supported, " + "will connect to (or start) first server in list." 
+ ) + return Path(dragon_server_paths[0]) diff --git a/smartsim/_core/launcher/dragon/dragonLauncher.py b/smartsim/_core/launcher/dragon/dragonLauncher.py new file mode 100644 index 000000000..17b47e309 --- /dev/null +++ b/smartsim/_core/launcher/dragon/dragonLauncher.py @@ -0,0 +1,321 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from __future__ import annotations + +import os +import typing as t + +from ...._core.launcher.stepMapping import StepMap +from ....error import LauncherError, SmartSimError +from ....log import get_logger +from ....settings import ( + DragonRunSettings, + QsubBatchSettings, + RunSettings, + SbatchSettings, + SettingsBase, +) +from ....status import SmartSimStatus +from ...schemas import ( + DragonRunRequest, + DragonRunResponse, + DragonStopRequest, + DragonStopResponse, + DragonUpdateStatusRequest, + DragonUpdateStatusResponse, +) +from ..launcher import WLMLauncher +from ..pbs.pbsLauncher import PBSLauncher +from ..slurm.slurmLauncher import SlurmLauncher +from ..step import DragonBatchStep, DragonStep, LocalStep, Step +from ..stepInfo import StepInfo +from .dragonConnector import DragonConnector, _SchemaT + +logger = get_logger(__name__) + + +class DragonLauncher(WLMLauncher): + """This class encapsulates the functionality needed + to launch jobs on systems that use Dragon on top of a workload manager. + + All WLM launchers are capable of launching managed and unmanaged + jobs. Managed jobs are queried through interaction with with WLM, + in this case the Dragon server. Unmanaged jobs are held in the TaskManager + and are managed through references to their launching process ID + i.e. a psutil.Popen object. + Batch Jobs are routed to either Slurm or PBS and their step ID + is stored, prefixed with the name of the scheduler, to allow + the Job Manager to interact with it. 
+ """ + + def __init__(self) -> None: + super().__init__() + self._connector = DragonConnector() + """Connector used to start and interact with the Dragon server""" + self._slurm_launcher = SlurmLauncher() + """Slurm sub-launcher, used only for batch jobs""" + self._pbs_launcher = PBSLauncher() + """PBS sub-launcher, used only for batch jobs""" + + @property + def is_connected(self) -> bool: + return self._connector.is_connected + + def cleanup(self) -> None: + self._connector.cleanup() + + # RunSettings types supported by this launcher + @property + def supported_rs(self) -> t.Dict[t.Type[SettingsBase], t.Type[Step]]: + # RunSettings types supported by this launcher + return { + DragonRunSettings: DragonStep, + SbatchSettings: DragonBatchStep, + QsubBatchSettings: DragonBatchStep, + RunSettings: LocalStep, + } + + def add_step_to_mapping_table(self, name: str, step_map: StepMap) -> None: + super().add_step_to_mapping_table(name, step_map) + + if step_map.step_id is None: + return + sublauncher: t.Optional[t.Union[SlurmLauncher, PBSLauncher]] = None + if step_map.step_id.startswith("SLURM-"): + sublauncher = self._slurm_launcher + elif step_map.step_id.startswith("PBS-"): + sublauncher = self._pbs_launcher + else: + return + + sublauncher_step_map = StepMap( + step_id=DragonLauncher._unprefix_step_id(step_map.step_id), + task_id=step_map.task_id, + managed=step_map.managed, + ) + sublauncher.add_step_to_mapping_table(name, sublauncher_step_map) + + def run(self, step: Step) -> t.Optional[str]: + """Run a job step through Slurm + + :param step: a job step instance + :raises LauncherError: if launch fails + :return: job step id if job is managed + """ + + if not self.task_manager.actively_monitoring: + self.task_manager.start() + + step_id = None + task_id = None + + cmd = step.get_launch_cmd() + out, err = step.get_output_files() + + if isinstance(step, DragonBatchStep): + # wait for batch step to submit successfully + sublauncher_step_id: t.Optional[str] = None + 
return_code, out, err = self.task_manager.start_and_wait(cmd, step.cwd) + if return_code != 0: + raise LauncherError(f"Sbatch submission failed\n {out}\n {err}") + if out: + sublauncher_step_id = out.strip() + logger.debug( + f"Gleaned batch job id: {sublauncher_step_id} for {step.name}" + ) + + if sublauncher_step_id is None: + raise SmartSimError("Could not get step id for batch step") + + if isinstance(step.batch_settings, SbatchSettings): + self._slurm_launcher.step_mapping.add( + step.name, sublauncher_step_id, task_id, step.managed + ) + step_id = "SLURM-" + sublauncher_step_id + elif isinstance(step.batch_settings, QsubBatchSettings): + self._pbs_launcher.step_mapping.add( + step.name, sublauncher_step_id, task_id, step.managed + ) + step_id = "PBS-" + sublauncher_step_id + elif isinstance(step, DragonStep): + run_args = step.run_settings.run_args + req_env = step.run_settings.env_vars + self._connector.load_persisted_env() + merged_env = self._connector.merge_persisted_env(os.environ.copy()) + nodes = int(run_args.get("nodes", None) or 1) + tasks_per_node = int(run_args.get("tasks-per-node", None) or 1) + response = _assert_schema_type( + self._connector.send_request( + DragonRunRequest( + exe=cmd[0], + exe_args=cmd[1:], + path=step.cwd, + name=step.name, + nodes=nodes, + tasks_per_node=tasks_per_node, + env=req_env, + current_env=merged_env, + output_file=out, + error_file=err, + ) + ), + DragonRunResponse, + ) + step_id = str(response.step_id) + else: + # pylint: disable-next=consider-using-with + out_strm = open(out, "w+", encoding="utf-8") + # pylint: disable-next=consider-using-with + err_strm = open(err, "w+", encoding="utf-8") + task_id = self.task_manager.start_task( + cmd, step.cwd, step.env, out=out_strm.fileno(), err=err_strm.fileno() + ) + step.managed = False + + self.step_mapping.add(step.name, step_id, task_id, step.managed) + + return step_id + + def stop(self, step_name: str) -> StepInfo: + """Step a job step + + :param step_name: name of 
the job to stop + :return: update for job due to cancel + """ + + stepmap = self.step_mapping[step_name] + step_id = str(stepmap.step_id) + + if step_id.startswith("SLURM-"): + return self._slurm_launcher.stop(step_name) + + if step_id.startswith("PBS-"): + return self._pbs_launcher.stop(step_name) + + _assert_schema_type( + self._connector.send_request(DragonStopRequest(step_id=step_id)), + DragonStopResponse, + ) + + _, step_info = self.get_step_update([step_name])[0] + if not step_info: + raise LauncherError(f"Could not get step_info for job step {step_name}") + + step_info.status = ( + SmartSimStatus.STATUS_CANCELLED # set status to cancelled instead of failed + ) + step_info.launcher_status = str(SmartSimStatus.STATUS_CANCELLED) + return step_info + + @staticmethod + def _unprefix_step_id(step_id: str) -> str: + return step_id.split("-", maxsplit=1)[1] + + def _get_managed_step_update(self, step_ids: t.List[str]) -> t.List[StepInfo]: + """Get step updates for Dragon-managed jobs + + :param step_ids: list of job step ids + :return: list of updates for managed jobs + """ + + step_id_updates: dict[str, StepInfo] = {} + + dragon_step_ids: t.List[str] = [] + slurm_step_ids: t.List[str] = [] + pbs_step_ids: t.List[str] = [] + for step_id in step_ids: + if step_id.startswith("SLURM-"): + slurm_step_ids.append(step_id) + elif step_id.startswith("PBS-"): + pbs_step_ids.append(step_id) + else: + dragon_step_ids.append(step_id) + + if slurm_step_ids: + # pylint: disable-next=protected-access + slurm_updates = self._slurm_launcher._get_managed_step_update( + [ + DragonLauncher._unprefix_step_id(step_id) + for step_id in slurm_step_ids + ] + ) + step_id_updates.update(dict(zip(slurm_step_ids, slurm_updates))) + + if pbs_step_ids: + # pylint: disable-next=protected-access + pbs_updates = self._pbs_launcher._get_managed_step_update( + [DragonLauncher._unprefix_step_id(step_id) for step_id in pbs_step_ids] + ) + step_id_updates.update(dict(zip(pbs_step_ids, pbs_updates))) + + 
if dragon_step_ids: + response = _assert_schema_type( + self._connector.send_request( + DragonUpdateStatusRequest(step_ids=dragon_step_ids) + ), + DragonUpdateStatusResponse, + ) + + for step_id in step_ids: + if step_id not in response.statuses: + msg = "Missing step id update from Dragon launcher." + if response.error_message is not None: + msg += "\nDragon backend reported following error: " + msg += response.error_message + logger.error(msg) + info = StepInfo( + SmartSimStatus.STATUS_FAILED, + SmartSimStatus.STATUS_FAILED.value, + -1, + ) + else: + status, ret_codes = response.statuses[step_id] + if ret_codes: + grp_ret_code = min(ret_codes) + if any(ret_codes): + _err_msg = ( + f"One or more processes failed for job {step_id} " + f"Return codes were: {ret_codes}" + ) + logger.error(_err_msg) + else: + grp_ret_code = None + info = StepInfo(status, status.value, grp_ret_code) + + step_id_updates[step_id] = info + + # Order matters as we return an ordered list of StepInfo objects + return [step_id_updates[step_id] for step_id in step_ids] + + def __str__(self) -> str: + return "Dragon" + + +def _assert_schema_type(obj: object, typ: t.Type[_SchemaT], /) -> _SchemaT: + if not isinstance(obj, typ): + raise TypeError(f"Expected schema of type `{typ}`, but got {type(obj)}") + return obj diff --git a/smartsim/_core/launcher/dragon/dragonSockets.py b/smartsim/_core/launcher/dragon/dragonSockets.py new file mode 100644 index 000000000..80acd61a2 --- /dev/null +++ b/smartsim/_core/launcher/dragon/dragonSockets.py @@ -0,0 +1,158 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import typing as t + +import zmq +import zmq.auth.thread + +from smartsim._core.config.config import get_config +from smartsim._core.schemas import dragonRequests as _dragonRequests +from smartsim._core.schemas import dragonResponses as _dragonResponses +from smartsim._core.schemas import utils as _utils +from smartsim._core.utils.security import KeyManager +from smartsim.log import get_logger + +if t.TYPE_CHECKING: + from zmq import Context + from zmq.sugar.socket import Socket + +logger = get_logger(__name__) + +AUTHENTICATOR: t.Optional["zmq.auth.thread.ThreadAuthenticator"] = None + + +def as_server( + socket: "Socket[t.Any]", +) -> _utils.SocketSchemaTranslator[ + _dragonResponses.DragonResponse, + _dragonRequests.DragonRequest, +]: + return _utils.SocketSchemaTranslator( + socket, _dragonResponses.response_registry, _dragonRequests.request_registry + ) + + +def as_client( + socket: "Socket[t.Any]", +) -> _utils.SocketSchemaTranslator[ + _dragonRequests.DragonRequest, + 
_dragonResponses.DragonResponse, +]: + return _utils.SocketSchemaTranslator( + socket, _dragonRequests.request_registry, _dragonResponses.response_registry + ) + + +def get_secure_socket( + context: "zmq.Context[t.Any]", + socket_type: int, + is_server: bool, +) -> "Socket[t.Any]": + """Create secured socket that consumes & produces encrypted messages + + :param context: ZMQ context object + :param socket_type: Type of ZMQ socket to create + :param is_server: Pass `True` to secure the socket as server. Pass `False` + to secure the socket as a client. + :returns: the secured socket prepared for sending encrypted messages + """ + config = get_config() + socket: "Socket[t.Any]" = context.socket(socket_type) + + key_manager = KeyManager(config, as_server=is_server, as_client=not is_server) + server_keys, client_keys = key_manager.get_keys() + logger.debug(f"Applying keys to socket: {server_keys}, {client_keys}") + + if is_server: + logger.debug("Configuring socket as server") + + # configure the server keys on the socket + socket.curve_secretkey = server_keys.private + socket.curve_publickey = server_keys.public + + socket.curve_server = True + else: + # configure client keys on the socket to encrypt outgoing messages + socket.curve_secretkey = client_keys.private + socket.curve_publickey = client_keys.public + + # set the server public key for decrypting incoming messages + socket.curve_serverkey = server_keys.public + return socket + + +def get_authenticator( + context: "zmq.Context[t.Any]", timeout: int = get_config().dragon_server_timeout +) -> "zmq.auth.thread.ThreadAuthenticator": + """Create an authenticator to handle encryption of ZMQ communications + + :param context: ZMQ context object + :returns: the activated `Authenticator` + """ + # pylint: disable-next=global-statement + global AUTHENTICATOR + + if AUTHENTICATOR is not None: + if AUTHENTICATOR.is_alive(): + return AUTHENTICATOR + try: + logger.debug("Stopping authenticator") + 
AUTHENTICATOR.thread.authenticator.zap_socket.close() + AUTHENTICATOR.thread.join(0.1) + AUTHENTICATOR = None + except Exception as e: + logger.debug(e) + finally: + logger.debug("Stopped authenticator") + + config = get_config() + + key_manager = KeyManager(config, as_client=True) + server_keys, client_keys = key_manager.get_keys() + logger.debug(f"Applying keys to authenticator: {server_keys}, {client_keys}") + + AUTHENTICATOR = zmq.auth.thread.ThreadAuthenticator(context, log=logger) + + ctx_sndtimeo = context.getsockopt(zmq.SNDTIMEO) + ctx_rcvtimeo = context.getsockopt(zmq.RCVTIMEO) + + AUTHENTICATOR.context.setsockopt(zmq.SNDTIMEO, timeout) + AUTHENTICATOR.context.setsockopt(zmq.RCVTIMEO, timeout) + AUTHENTICATOR.context.setsockopt(zmq.REQ_CORRELATE, 1) + AUTHENTICATOR.context.setsockopt(zmq.REQ_RELAXED, 1) + + # allow all keys in the client key directory to connect + logger.debug(f"Securing with client keys in {key_manager.client_keys_dir}") + AUTHENTICATOR.configure_curve(domain="*", location=key_manager.client_keys_dir) + + logger.debug("Starting authenticator") + AUTHENTICATOR.start() + + context.setsockopt(zmq.SNDTIMEO, ctx_sndtimeo) + context.setsockopt(zmq.RCVTIMEO, ctx_rcvtimeo) + + return AUTHENTICATOR diff --git a/smartsim/_core/launcher/launcher.py b/smartsim/_core/launcher/launcher.py index 80000c22f..1bf768065 100644 --- a/smartsim/_core/launcher/launcher.py +++ b/smartsim/_core/launcher/launcher.py @@ -27,6 +27,7 @@ import abc import typing as t +from ..._core.launcher.stepMapping import StepMap from ...error import AllocationError, LauncherError, SSUnsupportedError from ...settings import SettingsBase from .step import Step @@ -69,6 +70,15 @@ def run(self, step: Step) -> t.Optional[str]: def stop(self, step_name: str) -> StepInfo: raise NotImplementedError + def add_step_to_mapping_table(self, name: str, step_map: StepMap) -> None: + """Add a StepMap to the Launcher step mapping table + making it monitor the step. 
+ + :param name: name of step to be added + :param step_map: step map of added step + """ + self.step_mapping[name] = step_map + class WLMLauncher(Launcher): # cov-wlm """The base class for any Launcher that utilizes workload @@ -94,15 +104,11 @@ def create_step( """Create a WLM job step :param name: name of the entity to be launched - :type name: str :param cwd: path to launch dir - :type cwd: str :param step_settings: batch or run settings for entity - :type step_settings: BatchSettings | RunSettings :raises SSUnsupportedError: if batch or run settings type isnt supported :raises LauncherError: if step creation fails :return: step instance - :rtype: Step """ try: step_class = self.supported_rs[type(step_settings)] @@ -129,9 +135,7 @@ def get_step_update( """Get update for a list of job steps :param step_names: list of job steps to get updates for - :type step_names: list[str] :return: list of name, job update tuples - :rtype: list[(str, StepInfo)] """ updates: t.List[t.Tuple[str, t.Union[StepInfo, None]]] = [] @@ -162,9 +166,7 @@ def _get_unmanaged_step_update( """Get step updates for Popen managed jobs :param task_ids: task id to check - :type task_ids: list[str] :return: list of step updates - :rtype: list[StepInfo] """ updates = [] for task_id in task_ids: diff --git a/smartsim/_core/launcher/local/local.py b/smartsim/_core/launcher/local/local.py index 96778ec0d..ffcb84f23 100644 --- a/smartsim/_core/launcher/local/local.py +++ b/smartsim/_core/launcher/local/local.py @@ -59,9 +59,7 @@ def get_step_update( """Get status updates of each job step name provided :param step_names: list of step_names - :type step_names: list[str] :return: list of tuples for update - :rtype: list[tuple[str, StepInfo | None]] """ # step ids are process ids of the tasks # as there is no WLM intermediary @@ -78,9 +76,7 @@ def get_step_nodes(self, step_names: t.List[str]) -> t.List[t.List[str]]: """Return the address of nodes assigned to the step :param step_names: list of step_names - 
:type step_names: list[str] :return: list of node addresses - :rtype: list[list[str]] TODO: Use socket to find the actual Lo address? """ @@ -92,9 +88,7 @@ def run(self, step: Step) -> str: files will be written to the entity path. :param step: LocalStep instance to run - :type step: LocalStep :return: task_id of the newly created step - :rtype: str """ if not self.task_manager.actively_monitoring: self.task_manager.start() @@ -118,9 +112,7 @@ def stop(self, step_name: str) -> UnmanagedStepInfo: """Stop a job step :param step_name: name of the step to be stopped - :type step_name: str :return: a UnmanagedStepInfo instance - :rtype: UnmanagedStepInfo """ # step_id is task_id for local. Naming for consistency step_id = self.step_mapping[step_name].task_id diff --git a/smartsim/_core/launcher/lsf/lsfCommands.py b/smartsim/_core/launcher/lsf/lsfCommands.py index d6d0ee031..cb92587c1 100644 --- a/smartsim/_core/launcher/lsf/lsfCommands.py +++ b/smartsim/_core/launcher/lsf/lsfCommands.py @@ -33,7 +33,6 @@ def bjobs(args: t.List[str]) -> t.Tuple[str, str]: """Calls LSF bjobs with args :param args: List of command arguments - :type args: List of str :returns: Output and error of bjobs """ cmd = ["bjobs"] + args @@ -47,9 +46,7 @@ def bkill(args: t.List[str]) -> t.Tuple[int, str, str]: returncode is also supplied in this function. :param args: list of command arguments - :type args: list of str :return: returncode, output and error - :rtype: (int, str, str) """ cmd = ["bkill"] + args returncode, out, error = execute_cmd(cmd) @@ -62,9 +59,7 @@ def jskill(args: t.List[str]) -> t.Tuple[int, str, str]: returncode is also supplied in this function. 
:param args: list of command arguments - :type args: list of str :return: returncode, output and error - :rtype: (int, str, str) """ cmd = ["jskill"] + args @@ -76,9 +71,7 @@ def jslist(args: t.List[str]) -> t.Tuple[str, str]: """Calls LSF jslist with args :param args: List of command arguments - :type args: List of str :returns: Output and error of jslist - :rtype: (str, str) """ cmd = ["jslist"] + args _, out, err = execute_cmd(cmd) diff --git a/smartsim/_core/launcher/lsf/lsfLauncher.py b/smartsim/_core/launcher/lsf/lsfLauncher.py index a8b6fafdb..e0ad808ed 100644 --- a/smartsim/_core/launcher/lsf/lsfLauncher.py +++ b/smartsim/_core/launcher/lsf/lsfLauncher.py @@ -38,7 +38,7 @@ RunSettings, SettingsBase, ) -from ....status import STATUS_CANCELLED, STATUS_COMPLETED +from ....status import SmartSimStatus from ...config import CONFIG from ..launcher import WLMLauncher from ..step import ( @@ -91,10 +91,8 @@ def run(self, step: Step) -> t.Optional[str]: """Run a job step through LSF :param step: a job step instance - :type step: Step :raises LauncherError: if launch fails :return: job step id if job is managed - :rtype: str """ if not self.task_manager.actively_monitoring: self.task_manager.start() @@ -134,9 +132,7 @@ def stop(self, step_name: str) -> StepInfo: """Stop/cancel a job step :param step_name: name of the job to stop - :type step_name: str :return: update for job due to cancel - :rtype: StepInfo """ stepmap = self.step_mapping[step_name] if stepmap.managed: @@ -155,7 +151,9 @@ def stop(self, step_name: str) -> StepInfo: if not step_info: raise LauncherError(f"Could not get step_info for job step {step_name}") - step_info.status = STATUS_CANCELLED # set status to cancelled instead of failed + step_info.status = ( + SmartSimStatus.STATUS_CANCELLED + ) # set status to cancelled instead of failed return step_info @staticmethod @@ -183,9 +181,7 @@ def _get_managed_step_update(self, step_ids: t.List[str]) -> t.List[StepInfo]: """Get step updates for WLM managed 
jobs :param step_ids: list of job step ids - :type step_ids: list[str] :return: list of updates for managed jobs - :rtype: list[StepInfo] """ updates: t.List[StepInfo] = [] @@ -207,7 +203,7 @@ def _get_managed_step_update(self, step_ids: t.List[str]) -> t.List[StepInfo]: # create LSFBatchStepInfo objects to return batch_info = LSFBatchStepInfo(stat, None) # account for case where job history is not logged by LSF - if batch_info.status == STATUS_COMPLETED: + if batch_info.status == SmartSimStatus.STATUS_COMPLETED: batch_info.returncode = 0 updates.append(batch_info) return updates diff --git a/smartsim/_core/launcher/lsf/lsfParser.py b/smartsim/_core/launcher/lsf/lsfParser.py index 33837d2bd..c3272fa99 100644 --- a/smartsim/_core/launcher/lsf/lsfParser.py +++ b/smartsim/_core/launcher/lsf/lsfParser.py @@ -31,9 +31,7 @@ def parse_bsub(output: str) -> str: """Parse bsub output and return job id. :param output: stdout of bsub command - :type output: str :returns: job id - :rtype: str """ for line in output.split("\n"): if line.startswith("Job"): @@ -45,9 +43,7 @@ def parse_bsub_error(output: str) -> str: """Parse and return error output of a failed bsub command. :param output: stderr of qsub command - :type output: str :returns: error message - :rtype: str """ # Search for first non-empty line error_lines = [] @@ -77,11 +73,8 @@ def parse_jslist_stepid(output: str, step_id: str) -> t.Tuple[str, t.Optional[st options to obtain step status :param output: output of the bjobs command - :type output: str :param step_id: allocation id or job step id - :type step_id: str :return: status and return code - :rtype: (str, str) """ result: t.Tuple[str, t.Optional[str]] = ("NOTFOUND", None) @@ -101,11 +94,8 @@ def parse_bjobs_jobid(output: str, job_id: str) -> str: to obtain job status. 
:param output: output of the bjobs command - :type output: str :param job_id: allocation id or job step id - :type job_id: str :return: status - :rtype: str """ result = "NOTFOUND" for line in output.split("\n"): @@ -126,9 +116,7 @@ def parse_bjobs_nodes(output: str) -> t.List[str]: a job in a list with the duplicates removed. :param output: output of the `bjobs -w` command - :type output: str :return: compute nodes of the allocation or job - :rtype: list of str """ nodes = [] @@ -146,11 +134,8 @@ def parse_max_step_id_from_jslist(output: str) -> t.Optional[str]: properly returned :param output: output bjobs - :type output: str :param step_name: the name of the step to query - :type step_name: str :return: the step_id - :rtype: str """ max_step_id = None diff --git a/smartsim/_core/launcher/pbs/pbsCommands.py b/smartsim/_core/launcher/pbs/pbsCommands.py index f738ef1f8..989af93be 100644 --- a/smartsim/_core/launcher/pbs/pbsCommands.py +++ b/smartsim/_core/launcher/pbs/pbsCommands.py @@ -33,7 +33,6 @@ def qstat(args: t.List[str]) -> t.Tuple[str, str]: """Calls PBS qstat with args :param args: List of command arguments - :type args: List of str :returns: Output and error of qstat """ cmd = ["qstat"] + args @@ -45,7 +44,6 @@ def qsub(args: t.List[str]) -> t.Tuple[str, str]: """Calls PBS qsub with args :param args: List of command arguments - :type args: List of str :returns: Output and error of salloc """ cmd = ["qsub"] + args @@ -59,9 +57,7 @@ def qdel(args: t.List[str]) -> t.Tuple[int, str, str]: returncode is also supplied in this function. 
:param args: list of command arguments - :type args: list of str :return: output and error - :rtype: str """ cmd = ["qdel"] + args returncode, out, error = execute_cmd(cmd) diff --git a/smartsim/_core/launcher/pbs/pbsLauncher.py b/smartsim/_core/launcher/pbs/pbsLauncher.py index 0b2f85e95..8c2099a8b 100644 --- a/smartsim/_core/launcher/pbs/pbsLauncher.py +++ b/smartsim/_core/launcher/pbs/pbsLauncher.py @@ -39,7 +39,7 @@ RunSettings, SettingsBase, ) -from ....status import STATUS_CANCELLED, STATUS_COMPLETED +from ....status import SmartSimStatus from ...config import CONFIG from ..launcher import WLMLauncher from ..step import ( @@ -53,7 +53,11 @@ ) from ..stepInfo import PBSStepInfo, StepInfo from .pbsCommands import qdel, qstat -from .pbsParser import parse_qstat_jobid, parse_step_id_from_qstat +from .pbsParser import ( + parse_qstat_jobid, + parse_qstat_jobid_json, + parse_step_id_from_qstat, +) logger = get_logger(__name__) @@ -88,10 +92,8 @@ def run(self, step: Step) -> t.Optional[str]: """Run a job step through PBSPro :param step: a job step instance - :type step: Step :raises LauncherError: if launch fails :return: job step id if job is managed - :rtype: str """ if not self.task_manager.actively_monitoring: self.task_manager.start() @@ -131,9 +133,7 @@ def stop(self, step_name: str) -> StepInfo: """Stop/cancel a job step :param step_name: name of the job to stop - :type step_name: str :return: update for job due to cancel - :rtype: StepInfo """ stepmap = self.step_mapping[step_name] if stepmap.managed: @@ -149,7 +149,9 @@ def stop(self, step_name: str) -> StepInfo: if not step_info: raise LauncherError(f"Could not get step_info for job step {step_name}") - step_info.status = STATUS_CANCELLED # set status to cancelled instead of failed + step_info.status = ( + SmartSimStatus.STATUS_CANCELLED + ) # set status to cancelled instead of failed return step_info @staticmethod @@ -178,20 +180,29 @@ def _get_managed_step_update(self, step_ids: t.List[str]) -> 
t.List[StepInfo]: """Get step updates for WLM managed jobs :param step_ids: list of job step ids - :type step_ids: list[str] :return: list of updates for managed jobs - :rtype: list[StepInfo] """ updates: t.List[StepInfo] = [] qstat_out, _ = qstat(step_ids) stats = [parse_qstat_jobid(qstat_out, str(step_id)) for step_id in step_ids] + + # Fallback: if all jobs result as NOTFOUND, it might be an issue + # with truncated names, we resort to json format which does not truncate + # information + if all(stat is None for stat in stats): + qstat_out_json, _ = qstat(["-f", "-F", "json"] + step_ids) + stats = [ + parse_qstat_jobid_json(qstat_out_json, str(step_id)) + for step_id in step_ids + ] + # create PBSStepInfo objects to return for stat, _ in zip(stats, step_ids): - info = PBSStepInfo(stat, None) + info = PBSStepInfo(stat or "NOTFOUND", None) # account for case where job history is not logged by PBS - if info.status == STATUS_COMPLETED: + if info.status == SmartSimStatus.STATUS_COMPLETED: info.returncode = 0 updates.append(info) diff --git a/smartsim/_core/launcher/pbs/pbsParser.py b/smartsim/_core/launcher/pbs/pbsParser.py index 362577595..6f8384b11 100644 --- a/smartsim/_core/launcher/pbs/pbsParser.py +++ b/smartsim/_core/launcher/pbs/pbsParser.py @@ -33,9 +33,7 @@ def parse_qsub(output: str) -> str: output is the job id itself. :param output: stdout of qsub command - :type output: str :returns: job id - :rtype: str """ return output @@ -44,9 +42,7 @@ def parse_qsub_error(output: str) -> str: """Parse and return error output of a failed qsub command. 
:param output: stderr of qsub command - :type output: str :returns: error message - :rtype: str """ # look for error first for line in output.split("\n"): @@ -61,18 +57,15 @@ def parse_qsub_error(output: str) -> str: return base_err -def parse_qstat_jobid(output: str, job_id: str) -> str: +def parse_qstat_jobid(output: str, job_id: str) -> t.Optional[str]: """Parse and return output of the qstat command run with options to obtain job status. :param output: output of the qstat command - :type output: str :param job_id: allocation id or job step id - :type job_id: str :return: status - :rtype: str """ - result = "NOTFOUND" + result = None for line in output.split("\n"): fields = line.split() if len(fields) >= 5: @@ -83,6 +76,25 @@ def parse_qstat_jobid(output: str, job_id: str) -> str: return result +def parse_qstat_jobid_json(output: str, job_id: str) -> t.Optional[str]: + """Parse and return output of the qstat command run with JSON options + to obtain job status. + + :param output: output of the qstat command in JSON format + :param job_id: allocation id or job step id + :return: status + """ + out_json = load_and_clean_json(output) + + if "Jobs" not in out_json: + return None + jobs: dict[str, t.Any] = out_json["Jobs"] + job: t.Optional[dict[str, t.Any]] = jobs.get(job_id, None) + if job is None: + return None + return str(job.get("job_state", None)) + + def parse_qstat_nodes(output: str) -> t.List[str]: """Parse and return the qstat command run with options to obtain node list. @@ -93,9 +105,7 @@ def parse_qstat_nodes(output: str) -> t.List[str]: The `output` parameter must be in JSON format. 
:param output: output of the qstat command in JSON format - :type output: str :return: compute nodes of the allocation or job - :rtype: list of str """ nodes: t.List[str] = [] out_json = load_and_clean_json(output) @@ -116,11 +126,8 @@ def parse_step_id_from_qstat(output: str, step_name: str) -> t.Optional[str]: """Parse and return the step id from a qstat command :param output: output qstat - :type output: str :param step_name: the name of the step to query - :type step_name: str :return: the step_id - :rtype: str """ step_id: t.Optional[str] = None out_json = load_and_clean_json(output) diff --git a/smartsim/_core/launcher/slurm/slurmCommands.py b/smartsim/_core/launcher/slurm/slurmCommands.py index 2e37f1d79..839826297 100644 --- a/smartsim/_core/launcher/slurm/slurmCommands.py +++ b/smartsim/_core/launcher/slurm/slurmCommands.py @@ -38,7 +38,6 @@ def sstat(args: t.List[str], *, raise_on_err: bool = False) -> t.Tuple[str, str] """Calls sstat with args :param args: List of command arguments - :type args: List of str :returns: Output and error of sstat """ _, out, err = _execute_slurm_cmd("sstat", args, raise_on_err=raise_on_err) @@ -49,7 +48,6 @@ def sacct(args: t.List[str], *, raise_on_err: bool = False) -> t.Tuple[str, str] """Calls sacct with args :param args: List of command arguments - :type args: List of str :returns: Output and error of sacct """ _, out, err = _execute_slurm_cmd("sacct", args, raise_on_err=raise_on_err) @@ -60,7 +58,6 @@ def salloc(args: t.List[str], *, raise_on_err: bool = False) -> t.Tuple[str, str """Calls slurm salloc with args :param args: List of command arguments - :type args: List of str :returns: Output and error of salloc """ _, out, err = _execute_slurm_cmd("salloc", args, raise_on_err=raise_on_err) @@ -71,7 +68,6 @@ def sinfo(args: t.List[str], *, raise_on_err: bool = False) -> t.Tuple[str, str] """Calls slurm sinfo with args :param args: List of command arguments - :type args: List of str :returns: Output and error of sinfo 
""" _, out, err = _execute_slurm_cmd("sinfo", args, raise_on_err=raise_on_err) @@ -82,7 +78,6 @@ def scontrol(args: t.List[str], *, raise_on_err: bool = False) -> t.Tuple[str, s """Calls slurm scontrol with args :param args: List of command arguments - :type args: List of str :returns: Output and error of sinfo """ _, out, err = _execute_slurm_cmd("scontrol", args, raise_on_err=raise_on_err) @@ -95,9 +90,7 @@ def scancel(args: t.List[str], *, raise_on_err: bool = False) -> t.Tuple[int, st returncode is also supplied in this function. :param args: list of command arguments - :type args: list of str :return: output and error - :rtype: str """ return _execute_slurm_cmd("scancel", args, raise_on_err=raise_on_err) diff --git a/smartsim/_core/launcher/slurm/slurmLauncher.py b/smartsim/_core/launcher/slurm/slurmLauncher.py index e939a63db..2e4102391 100644 --- a/smartsim/_core/launcher/slurm/slurmLauncher.py +++ b/smartsim/_core/launcher/slurm/slurmLauncher.py @@ -40,7 +40,7 @@ SettingsBase, SrunSettings, ) -from ....status import STATUS_CANCELLED +from ....status import SmartSimStatus from ...config import CONFIG from ..launcher import WLMLauncher from ..step import ( @@ -100,10 +100,8 @@ def get_step_nodes(self, step_names: t.List[str]) -> t.List[t.List[str]]: would return nid00034 :param step_names: list of job step names - :type step_names: list[str] :raises LauncherError: if nodelist aquisition fails :return: list of hostnames - :rtype: list[str] """ _, step_ids = self.step_mapping.get_ids(step_names, managed=True) step_str = _create_step_id_str([val for val in step_ids if val is not None]) @@ -122,10 +120,8 @@ def run(self, step: Step) -> t.Optional[str]: """Run a job step through Slurm :param step: a job step instance - :type step: Step :raises LauncherError: if launch fails :return: job step id if job is managed - :rtype: str """ self.check_for_slurm() if not self.task_manager.actively_monitoring: @@ -175,9 +171,7 @@ def stop(self, step_name: str) -> StepInfo: 
"""Step a job step :param step_name: name of the job to stop - :type step_name: str :return: update for job due to cancel - :rtype: StepInfo """ stepmap = self.step_mapping[step_name] if stepmap.managed: @@ -218,7 +212,9 @@ def stop(self, step_name: str) -> StepInfo: if not step_info: raise LauncherError(f"Could not get step_info for job step {step_name}") - step_info.status = STATUS_CANCELLED # set status to cancelled instead of failed + step_info.status = ( + SmartSimStatus.STATUS_CANCELLED + ) # set status to cancelled instead of failed return step_info @staticmethod @@ -255,9 +251,7 @@ def _get_managed_step_update(self, step_ids: t.List[str]) -> t.List[StepInfo]: """Get step updates for WLM managed jobs :param step_ids: list of job step ids - :type step_ids: list[str] :return: list of updates for managed jobs - :rtype: list[StepInfo] """ step_str = _create_step_id_str(step_ids) sacct_out, _ = sacct( diff --git a/smartsim/_core/launcher/slurm/slurmParser.py b/smartsim/_core/launcher/slurm/slurmParser.py index ede687eb6..4ec187f19 100644 --- a/smartsim/_core/launcher/slurm/slurmParser.py +++ b/smartsim/_core/launcher/slurm/slurmParser.py @@ -43,9 +43,7 @@ def parse_salloc_error(output: str) -> t.Optional[str]: """Parse and return error output of a failed salloc command :param output: stderr output of salloc command - :type output: str :return: error message - :rtype: str """ salloc = which("salloc") # look for error first @@ -73,10 +71,9 @@ def jobid_exact_match(parsed_id: str, job_id: str) -> bool: the prefix of another job_id, like 1 and 11 or 1.1 and 1.10. Works with job id or step id (i.e. with or without a '.' in the id) + :param parsed_id: the id read from the line - :type paserd_id: str :param job_id: the id to check for equality - :type job_id: str """ if "." 
in job_id: return parsed_id == job_id @@ -88,11 +85,8 @@ def parse_sacct(output: str, job_id: str) -> t.Tuple[str, t.Optional[str]]: """Parse and return output of the sacct command :param output: output of the sacct command - :type output: str :param job_id: allocation id or job step id - :type job_id: str :return: status and returncode - :rtype: tuple """ result: t.Tuple[str, t.Optional[str]] = ("PENDING", None) for line in output.split("\n"): @@ -113,9 +107,7 @@ def parse_sstat_nodes(output: str, job_id: str) -> t.List[str]: a job in a list with the duplicates removed. :param output: output of the sstat command - :type output: str :return: compute nodes of the allocation or job - :rtype: list of str """ nodes = [] for line in output.split("\n"): @@ -134,11 +126,8 @@ def parse_step_id_from_sacct(output: str, step_name: str) -> t.Optional[str]: :param output: output of sacct --noheader -p --format=jobname,jobid --job - :type output: str :param step_name: the name of the step to query - :type step_name: str :return: the step_id - :rtype: str """ step_id = None for line in output.split("\n"): diff --git a/smartsim/_core/launcher/step/__init__.py b/smartsim/_core/launcher/step/__init__.py index 663edb682..c492f3e97 100644 --- a/smartsim/_core/launcher/step/__init__.py +++ b/smartsim/_core/launcher/step/__init__.py @@ -25,6 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
from .alpsStep import AprunStep +from .dragonStep import DragonBatchStep, DragonStep from .localStep import LocalStep from .lsfStep import BsubBatchStep, JsrunStep from .mpiStep import MpiexecStep, MpirunStep, OrterunStep diff --git a/smartsim/_core/launcher/step/alpsStep.py b/smartsim/_core/launcher/step/alpsStep.py index 61ca5eee8..eb7903af9 100644 --- a/smartsim/_core/launcher/step/alpsStep.py +++ b/smartsim/_core/launcher/step/alpsStep.py @@ -42,11 +42,8 @@ def __init__(self, name: str, cwd: str, run_settings: AprunSettings) -> None: """Initialize a ALPS aprun job step :param name: name of the entity to be launched - :type name: str :param cwd: path to launch dir - :type cwd: str :param run_settings: run settings for entity - :type run_settings: AprunSettings """ super().__init__(name, cwd, run_settings) self.alloc: t.Optional[str] = None @@ -65,7 +62,6 @@ def get_launch_cmd(self) -> t.List[str]: """Get the command to launch this step :return: launch command - :rtype: list[str] """ aprun = self.run_settings.run_command if not aprun: @@ -122,7 +118,6 @@ def _build_exe(self) -> t.List[str]: """Build the executable for this step :return: executable list - :rtype: list[str] """ if self._get_mpmd(): return self._make_mpmd() diff --git a/smartsim/_core/launcher/step/dragonStep.py b/smartsim/_core/launcher/step/dragonStep.py new file mode 100644 index 000000000..036a9e565 --- /dev/null +++ b/smartsim/_core/launcher/step/dragonStep.py @@ -0,0 +1,248 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import json +import os +import shutil +import sys +import typing as t + +from ...._core.schemas.dragonRequests import DragonRunRequest, request_registry +from ....error.errors import SSUnsupportedError +from ....log import get_logger +from ....settings import ( + DragonRunSettings, + QsubBatchSettings, + SbatchSettings, + Singularity, +) +from .step import Step + +logger = get_logger(__name__) + + +class DragonStep(Step): + def __init__(self, name: str, cwd: str, run_settings: DragonRunSettings) -> None: + """Initialize a srun job step + + :param name: name of the entity to be launched + :param cwd: path to launch dir + :param run_settings: run settings for entity + """ + super().__init__(name, cwd, run_settings) + self.managed = True + + @property + def run_settings(self) -> DragonRunSettings: + return t.cast(DragonRunSettings, self.step_settings) + + def get_launch_cmd(self) -> t.List[str]: + """Get stringified version of request + needed to launch this step + + :return: launch 
command + """ + run_settings = self.run_settings + exe_cmd = [] + + if run_settings.colocated_db_settings: + # Replace the command with the entrypoint wrapper script + bash = shutil.which("bash") + if not bash: + raise RuntimeError("Could not find bash in PATH") + launch_script_path = self.get_colocated_launch_script() + exe_cmd += [bash, launch_script_path] + + if isinstance(run_settings.container, Singularity): + # pylint: disable-next=protected-access + exe_cmd += run_settings.container._container_cmds(self.cwd) + + exe_cmd += run_settings.exe + + exe_args = self._get_exe_args_list(run_settings) + + exe_cmd_and_args = exe_cmd + exe_args + + return exe_cmd_and_args + + @staticmethod + def _get_exe_args_list(run_setting: DragonRunSettings) -> t.List[str]: + """Convenience function to encapsulate checking the + runsettings.exe_args type to always return a list + """ + exe_args = run_setting.exe_args + args: t.List[str] = exe_args if isinstance(exe_args, list) else [exe_args] + return args + + +class DragonBatchStep(Step): + def __init__( + self, + name: str, + cwd: str, + batch_settings: t.Union[SbatchSettings, QsubBatchSettings], + ) -> None: + """Initialize a Slurm Sbatch step + + :param name: name of the entity to launch + :param cwd: path to launch dir + :param batch_settings: batch settings for entity + """ + super().__init__(name, cwd, batch_settings) + self.steps: t.List[Step] = [] + self.managed = True + self.batch_settings = batch_settings + self._request_file_name = "requests.json" + + def get_launch_cmd(self) -> t.List[str]: + """Get the launch command for the batch + + :return: launch command for the batch + """ + if isinstance(self.batch_settings, SbatchSettings): + script = self._write_sbatch_script() + return [self.batch_settings.batch_cmd, "--parsable", script] + if isinstance(self.batch_settings, QsubBatchSettings): + script = self._write_qsub_script() + return [self.batch_settings.batch_cmd, script] + + raise SSUnsupportedError( + "DragonBatchStep 
only support SbatchSettings and QsubBatchSettings" + ) + + def add_to_batch(self, step: Step) -> None: + """Add a job step to this batch + + :param step: a job step instance e.g. DragonStep + """ + self.steps.append(step) + logger.debug(f"Added step command to batch for {step.name}") + + @staticmethod + def _dragon_entrypoint_cmd(request_file: str) -> str: + """Return command needed to run the Dragon entrypoint""" + cmd = [ + sys.executable, + "-m", + "smartsim._core.entrypoints.dragon_client", + "+submit", + request_file, + ] + return " ".join(cmd) + + def _write_request_file(self) -> str: + """Write json file with requests to submit to Dragon server""" + request_file = self.get_step_file( + ending="json", script_name=self._request_file_name + ) + requests = [] + for step in self.steps: + run_settings = t.cast(DragonRunSettings, step.step_settings) + run_args = run_settings.run_args + env = run_settings.env_vars + nodes = int(run_args.get("nodes", None) or 1) + tasks_per_node = int(run_args.get("tasks-per-node", None) or 1) + + cmd = step.get_launch_cmd() + out, err = step.get_output_files() + request = DragonRunRequest( + exe=cmd[0], + exe_args=cmd[1:], + path=step.cwd, + name=step.name, + nodes=nodes, + tasks_per_node=tasks_per_node, + env=env, + current_env=os.environ, + output_file=out, + error_file=err, + ) + requests.append(request_registry.to_string(request)) + with open(request_file, "w", encoding="utf-8") as script_file: + script_file.write(json.dumps(requests)) + + return request_file + + def _write_sbatch_script(self) -> str: + """Write the PBS batch script + + :return: batch script path after writing + """ + batch_script = self.get_step_file(ending=".sh") + output, error = self.get_output_files() + request_file = self._write_request_file() + with open(batch_script, "w", encoding="utf-8") as script_file: + script_file.write("#!/bin/bash\n\n") + script_file.write(f"#SBATCH --output={output}\n") + script_file.write(f"#SBATCH --error={error}\n") + 
script_file.write(f"#SBATCH --job-name={self.name}\n") + + # add additional sbatch options + for opt in self.batch_settings.format_batch_args(): + script_file.write(f"#SBATCH {opt}\n") + + script_file.write( + f"#SBATCH --export=ALL,SMARTSIM_DRAGON_SERVER_PATH={self.cwd}," + "PYTHONUNBUFFERED=1\n" + ) + + for cmd in self.batch_settings.preamble: + script_file.write(f"{cmd}\n") + + script_file.write( + DragonBatchStep._dragon_entrypoint_cmd(request_file) + "\n" + ) + return batch_script + + def _write_qsub_script(self) -> str: + """Write the Slurm batch script + + :return: batch script path after writing + """ + batch_script = self.get_step_file(ending=".sh") + output, error = self.get_output_files() + request_file = self._write_request_file() + with open(batch_script, "w", encoding="utf-8") as script_file: + script_file.write("#!/bin/bash\n\n") + script_file.write(f"#PBS -o {output}\n") + script_file.write(f"#PBS -e {error}\n") + script_file.write(f"#PBS -N {self.name}\n") + script_file.write("#PBS -V \n") + + # add additional sbatch options + for opt in self.batch_settings.format_batch_args(): + script_file.write(f"#PBS {opt}\n") + + script_file.write(f"#PBS -v SMARTSIM_DRAGON_SERVER_PATH={self.cwd}\n") + + for cmd in self.batch_settings.preamble: + script_file.write(f"{cmd}\n") + + script_file.write( + DragonBatchStep._dragon_entrypoint_cmd(request_file) + "\n" + ) + + return batch_script diff --git a/smartsim/_core/launcher/step/lsfStep.py b/smartsim/_core/launcher/step/lsfStep.py index 1c88dadb8..0cb921e19 100644 --- a/smartsim/_core/launcher/step/lsfStep.py +++ b/smartsim/_core/launcher/step/lsfStep.py @@ -42,11 +42,8 @@ def __init__(self, name: str, cwd: str, batch_settings: BsubBatchSettings) -> No """Initialize a LSF bsub step :param name: name of the entity to launch - :type name: str :param cwd: path to launch dir - :type cwd: str :param batch_settings: batch settings for entity - :type batch_settings: BsubBatchSettings """ super().__init__(name, cwd, 
batch_settings) self.step_cmds: t.List[t.List[str]] = [] @@ -57,7 +54,6 @@ def get_launch_cmd(self) -> t.List[str]: """Get the launch command for the batch :return: launch command for the batch - :rtype: list[str] """ script = self._write_script() return [self.batch_settings.batch_cmd, script] @@ -66,7 +62,6 @@ def add_to_batch(self, step: Step) -> None: """Add a job step to this batch :param step: a job step instance e.g. SrunStep - :type step: Step """ launch_cmd = step.get_launch_cmd() self.step_cmds.append(launch_cmd) @@ -76,7 +71,6 @@ def _write_script(self) -> str: """Write the batch script :return: batch script path after writing - :rtype: str """ batch_script = self.get_step_file(ending=".sh") output, error = self.get_output_files() @@ -113,11 +107,8 @@ def __init__(self, name: str, cwd: str, run_settings: RunSettings): """Initialize a LSF jsrun job step :param name: name of the entity to be launched - :type name: str :param cwd: path to launch dir - :type cwd: str :param run_settings: run settings for entity - :type run_settings: RunSettings """ super().__init__(name, cwd, run_settings) self.alloc: t.Optional[str] = None @@ -155,7 +146,6 @@ def get_launch_cmd(self) -> t.List[str]: """Get the command to launch this step :return: launch command - :rtype: list[str] """ jsrun = self.run_settings.run_command if not jsrun: @@ -223,7 +213,6 @@ def _build_exe(self) -> t.List[str]: """Build the executable for this step :return: executable list - :rtype: list[str] """ exe = self.run_settings.exe args = self.run_settings._exe_args # pylint: disable=protected-access diff --git a/smartsim/_core/launcher/step/mpiStep.py b/smartsim/_core/launcher/step/mpiStep.py index 785d55e92..767486462 100644 --- a/smartsim/_core/launcher/step/mpiStep.py +++ b/smartsim/_core/launcher/step/mpiStep.py @@ -43,11 +43,8 @@ def __init__(self, name: str, cwd: str, run_settings: RunSettings) -> None: """Initialize a job step conforming to the MPI standard :param name: name of the entity to be 
launched - :type name: str :param cwd: path to launch dir - :type cwd: str :param run_settings: run settings for entity - :type run_settings: RunSettings """ super().__init__(name, cwd, run_settings) @@ -64,7 +61,6 @@ def get_launch_cmd(self) -> t.List[str]: """Get the command to launch this step :return: launch command - :rtype: list[str] """ run_cmd = self.run_settings.run_command if not run_cmd: @@ -130,7 +126,6 @@ def _build_exe(self) -> t.List[str]: """Build the executable for this step :return: executable list - :rtype: list[str] """ if self._get_mpmd(): return self._make_mpmd() @@ -161,14 +156,10 @@ def __init__(self, name: str, cwd: str, run_settings: MpiexecSettings) -> None: """Initialize an mpiexec job step :param name: name of the entity to be launched - :type name: str :param cwd: path to launch dir - :type cwd: str :param run_settings: run settings for entity - :type run_settings: MpiexecSettings :param default_run_command: The default command to launch an MPI application - :type default_run_command: str, optional """ super().__init__(name, cwd, run_settings) @@ -179,14 +170,10 @@ def __init__(self, name: str, cwd: str, run_settings: MpirunSettings) -> None: """Initialize an mpirun job step :param name: name of the entity to be launched - :type name: str :param cwd: path to launch dir - :type cwd: str :param run_settings: run settings for entity - :type run_settings: MpirunSettings :param default_run_command: The default command to launch an MPI application - :type default_run_command: str, optional """ super().__init__(name, cwd, run_settings) @@ -197,14 +184,10 @@ def __init__(self, name: str, cwd: str, run_settings: OrterunSettings) -> None: """Initialize an orterun job step :param name: name of the entity to be launched - :type name: str :param cwd: path to launch dir - :type cwd: str :param run_settings: run settings for entity - :type run_settings: OrterunSettings :param default_run_command: The default command to launch an MPI application - 
:type default_run_command: str, optional """ super().__init__(name, cwd, run_settings) diff --git a/smartsim/_core/launcher/step/pbsStep.py b/smartsim/_core/launcher/step/pbsStep.py index 65dac3225..82a91aaa4 100644 --- a/smartsim/_core/launcher/step/pbsStep.py +++ b/smartsim/_core/launcher/step/pbsStep.py @@ -38,11 +38,8 @@ def __init__(self, name: str, cwd: str, batch_settings: QsubBatchSettings) -> No """Initialize a PBSpro qsub step :param name: name of the entity to launch - :type name: str :param cwd: path to launch dir - :type cwd: str :param batch_settings: batch settings for entity - :type batch_settings: QsubBatchSettings """ super().__init__(name, cwd, batch_settings) self.step_cmds: t.List[t.List[str]] = [] @@ -53,7 +50,6 @@ def get_launch_cmd(self) -> t.List[str]: """Get the launch command for the batch :return: launch command for the batch - :rtype: list[str] """ script = self._write_script() return [self.batch_settings.batch_cmd, script] @@ -62,7 +58,6 @@ def add_to_batch(self, step: Step) -> None: """Add a job step to this batch :param step: a job step instance e.g. 
SrunStep - :type step: Step """ launch_cmd = step.get_launch_cmd() self.step_cmds.append(launch_cmd) @@ -72,7 +67,6 @@ def _write_script(self) -> str: """Write the batch script :return: batch script path after writing - :rtype: str """ batch_script = self.get_step_file(ending=".sh") output, error = self.get_output_files() diff --git a/smartsim/_core/launcher/step/slurmStep.py b/smartsim/_core/launcher/step/slurmStep.py index 7baab891b..83f39cf09 100644 --- a/smartsim/_core/launcher/step/slurmStep.py +++ b/smartsim/_core/launcher/step/slurmStep.py @@ -42,11 +42,8 @@ def __init__(self, name: str, cwd: str, batch_settings: SbatchSettings) -> None: """Initialize a Slurm Sbatch step :param name: name of the entity to launch - :type name: str :param cwd: path to launch dir - :type cwd: str :param batch_settings: batch settings for entity - :type batch_settings: SbatchSettings """ super().__init__(name, cwd, batch_settings) self.step_cmds: t.List[t.List[str]] = [] @@ -57,7 +54,6 @@ def get_launch_cmd(self) -> t.List[str]: """Get the launch command for the batch :return: launch command for the batch - :rtype: list[str] """ script = self._write_script() return [self.batch_settings.batch_cmd, "--parsable", script] @@ -66,7 +62,6 @@ def add_to_batch(self, step: Step) -> None: """Add a job step to this batch :param step: a job step instance e.g. 
SrunStep - :type step: Step """ launch_cmd = ["cd", step.cwd, ";"] launch_cmd += step.get_launch_cmd() @@ -77,7 +72,6 @@ def _write_script(self) -> str: """Write the batch script :return: batch script path after writing - :rtype: str """ batch_script = self.get_step_file(ending=".sh") output, error = self.get_output_files() @@ -108,11 +102,8 @@ def __init__(self, name: str, cwd: str, run_settings: SrunSettings) -> None: """Initialize a srun job step :param name: name of the entity to be launched - :type name: str :param cwd: path to launch dir - :type cwd: str :param run_settings: run settings for entity - :type run_settings: SrunSettings """ super().__init__(name, cwd, run_settings) self.alloc: t.Optional[str] = None @@ -125,7 +116,6 @@ def get_launch_cmd(self) -> t.List[str]: """Get the command to launch this step :return: launch command - :rtype: list[str] """ srun = self.run_settings.run_command if not srun: @@ -206,7 +196,6 @@ def _build_exe(self) -> t.List[str]: """Build the executable for this step :return: executable list - :rtype: list[str] """ if self._get_mpmd(): return self._make_mpmd() diff --git a/smartsim/_core/launcher/step/step.py b/smartsim/_core/launcher/step/step.py index ddb95a850..2cce6e610 100644 --- a/smartsim/_core/launcher/step/step.py +++ b/smartsim/_core/launcher/step/step.py @@ -28,6 +28,7 @@ import functools import os.path as osp +import pathlib import sys import time import typing as t @@ -66,10 +67,21 @@ def _create_unique_name(entity_name: str) -> str: step_name = entity_name + "-" + get_base_36_repr(time.time_ns()) return step_name + @staticmethod + def _ensure_output_directory_exists(output_dir: str) -> None: + """Create the directory for the step output if it doesn't exist already""" + if not osp.exists(output_dir): + pathlib.Path(output_dir).mkdir(parents=True, exist_ok=True) + def get_output_files(self) -> t.Tuple[str, str]: - """Return two paths to error and output files based on cwd""" - output = 
self.get_step_file(ending=".out") - error = self.get_step_file(ending=".err") + """Return two paths to error and output files based on metadata directory""" + try: + output_dir = self.meta["status_dir"] + except KeyError as exc: + raise KeyError("Status directory for this step has not been set.") from exc + self._ensure_output_directory_exists(output_dir) + output = osp.join(output_dir, f"{self.entity_name}.out") + error = osp.join(output_dir, f"{self.entity_name}.err") return output, error def get_step_file( @@ -114,7 +126,6 @@ def add_to_batch(self, step: Step) -> None: """Add a job step to this batch :param step: a job step instance e.g. SrunStep - :type step: Step """ raise SmartSimError("add_to_batch not implemented for this step type") @@ -127,6 +138,14 @@ def proxyable_launch_cmd( ) -> t.Callable[[_StepT], t.List[str]]: @functools.wraps(fn) def _get_launch_cmd(self: _StepT) -> t.List[str]: + """ + Generate a launch command that executes the `JobStep` with the + indirect launching entrypoint instead of directly. The original + command is passed to the proxy as a base64 encoded string. 
+ + Steps implementing `get_launch_cmd` and decorated with + `proxyable_launch_cmd` will generate status updates that can be consumed + by the telemetry monitor and dashboard""" original_cmd_list = fn(self) if not CONFIG.telemetry_enabled: @@ -134,18 +153,22 @@ def _get_launch_cmd(self: _StepT) -> t.List[str]: if self.managed: raise UnproxyableStepError( - f"Attempting to proxy managed step of type {type(self)}" + f"Attempting to proxy managed step of type {type(self)} " "through the unmanaged step proxy entry point" ) proxy_module = "smartsim._core.entrypoints.indirect" - etype = self.meta["entity_type"] + entity_type = self.meta["entity_type"] status_dir = self.meta["status_dir"] + + logger.debug(f"Encoding command{' '.join(original_cmd_list)}") + + # encode the original cmd to avoid potential collisions and escaping + # errors when passing it using CLI arguments to the indirect entrypoint encoded_cmd = encode_cmd(original_cmd_list) - # NOTE: this is NOT safe. should either 1) sign cmd and verify OR 2) - # serialize step and let the indirect entrypoint rebuild the - # cmd... for now, test away... 
+ # return a new command that executes the proxy and passes + # the original command as an argument return [ sys.executable, "-m", @@ -155,7 +178,7 @@ def _get_launch_cmd(self: _StepT) -> t.List[str]: "+command", encoded_cmd, "+entity_type", - etype, + entity_type, "+telemetry_dir", status_dir, "+working_dir", diff --git a/smartsim/_core/launcher/stepInfo.py b/smartsim/_core/launcher/stepInfo.py index 56b5218fc..875eb0322 100644 --- a/smartsim/_core/launcher/stepInfo.py +++ b/smartsim/_core/launcher/stepInfo.py @@ -28,20 +28,13 @@ import psutil -from ...status import ( - SMARTSIM_STATUS, - STATUS_CANCELLED, - STATUS_COMPLETED, - STATUS_FAILED, - STATUS_PAUSED, - STATUS_RUNNING, -) +from ...status import SmartSimStatus class StepInfo: def __init__( self, - status: str = "", + status: SmartSimStatus, launcher_status: str = "", returncode: t.Optional[int] = None, output: t.Optional[str] = None, @@ -54,48 +47,50 @@ def __init__( self.error = error def __str__(self) -> str: - info_str = f"Status: {self.status}" + info_str = f"Status: {self.status.value}" info_str += f" | Launcher Status {self.launcher_status}" info_str += f" | Returncode {str(self.returncode)}" return info_str @property - def mapping(self) -> t.Dict[str, str]: + def mapping(self) -> t.Dict[str, SmartSimStatus]: raise NotImplementedError def _get_smartsim_status( self, status: str, returncode: t.Optional[int] = None - ) -> str: + ) -> SmartSimStatus: """ Map the status of the WLM step to a smartsim-specific status """ - if status in SMARTSIM_STATUS: - return SMARTSIM_STATUS[status] + if any(ss_status.value == status for ss_status in SmartSimStatus): + return SmartSimStatus(status) if status in self.mapping and returncode in [None, 0]: return self.mapping[status] - return STATUS_FAILED + return SmartSimStatus.STATUS_FAILED class UnmanagedStepInfo(StepInfo): @property - def mapping(self) -> t.Dict[str, str]: + def mapping(self) -> t.Dict[str, SmartSimStatus]: # see 
https://github.com/giampaolo/psutil/blob/master/psutil/_pslinux.py # see https://github.com/giampaolo/psutil/blob/master/psutil/_common.py return { - psutil.STATUS_RUNNING: STATUS_RUNNING, - psutil.STATUS_SLEEPING: STATUS_RUNNING, # sleeping thread is still alive - psutil.STATUS_WAKING: STATUS_RUNNING, - psutil.STATUS_DISK_SLEEP: STATUS_RUNNING, - psutil.STATUS_DEAD: STATUS_FAILED, - psutil.STATUS_TRACING_STOP: STATUS_PAUSED, - psutil.STATUS_WAITING: STATUS_PAUSED, - psutil.STATUS_STOPPED: STATUS_PAUSED, - psutil.STATUS_LOCKED: STATUS_PAUSED, - psutil.STATUS_PARKED: STATUS_PAUSED, - psutil.STATUS_IDLE: STATUS_PAUSED, - psutil.STATUS_ZOMBIE: STATUS_COMPLETED, + psutil.STATUS_RUNNING: SmartSimStatus.STATUS_RUNNING, + psutil.STATUS_SLEEPING: ( + SmartSimStatus.STATUS_RUNNING + ), # sleeping thread is still alive + psutil.STATUS_WAKING: SmartSimStatus.STATUS_RUNNING, + psutil.STATUS_DISK_SLEEP: SmartSimStatus.STATUS_RUNNING, + psutil.STATUS_DEAD: SmartSimStatus.STATUS_FAILED, + psutil.STATUS_TRACING_STOP: SmartSimStatus.STATUS_PAUSED, + psutil.STATUS_WAITING: SmartSimStatus.STATUS_PAUSED, + psutil.STATUS_STOPPED: SmartSimStatus.STATUS_PAUSED, + psutil.STATUS_LOCKED: SmartSimStatus.STATUS_PAUSED, + psutil.STATUS_PARKED: SmartSimStatus.STATUS_PAUSED, + psutil.STATUS_IDLE: SmartSimStatus.STATUS_PAUSED, + psutil.STATUS_ZOMBIE: SmartSimStatus.STATUS_COMPLETED, } def __init__( @@ -114,30 +109,30 @@ def __init__( class SlurmStepInfo(StepInfo): # cov-slurm # see https://slurm.schedmd.com/squeue.html#lbAG mapping = { - "RUNNING": STATUS_RUNNING, - "CONFIGURING": STATUS_RUNNING, - "STAGE_OUT": STATUS_RUNNING, - "COMPLETED": STATUS_COMPLETED, - "DEADLINE": STATUS_COMPLETED, - "TIMEOUT": STATUS_COMPLETED, - "BOOT_FAIL": STATUS_FAILED, - "FAILED": STATUS_FAILED, - "NODE_FAIL": STATUS_FAILED, - "OUT_OF_MEMORY": STATUS_FAILED, - "CANCELLED": STATUS_CANCELLED, - "CANCELLED+": STATUS_CANCELLED, - "REVOKED": STATUS_CANCELLED, - "PENDING": STATUS_PAUSED, - "PREEMPTED": STATUS_PAUSED, - 
"RESV_DEL_HOLD": STATUS_PAUSED, - "REQUEUE_FED": STATUS_PAUSED, - "REQUEUE_HOLD": STATUS_PAUSED, - "REQUEUED": STATUS_PAUSED, - "RESIZING": STATUS_PAUSED, - "SIGNALING": STATUS_PAUSED, - "SPECIAL_EXIT": STATUS_PAUSED, - "STOPPED": STATUS_PAUSED, - "SUSPENDED": STATUS_PAUSED, + "RUNNING": SmartSimStatus.STATUS_RUNNING, + "CONFIGURING": SmartSimStatus.STATUS_RUNNING, + "STAGE_OUT": SmartSimStatus.STATUS_RUNNING, + "COMPLETED": SmartSimStatus.STATUS_COMPLETED, + "DEADLINE": SmartSimStatus.STATUS_COMPLETED, + "TIMEOUT": SmartSimStatus.STATUS_COMPLETED, + "BOOT_FAIL": SmartSimStatus.STATUS_FAILED, + "FAILED": SmartSimStatus.STATUS_FAILED, + "NODE_FAIL": SmartSimStatus.STATUS_FAILED, + "OUT_OF_MEMORY": SmartSimStatus.STATUS_FAILED, + "CANCELLED": SmartSimStatus.STATUS_CANCELLED, + "CANCELLED+": SmartSimStatus.STATUS_CANCELLED, + "REVOKED": SmartSimStatus.STATUS_CANCELLED, + "PENDING": SmartSimStatus.STATUS_PAUSED, + "PREEMPTED": SmartSimStatus.STATUS_PAUSED, + "RESV_DEL_HOLD": SmartSimStatus.STATUS_PAUSED, + "REQUEUE_FED": SmartSimStatus.STATUS_PAUSED, + "REQUEUE_HOLD": SmartSimStatus.STATUS_PAUSED, + "REQUEUED": SmartSimStatus.STATUS_PAUSED, + "RESIZING": SmartSimStatus.STATUS_PAUSED, + "SIGNALING": SmartSimStatus.STATUS_PAUSED, + "SPECIAL_EXIT": SmartSimStatus.STATUS_PAUSED, + "STOPPED": SmartSimStatus.STATUS_PAUSED, + "SUSPENDED": SmartSimStatus.STATUS_PAUSED, } def __init__( @@ -155,23 +150,27 @@ def __init__( class PBSStepInfo(StepInfo): # cov-pbs @property - def mapping(self) -> t.Dict[str, str]: + def mapping(self) -> t.Dict[str, SmartSimStatus]: # pylint: disable=line-too-long # see http://nusc.nsu.ru/wiki/lib/exe/fetch.php/doc/pbs/PBSReferenceGuide19.2.1.pdf#M11.9.90788.PBSHeading1.81.Job.States return { - "R": STATUS_RUNNING, - "B": STATUS_RUNNING, - "H": STATUS_PAUSED, - "M": STATUS_PAUSED, # Actually means that it was moved to another server, + "R": SmartSimStatus.STATUS_RUNNING, + "B": SmartSimStatus.STATUS_RUNNING, + "H": SmartSimStatus.STATUS_PAUSED, + 
"M": ( + SmartSimStatus.STATUS_PAUSED + ), # Actually means that it was moved to another server, # TODO: understand what this implies - "Q": STATUS_PAUSED, - "S": STATUS_PAUSED, - "T": STATUS_PAUSED, # This means in transition, see above for comment - "U": STATUS_PAUSED, - "W": STATUS_PAUSED, - "E": STATUS_COMPLETED, - "F": STATUS_COMPLETED, - "X": STATUS_COMPLETED, + "Q": SmartSimStatus.STATUS_PAUSED, + "S": SmartSimStatus.STATUS_PAUSED, + "T": ( + SmartSimStatus.STATUS_PAUSED + ), # This means in transition, see above for comment + "U": SmartSimStatus.STATUS_PAUSED, + "W": SmartSimStatus.STATUS_PAUSED, + "E": SmartSimStatus.STATUS_COMPLETED, + "F": SmartSimStatus.STATUS_COMPLETED, + "X": SmartSimStatus.STATUS_COMPLETED, } def __init__( @@ -183,10 +182,14 @@ def __init__( ) -> None: if status == "NOTFOUND": if returncode is not None: - smartsim_status = "Completed" if returncode == 0 else "Failed" + smartsim_status = ( + SmartSimStatus.STATUS_COMPLETED + if returncode == 0 + else SmartSimStatus.STATUS_FAILED + ) else: # if PBS job history isnt available, and job isnt in queue - smartsim_status = "Completed" + smartsim_status = SmartSimStatus.STATUS_COMPLETED returncode = 0 else: smartsim_status = self._get_smartsim_status(status) @@ -197,16 +200,16 @@ def __init__( class LSFBatchStepInfo(StepInfo): # cov-lsf @property - def mapping(self) -> t.Dict[str, str]: + def mapping(self) -> t.Dict[str, SmartSimStatus]: # pylint: disable=line-too-long # see https://www.ibm.com/docs/en/spectrum-lsf/10.1.0?topic=execution-about-job-states return { - "RUN": STATUS_RUNNING, - "PSUSP": STATUS_PAUSED, - "USUSP": STATUS_PAUSED, - "SSUSP": STATUS_PAUSED, - "PEND": STATUS_PAUSED, - "DONE": STATUS_COMPLETED, + "RUN": SmartSimStatus.STATUS_RUNNING, + "PSUSP": SmartSimStatus.STATUS_PAUSED, + "USUSP": SmartSimStatus.STATUS_PAUSED, + "SSUSP": SmartSimStatus.STATUS_PAUSED, + "PEND": SmartSimStatus.STATUS_PAUSED, + "DONE": SmartSimStatus.STATUS_COMPLETED, } def __init__( @@ -218,9 +221,13 
@@ def __init__( ) -> None: if status == "NOTFOUND": if returncode is not None: - smartsim_status = "Completed" if returncode == 0 else "Failed" + smartsim_status = ( + SmartSimStatus.STATUS_COMPLETED + if returncode == 0 + else SmartSimStatus.STATUS_FAILED + ) else: - smartsim_status = "Completed" + smartsim_status = SmartSimStatus.STATUS_COMPLETED returncode = 0 else: smartsim_status = self._get_smartsim_status(status) @@ -231,14 +238,14 @@ def __init__( class LSFJsrunStepInfo(StepInfo): # cov-lsf @property - def mapping(self) -> t.Dict[str, str]: + def mapping(self) -> t.Dict[str, SmartSimStatus]: # pylint: disable=line-too-long # see https://www.ibm.com/docs/en/spectrum-lsf/10.1.0?topic=execution-about-job-states return { - "Killed": STATUS_COMPLETED, - "Running": STATUS_RUNNING, - "Queued": STATUS_PAUSED, - "Complete": STATUS_COMPLETED, + "Killed": SmartSimStatus.STATUS_COMPLETED, + "Running": SmartSimStatus.STATUS_RUNNING, + "Queued": SmartSimStatus.STATUS_PAUSED, + "Complete": SmartSimStatus.STATUS_COMPLETED, } def __init__( @@ -250,9 +257,13 @@ def __init__( ) -> None: if status == "NOTFOUND": if returncode is not None: - smartsim_status = "Completed" if returncode == 0 else "Failed" + smartsim_status = ( + SmartSimStatus.STATUS_COMPLETED + if returncode == 0 + else SmartSimStatus.STATUS_FAILED + ) else: - smartsim_status = "Completed" + smartsim_status = SmartSimStatus.STATUS_COMPLETED returncode = 0 else: smartsim_status = self._get_smartsim_status(status, returncode) diff --git a/smartsim/_core/launcher/taskManager.py b/smartsim/_core/launcher/taskManager.py index 84123944e..60f097da6 100644 --- a/smartsim/_core/launcher/taskManager.py +++ b/smartsim/_core/launcher/taskManager.py @@ -114,17 +114,11 @@ def start_task( by a workload manager :param cmd_list: command to run - :type cmd_list: list[str] :param cwd: current working directory - :type cwd: str :param env: environment to launch with - :type env: dict[str, str], optional. 
If None, calling environment is inherited - :param out: output file, defaults to PIPE - :type out: file, optional - :param err: error file, defaults to PIPE - :type err: file, optional + :param out: output file + :param err: error file :return: task id - :rtype: int """ with self._lock: proc = execute_async_cmd(cmd_list, cwd, env=env, out=out, err=err) @@ -150,15 +144,10 @@ def start_and_wait( This is primarily used for batch job launches :param cmd_list: command to run - :type cmd_list: list[str] :param cwd: current working directory - :type cwd: str :param env: environment to launch with - :type env: dict[str, str], optional - :param timeout: time to wait, defaults to None - :type timeout: int, optional + :param timeout: time to wait :return: returncode, output, and err - :rtype: int, str, str """ returncode, out, err = execute_cmd(cmd_list, cwd=cwd, env=env, timeout=timeout) if VERBOSE_TM: @@ -169,7 +158,6 @@ def add_existing(self, task_id: int) -> None: """Add existing task to be managed by the TaskManager :param task_id: task id of existing task - :type task_id: str :raises LauncherError: If task cannot be found """ with self._lock: @@ -186,7 +174,6 @@ def remove_task(self, task_id: str) -> None: """Remove a task from the TaskManager :param task_id: id of the task to remove - :type task_id: str """ with self._lock: if VERBOSE_TM: @@ -210,9 +197,7 @@ def get_task_update( """Get the update of a task :param task_id: task id - :type task_id: str :return: status, returncode, output, error - :rtype: str, int, str, str """ with self._lock: try: @@ -251,13 +236,9 @@ def add_task_history( Add a task to record its future returncode, output and error :param task_id: id of the task - :type task_id: str :param returncode: returncode - :type returncode: int, defaults to None - :param out: output, defaults to None - :type out: str, optional - :param err: output, defaults to None - :type err: str, optional + :param out: output + :param err: output """ 
self.task_history[task_id] = (returncode, out, err) @@ -278,7 +259,6 @@ def __init__(self, process: psutil.Process) -> None: """Initialize a task :param process: Popen object - :type process: psutil.Process """ self.process = process self.pid = str(self.process.pid) @@ -287,7 +267,6 @@ def check_status(self) -> t.Optional[int]: """Ping the job and return the returncode if finished :return: returncode if finished otherwise None - :rtype: int """ if self.owned and isinstance(self.process, psutil.Popen): poll_result = self.process.poll() @@ -302,7 +281,6 @@ def get_io(self) -> t.Tuple[t.Optional[str], t.Optional[str]]: """Get the IO from the subprocess :return: output and error from the Popen - :rtype: str, str """ # Process class does not implement communicate if not self.owned or not isinstance(self.process, psutil.Popen): @@ -335,8 +313,7 @@ def kill_callback(proc: psutil.Process) -> None: def terminate(self, timeout: int = 10) -> None: """Terminate a this process and all children. - :param timeout: time to wait for task death, defaults to 10 - :type timeout: int, optional + :param timeout: time to wait for task death """ def terminate_callback(proc: psutil.Process) -> None: diff --git a/smartsim/_core/launcher/util/launcherUtil.py b/smartsim/_core/launcher/util/launcherUtil.py index a24d69e49..1a6ec5d83 100644 --- a/smartsim/_core/launcher/util/launcherUtil.py +++ b/smartsim/_core/launcher/util/launcherUtil.py @@ -38,9 +38,7 @@ def __init__( """Initialize a ComputeNode :param node_name: the name of the node - :type node_name: str :param node_ppn: the number of ppn - :type node_ppn: int """ self.name: t.Optional[str] = node_name self.ppn: t.Optional[int] = node_ppn @@ -52,7 +50,6 @@ def _is_valid_node(self) -> bool: and ppn being not None. 
:returns: True if valid, false otherwise - :rtype: bool """ if self.name is None: return False @@ -80,7 +77,6 @@ def _is_valid_partition(self) -> bool: and each ComputeNode being valid :returns: True if valid, false otherwise - :rtype: bool """ if self.name is None: return False diff --git a/smartsim/_core/launcher/util/shell.py b/smartsim/_core/launcher/util/shell.py index c747bacbc..a2b5bc76b 100644 --- a/smartsim/_core/launcher/util/shell.py +++ b/smartsim/_core/launcher/util/shell.py @@ -49,22 +49,14 @@ def execute_cmd( """Execute a command locally :param cmd_list: list of command with arguments - :type cmd_list: list of str - :param shell: run in system shell, defaults to False - :type shell: bool, optional - :param cwd: current working directory, defaults to None - :type cwd: str, optional - :param env: environment to launcher process with, - defaults to None (current env) - :type env: dict[str, str], optional - :param proc_input: input to the process, defaults to "" - :type proc_input: str, optional - :param timeout: timeout of the process, defaults to None - :type timeout: int, optional + :param shell: run in system shell + :param cwd: current working directory + :param env: environment to launcher process with + :param proc_input: input to the process + :param timeout: timeout of the process :raises ShellError: if timeout of process was exceeded :raises ShellError: if child process raises an error :return: returncode, output, and error of the process - :rtype: tuple of (int, str, str) """ if VERBOSE_SHELL: source = "shell" if shell else "Popen" @@ -106,13 +98,9 @@ def execute_async_cmd( popen subprocess object wrapped with psutil. 
:param cmd_list: list of command with arguments - :type cmd_list: list of str :param cwd: current working directory - :type cwd: str :param env: environment variables to set - :type env: dict[str, str] :return: the subprocess object - :rtype: psutil.Popen """ if VERBOSE_SHELL: logger.debug(f"Executing command: {' '.join(cmd_list)}") diff --git a/smartsim/settings/mpirunSettings.py b/smartsim/_core/schemas/__init__.py similarity index 78% rename from smartsim/settings/mpirunSettings.py rename to smartsim/_core/schemas/__init__.py index 994d62bba..d7ee9d83d 100644 --- a/smartsim/settings/mpirunSettings.py +++ b/smartsim/_core/schemas/__init__.py @@ -24,18 +24,18 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -from warnings import simplefilter, warn +from .dragonRequests import * +from .dragonResponses import * -from ..log import get_logger - -# pylint: disable-next=unused-import -from .mpiSettings import MpiexecSettings, MpirunSettings, OrterunSettings - -logger = get_logger(__name__) - -simplefilter("once", DeprecationWarning) -warn( - "mpirunSettings will be deprecated; use mpiSettings instead.", - DeprecationWarning, - stacklevel=2, -) +__all__ = [ + "DragonRequest", + "DragonRunRequest", + "DragonHandshakeRequest", + "DragonUpdateStatusRequest", + "DragonStopRequest", + "DragonResponse", + "DragonRunResponse", + "DragonHandshakeResponse", + "DragonUpdateStatusResponse", + "DragonStopResponse", +] diff --git a/smartsim/_core/schemas/dragonRequests.py b/smartsim/_core/schemas/dragonRequests.py new file mode 100644 index 000000000..3e384f746 --- /dev/null +++ b/smartsim/_core/schemas/dragonRequests.py @@ -0,0 +1,90 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import typing as t + +from pydantic import BaseModel, Field, PositiveInt + +import smartsim._core.schemas.utils as _utils + +# Black and Pylint disagree about where to put the `...` +# pylint: disable=multiple-statements + +request_registry = _utils.SchemaRegistry["DragonRequest"]() + + +class DragonRequest(BaseModel): ... 
+ + +class DragonRunRequestView(DragonRequest): + exe: t.Annotated[str, Field(min_length=1)] + exe_args: t.List[t.Annotated[str, Field(min_length=1)]] = [] + path: t.Annotated[str, Field(min_length=1)] + nodes: PositiveInt = 1 + tasks: PositiveInt = 1 + tasks_per_node: PositiveInt = 1 + hostlist: t.Optional[t.Annotated[str, Field(min_length=1)]] = None + output_file: t.Optional[t.Annotated[str, Field(min_length=1)]] = None + error_file: t.Optional[t.Annotated[str, Field(min_length=1)]] = None + env: t.Dict[str, t.Optional[str]] = {} + name: t.Optional[t.Annotated[str, Field(min_length=1)]] = None + pmi_enabled: bool = True + + +@request_registry.register("run") +class DragonRunRequest(DragonRunRequestView): + current_env: t.Dict[str, t.Optional[str]] = {} + + def __str__(self) -> str: + return str(DragonRunRequestView.parse_obj(self.dict(exclude={"current_env"}))) + + +@request_registry.register("update_status") +class DragonUpdateStatusRequest(DragonRequest): + step_ids: t.List[t.Annotated[str, Field(min_length=1)]] + + +@request_registry.register("stop") +class DragonStopRequest(DragonRequest): + step_id: t.Annotated[str, Field(min_length=1)] + + +@request_registry.register("handshake") +class DragonHandshakeRequest(DragonRequest): ... 
+ + +@request_registry.register("bootstrap") +class DragonBootstrapRequest(DragonRequest): + address: t.Annotated[str, Field(min_length=1)] + + +@request_registry.register("shutdown") +class DragonShutdownRequest(DragonRequest): + immediate: bool = True + """Whether the server should shut down immediately, setting this to False means + that the server will shut down when all jobs are terminated.""" + frontend_shutdown: bool = True + """Whether the frontend will have to shut down or wait for external termination""" diff --git a/smartsim/_core/schemas/dragonResponses.py b/smartsim/_core/schemas/dragonResponses.py new file mode 100644 index 000000000..3c5c30a10 --- /dev/null +++ b/smartsim/_core/schemas/dragonResponses.py @@ -0,0 +1,73 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import typing as t + +from pydantic import BaseModel, Field + +import smartsim._core.schemas.utils as _utils +from smartsim.status import SmartSimStatus + +# Black and Pylint disagree about where to put the `...` +# pylint: disable=multiple-statements + +response_registry = _utils.SchemaRegistry["DragonResponse"]() + + +class DragonResponse(BaseModel): + error_message: t.Optional[str] = None + + +@response_registry.register("run") +class DragonRunResponse(DragonResponse): + step_id: t.Annotated[str, Field(min_length=1)] + + +@response_registry.register("status_update") +class DragonUpdateStatusResponse(DragonResponse): + # status is a dict: {step_id: (is_alive, returncode)} + statuses: t.Mapping[ + t.Annotated[str, Field(min_length=1)], + t.Tuple[SmartSimStatus, t.Optional[t.List[int]]], + ] = {} + + +@response_registry.register("stop") +class DragonStopResponse(DragonResponse): ... + + +@response_registry.register("handshake") +class DragonHandshakeResponse(DragonResponse): + dragon_pid: int + + +@response_registry.register("bootstrap") +class DragonBootstrapResponse(DragonResponse): + dragon_pid: int + + +@response_registry.register("shutdown") +class DragonShutdownResponse(DragonResponse): ... 
diff --git a/smartsim/_core/schemas/utils.py b/smartsim/_core/schemas/utils.py new file mode 100644 index 000000000..9cb36bcf5 --- /dev/null +++ b/smartsim/_core/schemas/utils.py @@ -0,0 +1,124 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import dataclasses +import typing as t + +import pydantic +import pydantic.dataclasses + +if t.TYPE_CHECKING: + from zmq.sugar.socket import Socket + +_SchemaT = t.TypeVar("_SchemaT", bound=pydantic.BaseModel) +_SendT = t.TypeVar("_SendT", bound=pydantic.BaseModel) +_RecvT = t.TypeVar("_RecvT", bound=pydantic.BaseModel) + +_DEFAULT_MSG_DELIM: t.Final[str] = "|" + + +@t.final +@pydantic.dataclasses.dataclass(frozen=True) +class _Message(t.Generic[_SchemaT]): + payload: _SchemaT + header: str = pydantic.Field(min_length=1) + delimiter: str = pydantic.Field(min_length=1, default=_DEFAULT_MSG_DELIM) + + def __str__(self) -> str: + return self.delimiter.join((self.header, self.payload.json())) + + @classmethod + def from_str( + cls, + str_: str, + payload_type: t.Type[_SchemaT], + delimiter: str = _DEFAULT_MSG_DELIM, + ) -> "_Message[_SchemaT]": + header, payload = str_.split(delimiter, 1) + return cls(payload_type.parse_raw(payload), header, delimiter) + + +class SchemaRegistry(t.Generic[_SchemaT]): + def __init__( + self, init_map: t.Optional[t.Mapping[str, t.Type[_SchemaT]]] = None + ) -> None: + self._map = dict(init_map) if init_map else {} + + def register(self, key: str) -> t.Callable[[t.Type[_SchemaT]], t.Type[_SchemaT]]: + if _DEFAULT_MSG_DELIM in key: + _msg = f"Registry key cannot contain delimiter `{_DEFAULT_MSG_DELIM}`" + raise ValueError(_msg) + if not key: + raise KeyError("Key cannot be the empty string") + if key in self._map: + raise KeyError(f"Key `{key}` has already been registered for this parser") + + def _register(cls: t.Type[_SchemaT]) -> t.Type[_SchemaT]: + self._map[key] = cls + return cls + + return _register + + def to_string(self, schema: _SchemaT) -> str: + return str(self._to_message(schema)) + + def _to_message(self, schema: _SchemaT) -> _Message[_SchemaT]: + reverse_map = dict((v, k) for k, v in self._map.items()) + try: + val = reverse_map[type(schema)] + except KeyError: + raise TypeError(f"Unregistered schema type: {type(schema)}") 
from None + return _Message(schema, val, _DEFAULT_MSG_DELIM) + + def from_string(self, str_: str) -> _SchemaT: + try: + type_, _ = str_.split(_DEFAULT_MSG_DELIM, 1) + except ValueError: + _msg = f"Failed to determine schema type of the string {repr(str_)}" + raise ValueError(_msg) from None + try: + cls = self._map[type_] + except KeyError: + raise ValueError(f"No type of value `{type_}` is registered") from None + msg = _Message.from_str(str_, cls, _DEFAULT_MSG_DELIM) + return self._from_message(msg) + + @staticmethod + def _from_message(msg: _Message[_SchemaT]) -> _SchemaT: + return msg.payload + + +@dataclasses.dataclass(frozen=True) +class SocketSchemaTranslator(t.Generic[_SendT, _RecvT]): + socket: "Socket[t.Any]" + _send_registry: SchemaRegistry[_SendT] + _recv_registry: SchemaRegistry[_RecvT] + + def send(self, schema: _SendT, flags: int = 0) -> None: + self.socket.send_string(self._send_registry.to_string(schema), flags) + + def recv(self, flags: int = 0) -> _RecvT: + return self._recv_registry.from_string(self.socket.recv_string(flags)) diff --git a/smartsim/_core/utils/__init__.py b/smartsim/_core/utils/__init__.py index cb9395881..3ea928797 100644 --- a/smartsim/_core/utils/__init__.py +++ b/smartsim/_core/utils/__init__.py @@ -24,5 +24,12 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-from .helpers import colorize, delete_elements, init_default, installed_redisai_backends +from .helpers import ( + check_for_utility, + colorize, + delete_elements, + execute_platform_cmd, + installed_redisai_backends, + is_crayex_platform, +) from .redis import check_cluster_status, create_cluster, db_is_active diff --git a/smartsim/_core/utils/helpers.py b/smartsim/_core/utils/helpers.py index 27059e320..df2c016a1 100644 --- a/smartsim/_core/utils/helpers.py +++ b/smartsim/_core/utils/helpers.py @@ -28,7 +28,10 @@ A file of helper functions for SmartSim """ import base64 +import collections.abc import os +import signal +import subprocess import typing as t import uuid from datetime import datetime @@ -38,16 +41,19 @@ from smartsim._core._install.builder import TRedisAIBackendStr as _TRedisAIBackendStr +if t.TYPE_CHECKING: + from types import FrameType + + +_TSignalHandlerFn = t.Callable[[int, t.Optional["FrameType"]], object] + def unpack_db_identifier(db_id: str, token: str) -> t.Tuple[str, str]: """Unpack the unformatted database identifier and format for env variable suffix using the token :param db_id: the unformatted database identifier eg. identifier_1 - :type db_id: str :param token: character to use to construct the db suffix - :type token: str :return: db id suffix and formatted db_id e.g. 
("_identifier_1", "identifier_1") - :rtype: (str, str) """ if db_id == "orchestrator": @@ -58,10 +64,9 @@ def unpack_db_identifier(db_id: str, token: str) -> t.Tuple[str, str]: def unpack_colo_db_identifier(db_id: str) -> str: """Create database identifier suffix for colocated database + :param db_id: the unformatted database identifier - :type db_id: str :return: db suffix - :rtype: str """ return "_" + db_id if db_id else "" @@ -92,10 +97,9 @@ def fmt_dict(value: t.Dict[str, t.Any]) -> str: def get_base_36_repr(positive_int: int) -> str: """Converts a positive integer to its base 36 representation + :param positive_int: the positive integer to convert - :type positive_int: int :return: base 36 representation of the given positive int - :rtype: str """ digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ" result = [] @@ -108,23 +112,10 @@ def get_base_36_repr(positive_int: int) -> str: return "".join(reversed(result)) -def init_default( - default: t.Any, - init_value: t.Any, - expected_type: t.Union[t.Type[t.Any], t.Tuple[t.Type[t.Any], ...], None] = None, -) -> t.Any: - if init_value is None: - return default - if expected_type is not None and not isinstance(init_value, expected_type): - raise TypeError(f"Argument was of type {type(init_value)}, not {expected_type}") - return init_value - - def expand_exe_path(exe: str) -> str: """Takes an executable and returns the full path to that executable :param exe: executable or file - :type exe: str :raises TypeError: if file is not an executable :raises FileNotFoundError: if executable cannot be found """ @@ -186,9 +177,7 @@ def colorize( def delete_elements(dictionary: t.Dict[str, t.Any], key_list: t.List[str]) -> None: """Delete elements from a dictionary. :param dictionary: the dictionary from which the elements must be deleted. - :type dictionary: dict :param key_list: the list of keys to delete from the dictionary. 
- :type key: any """ for key in key_list: if key in dictionary: @@ -213,9 +202,7 @@ def cat_arg_and_value(arg_name: str, value: str) -> str: `-arg_name=value` (i.e., `-a val`) :param arg_name: the command line argument name - :type arg_name: str :param value: the command line argument value - :type value: str """ if arg_name.startswith("--"): @@ -259,10 +246,8 @@ def installed_redisai_backends( the backend directories (`redisai_tensorflow`, `redisai_torch`, `redisai_onnxruntime`, or `redisai_tflite`). - :param backends_path: path containing backends, defaults to None - :type backends_path: str, optional + :param backends_path: path containing backends :return: list of installed RedisAI backends - :rtype: set[str] """ # import here to avoid circular import base_path = redis_install_base(backends_path) @@ -276,12 +261,12 @@ def installed_redisai_backends( return {backend for backend in backends if _installed(base_path, backend)} -def get_ts() -> int: - """Return the current timestamp (accurate to seconds) cast to an integer""" - return int(datetime.timestamp(datetime.now())) +def get_ts_ms() -> int: + """Return the current timestamp (accurate to milliseconds) cast to an integer""" + return int(datetime.now().timestamp() * 1000) -def encode_cmd(cmd: t.List[str]) -> str: +def encode_cmd(cmd: t.Sequence[str]) -> str: """Transform a standard command list into an encoded string safe for providing as an argument to a proxy entrypoint """ @@ -302,3 +287,205 @@ def decode_cmd(encoded_cmd: str) -> t.List[str]: cleaned_cmd = decoded_cmd.decode("ascii").split("|") return cleaned_cmd + + +def check_for_utility(util_name: str) -> str: + """Check for existence of the provided CLI utility. + + :param util_name: CLI utility to locate + :returns: Full path to executable if found. Otherwise, empty string""" + utility = "" + + try: + utility = expand_exe_path(util_name) + except FileNotFoundError: + ... 
+ + return utility + + +def execute_platform_cmd(cmd: str) -> t.Tuple[str, int]: + """Execute the platform check command as a subprocess + + :param cmd: the command to execute + :returns: True if platform is cray ex, False otherwise""" + process = subprocess.run( + cmd.split(), + capture_output=True, + check=False, + ) + return process.stdout.decode("utf-8"), process.returncode + + +class CrayExPlatformResult: + locate_msg = "Unable to locate `{0}`." + + def __init__(self, ldconfig: t.Optional[str], fi_info: t.Optional[str]) -> None: + self.ldconfig: t.Optional[str] = ldconfig + self.fi_info: t.Optional[str] = fi_info + self.has_pmi: bool = False + self.has_pmi2: bool = False + self.has_cxi: bool = False + + @property + def has_ldconfig(self) -> bool: + return bool(self.ldconfig) + + @property + def has_fi_info(self) -> bool: + return bool(self.fi_info) + + @property + def is_cray(self) -> bool: + return all( + ( + self.has_ldconfig, + self.has_fi_info, + self.has_pmi, + self.has_pmi2, + self.has_cxi, + ) + ) + + @property + def failures(self) -> t.List[str]: + """Return a list of messages describing all failed validations""" + failure_messages = [] + + if not self.has_ldconfig: + failure_messages.append(self.locate_msg.format("ldconfig")) + + if not self.has_fi_info: + failure_messages.append(self.locate_msg.format("fi_info")) + + if self.has_ldconfig and self.has_fi_info: + if not self.has_pmi: + failure_messages.append(self.locate_msg.format("pmi.so")) + if not self.has_pmi2: + failure_messages.append(self.locate_msg.format("pmi2.so")) + if not self.has_cxi: + failure_messages.append(self.locate_msg.format("cxi.so")) + + return failure_messages + + +def check_platform() -> CrayExPlatformResult: + """Returns True if the current platform is identified as Cray EX and + HSTA-aware dragon package can be installed, False otherwise. 
+ + :returns: True if current platform is Cray EX, False otherwise""" + + # ldconfig -p | grep cray | grep pmi.so && + # ldconfig -p | grep cray | grep pmi2.so && + # fi_info | grep cxi + + ldconfig = check_for_utility("ldconfig") + fi_info = check_for_utility("fi_info") + + result = CrayExPlatformResult(ldconfig, fi_info) + if not all((result.has_ldconfig, result.has_fi_info)): + return result + + ldconfig1 = f"{ldconfig} -p" + ldc_out1, _ = execute_platform_cmd(ldconfig1) + candidates = [x for x in ldc_out1.split("\n") if "cray" in x] + result.has_pmi = any(x for x in candidates if "pmi.so" in x) + + ldconfig2 = f"{ldconfig} -p" + ldc_out2, _ = execute_platform_cmd(ldconfig2) + candidates = [x for x in ldc_out2.split("\n") if "cray" in x] + result.has_pmi2 = any(x for x in candidates if "pmi2.so" in x) + + fi_info_out, _ = execute_platform_cmd(fi_info) + result.has_cxi = any(x for x in fi_info_out.split("\n") if "cxi" in x) + + return result + + +def is_crayex_platform() -> bool: + """Returns True if the current platform is identified as Cray EX and + HSTA-aware dragon package can be installed, False otherwise. + + :returns: True if current platform is Cray EX, False otherwise""" + result = check_platform() + return result.is_cray + + +@t.final +class SignalInterceptionStack(collections.abc.Collection[_TSignalHandlerFn]): + """Registers a stack of callables to be called when a signal is + received before calling the original signal handler. + """ + + def __init__( + self, + signalnum: int, + callbacks: t.Optional[t.Iterable[_TSignalHandlerFn]] = None, + ) -> None: + """Set up a ``SignalInterceptionStack`` for particular signal number. + + .. note:: + This class typically should not be instanced directly as it will + change the registered signal handler regardless of if a signal + interception stack is already present. Instead, it is generally + best to create or get a signal interception stack for a particular + signal number via the `get` factory method. 
+ + :param signalnum: The signal number to intercept + :param callbacks: A iterable of functions to call upon receiving the signal + """ + self._callbacks = list(callbacks) if callbacks else [] + self._original = signal.signal(signalnum, self) + + def __call__(self, signalnum: int, frame: t.Optional["FrameType"]) -> None: + """Handle the signal on which the interception stack was registered. + End by calling the originally registered signal hander (if present). + + :param frame: The current stack frame + """ + for fn in self: + fn(signalnum, frame) + if callable(self._original): + self._original(signalnum, frame) + + def __contains__(self, obj: object) -> bool: + return obj in self._callbacks + + def __iter__(self) -> t.Iterator[_TSignalHandlerFn]: + return reversed(self._callbacks) + + def __len__(self) -> int: + return len(self._callbacks) + + @classmethod + def get(cls, signalnum: int) -> "SignalInterceptionStack": + """Fetch an existing ``SignalInterceptionStack`` or create a new one + for a particular signal number. + + :param signalnum: The singal number of the signal interception stack + should be registered + :returns: The existing or created signal interception stack + """ + handler = signal.getsignal(signalnum) + if isinstance(handler, cls): + return handler + return cls(signalnum, []) + + def push(self, fn: _TSignalHandlerFn) -> None: + """Add a callback to the signal interception stack. + + :param fn: A callable to add to the unique signal stack + """ + self._callbacks.append(fn) + + def push_unique(self, fn: _TSignalHandlerFn) -> bool: + """Add a callback to the signal interception stack if and only if the + callback is not already present. 
+ + :param fn: A callable to add to the unique signal stack + :returns: True if the callback was added, False if the callback was + already present + """ + if did_push := fn not in self: + self.push(fn) + return did_push diff --git a/smartsim/_core/utils/network.py b/smartsim/_core/utils/network.py index 69eeb3e1b..aaceb7fc6 100644 --- a/smartsim/_core/utils/network.py +++ b/smartsim/_core/utils/network.py @@ -25,6 +25,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import socket +import typing as t import psutil @@ -33,13 +34,16 @@ """ +class IFConfig(t.NamedTuple): + interface: t.Optional[str] + address: t.Optional[str] + + def get_ip_from_host(host: str) -> str: """Return the IP address for the interconnect. :param host: hostname of the compute node e.g. nid00004 - :type host: str :returns: ip of host - :rtype: str """ ip_address = socket.gethostbyname(host) return ip_address @@ -50,11 +54,9 @@ def get_ip_from_interface(interface: str) -> str: # pragma: no cover """Get IPV4 address of a network interface :param interface: interface name - :type interface: str :raises ValueError: if the interface does not exist :raises ValueError: if interface does not have an IPV4 address :return: ip address of interface - :rtype: str """ net_if_addrs = psutil.net_if_addrs() if interface not in net_if_addrs: @@ -86,3 +88,32 @@ def current_ip(interface: str = "lo") -> str: # pragma: no cover return get_ip_from_interface(loopback) return get_ip_from_interface(interface) + + +def get_best_interface_and_address() -> IFConfig: + available_ifs = psutil.net_if_addrs() + # TODO make this a CONFIG-time parameter + known_ifs = ["hsn", "ipogif", "ib"] + for interface in available_ifs: + if any(interface.startswith(if_prefix) for if_prefix in known_ifs): + return IFConfig(interface, get_ip_from_interface(interface)) + return IFConfig(None, None) + + +def find_free_port(start: int = 0) -> int: + """A 'good enough' way to find an open port to bind to + + :param 
start: The first port number to consider + :returns: The first open port found + """ + port_num = -1 + while port_num < 0: + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock: + try: + sock.bind(("0.0.0.0", start)) + _, port = sock.getsockname() + port_num = int(port) + except Exception: + # swallow connection exception; test if the next port is open + start += 1 + return port_num diff --git a/smartsim/_core/utils/redis.py b/smartsim/_core/utils/redis.py index 3bcf1c1f2..7fa59ad83 100644 --- a/smartsim/_core/utils/redis.py +++ b/smartsim/_core/utils/redis.py @@ -53,9 +53,7 @@ def create_cluster(hosts: t.List[str], ports: t.List[int]) -> None: # cov-wlm needs to occur manually which is not often. :param hosts: List of hostnames to connect to - :type hosts: List[str] :param ports: List of ports for each hostname - :type ports: List[int] :raises SmartSimError: if cluster creation fails """ ip_list = [] @@ -69,7 +67,7 @@ def create_cluster(hosts: t.List[str], ports: t.List[int]) -> None: # cov-wlm redis_cli = CONFIG.database_cli cmd = [redis_cli, "--cluster", "create"] cmd += ip_list - cmd += ["--cluster-replicas", "0"] + cmd += ["--cluster-replicas", "0", "--cluster-yes"] returncode, out, err = execute_cmd(cmd, proc_input="yes", shell=False) if returncode != 0: @@ -85,11 +83,8 @@ def check_cluster_status( """Check that a Redis/KeyDB cluster is up and running :param hosts: List of hostnames to connect to - :type hosts: List[str] :param ports: List of ports for each hostname - :type ports: List[int] :param trials: number of attempts to verify cluster status - :type trials: int, optional :raises SmartSimError: If cluster status cannot be verified """ @@ -129,13 +124,9 @@ def db_is_active(hosts: t.List[str], ports: t.List[int], num_shards: int) -> boo just ping DB. 
:param hosts: list of hosts - :type hosts: list[str] :param ports: list of ports - :type ports: list[int] :param num_shards: Number of DB shards - :type num_shards: int :return: Whether DB is running - :rtype: bool """ # if single shard if num_shards < 2: @@ -210,7 +201,7 @@ def set_script(db_script: DBScript, client: Client) -> None: client.set_script( name=db_script.name, script=db_script.script, device=device ) - else: + elif callable(db_script.script): client.set_function( name=db_script.name, function=db_script.script, device=device ) @@ -229,11 +220,8 @@ def shutdown_db_node(host_ip: str, port: int) -> t.Tuple[int, str, str]: # cov- will take care of this automatically. :param host_ip: IP of host to connect to - :type hosts: str :param ports: Port to which node is listening - :type ports: int :return: returncode, output, and error of the process - :rtype: tuple of (int, str, str) """ redis_cli = CONFIG.database_cli cmd = [redis_cli, "-h", host_ip, "-p", str(port), "shutdown"] @@ -241,7 +229,9 @@ def shutdown_db_node(host_ip: str, port: int) -> t.Tuple[int, str, str]: # cov- if returncode != 0: logger.error(out) - logger.error(err) + err_msg = "Error while shutting down DB node. " + err_msg += f"Return code: {returncode}, err: {err}" + logger.error(err_msg) elif out: logger.debug(out) diff --git a/smartsim/_core/utils/security.py b/smartsim/_core/utils/security.py new file mode 100644 index 000000000..e6f84c81a --- /dev/null +++ b/smartsim/_core/utils/security.py @@ -0,0 +1,302 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +import dataclasses +import pathlib +import stat +import typing as t +from enum import IntEnum + +import zmq +import zmq.auth + +from smartsim._core.config.config import Config +from smartsim.log import get_logger + +logger = get_logger(__name__) + + +class _KeyPermissions(IntEnum): + """Permissions used by KeyManager""" + + PRIVATE_KEY = stat.S_IRUSR | stat.S_IWUSR + """Permissions only allowing an owner to read and write the file""" + PUBLIC_KEY = stat.S_IRUSR | stat.S_IWUSR | stat.S_IROTH | stat.S_IRGRP + """Permissions allowing an owner, others, and the group to read a file""" + + PRIVATE_DIR = ( + stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IXOTH | stat.S_IXGRP + ) + """Permissions allowing only owners to read, write and traverse a directory""" + PUBLIC_DIR = ( + stat.S_IRUSR + | stat.S_IWUSR + | stat.S_IXUSR + | stat.S_IROTH + | stat.S_IXOTH + | stat.S_IRGRP + | stat.S_IXGRP + ) + """Permissions allowing non-owners to traverse a directory""" + + 
+@dataclasses.dataclass(frozen=True) +class KeyPair: + """A public and private key pair""" + + public: bytes = dataclasses.field(default=b"") + """The public key""" + + private: bytes = dataclasses.field(default=b"", repr=False) + """The private key""" + + @property + def empty(self) -> bool: + """Return `True` if the KeyPair has no key values set. Useful + for faking the null object pattern""" + return self.public == self.private and len(self.public) == 0 + + +class _KeyLocator: + """Determines the paths to use when persisting a `KeyPair` to disk""" + + def __init__( + self, + root_dir: pathlib.Path, + filename: str, + category: str, + ) -> None: + """Initiailize a `KeyLocator` + + :param root_dir: root path where keys are persisted to disk + :param filename: the stem name of the key file + :param category: the category or use-case for the key (e.g. server) + :param separate_keys: flag indicating if public and private keys should + be persisted in separate, corresponding directories + """ + + # constants for standardized paths. + self._public_subdir = "pub" + """The category subdirectory to use when persisting a public key""" + + self._private_subdir = "priv" + """The category subdirectory to use when persisting a private key""" + + self._public_extension = "key" + """The extension found on public keys""" + + self._private_extension = "key_secret" + """The extension found on private keys""" + + self._key_root_dir = root_dir + """Path to the root directory containing key files""" + + self._filename = filename + """Base name for key files""" + + self._category = category + """Category name used to further separate key locations""" + + @property + def public_dir(self) -> pathlib.Path: + """Target directory for the public key""" + return self.public.parent + + @property + def private_dir(self) -> pathlib.Path: + """Target directory for the private key""" + return self.private.parent + + @property + def public_filename(self) -> str: + """Filename (.) 
of the public key file""" + return f"{self._filename}.{self._public_extension}" + + @property + def private_filename(self) -> str: + """Filename (.) of the private key file""" + return f"{self._filename}.{self._private_extension}" + + @property + def public(self) -> pathlib.Path: + """Full target path of the public key file""" + # combine the root and key type (e.g. /foo/bar + /server) + # then combine the pub/priv key subdir (e.g. /foo/bar/server + /pub) + path = self._key_root_dir / self._category / self._public_subdir + return path / self.public_filename + + @property + def private(self) -> pathlib.Path: + """Full target path of the private key file""" + # combine the root and key type (e.g. /foo/bar + /server) + # then combine the pub/priv key subdir (e.g. /foo/bar/server + /pub) + path = self._key_root_dir / self._category / self._private_subdir + # combine the pub/priv key subdir if necessary (e.g. /foo/bar + /priv) + + return path / self.private_filename + + +class KeyManager: + def __init__( + self, config: Config, as_server: bool = False, as_client: bool = False + ) -> None: + """Initialize a KeyManager instance. 
+ :param config: SmartSim configuration + :param as_server: flag to indicate when executing in the server context; + set to `True` to avoid loading client secret key + :param as_client: flag to indicate when executing in the client context; + set to `True` to avoid loading server secret key + """ + + self._as_server = as_server + """Set to `True` to return keys appropriate for the server context""" + + self._as_client = as_client + """Set to `True` to return keys appropriate for the client context""" + + key_dir = pathlib.Path(config.smartsim_key_path).resolve() + + # Results in key path such as /server/pub/smartsim.key + self._server_locator = _KeyLocator(key_dir, "smartsim", "server") + """The locator for producing the paths to store server key files""" + + # Results in key path such as /client/pub/smartsim.key + self._client_locator = _KeyLocator(key_dir, "smartsim", "client") + """The locator for producing the paths to store client key files""" + + def create_directories(self) -> None: + """Create the subdirectory structure necessary to hold + the public and private key pairs for servers & clients""" + for locator in [self._server_locator, self._client_locator]: + if not locator.public_dir.exists(): + permission = _KeyPermissions.PUBLIC_DIR + logger.debug(f"Creating key dir: {locator.public_dir}, {permission}") + locator.public_dir.mkdir(parents=True, mode=permission) + + if not locator.private_dir.exists(): + permission = _KeyPermissions.PRIVATE_DIR + logger.debug(f"Creating key dir: {locator.private_dir}, {permission}") + locator.private_dir.mkdir(parents=True, mode=permission) + + @classmethod + def _load_keypair(cls, locator: _KeyLocator, in_context: bool) -> KeyPair: + """Load a specific `KeyPair` from disk + + :param locator: a `KeyLocator` that specifies the path to an existing key + :param in_context: Boolean flag indicating if the keypair is the active + context; ensures the public and private keys are both loaded when `True`. 
+ Only the public key is loaded when `False` + :returns: a KeyPair containing the loaded public/private key + """ + # private keys contain public & private key parts + key_path = locator.private if in_context else locator.public + + pub_key: bytes = b"" + priv_key: t.Optional[bytes] = b"" + + if key_path.exists(): + logger.debug(f"Existing key files located at {key_path}") + pub_key, priv_key = zmq.auth.load_certificate(key_path) + else: + logger.debug(f"No key files found at {key_path}") + + # avoid a `None` value in the private key when it isn't loaded + return KeyPair(pub_key, priv_key or b"") + + def _load_keys(self) -> t.Tuple[KeyPair, KeyPair]: + """Use ZMQ auth to load public/private key pairs for the server and client + components from the standard key paths for the associated experiment + + :returns: 2-tuple of `KeyPair` (server_keypair, client_keypair) + ]""" + try: + server_keys = self._load_keypair(self._server_locator, self._as_server) + client_keys = self._load_keypair(self._client_locator, self._as_client) + + return server_keys, client_keys + except (ValueError, OSError): + # expected if no keys could be loaded from disk + logger.warning("Loading key pairs failed.", exc_info=True) + + return KeyPair(), KeyPair() + + @classmethod + def _move_public_key(cls, locator: _KeyLocator) -> None: + """The public and private key pair are created in the same directory. Move + the public key out of the private subdir and into the public subdir + + :param locator: `KeyLocator` that determines the path to the + key pair persisted in the same directory. + """ + new_path = locator.private.with_suffix(locator.public.suffix) + if new_path != locator.public: + logger.debug(f"Moving key file from {locator.public} to {new_path}") + new_path.rename(locator.public) + + def _create_keys(self) -> None: + """Create and persist key files to disk""" + for locator in [self._server_locator, self._client_locator]: + # create keys in the private directory... 
+ zmq.auth.create_certificates(locator.private_dir, locator.private.stem) + + # ...but move the public key out of the private subdirectory + self._move_public_key(locator) + + # and ensure correct r/w/x permissions on each file. + locator.private.chmod(_KeyPermissions.PRIVATE_KEY) + locator.public.chmod(_KeyPermissions.PUBLIC_KEY) + + def get_keys(self, create: bool = True) -> t.Tuple[KeyPair, KeyPair]: + """Use ZMQ auth to generate a public/private key pair for the server + and client components. + + :param no_create: pass `no_create=True` to ensure keys are not + created and only pre-existing keys can be loaded + :returns: 2-tuple of `KeyPair` (server_keypair, client_keypair) + """ + logger.debug(f"Loading keys, creation {'is' if create else 'not'} allowed") + server_keys, client_keys = self._load_keys() + + # check if we received "empty keys" + if not server_keys.empty or not client_keys.empty: + return server_keys, client_keys + + if not create: + # if directed not to create new keys, return "empty keys" + logger.debug("Returning empty key pairs") + return KeyPair(), KeyPair() + + self.create_directories() + self._create_keys() + + # load keys to ensure they were persisted + return self._load_keys() + + @property + def client_keys_dir(self) -> pathlib.Path: + "Return the path to the client public keys directory" + return self._client_locator.public_dir diff --git a/smartsim/_core/utils/serialize.py b/smartsim/_core/utils/serialize.py index 69840b838..d4ec66eaf 100644 --- a/smartsim/_core/utils/serialize.py +++ b/smartsim/_core/utils/serialize.py @@ -33,10 +33,8 @@ import smartsim._core._cli.utils as _utils import smartsim.log -from smartsim._core.config import CONFIG if t.TYPE_CHECKING: - from smartsim import Experiment from smartsim._core.control.manifest import LaunchedManifest as _Manifest from smartsim.database.orchestrator import Orchestrator from smartsim.entity import DBNode, Ensemble, Model @@ -54,9 +52,6 @@ def save_launch_manifest(manifest: 
_Manifest[TStepLaunchMetaData]) -> None: - if not CONFIG.telemetry_enabled: - return - manifest.metadata.run_telemetry_subdirectory.mkdir(parents=True, exist_ok=True) exp_out, exp_err = smartsim.log.get_exp_log_paths() @@ -82,7 +77,7 @@ def save_launch_manifest(manifest: _Manifest[TStepLaunchMetaData]) -> None: manifest_dict = { "schema info": { "schema_name": "entity manifest", - "version": "0.0.3", + "version": "0.0.4", }, "experiment": { "name": manifest.metadata.exp_name, @@ -228,6 +223,7 @@ def _dictify_db( db_type, _ = db_path.name.split("-", 1) else: db_type = "Unknown" + return { "name": db.name, "type": db_type, @@ -238,6 +234,17 @@ def _dictify_db( "conf_file": shard.cluster_conf_file, "out_file": out_file, "err_file": err_file, + "memory_file": ( + str(status_dir / "memory.csv") if db.telemetry.is_enabled else "" + ), + "client_file": ( + str(status_dir / "client.csv") if db.telemetry.is_enabled else "" + ), + "client_count_file": ( + str(status_dir / "client_count.csv") + if db.telemetry.is_enabled + else "" + ), "telemetry_metadata": { "status_dir": str(status_dir), "step_id": step_id, diff --git a/smartsim/_core/utils/telemetry/__init__.py b/smartsim/_core/utils/telemetry/__init__.py new file mode 100644 index 000000000..efe03908e --- /dev/null +++ b/smartsim/_core/utils/telemetry/__init__.py @@ -0,0 +1,25 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/smartsim/_core/utils/telemetry/collector.py b/smartsim/_core/utils/telemetry/collector.py new file mode 100644 index 000000000..178126dec --- /dev/null +++ b/smartsim/_core/utils/telemetry/collector.py @@ -0,0 +1,482 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024 Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import abc +import asyncio +import collections +import itertools +import logging +import typing as t + +import redis.asyncio as redisa +import redis.exceptions as redisex + +from smartsim._core.control.job import JobEntity +from smartsim._core.utils.helpers import get_ts_ms +from smartsim._core.utils.telemetry.sink import FileSink, Sink + +logger = logging.getLogger("TelemetryMonitor") + + +class Collector(abc.ABC): + """Base class for telemetry collectors. + + A Collector is used to retrieve runtime metrics about an entity.""" + + def __init__(self, entity: JobEntity, sink: Sink) -> None: + """Initialize the collector + + :param entity: entity to collect metrics on + :param sink: destination to write collected information + """ + self._entity = entity + self._sink = sink + self._enabled = True + + @property + def enabled(self) -> bool: + """Boolean indicating if the collector should perform data collection""" + return self._entity.telemetry_on + + @enabled.setter + def enabled(self, value: bool) -> None: + self._entity.telemetry_on = value + + @property + def entity(self) -> JobEntity: + """The `JobEntity` for which data is collected + :return: the entity""" + return self._entity + + @property + def sink(self) -> Sink: + """The sink where collected data is written + :return: the sink + """ + return self._sink + + @abc.abstractmethod + async def prepare(self) -> None: + """Initialization logic for the collector""" + + @abc.abstractmethod + 
async def collect(self) -> None: + """Execute metric collection""" + + @abc.abstractmethod + async def shutdown(self) -> None: + """Execute cleanup of resources for the collector""" + + +class _DBAddress: + """Helper class to hold and pretty-print connection details""" + + def __init__(self, host: str, port: int) -> None: + """Initialize the instance + :param host: host address for database connections + :param port: port number for database connections + """ + self.host = host.strip() if host else "" + self.port = port + self._check() + + def _check(self) -> None: + """Validate input arguments""" + if not self.host: + raise ValueError(f"{type(self).__name__} requires host") + if not self.port: + raise ValueError(f"{type(self).__name__} requires port") + + def __str__(self) -> str: + """Pretty-print the instance""" + return f"{self.host}:{self.port}" + + +class DBCollector(Collector): + """A base class for collectors that retrieve statistics from an orchestrator""" + + def __init__(self, entity: JobEntity, sink: Sink) -> None: + """Initialize the `DBCollector` + + :param entity: entity with metadata about the resource to monitor + :param sink: destination to write collected information + """ + super().__init__(entity, sink) + self._client: t.Optional[redisa.Redis[bytes]] = None + self._address = _DBAddress( + self._entity.config.get("host", ""), + int(self._entity.config.get("port", 0)), + ) + + async def _configure_client(self) -> None: + """Configure the client connection to the target database""" + try: + if not self._client: + self._client = redisa.Redis( + host=self._address.host, port=self._address.port + ) + except Exception as e: + logger.exception(e) + finally: + if not self._client: + logger.error( + f"{type(self).__name__} failed to connect to {self._address}" + ) + + async def prepare(self) -> None: + """Initialization logic for the DB collector. 
Creates a database + connection then executes the `post_prepare` callback function.""" + if self._client: + return + + await self._configure_client() + await self._post_prepare() + + @abc.abstractmethod + async def _post_prepare(self) -> None: + """Hook function to enable subclasses to perform actions + after a db client is ready""" + + @abc.abstractmethod + async def _perform_collection( + self, + ) -> t.Sequence[t.Tuple[t.Union[int, float, str], ...]]: + """Hook function for subclasses to execute custom metric retrieval. + NOTE: all implementations return an iterable of metrics to avoid + adding extraneous base class code to differentiate the results + + :return: an iterable containing individual metric collection results + """ + + async def collect(self) -> None: + """Execute database metric collection if the collector is enabled. Writes + the resulting metrics to the associated output sink. Calling `collect` + when `self.enabled` is `False` performs no actions.""" + if not self.enabled: + # collectors may be disabled by monitoring changes to the + # manifest. 
Leave the collector but do NOT collect + logger.debug(f"{type(self).__name__} is not enabled") + return + + await self.prepare() + if not self._client: + logger.warning(f"{type(self).__name__} cannot collect") + return + + try: + # if we can't communicate w/the db, exit + if not await self._check_db(): + return + + all_metrics = await self._perform_collection() + for metrics in all_metrics: + await self._sink.save(*metrics) + except Exception as ex: + logger.warning(f"Collect failed for {type(self).__name__}", exc_info=ex) + + async def shutdown(self) -> None: + """Execute cleanup of database client connections""" + try: + if self._client: + logger.info( + f"Shutting down {self._entity.name}::{self.__class__.__name__}" + ) + await self._client.close() + self._client = None + except Exception as ex: + logger.error( + f"An error occurred during {type(self).__name__} shutdown", exc_info=ex + ) + + async def _check_db(self) -> bool: + """Check if the target database is reachable. + + :return: `True` if connection succeeds, `False` otherwise. 
+ """ + try: + if self._client: + return await self._client.ping() + except redisex.ConnectionError: + logger.warning(f"Cannot ping db {self._address}") + + return False + + +class DBMemoryCollector(DBCollector): + """A `DBCollector` that collects memory consumption metrics""" + + def __init__(self, entity: JobEntity, sink: Sink) -> None: + super().__init__(entity, sink) + self._columns = ["used_memory", "used_memory_peak", "total_system_memory"] + + async def _post_prepare(self) -> None: + """Write column headers for a CSV formatted output sink after + the database connection is established""" + await self._sink.save("timestamp", *self._columns) + + async def _perform_collection( + self, + ) -> t.Sequence[t.Tuple[int, float, float, float]]: + """Perform memory metric collection and return the results + + :return: an iterable containing individual metric collection results + in the format `(timestamp,used_memory,used_memory_peak,total_system_memory)` + """ + if self._client is None: + return [] + + db_info = await self._client.info("memory") + + used = float(db_info["used_memory"]) + peak = float(db_info["used_memory_peak"]) + total = float(db_info["total_system_memory"]) + + value = (get_ts_ms(), used, peak, total) + + # return a list containing a single record to simplify the parent + # class code to save multiple records from a single collection + return [value] + + +class DBConnectionCollector(DBCollector): + """A `DBCollector` that collects database client-connection metrics""" + + def __init__(self, entity: JobEntity, sink: Sink) -> None: + super().__init__(entity, sink) + self._columns = ["client_id", "address"] + + async def _post_prepare(self) -> None: + """Write column headers for a CSV formatted output sink after + the database connection is established""" + await self._sink.save("timestamp", *self._columns) + + async def _perform_collection( + self, + ) -> t.Sequence[t.Tuple[t.Union[int, str, str], ...]]: + """Perform connection metric collection and 
return the results + + :return: an iterable containing individual metric collection results + in the format `(timestamp,client_id,address)` + """ + if self._client is None: + return [] + + now_ts = get_ts_ms() + clients = await self._client.client_list() + + values: t.List[t.Tuple[int, str, str]] = [] + + # content-filter the metrics and return them all together + for client in clients: + # all records for the request will have the same timestamp + value = now_ts, client["id"], client["addr"] + values.append(value) + + return values + + +class DBConnectionCountCollector(DBCollector): + """A DBCollector that collects aggregated client-connection count metrics""" + + def __init__(self, entity: JobEntity, sink: Sink) -> None: + super().__init__(entity, sink) + self._columns = ["num_clients"] + + async def _post_prepare(self) -> None: + """Write column headers for a CSV formatted output sink after + the database connection is established""" + await self._sink.save("timestamp", *self._columns) + + async def _perform_collection( + self, + ) -> t.Sequence[t.Tuple[int, int]]: + """Perform connection-count metric collection and return the results + + :return: an iterable containing individual metric collection results + in the format `(timestamp,num_clients)` + """ + if self._client is None: + return [] + + client_list = await self._client.client_list() + + addresses = {item["addr"] for item in client_list} + + # return a list containing a single record to simplify the parent + # class code to save multiple records from a single collection + value = (get_ts_ms(), len(addresses)) + return [value] + + +class CollectorManager: + """The `CollectorManager` manages the set of all collectors required to retrieve + metrics for an experiment. It provides the ability to add and remove collectors + with unique configuration per entity. The `CollectorManager` is primarily used + to perform bulk actions on 1-to-many collectors (e.g. 
prepare all collectors, + request metrics for all collectors, close all collector connections)""" + + def __init__(self, timeout_ms: int = 1000) -> None: + """Initialize the `CollectorManager` without collectors + :param timeout_ms: maximum time (in ms) allowed for `Collector.collect` + """ + # A lookup table to hold a list of registered collectors per entity + self._collectors: t.Dict[str, t.List[Collector]] = collections.defaultdict(list) + # Max time to allow a collector to work before cancelling requests + self._timeout_ms = timeout_ms + + def clear(self) -> None: + """Remove all collectors from the monitored set""" + self._collectors = collections.defaultdict(list) + + def add(self, collector: Collector) -> None: + """Add a collector to the monitored set + + :param collector: `Collector` instance to monitor + """ + entity_name = collector.entity.name + + registered_collectors = self._collectors[entity_name] + + # Exit if the collector is already registered to the entity + if any(c for c in registered_collectors if type(c) is type(collector)): + return + + logger.debug(f"Adding collector: {entity_name}::{type(collector).__name__}") + registered_collectors.append(collector) + + def add_all(self, collectors: t.Sequence[Collector]) -> None: + """Add multiple collectors to the monitored set + + :param collectors: a collection of `Collectors` to monitor + """ + for collector in collectors: + self.add(collector) + + async def remove_all(self, entities: t.Sequence[JobEntity]) -> None: + """Remove all collectors registered to the supplied entities + + :param entities: a collection of `JobEntity` instances that will + no longer have registered collectors + """ + if not entities: + return + + tasks = (self.remove(entity) for entity in entities) + await asyncio.gather(*tasks) + + async def remove(self, entity: JobEntity) -> None: + """Remove all collectors registered to the supplied entity + + :param entities: `JobEntity` that will no longer have registered collectors + 
""" + registered = self._collectors.pop(entity.name, []) + if not registered: + return + + logger.debug(f"Removing collectors registered for {entity.name}") + asyncio.gather(*(collector.shutdown() for collector in registered)) + + async def prepare(self) -> None: + """Prepare registered collectors to perform collection""" + tasks = (collector.prepare() for collector in self.all_collectors) + # use gather so all collectors are prepared before collection + await asyncio.gather(*tasks) + + async def collect(self) -> None: + """Perform collection for all registered collectors""" + if collectors := self.all_collectors: + tasks = [asyncio.create_task(item.collect()) for item in collectors] + + _, pending = await asyncio.wait(tasks, timeout=self._timeout_ms / 1000.0) + + # any tasks still pending has exceeded the timeout + if pending: + # manually cancel tasks since asyncio.wait will not + for remaining_task in pending: + remaining_task.cancel() + logger.debug(f"Execution of {len(pending)} collectors timed out.") + + async def shutdown(self) -> None: + """Release resources for all registered collectors""" + logger.debug(f"{type(self).__name__} shutting down collectors...") + if list(self.all_collectors): + shutdown_tasks = [] + # create an async tasks to execute all shutdowns in parallel + for item in self.all_collectors: + shutdown_tasks.append(asyncio.create_task(item.shutdown())) + # await until all shutdowns are complete + await asyncio.wait(shutdown_tasks) + logger.debug("Collector shutdown complete...") + + @property + def all_collectors(self) -> t.Sequence[Collector]: + """Get a list of all registered collectors + + :return: a collection of registered collectors for all entities + """ + # flatten and return all the lists-of-collectors that are registered + collectors = itertools.chain.from_iterable(self._collectors.values()) + return [collector for collector in collectors if collector.enabled] + + @property + def dead_collectors(self) -> t.Sequence[Collector]: + 
"""Get a list of all disabled collectors + + :return: a collection of disabled collectors for all entities + """ + collectors = itertools.chain.from_iterable(self._collectors.values()) + return [collector for collector in collectors if not collector.enabled] + + def register_collectors(self, entity: JobEntity) -> None: + """Find all configured collectors for the entity and register them + + :param entity: a `JobEntity` instance that will have all configured collectors + registered for collection. Configuration is found in the `RuntimeManifest` + """ + collectors: t.List[Collector] = [] + + # ONLY db telemetry is implemented at this time. This resolver must + # be updated when non-database or always-on collectors are introduced + if entity.is_db and entity.telemetry_on: + if mem_out := entity.collectors.get("memory", None): + collectors.append(DBMemoryCollector(entity, FileSink(mem_out))) + + if con_out := entity.collectors.get("client", None): + collectors.append(DBConnectionCollector(entity, FileSink(con_out))) + + if num_out := entity.collectors.get("client_count", None): + collectors.append(DBConnectionCountCollector(entity, FileSink(num_out))) + else: + logger.debug(f"Collectors disabled for db {entity.name}") + + self.add_all(collectors) + + def register_all_collectors(self, entities: t.Sequence[JobEntity]) -> None: + """Find all configured collectors for the entity and register them + + :param entities: entities to call `register_collectors` for + """ + for entity in entities: + self.register_collectors(entity) diff --git a/smartsim/_core/utils/telemetry/manifest.py b/smartsim/_core/utils/telemetry/manifest.py new file mode 100644 index 000000000..942fa4ae8 --- /dev/null +++ b/smartsim/_core/utils/telemetry/manifest.py @@ -0,0 +1,242 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024 Hewlett Packard Enterprise +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
import json
import logging
import pathlib
import time
import typing as t
from dataclasses import dataclass, field

from smartsim._core.control.job import JobEntity

logger = logging.getLogger("TelemetryMonitor")


@dataclass
class Run:
    """
    A Run contains the collection of entities created when a `SmartSim`
    driver script executes `Experiment.start`"""

    timestamp: int
    """the timestamp at the time the `Experiment.start` is called"""
    models: t.List[JobEntity]
    """models started in this run"""
    orchestrators: t.List[JobEntity]
    """orchestrators started in this run"""
    ensembles: t.List[JobEntity]
    """ensembles started in this run"""

    def flatten(
        self, filter_fn: t.Optional[t.Callable[[JobEntity], bool]] = None
    ) -> t.Sequence[JobEntity]:
        """Flatten all `JobEntity`'s in the `Run` into a 1-dimensional list

        :param filter_fn: optional boolean filter that returns
        True for entities to include in the result
        :return: all entities in the run, optionally filtered
        """
        entities = self.models + self.orchestrators + self.ensembles
        if filter_fn:
            entities = [entity for entity in entities if filter_fn(entity)]
        return entities

    @staticmethod
    def load_entity(
        entity_type: str,
        entity_dict: t.Dict[str, t.Any],
        exp_dir: pathlib.Path,
        raw_experiment: t.Dict[str, t.Any],
    ) -> t.List[JobEntity]:
        """Map entity data persisted in a manifest file to an object

        :param entity_type: type of the associated `SmartSimEntity`
        :param entity_dict: raw dictionary deserialized from entity in manifest JSON
        :param exp_dir: root path to experiment outputs
        :param raw_experiment: raw experiment deserialized from manifest JSON
        :return: list of loaded `JobEntity` instances
        """
        entities = []

        # an entity w/parent keys must create entities for the items that it
        # comprises. Traverse the children and create each entity
        parent_keys = {"shards", "models"}
        parent_keys = parent_keys.intersection(entity_dict.keys())
        if parent_keys:
            container = "shards" if "shards" in parent_keys else "models"
            child_type = "orchestrator" if container == "shards" else "model"
            for child_entity in entity_dict[container]:
                entity = JobEntity.from_manifest(
                    child_type, child_entity, str(exp_dir), raw_experiment
                )
                entities.append(entity)

            return entities

        # not a parent type, just create the entity w/the entity_type passed in
        entity = JobEntity.from_manifest(
            entity_type, entity_dict, str(exp_dir), raw_experiment
        )
        entities.append(entity)
        return entities

    @staticmethod
    def load_entities(
        entity_type: str,
        run: t.Dict[str, t.Any],
        exp_dir: pathlib.Path,
        raw_experiment: t.Dict[str, t.Any],
    ) -> t.Dict[str, t.List[JobEntity]]:
        """Map a collection of entity data persisted in a manifest file to an object

        :param entity_type: type of the associated `SmartSimEntity`
        :param run: raw dictionary containing `Run` data deserialized from JSON
        :param exp_dir: root path to experiment outputs
        :param raw_experiment: raw experiment deserialized from manifest JSON
        :return: mapping of entity type to a list of loaded `JobEntity` instances
        """
        persisted: t.Dict[str, t.List[JobEntity]] = {
            "model": [],
            "orchestrator": [],
        }
        for item in run[entity_type]:
            entities = Run.load_entity(entity_type, item, exp_dir, raw_experiment)
            for new_entity in entities:
                # use setdefault so unexpected entity types (e.g. ensemble)
                # are collected instead of raising KeyError
                persisted.setdefault(new_entity.type, []).append(new_entity)

        return persisted

    @staticmethod
    def load_run(
        raw_run: t.Dict[str, t.Any],
        exp_dir: pathlib.Path,
        raw_experiment: t.Dict[str, t.Any],
    ) -> "Run":
        """Map run data persisted in a manifest file to an object

        :param raw_run: raw dictionary containing `Run` data deserialized from JSON
        :param exp_dir: root path to experiment outputs
        :param raw_experiment: raw experiment deserialized from manifest JSON
        :return: populated `Run` instance
        """

        # create an output mapping to hold the deserialized entities
        run_entities: t.Dict[str, t.List[JobEntity]] = {
            "model": [],
            "orchestrator": [],
            "ensemble": [],
        }

        # use the output mapping keys to load all the target
        # entities from the deserialized JSON
        for entity_type in run_entities:
            _entities = Run.load_entities(entity_type, raw_run, exp_dir, raw_experiment)

            # load_entities may return a mapping containing types different from
            # entity_type IF it was a parent entity. Iterate through the keys in
            # the output dictionary and put them in the right place.
            # NOTE: a distinct loop variable is used to avoid shadowing the
            # outer `entity_type` loop variable
            for parsed_type, new_entities in _entities.items():
                if not new_entities:
                    continue
                run_entities[parsed_type].extend(new_entities)

        loaded_run = Run(
            raw_run["timestamp"],
            run_entities["model"],
            run_entities["orchestrator"],
            run_entities["ensemble"],
        )
        return loaded_run


@dataclass
class RuntimeManifest:
    """The runtime manifest holds information about the entities created
    at runtime during a SmartSim Experiment. The runtime manifest differs
    from a standard manifest - it may contain multiple experiment
    executions in a `runs` collection and holds information that is unknown
    at design-time, such as IP addresses of host machines.
    """

    name: str
    """The name of the `Experiment` associated to the `RuntimeManifest`"""
    path: pathlib.Path
    """The path to the `Experiment` working directory"""
    launcher: str
    """The launcher type used by the `Experiment`"""
    runs: t.List[Run] = field(default_factory=list)
    """A `List` of 0 to many `Run` instances"""

    @staticmethod
    def load_manifest(file_path: str) -> t.Optional["RuntimeManifest"]:
        """Load a persisted manifest and return the content

        :param file_path: path to the manifest file to load
        :return: deserialized `RuntimeManifest` if the manifest file is found,
        otherwise None
        :raises ValueError: if the manifest is missing the experiment or runs
        """
        manifest_dict: t.Optional[t.Dict[str, t.Any]] = None
        try_count, max_attempts = 1, 5

        # allow multiple read attempts in case the manifest is being
        # written at the time load_manifest is called
        while manifest_dict is None and try_count <= max_attempts:
            source = pathlib.Path(file_path)
            source = source.resolve()
            time.sleep(0.01)  # a tiny sleep avoids reading partially written json

            try:
                if text := source.read_text(encoding="utf-8").strip():
                    manifest_dict = json.loads(text)
            except json.JSONDecodeError as ex:
                # use the module logger instead of print for error reporting
                logger.error(f"Error loading manifest: {ex}")
                # hack/fix: handle issues reading file before it is fully written
                time.sleep(0.1 * try_count)
            finally:
                try_count += 1

        if not manifest_dict:
            return None

        # if we don't have an experiment, the manifest is malformed
        exp = manifest_dict.get("experiment", None)
        if not exp:
            raise ValueError("Manifest missing required experiment")

        # if we don't have runs, the manifest is malformed
        runs = manifest_dict.get("runs", None)
        if runs is None:
            raise ValueError("Manifest missing required runs")

        exp_dir = pathlib.Path(exp["path"])
        runs = [Run.load_run(raw_run, exp_dir, exp) for raw_run in runs]

        manifest = RuntimeManifest(
            name=exp["name"],
            path=exp_dir,
            launcher=exp["launcher"],
            runs=runs,
        )
        return manifest
import abc
import logging
import pathlib
import typing as t

logger = logging.getLogger("TelemetryMonitor")


class Sink(abc.ABC):
    """Base class for output sinks. Represents a durable, append-only
    storage mechanism for collected telemetry"""

    @abc.abstractmethod
    async def save(self, *args: t.Any) -> None:
        """Save the args passed to this method to the underlying sink

        :param args: variadic list of values to save
        """


class FileSink(Sink):
    """Telemetry sink that writes to a file"""

    def __init__(self, path: str) -> None:
        """Initialize the FileSink

        :param path: path to a file backing this `Sink`
        :raises ValueError: if an empty path is provided
        """
        super().__init__()
        self._check_init(path)
        self._path = pathlib.Path(path)

    @staticmethod
    def _check_init(filename: str) -> None:
        """Validate initialization arguments and raise a ValueError
        if an invalid filename is passed

        :param filename: path to a file backing this `Sink`
        :raises ValueError: if an empty filename is provided
        """
        if not filename:
            raise ValueError("No filename provided to FileSink")

    @property
    def path(self) -> pathlib.Path:
        """The path to the file this FileSink writes

        :return: path to a file backing this `Sink`
        """
        return self._path

    async def save(self, *args: t.Any) -> None:
        """Append the values to the file as one comma-separated line

        :param args: variadic list of values to save
        """
        # create parent directories lazily so the sink can be constructed
        # before the telemetry output directory exists
        self._path.parent.mkdir(parents=True, exist_ok=True)

        with open(self._path, "a+", encoding="utf-8") as sink_fp:
            values = ",".join(map(str, args)) + "\n"
            sink_fp.write(values)
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import asyncio +import json +import logging +import os +import pathlib +import threading +import typing as t + +from watchdog.events import ( + FileSystemEvent, + LoggingEventHandler, + PatternMatchingEventHandler, +) +from watchdog.observers import Observer +from watchdog.observers.api import BaseObserver + +from smartsim._core.config import CONFIG +from smartsim._core.control.job import JobEntity, _JobKey +from smartsim._core.control.jobmanager import JobManager +from smartsim._core.launcher.dragon.dragonLauncher import DragonLauncher +from smartsim._core.launcher.launcher import Launcher +from smartsim._core.launcher.local.local import LocalLauncher +from smartsim._core.launcher.lsf.lsfLauncher import LSFLauncher +from smartsim._core.launcher.pbs.pbsLauncher import PBSLauncher +from smartsim._core.launcher.slurm.slurmLauncher import SlurmLauncher +from smartsim._core.launcher.stepInfo import StepInfo +from smartsim._core.utils.helpers import get_ts_ms +from 
smartsim._core.utils.serialize import MANIFEST_FILENAME +from smartsim._core.utils.telemetry.collector import CollectorManager +from smartsim._core.utils.telemetry.manifest import Run, RuntimeManifest +from smartsim._core.utils.telemetry.util import map_return_code, write_event +from smartsim.error.errors import SmartSimError +from smartsim.status import TERMINAL_STATUSES + +logger = logging.getLogger("TelemetryMonitor") + + +class ManifestEventHandler(PatternMatchingEventHandler): + """The ManifestEventHandler monitors an experiment and updates a + datastore as needed. This event handler is triggered by changes to + the experiment manifest written to physical disk by a driver. + + It also contains an event loop. The loop checks experiment entities for updates + at each timestep and executes a configurable set of metrics collectors.""" + + def __init__( + self, + pattern: str, + ignore_patterns: t.Optional[t.List[str]] = None, + ignore_directories: bool = True, + case_sensitive: bool = False, + timeout_ms: int = 1000, + ) -> None: + """Initialize the manifest event handler + + :param pattern: a pattern that identifies the files whose + events are of interest by matching their name + :param ignore_patterns: a pattern that identifies the files whose + events should be ignored + :param ignore_directories: set to `True` to avoid directory events + :param case_sensitive: set to `True` to require case sensitivity in + resource names in order to match input patterns + :param timeout_ms: maximum duration (in ms) of a call to the event + loop prior to cancelling tasks + """ + super().__init__( + [pattern], ignore_patterns, ignore_directories, case_sensitive + ) # type: ignore + self._tracked_runs: t.Dict[int, Run] = {} + self._tracked_jobs: t.Dict[_JobKey, JobEntity] = {} + self._completed_jobs: t.Dict[_JobKey, JobEntity] = {} + self._launcher: t.Optional[Launcher] = None + self.job_manager: JobManager = JobManager(threading.RLock()) + self._launcher_map: t.Dict[str, 
t.Type[Launcher]] = { + "slurm": SlurmLauncher, + "pbs": PBSLauncher, + "lsf": LSFLauncher, + "local": LocalLauncher, + "dragon": DragonLauncher, + } + self._collector_mgr = CollectorManager(timeout_ms) + + @property + def tracked_jobs(self) -> t.Sequence[JobEntity]: + """The collection of `JobEntity` that are actively being monitored + + :return: the collection + """ + return list(self._tracked_jobs.values()) + + def init_launcher(self, launcher: str) -> None: + """Initialize the controller with a specific type of launcher. + SmartSim currently supports Slurm, PBS(Pro), LSF, Dragon + and local launching + + :param launcher: the name of the workload manager used by the experiment + :raises ValueError: if a string is passed that is not + a supported launcher + :raises TypeError: if no launcher argument is provided. + """ + if not launcher: + raise TypeError("Must provide a 'launcher' argument") + + if launcher_type := self._launcher_map.get(launcher.lower(), None): + self._launcher = launcher_type() + return + + raise ValueError("Launcher type not supported: " + launcher) + + def init_job_manager(self) -> None: + """Initialize the job manager instance""" + if not self._launcher: + raise TypeError("self._launcher must be initialized") + + self.job_manager.set_launcher(self._launcher) + self.job_manager.start() + + def set_launcher(self, launcher_type: str) -> None: + """Set the launcher for the experiment + :param launcher_type: the name of the workload manager used by the experiment + """ + self.init_launcher(launcher_type) + + if self._launcher is None: + raise SmartSimError("Launcher init failed") + + self.job_manager.set_launcher(self._launcher) + self.job_manager.start() + + def process_manifest(self, manifest_path: str) -> None: + """Read the manifest for the experiment. 
Process the + `RuntimeManifest` by updating the set of tracked jobs + and registered collectors + + :param manifest_path: full path to the manifest file + """ + try: + # it is possible to read the manifest prior to a completed + # write due to no access locking mechanism. log the issue + # and continue. it will retry on the next event loop iteration + manifest = RuntimeManifest.load_manifest(manifest_path) + if not manifest: + logger.debug("No manifest file exists") + return + except json.JSONDecodeError: + logger.error(f"Malformed manifest encountered: {manifest_path}") + return + except ValueError: + logger.error("Manifest content error", exc_info=True) + return + + if self._launcher is None: + self.set_launcher(manifest.launcher) + + if not self._launcher: + raise SmartSimError(f"Unable to set launcher from {manifest_path}") + + # filter out previously added items + runs = [run for run in manifest.runs if run.timestamp not in self._tracked_runs] + + # manifest is stored at /.smartsim/telemetry/manifest.json + exp_dir = pathlib.Path(manifest_path).parent.parent.parent + + for run in runs: + for entity in run.flatten( + filter_fn=lambda e: e.key not in self._tracked_jobs + ): + entity.path = str(exp_dir) + + # track everything coming in (managed and unmanaged) + self._tracked_jobs[entity.key] = entity + + # register collectors for new entities as needed + if entity.telemetry_on: + self._collector_mgr.register_collectors(entity) + + # persist a `start` event for each new entity in the manifest + write_event( + run.timestamp, + entity.task_id, + entity.step_id, + entity.type, + "start", + pathlib.Path(entity.status_dir), + ) + + if entity.is_managed: + # Tell JobManager the task is unmanaged. 
This collects + # status updates but does not try to start a new copy + self.job_manager.add_job( + entity.name, + entity.step_id, + entity, + False, + ) + # Tell the launcher it's managed so it doesn't attempt + # to look for a PID that may no longer exist + self._launcher.step_mapping.add( + entity.name, entity.step_id, "", True + ) + self._tracked_runs[run.timestamp] = run + + def on_modified(self, event: FileSystemEvent) -> None: + """Event handler for when a file or directory is modified. + + :param event: event representing file/directory modification. + """ + super().on_modified(event) + logger.debug(f"Processing manifest modified @ {event.src_path}") + self.process_manifest(event.src_path) + + def on_created(self, event: FileSystemEvent) -> None: + """Event handler for when a file or directory is created. + + :param event: event representing file/directory creation. + """ + super().on_created(event) + logger.debug(f"processing manifest created @ {event.src_path}") + self.process_manifest(event.src_path) + + async def _to_completed( + self, + timestamp: int, + entity: JobEntity, + step_info: StepInfo, + ) -> None: + """Move a monitored entity from the active to completed collection to + stop monitoring for updates during timesteps. 
+ + :param timestamp: current timestamp for event logging + :param entity: running SmartSim Job + :param step_info: `StepInfo` received when requesting a Job status update + """ + # remember completed entities to ignore them after manifest updates + inactive_entity = self._tracked_jobs.pop(entity.key) + if entity.key not in self._completed_jobs: + self._completed_jobs[entity.key] = inactive_entity + + # remove all the registered collectors for the completed entity + await self._collector_mgr.remove(entity) + + job = self.job_manager[entity.name] + self.job_manager.move_to_completed(job) + + status_clause = f"status: {step_info.status}" + error_clause = f", error: {step_info.error}" if step_info.error else "" + + write_path = pathlib.Path(entity.status_dir) + + # persist a `stop` event for an entity that has completed + write_event( + timestamp, + entity.task_id, + entity.step_id, + entity.type, + "stop", + write_path, + detail=f"{status_clause}{error_clause}", + return_code=map_return_code(step_info), + ) + + async def on_timestep(self, timestamp: int) -> None: + """Called at polling frequency to request status updates on + monitored entities + + :param timestamp: current timestamp for event logging + """ + if not self._launcher: + return + + await self._collector_mgr.collect() + + # ensure unmanaged jobs move out of tracked jobs list + u_jobs = [job for job in self._tracked_jobs.values() if not job.is_managed] + for job in u_jobs: + job.check_completion_status() + if job.is_complete: + completed_entity = self._tracked_jobs.pop(job.key) + self._completed_jobs[job.key] = completed_entity + + # consider not using name to avoid collisions + m_jobs = [job for job in self._tracked_jobs.values() if job.is_managed] + if names := {entity.name: entity for entity in m_jobs}: + step_updates: t.List[t.Tuple[str, t.Optional[StepInfo]]] = [] + + try: + task_names = list(names.keys()) + updates = self._launcher.get_step_update(task_names) + step_updates.extend(updates) + 
logger.debug(f"Retrieved updates for: {task_names}") + except Exception: + logger.warning(f"Telemetry step updates failed for {names.keys()}") + + try: + for step_name, step_info in step_updates: + if step_info and step_info.status in TERMINAL_STATUSES: + completed_entity = names[step_name] + await self._to_completed(timestamp, completed_entity, step_info) + except Exception as ex: + msg = f"An error occurred getting step updates on {names}" + logger.error(msg, exc_info=ex) + + async def shutdown(self) -> None: + """Release all resources owned by the `ManifestEventHandler`""" + logger.debug(f"{type(self).__name__} shutting down...") + await self._collector_mgr.shutdown() + logger.debug(f"{type(self).__name__} shutdown complete...") + + +class TelemetryMonitorArgs: + """Strongly typed entity to house logic for validating + configuration passed to the telemetry monitor""" + + def __init__( + self, + exp_dir: str, + frequency: int, + cooldown: int, + log_level: int = logging.DEBUG, + ) -> None: + """Initialize the instance with inputs and defaults + + :param exp_dir: root path to experiment outputs + :param frequency: desired frequency of metric & status updates (in seconds) + :param frequency: cooldown period (in seconds) before automatic shutdown + :param log_level: log level to apply to python logging + """ + self.exp_dir: str = exp_dir + self.frequency: int = frequency # freq in seconds + self.cooldown: int = cooldown # cooldown in seconds + self.log_level: int = log_level + self._validate() + + @property + def min_frequency(self) -> int: + """The minimum duration (in seconds) for the monitoring loop to wait + between executions of the monitoring loop. Shorter frequencies may + not allow the monitoring loop to complete. 
Adjusting the minimum frequency + can result in inconsistent or missing outputs due to the telemetry + monitor cancelling processes that exceed the allotted frequency.""" + return 1 + + @property + def max_frequency(self) -> int: + """The maximum duration (in seconds) for the monitoring loop to wait + between executions of the monitoring loop. Longer frequencies potentially + keep the telemetry monitor alive unnecessarily.""" + return 600 + + @property + def min_cooldown(self) -> int: + """The minimum allowed cooldown period that can be configured. Ensures + the cooldown does not cause the telemetry monitor to shutdown prior to + completing a single pass through the monitoring loop""" + return min(self.frequency + 1, self.cooldown) + + @property + def max_cooldown(self) -> int: + """The maximum allowed cooldown period that can be configured. Ensures the + telemetry monitor can automatically shutdown if not needed""" + return self.max_frequency + + @property + def cooldown_ms(self) -> int: + """The duration of the time period (in ms) the telemetry monitor will + wait for new resources to monitor before shutting down""" + return self.cooldown * 1000 + + @property + def frequency_ms(self) -> int: + """The desired frequency (in ms) of the telemetry monitor attempts + to retrieve status updates and metrics""" + return self.frequency * 1000 + + def _check_exp_dir(self) -> None: + """Validate the existence of the experiment directory""" + if not pathlib.Path(self.exp_dir).exists(): + raise ValueError(f"Experiment directory cannot be found: {self.exp_dir}") + + def _check_frequency(self) -> None: + """Validate the frequency input is in the range + [`min_frequency`, `max_frequency`]""" + if self.max_frequency >= self.frequency >= self.min_frequency: + return + + freq_tpl = "Telemetry collection frequency must be in the range [{0}, {1}]" + raise ValueError(freq_tpl.format(self.min_frequency, self.max_frequency)) + + def _check_log_level(self) -> None: + """Validate the 
frequency log level input. Uses standard python log levels""" + if self.log_level not in [ + logging.DEBUG, + logging.INFO, + logging.WARNING, + logging.ERROR, + ]: + raise ValueError(f"Invalid log_level supplied: {self.log_level}") + + def _validate(self) -> None: + """Execute all validation functions""" + self._check_exp_dir() + self._check_frequency() + self._check_log_level() + + +class TelemetryMonitor: + """The telemetry monitor is a standalone process managed by SmartSim to perform + long-term retrieval of experiment status updates and resource usage + metrics. Note that a non-blocking driver script is likely to complete before + the SmartSim entities complete. Also, the JobManager performs status updates + only as long as the driver is running. This telemetry monitor entrypoint is + started automatically when a SmartSim experiment calls the `start` method + on resources. The entrypoint runs until it has no resources to monitor.""" + + def __init__(self, telemetry_monitor_args: TelemetryMonitorArgs): + """Initialize the telemetry monitor instance + + :param telemetry_monitor_args: configuration for the telemetry monitor + """ + self._observer: BaseObserver = Observer() + """an observer object that triggers the action handler""" + self._args = telemetry_monitor_args + """user-supplied arguments configuring telemetry monitor behavior""" + self._experiment_dir = pathlib.Path(self._args.exp_dir) + """path to the root directory where experiment outputs are written""" + self._telemetry_path = self._experiment_dir / CONFIG.telemetry_subdir + """path to the root directory where telemetry outputs are written""" + self._manifest_path = self._telemetry_path / MANIFEST_FILENAME + """path to the runtime manifest file""" + self._action_handler: t.Optional[ManifestEventHandler] = None + """an event listener holding action handlers for manifest on-change events""" + + def _can_shutdown(self) -> bool: + """Determines if the telemetry monitor can perform shutdown. 
An + automatic shutdown will occur if there are no active jobs being monitored. + Managed jobs and databases are considered separately due to the way they + are stored in the job manager + + :return: return True if capable of automatically shutting down + """ + managed_jobs = ( + list(self._action_handler.job_manager.jobs.values()) + if self._action_handler + else [] + ) + unmanaged_jobs = ( + list(self._action_handler.tracked_jobs) if self._action_handler else [] + ) + # get an individual count of databases for logging + n_dbs: int = len( + [ + job + for job in managed_jobs + unmanaged_jobs + if isinstance(job, JobEntity) and job.is_db + ] + ) + + # if we have no jobs currently being monitored we can shutdown + n_jobs = len(managed_jobs) + len(unmanaged_jobs) - n_dbs + shutdown_ok = n_jobs + n_dbs == 0 + + logger.debug(f"{n_jobs} active job(s), {n_dbs} active db(s)") + return shutdown_ok + + async def monitor(self) -> None: + """The main monitoring loop. Executes a busy wait and triggers + telemetry collectors using frequency from constructor arguments. + Continue monitoring until it satisfies automatic shutdown criteria.""" + elapsed: int = 0 + last_ts: int = get_ts_ms() + shutdown_in_progress = False + + if self._action_handler is None: + raise ValueError("The action handler must be initialized to monitor") + + # Event loop runs until the observer shuts down or + # an automatic shutdown is started. + while self._observer.is_alive() and not shutdown_in_progress: + duration_ms = 0 + start_ts = get_ts_ms() + await self._action_handler.on_timestep(start_ts) + + elapsed += start_ts - last_ts + last_ts = start_ts + + # check if there are no jobs being monitored + if self._can_shutdown(): + # cooldown period begins accumulating when no entities are monitored + if elapsed >= self._args.cooldown_ms: + shutdown_in_progress = True + logger.info("Cooldown complete. 
Beginning shutdown") + await self._action_handler.shutdown() + logger.debug("Beginning file monitor shutdown") + self._observer.stop() # type: ignore + logger.debug("Event loop shutdown complete") + break + else: + # reset cooldown any time jobs are running + elapsed = 0 + + # track time elapsed to execute metric collection + duration_ms = get_ts_ms() - start_ts + wait_ms = max(self._args.frequency_ms - duration_ms, 0) + + # delay next loop if collection time didn't exceed loop frequency + wait_sec = wait_ms / 1000 # convert to seconds for sleep + if elapsed > 0: + completion_pct = elapsed / self._args.cooldown_ms * 100 + logger.info(f"Cooldown {completion_pct:.2f}% complete") + logger.debug(f"Collection in {wait_sec:.2f}s") + await asyncio.sleep(wait_sec) + + logger.info("Exiting telemetry monitor event loop") + + async def run(self) -> int: + """Setup the monitoring entities and start the timer-based loop that + will poll for telemetry data + + :return: return code for the process + """ + logger.info("Executing telemetry monitor") + logger.info(f"Polling frequency: {self._args.frequency}s") + logger.info(f"Experiment directory: {self._experiment_dir}") + logger.info(f"Telemetry output: {self._telemetry_path}") + + # Convert second-based inputs to milliseconds + frequency_ms = int(self._args.frequency * 1000) + + # Create event handlers to trigger when target files are changed + log_handler = LoggingEventHandler(logger) + self._action_handler = ManifestEventHandler( + str(MANIFEST_FILENAME), + timeout_ms=frequency_ms, + ignore_patterns=["*.out", "*.err"], + ) + + try: + # The manifest may not exist when the telemetry monitor starts + if self._manifest_path.exists(): + self._action_handler.process_manifest(str(self._manifest_path)) + + # Add a handler to log file-system events + self._observer.schedule(log_handler, self._telemetry_path) # type:ignore + # Add a handler to perform actions on file-system events + self._observer.schedule( + self._action_handler, 
self._telemetry_path + ) # type:ignore + self._observer.start() # type: ignore + + # kick off the 'infinite' monitoring loop + await self.monitor() + return os.EX_OK + except Exception as ex: + logger.error(ex) + finally: + await self._action_handler.shutdown() + self.cleanup() + logger.info("Telemetry monitor shutdown complete") + + return os.EX_SOFTWARE + + def cleanup(self) -> None: + """Perform cleanup for all allocated resources""" + if self._observer is not None and self._observer.is_alive(): + logger.debug("Cleaning up manifest observer") + self._observer.stop() # type: ignore + self._observer.join() diff --git a/smartsim/_core/utils/telemetry/util.py b/smartsim/_core/utils/telemetry/util.py new file mode 100644 index 000000000..2c51d9600 --- /dev/null +++ b/smartsim/_core/utils/telemetry/util.py @@ -0,0 +1,113 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024 Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# import asyncio +import json +import logging +import os +import pathlib +import typing as t + +from smartsim._core.launcher.stepInfo import StepInfo +from smartsim.status import TERMINAL_STATUSES, SmartSimStatus + +_EventClass = t.Literal["start", "stop", "timestep"] + +logger = logging.getLogger("TelemetryMonitor") + + +def write_event( + timestamp: int, + task_id: t.Union[int, str], + step_id: str, + entity_type: str, + event_type: _EventClass, + status_dir: pathlib.Path, + detail: str = "", + return_code: t.Optional[int] = None, +) -> None: + """Write a record to durable storage for a SmartSimEntity lifecycle event. + Does not overwrite existing records. + + :param timestamp: when the event occurred + :param task_id: the task_id of a managed task + :param step_id: the step_id of an unmanaged task + :param entity_type: the SmartSimEntity subtype + (e.g. `orchestrator`, `ensemble`, `model`, `dbnode`, ...) 
+ :param event_type: the event subtype + :param status_dir: path where the SmartSimEntity outputs are written + :param detail: (optional) additional information to write with the event + :param return_code: (optional) the return code of a completed task + """ + tgt_path = status_dir / f"{event_type}.json" + tgt_path.parent.mkdir(parents=True, exist_ok=True) + + try: + if task_id: + task_id = int(task_id) + except ValueError: + if not isinstance(task_id, str): + logger.exception(f"Unable to parse task_id: {task_id}") + + entity_dict = { + "timestamp": timestamp, + "job_id": task_id, + "step_id": step_id, + "type": entity_type, + "action": event_type, + } + + if detail is not None: + entity_dict["detail"] = detail + + if return_code is not None: + entity_dict["return_code"] = return_code + + try: + if not tgt_path.exists(): + # Don't overwrite existing tracking files + bytes_written = tgt_path.write_text(json.dumps(entity_dict, indent=2)) + if bytes_written < 1: + logger.warning("event tracking failed to write tracking file.") + except Exception: + logger.error("Unable to write tracking file.", exc_info=True) + + +def map_return_code(step_info: StepInfo) -> t.Optional[int]: + """Converts a return code from a workload manager into a SmartSim status. + + A non-terminal status is converted to null. This indicates + that the process referenced in the `StepInfo` is running + and does not yet have a return code. 
+ + :param step_info: step information produced by job manager status update queries + :return: a return code if the step is finished, otherwise None + """ + rc_map = {s: 1 for s in TERMINAL_STATUSES} # return `1` for all terminal statuses + rc_map.update( + {SmartSimStatus.STATUS_COMPLETED: os.EX_OK} + ) # return `0` for full success + + return rc_map.get(step_info.status, None) # return `None` when in-progress diff --git a/smartsim/database/orchestrator.py b/smartsim/database/orchestrator.py index 431cb43c5..f6ce0310f 100644 --- a/smartsim/database/orchestrator.py +++ b/smartsim/database/orchestrator.py @@ -23,7 +23,11 @@ # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# pylint: disable=too-many-lines + import itertools +import os.path as osp import sys import typing as t from os import environ, getcwd, getenv @@ -37,8 +41,13 @@ from .._core.utils import db_is_active from .._core.utils.helpers import is_valid_cmd, unpack_db_identifier from .._core.utils.network import get_ip_from_host -from ..entity import DBNode, EntityList -from ..error import SmartSimError, SSConfigError, SSUnsupportedError +from ..entity import DBNode, EntityList, TelemetryConfiguration +from ..error import ( + SmartSimError, + SSConfigError, + SSDBFilesNotParseable, + SSUnsupportedError, +) from ..log import get_logger from ..servertype import CLUSTERED, STANDALONE from ..settings import ( @@ -60,6 +69,7 @@ logger = get_logger(__name__) by_launcher: t.Dict[str, t.List[str]] = { + "dragon": [""], "slurm": ["srun", "mpirun", "mpiexec"], "pbs": ["aprun", "mpirun", "mpiexec"], "pals": ["mpiexec"], @@ -71,7 +81,7 @@ def _detect_command(launcher: str) -> str: if launcher in by_launcher: for cmd in by_launcher[launcher]: - if launcher == "local": + if launcher in ["local", "dragon"]: return cmd if is_valid_cmd(cmd): 
return cmd @@ -105,10 +115,15 @@ def _check_run_command(launcher: str, run_command: str) -> None: raise SmartSimError(msg) -def _get_single_command(run_command: str, batch: bool, single_cmd: bool) -> bool: +def _get_single_command( + run_command: str, launcher: str, batch: bool, single_cmd: bool +) -> bool: if not single_cmd: return single_cmd + if launcher == "dragon": + return False + if run_command == "srun" and getenv("SLURM_HET_SIZE") is not None: msg = ( "srun can not launch an orchestrator with single_cmd=True in " @@ -138,6 +153,7 @@ def _check_local_constraints(launcher: str, batch: bool) -> None: raise SmartSimError(msg) +# pylint: disable-next=too-many-public-methods class Orchestrator(EntityList[DBNode]): """The Orchestrator is an in-memory database that can be launched alongside entities in SmartSim. Data can be transferred between @@ -147,6 +163,7 @@ class Orchestrator(EntityList[DBNode]): def __init__( self, + path: t.Optional[str] = getcwd(), port: int = 6379, interface: t.Union[str, t.List[str]] = "lo", launcher: str = "local", @@ -165,28 +182,39 @@ def __init__( db_identifier: str = "orchestrator", **kwargs: t.Any, ) -> None: - """Initialize an Orchestrator reference for local launch - - :param port: TCP/IP port, defaults to 6379 - :type port: int, optional - :param interface: network interface(s), defaults to "lo" - :type interface: str, list[str], optional + """Initialize an ``Orchestrator`` reference for local launch Extra configurations for RedisAI - See https://oss.redislabs.com/redisai/configuration/ - + See https://oss.redis.com/redisai/configuration/ + + :param path: path to location of ``Orchestrator`` directory + :param port: TCP/IP port + :param interface: network interface(s) + :param launcher: type of launcher being used, options are "slurm", "pbs", + "lsf", or "local". If set to "auto", + an attempt will be made to find an available launcher + on the system. 
+ :param run_command: specify launch binary or detect automatically + :param db_nodes: number of database shards + :param batch: run as a batch workload + :param hosts: specify hosts to launch on + :param account: account to run batch on + :param time: walltime for batch 'HH:MM:SS' format + :param alloc: allocation to launch database on + :param single_cmd: run all shards with one (MPMD) command :param threads_per_queue: threads per GPU device - :type threads_per_queue: int, optional - :param inter_op_threads: threads accross CPU operations - :type inter_op_threads: int, optional + :param inter_op_threads: threads across CPU operations :param intra_op_threads: threads per CPU operation - :type intra_op_threads: int, optional + :param db_identifier: an identifier to distinguish this orchestrator in + multiple-database experiments """ self.launcher, self.run_command = _autodetect(launcher, run_command) _check_run_command(self.launcher, self.run_command) _check_local_constraints(self.launcher, batch) - single_cmd = _get_single_command(self.run_command, batch, single_cmd) + single_cmd = _get_single_command( + self.run_command, self.launcher, batch, single_cmd + ) self.ports: t.List[int] = [] self._hosts: t.List[str] = [] self._user_hostlist: t.List[str] = [] @@ -197,16 +225,16 @@ def __init__( self.queue_threads = threads_per_queue self.inter_threads = inter_op_threads self.intra_threads = intra_op_threads + self._telemetry_cfg = TelemetryConfiguration() gpus_per_shard: t.Optional[int] = None cpus_per_shard: t.Optional[int] = None if self.launcher == "lsf": gpus_per_shard = int(kwargs.pop("gpus_per_shard", 0)) cpus_per_shard = int(kwargs.pop("cpus_per_shard", 4)) - super().__init__( name=db_identifier, - path=getcwd(), + path=str(path), port=port, interface=interface, db_nodes=db_nodes, @@ -265,18 +293,16 @@ def db_identifier(self) -> str: """Return the DB identifier, which is common to a DB and all of its nodes :return: DB identifier - :rtype: str """ return self.name 
@property def num_shards(self) -> int: - """Return the number of DB shards contained in the orchestrator. + """Return the number of DB shards contained in the Orchestrator. This might differ from the number of ``DBNode`` objects, as each ``DBNode`` may start more than one shard (e.g. with MPMD). - :returns: num_shards - :rtype: int + :returns: the number of DB shards contained in the Orchestrator """ return sum(node.num_shards for node in self.entities) @@ -288,24 +314,30 @@ def db_nodes(self) -> int: an alias to the ``num_shards`` attribute. :returns: Number of database nodes - :rtype: int """ return self.num_shards @property def hosts(self) -> t.List[str]: - """Return the hostnames of orchestrator instance hosts + """Return the hostnames of Orchestrator instance hosts Note that this will only be populated after the orchestrator has been launched by SmartSim. - :return: hostnames - :rtype: list[str] + :return: the hostnames of Orchestrator instance hosts """ if not self._hosts: self._hosts = self._get_db_hosts() return self._hosts + @property + def telemetry(self) -> TelemetryConfiguration: + """Return the telemetry configuration for this entity. 
+ + :returns: configuration of telemetry for this entity + """ + return self._telemetry_cfg + def reset_hosts(self) -> None: """Clear hosts or reset them to last user choice""" for node in self.entities: @@ -325,7 +357,6 @@ def get_address(self) -> t.List[str]: """Return database addresses :return: addresses - :rtype: list[str] :raises SmartSimError: If database address cannot be found or is not active """ @@ -345,12 +376,12 @@ def is_active(self) -> bool: """Check if the database is active :return: True if database is active, False otherwise - :rtype: bool """ - if not self._hosts: + try: + hosts = self.hosts + except SSDBFilesNotParseable: return False - - return db_is_active(self._hosts, self.ports, self.num_shards) + return db_is_active(hosts, self.ports, self.num_shards) @property def _rai_module(self) -> t.Tuple[str, ...]: @@ -358,7 +389,6 @@ def _rai_module(self) -> t.Tuple[str, ...]: :return: Tuple of args to pass to the orchestrator exe to load and configure the RedisAI - :rtype: tuple[str] """ module = ["--loadmodule", CONFIG.redisai] if self.queue_threads: @@ -377,6 +407,14 @@ def _redis_exe(self) -> str: def _redis_conf(self) -> str: return CONFIG.database_conf + @property + def checkpoint_file(self) -> str: + """Get the path to the checkpoint file for this Orchestrator + + :return: Path to the checkpoint file if it exists, otherwise a None + """ + return osp.join(self.path, "smartsim_db.dat") + def set_cpus(self, num_cpus: int) -> None: """Set the number of CPUs available to each database shard @@ -384,7 +422,6 @@ def set_cpus(self, num_cpus: int) -> None: compute threads, background threads, and network I/O. :param num_cpus: number of cpus to set - :type num_cpus: int """ if self.batch: if self.launcher == "pbs": @@ -408,7 +445,6 @@ def set_walltime(self, walltime: str) -> None: Note: This will only effect orchestrators launched as a batch :param walltime: amount of time e.g. 
10 hours is 10:00:00 - :type walltime: str :raises SmartSimError: if orchestrator isn't launching as batch """ if not self.batch: @@ -421,7 +457,6 @@ def set_hosts(self, host_list: t.Union[t.List[str], str]) -> None: """Specify the hosts for the ``Orchestrator`` to launch on :param host_list: list of host (compute node names) - :type host_list: str, list[str] :raises TypeError: if wrong type """ if isinstance(host_list, str): @@ -432,9 +467,8 @@ def set_hosts(self, host_list: t.Union[t.List[str], str]) -> None: raise TypeError("host_list argument must be list of strings") self._user_hostlist = host_list.copy() # TODO check length - if self.batch: - if hasattr(self, "batch_settings") and self.batch_settings: - self.batch_settings.set_hostlist(host_list) + if self.batch and hasattr(self, "batch_settings") and self.batch_settings: + self.batch_settings.set_hostlist(host_list) if self.launcher == "lsf": for db in self.entities: @@ -465,9 +499,7 @@ def set_batch_arg(self, arg: str, value: t.Optional[str] = None) -> None: by SmartSim and will not be allowed to be set. :param arg: batch argument to set e.g. "exclusive" - :type arg: str :param value: batch param - set to None if no param value - :type value: str | None :raises SmartSimError: if orchestrator not launching as batch """ if not hasattr(self, "batch_settings") or not self.batch_settings: @@ -479,8 +511,7 @@ def set_batch_arg(self, arg: str, value: t.Optional[str] = None) -> None: "it is a reserved keyword in Orchestrator" ) else: - if hasattr(self, "batch_settings") and self.batch_settings: - self.batch_settings.batch_args[arg] = value + self.batch_settings.batch_args[arg] = value def set_run_arg(self, arg: str, value: t.Optional[str] = None) -> None: """Set a run argument the orchestrator should launch @@ -491,9 +522,7 @@ def set_run_arg(self, arg: str, value: t.Optional[str] = None) -> None: For example, "n", "N", etc. 
:param arg: run argument to set - :type arg: str :param value: run parameter - set to None if no parameter value - :type value: str | None """ if arg in self._reserved_run_args[type(self.entities[0].run_settings)]: logger.warning( @@ -514,7 +543,6 @@ def enable_checkpoints(self, frequency: int) -> None: after 900 seconds if there is at least 1 change to the dataset. :param frequency: the given number of seconds before the DB saves - :type frequency: int """ self.set_db_conf("save", f"{frequency} 1") @@ -523,15 +551,15 @@ def set_max_memory(self, mem: str) -> None: Setting max memory to zero also results in no memory limit. Once a limit is surpassed, keys will be removed according to the eviction strategy. The specified memory size is case insensitive and supports the typical forms of: - 1k => 1000 bytes - 1kb => 1024 bytes - 1m => 1000000 bytes - 1mb => 1024*1024 bytes - 1g => 1000000000 bytes + + 1k => 1000 bytes \n + 1kb => 1024 bytes \n + 1m => 1000000 bytes \n + 1mb => 1024*1024 bytes \n + 1g => 1000000000 bytes \n 1gb => 1024*1024*1024 bytes :param mem: the desired max memory size e.g. 3gb - :type mem: str :raises SmartSimError: If 'mem' is an invalid memory value :raises SmartSimError: If database is not active """ @@ -543,7 +571,6 @@ def set_eviction_strategy(self, strategy: str) -> None: :param strategy: The max memory policy to use e.g. "volatile-lru", "allkeys-lru", etc. - :type strategy: str :raises SmartSimError: If 'strategy' is an invalid maxmemory policy :raises SmartSimError: If database is not active """ @@ -556,7 +583,6 @@ def set_max_clients(self, clients: int = 50_000) -> None: incoming and another outgoing. :param clients: the maximum number of connected clients - :type clients: int, optional """ self.set_db_conf("maxclients", str(clients)) @@ -569,7 +595,6 @@ def set_max_message_size(self, size: int = 1_073_741_824) -> None: to 1gb, use 1024*1024*1024. 
:param size: maximum message size in bytes - :type size: int, optional """ self.set_db_conf("proto-max-bulk-len", str(size)) @@ -580,9 +605,7 @@ def set_db_conf(self, key: str, value: str) -> None: will take effect starting with the next command executed. :param key: the configuration parameter - :type key: str :param value: the database configuration parameter's new value - :type value: str """ if self.is_active(): addresses = [] @@ -847,6 +870,7 @@ def _get_start_script_args( ] if cluster: cmd.append("+cluster") # is the shard part of a cluster + return cmd def _get_db_hosts(self) -> t.List[str]: diff --git a/smartsim/entity/__init__.py b/smartsim/entity/__init__.py index 4566cd76f..40f03fcdd 100644 --- a/smartsim/entity/__init__.py +++ b/smartsim/entity/__init__.py @@ -27,7 +27,7 @@ from .dbnode import DBNode from .dbobject import * from .ensemble import Ensemble -from .entity import SmartSimEntity +from .entity import SmartSimEntity, TelemetryConfiguration from .entityList import EntityList, EntitySequence from .files import TaggedFilesHierarchy from .model import Model diff --git a/smartsim/entity/dbnode.py b/smartsim/entity/dbnode.py index 9b67687f0..d371357f8 100644 --- a/smartsim/entity/dbnode.py +++ b/smartsim/entity/dbnode.py @@ -34,7 +34,7 @@ from dataclasses import dataclass from .._core.config import CONFIG -from ..error import SmartSimError +from ..error import SSDBFilesNotParseable from ..log import get_logger from ..settings.base import RunSettings from .entity import SmartSimEntity @@ -146,9 +146,7 @@ def _get_cluster_conf_filenames(self, port: int) -> t.List[str]: # cov-lsf This function should bu used if and only if ``_mpmd==True`` :param port: port number - :type port: int :return: the dbnode configuration file name - :rtype: str """ if self.num_shards == 1: return [f"nodes-{self.name}-{port}.conf"] @@ -186,9 +184,8 @@ def _parse_launched_shard_info_from_files( def get_launched_shard_info(self) -> "t.List[LaunchedShardData]": """Parse the 
launched database shard info from the output files - :raises SmartSimError: if all shard info could not be found + :raises SSDBFilesNotParseable: if all shard info could not be found :return: The found launched shard info - :rtype: list[LaunchedShardData] """ ips: "t.List[LaunchedShardData]" = [] trials = CONFIG.database_file_parse_trials @@ -214,7 +211,7 @@ def get_launched_shard_info(self) -> "t.List[LaunchedShardData]": f"{len(ips)} out of {self.num_shards} DB shards." ) logger.error(msg) - raise SmartSimError(msg) + raise SSDBFilesNotParseable(msg) return ips def _parse_db_hosts(self) -> t.List[str]: @@ -223,9 +220,8 @@ def _parse_db_hosts(self) -> t.List[str]: The IP address is preferred, but if hostname is only present then a lookup to /etc/hosts is done through the socket library. - :raises SmartSimError: if host/ip could not be found + :raises SSDBFilesNotParseable: if host/ip could not be found :return: ip addresses | hostnames - :rtype: list[str] """ return list({shard.hostname for shard in self.get_launched_shard_info()}) diff --git a/smartsim/entity/dbobject.py b/smartsim/entity/dbobject.py index 0a495f066..5cb0d061f 100644 --- a/smartsim/entity/dbobject.py +++ b/smartsim/entity/dbobject.py @@ -27,7 +27,7 @@ import typing as t from pathlib import Path -from .._core.utils import init_default +from .._core._install.builder import Device from ..error import SSUnsupportedError __all__ = ["DBObject", "DBModel", "DBScript"] @@ -46,7 +46,7 @@ def __init__( name: str, func: t.Optional[_DBObjectFuncT], file_path: t.Optional[str], - device: t.Literal["CPU", "GPU"], + device: str, devices_per_node: int, first_device: int, ) -> None: @@ -75,9 +75,6 @@ def _check_tensor_args( inputs: t.Union[str, t.Optional[t.List[str]]], outputs: t.Union[str, t.Optional[t.List[str]]], ) -> t.Tuple[t.List[str], t.List[str]]: - inputs = init_default([], inputs, (list, str)) - outputs = init_default([], outputs, (list, str)) - if isinstance(inputs, str): inputs = [inputs] if 
isinstance(outputs, str): @@ -103,9 +100,9 @@ def _check_filepath(file: str) -> Path: return file_path @staticmethod - def _check_device(device: t.Literal["CPU", "GPU"]) -> str: - device = t.cast(t.Literal["CPU", "GPU"], device.upper()) - if not device.startswith("CPU") and not device.startswith("GPU"): + def _check_device(device: str) -> str: + valid_devices = [Device.CPU.value, Device.GPU.value] + if not any(device.lower().startswith(dev) for dev in valid_devices): raise ValueError("Device argument must start with either CPU or GPU") return device @@ -113,9 +110,7 @@ def _enumerate_devices(self) -> t.List[str]: """Enumerate devices for a DBObject :param dbobject: DBObject to enumerate - :type dbobject: DBObject :return: list of device names - :rtype: list[str] """ if self.device == "GPU" and self.devices_per_node > 1: @@ -130,16 +125,16 @@ def _enumerate_devices(self) -> t.List[str]: @staticmethod def _check_devices( - device: t.Literal["CPU", "GPU"], + device: str, devices_per_node: int, first_device: int, ) -> None: - if device == "CPU" and devices_per_node > 1: + if device.lower() == Device.CPU.value and devices_per_node > 1: raise SSUnsupportedError( "Cannot set devices_per_node>1 if CPU is specified under devices" ) - if device == "CPU" and first_device > 0: + if device.lower() == Device.CPU.value and first_device > 0: raise SSUnsupportedError( "Cannot set first_device>0 if CPU is specified under devices" ) @@ -160,7 +155,7 @@ def __init__( name: str, script: t.Optional[str] = None, script_path: t.Optional[str] = None, - device: t.Literal["CPU", "GPU"] = "CPU", + device: str = Device.CPU.value.upper(), devices_per_node: int = 1, first_device: int = 0, ): @@ -178,17 +173,11 @@ def __init__( must be provided :param name: key to store script under - :type name: str :param script: TorchScript code - :type script: str, optional - :param script_path: path to TorchScript code, defaults to None - :type script_path: str, optional - :param device: device for script 
execution, defaults to "CPU" - :type device: str, optional + :param script_path: path to TorchScript code + :param device: device for script execution :param devices_per_node: number of devices to store the script on - :type devices_per_node: int :param first_device: first devices to store the script on - :type first_device: int """ super().__init__( name, script, script_path, device, devices_per_node, first_device @@ -197,13 +186,13 @@ def __init__( raise ValueError("Either script or script_path must be provided") @property - def script(self) -> t.Optional[str]: + def script(self) -> t.Optional[t.Union[bytes, str]]: return self.func def __str__(self) -> str: desc_str = "Name: " + self.name + "\n" if self.func: - desc_str += "Func: " + self.func + "\n" + desc_str += "Func: " + str(self.func) + "\n" if self.file: desc_str += "File path: " + str(self.file) + "\n" devices_str = self.device + ( @@ -222,7 +211,7 @@ def __init__( backend: str, model: t.Optional[bytes] = None, model_file: t.Optional[str] = None, - device: t.Literal["CPU", "GPU"] = "CPU", + device: str = Device.CPU.value.upper(), devices_per_node: int = 1, first_device: int = 0, batch_size: int = 0, @@ -238,31 +227,18 @@ def __init__( must be provided :param name: key to store model under - :type name: str :param model: model in memory - :type model: str, optional :param model_file: serialized model - :type model_file: file path to model, optional :param backend: name of the backend (TORCH, TF, TFLITE, ONNX) - :type backend: str - :param device: name of device for execution, defaults to "CPU" - :type device: str, optional + :param device: name of device for execution :param devices_per_node: number of devices to store the model on - :type devices_per_node: int :param first_device: The first device to store the model on - :type first_device: int - :param batch_size: batch size for execution, defaults to 0 - :type batch_size: int, optional - :param min_batch_size: minimum batch size for model execution, 
defaults to 0 - :type min_batch_size: int, optional - :param min_batch_timeout: time to wait for minimum batch size, defaults to 0 - :type min_batch_timeout: int, optional - :param tag: additional tag for model information, defaults to "" - :type tag: str, optional - :param inputs: model inputs (TF only), defaults to None - :type inputs: list[str], optional - :param outputs: model outupts (TF only), defaults to None - :type outputs: list[str], optional + :param batch_size: batch size for execution + :param min_batch_size: minimum batch size for model execution + :param min_batch_timeout: time to wait for minimum batch size + :param tag: additional tag for model information + :param inputs: model inputs (TF only) + :param outputs: model outupts (TF only) """ super().__init__( name, model, model_file, device, devices_per_node, first_device diff --git a/smartsim/entity/ensemble.py b/smartsim/entity/ensemble.py index b30f82542..cab138685 100644 --- a/smartsim/entity/ensemble.py +++ b/smartsim/entity/ensemble.py @@ -24,13 +24,14 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import os.path as osp import typing as t from copy import deepcopy from os import getcwd from tabulate import tabulate -from .._core.utils.helpers import init_default +from .._core._install.builder import Device from ..error import ( EntityExistsError, SmartSimError, @@ -61,6 +62,7 @@ def __init__( self, name: str, params: t.Dict[str, t.Any], + path: t.Optional[str] = getcwd(), params_as_args: t.Optional[t.List[str]] = None, batch_settings: t.Optional[BatchSettings] = None, run_settings: t.Optional[RunSettings] = None, @@ -73,43 +75,33 @@ def __init__( parameters to the permutation strategy. 
:param name: name of the ensemble - :type name: str :param params: parameters to expand into ``Model`` members - :type params: dict[str, Any] :param params_as_args: list of params that should be used as command line arguments to the ``Model`` member executables and not written to generator files - :type params_as_args: list[str] :param batch_settings: describes settings for ``Ensemble`` as batch workload - :type batch_settings: BatchSettings, optional :param run_settings: describes how each ``Model`` should be executed - :type run_settings: RunSettings, optional :param replicas: number of ``Model`` replicas to create - a keyword argument of kwargs - :type replicas: int, optional :param perm_strategy: strategy for expanding ``params`` into ``Model`` instances from params argument options are "all_perm", "step", "random" - or a callable function. Defaults to "all_perm". - :type perm_strategy: str + or a callable function. :return: ``Ensemble`` instance - :rtype: ``Ensemble`` """ - self.params = init_default({}, params, dict) - self.params_as_args = init_default({}, params_as_args, (list, str)) + self.params = params or {} + self.params_as_args = params_as_args or [] self._key_prefixing_enabled = True - self.batch_settings = init_default({}, batch_settings, BatchSettings) - self.run_settings = init_default({}, run_settings, RunSettings) + self.batch_settings = batch_settings + self.run_settings = run_settings + self.replicas: str - super().__init__(name, getcwd(), perm_strat=perm_strat, **kwargs) + super().__init__(name, str(path), perm_strat=perm_strat, **kwargs) @property - def models(self) -> t.Iterable[Model]: - """ - Helper property to cast self.entities to Model type for type correctness - """ - model_entities = [node for node in self.entities if isinstance(node, Model)] - return model_entities + def models(self) -> t.Collection[Model]: + """An alias for a shallow copy of the ``entities`` attribute""" + return list(self.entities) def _initialize_entities(self, 
**kwargs: t.Any) -> None: """Initialize all the models within the ensemble based @@ -120,6 +112,7 @@ def _initialize_entities(self, **kwargs: t.Any) -> None: """ strategy = self._set_strategy(kwargs.pop("perm_strat")) replicas = kwargs.pop("replicas", None) + self.replicas = replicas # if a ensemble has parameters and run settings, create # the ensemble and assign run_settings to each member @@ -139,9 +132,9 @@ def _initialize_entities(self, **kwargs: t.Any) -> None: run_settings = deepcopy(self.run_settings) model_name = "_".join((self.name, str(i))) model = Model( - model_name, - param_set, - self.path, + name=model_name, + params=param_set, + path=osp.join(self.path, model_name), run_settings=run_settings, params_as_args=self.params_as_args, ) @@ -163,9 +156,9 @@ def _initialize_entities(self, **kwargs: t.Any) -> None: for i in range(replicas): model_name = "_".join((self.name, str(i))) model = Model( - model_name, - {}, - self.path, + name=model_name, + params={}, + path=osp.join(self.path, model_name), run_settings=deepcopy(self.run_settings), ) model.enable_key_prefixing() @@ -191,7 +184,6 @@ def add_model(self, model: Model) -> None: """Add a model to this ensemble :param model: model instance to be added - :type model: Model :raises TypeError: if model is not an instance of ``Model`` :raises EntityExistsError: if model already exists in this ensemble """ @@ -222,7 +214,6 @@ def register_incoming_entity(self, incoming_entity: SmartSimEntity) -> None: Only python clients can have multiple incoming connections :param incoming_entity: The entity that data will be received from - :type incoming_entity: SmartSimEntity """ for model in self.models: model.register_incoming_entity(incoming_entity) @@ -238,7 +229,6 @@ def query_key_prefixing(self) -> bool: """Inquire as to whether each model within the ensemble will prefix their keys :returns: True if all models have key prefixing enabled, False otherwise - :rtype: bool """ return all(model.query_key_prefixing() for 
model in self.models) @@ -264,12 +254,9 @@ def attach_generator_files( would like to change. The tag is settable but defaults to a semicolon e.g. THERMO = ;10; - :param to_copy: files to copy, defaults to [] - :type to_copy: list, optional - :param to_symlink: files to symlink, defaults to [] - :type to_symlink: list, optional - :param to_configure: input files with tagged parameters, defaults to [] - :type to_configure: list, optional + :param to_copy: files to copy + :param to_symlink: files to symlink + :param to_configure: input files with tagged parameters """ for model in self.models: model.attach_generator_files( @@ -282,7 +269,6 @@ def attached_files_table(self) -> str: attached to models belonging to this ensemble. :returns: A table of all files attached to all models - :rtype: str """ if not self.models: return "The ensemble is empty, no files to show." @@ -305,10 +291,8 @@ def _set_strategy(strategy: str) -> StrategyFunction: the ensemble :param strategy: name of the strategy or callable function - :type strategy: str :raises SSUnsupportedError: if str name is not supported :return: strategy function - :rtype: callable """ if strategy == "all_perm": return create_all_permutations @@ -328,7 +312,6 @@ def _read_model_parameters(self) -> t.Tuple[t.List[str], t.List[t.List[str]]]: :raises TypeError: if params are of the wrong type :return: param names and values for permutation strategy - :rtype: tuple[list, list] """ if not isinstance(self.params, dict): @@ -359,7 +342,7 @@ def add_ml_model( backend: str, model: t.Optional[bytes] = None, model_path: t.Optional[str] = None, - device: t.Literal["CPU", "GPU"] = "CPU", + device: str = Device.CPU.value.upper(), devices_per_node: int = 1, first_device: int = 0, batch_size: int = 0, @@ -379,33 +362,19 @@ def add_ml_model( must be provided :param name: key to store model under - :type name: str :param model: model in memory - :type model: str | bytes | None :param model_path: serialized model - :type model_path: 
file path to model :param backend: name of the backend (TORCH, TF, TFLITE, ONNX) - :type backend: str - :param device: name of device for execution, defaults to "CPU" - :type device: str, optional - :param devices_per_node: number of GPUs per node in multiGPU nodes, - defaults to 1 - :type devices_per_node: int, optional + :param device: name of device for execution + :param devices_per_node: number of GPUs per node in multiGPU nodes :param first_device: first device in multi-GPU nodes to use for execution, defaults to 0; ignored if devices_per_node is 1 - :type first_device: int, optional - :param batch_size: batch size for execution, defaults to 0 - :type batch_size: int, optional - :param min_batch_size: minimum batch size for model execution, defaults to 0 - :type min_batch_size: int, optional - :param min_batch_timeout: time to wait for minimum batch size, defaults to 0 - :type min_batch_timeout: int, optional - :param tag: additional tag for model information, defaults to "" - :type tag: str, optional - :param inputs: model inputs (TF only), defaults to None - :type inputs: list[str], optional - :param outputs: model outupts (TF only), defaults to None - :type outputs: list[str], optional + :param batch_size: batch size for execution + :param min_batch_size: minimum batch size for model execution + :param min_batch_timeout: time to wait for minimum batch size + :param tag: additional tag for model information + :param inputs: model inputs (TF only) + :param outputs: model outupts (TF only) """ db_model = DBModel( name=name, @@ -443,7 +412,7 @@ def add_script( name: str, script: t.Optional[str] = None, script_path: t.Optional[str] = None, - device: t.Literal["CPU", "GPU"] = "CPU", + device: str = Device.CPU.value.upper(), devices_per_node: int = 1, first_device: int = 0, ) -> None: @@ -463,17 +432,11 @@ def add_script( must be provided :param name: key to store script under - :type name: str :param script: TorchScript code - :type script: str, optional :param 
script_path: path to TorchScript code - :type script_path: str, optional - :param device: device for script execution, defaults to "CPU" - :type device: str, optional + :param device: device for script execution :param devices_per_node: number of devices on each host - :type devices_per_node: int :param first_device: first device to use on each host - :type first_device: int """ db_script = DBScript( name=name, @@ -503,7 +466,7 @@ def add_function( self, name: str, function: t.Optional[str] = None, - device: t.Literal["CPU", "GPU"] = "CPU", + device: str = Device.CPU.value.upper(), devices_per_node: int = 1, first_device: int = 0, ) -> None: @@ -524,15 +487,10 @@ def add_function( being stored on nodes M through M + N - 1. :param name: key to store function under - :type name: str :param function: TorchScript code - :type function: str, optional - :param device: device for script execution, defaults to "CPU" - :type device: str, optional + :param device: device for script execution :param devices_per_node: number of devices on each host - :type devices_per_node: int :param first_device: first device to use on each host - :type first_device: int """ db_script = DBScript( name=name, @@ -568,9 +526,7 @@ def _extend_entity_db_models(model: Model, db_models: t.List[DBModel]) -> None: found. Otherwise, it appends the given list of DBModels to the Ensemble. :param model: SmartSim Model object. - :type model: Model :param db_models: List of DBModels to append to the Ensemble. - :type db_models: t.List[DBModel] """ for add_ml_model in db_models: dupe = next( @@ -598,9 +554,7 @@ def _extend_entity_db_scripts(model: Model, db_scripts: t.List[DBScript]) -> Non Ensemble. :param model: SmartSim Model object. - :type model: Model :param db_scripts: List of DBScripts to append to the Ensemble. 
- :type db_scripts: t.List[DBScript] """ for add_script in db_scripts: dupe = next( diff --git a/smartsim/entity/entity.py b/smartsim/entity/entity.py index 46202ca6a..012a76744 100644 --- a/smartsim/entity/entity.py +++ b/smartsim/entity/entity.py @@ -31,6 +31,64 @@ import smartsim.settings.base +class TelemetryConfiguration: + """A base class for configuraing telemetry production behavior on + existing `SmartSimEntity` subclasses. Any class that will have + optional telemetry collection must expose access to an instance + of `TelemetryConfiguration` such as: + + ``` + @property + def telemetry(self) -> TelemetryConfiguration: + # Return the telemetry configuration for this entity. + # :returns: Configuration object indicating the configuration + # status of telemetry for this entity + return self._telemetry_producer + ``` + + An instance will be used by to conditionally serialize + values to the `RuntimeManifest` + """ + + def __init__(self, enabled: bool = False) -> None: + """Initialize the telemetry producer and immediately call the `_on_enable` hook. + + :param enabled: flag indicating the initial state of telemetry + """ + self._is_on = enabled + + if self._is_on: + self._on_enable() + else: + self._on_disable() + + @property + def is_enabled(self) -> bool: + """Boolean flag indicating if telemetry is currently enabled + + :returns: `True` if enabled, `False` otherwise + """ + return self._is_on + + def enable(self) -> None: + """Enable telemetry for this producer""" + self._is_on = True + self._on_enable() + + def disable(self) -> None: + """Disable telemetry for this producer""" + self._is_on = False + self._on_disable() + + def _on_enable(self) -> None: + """Overridable hook called after telemetry is `enabled`. Allows subclasses + to perform actions when attempts to change configuration are made""" + + def _on_disable(self) -> None: + """Overridable hook called after telemetry is `disabled`. 
Allows subclasses + to perform actions when attempts to change configuration are made""" + + class SmartSimEntity: def __init__( self, name: str, path: str, run_settings: "smartsim.settings.base.RunSettings" @@ -42,12 +100,9 @@ def __init__( share these attributes. :param name: Name of the entity - :type name: str :param path: path to output, error, and configuration files - :type path: str :param run_settings: Launcher settings specified in the experiment entity - :type run_settings: dict """ self.name = name self.run_settings = run_settings diff --git a/smartsim/entity/entityList.py b/smartsim/entity/entityList.py index 6d958bda6..edaa88668 100644 --- a/smartsim/entity/entityList.py +++ b/smartsim/entity/entityList.py @@ -91,16 +91,14 @@ def db_scripts(self) -> t.Iterable["smartsim.entity.DBScript"]: @property def batch(self) -> bool: - try: - if not hasattr(self, "batch_settings"): - return False - - if self.batch_settings: - return True - return False - # local orchestrator cannot launch with batches - except AttributeError: - return False + """Property indicating whether or not the entity sequence should be + launched as a batch job + + :return: ``True`` if entity sequence should be launched as a batch job, + ``False`` if the members will be launched individually. 
+ """ + # pylint: disable-next=no-member + return hasattr(self, "batch_settings") and self.batch_settings @property def type(self) -> str: diff --git a/smartsim/entity/files.py b/smartsim/entity/files.py index 9c282b94e..d00e946e2 100644 --- a/smartsim/entity/files.py +++ b/smartsim/entity/files.py @@ -58,13 +58,10 @@ def __init__( """Initialize an EntityFiles instance :param tagged: tagged files for model configuration - :type tagged: list of str :param copy: files or directories to copy into model or node directories - :type copy: list of str :param symlink: files to symlink into model or node directories - :type symlink: list of str """ self.tagged = tagged or [] self.copy = copy or [] @@ -102,12 +99,9 @@ def _type_check_files( """Check the type of the files provided by the user. :param file_list: either tagged, copy, or symlink files - :type file_list: list of str :param file_type: name of the file type e.g. "tagged" - :type file_type: str :raises TypeError: if incorrect type is provided by user :return: file list provided - :rtype: list of str """ if file_list: if not isinstance(file_list, list): @@ -128,10 +122,8 @@ def _check_path(file_path: str) -> str: the directory or file and create a full path. :param file_path: path to a specific file or directory - :type file_path: str :raises FileNotFoundError: if file or directory does not exist :return: full path to file or directory - :rtype: str """ full_path = path.abspath(file_path) if path.isfile(full_path): @@ -183,12 +175,10 @@ def __init__(self, parent: t.Optional[t.Any] = None, subdir_name: str = "") -> N :param parent: The parent hierarchy of the new hierarchy, must be None if creating a root hierarchy, must be provided if creating a subhierachy - :type parent: TaggedFilesHierarchy | None, optional :param subdir_name: Name of subdirectory representd by the new hierarchy, must be "" if creating a root hierarchy, must be any valid dir name if subhierarchy, invalid names are ".", ".." 
or contain path seperators - :type subdir_name: str, optional :raises ValueError: if given a subdir_name without a parent, if given a parent without a subdir_name, or if the subdir_name is invalid @@ -232,15 +222,12 @@ def from_list_paths( :param path_list: list of absolute paths to tagged files or dirs containing tagged files - :type path_list: list[str] :param dir_contents_to_base: When a top level dir is encountered, if this value is truthy, files in the dir are put into the base hierarchy level. Otherwise, a new sub level is created for the dir - :type dir_contents_to_base: bool :return: A built tagged file hierarchy for the given files - :rtype: TaggedFilesHierarchy """ tagged_file_hierarchy = cls() if dir_contents_to_base: @@ -261,7 +248,6 @@ def _add_file(self, file: str) -> None: """Add a file to the current level in the file hierarchy :param file: absoute path to a file to add to the hierarchy - :type file: str """ self.files.add(file) @@ -271,7 +257,6 @@ def _add_dir(self, dir_path: str) -> None: the new level sub level tagged file hierarchy :param dir: absoute path to a dir to add to the hierarchy - :type dir: str """ tagged_file_hierarchy = TaggedFilesHierarchy(self, path.basename(dir_path)) # pylint: disable-next=protected-access @@ -285,7 +270,6 @@ def _add_paths(self, paths: t.List[str]) -> None: TaggedFilesHierarchy. 
:param paths: list of paths to files or dirs to add to the hierarchy - :type paths: list[str] :raises ValueError: if link to dir is found :raises FileNotFoundError: if path does not exist """ diff --git a/smartsim/entity/model.py b/smartsim/entity/model.py index c7b8731c2..3f78e042c 100644 --- a/smartsim/entity/model.py +++ b/smartsim/entity/model.py @@ -26,14 +26,16 @@ from __future__ import annotations -import collections.abc +import itertools import re import sys import typing as t import warnings +from os import getcwd from os import path as osp -from .._core.utils.helpers import cat_arg_and_value, init_default +from .._core._install.builder import Device +from .._core.utils.helpers import cat_arg_and_value from ..error import EntityExistsError, SSUnsupportedError from ..log import get_logger from ..settings.base import BatchSettings, RunSettings @@ -49,31 +51,25 @@ def __init__( self, name: str, params: t.Dict[str, str], - path: str, run_settings: RunSettings, + path: t.Optional[str] = getcwd(), params_as_args: t.Optional[t.List[str]] = None, batch_settings: t.Optional[BatchSettings] = None, ): """Initialize a ``Model`` :param name: name of the model - :type name: str :param params: model parameters for writing into configuration files or to be passed as command line arguments to executable. 
- :type params: dict :param path: path to output, error, and configuration files - :type path: str :param run_settings: launcher settings specified in the experiment - :type run_settings: RunSettings :param params_as_args: list of parameters which have to be interpreted as command line arguments to be added to run_settings - :type params_as_args: list[str] :param batch_settings: Launcher settings for running the individual - model as a batch job, defaults to None - :type batch_settings: BatchSettings | None + model as a batch job """ - super().__init__(name, path, run_settings) + super().__init__(name, str(path), run_settings) self.params = params self.params_as_args = params_as_args self.incoming_entities: t.List[SmartSimEntity] = [] @@ -85,17 +81,26 @@ def __init__( @property def db_models(self) -> t.Iterable[DBModel]: - """Return an immutable collection of attached models""" + """Retrieve an immutable collection of attached models + + :return: Return an immutable collection of attached models + """ return (model for model in self._db_models) @property def db_scripts(self) -> t.Iterable[DBScript]: - """Return an immutable collection attached of scripts""" + """Retrieve an immutable collection attached of scripts + + :return: Return an immutable collection of attached scripts + """ return (script for script in self._db_scripts) @property def colocated(self) -> bool: - """Return True if this Model will run with a colocated Orchestrator""" + """Return True if this Model will run with a colocated Orchestrator + + :return: Return True of the Model will run with a colocated Orchestrator + """ return bool(self.run_settings.colocated_db_settings) def register_incoming_entity(self, incoming_entity: SmartSimEntity) -> None: @@ -106,7 +111,6 @@ def register_incoming_entity(self, incoming_entity: SmartSimEntity) -> None: with that entity :param incoming_entity: The entity that data will be received from - :type incoming_entity: SmartSimEntity :raises SmartSimError: if 
incoming entity has already been registered """ if incoming_entity.name in [ @@ -128,7 +132,10 @@ def disable_key_prefixing(self) -> None: self._key_prefixing_enabled = False def query_key_prefixing(self) -> bool: - """Inquire as to whether this entity will prefix its keys with its name""" + """Inquire as to whether this entity will prefix its keys with its name + + :return: Return True if entity will prefix its keys with its name + """ return self._key_prefixing_enabled def attach_generator_files( @@ -155,16 +162,13 @@ def attach_generator_files( would like to change. The tag is settable but defaults to a semicolon e.g. THERMO = ;10; - :param to_copy: files to copy, defaults to [] - :type to_copy: list, optional - :param to_symlink: files to symlink, defaults to [] - :type to_symlink: list, optional - :param to_configure: input files with tagged parameters, defaults to [] - :type to_configure: list, optional + :param to_copy: files to copy + :param to_symlink: files to symlink + :param to_configure: input files with tagged parameters """ - to_copy = init_default([], to_copy, (list, str)) - to_symlink = init_default([], to_symlink, (list, str)) - to_configure = init_default([], to_configure, (list, str)) + to_copy = to_copy or [] + to_symlink = to_symlink or [] + to_configure = to_configure or [] # Check that no file collides with the parameter file written # by Generator. We check the basename, even though it is more @@ -185,7 +189,6 @@ def attached_files_table(self) -> str: """Return a list of attached files as a plain text table :returns: String version of table - :rtype: str """ if not self.files: return "No file attached to this model." @@ -239,18 +242,12 @@ def colocate_db_uds( Generally these don't need to be changed. 
:param unix_socket: path to where the socket file will be created - :type unix_socket: str, optional :param socket_permissions: permissions for the socketfile - :type socket_permissions: int, optional - :param db_cpus: number of cpus to use for orchestrator, defaults to 1 - :type db_cpus: int, optional + :param db_cpus: number of cpus to use for orchestrator :param custom_pinning: CPUs to pin the orchestrator to. Passing an empty iterable disables pinning - :type custom_pinning: iterable of ints or iterable of ints, optional :param debug: launch Model with extra debug information about the colocated db - :type debug: bool, optional :param kwargs: additional keyword arguments to pass to the orchestrator database - :type kwargs: dict, optional """ if not re.match(r"^[a-zA-Z0-9.:\,_\-/]*$", unix_socket): @@ -305,20 +302,13 @@ def colocate_db_tcp( Generally these don't need to be changed. - :param port: port to use for orchestrator database, defaults to 6379 - :type port: int, optional - :param ifname: interface to use for orchestrator, defaults to "lo" - :type ifname: str | list[str], optional - :param db_cpus: number of cpus to use for orchestrator, defaults to 1 - :type db_cpus: int, optional + :param port: port to use for orchestrator database + :param ifname: interface to use for orchestrator + :param db_cpus: number of cpus to use for orchestrator :param custom_pinning: CPUs to pin the orchestrator to. 
Passing an empty iterable disables pinning - :type custom_pinning: iterable of ints or iterable of ints, optional :param debug: launch Model with extra debug information about the colocated db - :type debug: bool, optional :param kwargs: additional keyword arguments to pass to the orchestrator database - :type kwargs: dict, optional - """ tcp_options = {"port": port, "ifname": ifname} @@ -414,9 +404,10 @@ def _set_colocated_db_settings( def _create_pinning_string( pin_ids: t.Optional[t.Iterable[t.Union[int, t.Iterable[int]]]], cpus: int ) -> t.Optional[str]: - """Create a comma-separated string CPU ids. By default, None returns - 0,1,...,cpus-1; an empty iterable will disable pinning altogether, - and an iterable constructs a comma separate string (e.g. 0,2,5) + """Create a comma-separated string of CPU ids. By default, ``None`` + returns 0,1,...,cpus-1; an empty iterable will disable pinning + altogether, and an iterable constructs a comma separated string of + integers (e.g. ``[0, 2, 5]`` -> ``"0,2,5"``) """ def _stringify_id(_id: int) -> str: @@ -428,40 +419,34 @@ def _stringify_id(_id: int) -> str: raise TypeError(f"Argument is of type '{type(_id)}' not 'int'") - _invalid_input_message = ( - "Expected a cpu pinning specification of type iterable of ints or " - f"iterables of ints. Instead got type `{type(pin_ids)}`" - ) + try: + pin_ids = tuple(pin_ids) if pin_ids is not None else None + except TypeError: + raise TypeError( + "Expected a cpu pinning specification of type iterable of ints or " + f"iterables of ints. Instead got type `{type(pin_ids)}`" + ) from None # Deal with MacOSX limitations first. The "None" (default) disables pinning - # and is equivalent to []. The only invalid option is an iterable + # and is equivalent to []. 
The only invalid option is a non-empty pinning if sys.platform == "darwin": - if pin_ids is None or not pin_ids: - return None - - if isinstance(pin_ids, collections.abc.Iterable): + if pin_ids: warnings.warn( "CPU pinning is not supported on MacOSX. Ignoring pinning " "specification.", RuntimeWarning, ) - return None - raise TypeError(_invalid_input_message) + return None + # Flatten the iterable into a list and check to make sure that the resulting # elements are all ints if pin_ids is None: return ",".join(_stringify_id(i) for i in range(cpus)) if not pin_ids: return None - if isinstance(pin_ids, collections.abc.Iterable): - pin_list = [] - for pin_id in pin_ids: - if isinstance(pin_id, collections.abc.Iterable): - pin_list.extend([_stringify_id(j) for j in pin_id]) - else: - pin_list.append(_stringify_id(pin_id)) - return ",".join(sorted(set(pin_list))) - raise TypeError(_invalid_input_message) + pin_ids = ((x,) if isinstance(x, int) else x for x in pin_ids) + to_fmt = itertools.chain.from_iterable(pin_ids) + return ",".join(sorted({_stringify_id(x) for x in to_fmt})) def params_to_args(self) -> None: """Convert parameters to command line arguments and update run settings.""" @@ -487,7 +472,7 @@ def add_ml_model( backend: str, model: t.Optional[bytes] = None, model_path: t.Optional[str] = None, - device: t.Literal["CPU", "GPU"] = "CPU", + device: str = Device.CPU.value.upper(), devices_per_node: int = 1, first_device: int = 0, batch_size: int = 0, @@ -507,35 +492,22 @@ def add_ml_model( must be provided :param name: key to store model under - :type name: str :param backend: name of the backend (TORCH, TF, TFLITE, ONNX) - :type backend: str :param model: A model in memory (only supported for non-colocated orchestrators) - :type model: byte string, optional :param model_path: serialized model - :type model_path: file path to model - :param device: name of device for execution, defaults to "CPU" - :type device: str, optional + :param device: name of device for 
execution :param devices_per_node: The number of GPU devices available on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. - :type devices_per_node: int :param first_device: The first GPU device to use on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. - :type first_device: int - :param batch_size: batch size for execution, defaults to 0 - :type batch_size: int, optional - :param min_batch_size: minimum batch size for model execution, defaults to 0 - :type min_batch_size: int, optional - :param min_batch_timeout: time to wait for minimum batch size, defaults to 0 - :type min_batch_timeout: int, optional - :param tag: additional tag for model information, defaults to "" - :type tag: str, optional - :param inputs: model inputs (TF only), defaults to None - :type inputs: list[str], optional - :param outputs: model outupts (TF only), defaults to None - :type outputs: list[str], optional + :param batch_size: batch size for execution + :param min_batch_size: minimum batch size for model execution + :param min_batch_timeout: time to wait for minimum batch size + :param tag: additional tag for model information + :param inputs: model inputs (TF only) + :param outputs: model outupts (TF only) """ db_model = DBModel( name=name, @@ -559,7 +531,7 @@ def add_script( name: str, script: t.Optional[str] = None, script_path: t.Optional[str] = None, - device: t.Literal["CPU", "GPU"] = "CPU", + device: str = Device.CPU.value.upper(), devices_per_node: int = 1, first_device: int = 0, ) -> None: @@ -581,21 +553,15 @@ def add_script( must be provided :param name: key to store script under - :type name: str :param script: TorchScript code (only supported for non-colocated orchestrators) - :type script: str, optional :param script_path: path to TorchScript code - :type script_path: str, optional - :param device: device for script execution, defaults to "CPU" - :type device: str, 
optional + :param device: device for script execution :param devices_per_node: The number of GPU devices available on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. - :type devices_per_node: int :param first_device: The first GPU device to use on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. - :type first_device: int """ db_script = DBScript( name=name, @@ -611,7 +577,7 @@ def add_function( self, name: str, function: t.Optional[str] = None, - device: t.Literal["CPU", "GPU"] = "CPU", + device: str = Device.CPU.value.upper(), devices_per_node: int = 1, first_device: int = 0, ) -> None: @@ -630,19 +596,14 @@ def add_function( in the model being stored in the first N devices of type ``device``. :param name: key to store function under - :type name: str :param function: TorchScript function code - :type function: str, optional - :param device: device for script execution, defaults to "CPU" - :type device: str, optional + :param device: device for script execution :param devices_per_node: The number of GPU devices available on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. - :type devices_per_node: int :param first_device: The first GPU device to use on the host. This parameter only applies to GPU devices and will be ignored if device is specified as CPU. 
- :type first_device: int """ db_script = DBScript( name=name, diff --git a/smartsim/error/__init__.py b/smartsim/error/__init__.py index 4268905e6..3a40548e7 100644 --- a/smartsim/error/__init__.py +++ b/smartsim/error/__init__.py @@ -32,6 +32,7 @@ ShellError, SmartSimError, SSConfigError, + SSDBFilesNotParseable, SSDBIDConflictError, SSInternalError, SSReservedKeywordError, diff --git a/smartsim/error/errors.py b/smartsim/error/errors.py index 9a6954907..333258a34 100644 --- a/smartsim/error/errors.py +++ b/smartsim/error/errors.py @@ -87,6 +87,12 @@ class SSDBIDConflictError(SmartSimError): """ +class SSDBFilesNotParseable(SmartSimError): + """Raised when the files related to the database cannot be parsed. + Includes the case when the files do not exist. + """ + + # Internal Exceptions @@ -149,3 +155,7 @@ class UnproxyableStepError(TelemetryError): class SmartSimCLIActionCancelled(SmartSimError): """Raised when a `smart` CLI command is terminated""" + + +class PreviewFormatError(SSUnsupportedError): + """Raised when the output format of the preview method call is not supported""" diff --git a/smartsim/experiment.py b/smartsim/experiment.py index 9fcc7b13e..6b9d6a1fb 100644 --- a/smartsim/experiment.py +++ b/smartsim/experiment.py @@ -24,19 +24,28 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# pylint: disable=too-many-lines + import os import os.path as osp import typing as t -from os import getcwd +from os import environ, getcwd from tabulate import tabulate +from smartsim._core.config import CONFIG from smartsim.error.errors import SSUnsupportedError +from smartsim.status import SmartSimStatus -from ._core import Controller, Generator, Manifest -from ._core.utils import init_default +from ._core import Controller, Generator, Manifest, previewrenderer from .database import Orchestrator -from .entity import Ensemble, Model, SmartSimEntity +from .entity import ( + Ensemble, + EntitySequence, + Model, + SmartSimEntity, + TelemetryConfiguration, +) from .error import SmartSimError from .log import ctx_exp_path, get_logger, method_contextualizer from .settings import Container, base, settings @@ -54,11 +63,26 @@ def _exp_path_map(exp: "Experiment") -> str: _contextualize = method_contextualizer(ctx_exp_path, _exp_path_map) +class ExperimentTelemetryConfiguration(TelemetryConfiguration): + """Customized telemetry configuration for an `Experiment`. Ensures + backwards compatible behavior with drivers using environment variables + to enable experiment telemetry""" + + def __init__(self) -> None: + super().__init__(enabled=CONFIG.telemetry_enabled) + + def _on_enable(self) -> None: + """Modify the environment variable to enable telemetry.""" + environ["SMARTSIM_FLAG_TELEMETRY"] = "1" + + def _on_disable(self) -> None: + """Modify the environment variable to disable telemetry.""" + environ["SMARTSIM_FLAG_TELEMETRY"] = "0" + + # pylint: disable=no-self-use class Experiment: - """Experiments are the Python user interface for SmartSim. - - Experiment is a factory class that creates stages of a workflow + """Experiment is a factory class that creates stages of a workflow and manages their execution. 
The instances created by an Experiment represent executable code @@ -80,7 +104,7 @@ def __init__( exp_path: t.Optional[str] = None, launcher: str = "local", ): - """Initialize an Experiment instance + """Initialize an Experiment instance. With the default settings, the Experiment will use the local launcher, which will start all Experiment created @@ -101,10 +125,10 @@ def __init__( exp = Experiment(name="my_exp", launcher="slurm") - If you wish your driver script and Experiment to be run across + If you want your Experiment driver script to be run across multiple system with different schedulers (workload managers) - you can also use the `auto` argument to have the Experiment guess - which launcher to use based on system installed binaries and libraries + you can also use the `auto` argument to have the Experiment detect + which launcher to use based on system installed binaries and libraries. .. highlight:: python .. code-block:: python @@ -118,15 +142,11 @@ def __init__( from the Experiment. :param name: name for the ``Experiment`` - :type name: str - :param exp_path: path to location of ``Experiment`` directory if generated - :type exp_path: str, optional + :param exp_path: path to location of ``Experiment`` directory :param launcher: type of launcher being used, options are "slurm", "pbs", "lsf", or "local". If set to "auto", an attempt will be made to find an available launcher on the system. 
- Defaults to "local" - :type launcher: str, optional """ self.name = name if exp_path: @@ -135,21 +155,37 @@ def __init__( if not osp.isdir(osp.abspath(exp_path)): raise NotADirectoryError("Experiment path provided does not exist") exp_path = osp.abspath(exp_path) - self.exp_path: str = init_default(osp.join(getcwd(), name), exp_path, str) + else: + exp_path = osp.join(getcwd(), name) - if launcher == "auto": - launcher = detect_launcher() - if launcher == "cobalt": - raise SSUnsupportedError("Cobalt launcher is no longer supported.") + self.exp_path = exp_path - self._control = Controller(launcher=launcher) self._launcher = launcher.lower() + + if self._launcher == "auto": + self._launcher = detect_launcher() + if self._launcher == "cobalt": + raise SSUnsupportedError("Cobalt launcher is no longer supported.") + + if launcher == "dragon": + self._set_dragon_server_path() + + self._control = Controller(launcher=self._launcher) + self.db_identifiers: t.Set[str] = set() + self._telemetry_cfg = ExperimentTelemetryConfiguration() + + def _set_dragon_server_path(self) -> None: + """Set path for dragon server through environment varialbes""" + if not "SMARTSIM_DRAGON_SERVER_PATH" in environ: + environ["SMARTSIM_DRAGON_SERVER_PATH_EXP"] = osp.join( + self.exp_path, CONFIG.dragon_default_subdir + ) @_contextualize def start( self, - *args: t.Any, + *args: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]], block: bool = True, summary: bool = False, kill_on_interrupt: bool = True, @@ -168,7 +204,7 @@ def start( model = exp.create_model("my_model", settings) exp.start(model) - Multiple instance can also be passed to the start method + Multiple entity instances can also be passed to the start method at once no matter which type of instance they are. These will all be launched together. @@ -194,18 +230,13 @@ def start( zombie processes will need to be manually killed. 
:param block: block execution until all non-database - jobs are finished, defaults to True - :type block: bool, optional - :param summary: print a launch summary prior to launch, - defaults to False - :type summary: bool, optional + jobs are finished + :param summary: print a launch summary prior to launch :param kill_on_interrupt: flag for killing jobs when ^C (SIGINT) signal is received. - - :type kill_on_interrupt: bool, optional """ - start_manifest = Manifest(*args) + self._create_entity_dir(start_manifest) try: if summary: self._launch_summary(start_manifest) @@ -221,7 +252,9 @@ def start( raise @_contextualize - def stop(self, *args: t.Any) -> None: + def stop( + self, *args: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]] + ) -> None: """Stop specific instances launched by this ``Experiment`` Instances of ``Model``, ``Ensemble`` and ``Orchestrator`` @@ -241,6 +274,7 @@ def stop(self, *args: t.Any) -> None: # multiple exp.stop(model_1, model_2, db, ensemble) + :param args: One or more SmartSimEntity or EntitySequence objects. :raises TypeError: if wrong type :raises SmartSimError: if stop request fails """ @@ -260,15 +294,15 @@ def stop(self, *args: t.Any) -> None: @_contextualize def generate( self, - *args: t.Any, + *args: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]], tag: t.Optional[str] = None, overwrite: bool = False, verbose: bool = False, ) -> None: """Generate the file structure for an ``Experiment`` - ``Experiment.generate`` creates directories for each instance - passed to organize Experiments that launch many instances. + ``Experiment.generate`` creates directories for each entity + passed to organize Experiments that launch many entities. If files or directories are attached to ``Model`` objects using ``Model.attach_generator_files()``, those files or @@ -279,12 +313,8 @@ def generate( can all be passed as arguments to the generate method. 
:param tag: tag used in `to_configure` generator files - :type tag: str, optional - :param overwrite: overwrite existing folders and contents, - defaults to False - :type overwrite: bool, optional + :param overwrite: overwrite existing folders and contents :param verbose: log parameter settings to std out - :type verbose: bool """ try: generator = Generator(self.exp_path, overwrite=overwrite, verbose=verbose) @@ -324,14 +354,10 @@ def poll( that all jobs launched by this experiment will be killed, and the zombie processes will need to be manually killed. - :param interval: frequency (in seconds) of logging to stdout, - defaults to 10 seconds - :type interval: int, optional - :param verbose: set verbosity, defaults to True - :type verbose: bool, optional + :param interval: frequency (in seconds) of logging to stdout + :param verbose: set verbosity :param kill_on_interrupt: flag for killing jobs when SIGINT is received - :type kill_on_interrupt: bool, optional - :raises SmartSimError: + :raises SmartSimError: if poll request fails """ try: self._control.poll(interval, verbose, kill_on_interrupt=kill_on_interrupt) @@ -351,9 +377,7 @@ def finished(self, entity: SmartSimEntity) -> bool: by the user. :param entity: object launched by this ``Experiment`` - :type entity: Model | Ensemble - :returns: True if job has completed, False otherwise - :rtype: bool + :returns: True if the job has finished, False otherwise :raises SmartSimError: if entity has not been launched by this ``Experiment`` """ @@ -364,8 +388,10 @@ def finished(self, entity: SmartSimEntity) -> bool: raise @_contextualize - def get_status(self, *args: t.Any) -> t.List[str]: - """Query the status of launched instances + def get_status( + self, *args: t.Union[SmartSimEntity, EntitySequence[SmartSimEntity]] + ) -> t.List[SmartSimStatus]: + """Query the status of launched entity instances Return a smartsim.status string representing the status of the launched instance. 
@@ -387,12 +413,11 @@ def get_status(self, *args: t.Any) -> t.List[str]: assert all(complete) :returns: status of the instances passed as arguments - :rtype: list[str] :raises SmartSimError: if status retrieval fails """ try: manifest = Manifest(*args) - statuses: t.List[str] = [] + statuses: t.List[SmartSimStatus] = [] for entity in manifest.models: statuses.append(self._control.get_entity_status(entity)) for entity_list in manifest.all_entity_lists: @@ -411,6 +436,7 @@ def create_ensemble( run_settings: t.Optional[base.RunSettings] = None, replicas: t.Optional[int] = None, perm_strategy: str = "all_perm", + path: t.Optional[str] = None, **kwargs: t.Any, ) -> Ensemble: """Create an ``Ensemble`` of ``Model`` instances @@ -419,7 +445,7 @@ def create_ensemble( if using a non-local launcher. e.g. slurm Ensembles require one of the following combinations - of arguments + of arguments: - ``run_settings`` and ``params`` - ``run_settings`` and ``replicas`` @@ -428,44 +454,43 @@ def create_ensemble( - ``batch_settings``, ``run_settings``, and ``replicas`` If given solely batch settings, an empty ensemble - will be created that models can be added to manually + will be created that Models can be added to manually through ``Ensemble.add_model()``. - The entire ensemble will launch as one batch. + The entire Ensemble will launch as one batch. Provided batch and run settings, either ``params`` or ``replicas`` must be passed and the entire ensemble will launch as a single batch. Provided solely run settings, either ``params`` - or ``replicas`` must be passed and the ensemble members + or ``replicas`` must be passed and the Ensemble members will each launch sequentially. The kwargs argument can be used to pass custom input parameters to the permutation strategy. 
- :param name: name of the ensemble - :type name: str + :param name: name of the ``Ensemble`` :param params: parameters to expand into ``Model`` members - :type params: dict[str, Any] :param batch_settings: describes settings for ``Ensemble`` as batch workload - :type batch_settings: BatchSettings :param run_settings: describes how each ``Model`` should be executed - :type run_settings: RunSettings :param replicas: number of replicas to create - :type replicas: int :param perm_strategy: strategy for expanding ``params`` into ``Model`` instances from params argument options are "all_perm", "step", "random" - or a callable function. Default is "all_perm". - :type perm_strategy: str, optional + or a callable function. :raises SmartSimError: if initialization fails :return: ``Ensemble`` instance - :rtype: Ensemble """ + if name is None: + raise AttributeError("Entity has no name. Please set name attribute.") + check_path = path or osp.join(self.exp_path, name) + entity_path: str = osp.abspath(check_path) + try: new_ensemble = Ensemble( - name, - params or {}, + name=name, + params=params or {}, + path=entity_path, batch_settings=batch_settings, run_settings=run_settings, perm_strat=perm_strategy, @@ -497,27 +522,27 @@ def create_model( ``Model`` instances can be launched sequentially, as a batch job, or as a group by adding them into an ``Ensemble``. - All models require a reference to run settings to specify which + All ``Models`` require a reference to run settings to specify which executable to launch as well provide options for how to launch the executable with the underlying WLM. Furthermore, batch a - reference to a batch settings can be added to launch the model - as a batch job through ``Experiment.start``. If a model with + reference to a batch settings can be added to launch the ``Model`` + as a batch job through ``Experiment.start``. 
If a ``Model`` with a reference to a set of batch settings is added to a larger entity with its own set of batch settings (for e.g. an ``Ensemble``) the batch settings of the larger entity will take - precedence and the batch setting of the model will be + precedence and the batch setting of the ``Model`` will be strategically ignored. Parameters supplied in the `params` argument can be written into - configuration files supplied at runtime to the model through + configuration files supplied at runtime to the ``Model`` through ``Model.attach_generator_files``. `params` can also be turned into executable arguments by calling ``Model.params_to_args`` By default, ``Model`` instances will be executed in the - current working directory if no `path` argument is supplied. + exp_path/model_name directory if no `path` argument is supplied. If a ``Model`` instance is passed to ``Experiment.generate``, a directory within the ``Experiment`` directory will be created - to house the input and output files from the model. + to house the input and output files from the ``Model``. Example initialization of a ``Model`` instance @@ -553,36 +578,31 @@ def create_model( deprecated, but remains as an alias for ``Model.colocate_db_tcp`` for backward compatibility. 
- :param name: name of the model - :type name: str + :param name: name of the ``Model`` :param run_settings: defines how ``Model`` should be run - :type run_settings: RunSettings - :param params: model parameters for writing into configuration files - :type params: dict, optional - :param path: path to where the model should be executed at runtime - :type path: str, optional - :param enable_key_prefixing: If True, data sent to the Orchestrator + :param params: ``Model`` parameters for writing into configuration files + :param path: path to where the ``Model`` should be executed at runtime + :param enable_key_prefixing: If True, data sent to the ``Orchestrator`` using SmartRedis from this ``Model`` will be prefixed with the ``Model`` name. - Default is True. - :type enable_key_prefixing: bool, optional - :param batch_settings: Settings to run model individually as a batch job, - defaults to None - :type batch_settings: BatchSettings | None + :param batch_settings: Settings to run ``Model`` individually as a batch job. :raises SmartSimError: if initialization fails :return: the created ``Model`` - :rtype: Model """ - path = init_default(getcwd(), path, str) - - if path is None: - path = getcwd() + if name is None: + raise AttributeError("Entity has no name. Please set name attribute.") + check_path = path or osp.join(self.exp_path, name) + entity_path: str = osp.abspath(check_path) if params is None: params = {} try: new_model = Model( - name, params, path, run_settings, batch_settings=batch_settings + name=name, + params=params, + path=entity_path, + run_settings=run_settings, + batch_settings=batch_settings, ) if enable_key_prefixing: new_model.enable_key_prefixing() @@ -605,7 +625,7 @@ def create_run_settings( """Create a ``RunSettings`` instance. run_command="auto" will attempt to automatically - match a run command on the system with a RunSettings + match a run command on the system with a ``RunSettings`` class in SmartSim. 
If found, the class corresponding to that run_command will be created and returned. @@ -626,19 +646,12 @@ class in SmartSim. If found, the class corresponding - jsrun (LSF) :param run_command: command to run the executable - :type run_command: str :param exe: executable to run - :type exe: str :param exe_args: arguments to pass to the executable - :type exe_args: list[str], optional :param run_args: arguments to pass to the ``run_command`` - :type run_args: dict[str, t.Union[int, str, float, None]], optional :param env_vars: environment variables to pass to the executable - :type env_vars: dict[str, str], optional :param container: if execution environment is containerized - :type container: Container, optional :return: the created ``RunSettings`` - :rtype: RunSettings """ try: @@ -689,18 +702,12 @@ def create_batch_settings( batch_args=batch_args) bs.set_account("default") - :param nodes: number of nodes for batch job, defaults to 1 - :type nodes: int, optional - :param time: length of batch job, defaults to "" - :type time: str, optional - :param queue: queue or partition (if slurm), defaults to "" - :type queue: str, optional - :param account: user account name for batch system, defaults to "" - :type account: str, optional - :param batch_args: additional batch arguments, defaults to None - :type batch_args: dict[str, str], optional + :param nodes: number of nodes for batch job + :param time: length of batch job + :param queue: queue or partition (if slurm) + :param account: user account name for batch system + :param batch_args: additional batch arguments :return: a newly created BatchSettings instance - :rtype: BatchSettings :raises SmartSimError: if batch creation fails """ try: @@ -721,11 +728,12 @@ def create_batch_settings( def create_database( self, port: int = 6379, + path: t.Optional[str] = None, db_nodes: int = 1, batch: bool = False, hosts: t.Optional[t.Union[t.List[str], str]] = None, run_command: str = "auto", - interface: str = "ipogif0", + 
interface: t.Union[str, t.List[str]] = "ipogif0", account: t.Optional[str] = None, time: t.Optional[str] = None, queue: t.Optional[str] = None, @@ -733,60 +741,49 @@ def create_database( db_identifier: str = "orchestrator", **kwargs: t.Any, ) -> Orchestrator: - """Initialize an Orchestrator database + """Initialize an ``Orchestrator`` database The ``Orchestrator`` database is a key-value store based - on Redis that can be launched together with other Experiment + on Redis that can be launched together with other ``Experiment`` created instances for online data storage. When launched, ``Orchestrator`` can be used to communicate data between Fortran, Python, C, and C++ applications. Machine Learning models in Pytorch, Tensorflow, and ONNX (i.e. scikit-learn) - can also be stored within the Orchestrator database where they + can also be stored within the ``Orchestrator`` database where they can be called remotely and executed on CPU or GPU where the database is hosted. To enable a SmartSim ``Model`` to communicate with the database the workload must utilize the SmartRedis clients. 
For more information on the database, and SmartRedis clients see the - documentation at www.craylabs.org - - :param port: TCP/IP port, defaults to 6379 - :type port: int, optional - :param db_nodes: number of database shards, defaults to 1 - :type db_nodes: int, optional - :param batch: run as a batch workload, defaults to False - :type batch: bool, optional - :param hosts: specify hosts to launch on, defaults to None - :type hosts: list[str], optional - :param run_command: specify launch binary or detect automatically, - defaults to "auto" - :type run_command: str, optional - :param interface: Network interface, defaults to "ipogif0" - :type interface: str, optional - :param account: account to run batch on, defaults to None - :type account: str, optional - :param time: walltime for batch 'HH:MM:SS' format, defaults to None - :type time: str, optional - :param queue: queue to run the batch on, defaults to None - :type queue: str, optional - :param single_cmd: run all shards with one (MPMD) command, defaults to True - :type single_cmd: bool, optional + documentation at https://www.craylabs.org/docs/smartredis.html + + :param port: TCP/IP port + :param db_nodes: number of database shards + :param batch: run as a batch workload + :param hosts: specify hosts to launch on + :param run_command: specify launch binary or detect automatically + :param interface: Network interface + :param account: account to run batch on + :param time: walltime for batch 'HH:MM:SS' format + :param queue: queue to run the batch on + :param single_cmd: run all shards with one (MPMD) command :param db_identifier: an identifier to distinguish this orchestrator in - multiple-database experiments, defaults to "orchestrator" - :type db_identifier: str, optional + multiple-database experiments :raises SmartSimError: if detection of launcher or of run command fails :raises SmartSimError: if user indicated an incompatible run command for the launcher - :return: Orchestrator - :rtype: Orchestrator or 
derived class + :return: Orchestrator or derived class """ - self.append_to_db_identifier_list(db_identifier) - + self._append_to_db_identifier_list(db_identifier) + check_path = path or osp.join(self.exp_path, db_identifier) + entity_path: str = osp.abspath(check_path) return Orchestrator( port=port, + path=entity_path, db_nodes=db_nodes, batch=batch, hosts=hosts, @@ -813,7 +810,6 @@ def reconnect_orchestrator(self, checkpoint: str) -> Orchestrator: :param checkpoint: the `smartsim_db.dat` file created when an ``Orchestrator`` is launched - :type checkpoint: str """ try: orc = self._control.reload_saved_db(checkpoint) @@ -822,6 +818,53 @@ def reconnect_orchestrator(self, checkpoint: str) -> Orchestrator: logger.error(e) raise + def preview( + self, + *args: t.Any, + verbosity_level: previewrenderer.Verbosity = previewrenderer.Verbosity.INFO, + output_format: previewrenderer.Format = previewrenderer.Format.PLAINTEXT, + output_filename: t.Optional[str] = None, + ) -> None: + """Preview entity information prior to launch. This method + aggregates multiple pieces of information to give users insight + into what and how entities will be launched. Any instance of + ``Model``, ``Ensemble``, or ``Orchestrator`` created by the + Experiment can be passed as an argument to the preview method. + + Verbosity levels: + - info: Display user-defined fields and entities. + - debug: Display user-defined field and entities and auto-generated + fields. + - developer: Display user-defined field and entities, auto-generated + fields, and run commands. + + :param verbosity_level: verbosity level specified by user, defaults to info. + :param output_format: Set output format. The possible accepted + output formats are ``plain_text``. + Defaults to ``plain_text``. + :param output_filename: Specify name of file and extension to write + preview data to. If no output filename is set, the preview will be + output to stdout. Defaults to None. 
+ """ + + # Retrieve any active orchestrator jobs + active_dbjobs = self._control.active_orchestrator_jobs + + preview_manifest = Manifest(*args) + + previewrenderer.render( + self, + preview_manifest, + verbosity_level, + output_format, + output_filename, + active_dbjobs, + ) + + @property + def launcher(self) -> str: + return self._launcher + @_contextualize def summary(self, style: str = "github") -> str: """Return a summary of the ``Experiment`` @@ -830,12 +873,9 @@ def summary(self, style: str = "github") -> str: launched and completed in this ``Experiment`` :param style: the style in which the summary table is formatted, - for a full list of styles see: - https://github.com/astanin/python-tabulate#table-format, - defaults to "github" - :type style: str, optional + for a full list of styles see the table-format section of: + https://github.com/astanin/python-tabulate :return: tabulate string of ``Experiment`` history - :rtype: str """ values = [] headers = [ @@ -869,11 +909,18 @@ def summary(self, style: str = "github") -> str: disable_numparse=True, ) + @property + def telemetry(self) -> TelemetryConfiguration: + """Return the telemetry configuration for this entity. + + :returns: configuration of telemetry for this entity + """ + return self._telemetry_cfg + def _launch_summary(self, manifest: Manifest) -> None: """Experiment pre-launch summary of entities that will be launched :param manifest: Manifest of deployables. 
- :type manifest: Manifest """ summary = "\n\n=== Launch Summary ===\n" @@ -894,10 +941,27 @@ def _launch_summary(self, manifest: Manifest) -> None: logger.info(summary) + def _create_entity_dir(self, start_manifest: Manifest) -> None: + def create_entity_dir(entity: t.Union[Orchestrator, Model, Ensemble]) -> None: + if not os.path.isdir(entity.path): + os.makedirs(entity.path) + + for model in start_manifest.models: + create_entity_dir(model) + + for orch in start_manifest.dbs: + create_entity_dir(orch) + + for ensemble in start_manifest.ensembles: + create_entity_dir(ensemble) + + for member in ensemble.models: + create_entity_dir(member) + def __str__(self) -> str: return self.name - def append_to_db_identifier_list(self, db_identifier: str) -> None: + def _append_to_db_identifier_list(self, db_identifier: str) -> None: """Check if db_identifier already exists when calling create_database""" if db_identifier in self.db_identifiers: logger.warning( @@ -907,35 +971,3 @@ def append_to_db_identifier_list(self, db_identifier: str) -> None: ) # Otherwise, add self.db_identifiers.add(db_identifier) - - def enable_telemetry(self) -> None: - """Experiments will start producing telemetry for all entities run - through ``Experiment.start`` - - .. warning:: - - This method is currently implemented so that ALL ``Experiment`` - instances will begin producing telemetry data. In the future it - is planned to have this method work on a "per instance" basis! - """ - self._set_telemetry(True) - - def disable_telemetry(self) -> None: - """Experiments will stop producing telemetry for all entities run - through ``Experiment.start`` - - .. warning:: - - This method is currently implemented so that ALL ``Experiment`` - instances will stop producing telemetry data. In the future it - is planned to have this method work on a "per instance" basis! 
- """ - self._set_telemetry(False) - - @staticmethod - def _set_telemetry(switch: bool, /) -> None: - tm_key = "SMARTSIM_FLAG_TELEMETRY" - if switch: - os.environ[tm_key] = "1" - else: - os.environ[tm_key] = "0" diff --git a/smartsim/log.py b/smartsim/log.py index 55cb88afb..3d6c0860e 100644 --- a/smartsim/log.py +++ b/smartsim/log.py @@ -27,6 +27,7 @@ import functools import logging import pathlib +import socket import sys import threading import typing as t @@ -39,7 +40,8 @@ # constants DEFAULT_DATE_FORMAT: t.Final[str] = "%H:%M:%S" DEFAULT_LOG_FORMAT: t.Final[str] = ( - "%(asctime)s %(hostname)s %(name)s[%(process)d] %(levelname)s %(message)s" + "%(asctime)s %(hostname)s %(name)s[%(process)d:%(threadName)s] " + "%(levelname)s %(message)s" ) EXPERIMENT_LOG_FORMAT = DEFAULT_LOG_FORMAT.replace("s[%", "s {%(exp_path)s} [%") @@ -74,9 +76,7 @@ def _translate_log_level(user_log_level: str = "info") -> str: extremely verbose logging. :param user_log_level: log level specified by user, defaults to info - :type user_log_level: str :returns: Log level for coloredlogs - :rtype: str """ user_log_level = user_log_level.lower() if user_log_level in ["info", "debug", "warning"]: @@ -94,17 +94,12 @@ def get_exp_log_paths() -> t.Tuple[t.Optional[pathlib.Path], t.Optional[pathlib. Returns None for both paths if experiment context is unavailable. 
:returns: 2-tuple of paths to experiment logs in form (output_path, error_path) - if telemetry is enabled, a 2-tuple of None otherwise - :rtype: Tuple[pathlib.Path | None, pathlib.Path | None] """ default_paths = None, None - if not CONFIG.telemetry_enabled: - return default_paths - - if _exp_path := ctx_exp_path.get(): - file_out = pathlib.Path(_exp_path) / CONFIG.telemetry_subdir / "smartsim.out" - file_err = pathlib.Path(_exp_path) / CONFIG.telemetry_subdir / "smartsim.err" + if _path := ctx_exp_path.get(): + file_out = pathlib.Path(_path) / CONFIG.telemetry_subdir / "logs/smartsim.out" + file_err = pathlib.Path(_path) / CONFIG.telemetry_subdir / "logs/smartsim.err" return file_out, file_err return default_paths @@ -127,14 +122,34 @@ def filter(self, record: logging.LogRecord) -> bool: """Enrich log records with active experiment context :param record: the record to evaluate for filtering - :type record: logging.LogRecord :returns: always True - :rtype: bool """ record.exp_path = ctx_exp_path.get() return True +class HostnameFilter(logging.Filter): + """Filter that performs enrichment of a log record by adding + the hostname of the machine executing the code""" + + def __init__(self, name: str = "") -> None: + super().__init__(name) + self._hostname = "" + + @property + @functools.lru_cache + def hostname(self) -> str: + """Returns the hostname of the machine executing the code""" + self._hostname = socket.gethostname() + return self._hostname + + def filter(self, record: logging.LogRecord) -> bool: + # the hostname may already added if using the `ColoredLogs` plugin + if not hasattr(record, "hostname"): + record.hostname = self.hostname + return True + + class ContextAwareLogger(logging.Logger): """A logger customized to automatically write experiment logs to a dynamic target directory by inspecting the value of a context var""" @@ -194,13 +209,9 @@ def get_logger( logger.warning("This is a warning message") :param name: the name of the desired logger - :type 
name: str :param log_level: what level to set the logger to - :type log_level: str :param fmt: the format of the log messages - :type fmt: str :returns: logger instance - :rtype: logging.Logger """ # if name is None, then logger is the root logger # if not root logger, get the name of file without prefix. @@ -225,7 +236,6 @@ def __init__(self, maximum_level: str = "INFO"): """Create a low-pass log filter allowing messages below a specific log level :param maximum_level: The maximum log level to be passed by the filter - :type maximum_level: str """ super().__init__() self.max = maximum_level @@ -234,9 +244,7 @@ def filter(self, record: logging.LogRecord) -> bool: """Filter log records; pass those less than or equal to the maximum level :param record: the record to evaluate for filtering - :type record: logging.LogRecord :returns: True if record level passes filter, False otherwise - :rtype: bool """ # If a string representation of the level is passed in, # the corresponding numeric value is returned. @@ -249,12 +257,9 @@ def log_to_file(filename: str, log_level: str = "debug") -> None: allowing subsequent logging calls to be sent to filename. :param filename: the name of the desired log file. - :type filename: str - :param log_level: as defined in get_logger. Can be specified to allow the file to store more or less verbose logging information. - :type log_level: str """ logger = logging.getLogger("SmartSim") stream = open( # pylint: disable=consider-using-with @@ -274,19 +279,13 @@ def log_to_exp_file( allowing subsequent logging calls to be sent to filename. :param filename: the name of the desired log file. - :type filename: str :param log_level: as defined in get_logger. Can be specified to allow the file to store more or less verbose logging information. 
- :type log_level: int | str :param logger: an existing logger to add the handler to - :type logger: (optional) logging.Logger :param fmt: a log format for the handler (otherwise, EXPERIMENT_LOG_FORMAT) - :type fmt: (optional) str :param log_filter: log filter to attach to handler - :type log_filter: (optional) logging.Filter :return: logging.Handler - :rtype: logging.Handler """ # ensure logs are written even if specified dir doesn't exist log_path = pathlib.Path(filename) @@ -322,9 +321,8 @@ def method_contextualizer( must accept an instance of matching type. :param ctx_var: The ContextVar that will be modified - :type ctx_var: ContextVar :param ctx_map: A function that returns the value to be set to ctx_var - :type ctx_map: t.Callable[[_T], _ContextT]""" + """ def _contextualize( fn: "t.Callable[Concatenate[_T, _PR], _RT]", / diff --git a/smartsim/ml/data.py b/smartsim/ml/data.py index 4cdc27c06..6175259b2 100644 --- a/smartsim/ml/data.py +++ b/smartsim/ml/data.py @@ -57,13 +57,9 @@ class DataInfo: can be accessed in ``DataInfo.sample_name`` and ``DataInfo.target_name``. :param list_name: Name of the aggregation list used for sample datasets - :type list_name: str :param sample_name: Name of tensor holding training samples in stored datasets. - :type sample_name: str :param target_name: Name of tensor holding targets or labels in stored datasets. - :type target_name: str :num_classes: Number of classes (for categorical data). - :type num_classes: int | None """ def __init__( @@ -86,7 +82,6 @@ def publish(self, client: Client) -> None: stored as metastrings and integers stored as metascalars. :param client: Client to connect to Database - :type client: SmartRedis.Client """ info_ds = Dataset(self._ds_name) info_ds.add_meta_string("sample_name", self.sample_name) @@ -104,16 +99,16 @@ def download(self, client: Client) -> None: on the DB, the object members are not modified. 
:param client: Client to connect to Database - :type client: SmartRedis.Client """ try: info_ds = client.get_dataset(self._ds_name) - except RedisReplyError: + except RedisReplyError as e: # If the info was not published, proceed with default parameters logger.warning( "Could not retrieve data for DataInfo object, the following " "values will be kept." ) + logger.error(f"Original error from Redis was {e}") logger.warning(str(self)) return self.sample_name = info_ds.get_meta_strings("sample_name")[0] @@ -148,21 +143,13 @@ class TrainingDataUploader: by the attributes of this class. :param list_name: Name of the dataset as stored on the Orchestrator - :type list_name: str :param sample_name: Name of samples tensor in uploaded Datasets - :type sample_name: str :param target_name: Name of targets tensor (if needed) in uploaded Datasets - :type target_name: str :param num_classes: Number of classes of targets, if categorical - :type num_classes: int :param cluster: Whether the SmartSim Orchestrator is being run as a cluster - :type cluster: bool :param address: Address of Redis DB as : - :type address: str :param rank: Rank of DataUploader in multi-process application (e.g. MPI rank). - :type rank: int :param verbose: If output should be logged to screen. - :type verbose: bool """ @@ -266,35 +253,23 @@ class DataDownloader: - shuffle the dataset if `shuffle` is set to ``True``. :param batch_size: Size of batches obtained with __iter__ - :type batch_size: int :param dynamic: Whether new batches should be donwnloaded when ``update_data`` is called. 
- :type dtnamic: bool :param shuffle: whether order of samples has to be shuffled when calling `update_data` - :type shuffle: bool :param data_info_or_list_name: DataInfo object with details about dataset to download, if a string is passed, it is used to download DataInfo data from DB, assuming it was stored with ``list_name=data_info_or_list_name`` - :type data_info_or_list_name: DataInfo | str :param list_name: Name of aggregation list used to upload data - :type list_name: str :param cluster: Whether the Orchestrator will be run as a cluster - :type cluster: bool :param address: Address of Redis client as : - :type address: str :param replica_rank: When StaticDataDownloader is used distributedly, indicates the rank of this object - :type replica_rank: int :param num_replicas: When BatchDownlaoder is used distributedly, indicates the total number of ranks - :type num_replicas: int :param verbose: Whether log messages should be printed - :type verbose: bool :param init_samples: whether samples should be initialized in the constructor - :type init_samples: bool :param max_fetch_trials: maximum number of attempts to initialize data - :type max_fetch_trials: int """ def __init__( @@ -310,6 +285,7 @@ def __init__( verbose: bool = False, init_samples: bool = True, max_fetch_trials: int = -1, + wait_interval: float = 10.0, ) -> None: self.address = address self.cluster = cluster @@ -336,7 +312,7 @@ def __init__( self.set_replica_parameters(replica_rank, num_replicas) if init_samples: - self.init_samples(max_fetch_trials) + self.init_samples(max_fetch_trials, wait_interval) @property def client(self) -> Client: @@ -378,7 +354,6 @@ def need_targets(self) -> bool: """Compute if targets have to be downloaded. 
:return: Whether targets (or labels) should be downloaded - :rtype: bool """ return bool(self.target_name) and not self.autoencoding @@ -404,13 +379,13 @@ def __iter__( self._data_generation(self._calc_indices(idx)) for idx in range(len(self)) ) - def init_samples(self, init_trials: int = -1) -> None: + def init_samples(self, init_trials: int = -1, wait_interval: float = 10.0) -> None: """Initialize samples (and targets, if needed). A new attempt to download samples will be made every ten seconds, for ``init_trials`` times. + :param init_trials: maximum number of attempts to fetch data - :type init_trials: int """ self._client = Client(self.cluster, self.address) @@ -418,10 +393,10 @@ def init_samples(self, init_trials: int = -1) -> None: max_trials = init_trials or -1 while not self and num_trials != max_trials: self._update_samples_and_targets() - self.log( - "DataLoader could not download samples, will try again in 10 seconds" - ) - time.sleep(10) + msg = "DataLoader could not download samples, will try again in " + msg += f"{wait_interval} seconds" + self.log(msg) + time.sleep(wait_interval) num_trials += 1 if not self: diff --git a/smartsim/ml/tf/__init__.py b/smartsim/ml/tf/__init__.py index eb3cb565e..46d89d733 100644 --- a/smartsim/ml/tf/__init__.py +++ b/smartsim/ml/tf/__init__.py @@ -35,21 +35,20 @@ try: import tensorflow as tf - - installed_tf = Version_(tf.__version__) - assert installed_tf >= "2.4.0" - except ImportError: # pragma: no cover raise ModuleNotFoundError( f"TensorFlow {TF_VERSION} is not installed. " - "Please install it to use smartsim.tf" + "Please install it to use smartsim.ml.tf" ) from None + +try: + installed_tf = Version_(tf.__version__) + assert installed_tf >= TF_VERSION except AssertionError: # pragma: no cover - msg = ( + raise SmartSimError( f"TensorFlow >= {TF_VERSION} is required for smartsim. 
" f"tf, you have {tf.__version__}" - ) - raise SmartSimError() from None + ) from None # pylint: disable=wrong-import-position diff --git a/smartsim/ml/tf/utils.py b/smartsim/ml/tf/utils.py index 69c8e2580..cf69b65e5 100644 --- a/smartsim/ml/tf/utils.py +++ b/smartsim/ml/tf/utils.py @@ -47,13 +47,9 @@ def freeze_model( a trained model and put it inside an ``orchestrator`` instance :param model: TensorFlow or Keras model - :type model: tf.Module :param output_dir: output dir to save model file to - :type output_dir: str :param file_name: name of model file to create - :type file_name: str :return: path to model file, model input layer names, model output layer names - :rtype: str, list[str], list[str] """ # TODO figure out why layer names don't match up to # specified name in Model init. @@ -93,9 +89,7 @@ def serialize_model(model: keras.Model) -> t.Tuple[str, t.List[str], t.List[str] a trained model and put it inside an ``orchestrator`` instance. :param model: TensorFlow or Keras model - :type model: tf.Module :return: serialized model, model input layer names, model output layer names - :rtype: str, list[str], list[str] """ full_model = tf.function(model) diff --git a/smartsim/settings/__init__.py b/smartsim/settings/__init__.py index d417c9ef8..6e8f0bc96 100644 --- a/smartsim/settings/__init__.py +++ b/smartsim/settings/__init__.py @@ -27,6 +27,7 @@ from .alpsSettings import AprunSettings from .base import RunSettings, SettingsBase from .containers import Container, Singularity +from .dragonRunSettings import DragonRunSettings from .lsfSettings import BsubBatchSettings, JsrunSettings from .mpiSettings import MpiexecSettings, MpirunSettings, OrterunSettings from .palsSettings import PalsMpiexecSettings @@ -46,6 +47,7 @@ "SbatchSettings", "SrunSettings", "PalsMpiexecSettings", + "DragonRunSettings", "Container", "Singularity", ] diff --git a/smartsim/settings/alpsSettings.py b/smartsim/settings/alpsSettings.py index 5357312a5..54b9c7525 100644 --- 
a/smartsim/settings/alpsSettings.py +++ b/smartsim/settings/alpsSettings.py @@ -46,13 +46,9 @@ def __init__( ``AprunSettings`` can be used for the `pbs` launcher. :param exe: executable - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_args: arguments for run command, defaults to None - :type run_args: dict[str, t.Union[int, str, float, None]], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional + :param exe_args: executable arguments + :param run_args: arguments for run command + :param env_vars: environment vars to launch job with """ super().__init__( exe, @@ -71,7 +67,6 @@ def make_mpmd(self, settings: RunSettings) -> None: into a single MPMD command joined with ':' :param settings: ``AprunSettings`` instance - :type settings: AprunSettings """ if self.colocated_db_settings: raise SSUnsupportedError( @@ -89,7 +84,6 @@ def set_cpus_per_task(self, cpus_per_task: int) -> None: This sets ``--cpus-per-pe`` :param cpus_per_task: number of cpus to use per task - :type cpus_per_task: int """ self.run_args["cpus-per-pe"] = int(cpus_per_task) @@ -99,7 +93,6 @@ def set_tasks(self, tasks: int) -> None: This sets ``--pes`` :param tasks: number of tasks - :type tasks: int """ self.run_args["pes"] = int(tasks) @@ -109,7 +102,6 @@ def set_tasks_per_node(self, tasks_per_node: int) -> None: This sets ``--pes-per-node`` :param tasks_per_node: number of tasks per node - :type tasks_per_node: int """ self.run_args["pes-per-node"] = int(tasks_per_node) @@ -117,7 +109,6 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: """Specify the hostlist for this job :param host_list: hosts to launch on - :type host_list: str | list[str] :raises TypeError: if not str or list of str """ if isinstance(host_list, str): @@ -134,7 +125,6 @@ def set_hostlist_from_file(self, file_path: str) -> None: This sets 
``--node-list-file`` :param file_path: Path to the hostlist file - :type file_path: str """ self.run_args["node-list-file"] = file_path @@ -142,7 +132,6 @@ def set_excluded_hosts(self, host_list: t.Union[str, t.List[str]]) -> None: """Specify a list of hosts to exclude for launching this job :param host_list: hosts to exclude - :type host_list: str | list[str] :raises TypeError: if not str or list of str """ if isinstance(host_list, str): @@ -159,7 +148,6 @@ def set_cpu_bindings(self, bindings: t.Union[int, t.List[int]]) -> None: This sets ``--cpu-binding`` :param bindings: List of cpu numbers - :type bindings: list[int] | int """ if isinstance(bindings, int): bindings = [bindings] @@ -171,7 +159,6 @@ def set_memory_per_node(self, memory_per_node: int) -> None: This sets ``--memory-per-pe`` in megabytes :param memory_per_node: Per PE memory limit in megabytes - :type memory_per_node: int """ self.run_args["memory-per-pe"] = int(memory_per_node) @@ -181,7 +168,6 @@ def set_verbose_launch(self, verbose: bool) -> None: This sets ``--debug`` arg to the highest level :param verbose: Whether the job should be run verbosely - :type verbose: bool """ if verbose: self.run_args["debug"] = 7 @@ -194,7 +180,6 @@ def set_quiet_launch(self, quiet: bool) -> None: This sets ``--quiet`` :param quiet: Whether the job should be run quietly - :type quiet: bool """ if quiet: self.run_args["quiet"] = None @@ -205,7 +190,6 @@ def format_run_args(self) -> t.List[str]: """Return a list of ALPS formatted run arguments :return: list of ALPS arguments for these settings - :rtype: list[str] """ # args launcher uses args = [] @@ -228,7 +212,6 @@ def format_env_vars(self) -> t.List[str]: """Format the environment variables for aprun :return: list of env vars - :rtype: list[str] """ formatted = [] if self.env_vars: @@ -242,6 +225,5 @@ def set_walltime(self, walltime: str) -> None: Walltime is given in total number of seconds :param walltime: wall time - :type walltime: str """ 
self.run_args["cpu-time-limit"] = str(walltime) diff --git a/smartsim/settings/base.py b/smartsim/settings/base.py index 284d435c0..6373b52fd 100644 --- a/smartsim/settings/base.py +++ b/smartsim/settings/base.py @@ -75,19 +75,11 @@ def __init__( rs = RunSettings("echo", "hello", "mpirun", run_args={"-np": "2"}) :param exe: executable to run - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_command: launch binary (e.g. "srun"), defaults to empty str - :type run_command: str, optional - :param run_args: arguments for run command (e.g. `-np` for `mpiexec`), - defaults to None - :type run_args: dict[str, str], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional - :param container: container type for workload (e.g. "singularity"), - defaults to None - :type container: Container, optional + :param exe_args: executable arguments + :param run_command: launch binary (e.g. "srun") + :param run_args: arguments for run command (e.g. `-np` for `mpiexec`) + :param env_vars: environment vars to launch job with + :param container: container type for workload (e.g. "singularity") """ # Do not expand executable if running within a container self.exe = [exe] if container else [expand_exe_path(exe)] @@ -117,26 +109,50 @@ def __init__( @property def exe_args(self) -> t.Union[str, t.List[str]]: + """Return an immutable list of attached executable arguments. + + :returns: attached executable arguments + """ return self._exe_args @exe_args.setter def exe_args(self, value: t.Union[str, t.List[str], None]) -> None: + """Set the executable arguments. + + :param value: executable arguments + """ self._exe_args = self._build_exe_args(value) @property def run_args(self) -> t.Dict[str, t.Union[int, str, float, None]]: + """Return an immutable list of attached run arguments. 
+ + :returns: attached run arguments + """ return self._run_args @run_args.setter def run_args(self, value: t.Dict[str, t.Union[int, str, float, None]]) -> None: + """Set the run arguments. + + :param value: run arguments + """ self._run_args = copy.deepcopy(value) @property def env_vars(self) -> t.Dict[str, t.Optional[str]]: + """Return an immutable list of attached environment variables. + + :returns: attached environment variables + """ return self._env_vars @env_vars.setter def env_vars(self, value: t.Dict[str, t.Optional[str]]) -> None: + """Set the environment variables. + + :param value: environment variables + """ self._env_vars = copy.deepcopy(value) # To be overwritten by subclasses. Set of reserved args a user cannot change @@ -146,7 +162,6 @@ def set_nodes(self, nodes: int) -> None: """Set the number of nodes :param nodes: number of nodes to run with - :type nodes: int """ logger.warning( ( @@ -159,7 +174,6 @@ def set_tasks(self, tasks: int) -> None: """Set the number of tasks to launch :param tasks: number of tasks to launch - :type tasks: int """ logger.warning( ( @@ -172,7 +186,6 @@ def set_tasks_per_node(self, tasks_per_node: int) -> None: """Set the number of tasks per node :param tasks_per_node: number of tasks to launch per node - :type tasks_per_node: int """ logger.warning( ( @@ -185,7 +198,6 @@ def set_task_map(self, task_mapping: str) -> None: """Set a task mapping :param task_mapping: task mapping - :type task_mapping: str """ logger.warning( ( @@ -198,7 +210,6 @@ def set_cpus_per_task(self, cpus_per_task: int) -> None: """Set the number of cpus per task :param cpus_per_task: number of cpus per task - :type cpus_per_task: int """ logger.warning( ( @@ -211,7 +222,6 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: """Specify the hostlist for this job :param host_list: hosts to launch on - :type host_list: str | list[str] """ logger.warning( ( @@ -224,7 +234,6 @@ def set_hostlist_from_file(self, file_path: str) -> None: 
"""Use the contents of a file to specify the hostlist for this job :param file_path: Path to the hostlist file - :type file_path: str """ logger.warning( ( @@ -237,7 +246,6 @@ def set_excluded_hosts(self, host_list: t.Union[str, t.List[str]]) -> None: """Specify a list of hosts to exclude for launching this job :param host_list: hosts to exclude - :type host_list: str | list[str] """ logger.warning( ( @@ -250,7 +258,6 @@ def set_cpu_bindings(self, bindings: t.Union[int, t.List[int]]) -> None: """Set the cores to which MPI processes are bound :param bindings: List specifing the cores to which MPI processes are bound - :type bindings: list[int] | int """ logger.warning( ( @@ -263,7 +270,6 @@ def set_memory_per_node(self, memory_per_node: int) -> None: """Set the amount of memory required per node in megabytes :param memory_per_node: Number of megabytes per node - :type memory_per_node: int """ logger.warning( ( @@ -276,7 +282,6 @@ def set_verbose_launch(self, verbose: bool) -> None: """Set the job to run in verbose mode :param verbose: Whether the job should be run verbosely - :type verbose: bool """ logger.warning( ( @@ -289,7 +294,6 @@ def set_quiet_launch(self, quiet: bool) -> None: """Set the job to run in quiet mode :param quiet: Whether the job should be run quietly - :type quiet: bool """ logger.warning( ( @@ -302,7 +306,6 @@ def set_broadcast(self, dest_path: t.Optional[str] = None) -> None: """Copy executable file to allocated compute nodes :param dest_path: Path to copy an executable file - :type dest_path: str | None """ logger.warning( ( @@ -315,16 +318,25 @@ def set_time(self, hours: int = 0, minutes: int = 0, seconds: int = 0) -> None: """Automatically format and set wall time :param hours: number of hours to run job - :type hours: int :param minutes: number of minutes to run job - :type minutes: int :param seconds: number of seconds to run job - :type seconds: int """ return self.set_walltime( self._fmt_walltime(int(hours), int(minutes), int(seconds)) 
) + def set_node_feature(self, feature_list: t.Union[str, t.List[str]]) -> None: + """Specify the node feature for this job + + :param feature_list: node feature to launch on + """ + logger.warning( + ( + "Feature specification not implemented for this " + f"RunSettings type: {type(self)}" + ) + ) + @staticmethod def _fmt_walltime(hours: int, minutes: int, seconds: int) -> str: """Convert hours, minutes, and seconds into valid walltime format @@ -332,13 +344,9 @@ def _fmt_walltime(hours: int, minutes: int, seconds: int) -> str: By defualt the formatted wall time is the total number of seconds. :param hours: number of hours to run job - :type hours: int :param minutes: number of minutes to run job - :type minutes: int :param seconds: number of seconds to run job - :type seconds: int :returns: Formatted walltime - :rtype: str """ time_ = hours * 3600 time_ += minutes * 60 @@ -349,7 +357,6 @@ def set_walltime(self, walltime: str) -> None: """Set the formatted walltime :param walltime: Time in format required by launcher`` - :type walltime: str """ logger.warning( ( @@ -362,7 +369,6 @@ def set_binding(self, binding: str) -> None: """Set binding :param binding: Binding - :type binding: str """ logger.warning( ( @@ -375,7 +381,6 @@ def set_mpmd_preamble(self, preamble_lines: t.List[str]) -> None: """Set preamble to a file to make a job MPMD :param preamble_lines: lines to put at the beginning of a file. - :type preamble_lines: list[str] """ logger.warning( ( @@ -388,7 +393,6 @@ def make_mpmd(self, settings: RunSettings) -> None: """Make job an MPMD job :param settings: ``RunSettings`` instance - :type settings: RunSettings """ logger.warning( ( @@ -404,7 +408,6 @@ def run_command(self) -> t.Optional[str]: Attempt to expand the path to the executable if possible :returns: launch binary e.g. 
mpiexec - :type: str | None """ cmd = self._run_command @@ -428,7 +431,6 @@ def update_env(self, env_vars: t.Dict[str, t.Union[str, int, float, bool]]) -> N :param env_vars: environment variables to update or add - :type env_vars: dict[str, Union[str, int, float, bool]] :raises TypeError: if env_vars values cannot be coerced to strings """ val_types = (str, int, float, bool) @@ -445,16 +447,8 @@ def add_exe_args(self, args: t.Union[str, t.List[str]]) -> None: """Add executable arguments to executable :param args: executable arguments - :type args: str | list[str] - :raises TypeError: if exe args are not strings """ - if isinstance(args, str): - args = args.split() - - for arg in args: - if not isinstance(arg, str): - raise TypeError("Executable arguments should be a list of str") - + args = self._build_exe_args(args) self._exe_args.extend(args) def set( @@ -503,11 +497,8 @@ def set( # otherwise returns ["exclusive", "None"] :param arg: name of the argument - :type arg: str :param value: value of the argument - :type value: str | None :param conditon: set the argument if condition evaluates to True - :type condition: bool """ if not isinstance(arg, str): raise TypeError("Argument name should be of type str") @@ -533,26 +524,26 @@ def set( @staticmethod def _build_exe_args(exe_args: t.Optional[t.Union[str, t.List[str]]]) -> t.List[str]: - """Convert exe_args input to a desired collection format""" - if exe_args: - if isinstance(exe_args, str): - return exe_args.split() - if isinstance(exe_args, list): - exe_args = copy.deepcopy(exe_args) - plain_type = all(isinstance(arg, (str)) for arg in exe_args) - if not plain_type: - nested_type = all( - all(isinstance(arg, (str)) for arg in exe_args_list) - for exe_args_list in exe_args - ) - if not nested_type: - raise TypeError( - "Executable arguments were not list of str or str" - ) - return exe_args - return exe_args - raise TypeError("Executable arguments were not list of str or str") - return [] + """Check and convert 
exe_args input to a desired collection format""" + if not exe_args: + return [] + + if isinstance(exe_args, list): + exe_args = copy.deepcopy(exe_args) + + if not ( + isinstance(exe_args, str) + or ( + isinstance(exe_args, list) + and all(isinstance(arg, str) for arg in exe_args) + ) + ): + raise TypeError("Executable arguments were not a list of str or a str.") + + if isinstance(exe_args, str): + return exe_args.split() + + return exe_args def format_run_args(self) -> t.List[str]: """Return formatted run arguments @@ -561,7 +552,6 @@ def format_run_args(self) -> t.List[str]: literally with no formatting. :return: list run arguments for these settings - :rtype: list[str] """ formatted = [] for arg, value in self.run_args.items(): @@ -573,7 +563,6 @@ def format_env_vars(self) -> t.List[str]: """Build environment variable string :returns: formatted list of strings to export variables - :rtype: list[str] """ formatted = [] for key, val in self.env_vars.items(): @@ -619,7 +608,6 @@ def batch_cmd(self) -> str: command. If we cannot, returns the batch command as is. :returns: batch command - :type: str """ if is_valid_cmd(self._batch_cmd): return expand_exe_path(self._batch_cmd) @@ -628,10 +616,18 @@ def batch_cmd(self) -> str: @property def batch_args(self) -> t.Dict[str, t.Optional[str]]: + """Retrieve attached batch arguments + + :returns: attached batch arguments + """ return self._batch_args @batch_args.setter def batch_args(self, value: t.Dict[str, t.Optional[str]]) -> None: + """Attach batch arguments + + :param value: dictionary of batch arguments + """ self._batch_args = copy.deepcopy(value) if value else {} def set_nodes(self, num_nodes: int) -> None: @@ -656,7 +652,6 @@ def set_batch_command(self, command: str) -> None: """Set the command used to launch the batch e.g. 
``sbatch`` :param command: batch command - :type command: str """ self._batch_cmd = command @@ -667,7 +662,6 @@ def add_preamble(self, lines: t.List[str]) -> None: start virtual environments before running the executables. :param line: lines to add to preamble. - :type line: str or list[str] """ if isinstance(lines, str): self._preamble += [lines] @@ -678,7 +672,10 @@ def add_preamble(self, lines: t.List[str]) -> None: @property def preamble(self) -> t.Iterable[str]: - """Return an iterable of preamble clauses to be prepended to the batch file""" + """Return an iterable of preamble clauses to be prepended to the batch file + + :return: attached preamble clauses + """ return (clause for clause in self._preamble) def __str__(self) -> str: # pragma: no-cover diff --git a/smartsim/settings/containers.py b/smartsim/settings/containers.py index bdba1ce88..d2fd4fca2 100644 --- a/smartsim/settings/containers.py +++ b/smartsim/settings/containers.py @@ -39,13 +39,9 @@ class Container: launch a workload within a container into a single object. :param image: local or remote path to container image - :type image: str :param args: arguments to container command - :type args: str | list[str], optional :param mount: paths to mount (bind) from host machine into image. - :type mount: str | list[str] | dict[str, str], optional :param working_directory: path of the working directory within the container - :type working_directory: str """ def __init__( @@ -70,7 +66,6 @@ def _containerized_run_command(self, run_command: str) -> str: """Return modified run_command with container commands prepended. :param run_command: run command from a RunSettings class - :type run_command: str """ raise NotImplementedError( "Containerized run command specification not implemented for this " @@ -99,11 +94,8 @@ class Singularity(Container): :param image: local or remote path to container image, e.g. 
``docker://sylabsio/lolcow`` - :type image: str :param args: arguments to 'singularity exec' command - :type args: str | list[str], optional :param mount: paths to mount (bind) from host machine into image. - :type mount: str | list[str] | dict[str, str], optional """ def __init__(self, *args: t.Any, **kwargs: t.Any) -> None: diff --git a/smartsim/settings/dragonRunSettings.py b/smartsim/settings/dragonRunSettings.py new file mode 100644 index 000000000..b8baa4708 --- /dev/null +++ b/smartsim/settings/dragonRunSettings.py @@ -0,0 +1,78 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +from __future__ import annotations + +import typing as t + +from ..log import get_logger +from .base import RunSettings + +logger = get_logger(__name__) + + +class DragonRunSettings(RunSettings): + def __init__( + self, + exe: str, + exe_args: t.Optional[t.Union[str, t.List[str]]] = None, + env_vars: t.Optional[t.Dict[str, t.Optional[str]]] = None, + **kwargs: t.Any, + ) -> None: + """Initialize run parameters for a Dragon process + + ``DragonRunSettings`` should only be used on systems where Dragon + is available and installed in the current environment. + + If an allocation is specified, the instance receiving these run + parameters will launch on that allocation. + + :param exe: executable to run + :param exe_args: executable arguments, defaults to None + :param env_vars: environment variables for job, defaults to None + :param alloc: allocation ID if running on existing alloc, defaults to None + """ + super().__init__( + exe, + exe_args, + run_command="", + env_vars=env_vars, + **kwargs, + ) + + def set_nodes(self, nodes: int) -> None: + """Set the number of nodes + + :param nodes: number of nodes to run with + """ + self.run_args["nodes"] = nodes + + def set_tasks_per_node(self, tasks_per_node: int) -> None: + """Set the number of tasks for this job + + :param tasks_per_node: number of tasks per node + """ + self.run_args["tasks-per-node"] = tasks_per_node diff --git a/smartsim/settings/lsfSettings.py b/smartsim/settings/lsfSettings.py index 32902c8c6..bce0581c5 100644 --- a/smartsim/settings/lsfSettings.py +++ b/smartsim/settings/lsfSettings.py @@ -51,13 +51,9 @@ def __init__( ``JsrunSettings`` should only be used on LSF-based systems. 
:param exe: executable - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_args: arguments for run command, defaults to None - :type run_args: dict[str, t.Union[int, str, float, None]], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional + :param exe_args: executable arguments + :param run_args: arguments for run command + :param env_vars: environment vars to launch job with """ super().__init__( exe, @@ -81,7 +77,6 @@ def set_num_rs(self, num_rs: t.Union[str, int]) -> None: This sets ``--nrs``. :param num_rs: Number of resource sets or `ALL_HOSTS` - :type num_rs: int or str """ if isinstance(num_rs, str): self.run_args["nrs"] = num_rs @@ -94,7 +89,6 @@ def set_cpus_per_rs(self, cpus_per_rs: int) -> None: This sets ``--cpu_per_rs`` :param cpus_per_rs: number of cpus to use per resource set or ALL_CPUS - :type cpus_per_rs: int or str """ if self.colocated_db_settings: db_cpus = int(t.cast(int, self.colocated_db_settings.get("db_cpus", 0))) @@ -117,7 +111,6 @@ def set_gpus_per_rs(self, gpus_per_rs: int) -> None: This sets ``--gpu_per_rs`` :param gpus_per_rs: number of gpus to use per resource set or ALL_GPUS - :type gpus_per_rs: int or str """ if isinstance(gpus_per_rs, str): self.run_args["gpu_per_rs"] = gpus_per_rs @@ -130,7 +123,6 @@ def set_rs_per_host(self, rs_per_host: int) -> None: This sets ``--rs_per_host`` :param rs_per_host: number of resource sets to use per host - :type rs_per_host: int """ self.run_args["rs_per_host"] = int(rs_per_host) @@ -140,7 +132,6 @@ def set_tasks(self, tasks: int) -> None: This sets ``--np`` :param tasks: number of tasks - :type tasks: int """ self.run_args["np"] = int(tasks) @@ -150,7 +141,6 @@ def set_tasks_per_rs(self, tasks_per_rs: int) -> None: This sets ``--tasks_per_rs`` :param tasks_per_rs: number of tasks per resource set - :type tasks_per_rs: int """ 
self.run_args["tasks_per_rs"] = int(tasks_per_rs) @@ -160,7 +150,6 @@ def set_tasks_per_node(self, tasks_per_node: int) -> None: This function is an alias for `set_tasks_per_rs`. :param tasks_per_node: number of tasks per resource set - :type tasks_per_node: int """ self.set_tasks_per_rs(int(tasks_per_node)) @@ -170,7 +159,6 @@ def set_cpus_per_task(self, cpus_per_task: int) -> None: This function is an alias for `set_cpus_per_rs`. :param cpus_per_task: number of cpus per resource set - :type cpus_per_task: int """ self.set_cpus_per_rs(int(cpus_per_task)) @@ -180,7 +168,6 @@ def set_memory_per_rs(self, memory_per_rs: int) -> None: This sets ``--memory_per_rs`` :param memory_per_rs: Number of megabytes per rs - :type memory_per_rs: int """ self.run_args["memory_per_rs"] = int(memory_per_rs) @@ -190,7 +177,6 @@ def set_memory_per_node(self, memory_per_node: int) -> None: Alias for `set_memory_per_rs`. :param memory_per_node: Number of megabytes per rs - :type memory_per_node: int """ self.set_memory_per_rs(int(memory_per_node)) @@ -200,7 +186,6 @@ def set_binding(self, binding: str) -> None: This sets ``--bind`` :param binding: Binding, e.g. `packed:21` - :type binding: str """ self.run_args["bind"] = binding @@ -213,7 +198,6 @@ def make_mpmd(self, settings: RunSettings) -> None: the list of settings to be launched in the same ERF file. :param settings: ``JsrunSettings`` instance - :type settings: JsrunSettings, optional """ if self.colocated_db_settings: raise SSUnsupportedError( @@ -231,7 +215,6 @@ def set_mpmd_preamble(self, preamble_lines: t.List[str]) -> None: :param preamble_lines: lines to put at the beginning of the ERF file. - :type preamble_lines: list[str] """ self.mpmd_preamble_lines = preamble_lines @@ -249,7 +232,6 @@ def set_erf_sets(self, erf_sets: t.Dict[str, str]) -> None: only `rank` is used. 
:param hosts: dictionary of resources - :type hosts: dict[str,str] """ self.erf_sets = copy.deepcopy(erf_sets) @@ -259,7 +241,6 @@ def format_env_vars(self) -> t.List[str]: its value is propagated from the current environment. :returns: formatted list of strings to export variables - :rtype: list[str] """ format_str = [] for k, v in self.env_vars.items(): @@ -279,8 +260,6 @@ def set_individual_output(self, suffix: t.Optional[str] = None) -> None: :param suffix: Optional suffix to add to output file names, it can contain `%j`, `%h`, `%p`, or `%t`, as specified by `jsrun` options. - :type suffix: str, optional - """ self.run_args["stdio_mode"] = "individual" if suffix: @@ -290,7 +269,6 @@ def format_run_args(self) -> t.List[str]: """Return a list of LSF formatted run arguments :return: list of LSF arguments for these settings - :rtype: list[str] """ # args launcher uses args = [] @@ -403,16 +381,11 @@ def __init__( ) -> None: """Specify ``bsub`` batch parameters for a job - :param nodes: number of nodes for batch, defaults to None - :type nodes: int, optional - :param time: walltime for batch job in format hh:mm, defaults to None - :type time: str, optional - :param project: project for batch launch, defaults to None - :type project: str, optional - :param batch_args: overrides for LSF batch arguments, defaults to None - :type batch_args: dict[str, str], optional - :param smts: SMTs, defaults to 0 - :type smts: int, optional + :param nodes: number of nodes for batch + :param time: walltime for batch job in format hh:mm + :param project: project for batch launch + :param batch_args: overrides for LSF batch arguments + :param smts: SMTs """ self.project: t.Optional[str] = None @@ -445,7 +418,6 @@ def set_walltime(self, walltime: str) -> None: :param walltime: Time in hh:mm format, e.g. 
"10:00" for 10 hours, if time is supplied in hh:mm:ss format, seconds will be ignored and walltime will be set as ``hh:mm`` - :type walltime: str """ # For compatibility with other launchers, as explained in docstring if walltime: @@ -461,7 +433,6 @@ def set_smts(self, smts: int) -> None: takes precedence. :param smts: SMT (e.g on Summit: 1, 2, or 4) - :type smts: int """ self.smts = smts @@ -471,7 +442,6 @@ def set_project(self, project: str) -> None: This sets ``-P``. :param time: project name - :type time: str """ if project: self.project = project @@ -482,7 +452,6 @@ def set_account(self, account: str) -> None: this function is an alias for `set_project`. :param account: project name - :type account: str """ self.set_project(account) @@ -492,7 +461,6 @@ def set_nodes(self, num_nodes: int) -> None: This sets ``-nnodes``. :param nodes: number of nodes - :type nodes: int """ if num_nodes: self.batch_args["nnodes"] = str(int(num_nodes)) @@ -503,6 +471,9 @@ def set_expert_mode_req(self, res_req: str, slots: int) -> None: disregard all other allocation options. 
This sets ``-csm -n slots -R res_req`` + + :param res_req: specific resource requirements + :param slots: number of resources to allocate """ self.expert_mode = True self.batch_args["csm"] = "y" @@ -513,7 +484,6 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: """Specify the hostlist for this job :param host_list: hosts to launch on - :type host_list: str | list[str] :raises TypeError: if not str or list of str """ if isinstance(host_list, str): @@ -530,7 +500,6 @@ def set_tasks(self, tasks: int) -> None: This sets ``-n`` :param tasks: number of tasks - :type tasks: int """ self.batch_args["n"] = str(int(tasks)) @@ -538,7 +507,6 @@ def set_queue(self, queue: str) -> None: """Set the queue for this job :param queue: The queue to submit the job on - :type queue: str """ if queue: self.batch_args["q"] = queue @@ -573,7 +541,6 @@ def format_batch_args(self) -> t.List[str]: """Get the formatted batch arguments for a preview :return: list of batch arguments for Qsub - :rtype: list[str] """ opts = [] diff --git a/smartsim/settings/mpiSettings.py b/smartsim/settings/mpiSettings.py index ce132bcc5..c64c66cbf 100644 --- a/smartsim/settings/mpiSettings.py +++ b/smartsim/settings/mpiSettings.py @@ -61,16 +61,11 @@ def __init__( None can be provided for arguments that do not have values. :param exe: executable - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_args: arguments for run command, defaults to None - :type run_args: dict[str, str], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional + :param exe_args: executable arguments + :param run_args: arguments for run command + :param env_vars: environment vars to launch job with :param fail_if_missing_exec: Throw an exception of the MPI command is missing. 
Otherwise, throw a warning - :type fail_if_missing_exec: bool, optional """ super().__init__( exe, @@ -101,7 +96,6 @@ def make_mpmd(self, settings: RunSettings) -> None: Model instance :param settings: MpirunSettings instance - :type settings: MpirunSettings """ if self.colocated_db_settings: raise SSUnsupportedError( @@ -117,7 +111,6 @@ def set_task_map(self, task_mapping: str) -> None: For examples, see the man page for ``mpirun`` :param task_mapping: task mapping - :type task_mapping: str """ self.run_args["map-by"] = task_mapping @@ -130,7 +123,6 @@ def set_cpus_per_task(self, cpus_per_task: int) -> None: and will soon be replaced. :param cpus_per_task: number of tasks - :type cpus_per_task: int """ self.run_args["cpus-per-proc"] = int(cpus_per_task) @@ -140,7 +132,6 @@ def set_cpu_binding_type(self, bind_type: str) -> None: This sets ``--bind-to`` for MPI compliant implementations :param bind_type: binding type - :type bind_type: str """ self.run_args["bind-to"] = bind_type @@ -148,7 +139,6 @@ def set_tasks_per_node(self, tasks_per_node: int) -> None: """Set the number of tasks per node :param tasks_per_node: number of tasks to launch per node - :type tasks_per_node: int """ self.run_args["npernode"] = int(tasks_per_node) @@ -158,7 +148,6 @@ def set_tasks(self, tasks: int) -> None: This sets ``-n`` for MPI compliant implementations :param tasks: number of tasks - :type tasks: int """ self.run_args["n"] = int(tasks) @@ -168,7 +157,6 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: This sets ``--host`` :param host_list: list of host names - :type host_list: str | list[str] :raises TypeError: if not str or list of str """ if isinstance(host_list, str): @@ -185,7 +173,6 @@ def set_hostlist_from_file(self, file_path: str) -> None: This sets ``--hostfile`` :param file_path: Path to the hostlist file - :type file_path: str """ self.run_args["hostfile"] = file_path @@ -195,7 +182,6 @@ def set_verbose_launch(self, verbose: bool) -> None: This 
sets ``--verbose`` :param verbose: Whether the job should be run verbosely - :type verbose: bool """ if verbose: self.run_args["verbose"] = None @@ -208,7 +194,6 @@ def set_quiet_launch(self, quiet: bool) -> None: This sets ``--quiet`` :param quiet: Whether the job should be run quietly - :type quiet: bool """ if quiet: self.run_args["quiet"] = None @@ -221,7 +206,6 @@ def set_broadcast(self, dest_path: t.Optional[str] = None) -> None: This sets ``--preload-binary`` :param dest_path: Destination path (Ignored) - :type dest_path: str | None """ if dest_path is not None and isinstance(dest_path, str): logger.warning( @@ -238,7 +222,6 @@ def set_walltime(self, walltime: str) -> None: This sets ``--timeout`` :param walltime: number like string of seconds that a job will run in secs - :type walltime: str """ self.run_args["timeout"] = walltime @@ -246,7 +229,6 @@ def format_run_args(self) -> t.List[str]: """Return a list of MPI-standard formatted run arguments :return: list of MPI-standard arguments for these settings - :rtype: list[str] """ # args launcher uses args = [] @@ -265,7 +247,6 @@ def format_env_vars(self) -> t.List[str]: """Format the environment variables for mpirun :return: list of env vars - :rtype: list[str] """ formatted = [] env_string = "-x" @@ -299,13 +280,9 @@ def __init__( None can be provided for arguments that do not have values. 
:param exe: executable - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_args: arguments for run command, defaults to None - :type run_args: dict[str, t.Union[int, str, float, None]], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional + :param exe_args: executable arguments + :param run_args: arguments for run command + :param env_vars: environment vars to launch job with """ super().__init__(exe, exe_args, "mpirun", run_args, env_vars, **kwargs) @@ -330,13 +307,9 @@ def __init__( None can be provided for arguments that do not have values. :param exe: executable - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_args: arguments for run command, defaults to None - :type run_args: dict[str, t.Union[int, str, float, None]], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional + :param exe_args: executable arguments + :param run_args: arguments for run command + :param env_vars: environment vars to launch job with """ super().__init__(exe, exe_args, "mpiexec", run_args, env_vars, **kwargs) @@ -370,12 +343,8 @@ def __init__( None can be provided for arguments that do not have values. 
:param exe: executable - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_args: arguments for run command, defaults to None - :type run_args: dict[str, t.Union[int, str, float, None]], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional + :param exe_args: executable arguments + :param run_args: arguments for run command + :param env_vars: environment vars to launch job with """ super().__init__(exe, exe_args, "orterun", run_args, env_vars, **kwargs) diff --git a/smartsim/settings/palsSettings.py b/smartsim/settings/palsSettings.py index e43cd9466..4100e8efe 100644 --- a/smartsim/settings/palsSettings.py +++ b/smartsim/settings/palsSettings.py @@ -45,13 +45,9 @@ class PalsMpiexecSettings(_BaseMPISettings): None can be provided for arguments that do not have values. :param exe: executable - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_args: arguments for run command, defaults to None - :type run_args: dict[str, str], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional + :param exe_args: executable arguments + :param run_args: arguments for run command + :param env_vars: environment vars to launch job with """ def __init__( @@ -74,16 +70,11 @@ def __init__( None can be provided for arguments that do not have values. 
:param exe: executable - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: str | list[str], optional - :param run_args: arguments for run command, defaults to None - :type run_args: dict[str, t.Union[int, str, float, None]], optional - :param env_vars: environment vars to launch job with, defaults to None - :type env_vars: dict[str, str], optional + :param exe_args: executable arguments + :param run_args: arguments for run command + :param env_vars: environment vars to launch job with :param fail_if_missing_exec: Throw an exception of the MPI command is missing. Otherwise, throw a warning - :type fail_if_missing_exec: bool, optional """ super().__init__( exe, @@ -103,7 +94,6 @@ def set_task_map(self, task_mapping: str) -> None: For examples, see the man page for ``mpirun`` :param task_mapping: task mapping - :type task_mapping: str """ logger.warning("set_task_map not supported under PALS") @@ -116,7 +106,6 @@ def set_cpus_per_task(self, cpus_per_task: int) -> None: and will soon be replaced. 
:param cpus_per_task: number of tasks - :type cpus_per_task: int """ logger.warning("set_cpus_per_task not supported under PALS") @@ -126,7 +115,6 @@ def set_cpu_binding_type(self, bind_type: str) -> None: This sets ``--bind-to`` for MPI compliant implementations :param bind_type: binding type - :type bind_type: str """ self.run_args["cpu-bind"] = bind_type @@ -134,7 +122,6 @@ def set_tasks(self, tasks: int) -> None: """Set the number of tasks :param tasks: number of total tasks to launch - :type tasks: int """ self.run_args["np"] = int(tasks) @@ -142,7 +129,6 @@ def set_tasks_per_node(self, tasks_per_node: int) -> None: """Set the number of tasks per node :param tasks_per_node: number of tasks to launch per node - :type tasks_per_node: int """ self.run_args["ppn"] = int(tasks_per_node) @@ -152,7 +138,6 @@ def set_quiet_launch(self, quiet: bool) -> None: This sets ``--quiet`` :param quiet: Whether the job should be run quietly - :type quiet: bool """ logger.warning("set_quiet_launch not supported under PALS") @@ -163,7 +148,6 @@ def set_broadcast(self, dest_path: t.Optional[str] = None) -> None: This sets ``--preload-binary`` :param dest_path: Destination path (Ignored) - :type dest_path: str | None """ if dest_path is not None and isinstance(dest_path, str): logger.warning( @@ -178,7 +162,6 @@ def set_walltime(self, walltime: str) -> None: """Set the maximum number of seconds that a job will run :param walltime: number like string of seconds that a job will run in secs - :type walltime: str """ logger.warning("set_walltime not supported under PALS") @@ -186,7 +169,6 @@ def set_gpu_affinity_script(self, affinity: str, *args: t.Any) -> None: """Set the GPU affinity through a bash script :param affinity: path to the affinity script - :type affinity: str """ self.affinity_script.append(str(affinity)) for arg in args: @@ -196,7 +178,6 @@ def format_run_args(self) -> t.List[str]: """Return a list of MPI-standard formatted run arguments :return: list of MPI-standard 
arguments for these settings - :rtype: list[str] """ # args launcher uses args = [] @@ -219,7 +200,6 @@ def format_env_vars(self) -> t.List[str]: """Format the environment variables for mpirun :return: list of env vars - :rtype: list[str] """ formatted = [] @@ -242,7 +222,6 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: This sets ``--hosts`` :param host_list: list of host names - :type host_list: str | list[str] :raises TypeError: if not str or list of str """ if isinstance(host_list, str): diff --git a/smartsim/settings/pbsSettings.py b/smartsim/settings/pbsSettings.py index 19a58b11c..09d48181a 100644 --- a/smartsim/settings/pbsSettings.py +++ b/smartsim/settings/pbsSettings.py @@ -53,20 +53,13 @@ def __init__( the value for select statement supplied in ``resources`` will override. - :param nodes: number of nodes for batch, defaults to None - :type nodes: int, optional - :param ncpus: number of cpus per node, defaults to None - :type ncpus: int, optional - :param time: walltime for batch job, defaults to None - :type time: str, optional - :param queue: queue to run batch in, defaults to None - :type queue: str, optional - :param account: account for batch launch, defaults to None - :type account: str, optional - :param resources: overrides for resource arguments, defaults to None - :type resources: dict[str, str], optional - :param batch_args: overrides for PBS batch arguments, defaults to None - :type batch_args: dict[str, str], optional + :param nodes: number of nodes for batch + :param ncpus: number of cpus per node + :param time: walltime for batch job + :param queue: queue to run batch in + :param account: account for batch launch + :param resources: overrides for resource arguments + :param batch_args: overrides for PBS batch arguments """ self._ncpus = ncpus @@ -112,7 +105,6 @@ def set_nodes(self, num_nodes: int) -> None: nodes here is sets the 'nodes' resource. 
:param num_nodes: number of nodes - :type num_nodes: int """ if num_nodes: @@ -122,7 +114,6 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: """Specify the hostlist for this job :param host_list: hosts to launch on - :type host_list: str | list[str] :raises TypeError: if not str or list of str """ if isinstance(host_list, str): @@ -143,7 +134,6 @@ def set_walltime(self, walltime: str) -> None: this value will be overridden :param walltime: wall time - :type walltime: str """ if walltime: self.set_resource("walltime", walltime) @@ -152,7 +142,6 @@ def set_queue(self, queue: str) -> None: """Set the queue for the batch job :param queue: queue name - :type queue: str """ if queue: self.batch_args["q"] = str(queue) @@ -165,7 +154,6 @@ def set_ncpus(self, num_cpus: t.Union[int, str]) -> None: this value will be overridden :param num_cpus: number of cpus per node in select - :type num_cpus: int """ self._ncpus = int(num_cpus) @@ -173,7 +161,6 @@ def set_account(self, account: str) -> None: """Set the account for this batch job :param acct: account id - :type acct: str """ if account: self.batch_args["A"] = str(account) @@ -185,9 +172,7 @@ def set_resource(self, resource_name: str, value: t.Union[str, int]) -> None: arguments will be overridden. Likewise for Walltime :param resource_name: name of resource, e.g. walltime - :type resource_name: str :param value: value - :type value: str """ # TODO add error checking here # TODO include option to overwrite place (warning for orchestrator?) 
@@ -200,7 +185,6 @@ def format_batch_args(self) -> t.List[str]: """Get the formatted batch arguments for a preview :return: batch arguments for Qsub - :rtype: list[str] :raises ValueError: if options are supplied without values """ opts = self._create_resource_list() diff --git a/smartsim/settings/settings.py b/smartsim/settings/settings.py index 6e6172507..5f7fc3fe2 100644 --- a/smartsim/settings/settings.py +++ b/smartsim/settings/settings.py @@ -32,6 +32,7 @@ AprunSettings, BsubBatchSettings, Container, + DragonRunSettings, JsrunSettings, MpiexecSettings, MpirunSettings, @@ -63,19 +64,12 @@ def create_batch_settings( :param launcher: launcher for this experiment, if set to 'auto', an attempt will be made to find an available launcher on the system - :type launcher: str - :param nodes: number of nodes for batch job, defaults to 1 - :type nodes: int, optional - :param time: length of batch job, defaults to "" - :type time: str, optional - :param queue: queue or partition (if slurm), defaults to "" - :type queue: str, optional - :param account: user account name for batch system, defaults to "" - :type account: str, optional - :param batch_args: additional batch arguments, defaults to None - :type batch_args: dict[str, str], optional + :param nodes: number of nodes for batch job + :param time: length of batch job + :param queue: queue or partition (if slurm) + :param account: user account name for batch system + :param batch_args: additional batch arguments :return: a newly created BatchSettings instance - :rtype: BatchSettings :raises SmartSimError: if batch creation fails """ # all supported batch class implementations @@ -83,10 +77,13 @@ def create_batch_settings( "pbs": QsubBatchSettings, "slurm": SbatchSettings, "lsf": BsubBatchSettings, + "pals": QsubBatchSettings, } - if launcher == "auto": + if launcher in ["auto", "dragon"]: launcher = detect_launcher() + if launcher == "dragon": + by_launcher["dragon"] = by_launcher[launcher] if launcher == "local": raise 
SmartSimError("Local launcher does not support batch workloads") @@ -127,21 +124,13 @@ def create_run_settings( :param launcher: launcher to create settings for, if set to 'auto', an attempt will be made to find an available launcher on the system - :type launcher: str :param run_command: command to run the executable - :type run_command: str :param exe: executable to run - :type exe: str :param exe_args: arguments to pass to the executable - :type exe_args: list[str], optional :param run_args: arguments to pass to the ``run_command`` - :type run_args: list[str], optional :param env_vars: environment variables to pass to the executable - :type env_vars: dict[str, str], optional - :param container: container type for workload (e.g. "singularity"), defaults to None - :type container: Container, optional + :param container: container type for workload (e.g. "singularity") :return: the created ``RunSettings`` - :rtype: RunSettings :raises SmartSimError: if run_command=="auto" and detection fails """ # all supported RunSettings child classes @@ -159,6 +148,7 @@ def create_run_settings( # run commands supported by each launcher # in order of suspected user preference by_launcher = { + "dragon": [""], "slurm": ["srun", "mpirun", "mpiexec"], "pbs": ["aprun", "mpirun", "mpiexec"], "pals": ["mpiexec"], @@ -171,7 +161,7 @@ def create_run_settings( def _detect_command(launcher: str) -> str: if launcher in by_launcher: - if launcher == "local": + if launcher in ["local", "dragon"]: return "" for cmd in by_launcher[launcher]: @@ -193,6 +183,11 @@ def _detect_command(launcher: str) -> str: # no auto detection for local, revert to false run_command = _detect_command(launcher) + if launcher == "dragon": + return DragonRunSettings( + exe=exe, exe_args=exe_args, env_vars=env_vars, container=container, **kwargs + ) + # if user specified and supported or auto detection worked if run_command and run_command in supported: return supported[run_command](launcher)( diff --git 
a/smartsim/settings/slurmSettings.py b/smartsim/settings/slurmSettings.py index 935a8df39..64f73fa9c 100644 --- a/smartsim/settings/slurmSettings.py +++ b/smartsim/settings/slurmSettings.py @@ -55,15 +55,10 @@ def __init__( parameters will launch on that allocation. :param exe: executable to run - :type exe: str - :param exe_args: executable arguments, defaults to None - :type exe_args: list[str] | str, optional - :param run_args: srun arguments without dashes, defaults to None - :type run_args: dict[str, t.Union[int, str, float, None]], optional - :param env_vars: environment variables for job, defaults to None - :type env_vars: dict[str, str], optional - :param alloc: allocation ID if running on existing alloc, defaults to None - :type alloc: str, optional + :param exe_args: executable arguments + :param run_args: srun arguments without dashes + :param env_vars: environment variables for job + :param alloc: allocation ID if running on existing alloc """ super().__init__( exe, @@ -84,7 +79,6 @@ def set_nodes(self, nodes: int) -> None: Effectively this is setting: ``srun --nodes `` :param nodes: number of nodes to run with - :type nodes: int """ self.run_args["nodes"] = int(nodes) @@ -95,7 +89,6 @@ def make_mpmd(self, settings: RunSettings) -> None: Model instance :param settings: SrunSettings instance - :type settings: SrunSettings """ if self.colocated_db_settings: raise SSUnsupportedError( @@ -117,7 +110,6 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: This sets ``--nodelist`` :param host_list: hosts to launch on - :type host_list: str | list[str] :raises TypeError: if not str or list of str """ if isinstance(host_list, str): @@ -134,7 +126,6 @@ def set_hostlist_from_file(self, file_path: str) -> None: This sets ``--nodefile`` :param file_path: Path to the hostlist file - :type file_path: str """ self.run_args["nodefile"] = file_path @@ -142,7 +133,6 @@ def set_excluded_hosts(self, host_list: t.Union[str, t.List[str]]) -> None: 
"""Specify a list of hosts to exclude for launching this job :param host_list: hosts to exclude - :type host_list: list[str] :raises TypeError: """ if isinstance(host_list, str): @@ -159,7 +149,6 @@ def set_cpus_per_task(self, cpus_per_task: int) -> None: This sets ``--cpus-per-task`` :param num_cpus: number of cpus to use per task - :type num_cpus: int """ self.run_args["cpus-per-task"] = int(cpus_per_task) @@ -169,7 +158,6 @@ def set_tasks(self, tasks: int) -> None: This sets ``--ntasks`` :param tasks: number of tasks - :type tasks: int """ self.run_args["ntasks"] = int(tasks) @@ -179,7 +167,6 @@ def set_tasks_per_node(self, tasks_per_node: int) -> None: This sets ``--ntasks-per-node`` :param tasks_per_node: number of tasks per node - :type tasks_per_node: int """ self.run_args["ntasks-per-node"] = int(tasks_per_node) @@ -189,7 +176,6 @@ def set_cpu_bindings(self, bindings: t.Union[int, t.List[int]]) -> None: This sets ``--cpu-bind`` using the ``map_cpu:`` option :param bindings: List specifing the cores to which MPI processes are bound - :type bindings: list[int] | int """ if isinstance(bindings, int): bindings = [bindings] @@ -203,7 +189,6 @@ def set_memory_per_node(self, memory_per_node: int) -> None: This sets ``--mem`` in megabytes :param memory_per_node: Amount of memory per node in megabytes - :type memory_per_node: int """ self.run_args["mem"] = f"{int(memory_per_node)}M" @@ -213,7 +198,6 @@ def set_verbose_launch(self, verbose: bool) -> None: This sets ``--verbose`` :param verbose: Whether the job should be run verbosely - :type verbose: bool """ if verbose: self.run_args["verbose"] = None @@ -226,7 +210,6 @@ def set_quiet_launch(self, quiet: bool) -> None: This sets ``--quiet`` :param quiet: Whether the job should be run quietly - :type quiet: bool """ if quiet: self.run_args["quiet"] = None @@ -239,10 +222,23 @@ def set_broadcast(self, dest_path: t.Optional[str] = None) -> None: This sets ``--bcast`` :param dest_path: Path to copy an executable file - 
:type dest_path: str | None """ self.run_args["bcast"] = dest_path + def set_node_feature(self, feature_list: t.Union[str, t.List[str]]) -> None: + """Specify the node feature for this job + + This sets ``-C`` + + :param feature_list: node feature to launch on + :raises TypeError: if not str or list of str + """ + if isinstance(feature_list, str): + feature_list = [feature_list.strip()] + elif not all(isinstance(feature, str) for feature in feature_list): + raise TypeError("node_feature argument must be string or list of strings") + self.run_args["C"] = ",".join(feature_list) + @staticmethod def _fmt_walltime(hours: int, minutes: int, seconds: int) -> str: """Convert hours, minutes, and seconds into valid walltime format @@ -250,13 +246,9 @@ def _fmt_walltime(hours: int, minutes: int, seconds: int) -> str: Converts time to format HH:MM:SS :param hours: number of hours to run job - :type hours: int :param minutes: number of minutes to run job - :type minutes: int :param seconds: number of seconds to run job - :type seconds: int :returns: Formatted walltime - :rtype: str """ return fmt_walltime(hours, minutes, seconds) @@ -266,7 +258,6 @@ def set_walltime(self, walltime: str) -> None: format = "HH:MM:SS" :param walltime: wall time - :type walltime: str """ self.run_args["time"] = str(walltime) @@ -276,7 +267,6 @@ def set_het_group(self, het_group: t.Iterable[int]) -> None: this sets `--het-group` :param het_group: list of heterogeneous groups - :type het_group: int or iterable of ints """ het_size_env = os.getenv("SLURM_HET_SIZE") if het_size_env is None: @@ -305,7 +295,6 @@ def format_run_args(self) -> t.List[str]: """Return a list of slurm formatted run arguments :return: list of slurm arguments for these settings - :rtype: list[str] """ # add additional slurm arguments based on key length opts = [] @@ -338,7 +327,7 @@ def check_env_vars(self) -> None: "environment. If the job is running in an interactive " f"allocation, the value {v} will not be set. 
Please " "consider removing the variable from the environment " - "and re-run the experiment." + "and re-running the experiment." ) logger.warning(msg) @@ -346,7 +335,6 @@ def format_env_vars(self) -> t.List[str]: """Build bash compatible environment variable string for Slurm :returns: the formatted string of environment variables - :rtype: list[str] """ self.check_env_vars() return [f"{k}={v}" for k, v in self.env_vars.items() if "," not in str(v)] @@ -359,7 +347,6 @@ def format_comma_sep_env_vars(self) -> t.Tuple[str, t.List[str]]: for more information on this, see the slurm documentation for srun :returns: the formatted string of environment variables - :rtype: tuple[str, list[str]] """ self.check_env_vars() exportable_env, compound_env, key_only = [], [], [] @@ -392,13 +379,9 @@ def fmt_walltime(hours: int, minutes: int, seconds: int) -> str: Converts time to format HH:MM:SS :param hours: number of hours to run job - :type hours: int :param minutes: number of minutes to run job - :type minutes: int :param seconds: number of seconds to run job - :type seconds: int :returns: Formatted walltime - :rtype: str """ delta = datetime.timedelta(hours=hours, minutes=minutes, seconds=seconds) fmt_str = str(delta) @@ -427,14 +410,10 @@ def __init__( Initialization values provided (nodes, time, account) will overwrite the same arguments in ``batch_args`` if present - :param nodes: number of nodes, defaults to None - :type nodes: int, optional + :param nodes: number of nodes :param time: walltime for job, e.g. 
"10:00:00" for 10 hours - :type time: str, optional - :param account: account for job, defaults to None - :type account: str, optional - :param batch_args: extra batch arguments, defaults to None - :type batch_args: dict[str, str], optional + :param account: account for job + :param batch_args: extra batch arguments """ super().__init__( "sbatch", @@ -451,7 +430,6 @@ def set_walltime(self, walltime: str) -> None: format = "HH:MM:SS" :param walltime: wall time - :type walltime: str """ # TODO check for formatting here if walltime: @@ -461,7 +439,6 @@ def set_nodes(self, num_nodes: int) -> None: """Set the number of nodes for this batch job :param num_nodes: number of nodes - :type num_nodes: int """ if num_nodes: self.batch_args["nodes"] = str(int(num_nodes)) @@ -470,7 +447,6 @@ def set_account(self, account: str) -> None: """Set the account for this batch job :param account: account id - :type account: str """ if account: self.batch_args["account"] = account @@ -479,7 +455,6 @@ def set_partition(self, partition: str) -> None: """Set the partition for the batch job :param partition: partition name - :type partition: str """ self.batch_args["partition"] = str(partition) @@ -489,7 +464,6 @@ def set_queue(self, queue: str) -> None: Sets the partition for the slurm batch job :param queue: the partition to run the batch job on - :type queue: str """ if queue: self.set_partition(queue) @@ -500,7 +474,6 @@ def set_cpus_per_task(self, cpus_per_task: int) -> None: This sets ``--cpus-per-task`` :param num_cpus: number of cpus to use per task - :type num_cpus: int """ self.batch_args["cpus-per-task"] = str(int(cpus_per_task)) @@ -508,7 +481,6 @@ def set_hostlist(self, host_list: t.Union[str, t.List[str]]) -> None: """Specify the hostlist for this job :param host_list: hosts to launch on - :type host_list: str | list[str] :raises TypeError: if not str or list of str """ if isinstance(host_list, str): @@ -523,7 +495,6 @@ def format_batch_args(self) -> t.List[str]: """Get the 
formatted batch arguments for a preview :return: batch arguments for Sbatch - :rtype: list[str] """ opts = [] # TODO add restricted here diff --git a/smartsim/status.py b/smartsim/status.py index 409ec8c1a..e42ef3191 100644 --- a/smartsim/status.py +++ b/smartsim/status.py @@ -24,27 +24,21 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +from enum import Enum -# Statuses that are applied to jobs -STATUS_RUNNING = "Running" -STATUS_COMPLETED = "Completed" -STATUS_CANCELLED = "Cancelled" -STATUS_FAILED = "Failed" -STATUS_NEW = "New" -STATUS_PAUSED = "Paused" -STATUS_NEVER_STARTED = "NeverStarted" -# SmartSim status mapping -SMARTSIM_STATUS = { - "Running": STATUS_RUNNING, - "Paused": STATUS_PAUSED, - "Completed": STATUS_COMPLETED, - "Cancelled": STATUS_CANCELLED, - "Failed": STATUS_FAILED, - "New": STATUS_NEW, - "NeverStarted": STATUS_NEVER_STARTED, -} +class SmartSimStatus(Enum): + STATUS_RUNNING = "Running" + STATUS_COMPLETED = "Completed" + STATUS_CANCELLED = "Cancelled" + STATUS_FAILED = "Failed" + STATUS_NEW = "New" + STATUS_PAUSED = "Paused" + STATUS_NEVER_STARTED = "NeverStarted" + -# Status groupings -TERMINAL_STATUSES = {STATUS_CANCELLED, STATUS_COMPLETED, STATUS_FAILED} -LIVE_STATUSES = {STATUS_RUNNING, STATUS_PAUSED, STATUS_NEW} +TERMINAL_STATUSES = { + SmartSimStatus.STATUS_CANCELLED, + SmartSimStatus.STATUS_COMPLETED, + SmartSimStatus.STATUS_FAILED, +} diff --git a/smartsim/templates/templates/preview/plain_text/activeinfra.template b/smartsim/templates/templates/preview/plain_text/activeinfra.template new file mode 100644 index 000000000..8f403fbc0 --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/activeinfra.template @@ -0,0 +1,9 @@ + + = Database Identifier: {{ db.entity.db_identifier }} = + Shards: {{ db.entity.num_shards }} + TCP/IP Port(s): + {%- for port in db.entity.ports %} + {{ port }} + {%- endfor %} + Network Interface: 
{{ db.entity.run_settings.exe_args | get_ifname }} + Type: {{ config.database_cli | get_dbtype }} diff --git a/smartsim/templates/templates/preview/plain_text/base.template b/smartsim/templates/templates/preview/plain_text/base.template new file mode 100644 index 000000000..511712554 --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/base.template @@ -0,0 +1,52 @@ + +{% include "experiment.template" %} +{%- if manifest.has_deployable or active_dbjobs %} + +=== Entity Preview === + + {%- if active_dbjobs %} + + == Active Infrastructure == + {%- for name, db in active_dbjobs.items() %} + {% include "activeinfra.template" %} + {%- endfor %} + {%- endif %} + {%- if manifest.dbs %} + + == Orchestrators == + {%- for db in manifest.dbs %} + {%- if db.is_active() %} + WARNING: Cannot preview {{ db.name }}, because it is already started. + {%- else %} + {% include "orchestrator.template" %} + {%- endif %} + {%- endfor %} + {%- endif %} + {%- if manifest.models %} + + == Models == + {%- for model in manifest.models %} + + = Model Name: {{ model.name }} = + {%- include "model.template" %} + {%- if model.run_settings.colocated_db_settings or manifest.dbs %} + Client Configuration: + {%- if model.run_settings.colocated_db_settings %} + {%- include "clientconfigcolo.template" %} + {%- endif %} + {%- if manifest.dbs %} + {%- include "clientconfig.template" %} + {%- endif %} + {%- endif %} + {%- endfor %} + {%- endif %} + + {%- if manifest.ensembles %} + + == Ensembles == + {%- for ensemble in manifest.ensembles %} + {%- include "ensemble.template" %} + {%- endfor %} + {%- endif %} + +{%- endif %} diff --git a/smartsim/templates/templates/preview/plain_text/clientconfig.template b/smartsim/templates/templates/preview/plain_text/clientconfig.template new file mode 100644 index 000000000..3342918d9 --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/clientconfig.template @@ -0,0 +1,7 @@ + +{%- if verbosity_level == Verbosity.INFO %} +{%- include 
"clientconfig_info.template" -%} +{%- endif %} +{%- if verbosity_level == Verbosity.DEBUG or verbosity_level == Verbosity.DEVELOPER %} +{%- include "clientconfig_debug.template" -%} +{%- endif %} diff --git a/smartsim/templates/templates/preview/plain_text/clientconfig_debug.template b/smartsim/templates/templates/preview/plain_text/clientconfig_debug.template new file mode 100644 index 000000000..51dafd0d1 --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/clientconfig_debug.template @@ -0,0 +1,29 @@ + + {%- for db in manifest.dbs %} + {%- if db.name %} + Database Identifier: {{ db.name }} + {%- endif %} + {%- if verbosity_level == Verbosity.DEBUG or verbosity_level == Verbosity.DEVELOPER %} + Database Backend: {{ config.database_cli | get_dbtype }} + TCP/IP Port(s): + {%- for port in db.ports %} + {{ port }} + {%- endfor %} + Type: Standalone + {%- endif %} + {%- endfor %} + {%- if model.incoming_entities %} + {%- if verbosity_level == Verbosity.DEBUG or verbosity_level == Verbosity.DEVELOPER %} + Incoming Entities (Available Data Sources): + {%- for incoming in model.incoming_entities %} + {{ incoming.name }} + {%- endfor %} + {%- endif %} + {%- endif %} + {%- if verbosity_level == Verbosity.DEBUG or verbosity_level == Verbosity.DEVELOPER %} + Outgoing Key Collision Prevention (Key Prefixing): + Tensors: {{ model.query_key_prefixing() | as_toggle }} + Datasets: {{ model.query_key_prefixing() | as_toggle }} + ML Models/Torch Scripts: {{ False | as_toggle }} + Aggregation Lists: {{ model.query_key_prefixing() | as_toggle }} + {%- endif %} diff --git a/smartsim/templates/templates/preview/plain_text/clientconfig_info.template b/smartsim/templates/templates/preview/plain_text/clientconfig_info.template new file mode 100644 index 000000000..164f4bd4a --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/clientconfig_info.template @@ -0,0 +1,19 @@ + + {%- for db in manifest.dbs %} + {%- if db.name %} + Database Identifier: {{ db.name }} 
+ {%- endif %} + Database Backend: {{ config.database_cli | get_dbtype }} + TCP/IP Port(s): + {%- for port in db.ports %} + {{ port }} + {%- endfor %} + Type: Standalone + {%- endfor %} + {%- if model.query_key_prefixing() %} + Outgoing Key Collision Prevention (Key Prefixing): + Tensors: {{ model.query_key_prefixing() | as_toggle }} + Datasets: {{ model.query_key_prefixing() | as_toggle }} + ML Models/Torch Scripts: {{ False | as_toggle }} + Aggregation Lists: {{ model.query_key_prefixing() | as_toggle }} + {%- endif %} diff --git a/smartsim/templates/templates/preview/plain_text/clientconfigcolo.template b/smartsim/templates/templates/preview/plain_text/clientconfigcolo.template new file mode 100644 index 000000000..c1278a19a --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/clientconfigcolo.template @@ -0,0 +1,7 @@ + +{%- if verbosity_level == Verbosity.INFO %} +{%- include "clientconfigcolo_info.template" %} +{% endif %} +{%- if verbosity_level == Verbosity.DEBUG or verbosity_level == Verbosity.DEVELOPER %} +{%- include "clientconfigcolo_debug.template" %} +{%- endif %} diff --git a/smartsim/templates/templates/preview/plain_text/clientconfigcolo_debug.template b/smartsim/templates/templates/preview/plain_text/clientconfigcolo_debug.template new file mode 100644 index 000000000..303fd0dca --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/clientconfigcolo_debug.template @@ -0,0 +1,37 @@ + + {%- if model.run_settings.colocated_db_settings.db_identifier %} + Database Identifier: {{ model.run_settings.colocated_db_settings.db_identifier }} + {%- else %} + Database Identifier: N/A + {%- endif %} + Database Backend: {{ config.database_cli | get_dbtype }} + {%- if model.run_settings.colocated_db_settings %} + {%- if model.run_settings.colocated_db_settings.port %} + Connection Type: TCP + TCP/IP Port(s): + {{ model.run_settings.colocated_db_settings.port }} + {%- endif %} + {%- if model.run_settings.colocated_db_settings.unix_socket 
%} + Connection Type: UDS + Unix Socket: {{ model.run_settings.colocated_db_settings.unix_socket }} + {%- endif %} + {%- if model.run_settings.colocated_db_settings.ifname %} + {%- if model.run_settings.colocated_db_settings.ifname | is_list %} + Network Interface Name: {{ model.run_settings.colocated_db_settings.ifname[0] }} + {%- else %} + Network Interface Name: {{ model.run_settings.colocated_db_settings.ifname }} + {%- endif %} + {%- endif %} + Type: Colocated + {%- if model.incoming_entities %} + Incoming Entities (Available Data Sources): + {%- for incoming in model.incoming_entities %} + {{ incoming.name }} + {%- endfor %} + {%- endif %} + {%- endif %} + Outgoing Key Collision Prevention (Key Prefixing): + Tensors: {{ model.query_key_prefixing() | as_toggle }} + Datasets: {{ model.query_key_prefixing() | as_toggle }} + ML Models/Torch Scripts: {{ False | as_toggle }} + Aggregation Lists: {{ model.query_key_prefixing() | as_toggle }} diff --git a/smartsim/templates/templates/preview/plain_text/clientconfigcolo_info.template b/smartsim/templates/templates/preview/plain_text/clientconfigcolo_info.template new file mode 100644 index 000000000..e03d7ce3b --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/clientconfigcolo_info.template @@ -0,0 +1,22 @@ + + {%- if model.run_settings.colocated_db_settings.db_identifier %} + Database Identifier: {{ model.run_settings.colocated_db_settings.db_identifier }} + {%- endif %} + Database Backend: {{ config.database_cli | get_dbtype }} + {%- if model.run_settings.colocated_db_settings.port %} + Connection Type: TCP + TCP/IP Port(s): + {{ model.run_settings.colocated_db_settings.port }} + {%- endif %} + {%- if model.run_settings.colocated_db_settings.unix_socket %} + Connection Type: UDS + Unix Socket: {{ model.run_settings.colocated_db_settings.unix_socket }} + {%- endif %} + Type: Colocated + {%- if model.query_key_prefixing() %} + Outgoing Key Collision Prevention (Key Prefixing): + Tensors: {{ 
model.query_key_prefixing() | as_toggle }} + Datasets: {{ model.query_key_prefixing() | as_toggle }} + ML Models/Torch Scripts: {{ False | as_toggle }} + Aggregation Lists: {{ model.query_key_prefixing() | as_toggle }} + {%- endif %} diff --git a/smartsim/templates/templates/preview/plain_text/ensemble.template b/smartsim/templates/templates/preview/plain_text/ensemble.template new file mode 100644 index 000000000..040737cc9 --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/ensemble.template @@ -0,0 +1,7 @@ + +{%- if verbosity_level == Verbosity.INFO %} +{%- include "ensemble_info.template" -%} +{%- endif %} +{%- if verbosity_level == Verbosity.DEBUG or verbosity_level == Verbosity.DEVELOPER %} +{%- include "ensemble_debug.template" -%} +{%- endif %} diff --git a/smartsim/templates/templates/preview/plain_text/ensemble_debug.template b/smartsim/templates/templates/preview/plain_text/ensemble_debug.template new file mode 100644 index 000000000..862db6032 --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/ensemble_debug.template @@ -0,0 +1,62 @@ + + {% for ensemble in manifest.ensembles %} + = Ensemble Name: {{ ensemble.name }} = + {%- if ensemble.path %} + Path: {{ ensemble.path }} + {%- endif %} + Members: {{ ensemble|length }} + {%- if ensemble.params %} + Ensemble Parameters: + {%- for key, value in ensemble.params.items() %} + {{ key }}: {{ value | join(", ") | wordwrap(150) | safe | replace('\n', '\n ') }} + {%- endfor %} + {%- endif %} + {%- if ensemble.replicas %} + Replicas: {{ ensemble.replicas }} + {%- elif ensemble.perm_strat %} + Permutation Strategy: {{ ensemble.perm_strat }} + {%- endif %} + {%- if ensemble.batch_settings %} + Batch Launch: True + Batch Command: {{ ensemble.batch_settings.batch_cmd }} + {%- endif %} + {%- if ensemble.batch_settings.batch_args %} + Batch Arguments: + {%- for key, value in ensemble.batch_settings.batch_args.items() %} + {{ key }}: {{ value }} + {%- endfor %} + {%- endif %} + + {%- if 
verbosity_level == Verbosity.DEBUG %} + {%- for model in ensemble.entities %} + + - Model Name: {{ model.name }} - + {%- include 'model.template' %} + {%- if model.run_settings.colocated_db_settings or manifest.dbs %} + Client Configuration: + {%- if model.run_settings.colocated_db_settings %} + {%- include "clientconfigcolo.template" %} + {%- endif %} + {%- if manifest.dbs %} + {%- include "clientconfig.template" %} + {%- endif %} + {%- endif %} + {%- endfor %} + {%- endif %} + {%- if verbosity_level == Verbosity.DEVELOPER %} + {%- for model in ensemble.entities %} + + - Model Name: {{ model.name }} - + {%- include 'model_debug.template' %} + {%- if model.run_settings.colocated_db_settings or manifest.dbs %} + Client Configuration: + {%- if model.run_settings.colocated_db_settings %} + {%- include "clientconfigcolo.template" %} + {%- endif %} + {%- if manifest.dbs %} + {%- include "clientconfig.template" %} + {%- endif %} + {%- endif %} +{%- endfor %} +{%- endif %} +{% endfor %} diff --git a/smartsim/templates/templates/preview/plain_text/ensemble_info.template b/smartsim/templates/templates/preview/plain_text/ensemble_info.template new file mode 100644 index 000000000..17d1a4054 --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/ensemble_info.template @@ -0,0 +1,51 @@ + = Ensemble Name: {{ ensemble.name }} = + Members: {{ ensemble|length }} + {%- if verbosity_level == Verbosity.DEBUG or verbosity_level == Verbosity.DEVELOPER %} + {%- if ensemble.params %} + Ensemble Parameters: + {%- for key, value in ensemble.params.items() %} + {{ key }}: {{ '{:^9}'.format(value|string)|truncate(81,true,'...')}} + {%- endfor %} + {%- endif %} + {%- endif %} + {%- if ensemble.models | length > 2 %} + {% set model = ensemble.models[0] %} + - Model Name: {{ model.name }} - + {%- include 'model.template' %} + {%- if model.run_settings.colocated_db_settings or manifest.dbs %} + Client Configuration: + {%- if model.run_settings.colocated_db_settings %} + {%- include 
"clientconfigcolo.template" %} + {%- endif %} + {%- if manifest.dbs %} + {%- include "clientconfig.template" %} + {%- endif %} + {%- endif %} + ... + {% set model = ensemble.models[(ensemble.models | length)-1] %} + - Model Name: {{ model.name }} - + {%- include 'model.template' %} + {% if model.run_settings.colocated_db_settings or manifest.dbs %} + Client Configuration: + {%- if model.run_settings.colocated_db_settings %} + {%- include "clientconfigcolo.template" %} + {%- endif %} + {%- if manifest.dbs %} + {%- include "clientconfig.template" %} + {%- endif %} + {%- endif %} + {%- else %} + {% for model in ensemble %} + - Model Name: {{ model.name }} - + {%- include 'model.template' %} + {% if model.run_settings.colocated_db_settings or manifest.dbs %} + Client Configuration: + {%- if model.run_settings.colocated_db_settings %} + {%- include "clientconfigcolo.template" %} + {%- endif %} + {%- if manifest.dbs %} + {%- include "clientconfig.template" %} + {%- endif %} + {%- endif %} + {% endfor %} + {%- endif %} diff --git a/smartsim/templates/templates/preview/plain_text/experiment.template b/smartsim/templates/templates/preview/plain_text/experiment.template new file mode 100644 index 000000000..d2ef16c05 --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/experiment.template @@ -0,0 +1,5 @@ +=== Experiment Overview === + + Experiment Name: {{ exp_entity.name }} + Experiment Path: {{ exp_entity.exp_path }} + Launcher: {{ exp_entity.launcher }} diff --git a/smartsim/templates/templates/preview/plain_text/model.template b/smartsim/templates/templates/preview/plain_text/model.template new file mode 100644 index 000000000..303beac67 --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/model.template @@ -0,0 +1,7 @@ + +{%- if verbosity_level == Verbosity.INFO %} +{%- include "model_info.template" -%} +{%- endif %} +{%- if verbosity_level == Verbosity.DEBUG or verbosity_level == Verbosity.DEVELOPER %} +{%- include "model_debug.template" 
-%} +{%- endif -%} diff --git a/smartsim/templates/templates/preview/plain_text/model_debug.template b/smartsim/templates/templates/preview/plain_text/model_debug.template new file mode 100644 index 000000000..186746186 --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/model_debug.template @@ -0,0 +1,114 @@ + + {%- if model is defined %} + {%- if model.path %} + Path: {{ model.path }} + {%- endif %} + Executable: {{ model.run_settings.exe[0] }} + Executable Arguments: + {%- for param in model.run_settings.exe_args %} + {{ param }} + {%- endfor %} + {%- if model.run_settings.run_command %} + Run Command: {{ model.run_settings.run_command }} + {%- endif %} + {%- if model.run_settings.run_args %} + Run Arguments: + {%- for key, value in model.run_settings.run_args.items() %} + {{ key }} {{ value }} + {%- endfor %} + {%- endif %} + {%- if model.batch_settings %} + Batch Launch: True + Batch Command: {{ model.batch_settings.batch_cmd }} + Batch Arguments: + {%- for key, value in model.batch_settings.batch_args.items() %} + {{ key }}: {{ value }} + {%- endfor %} + {%- endif %} + {%- if model.params %} + Model Parameters: + {%- for param, value in model.params.items() %} + {{ param }}: {{ value }} + {%- endfor %} + {%- endif %} + {%- if model.files %} + {%- if model.files.tagged %} + Tagged Files for Model Configuration: + {%- for tagged in model.files.tagged %} + {{ tagged }} + -> {{ model.path }} + {%- endfor %} + {%- endif %} + {%- if model.files.copy %} + Copy Files: + {%- for copy in model.files.copy %} + {{ copy }} + -> {{ model.path }} + {%- endfor %} + {%- endif %} + {%- if model.files.link %} + Symlink Files: + {%- for link in model.files.link %} + {{ link }} + -> {{ model.path }} + {%- endfor %} + {%- endif %} + {%- endif %} + {%- if model.run_settings.colocated_db_settings %} + Colocated: + {%- if model.run_settings.colocated_db_settings.db_identifier %} + Database Identifier: {{ model.run_settings.colocated_db_settings.db_identifier }} + {%- 
endif %} + {%- if model.run_settings.colocated_db_settings.port %} + Connection Type: TCP + TCP/IP Port(s): + {{ model.run_settings.colocated_db_settings.port }} + {%- endif %} + {%- if model.run_settings.colocated_db_settings.unix_socket %} + Connection Type: UDS + Unix Socket: {{ model.run_settings.colocated_db_settings.unix_socket }} + {%- endif %} + {%- if model.run_settings.colocated_db_settings.ifname %} + {%- if model.run_settings.colocated_db_settings.ifname | is_list %} + Network Interface Name: {{ model.run_settings.colocated_db_settings.ifname[0] }} + {%- else %} + Network Interface Name: {{ model.run_settings.colocated_db_settings.ifname }} + {%- endif %} + {%- endif %} + CPUs: {{ model.run_settings.colocated_db_settings.cpus }} + Custom Pinning: {{ model.run_settings.colocated_db_settings.custom_pinning }} + {%- endif %} + {%- if model._db_scripts %} + Torch Scripts: + {%- for script in model._db_scripts%} + Name: {{ script.name }} + Path: {{ script.file }} + Backend: {{ script.device }} + Devices Per Node: {{ script.devices_per_node }} + {%- endfor %} + {%- endif %} + {%- if model._db_models %} + ML Models: + {%- for mlmodel in model._db_models %} + Name: {{ mlmodel.name }} + Path: {{ mlmodel.file }} + Backend: {{ mlmodel.backend }} + Device: {{ mlmodel.device }} + Devices Per Node: {{ mlmodel.devices_per_node }} + {%- if mlmodel.device == "GPU" %} + First Device: {{ mlmodel.first_device }} + {%- endif %} + {%- for input in mlmodel.inputs %} + Inputs: + {{ input }} + {%- endfor %} + {%- for output in mlmodel.outputs %} + Outputs: + {{ output }} + {%- endfor %} + {%- endfor %} + {%- endif %} + {%- if model.query_key_prefixing()%} + Key Prefix: {{ model.name }} + {%- endif %} +{%- endif %} diff --git a/smartsim/templates/templates/preview/plain_text/model_info.template b/smartsim/templates/templates/preview/plain_text/model_info.template new file mode 100644 index 000000000..f746208e5 --- /dev/null +++ 
b/smartsim/templates/templates/preview/plain_text/model_info.template @@ -0,0 +1,54 @@ + + + {%- if model.batch_settings %} + Batch Launch: True + {% endif %} + {%- if model.params %} + Model Parameters: + {%- for param, value in model.params.items() %} + {{ param }}: {{ value }} + {%- endfor %} + {%- endif %} + + {%- if model.run_settings.colocated_db_settings %} + Colocated: + {%- if model.run_settings.colocated_db_settings.db_identifier %} + Database Identifier: {{ model.run_settings.colocated_db_settings.db_identifier }} + {%- endif %} + {%- if model.run_settings.colocated_db_settings.port %} + Connection Type: TCP + TCP/IP Port(s): + {{ model.run_settings.colocated_db_settings.port }} + {%- endif %} + {%- if model.run_settings.colocated_db_settings.unix_socket %} + Connection Type: UDS + Unix Socket: {{ model.run_settings.colocated_db_settings.unix_socket }} + {%- endif %} + {%- endif %} + + {%- if model.run_settings.colocated_db_settings['db_scripts'] %} + Torch Scripts: + {%- for script in model.run_settings.colocated_db_settings['db_scripts'] %} + Name: {{ script.name }} + Path: {{ script.script_path }} + {%- endfor %} + {%- endif %} + {%- if model.run_settings.colocated_db_settings['db_models'] %} + ML Models: + {%- for mlmodel in model.run_settings.colocated_db_settings['db_models'] %} + Name: {{ mlmodel.name }} + Path: {{ mlmodel.model_file }} + Backend: {{ mlmodel.backend }} + {%- for input in mlmodel.inputs %} + Inputs: + {{ input }} + {%- endfor %} + {%- for output in mlmodel.outputs %} + Outputs: + {{ output }} + {%- endfor %} + {%- endfor %} + {%- endif %} + {%- if model.query_key_prefixing() %} + Key Prefix: {{ model.name }} + {%- endif %} diff --git a/smartsim/templates/templates/preview/plain_text/orchestrator.template b/smartsim/templates/templates/preview/plain_text/orchestrator.template new file mode 100644 index 000000000..813b062b3 --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/orchestrator.template @@ -0,0 +1,7 @@ + 
+{%- if verbosity_level == Verbosity.INFO %} +{%- include "orchestrator_info.template" -%} +{%- endif %} +{%- if verbosity_level == Verbosity.DEBUG or verbosity_level == Verbosity.DEVELOPER %} +{%- include "orchestrator_debug.template" -%} +{%- endif %} diff --git a/smartsim/templates/templates/preview/plain_text/orchestrator_debug.template b/smartsim/templates/templates/preview/plain_text/orchestrator_debug.template new file mode 100644 index 000000000..127a4949e --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/orchestrator_debug.template @@ -0,0 +1,33 @@ + + = Database Identifier: {{ db.name }} = + {%- if db.path %} + Path: {{ db.path }} + {%- endif %} + Shards: {{ db.num_shards }} + TCP/IP Port(s): + {%- for port in db.ports %} + {{ port }} + {%- endfor %} + Network Interface: {{ db._interfaces[0] }} + Type: {{ config.database_cli | get_dbtype }} + Executable: {{ config.database_exe }} + {%- if db.run_settings %} + Run Command: {{ db.run_settings.run_command }} + {%- if db.run_settings.run_args %} + Run Arguments: + {%- for key, value in db.run_settings.run_args.items() %} + {{ key }}: {{ value }} + {%- endfor %} + {%- endif %} + {%- endif %} + {%- if db.run_command %} + Run Command: {{ db.run_command }} + {%- endif %} + {%- if db.batch_settings %} + Batch Launch: True + Batch Command: {{ db.batch_settings.batch_cmd }} + Batch Arguments: + {%- for key, value in db.batch_settings.batch_args.items() %} + {{ key }}: {{ value }} + {%- endfor %} + {%- endif %} diff --git a/smartsim/templates/templates/preview/plain_text/orchestrator_info.template b/smartsim/templates/templates/preview/plain_text/orchestrator_info.template new file mode 100644 index 000000000..11608d6c5 --- /dev/null +++ b/smartsim/templates/templates/preview/plain_text/orchestrator_info.template @@ -0,0 +1,11 @@ + + = Database Identifier: {{ db.name }} = + TCP/IP Port(s): + {%- for port in db.ports %} + {{ port }} + {%- endfor %} + Network Interface: {{ db._interfaces[0] }} + Type: 
{{ config.database_cli | get_dbtype }} + {%- if db.batch %} + Batch Launch: {{ db.batch }} + {%- endif %} diff --git a/smartsim/wlm/__init__.py b/smartsim/wlm/__init__.py index 3a82a81e5..a5d20d0c9 100644 --- a/smartsim/wlm/__init__.py +++ b/smartsim/wlm/__init__.py @@ -75,9 +75,7 @@ def get_hosts(launcher: t.Optional[str] = None) -> t.List[str]: :param launcher: Name of the WLM to use to collect allocation info. If no launcher is provided ``detect_launcher`` is used to select a launcher. - :type launcher: str | None :returns: Names of the hosts - :rtype: list[str] :raises SSUnsupportedError: User attempted to use an unsupported WLM """ if launcher is None: @@ -94,9 +92,7 @@ def get_queue(launcher: t.Optional[str] = None) -> str: :param launcher: Name of the WLM to use to collect allocation info. If no launcher is provided ``detect_launcher`` is used to select a launcher. - :type launcher: str | None :returns: Name of the queue - :rtype: str :raises SSUnsupportedError: User attempted to use an unsupported WLM """ if launcher is None: @@ -113,9 +109,7 @@ def get_tasks(launcher: t.Optional[str] = None) -> int: :param launcher: Name of the WLM to use to collect allocation info. If no launcher is provided ``detect_launcher`` is used to select a launcher. - :type launcher: str | None :returns: Number of tasks - :rtype: int :raises SSUnsupportedError: User attempted to use an unsupported WLM """ if launcher is None: @@ -132,9 +126,7 @@ def get_tasks_per_node(launcher: t.Optional[str] = None) -> t.Dict[str, int]: :param launcher: Name of the WLM to use to collect allocation info. If no launcher is provided ``detect_launcher`` is used to select a launcher. 
- :type launcher: str | None :returns: Map of nodes to number of processes on that node - :rtype: dict[str, int] :raises SSUnsupportedError: User attempted to use an unsupported WLM """ if launcher is None: diff --git a/smartsim/wlm/pbs.py b/smartsim/wlm/pbs.py index eda5baf24..5b559c1e6 100644 --- a/smartsim/wlm/pbs.py +++ b/smartsim/wlm/pbs.py @@ -38,7 +38,6 @@ def get_hosts() -> t.List[str]: """Get the name of the hosts used in a PBS allocation. :returns: Names of the host nodes - :rtype: list[str] :raises SmartSimError: ``PBS_NODEFILE`` is not set """ hosts = [] @@ -59,7 +58,6 @@ def get_queue() -> str: """Get the name of queue in a PBS allocation. :returns: The name of the queue - :rtype: str :raises SmartSimError: ``PBS_QUEUE`` is not set """ if "PBS_QUEUE" in os.environ: @@ -76,7 +74,6 @@ def get_tasks() -> int: node from which it is run. :returns: Then number of tasks in the allocation - :rtype: int :raises LauncherError: Could not access ``qstat`` :raises SmartSimError: ``PBS_JOBID`` is not set """ @@ -103,8 +100,7 @@ def get_tasks_per_node() -> t.Dict[str, int]: This method requires ``qstat`` be installed on the node from which it is run. 
- :returns: Map of chunks to number of processes on that chunck - :rtype: dict[str, int] + :returns: Map of chunks to number of processes on that chunk :raises LauncherError: Could not access ``qstat`` :raises SmartSimError: ``PBS_JOBID`` is not set """ diff --git a/smartsim/wlm/slurm.py b/smartsim/wlm/slurm.py index 9308eea98..ae7299f28 100644 --- a/smartsim/wlm/slurm.py +++ b/smartsim/wlm/slurm.py @@ -31,7 +31,6 @@ from .._core.launcher.slurm.slurmCommands import salloc, scancel, scontrol, sinfo from .._core.launcher.slurm.slurmParser import parse_salloc, parse_salloc_error from .._core.launcher.util.launcherUtil import ComputeNode, Partition -from .._core.utils.helpers import init_default from ..error import ( AllocationError, LauncherError, @@ -60,31 +59,26 @@ def get_allocation( The options can be used to pass extra settings to the workload manager such as the following for Slurm: - - nodelist="nid00004" + - nodelist="nid00004" For arguments without a value, pass None or and empty string as the value. 
For Slurm: - - exclusive=None + - exclusive=None - :param nodes: number of nodes for the allocation, defaults to 1 - :type nodes: int, optional - :param time: wall time of the allocation, HH:MM:SS format, defaults to None - :type time: str, optional - :param account: account id for allocation, defaults to None - :type account: str, optional - :param options: additional options for the slurm wlm, defaults to None - :type options: dict[str, str], optional + :param nodes: number of nodes for the allocation + :param time: wall time of the allocation, HH:MM:SS format + :param account: account id for allocation + :param options: additional options for the slurm wlm :raises LauncherError: if the allocation is not successful :return: the id of the allocation - :rtype: str """ if not which("salloc"): raise LauncherError( "Attempted slurm function without access to slurm(salloc) at the call site" ) - options = init_default({}, options, dict) + options = options or {} salloc_args = _get_alloc_cmd(nodes, time, account, options=options) debug_msg = " ".join(salloc_args[1:]) @@ -108,7 +102,6 @@ def release_allocation(alloc_id: str) -> None: """Free an allocation's resources :param alloc_id: allocation id - :type alloc_id: str :raises LauncherError: if allocation could not be freed """ if not which("scancel"): @@ -137,15 +130,11 @@ def validate(nodes: int = 1, ppn: int = 1, partition: t.Optional[str] = None) -> if no partition is provided, the default partition is found and used. 
- :param nodes: Override the default node count to validate, defaults to 1 - :type nodes: int, optional - :param ppn: Override the default processes per node to validate, defaults to 1 - :type ppn: int, optional - :param partition: partition to validate, defaults to None - :type partition: str, optional + :param nodes: Override the default node count to validate + :param ppn: Override the default processes per node to validate + :param partition: partition to validate :raises: LauncherError :returns: True if resources are available, False otherwise - :rtype: bool """ sys_partitions = _get_system_partition_info() @@ -189,7 +178,6 @@ def get_default_partition() -> str: a star following its partition name in sinfo output :returns: the name of the default partition - :rtype: str """ sinfo_output, _ = sinfo(["--noheader", "--format", "%P"]) @@ -206,7 +194,6 @@ def get_default_partition() -> str: def _get_system_partition_info() -> t.Dict[str, Partition]: """Build a dictionary of slurm partitions :returns: dict of Partition objects - :rtype: dict """ sinfo_output, _ = sinfo(["--noheader", "--format", "%R %n %c"]) @@ -280,9 +267,7 @@ def _validate_time_format(time: str) -> str: By defualt the formatted wall time is the total number of seconds. :param time: number of hours to run job - :type time: str :returns: Formatted walltime - :rtype: str """ try: hours, minutes, seconds = map(int, time.split(":")) @@ -302,7 +287,6 @@ def get_hosts() -> t.List[str]: on which it is run :returns: Names of the host nodes - :rtype: list[str] :raises LauncherError: Could not access ``scontrol`` :raises SmartSimError: ``SLURM_JOB_NODELIST`` is not set """ @@ -325,7 +309,6 @@ def get_queue() -> str: """Get the name of queue in a slurm allocation. 
:returns: The name of the queue - :rtype: str :raises SmartSimError: ``SLURM_JOB_PARTITION`` is not set """ if job_partition := os.environ.get("SLURM_JOB_PARTITION", None): @@ -337,7 +320,6 @@ def get_tasks() -> int: """Get the number of tasks in a slurm allocation. :returns: Then number of tasks in the allocation - :rtype: int :raises SmartSimError: ``SLURM_NTASKS`` is not set """ if ntasks_str := os.environ.get("SLURM_NTASKS", 0): @@ -354,7 +336,6 @@ def get_tasks_per_node() -> t.Dict[str, int]: on which it is run :returns: Map of nodes to number of tasks on that node - :rtype: dict[str, int] :raises SmartSimError: ``SLURM_TASKS_PER_NODE`` is not set """ if "SLURM_TASKS_PER_NODE" in os.environ: diff --git a/tests/backends/test_cli_mini_exp.py b/tests/backends/test_cli_mini_exp.py index f02f44270..2fde2ff5f 100644 --- a/tests/backends/test_cli_mini_exp.py +++ b/tests/backends/test_cli_mini_exp.py @@ -31,6 +31,7 @@ import smartredis import smartsim._core._cli.validate +import smartsim._core._install.builder as build from smartsim._core.utils.helpers import installed_redisai_backends sklearn_available = True @@ -47,6 +48,7 @@ def test_cli_mini_exp_doesnt_error_out_with_dev_build( + prepare_db, local_db, test_dir, monkeypatch, @@ -56,9 +58,11 @@ def test_cli_mini_exp_doesnt_error_out_with_dev_build( to ensure that it does not accidentally report false positive/negatives """ + db = prepare_db(local_db).orchestrator + @contextmanager def _mock_make_managed_local_orc(*a, **kw): - (client_addr,) = local_db.get_address() + (client_addr,) = db.get_address() yield smartredis.Client(False, address=client_addr) monkeypatch.setattr( @@ -67,7 +71,7 @@ def _mock_make_managed_local_orc(*a, **kw): _mock_make_managed_local_orc, ) backends = installed_redisai_backends() - (db_port,) = local_db.ports + (db_port,) = db.ports smartsim._core._cli.validate.test_install( # Shouldn't matter bc we are stubbing creation of orc @@ -75,7 +79,7 @@ def _mock_make_managed_local_orc(*a, **kw): 
location=test_dir, port=db_port, # Always test on CPU, heads don't always have GPU - device="CPU", + device=build.Device.CPU, # Test the backends the dev has installed with_tf="tensorflow" in backends, with_pt="torch" in backends, diff --git a/tests/backends/test_dataloader.py b/tests/backends/test_dataloader.py index d02f3f33c..de4bf6d8e 100644 --- a/tests/backends/test_dataloader.py +++ b/tests/backends/test_dataloader.py @@ -35,7 +35,7 @@ from smartsim.experiment import Experiment from smartsim.log import get_logger from smartsim.ml.data import DataInfo, TrainingDataUploader -from smartsim.status import STATUS_COMPLETED +from smartsim.status import SmartSimStatus logger = get_logger(__name__) @@ -167,19 +167,16 @@ def train_tf(generator): @pytest.mark.skipif(not shouldrun_tf, reason="Test needs TensorFlow to run") -def test_tf_dataloaders(test_dir, wlmutils): - exp = Experiment( - "test_tf_dataloaders", test_dir, launcher=wlmutils.get_test_launcher() - ) - orc: Orchestrator = wlmutils.get_orchestrator() - exp.generate(orc) - exp.start(orc) +def test_tf_dataloaders(wlm_experiment, prepare_db, single_db, monkeypatch): + + db = prepare_db(single_db).orchestrator + orc = wlm_experiment.reconnect_orchestrator(db.checkpoint_file) + monkeypatch.setenv("SSDB", orc.get_address()[0]) + monkeypatch.setenv("SSKEYIN", "test_uploader_0,test_uploader_1") try: - os.environ["SSDB"] = orc.get_address()[0] data_info = run_local_uploaders(mpi_size=2, format="tf") - os.environ["SSKEYIN"] = "test_uploader_0,test_uploader_1" for rank in range(2): tf_dynamic = TFDataGenerator( data_info_or_list_name="test_data_list", @@ -190,6 +187,7 @@ def test_tf_dataloaders(test_dir, wlmutils): batch_size=4, max_fetch_trials=5, dynamic=False, # catch wrong arg + wait_interval=0.1, ) train_tf(tf_dynamic) assert len(tf_dynamic) == 4 @@ -204,6 +202,7 @@ def test_tf_dataloaders(test_dir, wlmutils): batch_size=4, max_fetch_trials=5, dynamic=True, # catch wrong arg + wait_interval=0.1, ) 
train_tf(tf_static) assert len(tf_static) == 4 @@ -211,11 +210,6 @@ def test_tf_dataloaders(test_dir, wlmutils): except Exception as e: raise e - finally: - exp.stop(orc) - os.environ.pop("SSDB", "") - os.environ.pop("SSKEYIN", "") - os.environ.pop("SSKEYOUT", "") def create_trainer_torch(experiment: Experiment, filedir, wlmutils): @@ -234,20 +228,18 @@ def create_trainer_torch(experiment: Experiment, filedir, wlmutils): @pytest.mark.skipif(not shouldrun_torch, reason="Test needs Torch to run") -def test_torch_dataloaders(fileutils, test_dir, wlmutils): - exp = Experiment( - "test_tf_dataloaders", test_dir, launcher=wlmutils.get_test_launcher() - ) - orc: Orchestrator = wlmutils.get_orchestrator() +def test_torch_dataloaders( + wlm_experiment, prepare_db, single_db, fileutils, test_dir, wlmutils, monkeypatch +): config_dir = fileutils.get_test_dir_path("ml") - exp.generate(orc) - exp.start(orc) + db = prepare_db(single_db).orchestrator + orc = wlm_experiment.reconnect_orchestrator(db.checkpoint_file) + monkeypatch.setenv("SSDB", orc.get_address()[0]) + monkeypatch.setenv("SSKEYIN", "test_uploader_0,test_uploader_1") try: - os.environ["SSDB"] = orc.get_address()[0] data_info = run_local_uploaders(mpi_size=2) - os.environ["SSKEYIN"] = "test_uploader_0,test_uploader_1" for rank in range(2): torch_dynamic = TorchDataGenerator( data_info_or_list_name="test_data_list", @@ -258,11 +250,12 @@ def test_torch_dataloaders(fileutils, test_dir, wlmutils): batch_size=4, max_fetch_trials=5, dynamic=False, # catch wrong arg - init_samples=True, # catch wrong arg + init_samples=True, + wait_interval=0.1, ) check_dataloader(torch_dynamic, rank, dynamic=True) - torch_dynamic.init_samples(5) + torch_dynamic.init_samples(5, 0.1) for _ in range(2): for _ in torch_dynamic: continue @@ -278,26 +271,22 @@ def test_torch_dataloaders(fileutils, test_dir, wlmutils): max_fetch_trials=5, dynamic=True, # catch wrong arg init_samples=True, # catch wrong arg + wait_interval=0.1, ) 
check_dataloader(torch_static, rank, dynamic=False) - torch_static.init_samples(5) + torch_static.init_samples(5, 0.1) for _ in range(2): for _ in torch_static: continue - trainer = create_trainer_torch(exp, config_dir, wlmutils) - exp.start(trainer, block=True) + trainer = create_trainer_torch(wlm_experiment, config_dir, wlmutils) + wlm_experiment.start(trainer, block=True) - assert exp.get_status(trainer)[0] == STATUS_COMPLETED + assert wlm_experiment.get_status(trainer)[0] == SmartSimStatus.STATUS_COMPLETED except Exception as e: raise e - finally: - exp.stop(orc) - os.environ.pop("SSDB", "") - os.environ.pop("SSKEYIN", "") - os.environ.pop("SSKEYOUT", "") def test_data_info_repr(): @@ -331,15 +320,9 @@ def test_data_info_repr(): @pytest.mark.skipif( not (shouldrun_torch or shouldrun_tf), reason="Requires TF or PyTorch" ) -def test_wrong_dataloaders(test_dir, wlmutils): - exp = Experiment( - "test-wrong-dataloaders", - exp_path=test_dir, - launcher=wlmutils.get_test_launcher(), - ) - orc = wlmutils.get_orchestrator() - exp.generate(orc) - exp.start(orc) +def test_wrong_dataloaders(wlm_experiment, prepare_db, single_db): + db = prepare_db(single_db).orchestrator + orc = wlm_experiment.reconnect_orchestrator(db.checkpoint_file) if shouldrun_tf: with pytest.raises(SSInternalError): @@ -365,5 +348,3 @@ def test_wrong_dataloaders(test_dir, wlmutils): cluster=False, ) torch_data_gen.init_samples(init_trials=1) - - exp.stop(orc) diff --git a/tests/backends/test_dbmodel.py b/tests/backends/test_dbmodel.py index 75e9f515d..6155b6884 100644 --- a/tests/backends/test_dbmodel.py +++ b/tests/backends/test_dbmodel.py @@ -29,12 +29,13 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment from smartsim._core.utils import installed_redisai_backends from smartsim.entity import Ensemble from smartsim.entity.dbobject import DBModel from smartsim.error.errors import SSUnsupportedError from smartsim.log import get_logger +from smartsim.status 
import SmartSimStatus logger = get_logger(__name__) @@ -145,36 +146,30 @@ def save_torch_cnn(path, file_name): @pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") -def test_tf_db_model(fileutils, test_dir, wlmutils, mlutils): +def test_tf_db_model( + wlm_experiment, prepare_db, single_db, fileutils, test_dir, mlutils +): """Test TensorFlow DB Models on remote DB""" - # Set experiment name - exp_name = "test-tf-db-model" - # Retrieve parameters from testing environment - test_launcher = wlmutils.get_test_launcher() - test_interface = wlmutils.get_test_interface() - test_port = wlmutils.get_test_port() test_device = mlutils.get_test_device() test_num_gpus = 1 # TF backend fails on multiple GPUs test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py") - # Create the SmartSim Experiment - exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) - # Create RunSettings - run_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script) + run_settings = wlm_experiment.create_run_settings( + exe=sys.executable, exe_args=test_script + ) run_settings.set_nodes(1) run_settings.set_tasks(1) # Create Model - smartsim_model = exp.create_model("smartsim_model", run_settings) + smartsim_model = wlm_experiment.create_model("smartsim_model", run_settings) # Create database - host = wlmutils.choose_host(run_settings) - db = exp.create_database(port=test_port, interface=test_interface, hosts=host) - exp.generate(db) + db = prepare_db(single_db).orchestrator + wlm_experiment.reconnect_orchestrator(db.checkpoint_file) # Create and save ML model to filesystem model, inputs, outputs = create_tf_cnn() @@ -211,50 +206,41 @@ def test_tf_db_model(fileutils, test_dir, wlmutils, mlutils): # Assert we have added both models assert len(smartsim_model._db_models) == 2 - exp.generate(smartsim_model) + wlm_experiment.generate(smartsim_model) # Launch and check successful completion - try: - exp.start(db, smartsim_model, block=True) - 
statuses = exp.get_status(smartsim_model) - assert all( - stat == status.STATUS_COMPLETED for stat in statuses - ), f"Statuses: {statuses}" - finally: - exp.stop(db) + wlm_experiment.start(smartsim_model, block=True) + statuses = wlm_experiment.get_status(smartsim_model) + assert all( + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses + ), f"Statuses: {statuses}" @pytest.mark.skipif(not should_run_pt, reason="Test needs PyTorch to run") -def test_pt_db_model(fileutils, test_dir, wlmutils, mlutils): +def test_pt_db_model( + wlm_experiment, prepare_db, single_db, fileutils, test_dir, mlutils +): """Test PyTorch DB Models on remote DB""" - # Set experiment name - exp_name = "test-pt-db-model" - # Retrieve parameters from testing environment - test_launcher = wlmutils.get_test_launcher() - test_interface = wlmutils.get_test_interface() - test_port = wlmutils.get_test_port() test_device = mlutils.get_test_device() test_num_gpus = mlutils.get_test_num_gpus() if pytest.test_device == "GPU" else 1 test_script = fileutils.get_test_conf_path("run_pt_dbmodel_smartredis.py") - # Create the SmartSim Experiment - exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) - # Create RunSettings - run_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script) + run_settings = wlm_experiment.create_run_settings( + exe=sys.executable, exe_args=test_script + ) run_settings.set_nodes(1) run_settings.set_tasks(1) # Create Model - smartsim_model = exp.create_model("smartsim_model", run_settings) + smartsim_model = wlm_experiment.create_model("smartsim_model", run_settings) # Create database - host = wlmutils.choose_host(run_settings) - db = exp.create_database(port=test_port, interface=test_interface, hosts=host) - exp.generate(db) + db = prepare_db(single_db).orchestrator + wlm_experiment.reconnect_orchestrator(db.checkpoint_file) # Create and save ML model to filesystem save_torch_cnn(test_dir, "model1.pt") @@ -278,55 +264,46 @@ def 
test_pt_db_model(fileutils, test_dir, wlmutils, mlutils): # Assert we have added both models assert len(smartsim_model._db_models) == 1 - exp.generate(smartsim_model) + wlm_experiment.generate(smartsim_model) # Launch and check successful completion - try: - exp.start(db, smartsim_model, block=True) - statuses = exp.get_status(smartsim_model) - assert all( - stat == status.STATUS_COMPLETED for stat in statuses - ), f"Statuses: {statuses}" - finally: - exp.stop(db) + wlm_experiment.start(smartsim_model, block=True) + statuses = wlm_experiment.get_status(smartsim_model) + assert all( + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses + ), f"Statuses: {statuses}" @pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") -def test_db_model_ensemble(fileutils, test_dir, wlmutils, mlutils): +def test_db_model_ensemble( + wlm_experiment, prepare_db, single_db, fileutils, test_dir, wlmutils, mlutils +): """Test DBModels on remote DB, with an ensemble""" - # Set experiment name - exp_name = "test-db-model-ensemble" - # Retrieve parameters from testing environment - test_launcher = wlmutils.get_test_launcher() - test_interface = wlmutils.get_test_interface() - test_port = wlmutils.get_test_port() test_device = mlutils.get_test_device() test_num_gpus = 1 # TF backend fails on multiple GPUs test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py") - # Create the SmartSim Experiment - exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) - # Create RunSettings - run_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script) + run_settings = wlm_experiment.create_run_settings( + exe=sys.executable, exe_args=test_script + ) run_settings.set_nodes(1) run_settings.set_tasks(1) # Create ensemble - smartsim_ensemble = exp.create_ensemble( + smartsim_ensemble = wlm_experiment.create_ensemble( "smartsim_model", run_settings=run_settings, replicas=2 ) # Create Model - smartsim_model = 
exp.create_model("smartsim_model", run_settings) + smartsim_model = wlm_experiment.create_model("smartsim_model", run_settings) # Create database - host = wlmutils.choose_host(run_settings) - db = exp.create_database(port=test_port, interface=test_interface, hosts=host) - exp.generate(db) + db = prepare_db(single_db).orchestrator + wlm_experiment.reconnect_orchestrator(db.checkpoint_file) # Create and save ML model to filesystem model, inputs, outputs = create_tf_cnn() @@ -379,17 +356,14 @@ def test_db_model_ensemble(fileutils, test_dir, wlmutils, mlutils): # Assert we have added two models to each entity assert all([len(entity._db_models) == 2 for entity in smartsim_ensemble]) - exp.generate(smartsim_ensemble) + wlm_experiment.generate(smartsim_ensemble) # Launch and check successful completion - try: - exp.start(db, smartsim_ensemble, block=True) - statuses = exp.get_status(smartsim_ensemble) - assert all( - stat == status.STATUS_COMPLETED for stat in statuses - ), f"Statuses: {statuses}" - finally: - exp.stop(db) + wlm_experiment.start(smartsim_ensemble, block=True) + statuses = wlm_experiment.get_status(smartsim_ensemble) + assert all( + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses + ), f"Statuses: {statuses}" @pytest.mark.skipif(not should_run_tf, reason="Test needs TF to run") @@ -458,7 +432,7 @@ def test_colocated_db_model_tf(fileutils, test_dir, wlmutils, mlutils): exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) assert all( - stat == status.STATUS_COMPLETED for stat in statuses + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses ), f"Statuses: {statuses}" finally: exp.stop(colo_model) @@ -518,7 +492,7 @@ def test_colocated_db_model_pytorch(fileutils, test_dir, wlmutils, mlutils): exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) assert all( - stat == status.STATUS_COMPLETED for stat in statuses + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses ), f"Statuses: {statuses}" 
finally: exp.stop(colo_model) @@ -557,7 +531,6 @@ def test_colocated_db_model_ensemble(fileutils, test_dir, wlmutils, mlutils): # Create a third model with a colocated database colo_model = exp.create_model("colocated_model", colo_settings) - colo_model.set_path(test_dir) colo_model.colocate_db_tcp( port=test_port, db_cpus=1, debug=True, ifname=test_interface ) @@ -620,7 +593,7 @@ def test_colocated_db_model_ensemble(fileutils, test_dir, wlmutils, mlutils): exp.start(colo_ensemble, block=True) statuses = exp.get_status(colo_ensemble) assert all( - stat == status.STATUS_COMPLETED for stat in statuses + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses ), f"Statuses: {statuses}" finally: exp.stop(colo_ensemble) @@ -724,7 +697,7 @@ def test_colocated_db_model_ensemble_reordered(fileutils, test_dir, wlmutils, ml exp.start(colo_ensemble, block=True) statuses = exp.get_status(colo_ensemble) assert all( - stat == status.STATUS_COMPLETED for stat in statuses + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses ), f"Statuses: {statuses}" finally: exp.stop(colo_ensemble) @@ -756,7 +729,6 @@ def test_colocated_db_model_errors(fileutils, test_dir, wlmutils, mlutils): # Create colocated SmartSim Model colo_model = exp.create_model("colocated_model", colo_settings) - colo_model.set_path(test_dir) colo_model.colocate_db_tcp( port=test_port, db_cpus=1, debug=True, ifname=test_interface ) @@ -813,7 +785,6 @@ def test_colocated_db_model_errors(fileutils, test_dir, wlmutils, mlutils): colo_ensemble2 = exp.create_ensemble( "colocated_ens", run_settings=colo_settings2, replicas=2 ) - colo_ensemble2.set_path(test_dir) colo_ensemble2.add_ml_model( "cnn", "TF", diff --git a/tests/backends/test_dbscript.py b/tests/backends/test_dbscript.py index 2bffd1da6..2c04bf5db 100644 --- a/tests/backends/test_dbscript.py +++ b/tests/backends/test_dbscript.py @@ -30,12 +30,13 @@ import pytest from smartredis import * -from smartsim import Experiment, status +from smartsim import 
Experiment from smartsim._core.utils import installed_redisai_backends from smartsim.entity.dbobject import DBScript from smartsim.error.errors import SSUnsupportedError from smartsim.log import get_logger from smartsim.settings import MpiexecSettings, MpirunSettings +from smartsim.status import SmartSimStatus logger = get_logger(__name__) @@ -56,37 +57,29 @@ def timestwo(x): @pytest.mark.skipif(not should_run, reason="Test needs Torch to run") -def test_db_script(fileutils, test_dir, wlmutils, mlutils): +def test_db_script(wlm_experiment, prepare_db, single_db, fileutils, mlutils): """Test DB scripts on remote DB""" - # Set experiment name - exp_name = "test-db-script" - - # Retrieve parameters from testing environment - test_launcher = wlmutils.get_test_launcher() - test_interface = wlmutils.get_test_interface() - test_port = wlmutils.get_test_port() test_device = mlutils.get_test_device() test_num_gpus = mlutils.get_test_num_gpus() if pytest.test_device == "GPU" else 1 test_script = fileutils.get_test_conf_path("run_dbscript_smartredis.py") torch_script = fileutils.get_test_conf_path("torchscript.py") - # Create the SmartSim Experiment - exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) - # Create the RunSettings - run_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script) + run_settings = wlm_experiment.create_run_settings( + exe=sys.executable, exe_args=test_script + ) run_settings.set_nodes(1) run_settings.set_tasks(1) # Create the SmartSim Model - smartsim_model = exp.create_model("smartsim_model", run_settings) + smartsim_model = wlm_experiment.create_model("smartsim_model", run_settings) # Create the SmartSim database - host = wlmutils.choose_host(run_settings) - db = exp.create_database(port=test_port, interface=test_interface, hosts=host) - exp.generate(db, smartsim_model) + db = prepare_db(single_db).orchestrator + wlm_experiment.reconnect_orchestrator(db.checkpoint_file) + 
wlm_experiment.generate(smartsim_model) # Define the torch script string torch_script_str = "def negate(x):\n\treturn torch.neg(x)\n" @@ -122,51 +115,42 @@ def test_db_script(fileutils, test_dir, wlmutils, mlutils): assert len(smartsim_model._db_scripts) == 3 # Launch and check successful completion - try: - exp.start(db, smartsim_model, block=True) - statuses = exp.get_status(smartsim_model) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) - finally: - exp.stop(db) + wlm_experiment.start(smartsim_model, block=True) + statuses = wlm_experiment.get_status(smartsim_model) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) @pytest.mark.skipif(not should_run, reason="Test needs Torch to run") -def test_db_script_ensemble(fileutils, test_dir, wlmutils, mlutils): +def test_db_script_ensemble(wlm_experiment, prepare_db, single_db, fileutils, mlutils): """Test DB scripts on remote DB""" - # Set experiment name - exp_name = "test-db-script" + # Set wlm_experimenteriment name + wlm_experiment_name = "test-db-script" # Retrieve parameters from testing environment - test_launcher = wlmutils.get_test_launcher() - test_interface = wlmutils.get_test_interface() - test_port = wlmutils.get_test_port() test_device = mlutils.get_test_device() test_num_gpus = mlutils.get_test_num_gpus() if pytest.test_device == "GPU" else 1 test_script = fileutils.get_test_conf_path("run_dbscript_smartredis.py") torch_script = fileutils.get_test_conf_path("torchscript.py") - # Create SmartSim Experiment - exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) - # Create RunSettings - run_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script) + run_settings = wlm_experiment.create_run_settings( + exe=sys.executable, exe_args=test_script + ) run_settings.set_nodes(1) run_settings.set_tasks(1) + db = prepare_db(single_db).orchestrator + wlm_experiment.reconnect_orchestrator(db.checkpoint_file) + # Create Ensemble with two 
identical models - ensemble = exp.create_ensemble( + ensemble = wlm_experiment.create_ensemble( "dbscript_ensemble", run_settings=run_settings, replicas=2 ) # Create SmartSim model - smartsim_model = exp.create_model("smartsim_model", run_settings) - - # Create SmartSim database - host = wlmutils.choose_host(run_settings) - db = exp.create_database(port=test_port, interface=test_interface, hosts=host) - exp.generate(db) + smartsim_model = wlm_experiment.create_model("smartsim_model", run_settings) # Create the script string torch_script_str = "def negate(x):\n\treturn torch.neg(x)\n" @@ -216,14 +200,11 @@ def test_db_script_ensemble(fileutils, test_dir, wlmutils, mlutils): # Assert we have added all three models to entities in ensemble assert all([len(entity._db_scripts) == 3 for entity in ensemble]) - exp.generate(ensemble) + wlm_experiment.generate(ensemble) - try: - exp.start(db, ensemble, block=True) - statuses = exp.get_status(ensemble) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) - finally: - exp.stop(db) + wlm_experiment.start(ensemble, block=True) + statuses = wlm_experiment.get_status(ensemble) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) @pytest.mark.skipif(not should_run, reason="Test needs Torch to run") @@ -288,7 +269,7 @@ def test_colocated_db_script(fileutils, test_dir, wlmutils, mlutils): try: exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) finally: exp.stop(colo_model) @@ -388,7 +369,7 @@ def test_colocated_db_script_ensemble(fileutils, test_dir, wlmutils, mlutils): try: exp.start(colo_ensemble, block=True) statuses = exp.get_status(colo_ensemble) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) finally: exp.stop(colo_ensemble) @@ 
-486,7 +467,7 @@ def test_colocated_db_script_ensemble_reordered(fileutils, test_dir, wlmutils, m try: exp.start(colo_ensemble, block=True) statuses = exp.get_status(colo_ensemble) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) finally: exp.stop(colo_ensemble) diff --git a/tests/backends/test_onnx.py b/tests/backends/test_onnx.py index 7c0e97e41..29771bb1c 100644 --- a/tests/backends/test_onnx.py +++ b/tests/backends/test_onnx.py @@ -25,13 +25,14 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os +import sys from pathlib import Path import pytest from smartsim import Experiment from smartsim._core.utils import installed_redisai_backends -from smartsim.status import STATUS_FAILED +from smartsim.status import SmartSimStatus sklearn_available = True try: @@ -56,7 +57,7 @@ ) -def test_sklearn_onnx(test_dir, mlutils, wlmutils): +def test_sklearn_onnx(wlm_experiment, prepare_db, single_db, mlutils, wlmutils): """This test needs two free nodes, 1 for the db and 1 some sklearn models here we test the following sklearn models: @@ -73,29 +74,24 @@ def test_sklearn_onnx(test_dir, mlutils, wlmutils): You may need to put CUDNN in your LD_LIBRARY_PATH if running on GPU """ - - exp_name = "test_sklearn_onnx" - - exp = Experiment(exp_name, exp_path=test_dir, launcher=wlmutils.get_test_launcher()) test_device = mlutils.get_test_device() + db = prepare_db(single_db).orchestrator + wlm_experiment.reconnect_orchestrator(db.checkpoint_file) - db = wlmutils.get_orchestrator(nodes=1) - db.set_path(test_dir) - exp.start(db) - - run_settings = exp.create_run_settings( - "python", f"run_sklearn_onnx.py --device={test_device}" + run_settings = wlm_experiment.create_run_settings( + sys.executable, f"run_sklearn_onnx.py --device={test_device}" ) - model = exp.create_model("onnx_models", run_settings) + if wlmutils.get_test_launcher() != "local": + 
run_settings.set_tasks(1) + model = wlm_experiment.create_model("onnx_models", run_settings) script_dir = os.path.dirname(os.path.abspath(__file__)) script_path = Path(script_dir, "run_sklearn_onnx.py").resolve() model.attach_generator_files(to_copy=str(script_path)) - exp.generate(model) + wlm_experiment.generate(model) - exp.start(model, block=True) + wlm_experiment.start(model, block=True) - exp.stop(db) # if model failed, test will fail - model_status = exp.get_status(model) - assert model_status[0] != STATUS_FAILED + model_status = wlm_experiment.get_status(model) + assert model_status[0] != SmartSimStatus.STATUS_FAILED diff --git a/tests/backends/test_tf.py b/tests/backends/test_tf.py index af04c89cb..adf0e9daa 100644 --- a/tests/backends/test_tf.py +++ b/tests/backends/test_tf.py @@ -32,7 +32,7 @@ from smartsim import Experiment from smartsim._core.utils import installed_redisai_backends from smartsim.error import SmartSimError -from smartsim.status import STATUS_FAILED +from smartsim.status import SmartSimStatus tf_available = True try: @@ -50,7 +50,7 @@ (not tf_backend_available) or (not tf_available), reason="Requires RedisAI TF backend", ) -def test_keras_model(test_dir, mlutils, wlmutils): +def test_keras_model(wlm_experiment, prepare_db, single_db, mlutils, wlmutils): """This test needs two free nodes, 1 for the db and 1 for a keras model script this test can run on CPU/GPU by setting SMARTSIM_TEST_DEVICE=GPU @@ -60,34 +60,28 @@ def test_keras_model(test_dir, mlutils, wlmutils): You may need to put CUDNN in your LD_LIBRARY_PATH if running on GPU """ - exp_name = "test_keras_model" - - exp = Experiment(exp_name, exp_path=test_dir, launcher=wlmutils.get_test_launcher()) test_device = mlutils.get_test_device() + db = prepare_db(single_db).orchestrator + wlm_experiment.reconnect_orchestrator(db.checkpoint_file) - db = wlmutils.get_orchestrator(nodes=1) - db.set_path(test_dir) - exp.start(db) - - run_settings = exp.create_run_settings( + run_settings = 
wlm_experiment.create_run_settings( "python", f"run_tf.py --device={test_device}" ) if wlmutils.get_test_launcher() != "local": run_settings.set_tasks(1) - model = exp.create_model("tf_script", run_settings) + model = wlm_experiment.create_model("tf_script", run_settings) script_dir = os.path.dirname(os.path.abspath(__file__)) script_path = Path(script_dir, "run_tf.py").resolve() model.attach_generator_files(to_copy=str(script_path)) - exp.generate(model) + wlm_experiment.generate(model) - exp.start(model, block=True) + wlm_experiment.start(model, block=True) - exp.stop(db) # if model failed, test will fail - model_status = exp.get_status(model)[0] - assert model_status != STATUS_FAILED + model_status = wlm_experiment.get_status(model)[0] + assert model_status != SmartSimStatus.STATUS_FAILED def create_tf_model(): diff --git a/tests/backends/test_torch.py b/tests/backends/test_torch.py index 76a989a2e..c995f76ca 100644 --- a/tests/backends/test_torch.py +++ b/tests/backends/test_torch.py @@ -31,7 +31,7 @@ from smartsim import Experiment from smartsim._core.utils import installed_redisai_backends -from smartsim.status import STATUS_FAILED +from smartsim.status import SmartSimStatus torch_available = True try: @@ -48,7 +48,9 @@ ) -def test_torch_model_and_script(test_dir, mlutils, wlmutils): +def test_torch_model_and_script( + wlm_experiment, prepare_db, single_db, mlutils, wlmutils +): """This test needs two free nodes, 1 for the db and 1 for a torch model script Here we test both the torchscipt API and the NN API from torch @@ -60,30 +62,24 @@ def test_torch_model_and_script(test_dir, mlutils, wlmutils): You may need to put CUDNN in your LD_LIBRARY_PATH if running on GPU """ - exp_name = "test_torch_model_and_script" - - exp = Experiment(exp_name, exp_path=test_dir, launcher=wlmutils.get_test_launcher()) + db = prepare_db(single_db).orchestrator + wlm_experiment.reconnect_orchestrator(db.checkpoint_file) test_device = mlutils.get_test_device() - db = 
wlmutils.get_orchestrator(nodes=1) - db.set_path(test_dir) - exp.start(db) - - run_settings = exp.create_run_settings( + run_settings = wlm_experiment.create_run_settings( "python", f"run_torch.py --device={test_device}" ) if wlmutils.get_test_launcher() != "local": run_settings.set_tasks(1) - model = exp.create_model("torch_script", run_settings) + model = wlm_experiment.create_model("torch_script", run_settings) script_dir = os.path.dirname(os.path.abspath(__file__)) script_path = Path(script_dir, "run_torch.py").resolve() model.attach_generator_files(to_copy=str(script_path)) - exp.generate(model) + wlm_experiment.generate(model) - exp.start(model, block=True) + wlm_experiment.start(model, block=True) - exp.stop(db) # if model failed, test will fail - model_status = exp.get_status(model)[0] - assert model_status != STATUS_FAILED + model_status = wlm_experiment.get_status(model)[0] + assert model_status != SmartSimStatus.STATUS_FAILED diff --git a/tests/full_wlm/test_generic_batch_launch.py b/tests/full_wlm/test_generic_batch_launch.py index c69b1746a..fd8017c7c 100644 --- a/tests/full_wlm/test_generic_batch_launch.py +++ b/tests/full_wlm/test_generic_batch_launch.py @@ -28,8 +28,9 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment from smartsim.settings import QsubBatchSettings +from smartsim.status import SmartSimStatus # retrieved from pytest fixtures if pytest.test_launcher not in pytest.wlm_options: @@ -44,7 +45,10 @@ def add_batch_resources(wlmutils, batch_settings): if isinstance(batch_settings, QsubBatchSettings): for key, value in wlmutils.get_batch_resources().items(): - batch_settings.set_resource(key, value) + if key == "queue": + batch_settings.set_queue(value) + else: + batch_settings.set_resource(key, value) def test_batch_model(fileutils, test_dir, wlmutils): @@ -54,7 +58,7 @@ def test_batch_model(fileutils, test_dir, wlmutils): exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher(), 
exp_path=test_dir) script = fileutils.get_test_conf_path("sleep.py") - batch_settings = exp.create_batch_settings(nodes=1, time="00:01:00") + batch_settings = exp.create_batch_settings(nodes=1, time="00:05:00") batch_settings.set_account(wlmutils.get_test_account()) add_batch_resources(wlmutils, batch_settings) @@ -62,12 +66,12 @@ def test_batch_model(fileutils, test_dir, wlmutils): model = exp.create_model( "model", path=test_dir, run_settings=run_settings, batch_settings=batch_settings ) - model.set_path(test_dir) + exp.generate(model) exp.start(model, block=True) statuses = exp.get_status(model) assert len(statuses) == 1 - assert statuses[0] == status.STATUS_COMPLETED + assert statuses[0] == SmartSimStatus.STATUS_COMPLETED def test_batch_ensemble(fileutils, test_dir, wlmutils): @@ -88,11 +92,11 @@ def test_batch_ensemble(fileutils, test_dir, wlmutils): ensemble = exp.create_ensemble("batch-ens", batch_settings=batch) ensemble.add_model(M1) ensemble.add_model(M2) - ensemble.set_path(test_dir) + exp.generate(ensemble) exp.start(ensemble, block=True) statuses = exp.get_status(ensemble) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) def test_batch_ensemble_replicas(fileutils, test_dir, wlmutils): @@ -109,8 +113,7 @@ def test_batch_ensemble_replicas(fileutils, test_dir, wlmutils): ensemble = exp.create_ensemble( "batch-ens-replicas", batch_settings=batch, run_settings=settings, replicas=2 ) - ensemble.set_path(test_dir) exp.start(ensemble, block=True) statuses = exp.get_status(ensemble) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) diff --git a/tests/full_wlm/test_generic_orc_launch_batch.py b/tests/full_wlm/test_generic_orc_launch_batch.py index 058aef895..2a5627d6d 100644 --- a/tests/full_wlm/test_generic_orc_launch_batch.py +++ 
b/tests/full_wlm/test_generic_orc_launch_batch.py @@ -25,11 +25,14 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os.path as osp +import pathlib import time import pytest -from smartsim import Experiment, status +from smartsim import Experiment +from smartsim.settings.pbsSettings import QsubBatchSettings +from smartsim.status import SmartSimStatus # retrieved from pytest fixtures if pytest.test_launcher not in pytest.wlm_options: @@ -41,6 +44,15 @@ ) +def add_batch_resources(wlmutils, batch_settings): + if isinstance(batch_settings, QsubBatchSettings): + for key, value in wlmutils.get_batch_resources().items(): + if key == "queue": + batch_settings.set_queue(value) + else: + batch_settings.set_resource(key, value) + + def test_launch_orc_auto_batch(test_dir, wlmutils): """test single node orchestrator""" launcher = wlmutils.get_test_launcher() @@ -58,21 +70,22 @@ def test_launch_orc_auto_batch(test_dir, wlmutils): ) orc.batch_settings.set_account(wlmutils.get_test_account()) + add_batch_resources(wlmutils, orc.batch_settings) - orc.batch_settings.set_walltime("00:02:00") + orc.batch_settings.set_walltime("00:05:00") orc.set_path(test_dir) exp.start(orc, block=True) statuses = exp.get_status(orc) # don't use assert so that we don't leave an orphan process - if status.STATUS_FAILED in statuses: + if SmartSimStatus.STATUS_FAILED in statuses: exp.stop(orc) assert False exp.stop(orc) statuses = exp.get_status(orc) - assert all([stat == status.STATUS_CANCELLED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_CANCELLED for stat in statuses]) def test_launch_cluster_orc_batch_single(test_dir, wlmutils): @@ -94,21 +107,22 @@ def test_launch_cluster_orc_batch_single(test_dir, wlmutils): ) orc.batch_settings.set_account(wlmutils.get_test_account()) + add_batch_resources(wlmutils, orc.batch_settings) - orc.batch_settings.set_walltime("00:02:00") + orc.batch_settings.set_walltime("00:05:00") orc.set_path(test_dir) 
exp.start(orc, block=True) statuses = exp.get_status(orc) # don't use assert so that orc we don't leave an orphan process - if status.STATUS_FAILED in statuses: + if SmartSimStatus.STATUS_FAILED in statuses: exp.stop(orc) assert False exp.stop(orc) statuses = exp.get_status(orc) - assert all([stat == status.STATUS_CANCELLED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_CANCELLED for stat in statuses]) def test_launch_cluster_orc_batch_multi(test_dir, wlmutils): @@ -130,63 +144,88 @@ def test_launch_cluster_orc_batch_multi(test_dir, wlmutils): ) orc.batch_settings.set_account(wlmutils.get_test_account()) + add_batch_resources(wlmutils, orc.batch_settings) - orc.batch_settings.set_walltime("00:03:00") + orc.batch_settings.set_walltime("00:05:00") orc.set_path(test_dir) exp.start(orc, block=True) statuses = exp.get_status(orc) # don't use assert so that orc we don't leave an orphan process - if status.STATUS_FAILED in statuses: + if SmartSimStatus.STATUS_FAILED in statuses: exp.stop(orc) assert False exp.stop(orc) statuses = exp.get_status(orc) - assert all([stat == status.STATUS_CANCELLED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_CANCELLED for stat in statuses]) def test_launch_cluster_orc_reconnect(test_dir, wlmutils): """test reconnecting to clustered 3-node orchestrator""" + p_test_dir = pathlib.Path(test_dir) launcher = wlmutils.get_test_launcher() exp_name = "test-launch-cluster-orc-batch-reconect" - exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) + exp_1_dir = p_test_dir / exp_name + exp_1_dir.mkdir() + exp = Experiment(exp_name, launcher=launcher, exp_path=str(exp_1_dir)) # batch = False to launch on existing allocation network_interface = wlmutils.get_test_interface() orc = exp.create_database( wlmutils.get_test_port(), db_nodes=3, batch=True, interface=network_interface ) - orc.set_path(test_dir) orc.batch_settings.set_account(wlmutils.get_test_account()) + add_batch_resources(wlmutils, 
orc.batch_settings) - orc.batch_settings.set_walltime("00:03:00") + orc.batch_settings.set_walltime("00:05:00") exp.start(orc, block=True) statuses = exp.get_status(orc) - # don't use assert so that orc we don't leave an orphan process - if status.STATUS_FAILED in statuses: + try: + assert all(stat == SmartSimStatus.STATUS_RUNNING for stat in statuses) + except Exception: exp.stop(orc) - assert False - - exp.stop(orc) + raise exp_name = "test-orc-cluster-orc-batch-reconnect-2nd" - exp_2 = Experiment(exp_name, launcher=launcher) - - checkpoint = osp.join(test_dir, "smartsim_db.dat") - reloaded_orc = exp_2.reconnect_orchestrator(checkpoint) - - # let statuses update once - time.sleep(5) - - statuses = exp_2.get_status(reloaded_orc) - for stat in statuses: - if stat == status.STATUS_FAILED: - exp_2.stop(reloaded_orc) - assert False - - exp_2.stop(reloaded_orc) + exp_2_dir = p_test_dir / exp_name + exp_2_dir.mkdir() + exp_2 = Experiment(exp_name, launcher=launcher, exp_path=str(exp_2_dir)) + + try: + checkpoint = osp.join(orc.path, "smartsim_db.dat") + reloaded_orc = exp_2.reconnect_orchestrator(checkpoint) + + # let statuses update once + time.sleep(5) + + statuses = exp_2.get_status(reloaded_orc) + assert all(stat == SmartSimStatus.STATUS_RUNNING for stat in statuses) + except Exception: + # Something went wrong! Let the experiment that started the DB + # clean up the DB + exp.stop(orc) + raise + + try: + # Test experiment 2 can stop the DB + exp_2.stop(reloaded_orc) + assert all( + stat == SmartSimStatus.STATUS_CANCELLED + for stat in exp_2.get_status(reloaded_orc) + ) + except Exception: + # Something went wrong! 
Let the experiment that started the DB + # clean up the DB + exp.stop(orc) + raise + else: + # Ensure it is the same DB that Experiment 1 was tracking + time.sleep(5) + assert not any( + stat == SmartSimStatus.STATUS_RUNNING for stat in exp.get_status(orc) + ) diff --git a/tests/full_wlm/test_mpmd.py b/tests/full_wlm/test_mpmd.py index 7f6cc2ea2..0167a8f08 100644 --- a/tests/full_wlm/test_mpmd.py +++ b/tests/full_wlm/test_mpmd.py @@ -28,8 +28,9 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment from smartsim._core.utils.helpers import is_valid_cmd +from smartsim.status import SmartSimStatus # retrieved from pytest fixtures if pytest.test_launcher not in pytest.wlm_options: @@ -86,11 +87,13 @@ def prune_commands(launcher): settings.make_mpmd(deepcopy(settings)) settings.make_mpmd(deepcopy(settings)) - mpmd_model = exp.create_model("mmpd", path=test_dir, run_settings=settings) + mpmd_model = exp.create_model( + f"mpmd-{run_command}", path=test_dir, run_settings=settings + ) exp.start(mpmd_model, block=True) statuses = exp.get_status(mpmd_model) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) exp.start(mpmd_model, block=True) statuses = exp.get_status(mpmd_model) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) diff --git a/tests/full_wlm/test_symlinking.py b/tests/full_wlm/test_symlinking.py new file mode 100644 index 000000000..c5b5b90ba --- /dev/null +++ b/tests/full_wlm/test_symlinking.py @@ -0,0 +1,180 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. 
Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import os +import pathlib +import time + +import pytest + +from smartsim import Experiment + +if pytest.test_launcher not in pytest.wlm_options: + pytestmark = pytest.mark.skip(reason="Not testing WLM integrations") + + +def test_batch_model_and_ensemble(test_dir, wlmutils): + exp_name = "test-batch" + launcher = wlmutils.get_test_launcher() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) + rs = exp.create_run_settings("echo", ["spam", "eggs"]) + bs = exp.create_batch_settings() + + test_model = exp.create_model( + "test_model", path=test_dir, run_settings=rs, batch_settings=bs + ) + exp.generate(test_model) + exp.start(test_model, block=True) + + assert pathlib.Path(test_model.path).exists() + _should_be_symlinked(pathlib.Path(test_model.path, f"{test_model.name}.out"), True) + _should_be_symlinked(pathlib.Path(test_model.path, f"{test_model.name}.err"), False) + _should_not_be_symlinked(pathlib.Path(test_model.path, f"{test_model.name}.sh")) + + test_ensemble = exp.create_ensemble( + "test_ensemble", params={}, batch_settings=bs, run_settings=rs, replicas=3 + ) + exp.generate(test_ensemble) + exp.start(test_ensemble, block=True) + + assert pathlib.Path(test_ensemble.path).exists() + for i in range(len(test_ensemble.models)): + _should_be_symlinked( + pathlib.Path( + test_ensemble.path, + f"{test_ensemble.name}_{i}", + f"{test_ensemble.name}_{i}.out", + ), + True, + ) + _should_be_symlinked( + pathlib.Path( + test_ensemble.path, + f"{test_ensemble.name}_{i}", + f"{test_ensemble.name}_{i}.err", + ), + False, + ) + + _should_not_be_symlinked(pathlib.Path(exp.exp_path, "smartsim_params.txt")) + + +def test_batch_ensemble_symlinks(test_dir, wlmutils): + exp_name = "test-batch-ensemble" + launcher = wlmutils.get_test_launcher() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) + rs = exp.create_run_settings("echo", ["spam", "eggs"]) + bs = exp.create_batch_settings() + test_ensemble = exp.create_ensemble( + "test_ensemble", 
params={}, batch_settings=bs, run_settings=rs, replicas=3 + ) + exp.generate(test_ensemble) + exp.start(test_ensemble, block=True) + + for i in range(len(test_ensemble.models)): + _should_be_symlinked( + pathlib.Path( + test_ensemble.path, + f"{test_ensemble.name}_{i}", + f"{test_ensemble.name}_{i}.out", + ), + True, + ) + _should_be_symlinked( + pathlib.Path( + test_ensemble.path, + f"{test_ensemble.name}_{i}", + f"{test_ensemble.name}_{i}.err", + ), + False, + ) + + _should_not_be_symlinked(pathlib.Path(exp.exp_path, "smartsim_params.txt")) + + +def test_batch_model_symlinks(test_dir, wlmutils): + exp_name = "test-batch-model" + launcher = wlmutils.get_test_launcher() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) + rs = exp.create_run_settings("echo", ["spam", "eggs"]) + bs = exp.create_batch_settings() + test_model = exp.create_model( + "test_model", path=test_dir, run_settings=rs, batch_settings=bs + ) + exp.generate(test_model) + exp.start(test_model, block=True) + + assert pathlib.Path(test_model.path).exists() + + _should_be_symlinked(pathlib.Path(test_model.path, f"{test_model.name}.out"), True) + _should_be_symlinked(pathlib.Path(test_model.path, f"{test_model.name}.err"), False) + _should_not_be_symlinked(pathlib.Path(test_model.path, f"{test_model.name}.sh")) + + +def test_batch_orchestrator_symlinks(test_dir, wlmutils): + exp_name = "test-batch-orc" + launcher = wlmutils.get_test_launcher() + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) + port = 2424 + db = exp.create_database( + db_nodes=3, + port=port, + batch=True, + interface=wlmutils.get_test_interface(), + single_cmd=False, + ) + exp.generate(db) + exp.start(db, block=True) + time.sleep(2) + exp.stop(db) + + _should_be_symlinked(pathlib.Path(db.path, f"{db.name}.out"), False) + _should_be_symlinked(pathlib.Path(db.path, f"{db.name}.err"), False) + + for i in range(db.db_nodes): + _should_be_symlinked(pathlib.Path(db.path, f"{db.name}_{i}.out"), False) + 
_should_be_symlinked(pathlib.Path(db.path, f"{db.name}_{i}.err"), False) + _should_not_be_symlinked( + pathlib.Path(db.path, f"nodes-orchestrator_{i}-{port}.conf") + ) + + +def _should_not_be_symlinked(non_linked_path: pathlib.Path): + """Helper function for assertions about paths that should NOT be symlinked""" + assert non_linked_path.exists() + assert not non_linked_path.is_symlink() + + +def _should_be_symlinked(linked_path: pathlib.Path, open_file: bool): + """Helper function for assertions about paths that SHOULD be symlinked""" + assert linked_path.exists() + assert linked_path.is_symlink() + # ensure the source file exists + assert pathlib.Path(os.readlink(linked_path)).exists() + if open_file: + with open(pathlib.Path(os.readlink(linked_path)), "r") as file: + log_contents = file.read() + assert "spam eggs" in log_contents diff --git a/tests/install/test_builder.py b/tests/install/test_builder.py index 5e6c8e597..feaf7e54f 100644 --- a/tests/install/test_builder.py +++ b/tests/install/test_builder.py @@ -27,8 +27,7 @@ import functools import pathlib -import platform -import threading +import textwrap import time import pytest @@ -41,7 +40,9 @@ RAI_VERSIONS = RedisAIVersion("1.2.7") -for_each_device = pytest.mark.parametrize("device", ["cpu", "gpu"]) +for_each_device = pytest.mark.parametrize( + "device", [build.Device.CPU, build.Device.GPU] +) _toggle_build_optional_backend = lambda backend: pytest.mark.parametrize( f"build_{backend}", @@ -163,7 +164,7 @@ def test_rai_builder_will_add_dep_if_backend_requested_wo_duplicates( rai_builder = build.RedisAIBuilder( build_tf=build_tf, build_torch=build_pt, build_onnx=build_ort ) - requested_backends = rai_builder._get_deps_to_fetch_for(device) + requested_backends = rai_builder._get_deps_to_fetch_for(build.Device(device)) assert dlpack_dep_presence(requested_backends) assert tf_dep_presence(build_tf, requested_backends) assert pt_dep_presence(build_pt, requested_backends) @@ -212,7 +213,7 @@ def 
test_rai_builder_raises_if_it_fetches_an_unexpected_number_of_ml_deps( build.BuildError, match=r"Expected to place \d+ dependencies, but only found \d+", ): - rai_builder._fetch_deps_for("cpu") + rai_builder._fetch_deps_for(build.Device.CPU) def test_threaded_map(): @@ -251,18 +252,24 @@ def test_PTArchiveMacOSX_url(): arch = build.Architecture.X64 pt_version = RAI_VERSIONS.torch - pt_linux_cpu = build._PTArchiveLinux(build.Architecture.X64, "cpu", pt_version) + pt_linux_cpu = build._PTArchiveLinux( + build.Architecture.X64, build.Device.CPU, pt_version, False + ) x64_prefix = "https://download.pytorch.org/libtorch/" assert x64_prefix in pt_linux_cpu.url - pt_macosx_cpu = build._PTArchiveMacOSX(build.Architecture.ARM64, "cpu", pt_version) + pt_macosx_cpu = build._PTArchiveMacOSX( + build.Architecture.ARM64, build.Device.CPU, pt_version, False + ) arm64_prefix = "https://github.com/CrayLabs/ml_lib_builder/releases/download/" assert arm64_prefix in pt_macosx_cpu.url def test_PTArchiveMacOSX_gpu_error(): with pytest.raises(build.BuildError, match="support GPU on Mac OSX"): - build._PTArchiveMacOSX(build.Architecture.ARM64, "gpu", RAI_VERSIONS.torch).url + build._PTArchiveMacOSX( + build.Architecture.ARM64, build.Device.GPU, RAI_VERSIONS.torch, False + ).url def test_valid_platforms(): @@ -362,3 +369,36 @@ def test_valid_platforms(): ) def test_git_commands_are_configered_correctly_for_platforms(plat, cmd, expected_cmd): assert build.config_git_command(plat, cmd) == expected_cmd + + +def test_modify_source_files(p_test_dir): + def make_text_blurb(food): + return textwrap.dedent(f"""\ + My favorite food is {food} + {food} is an important part of a healthy breakfast + {food} {food} {food} {food} + This line should be unchanged! 
+ --> {food} <-- + """) + + original_word = "SPAM" + mutated_word = "EGGS" + + source_files = [] + for i in range(3): + source_file = p_test_dir / f"test_{i}" + source_file.touch() + source_file.write_text(make_text_blurb(original_word)) + source_files.append(source_file) + # Modify a single file + build._modify_source_files(source_files[0], original_word, mutated_word) + assert source_files[0].read_text() == make_text_blurb(mutated_word) + assert source_files[1].read_text() == make_text_blurb(original_word) + assert source_files[2].read_text() == make_text_blurb(original_word) + + # Modify multiple files + build._modify_source_files( + (source_files[1], source_files[2]), original_word, mutated_word + ) + assert source_files[1].read_text() == make_text_blurb(mutated_word) + assert source_files[2].read_text() == make_text_blurb(mutated_word) diff --git a/tests/on_wlm/test_base_settings_on_wlm.py b/tests/on_wlm/test_base_settings_on_wlm.py index 0b31eedd2..77bebd524 100644 --- a/tests/on_wlm/test_base_settings_on_wlm.py +++ b/tests/on_wlm/test_base_settings_on_wlm.py @@ -28,7 +28,8 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment +from smartsim.status import SmartSimStatus """ Test the launch and stop of models and ensembles using base @@ -54,7 +55,7 @@ def test_model_on_wlm(fileutils, test_dir, wlmutils): for _ in range(2): exp.start(M1, M2, block=True) statuses = exp.get_status(M1, M2) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) def test_model_stop_on_wlm(fileutils, test_dir, wlmutils): @@ -74,4 +75,4 @@ def test_model_stop_on_wlm(fileutils, test_dir, wlmutils): assert M1.name in exp._control._jobs.completed assert M2.name in exp._control._jobs.completed statuses = exp.get_status(M1, M2) - assert all([stat == status.STATUS_CANCELLED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_CANCELLED for stat in 
statuses]) diff --git a/tests/on_wlm/test_colocated_model.py b/tests/on_wlm/test_colocated_model.py index 8baf74bf4..97a47542d 100644 --- a/tests/on_wlm/test_colocated_model.py +++ b/tests/on_wlm/test_colocated_model.py @@ -28,8 +28,9 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment from smartsim.entity import Model +from smartsim.status import SmartSimStatus if sys.platform == "darwin": supported_dbs = ["tcp", "deprecated"] @@ -60,14 +61,14 @@ def test_launch_colocated_model_defaults(fileutils, test_dir, coloutils, db_type exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) assert all( - stat == status.STATUS_COMPLETED for stat in statuses + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses ), f"Statuses: {statuses}" # test restarting the colocated model exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) assert all( - stat == status.STATUS_COMPLETED for stat in statuses + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses ), f"Statuses: {statuses}" @@ -91,7 +92,7 @@ def test_colocated_model_disable_pinning(fileutils, test_dir, coloutils, db_type exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) assert all( - stat == status.STATUS_COMPLETED for stat in statuses + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses ), f"Statuses: {statuses}" @@ -114,7 +115,7 @@ def test_colocated_model_pinning_auto_2cpu(fileutils, test_dir, coloutils, db_ty exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) assert all( - stat == status.STATUS_COMPLETED for stat in statuses + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses ), f"Statuses: {statuses}" @@ -139,7 +140,7 @@ def test_colocated_model_pinning_range(fileutils, test_dir, coloutils, db_type): exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) assert all( - stat == status.STATUS_COMPLETED for stat in statuses + stat == 
SmartSimStatus.STATUS_COMPLETED for stat in statuses ), f"Statuses: {statuses}" @@ -164,7 +165,7 @@ def test_colocated_model_pinning_list(fileutils, test_dir, coloutils, db_type): exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) assert all( - stat == status.STATUS_COMPLETED for stat in statuses + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses ), f"Statuses: {statuses}" @@ -189,5 +190,5 @@ def test_colocated_model_pinning_mixed(fileutils, test_dir, coloutils, db_type): exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) assert all( - stat == status.STATUS_COMPLETED for stat in statuses + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses ), f"Statuses: {statuses}" diff --git a/tests/on_wlm/test_containers_wlm.py b/tests/on_wlm/test_containers_wlm.py index 8dc4baae0..21f1e1c5e 100644 --- a/tests/on_wlm/test_containers_wlm.py +++ b/tests/on_wlm/test_containers_wlm.py @@ -28,9 +28,10 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment from smartsim.entity import Ensemble from smartsim.settings.containers import Singularity +from smartsim.status import SmartSimStatus """Test SmartRedis container integration on a supercomputer with a WLM.""" @@ -49,10 +50,9 @@ def test_singularity_wlm_smartredis(fileutils, test_dir, wlmutils): """ launcher = wlmutils.get_test_launcher() - print(launcher) - if launcher not in ["pbs", "slurm"]: + if launcher not in ["pbs", "slurm", "dragon"]: pytest.skip( - f"Test only runs on systems with PBS or Slurm as WLM. Current launcher: {launcher}" + f"Test only runs on systems with PBS, Dragon, or Slurm as WLM. 
Current launcher: {launcher}" ) exp = Experiment( @@ -92,7 +92,7 @@ def test_singularity_wlm_smartredis(fileutils, test_dir, wlmutils): # get and confirm statuses statuses = exp.get_status(ensemble) - if not all([stat == status.STATUS_COMPLETED for stat in statuses]): + if not all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]): exp.stop(orc) assert False # client ensemble failed diff --git a/tests/on_wlm/test_dragon.py b/tests/on_wlm/test_dragon.py new file mode 100644 index 000000000..a05d38141 --- /dev/null +++ b/tests/on_wlm/test_dragon.py @@ -0,0 +1,94 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import pytest + +from smartsim import Experiment +from smartsim._core.launcher.dragon.dragonLauncher import DragonLauncher +from smartsim.status import SmartSimStatus + +# retrieved from pytest fixtures +if pytest.test_launcher != "dragon": + pytestmark = pytest.mark.skip(reason="Test is only for Dragon WLM systems") + + +def test_dragon_global_path(global_dragon_teardown, wlmutils, test_dir, monkeypatch): + monkeypatch.setenv("SMARTSIM_DRAGON_SERVER_PATH", test_dir) + exp: Experiment = Experiment( + "test_dragon_connection", + exp_path=test_dir, + launcher=wlmutils.get_test_launcher(), + ) + rs = exp.create_run_settings(exe="sleep", exe_args=["1"]) + model = exp.create_model("sleep", run_settings=rs) + + exp.generate(model) + exp.start(model, block=True) + + try: + assert exp.get_status(model)[0] == SmartSimStatus.STATUS_COMPLETED + finally: + launcher: DragonLauncher = exp._control._launcher + launcher.cleanup() + + +def test_dragon_exp_path(global_dragon_teardown, wlmutils, test_dir, monkeypatch): + monkeypatch.delenv("SMARTSIM_DRAGON_SERVER_PATH", raising=False) + monkeypatch.delenv("SMARTSIM_DRAGON_SERVER_PATH_EXP", raising=False) + exp: Experiment = Experiment( + "test_dragon_connection", + exp_path=test_dir, + launcher=wlmutils.get_test_launcher(), + ) + rs = exp.create_run_settings(exe="sleep", exe_args=["1"]) + model = exp.create_model("sleep", run_settings=rs) + + exp.generate(model) + exp.start(model, block=True) + try: + assert exp.get_status(model)[0] == SmartSimStatus.STATUS_COMPLETED + finally: + launcher: DragonLauncher = exp._control._launcher + launcher.cleanup() + + +def test_dragon_cannot_honor(wlmutils, test_dir): + exp: Experiment = Experiment( + "test_dragon_cannot_honor", + exp_path=test_dir, + launcher=wlmutils.get_test_launcher(), + ) + rs = exp.create_run_settings(exe="sleep", exe_args=["1"]) + rs.set_nodes(100) + model = exp.create_model("sleep", run_settings=rs) + + exp.generate(model) + exp.start(model, block=True) + + try: + assert 
exp.get_status(model)[0] == SmartSimStatus.STATUS_FAILED + finally: + launcher: DragonLauncher = exp._control._launcher + launcher.cleanup() diff --git a/tests/on_wlm/test_dragon_entrypoint.py b/tests/on_wlm/test_dragon_entrypoint.py new file mode 100644 index 000000000..025b5692f --- /dev/null +++ b/tests/on_wlm/test_dragon_entrypoint.py @@ -0,0 +1,295 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import os +import pathlib +import typing as t + +import pytest + +# retrieved from pytest fixtures +if pytest.test_launcher != "dragon": + pytestmark = pytest.mark.skip(reason="Test is only for Dragon WLM systems") + +try: + import smartsim._core.entrypoints.dragon as drg +except: + pytest.skip("Unable to import Dragon library", allow_module_level=True) + + +@pytest.fixture +def mock_argv() -> t.List[str]: + """Fixture for returning valid arguments to the entrypoint""" + return ["+launching_address", "mock-addr", "+interface", "mock-interface"] + + +def test_file_removal(test_dir: str, monkeypatch: pytest.MonkeyPatch): + """Verify that the log file is removed when expected""" + mock_file_name = "mocked_file_name.txt" + expected_path = pathlib.Path(test_dir) / mock_file_name + expected_path.touch() + + with monkeypatch.context() as ctx: + # ensure we get outputs in the test directory + ctx.setattr( + "smartsim._core.entrypoints.dragon.get_log_path", lambda: str(expected_path) + ) + + drg.remove_config_log() + assert not expected_path.exists(), "Dragon config file was not removed" + + +def test_file_removal_on_bad_path(test_dir: str, monkeypatch: pytest.MonkeyPatch): + """Verify that file removal doesn't blow up if the log file wasn't created""" + mock_file_name = "mocked_file_name.txt" + expected_path = pathlib.Path(test_dir) / mock_file_name + + with monkeypatch.context() as ctx: + # ensure we get outputs in the test directory + ctx.setattr( + "smartsim._core.entrypoints.dragon.get_log_path", lambda: str(expected_path) + ) + + # confirm the file doesn't exist... 
+ assert not expected_path.exists(), "Dragon config file was not found" + + try: + # ensure we don't blow up + drg.remove_config_log() + except: + assert False + + +def test_dragon_failure( + mock_argv: t.List[str], test_dir: str, monkeypatch: pytest.MonkeyPatch +): + """Verify that the expected cleanup actions are taken when the dragon + entrypoint exits""" + mock_file_name = "mocked_file_name.txt" + expected_path = pathlib.Path(test_dir) / mock_file_name + expected_path.touch() + + with monkeypatch.context() as ctx: + # ensure we get outputs in the test directory + ctx.setattr( + "smartsim._core.entrypoints.dragon.get_log_path", lambda: str(expected_path) + ) + + def raiser(args_) -> int: + raise Exception("Something bad...") + + # we don't need to execute the entrypoint... + ctx.setattr("smartsim._core.entrypoints.dragon.execute_entrypoint", raiser) + + return_code = drg.main(mock_argv) + + # ensure our exception error code is returned + assert return_code == -1 + + +def test_dragon_main( + mock_argv: t.List[str], test_dir: str, monkeypatch: pytest.MonkeyPatch +): + """Verify that the expected startup & cleanup actions are taken when the dragon + entrypoint exits""" + mock_file_name = "mocked_file_name.txt" + expected_path = pathlib.Path(test_dir) / mock_file_name + expected_path.touch() + + with monkeypatch.context() as ctx: + # ensure we get outputs in the test directory + ctx.setattr( + "smartsim._core.entrypoints.dragon.get_log_path", lambda: str(expected_path) + ) + # we don't need to execute the actual entrypoint... + ctx.setattr( + "smartsim._core.entrypoints.dragon.execute_entrypoint", lambda args_: 0 + ) + + return_code = drg.main(mock_argv) + + # execute_entrypoint should return 0 from our mock + assert return_code == 0 + # the cleanup should remove our config file + assert not expected_path.exists(), "Dragon config file was not removed!" 
+ # the environment should be set as expected + assert os.environ.get("PYTHONUNBUFFERED", None) == "1" + + +@pytest.mark.parametrize( + "signal_num", + [ + pytest.param(0, id="non-truthy signal"), + pytest.param(-1, id="negative signal"), + pytest.param(1, id="positive signal"), + ], +) +def test_signal_handler(signal_num: int, monkeypatch: pytest.MonkeyPatch): + """Verify that the signal handler performs expected actions""" + counter: int = 0 + + def increment_counter(*args, **kwargs): + nonlocal counter + counter += 1 + + with monkeypatch.context() as ctx: + ctx.setattr("smartsim._core.entrypoints.dragon.cleanup", increment_counter) + ctx.setattr("smartsim._core.entrypoints.dragon.logger.info", increment_counter) + + drg.handle_signal(signal_num, None) + + # show that we log informational message & do cleanup (take 2 actions) + assert counter == 2 + + +def test_log_path(monkeypatch: pytest.MonkeyPatch): + """Verify that the log path is loaded & returned as expected""" + + with monkeypatch.context() as ctx: + expected_filename = "foo.log" + ctx.setattr( + "smartsim._core.config.config.Config.dragon_log_filename", expected_filename + ) + + log_path = drg.get_log_path() + + assert expected_filename in log_path + + +def test_summary(test_dir: str, monkeypatch: pytest.MonkeyPatch): + """Verify that the summary is written to expected location w/expected information""" + + with monkeypatch.context() as ctx: + expected_ip = "127.0.0.111" + expected_interface = "mock_int0" + summary_file = pathlib.Path(test_dir) / "foo.log" + expected_hostname = "mockhostname" + + ctx.setattr( + "smartsim._core.config.config.Config.dragon_log_filename", + str(summary_file), + ) + ctx.setattr( + "smartsim._core.entrypoints.dragon.socket.gethostname", + lambda: expected_hostname, + ) + + drg.print_summary(expected_interface, expected_ip) + + summary = summary_file.read_text() + + assert expected_ip in summary + assert expected_interface in summary + assert expected_hostname in summary + + 
+def test_cleanup(monkeypatch: pytest.MonkeyPatch): + """Verify that the cleanup function attempts to remove the log file""" + counter: int = 0 + + def increment_counter(*args, **kwargs): + nonlocal counter + counter += 1 + + with monkeypatch.context() as ctx: + ctx.setattr( + "smartsim._core.entrypoints.dragon.remove_config_log", increment_counter + ) + drg.SHUTDOWN_INITIATED = False + drg.cleanup() + + # show that cleanup removes config + assert counter == 1 + # show that cleanup alters the flag to enable shutdown + assert drg.SHUTDOWN_INITIATED + + +def test_signal_handler_registration(test_dir: str, monkeypatch: pytest.MonkeyPatch): + """Verify that signal handlers are registered for all expected signals""" + sig_nums: t.List[int] = [] + + def track_args(*args, **kwargs): + nonlocal sig_nums + sig_nums.append(args[0]) + + with monkeypatch.context() as ctx: + ctx.setattr("smartsim._core.entrypoints.dragon.signal.signal", track_args) + + # ensure valid start point + assert not sig_nums + + drg.register_signal_handlers() + + # ensure all expected handlers are registered + assert set(sig_nums) == set(drg.SIGNALS) + + +def test_arg_parser__no_args(): + """Verify arg parser fails when no args are not supplied""" + args_list = [] + + with pytest.raises(SystemExit) as ex: + # ensure that parser complains about missing required arguments + drg.parse_arguments(args_list) + + +def test_arg_parser__invalid_launch_addr(): + """Verify arg parser fails with empty launch_address""" + addr_flag = "+launching_address" + addr_value = "" + + args_list = [addr_flag, addr_value] + + with pytest.raises(ValueError) as ex: + args = drg.parse_arguments(args_list) + + +def test_arg_parser__required_only(): + """Verify arg parser succeeds when optional args are omitted""" + addr_flag = "+launching_address" + addr_value = "mock-address" + + args_list = [addr_flag, addr_value] + + args = drg.parse_arguments(args_list) + + assert args.launching_address == addr_value + assert not 
args.interface + + +def test_arg_parser__with_optionals(): + """Verify arg parser succeeds when optional args are included""" + addr_flag = "+launching_address" + addr_value = "mock-address" + + interface_flag = "+interface" + interface_value = "mock-int" + + args_list = [interface_flag, interface_value, addr_flag, addr_value] + + args = drg.parse_arguments(args_list) + + assert args.launching_address == addr_value + assert args.interface == interface_value diff --git a/tests/on_wlm/test_generic_orc_launch.py b/tests/on_wlm/test_generic_orc_launch.py index 6cf1c3918..cacdd5be5 100644 --- a/tests/on_wlm/test_generic_orc_launch.py +++ b/tests/on_wlm/test_generic_orc_launch.py @@ -26,7 +26,8 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment +from smartsim.status import SmartSimStatus # retrieved from pytest fixtures if pytest.test_launcher not in pytest.wlm_options: @@ -49,19 +50,18 @@ def test_launch_orc_auto(test_dir, wlmutils): single_cmd=False, hosts=wlmutils.get_test_hostlist(), ) - orc.set_path(test_dir) exp.start(orc, block=True) statuses = exp.get_status(orc) # don't use assert so that we don't leave an orphan process - if status.STATUS_FAILED in statuses: + if SmartSimStatus.STATUS_FAILED in statuses: exp.stop(orc) assert False exp.stop(orc) statuses = exp.get_status(orc) - assert all([stat == status.STATUS_CANCELLED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_CANCELLED for stat in statuses]) def test_launch_cluster_orc_single(test_dir, wlmutils): @@ -82,19 +82,18 @@ def test_launch_cluster_orc_single(test_dir, wlmutils): single_cmd=True, hosts=wlmutils.get_test_hostlist(), ) - orc.set_path(test_dir) exp.start(orc, block=True) statuses = exp.get_status(orc) # don't use assert so that orc we don't leave an orphan process - if status.STATUS_FAILED in statuses: + if SmartSimStatus.STATUS_FAILED in statuses: exp.stop(orc) assert False exp.stop(orc) statuses = exp.get_status(orc) - assert all([stat == 
status.STATUS_CANCELLED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_CANCELLED for stat in statuses]) def test_launch_cluster_orc_multi(test_dir, wlmutils): @@ -115,16 +114,15 @@ def test_launch_cluster_orc_multi(test_dir, wlmutils): single_cmd=False, hosts=wlmutils.get_test_hostlist(), ) - orc.set_path(test_dir) exp.start(orc, block=True) statuses = exp.get_status(orc) # don't use assert so that orc we don't leave an orphan process - if status.STATUS_FAILED in statuses: + if SmartSimStatus.STATUS_FAILED in statuses: exp.stop(orc) assert False exp.stop(orc) statuses = exp.get_status(orc) - assert all([stat == status.STATUS_CANCELLED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_CANCELLED for stat in statuses]) diff --git a/tests/on_wlm/test_het_job.py b/tests/on_wlm/test_het_job.py index 5a039a7c9..aeea7b474 100644 --- a/tests/on_wlm/test_het_job.py +++ b/tests/on_wlm/test_het_job.py @@ -34,10 +34,10 @@ pytestmark = pytest.mark.skip(reason="Test is only for Slurm WLM systems") -def test_mpmd_errors(monkeypatch): +def test_mpmd_errors(monkeypatch, test_dir): monkeypatch.setenv("SLURM_HET_SIZE", "1") exp_name = "test-het-job-errors" - exp = Experiment(exp_name, launcher="slurm") + exp = Experiment(exp_name, exp_path=test_dir, launcher="slurm") rs: SrunSettings = exp.create_run_settings("sleep", "1", run_command="srun") rs2: SrunSettings = exp.create_run_settings("sleep", "1", run_command="srun") with pytest.raises(ValueError): @@ -49,11 +49,11 @@ def test_mpmd_errors(monkeypatch): rs.set_het_group(1) -def test_set_het_groups(monkeypatch): +def test_set_het_groups(monkeypatch, test_dir): """Test ability to set one or more het groups to run setting""" monkeypatch.setenv("SLURM_HET_SIZE", "4") exp_name = "test-set-het-group" - exp = Experiment(exp_name, launcher="slurm") + exp = Experiment(exp_name, exp_path=test_dir, launcher="slurm") rs: SrunSettings = exp.create_run_settings("sleep", "1", run_command="srun") 
rs.set_het_group([1]) assert rs.run_args["het-group"] == "1" @@ -63,11 +63,11 @@ def test_set_het_groups(monkeypatch): rs.set_het_group([4]) -def test_orch_single_cmd(monkeypatch, wlmutils): +def test_orch_single_cmd(monkeypatch, wlmutils, test_dir): """Test that single cmd is rejected in a heterogeneous job""" monkeypatch.setenv("SLURM_HET_SIZE", "1") exp_name = "test-orch-single-cmd" - exp = Experiment(exp_name, launcher="slurm") + exp = Experiment(exp_name, launcher="slurm", exp_path=test_dir) orc = exp.create_database( wlmutils.get_test_port(), db_nodes=3, diff --git a/tests/on_wlm/test_launch_errors.py b/tests/on_wlm/test_launch_errors.py index 905d96f54..2498a5a91 100644 --- a/tests/on_wlm/test_launch_errors.py +++ b/tests/on_wlm/test_launch_errors.py @@ -28,8 +28,9 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment from smartsim.error import SmartSimError +from smartsim.status import SmartSimStatus # retrieved from pytest fixtures if pytest.test_launcher not in pytest.wlm_options: @@ -54,7 +55,7 @@ def test_failed_status(fileutils, test_dir, wlmutils): time.sleep(2) stat = exp.get_status(model) assert len(stat) == 1 - assert stat[0] == status.STATUS_FAILED + assert stat[0] == SmartSimStatus.STATUS_FAILED def test_bad_run_command_args(fileutils, test_dir, wlmutils): diff --git a/tests/on_wlm/test_launch_ompi_lsf.py b/tests/on_wlm/test_launch_ompi_lsf.py index ed5de291b..51c82e418 100644 --- a/tests/on_wlm/test_launch_ompi_lsf.py +++ b/tests/on_wlm/test_launch_ompi_lsf.py @@ -26,7 +26,8 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment +from smartsim.status import SmartSimStatus # retrieved from pytest fixtures if pytest.test_launcher not in pytest.wlm_options: @@ -49,4 +50,4 @@ def test_launch_openmpi_lsf(fileutils, test_dir, wlmutils): model = exp.create_model("ompi-model", path=test_dir, run_settings=settings) exp.start(model, block=True) statuses = exp.get_status(model) - 
assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) diff --git a/tests/on_wlm/test_local_step.py b/tests/on_wlm/test_local_step.py index 4e5f45e0b..8f7d823b8 100644 --- a/tests/on_wlm/test_local_step.py +++ b/tests/on_wlm/test_local_step.py @@ -29,7 +29,7 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment from smartsim.settings import RunSettings # retrieved from pytest fixtures diff --git a/tests/on_wlm/test_preview_wlm.py b/tests/on_wlm/test_preview_wlm.py new file mode 100644 index 000000000..78da30c9a --- /dev/null +++ b/tests/on_wlm/test_preview_wlm.py @@ -0,0 +1,409 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2023, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from os import path as osp + +import numpy as np +import pytest +from jinja2.filters import FILTERS + +from smartsim import Experiment +from smartsim._core import Manifest, previewrenderer +from smartsim._core.config import CONFIG +from smartsim.database import Orchestrator +from smartsim.settings import QsubBatchSettings, RunSettings + +pytestmark = pytest.mark.slow_tests + +on_wlm = (pytest.test_launcher in pytest.wlm_options,) + + +@pytest.fixture +def choose_host(): + def _choose_host(wlmutils, index: int = 0): + hosts = wlmutils.get_test_hostlist() + if hosts: + return hosts[index] + return None + + return _choose_host + + +def add_batch_resources(wlmutils, batch_settings): + if isinstance(batch_settings, QsubBatchSettings): + for key, value in wlmutils.get_batch_resources().items(): + batch_settings.set_resource(key, value) + + +@pytest.mark.skipif( + pytest.test_launcher not in pytest.wlm_options, + reason="Not testing WLM integrations", +) +def test_preview_wlm_run_commands_cluster_orc_model( + test_dir, coloutils, fileutils, wlmutils +): + """ + Test preview of wlm run command and run aruguments on a + orchestrator and model + """ + + exp_name = "test-preview-orc-model" + launcher = wlmutils.get_test_launcher() + test_port = wlmutils.get_test_port() + test_script = fileutils.get_test_conf_path("smartredis/multidbid.py") + exp = Experiment(exp_name, launcher=launcher, exp_path=test_dir) + + network_interface = 
wlmutils.get_test_interface() + orc = exp.create_database( + wlmutils.get_test_port(), + db_nodes=3, + batch=False, + interface=network_interface, + single_cmd=True, + hosts=wlmutils.get_test_hostlist(), + db_identifier="testdb_reg", + ) + + db_args = { + "port": test_port, + "db_cpus": 1, + "debug": True, + "db_identifier": "testdb_colo", + } + + # Create model with colocated database + smartsim_model = coloutils.setup_test_colo( + fileutils, "uds", exp, test_script, db_args, on_wlm=on_wlm + ) + + preview_manifest = Manifest(orc, smartsim_model) + + # Execute preview method + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + if pytest.test_launcher != "dragon": + assert "Run Command" in output + assert "ntasks" in output + assert "Run Arguments" in output + assert "nodes" in output + + +@pytest.mark.skipif( + pytest.test_launcher not in pytest.wlm_options, + reason="Not testing WLM integrations", +) +def test_preview_model_on_wlm(fileutils, test_dir, wlmutils): + """ + Test preview of wlm run command and run aruguments for a model + """ + exp_name = "test-preview-model-wlm" + exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir) + + script = fileutils.get_test_conf_path("sleep.py") + settings1 = wlmutils.get_base_run_settings("python", f"{script} --time=5") + settings2 = wlmutils.get_base_run_settings("python", f"{script} --time=5") + M1 = exp.create_model("m1", path=test_dir, run_settings=settings1) + M2 = exp.create_model("m2", path=test_dir, run_settings=settings2) + + preview_manifest = Manifest(M1, M2) + + # Execute preview method + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + if pytest.test_launcher != "dragon": + assert "Run Command" in output + assert "ntasks" in output + assert "time" in output + assert "nodes" in output + assert "Run Arguments" in output + + +@pytest.mark.skipif( + pytest.test_launcher not in pytest.wlm_options, + 
reason="Not testing WLM integrations", +) +def test_preview_batch_model(fileutils, test_dir, wlmutils): + """Test the preview of a model with batch settings""" + + exp_name = "test-batch-model" + exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir) + + script = fileutils.get_test_conf_path("sleep.py") + batch_settings = exp.create_batch_settings(nodes=1, time="00:01:00") + + batch_settings.set_account(wlmutils.get_test_account()) + add_batch_resources(wlmutils, batch_settings) + run_settings = wlmutils.get_run_settings("python", f"{script} --time=5") + model = exp.create_model( + "model", path=test_dir, run_settings=run_settings, batch_settings=batch_settings + ) + model.set_path(test_dir) + + preview_manifest = Manifest(model) + + # Execute preview method + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + assert "Batch Launch: True" in output + assert "Batch Command" in output + assert "Batch Arguments" in output + assert "nodes" in output + assert "time" in output + + +@pytest.mark.skipif( + pytest.test_launcher not in pytest.wlm_options, + reason="Not testing WLM integrations", +) +def test_preview_batch_ensemble(fileutils, test_dir, wlmutils): + """Test preview of a batch ensemble""" + + exp_name = "test-preview-batch-ensemble" + exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir) + + script = fileutils.get_test_conf_path("sleep.py") + settings = wlmutils.get_run_settings("python", f"{script} --time=5") + M1 = exp.create_model("m1", path=test_dir, run_settings=settings) + M2 = exp.create_model("m2", path=test_dir, run_settings=settings) + + batch = exp.create_batch_settings(nodes=1, time="00:01:00") + add_batch_resources(wlmutils, batch) + + batch.set_account(wlmutils.get_test_account()) + ensemble = exp.create_ensemble("batch-ens", batch_settings=batch) + ensemble.add_model(M1) + ensemble.add_model(M2) + ensemble.set_path(test_dir) + + preview_manifest = 
Manifest(ensemble) + + # Execute preview method + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + assert "Batch Launch: True" in output + assert "Batch Command" in output + assert "Batch Arguments" in output + assert "nodes" in output + assert "time" in output + + +@pytest.mark.skipif( + pytest.test_launcher not in pytest.wlm_options, + reason="Not testing WLM integrations", +) +def test_preview_launch_command(test_dir, wlmutils, choose_host): + """Test preview launch command for orchestrator, models, and + ensembles""" + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + test_interface = wlmutils.get_test_interface() + test_port = wlmutils.get_test_port() + exp_name = "test_preview_launch_command" + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + # create regular database + orc = exp.create_database( + port=test_port, + interface=test_interface, + hosts=choose_host(wlmutils), + ) + + model_params = {"port": 6379, "password": "unbreakable_password"} + rs1 = RunSettings("bash", "multi_tags_template.sh") + rs2 = exp.create_run_settings("echo", ["spam", "eggs"]) + + hello_world_model = exp.create_model( + "echo-hello", run_settings=rs1, params=model_params + ) + + spam_eggs_model = exp.create_model("echo-spam", run_settings=rs2) + + # setup ensemble parameter space + learning_rate = list(np.linspace(0.01, 0.5)) + train_params = {"LR": learning_rate} + + run = exp.create_run_settings(exe="python", exe_args="./train-model.py") + + ensemble = exp.create_ensemble( + "Training-Ensemble", + params=train_params, + params_as_args=["LR"], + run_settings=run, + perm_strategy="random", + n_models=4, + ) + + preview_manifest = Manifest(orc, spam_eggs_model, hello_world_model, ensemble) + + # Execute preview method + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + assert "orchestrator" in output + assert "echo-spam" in output + assert "echo-hello" in output + + assert 
"Training-Ensemble" in output + assert "me: Training-Ensemble_0" in output + assert "Training-Ensemble_1" in output + assert "Training-Ensemble_2" in output + assert "Training-Ensemble_3" in output + + +@pytest.mark.skipif( + pytest.test_launcher not in pytest.wlm_options, + reason="Not testing WLM integrations", +) +def test_preview_batch_launch_command(fileutils, test_dir, wlmutils): + """Test the preview of a model with batch settings""" + + exp_name = "test-batch-entities" + exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir) + + script = fileutils.get_test_conf_path("sleep.py") + batch_settings = exp.create_batch_settings(nodes=1, time="00:01:00") + + batch_settings.set_account(wlmutils.get_test_account()) + add_batch_resources(wlmutils, batch_settings) + run_settings = wlmutils.get_run_settings("python", f"{script} --time=5") + model = exp.create_model( + "model", path=test_dir, run_settings=run_settings, batch_settings=batch_settings + ) + model.set_path(test_dir) + + orc = Orchestrator( + wlmutils.get_test_port(), + db_nodes=3, + batch=True, + interface="lo", + launcher="slurm", + run_command="srun", + ) + orc.set_batch_arg("account", "ACCOUNT") + + preview_manifest = Manifest(orc, model) + # Execute preview method + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Batch Launch: True" in output + assert "Batch Command" in output + assert "Batch Arguments" in output + + +@pytest.mark.skipif( + pytest.test_launcher not in pytest.wlm_options, + reason="Not testing WLM integrations", +) +def test_ensemble_batch(test_dir, wlmutils): + """ + Test preview of client configuration and key prefixing in Ensemble preview + """ + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + exp = Experiment( + "test-preview-ensemble-clientconfig", exp_path=test_dir, launcher=test_launcher + ) + # Create Orchestrator + db = exp.create_database(port=6780, interface="lo") 
+ exp.generate(db, overwrite=True) + rs1 = exp.create_run_settings("echo", ["hello", "world"]) + # Create ensemble + batch_settings = exp.create_batch_settings(nodes=1, time="00:01:00") + batch_settings.set_account(wlmutils.get_test_account()) + add_batch_resources(wlmutils, batch_settings) + ensemble = exp.create_ensemble( + "fd_simulation", run_settings=rs1, batch_settings=batch_settings, replicas=2 + ) + # enable key prefixing on ensemble + ensemble.enable_key_prefixing() + exp.generate(ensemble, overwrite=True) + rs2 = exp.create_run_settings("echo", ["spam", "eggs"]) + # Create model + ml_model = exp.create_model("tf_training", rs2) + + for sim in ensemble.entities: + ml_model.register_incoming_entity(sim) + + exp.generate(ml_model, overwrite=True) + + preview_manifest = Manifest(db, ml_model, ensemble) + + # Call preview renderer for testing output + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Client Configuration" in output + assert "Database Identifier" in output + assert "Database Backend" in output + assert "Type" in output + + +@pytest.mark.skipif( + pytest.test_launcher not in pytest.wlm_options, + reason="Not testing WLM integrations", +) +def test_preview_ensemble_db_script(wlmutils, test_dir): + """ + Test preview of a torch script on a model in an ensemble. 
+ """ + # Initialize the Experiment and set the launcher to auto + test_launcher = wlmutils.get_test_launcher() + exp = Experiment("getting-started", launcher=test_launcher) + + orch = exp.create_database(db_identifier="test_db1") + orch_2 = exp.create_database(db_identifier="test_db2", db_nodes=3) + # Initialize a RunSettings object + model_settings = exp.create_run_settings(exe="python", exe_args="params.py") + model_settings_2 = exp.create_run_settings(exe="python", exe_args="params.py") + model_settings_3 = exp.create_run_settings(exe="python", exe_args="params.py") + # Initialize a Model object + model_instance = exp.create_model("model_name", model_settings) + model_instance_2 = exp.create_model("model_name_2", model_settings_2) + batch = exp.create_batch_settings(time="24:00:00", account="test") + ensemble = exp.create_ensemble( + "ensemble", batch_settings=batch, run_settings=model_settings_3, replicas=2 + ) + ensemble.add_model(model_instance) + ensemble.add_model(model_instance_2) + + # TorchScript string + torch_script_str = "def negate(x):\n\treturn torch.neg(x)\n" + + # Attach TorchScript to Model + model_instance.add_script( + name="example_script", + script=torch_script_str, + device="GPU", + devices_per_node=2, + first_device=0, + ) + preview_manifest = Manifest(ensemble, orch, orch_2) + + # Call preview renderer for testing output + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Torch Script" in output diff --git a/tests/on_wlm/test_restart.py b/tests/on_wlm/test_restart.py index 42bbe752c..0116c10d3 100644 --- a/tests/on_wlm/test_restart.py +++ b/tests/on_wlm/test_restart.py @@ -28,7 +28,8 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment +from smartsim.status import SmartSimStatus # retrieved from pytest fixtures if pytest.test_launcher not in pytest.wlm_options: @@ -48,10 +49,10 @@ def test_restart(fileutils, test_dir, wlmutils): 
exp.start(M1, M2, block=True) statuses = exp.get_status(M1, M2) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) exp.start(M1, M2, block=True) statuses = exp.get_status(M1, M2) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) # TODO add job history check here. diff --git a/tests/on_wlm/test_simple_base_settings_on_wlm.py b/tests/on_wlm/test_simple_base_settings_on_wlm.py index 1611781eb..caa55da3e 100644 --- a/tests/on_wlm/test_simple_base_settings_on_wlm.py +++ b/tests/on_wlm/test_simple_base_settings_on_wlm.py @@ -28,8 +28,9 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment from smartsim.settings.settings import RunSettings +from smartsim.status import SmartSimStatus """ Test the launch and stop of simple models and ensembles that use base @@ -63,7 +64,7 @@ def test_simple_model_on_wlm(fileutils, test_dir, wlmutils): # launch model twice to show that it can also be restarted for _ in range(2): exp.start(M, block=True) - assert exp.get_status(M)[0] == status.STATUS_COMPLETED + assert exp.get_status(M)[0] == SmartSimStatus.STATUS_COMPLETED def test_simple_model_stop_on_wlm(fileutils, test_dir, wlmutils): @@ -83,4 +84,4 @@ def test_simple_model_stop_on_wlm(fileutils, test_dir, wlmutils): time.sleep(2) exp.stop(M) assert M.name in exp._control._jobs.completed - assert exp.get_status(M)[0] == status.STATUS_CANCELLED + assert exp.get_status(M)[0] == SmartSimStatus.STATUS_CANCELLED diff --git a/tests/on_wlm/test_simple_entity_launch.py b/tests/on_wlm/test_simple_entity_launch.py index 1ecc27442..28ddf92f7 100644 --- a/tests/on_wlm/test_simple_entity_launch.py +++ b/tests/on_wlm/test_simple_entity_launch.py @@ -24,11 +24,14 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED 
OF THE POSSIBILITY OF SUCH DAMAGE. +import os.path from copy import deepcopy +from pathlib import Path import pytest -from smartsim import Experiment, status +from smartsim import Experiment +from smartsim.status import SmartSimStatus """ Test the launch of simple entity types on pre-existing allocations. @@ -59,7 +62,38 @@ def test_models(fileutils, test_dir, wlmutils): exp.start(M1, M2, block=True) statuses = exp.get_status(M1, M2) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) + + +def test_multinode_app(mpi_app_path, test_dir, wlmutils): + + if not mpi_app_path: + pytest.skip("Test needs MPI to run") + + exp_name = "test-mpi-app" + exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir) + + settings = exp.create_run_settings(str(mpi_app_path), []) + settings.set_nodes(3) + + model = exp.create_model("mpi_app", run_settings=settings) + exp.generate(model) + + exp.start(model, block=True) + + p = Path(model.path) + output_files = sorted([str(path) for path in p.glob("mpi_hello*")]) + expected_files = sorted( + [os.path.join(model.path, f"mpi_hello.{idx}.log") for idx in range(3)] + ) + + assert output_files == expected_files + + for index, file in enumerate(output_files): + with open(file) as f: + assert f.readlines() == [ + f"Hello world from rank {index} out of 3 processors\n" + ] def test_ensemble(fileutils, test_dir, wlmutils): @@ -71,11 +105,10 @@ def test_ensemble(fileutils, test_dir, wlmutils): settings.set_tasks(1) ensemble = exp.create_ensemble("e1", run_settings=settings, replicas=2) - ensemble.set_path(test_dir) exp.start(ensemble, block=True) statuses = exp.get_status(ensemble) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) def test_summary(fileutils, test_dir, wlmutils): @@ -84,21 +117,21 @@ def test_summary(fileutils, 
test_dir, wlmutils): exp_name = "test-launch-summary" exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir) - sleep = fileutils.get_test_conf_path("sleep.py") + sleep_exp = fileutils.get_test_conf_path("sleep.py") bad = fileutils.get_test_conf_path("bad.py") - sleep_settings = exp.create_run_settings("python", f"{sleep} --time=3") + sleep_settings = exp.create_run_settings("python", f"{sleep_exp} --time=3") sleep_settings.set_tasks(1) bad_settings = exp.create_run_settings("python", f"{bad} --time=6") bad_settings.set_tasks(1) - sleep = exp.create_model("sleep", path=test_dir, run_settings=sleep_settings) + sleep_exp = exp.create_model("sleep", path=test_dir, run_settings=sleep_settings) bad = exp.create_model("bad", path=test_dir, run_settings=bad_settings) # start and poll - exp.start(sleep, bad) - assert exp.get_status(bad)[0] == status.STATUS_FAILED - assert exp.get_status(sleep)[0] == status.STATUS_COMPLETED + exp.start(sleep_exp, bad) + assert exp.get_status(bad)[0] == SmartSimStatus.STATUS_FAILED + assert exp.get_status(sleep_exp)[0] == SmartSimStatus.STATUS_COMPLETED summary_str = exp.summary(style="plain") print(summary_str) @@ -106,13 +139,18 @@ def test_summary(fileutils, test_dir, wlmutils): rows = [s.split() for s in summary_str.split("\n")] headers = ["Index"] + rows.pop(0) + # There is no guarantee that the order of + # the rows will be sleep, bad row = dict(zip(headers, rows[0])) - assert sleep.name == row["Name"] - assert sleep.type == row["Entity-Type"] + row_1 = dict(zip(headers, rows[1])) + if row["Name"] != sleep_exp.name: + row_1, row = row, row_1 + + assert sleep_exp.name == row["Name"] + assert sleep_exp.type == row["Entity-Type"] assert 0 == int(row["RunID"]) assert 0 == int(row["Returncode"]) - row_1 = dict(zip(headers, rows[1])) assert bad.name == row_1["Name"] assert bad.type == row_1["Entity-Type"] assert 0 == int(row_1["RunID"]) diff --git a/tests/on_wlm/test_stop.py b/tests/on_wlm/test_stop.py index 
8d75d9f65..abc7441bb 100644 --- a/tests/on_wlm/test_stop.py +++ b/tests/on_wlm/test_stop.py @@ -28,7 +28,8 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment +from smartsim.status import SmartSimStatus """ Test Stopping launched entities. @@ -55,7 +56,7 @@ def test_stop_entity(fileutils, test_dir, wlmutils): time.sleep(5) exp.stop(M1) assert M1.name in exp._control._jobs.completed - assert exp.get_status(M1)[0] == status.STATUS_CANCELLED + assert exp.get_status(M1)[0] == SmartSimStatus.STATUS_CANCELLED def test_stop_entity_list(fileutils, test_dir, wlmutils): @@ -67,11 +68,10 @@ def test_stop_entity_list(fileutils, test_dir, wlmutils): settings.set_tasks(1) ensemble = exp.create_ensemble("e1", run_settings=settings, replicas=2) - ensemble.set_path(test_dir) exp.start(ensemble, block=False) time.sleep(5) exp.stop(ensemble) statuses = exp.get_status(ensemble) - assert all([stat == status.STATUS_CANCELLED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_CANCELLED for stat in statuses]) assert all([m.name in exp._control._jobs.completed for m in ensemble]) diff --git a/tests/on_wlm/test_wlm_orc_config_settings.py b/tests/on_wlm/test_wlm_orc_config_settings.py index f9ab60609..c74f2a497 100644 --- a/tests/on_wlm/test_wlm_orc_config_settings.py +++ b/tests/on_wlm/test_wlm_orc_config_settings.py @@ -27,6 +27,9 @@ import pytest from smartsim.error import SmartSimError +from smartsim.log import get_logger + +logger = get_logger(__name__) # retrieved from pytest fixtures if pytest.test_launcher not in pytest.wlm_options: @@ -40,13 +43,15 @@ pytestmark = pytest.mark.skip(reason="SmartRedis version is < 0.3.1") -def test_config_methods_on_wlm_single(dbutils, db): +def test_config_methods_on_wlm_single(dbutils, prepare_db, single_db): """Test all configuration file edit methods on single node WLM db""" + db = prepare_db(single_db).orchestrator # test the happy path and ensure all configuration file edit methods # 
successfully execute when given correct key-value pairs configs = dbutils.get_db_configs() for setting, value in configs.items(): + logger.debug(f"Setting {setting}={value}") config_set_method = dbutils.get_config_edit_method(db, setting) config_set_method(value) @@ -67,14 +72,16 @@ def test_config_methods_on_wlm_single(dbutils, db): db.set_db_conf(key, value) -def test_config_methods_on_wlm_cluster(dbutils, db_cluster): +def test_config_methods_on_wlm_cluster(dbutils, prepare_db, clustered_db): """Test all configuration file edit methods on an active clustered db""" + db = prepare_db(clustered_db).orchestrator # test the happy path and ensure all configuration file edit methods # successfully execute when given correct key-value pairs configs = dbutils.get_db_configs() for setting, value in configs.items(): - config_set_method = dbutils.get_config_edit_method(db_cluster, setting) + logger.debug(f"Setting {setting}={value}") + config_set_method = dbutils.get_config_edit_method(db, setting) config_set_method(value) # ensure SmartSimError is raised when a clustered database's @@ -83,7 +90,8 @@ def test_config_methods_on_wlm_cluster(dbutils, db_cluster): for key, value_list in ss_error_configs.items(): for value in value_list: with pytest.raises(SmartSimError): - db_cluster.set_db_conf(key, value) + logger.debug(f"Setting {key}={value}") + db.set_db_conf(key, value) # ensure TypeError is raised when a clustered database's # Orchestrator.set_db_conf is given invalid CONFIG key-value pairs @@ -91,4 +99,5 @@ def test_config_methods_on_wlm_cluster(dbutils, db_cluster): for key, value_list in type_error_configs.items(): for value in value_list: with pytest.raises(TypeError): - db_cluster.set_db_conf(key, value) + logger.debug(f"Setting {key}={value}") + db.set_db_conf(key, value) diff --git a/tests/test_collector_manager.py b/tests/test_collector_manager.py new file mode 100644 index 000000000..56add1ef7 --- /dev/null +++ b/tests/test_collector_manager.py @@ -0,0 +1,481 @@ 
+# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import asyncio +import datetime + +import pytest + +from conftest import MockCollectorEntityFunc +from smartsim._core.utils.telemetry.collector import ( + CollectorManager, + DBConnectionCollector, + DBConnectionCountCollector, + DBMemoryCollector, + FileSink, + redisa, +) +from smartsim._core.utils.telemetry.telemetry import JobEntity + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +def test_collector_manager_add(mock_entity: MockCollectorEntityFunc, mock_sink) -> None: + """Ensure that collector manager add & clear work as expected""" + entity1 = mock_entity(telemetry_on=True) + + con_col = DBConnectionCollector(entity1, mock_sink()) + mem_col = DBMemoryCollector(entity1, mock_sink()) + + manager = CollectorManager() + + # ensure manager starts empty + assert len(list(manager.all_collectors)) == 0 + + # ensure added item is in the collector list + manager.add(con_col) + assert len(list(manager.all_collectors)) == 1 + + # ensure a duplicate isn't added + manager.add(con_col) + assert len(list(manager.all_collectors)) == 1 + + # ensure another collector for the same entity is added + manager.add(mem_col) + assert len(list(manager.all_collectors)) == 2 + + # create a collector for another entity + entity2 = mock_entity(telemetry_on=True) + con_col2 = DBConnectionCollector(entity2, mock_sink()) + + # ensure collectors w/same type for new entities are not treated as dupes + manager.add(con_col2) + assert len(list(manager.all_collectors)) == 3 + + # verify no dupe on second entity + manager.add(con_col2) + assert len(list(manager.all_collectors)) == 3 + + manager.clear() + assert len(list(manager.all_collectors)) == 0 + + # ensure post-clear adding still works + manager.add(con_col2) + assert len(list(manager.all_collectors)) == 1 + + +def test_collector_manager_add_multi( + mock_entity: MockCollectorEntityFunc, mock_sink +) -> None: + """Ensure that collector manager multi-add works as expected""" + entity = 
mock_entity(telemetry_on=True) + + con_col = DBConnectionCollector(entity, mock_sink()) + mem_col = DBMemoryCollector(entity, mock_sink()) + manager = CollectorManager() + + # add multiple items at once + manager.add_all([con_col, mem_col]) + + assert len(list(manager.all_collectors)) == 2 + + # ensure multi-add does not produce dupes + con_col2 = DBConnectionCollector(entity, mock_sink()) + mem_col2 = DBMemoryCollector(entity, mock_sink()) + + manager.add_all([con_col2, mem_col2]) + assert len(list(manager.all_collectors)) == 2 + + +@pytest.mark.asyncio +async def test_collector_manager_remove( + mock_entity: MockCollectorEntityFunc, mock_sink +) -> None: + """Ensure that collector manager solo remove works as expected""" + entity1 = mock_entity(telemetry_on=True) + entity2 = mock_entity(telemetry_on=True) + + con_col1 = DBConnectionCollector(entity1, mock_sink()) + mem_col1 = DBMemoryCollector(entity1, mock_sink()) + manager = CollectorManager() + + # ensure multi-add does not produce dupes + con_col2 = DBConnectionCollector(entity2, mock_sink()) + mem_col2 = DBMemoryCollector(entity2, mock_sink()) + + manager.add_all([con_col1, mem_col1, con_col2, mem_col2]) + assert len(manager.all_collectors) == 4 + + await manager.remove(entity1) + assert len(manager.all_collectors) == 2 + + await manager.remove(entity1) + assert len(manager.all_collectors) == 2 + + await manager.remove(entity2) + assert len(manager.all_collectors) == 0 + + +@pytest.mark.asyncio +async def test_collector_manager_remove_all( + mock_entity: MockCollectorEntityFunc, mock_sink +) -> None: + """Ensure that collector manager multi-remove works as expected""" + entity1 = mock_entity(telemetry_on=True) + entity2 = mock_entity(telemetry_on=True) + + con_col1 = DBConnectionCollector(entity1, mock_sink()) + mem_col1 = DBMemoryCollector(entity1, mock_sink()) + manager = CollectorManager() + + # ensure multi-add does not produce dupes + con_col2 = DBConnectionCollector(entity2, mock_sink()) + mem_col2 = 
DBMemoryCollector(entity2, mock_sink()) + + manager.add_all([con_col1, mem_col1, con_col2, mem_col2]) + assert len(manager.all_collectors) == 4 + + await manager.remove_all([entity1, entity2]) + assert len(manager.all_collectors) == 0 + + +@pytest.mark.asyncio +async def test_collector_manager_collect( + mock_entity: MockCollectorEntityFunc, + mock_redis, + monkeypatch: pytest.MonkeyPatch, + mock_con, + mock_mem, + mock_sink, +) -> None: + """Ensure that all collectors are executed and some metric is retrieved + NOTE: responses & producer are mocked""" + entity1 = mock_entity(port=1234, name="entity1", telemetry_on=True) + entity2 = mock_entity(port=2345, name="entity2", telemetry_on=True) + + sinks = [mock_sink(), mock_sink(), mock_sink()] + con_col1 = DBConnectionCollector(entity1, sinks[0]) + mem_col1 = DBMemoryCollector(entity1, sinks[1]) + mem_col2 = DBMemoryCollector(entity2, sinks[2]) + + manager = CollectorManager() + manager.add_all([con_col1, mem_col1, mem_col2]) + + # Execute collection + with monkeypatch.context() as ctx: + ctx.setattr( + redisa, + "Redis", + mock_redis(client_stats=mock_con(1, 10), mem_stats=mock_mem(1, 10)), + ) + await manager.collect() + + # verify each collector retrieved some metric & sent it to the sink + for sink in sinks: + value = sink.args + assert value + + +@pytest.mark.asyncio +async def test_collector_manager_collect_filesink( + mock_entity: MockCollectorEntityFunc, + mock_redis, + monkeypatch, + mock_mem, + mock_con, +) -> None: + """Ensure that all collectors are executed and some metric is retrieved + and the FileSink is written to as expected""" + entity1 = mock_entity(port=1234, name="entity1", telemetry_on=True) + entity2 = mock_entity(port=2345, name="entity2", telemetry_on=True) + + sinks = [ + FileSink(entity1.status_dir + "/1_con.csv"), + FileSink(entity1.status_dir + "/1_mem.csv"), + FileSink(entity2.status_dir + "/2_mem.csv"), + ] + con_col1 = DBConnectionCollector(entity1, sinks[0]) + mem_col1 = 
DBMemoryCollector(entity1, sinks[1]) + mem_col2 = DBMemoryCollector(entity2, sinks[2]) + + manager = CollectorManager() + manager.add_all([con_col1, mem_col1, mem_col2]) + + # Execute collection + with monkeypatch.context() as ctx: + ctx.setattr( + redisa, + "Redis", + mock_redis(client_stats=mock_con(1, 10), mem_stats=mock_mem(1, 10)), + ) + await manager.collect() + + # verify each collector retrieved some metric & sent it to the sink + for sink in sinks: + save_to = sink.path + assert save_to.exists() + if "con" in str(save_to): + assert "127.0.0." in save_to.read_text() + else: + # look for something multiplied by 1000 + assert "000" in save_to.read_text() + + +@pytest.mark.asyncio +async def test_collector_manager_collect_integration( + test_dir: str, mock_entity: MockCollectorEntityFunc, prepare_db, local_db, mock_sink +) -> None: + """Ensure that all collectors are executed and some metric is retrieved""" + + db = prepare_db(local_db).orchestrator + entity1 = mock_entity(port=db.ports[0], name="e1", telemetry_on=True) + entity2 = mock_entity(port=db.ports[0], name="e2", telemetry_on=True) + + # todo: consider a MockSink so i don't have to save the last value in the collector + sinks = [mock_sink(), mock_sink(), mock_sink()] + con_col1 = DBConnectionCollector(entity1, sinks[0]) + mem_col1 = DBMemoryCollector(entity1, sinks[1]) + mem_col2 = DBMemoryCollector(entity2, sinks[2]) + + manager = CollectorManager() + manager.add_all([con_col1, mem_col1, mem_col2]) + + # Execute collection + await manager.collect() + + # verify each collector retrieved some metric & sent it to the sink + for sink in sinks: + value = sink.args + assert value + + +@pytest.mark.parametrize( + "timeout_at,delay_for,expect_fail", + [ + pytest.param(1000, 5000, True, id="1s timeout"), + pytest.param(2000, 5000, True, id="2s timeout"), + pytest.param(3000, 5000, True, id="3s timeout"), + pytest.param(4000, 5000, True, id="4s timeout"), + pytest.param(2000, 1000, False, id="under timeout"), 
+ ], +) +@pytest.mark.asyncio +async def test_collector_manager_timeout_db( + mock_entity: MockCollectorEntityFunc, + mock_redis, + monkeypatch: pytest.MonkeyPatch, + mock_mem, + mock_con, + timeout_at: int, + delay_for: int, + expect_fail: bool, + mock_sink, +) -> None: + """Ensure that the collector timeout is honored""" + entity1 = mock_entity(port=1234, name="e1", telemetry_on=True) + entity2 = mock_entity(port=2345, name="e2", telemetry_on=True) + + sinks = [mock_sink(), mock_sink(), mock_sink()] + con_col1 = DBConnectionCollector(entity1, sinks[0]) + mem_col1 = DBMemoryCollector(entity1, sinks[1]) + mem_col2 = DBMemoryCollector(entity2, sinks[2]) + + manager = CollectorManager(timeout_ms=timeout_at) + manager.add_all([con_col1, mem_col1, mem_col2]) + + async def snooze() -> None: + await asyncio.sleep(delay_for / 1000) + + # Execute collection + with monkeypatch.context() as ctx: + ctx.setattr( + redisa, + "Redis", + mock_redis( + client_stats=mock_con(1, 10), + mem_stats=mock_mem(1, 10), + coll_side_effect=snooze, + ), + ) + + ts0 = datetime.datetime.utcnow() + await manager.collect() + ts1 = datetime.datetime.utcnow() + + t_diff = ts1 - ts0 + actual_delay = 1000 * t_diff.seconds + + if expect_fail: + assert timeout_at <= actual_delay < delay_for + else: + assert delay_for <= actual_delay < timeout_at + + +@pytest.mark.parametrize( + "e_type,telemetry_on", + [ + pytest.param("model", False, id="models"), + pytest.param("model", True, id="models, telemetry enabled"), + pytest.param("ensemble", False, id="ensemble"), + pytest.param("ensemble", True, id="ensemble, telemetry enabled"), + pytest.param("orchestrator", False, id="orchestrator"), + pytest.param("orchestrator", True, id="orchestrator, telemetry enabled"), + pytest.param("dbnode", False, id="dbnode"), + pytest.param("dbnode", True, id="dbnode, telemetry enabled"), + ], +) +@pytest.mark.asyncio +async def test_collector_manager_find_nondb( + mock_entity: MockCollectorEntityFunc, + e_type: str, + 
telemetry_on: bool, +) -> None: + """Ensure that the number of collectors returned for entity types match expectations + NOTE: even orchestrator returns 0 mapped collectors because no collector output + paths are set on the entity""" + entity = mock_entity(port=1234, name="e1", type=e_type, telemetry_on=telemetry_on) + manager = CollectorManager(timeout_ms=10000) + + # Ask manager to produce appliable collectors + manager.register_collectors(entity) + collectors = manager.all_collectors + + # Verify collector counts, assuming no per-collector config + assert 0 == len(collectors) + + +@pytest.mark.asyncio +async def test_collector_manager_find_db(mock_entity: MockCollectorEntityFunc) -> None: + """Ensure that the manifest allows individually enabling a given collector""" + entity: JobEntity = mock_entity( + port=1234, name="entity1", type="model", telemetry_on=True + ) + manager = CollectorManager() + + # 0. popping all should result in no collectors mapping to the entity + manager.register_collectors(entity) + collectors = manager.all_collectors + + assert len(collectors) == 0 + + # 1. ensure DBConnectionCountCollector is mapped + entity = mock_entity( + port=1234, name="entity1", type="orchestrator", telemetry_on=True + ) + entity.collectors["client"] = "mock/path.csv" + manager = CollectorManager() + + # 2. client count collector should be mapped + manager.register_collectors(entity) + collectors = manager.all_collectors + + assert len(collectors) == 1 + assert isinstance(collectors[0], DBConnectionCollector) + + # 3. ensure DBConnectionCountCollector is mapped + entity = mock_entity( + port=1234, name="entity1", type="orchestrator", telemetry_on=True + ) + entity.collectors["client_count"] = "mock/path.csv" + manager = CollectorManager() + + # 4. 
client count collector should be mapped + manager.register_collectors(entity) + collectors = manager.all_collectors + + assert len(collectors) == 1 + assert isinstance(collectors[0], DBConnectionCountCollector) + + # ensure DbMemoryCollector is mapped + entity = mock_entity( + port=1234, name="entity1", type="orchestrator", telemetry_on=True + ) + entity.collectors["memory"] = "mock/path.csv" + manager = CollectorManager() + + # 5. memory collector should be mapped + manager.register_collectors(entity) + collectors = manager.all_collectors + + assert len(collectors) == 1 + assert isinstance(collectors[0], DBMemoryCollector) + + +@pytest.mark.asyncio +async def test_collector_manager_find_entity_disabled( + mock_entity: MockCollectorEntityFunc, +) -> None: + """Ensure that disabling telemetry on the entity results in no collectors""" + entity: JobEntity = mock_entity(port=1234, name="entity1", type="orchestrator") + + # set paths for all known collectors + entity.collectors["client"] = "mock/path.csv" + entity.collectors["client_count"] = "mock/path.csv" + entity.collectors["memory"] = "mock/path.csv" + + manager = CollectorManager() + + # ON behavior should locate multiple collectors + entity.telemetry_on = True + manager.register_collectors(entity) + collectors = manager.all_collectors + assert len(collectors) > 0 + + # OFF behavior should locate ZERO collectors + entity.telemetry_on = False + manager.register_collectors(entity) + collectors = manager.all_collectors + assert len(collectors) == 0 + + +@pytest.mark.asyncio +async def test_collector_manager_find_entity_unmapped( + mock_entity: MockCollectorEntityFunc, +) -> None: + """Ensure that an entity type that is not mapped results in no collectors""" + entity: JobEntity = mock_entity( + port=1234, name="entity1", type="model", telemetry_on=True + ) + manager = CollectorManager() + + # set paths for all known collectors + entity.collectors["client"] = "mock/path.csv" + entity.collectors["client_count"] = 
"mock/path.csv" + entity.collectors["memory"] = "mock/path.csv" + + manager = CollectorManager() + + # ON behavior should locate ZERO collectors + entity.telemetry_on = True + manager.register_collectors(entity) + collectors = manager.all_collectors + assert len(collectors) == 0 + + # OFF behavior should locate ZERO collectors + entity.telemetry_on = False + manager.register_collectors(entity) + collectors = manager.all_collectors + assert len(collectors) == 0 diff --git a/tests/test_collector_sink.py b/tests/test_collector_sink.py new file mode 100644 index 000000000..148a72ef7 --- /dev/null +++ b/tests/test_collector_sink.py @@ -0,0 +1,107 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import uuid + +import pytest + +from conftest import MockCollectorEntityFunc +from smartsim._core.utils.telemetry.collector import FileSink + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +@pytest.mark.asyncio +async def test_sink_null_filename(mock_entity: MockCollectorEntityFunc) -> None: + """Ensure the filesink handles a null filename as expected""" + with pytest.raises(ValueError): + # pass null file path + sink = FileSink(None) # type: ignore + + +@pytest.mark.asyncio +async def test_sink_write(mock_entity: MockCollectorEntityFunc) -> None: + """Ensure the FileSink writes values to the output file as expected""" + entity = mock_entity(port=1234, name="e1") + sink = FileSink(entity.status_dir + "/test.csv") + + # all values are converted to strings before saving + v1, v2, v3 = str(uuid.uuid4()), str(uuid.uuid4()), str(uuid.uuid4()) + await sink.save(v1, v2, v3) + + # show file was written + path = sink.path + assert path.exists() + + # show each value is found in the file + content = path.read_text() + for value in [v1, v2, v3]: + assert str(value) in content + + +@pytest.mark.asyncio +async def test_sink_write_nonstring_input(mock_entity: MockCollectorEntityFunc) -> None: + """Ensure the FileSink writes values to the output file as expected + when inputs are non-strings""" + entity = mock_entity(port=1234, name="e1") + sink = FileSink(entity.status_dir + "/test.csv") + + # v1, v2 are not 
converted to strings + v1, v2 = 1, uuid.uuid4() + await sink.save(v1, v2) + + # show file was written + path = sink.path + assert path.exists() + + # split down to individual elements to ensure expected default format + content = path.read_text() + lines = content.splitlines() + line = lines[0].split(",") + + # show each value can be found + assert [str(v1), str(v2)] == line + + +@pytest.mark.asyncio +async def test_sink_write_no_inputs(mock_entity: MockCollectorEntityFunc) -> None: + """Ensure the FileSink writes to an output file without error if no + values are supplied""" + entity = mock_entity(port=1234, name="e1") + sink = FileSink(entity.status_dir + "/test.csv") + + num_saves = 5 + for _ in range(num_saves): + await sink.save() + + path = sink.path + assert path.exists() + + # show file was written + content = path.read_text() + + # show a line was written for each call to save + assert len(content.splitlines()) == num_saves diff --git a/tests/test_collectors.py b/tests/test_collectors.py new file mode 100644 index 000000000..2eb61d62d --- /dev/null +++ b/tests/test_collectors.py @@ -0,0 +1,305 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# import pathlib + +import typing as t + +import pytest + +import smartsim._core.entrypoints.telemetrymonitor +import smartsim._core.utils.telemetry.collector +from conftest import MockCollectorEntityFunc, MockSink +from smartsim._core.utils.telemetry.collector import ( + DBConnectionCollector, + DBConnectionCountCollector, + DBMemoryCollector, + redisa, +) + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + +PrepareDB = t.Callable[[dict], smartsim.experiment.Orchestrator] + + +@pytest.mark.asyncio +async def test_dbmemcollector_prepare( + mock_entity: MockCollectorEntityFunc, mock_sink +) -> None: + """Ensure that collector preparation succeeds when expected""" + entity = mock_entity(telemetry_on=True) + + collector = DBMemoryCollector(entity, mock_sink()) + await collector.prepare() + assert collector._client + + +@pytest.mark.asyncio +async def test_dbmemcollector_prepare_fail( + mock_entity: MockCollectorEntityFunc, + mock_sink: MockSink, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Ensure that collector preparation reports a failure to connect + when the redis client cannot be created""" + entity = mock_entity(telemetry_on=True) + + with monkeypatch.context() as ctx: + # mock up a redis constructor that returns None + ctx.setattr(redisa, "Redis", lambda host, port: None) + + sink = mock_sink() + collector = DBMemoryCollector(entity, sink) + assert sink.num_saves == 0 + + await 
collector.prepare() + + # Attempt to save header when preparing... + assert not collector._client + assert sink.num_saves == 1 + + +@pytest.mark.asyncio +async def test_dbcollector_config( + mock_entity: MockCollectorEntityFunc, + mock_sink, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Ensure that missing required db collector config causes an exception""" + + # Check that a bad host causes exception + entity = mock_entity(host="", telemetry_on=True) + with pytest.raises(ValueError): + DBMemoryCollector(entity, mock_sink()) + + entity = mock_entity(host=" ", telemetry_on=True) + with pytest.raises(ValueError): + DBMemoryCollector(entity, mock_sink()) + + # Check that a bad port causes exception + entity = mock_entity(port="", telemetry_on=True) # type: ignore + with pytest.raises(ValueError): + DBMemoryCollector(entity, mock_sink()) + + +@pytest.mark.asyncio +async def test_dbmemcollector_prepare_fail_dep( + mock_entity: MockCollectorEntityFunc, + mock_sink, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[t.Any], +) -> None: + """Ensure that collector preparation attempts to connect, ensure it + reports a failure if the db conn bombs""" + entity = mock_entity(telemetry_on=True) + + def raiser(*args: t.Any, **kwargs: t.Any) -> None: + # mock raising exception on connect attempts to test err handling + raise redisa.ConnectionError("mock connection failure") + + sink = mock_sink() + collector = DBMemoryCollector(entity, sink) + with monkeypatch.context() as ctx: + ctx.setattr(redisa, "Redis", raiser) + + assert sink.num_saves == 0 + await collector.prepare() + + assert sink.num_saves == 1 + assert not collector._client + + +@pytest.mark.asyncio +async def test_dbmemcollector_collect( + mock_entity: MockCollectorEntityFunc, + mock_redis, + mock_mem, + mock_sink, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Ensure that a valid response is returned as expected""" + entity = mock_entity(telemetry_on=True) + + sink = mock_sink() + collector 
= DBMemoryCollector(entity, sink) + with monkeypatch.context() as ctx: + ctx.setattr(redisa, "Redis", mock_redis(mem_stats=mock_mem(1, 2))) + ctx.setattr( + smartsim._core.utils.telemetry.collector, + "get_ts_ms", + lambda: 12131415, + ) + + await collector.prepare() + await collector.collect() + + reqd_items = { + "timestamp", + "total_system_memory", + "used_memory", + "used_memory_peak", + } + actual_items = set(sink.args) + + reqd_values = {12131415, 1000.0, 1111.0, 1234.0} + actual_values = set(sink.args) + assert actual_values == reqd_values + + +@pytest.mark.asyncio +async def test_dbmemcollector_integration( + mock_entity: MockCollectorEntityFunc, + mock_sink: MockSink, + prepare_db: PrepareDB, + local_db: dict, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Integration test with a real orchestrator instance to ensure + output data matches expectations and proper db client API uage""" + + db = prepare_db(local_db).orchestrator + entity = mock_entity(port=db.ports[0], telemetry_on=True) + + sink = mock_sink() + collector = DBMemoryCollector(entity, sink) + + with monkeypatch.context() as ctx: + ctx.setattr( + smartsim._core.utils.telemetry.collector, + "get_ts_ms", + lambda: 12131415, + ) + assert sink.num_saves == 0 + await collector.prepare() + assert sink.num_saves == 1 + await collector.collect() + assert sink.num_saves == 2 + + stats = sink.args + assert len(stats) == 4 # show we have the expected amount of data points + ts = 12131415 + + assert ts in stats + + +@pytest.mark.asyncio +async def test_dbconncollector_collect( + mock_entity: MockCollectorEntityFunc, + mock_sink, + mock_redis, + mock_con, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Ensure that a valid response is returned as expected""" + entity = mock_entity(telemetry_on=True) + + sink = mock_sink() + collector = DBConnectionCollector(entity, sink) + with monkeypatch.context() as ctx: + ctx.setattr(redisa, "Redis", mock_redis(client_stats=mock_con(1, 2))) + + assert 
sink.num_saves == 0 + await collector.prepare() + assert sink.num_saves == 1 + await collector.collect() + assert sink.num_saves == 3 # save twice w/two datapoints + + stats = sink.args + + idx = 1 + id0, ip0 = f"ABC{idx}", f"127.0.0.{idx}:1234" + id1, ip1 = f"XYZ{idx}", f"127.0.0.{idx}:2345" + exp_clients = [{"id": id0, "addr": ip0}, {"id": id1, "addr": ip1}] + + assert len(exp_clients) + 1 == len(stats) # output includes timestamp + assert id0 in set(client["id"] for client in exp_clients) + assert id1 in set(client["id"] for client in exp_clients) + assert ip0 in set(client["addr"] for client in exp_clients) + assert ip1 in set(client["addr"] for client in exp_clients) + + +@pytest.mark.asyncio +async def test_dbconn_count_collector_collect( + mock_entity: MockCollectorEntityFunc, + mock_sink, + mock_redis, + mock_con, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Ensure that a valid response is returned as expected""" + entity = mock_entity(telemetry_on=True) + + sink = mock_sink() + collector = DBConnectionCountCollector(entity, sink) + with monkeypatch.context() as ctx: + ctx.setattr(redisa, "Redis", mock_redis(client_stats=mock_con(1, 2))) + + assert sink.num_saves == 0 + await collector.prepare() + assert sink.num_saves == 1 + await collector.collect() + assert sink.num_saves == 2 + + stats = sink.args + exp_counts = 2 + + assert exp_counts == len(stats) # output includes timestamp + + +@pytest.mark.asyncio +async def test_dbconncollector_integration( + mock_entity: MockCollectorEntityFunc, + mock_sink: MockSink, + prepare_db: PrepareDB, + local_db: dict, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Integration test with a real orchestrator instance to ensure + output data matches expectations and proper db client API uage""" + + db = prepare_db(local_db).orchestrator + entity = mock_entity(port=db.ports[0], telemetry_on=True) + + sink = mock_sink() + collector = DBConnectionCollector(entity, sink) + + with monkeypatch.context() as ctx: + 
ctx.setattr( + smartsim._core.utils.telemetry.collector, + "get_ts_ms", + lambda: 12131415, + ) + await collector.prepare() + await collector.collect() + stats = sink.args + + ip = "127.0.0.1:" + num_conns = int(stats[1]) + ts = 12131415 + + assert ts in stats + assert num_conns > 0 + assert ip in stats[2] diff --git a/tests/test_colo_model_local.py b/tests/test_colo_model_local.py index 138ceb4b7..fe347ee30 100644 --- a/tests/test_colo_model_local.py +++ b/tests/test_colo_model_local.py @@ -28,9 +28,10 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment from smartsim.entity import Model from smartsim.error import SSUnsupportedError +from smartsim.status import SmartSimStatus # The tests in this file belong to the slow_tests group pytestmark = pytest.mark.slow_tests @@ -139,13 +140,13 @@ def test_launch_colocated_model_defaults( exp.generate(colo_model) exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) - assert all(stat == status.STATUS_COMPLETED for stat in statuses) + assert all(stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses) # test restarting the colocated model exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) assert all( - stat == status.STATUS_COMPLETED for stat in statuses + stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses ), f"Statuses {statuses}" @@ -181,12 +182,12 @@ def test_launch_multiple_colocated_models( exp.generate(*colo_models) exp.start(*colo_models, block=True) statuses = exp.get_status(*colo_models) - assert all(stat == status.STATUS_COMPLETED for stat in statuses) + assert all(stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses) # test restarting the colocated model exp.start(*colo_models, block=True) statuses = exp.get_status(*colo_models) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) @pytest.mark.parametrize("db_type", 
supported_dbs) @@ -212,7 +213,7 @@ def test_colocated_model_disable_pinning( exp.generate(colo_model) exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) @pytest.mark.parametrize("db_type", supported_dbs) @@ -245,7 +246,7 @@ def test_colocated_model_pinning_auto_2cpu( exp.generate(colo_model) exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) @pytest.mark.skipif(is_mac, reason="unsupported on MacOSX") @@ -272,7 +273,7 @@ def test_colocated_model_pinning_range( exp.generate(colo_model) exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) @pytest.mark.skipif(is_mac, reason="unsupported on MacOSX") @@ -299,7 +300,7 @@ def test_colocated_model_pinning_list( exp.generate(colo_model) exp.start(colo_model, block=True) statuses = exp.get_status(colo_model) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) def test_colo_uds_verifies_socket_file_name(test_dir, launcher="local"): diff --git a/tests/test_config.py b/tests/test_config.py index 0716ac0d5..00a1fcdd3 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -61,9 +61,7 @@ def get_redisai_env( """Convenience method to create a set of environment variables that include RedisAI-specific variables :param rai_path: The path to the RedisAI library - :type: str (optional) :param lib_path: The path to the SMARTSIM_DEP_INSTALL_PATH - :type: str (optional) :return: A dictionary containing an updated set of environment variables """ env 
= os.environ.copy() @@ -255,3 +253,31 @@ def test_telemetry_cooldown( monkeypatch.delenv("SMARTSIM_TELEMETRY_COOLDOWN", raising=False) config = Config() assert config.telemetry_cooldown == exp_result + + +def test_key_path_unset(monkeypatch: pytest.MonkeyPatch): + """Ensure that the default value of the key path meets expectations""" + monkeypatch.delenv("SMARTSIM_KEY_PATH", raising=False) + + config = Config() + + key_path = config.smartsim_key_path + + exp_default = Path.home() / ".smartsim" / "keys" + assert str(exp_default) == key_path, "Unexpected default key path" + + +def test_key_path_non_default(monkeypatch: pytest.MonkeyPatch): + """Ensure that the environment variable for key path overrides + the default when it is set""" + key_path1 = "/foo/bar" + key_path2 = "/foo/baz" + config = Config() + + monkeypatch.setenv("SMARTSIM_KEY_PATH", key_path1) + actual_value = config.smartsim_key_path + assert key_path1 == actual_value, "Key path 1 didn't match overridden value" + + monkeypatch.setenv("SMARTSIM_KEY_PATH", key_path2) + actual_value = config.smartsim_key_path + assert key_path2 == actual_value, "Key path 2 didn't match overridden value" diff --git a/tests/test_configs/mpi/mpi_hello.c b/tests/test_configs/mpi/mpi_hello.c new file mode 100755 index 000000000..dcf80f3ac --- /dev/null +++ b/tests/test_configs/mpi/mpi_hello.c @@ -0,0 +1,35 @@ +#include +#include +#include +#include +#include +#include + + +int main(int argc, char** argv) { + sleep(1); + // Initialize the MPI environment + MPI_Init(NULL, NULL); + + // Get the number of processes + int world_size; + MPI_Comm_size(MPI_COMM_WORLD, &world_size); + + // Get the rank of the process + int world_rank; + MPI_Comm_rank(MPI_COMM_WORLD, &world_rank); + + char filename[64]; + sprintf(filename, "mpi_hello.%d.log", world_rank); + FILE *log = fopen(filename, "w"); + + fprintf(log, "Hello world from rank %d out of %d processors\n", + world_rank, world_size); + fflush(log); + + // unlink(filename); + 
fclose(log); + + // Finalize the MPI environment. + MPI_Finalize(); +} diff --git a/tests/test_configs/smartredis/multidbid_colo_env_vars_only.py b/tests/test_configs/smartredis/multidbid_colo_env_vars_only.py new file mode 100644 index 000000000..74a15c010 --- /dev/null +++ b/tests/test_configs/smartredis/multidbid_colo_env_vars_only.py @@ -0,0 +1,52 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2023, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import argparse +import os + +from smartredis import Client, ConfigOptions + +if __name__ == "__main__": + """For inclusion in test with two unique database identifiers with multiple + databases where one (presumably colocated) database is started before the + other, and thus only one DB ID is known at application runtime and + available via environment variable. + """ + + parser = argparse.ArgumentParser(description="SmartRedis") + parser.add_argument("--exchange", action="store_true") + parser.add_argument("--should-see-reg-db", action="store_true") + args = parser.parse_args() + + env_vars = [ + "SSDB_testdb_colo", + "SR_DB_TYPE_testdb_colo", + ] + + assert all([var in os.environ for var in env_vars]) + + opts = ConfigOptions.create_from_environment("testdb_colo") + Client(opts, logger_name="SmartSim") diff --git a/tests/test_configs/telemetry/db_and_model.json b/tests/test_configs/telemetry/db_and_model.json index 58c1c841a..36edc7486 100644 --- a/tests/test_configs/telemetry/db_and_model.json +++ b/tests/test_configs/telemetry/db_and_model.json @@ -29,6 +29,9 @@ "conf_file": null, "out_file": "/path/to/some/file.out", "err_file": "/path/to/some/file.err", + "client_file": "/path/to/some/client.log", + "client_count_file": null, + "memory_file": "/path/to/some/mem.log", "telemetry_metadata": { "status_dir": "/tmp/my-exp/.smartsim/telemetry/telemetry_db_and_model/2ca19ad/database/orchestrator/orchestrator_0", "step_id": "4139111.27", diff --git a/tests/test_configs/telemetry/ensembles.json b/tests/test_configs/telemetry/ensembles.json index 841324ec6..67e53ca09 100644 --- a/tests/test_configs/telemetry/ensembles.json +++ b/tests/test_configs/telemetry/ensembles.json @@ -1,329 +1,329 @@ { - "schema info": { - "schema_name": "entity manifest", - "version": "0.0.1" - }, - "experiment": { - "name": "my-exp", - "path": "/home/someuser/code/ss/my-exp", - "launcher": "Local" - }, - "runs": [ - { - "run_id": "d041b90", - "timestamp": 1698679830384608928, - "model": 
[], - "orchestrator": [], - "ensemble": [ - { - "name": "my-ens", - "params": { - "START": [ - "spam", - "foo" + "schema info": { + "schema_name": "entity manifest", + "version": "0.0.1" + }, + "experiment": { + "name": "my-exp", + "path": "/home/someuser/code/ss/my-exp", + "launcher": "Local" + }, + "runs": [ + { + "run_id": "d041b90", + "timestamp": 1698679830384608928, + "model": [], + "orchestrator": [], + "ensemble": [ + { + "name": "my-ens", + "params": { + "START": [ + "spam", + "foo" + ], + "MID": [ + "eggs", + "bar" + ], + "END": [ + "ham", + "baz" + ] + }, + "batch_settings": {}, + "models": [ + { + "name": "my-ens_0", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" ], - "MID": [ - "eggs", - "bar" + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" + ], + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "eggs", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_0", + "step_id": null, + "task_id": "88118", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_0.out", + "err_file": "/home/someuser/code/ss/my-ens_0.err" + }, + { + "name": "my-ens_1", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" ], - "END": [ - "ham", - "baz" - ] + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" + ], + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "eggs", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": 
"/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_1", + "step_id": null, + "task_id": "88131", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_1.out", + "err_file": "/home/someuser/code/ss/my-ens_1.err" + }, + { + "name": "my-ens_2", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" + ], + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "bar", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_2", + "step_id": null, + "task_id": "88146", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_2.out", + "err_file": "/home/someuser/code/ss/my-ens_2.err" + }, + { + "name": "my-ens_3", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" + ], + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "bar", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_3", + "step_id": null, + "task_id": "88170", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_3.out", + "err_file": "/home/someuser/code/ss/my-ens_3.err" }, - "batch_settings": {}, - "models": [ - { - "name": "my-ens_0", - "path": "/home/someuser/code/ss", - "exe_args": [ - "yo.py" + { + "name": "my-ens_4", + "path": 
"/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" ], - "run_settings": { - "exe": [ - "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" - ], - "run_command": null, - "run_args": {} - }, - "batch_settings": {}, - "params": { - "START": "spam", - "MID": "eggs", - "END": "ham" - }, - "files": { - "Symlink": [], - "Configure": [ - "/home/someuser/code/ss/manifest/demo/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_0", - "step_id": null, - "task_id": "88118", - "managed": false - }, - "out_file": "/home/someuser/code/ss/my-ens_0.out", - "err_file": "/home/someuser/code/ss/my-ens_0.err" - }, - { - "name": "my-ens_1", - "path": "/home/someuser/code/ss", - "exe_args": [ - "yo.py" + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "eggs", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" ], - "run_settings": { - "exe": [ - "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" - ], - "run_command": null, - "run_args": {} - }, - "batch_settings": {}, - "params": { - "START": "spam", - "MID": "eggs", - "END": "baz" - }, - "files": { - "Symlink": [], - "Configure": [ - "/home/someuser/code/ss/manifest/demo/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_1", - "step_id": null, - "task_id": "88131", - "managed": false - }, - "out_file": "/home/someuser/code/ss/my-ens_1.out", - "err_file": "/home/someuser/code/ss/my-ens_1.err" - }, - { - "name": "my-ens_2", - "path": "/home/someuser/code/ss", - "exe_args": [ - "yo.py" + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { 
+ "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_4", + "step_id": null, + "task_id": "88178", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_4.out", + "err_file": "/home/someuser/code/ss/my-ens_4.err" + }, + { + "name": "my-ens_5", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" ], - "run_settings": { - "exe": [ - "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" - ], - "run_command": null, - "run_args": {} - }, - "batch_settings": {}, - "params": { - "START": "spam", - "MID": "bar", - "END": "ham" - }, - "files": { - "Symlink": [], - "Configure": [ - "/home/someuser/code/ss/manifest/demo/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_2", - "step_id": null, - "task_id": "88146", - "managed": false - }, - "out_file": "/home/someuser/code/ss/my-ens_2.out", - "err_file": "/home/someuser/code/ss/my-ens_2.err" - }, - { - "name": "my-ens_3", - "path": "/home/someuser/code/ss", - "exe_args": [ - "yo.py" + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "eggs", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" ], - "run_settings": { - "exe": [ - "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" - ], - "run_command": null, - "run_args": {} - }, - "batch_settings": {}, - "params": { - "START": "spam", - "MID": "bar", - "END": "baz" - }, - "files": { - "Symlink": [], - "Configure": [ - "/home/someuser/code/ss/manifest/demo/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_3", - 
"step_id": null, - "task_id": "88170", - "managed": false - }, - "out_file": "/home/someuser/code/ss/my-ens_3.out", - "err_file": "/home/someuser/code/ss/my-ens_3.err" - }, - { - "name": "my-ens_4", - "path": "/home/someuser/code/ss", - "exe_args": [ - "yo.py" + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_5", + "step_id": null, + "task_id": "88193", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_5.out", + "err_file": "/home/someuser/code/ss/my-ens_5.err" + }, + { + "name": "my-ens_6", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" ], - "run_settings": { - "exe": [ - "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" - ], - "run_command": null, - "run_args": {} - }, - "batch_settings": {}, - "params": { - "START": "foo", - "MID": "eggs", - "END": "ham" - }, - "files": { - "Symlink": [], - "Configure": [ - "/home/someuser/code/ss/manifest/demo/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_4", - "step_id": null, - "task_id": "88178", - "managed": false - }, - "out_file": "/home/someuser/code/ss/my-ens_4.out", - "err_file": "/home/someuser/code/ss/my-ens_4.err" - }, - { - "name": "my-ens_5", - "path": "/home/someuser/code/ss", - "exe_args": [ - "yo.py" + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "bar", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" ], - "run_settings": { - "exe": [ - "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" - ], - "run_command": null, - "run_args": {} - }, - "batch_settings": {}, - "params": { - "START": 
"foo", - "MID": "eggs", - "END": "baz" - }, - "files": { - "Symlink": [], - "Configure": [ - "/home/someuser/code/ss/manifest/demo/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_5", - "step_id": null, - "task_id": "88193", - "managed": false - }, - "out_file": "/home/someuser/code/ss/my-ens_5.out", - "err_file": "/home/someuser/code/ss/my-ens_5.err" - }, - { - "name": "my-ens_6", - "path": "/home/someuser/code/ss", - "exe_args": [ - "yo.py" + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_6", + "step_id": null, + "task_id": "88221", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_6.out", + "err_file": "/home/someuser/code/ss/my-ens_6.err" + }, + { + "name": "my-ens_7", + "path": "/home/someuser/code/ss", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" ], - "run_settings": { - "exe": [ - "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" - ], - "run_command": null, - "run_args": {} - }, - "batch_settings": {}, - "params": { - "START": "foo", - "MID": "bar", - "END": "ham" - }, - "files": { - "Symlink": [], - "Configure": [ - "/home/someuser/code/ss/manifest/demo/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_6", - "step_id": null, - "task_id": "88221", - "managed": false - }, - "out_file": "/home/someuser/code/ss/my-ens_6.out", - "err_file": "/home/someuser/code/ss/my-ens_6.err" - }, - { - "name": "my-ens_7", - "path": "/home/someuser/code/ss", - "exe_args": [ - "yo.py" + "run_command": null, + "run_args": {} + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": 
"bar", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/home/someuser/code/ss/manifest/demo/yo.py" ], - "run_settings": { - "exe": [ - "/home/someuser/.pyenv/versions/3.9.16/envs/ss/bin/python" - ], - "run_command": null, - "run_args": {} - }, - "batch_settings": {}, - "params": { - "START": "foo", - "MID": "bar", - "END": "baz" - }, - "files": { - "Symlink": [], - "Configure": [ - "/home/someuser/code/ss/manifest/demo/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_7", - "step_id": null, - "task_id": "88241", - "managed": false - }, - "out_file": "/home/someuser/code/ss/my-ens_7.out", - "err_file": "/home/someuser/code/ss/my-ens_7.err" - } - ] - } - ] - } - ] - } + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/home/someuser/code/ss/my-exp/.smartsim/telemetry/my-exp/d041b90/ensemble/my-ens/my-ens_7", + "step_id": null, + "task_id": "88241", + "managed": false + }, + "out_file": "/home/someuser/code/ss/my-ens_7.out", + "err_file": "/home/someuser/code/ss/my-ens_7.err" + } + ] + } + ] + } + ] +} diff --git a/tests/test_configs/telemetry/telemetry.json b/tests/test_configs/telemetry/telemetry.json index a380bc5fb..916f5922b 100644 --- a/tests/test_configs/telemetry/telemetry.json +++ b/tests/test_configs/telemetry/telemetry.json @@ -1,946 +1,945 @@ { - "experiment": { - "name": "my-exp", - "path": "/path/to/my-exp", - "launcher": "Slurm" - }, - "runs": [ - { - "run_id": "d999ad89-020f-4e6a-b834-dbd88658ce84", - "timestamp": 1697824072792854287, - "model": [ - { - "name": "my-model", - "path": "/path/to/my-exp/my-model", - "exe_args": [ - "hello", - "world" + "experiment": { + "name": "my-exp", + "path": "/path/to/my-exp", + "launcher": "Slurm" + }, + "runs": [ + { + "run_id": "d999ad89-020f-4e6a-b834-dbd88658ce84", + "timestamp": 1697824072792854287, + "model": [ + { + "name": 
"my-model", + "path": "/path/to/my-exp/my-model", + "exe_args": [ + "hello", + "world" + ], + "run_settings": { + "exe": [ + "/usr/bin/echo" ], - "run_settings": { - "exe": [ - "/usr/bin/echo" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": {}, - "files": { - "Symlink": [], - "Configure": [], - "Copy": [] + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": {}, + "files": { + "Symlink": [], + "Configure": [], + "Copy": [] + }, + "colocated_db": { + "settings": { + "port": 5757, + "ifname": "lo", + "cpus": 1, + "custom_pinning": "0", + "debug": false, + "db_identifier": "COLO", + "rai_args": { + "threads_per_queue": null, + "inter_op_parallelism": null, + "intra_op_parallelism": null + }, + "extra_db_args": {} }, - "colocated_db": { - "settings": { - "port": 5757, - "ifname": "lo", - "cpus": 1, - "custom_pinning": "0", - "debug": false, - "db_identifier": "COLO", - "rai_args": { - "threads_per_queue": null, - "inter_op_parallelism": null, - "intra_op_parallelism": null - }, - "extra_db_args": {} - }, - "scripts": [], - "models": [ - { - "cnn": { - "backend": "TORCH", - "device": "CPU" - } + "scripts": [], + "models": [ + { + "cnn": { + "backend": "TORCH", + "device": "CPU" } - ] + } + ] + }, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d999ad89-020f-4e6a-b834-dbd88658ce84/model/my-model", + "step_id": "4121050.30", + "task_id": "25230", + "managed": true + }, + "out_file": "/path/to/my-exp/my-model/my-model.out", + "err_file": "/path/to/my-exp/my-model/my-model.err" + } + ], + "orchestrator": [], + "ensemble": [] + }, + { + "run_id": "fd3cd1a8-cb8f-4f61-b847-73a8eb0881fa", + "timestamp": 1697824102122439975, + "model": [], + "orchestrator": [ + { + "name": "orchestrator", + "type": "redis", + "interface": [ + "ipogif0" + ], + "shards": [ + { + "name": 
"orchestrator_1", + "hostname": "10.128.0.70", + "port": 2424, + "cluster": true, + "conf_file": "nodes-orchestrator_1-2424.conf", + "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", + "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/fd3cd1a8-cb8f-4f61-b847-73a8eb0881fa/database/orchestrator/orchestrator", + "step_id": "4121050.31+2", + "task_id": "25241", + "managed": true + } }, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d999ad89-020f-4e6a-b834-dbd88658ce84/model/my-model", - "step_id": "4121050.30", - "task_id": "25230", - "managed": true + { + "name": "orchestrator_2", + "hostname": "10.128.0.71", + "port": 2424, + "cluster": true, + "conf_file": "nodes-orchestrator_2-2424.conf", + "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", + "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/fd3cd1a8-cb8f-4f61-b847-73a8eb0881fa/database/orchestrator/orchestrator", + "step_id": "4121050.31+2", + "task_id": "25241", + "managed": true + } }, - "out_file": "/path/to/my-exp/my-model/my-model.out", - "err_file": "/path/to/my-exp/my-model/my-model.err" - } - ], - "orchestrator": [], - "ensemble": [] - }, - { - "run_id": "fd3cd1a8-cb8f-4f61-b847-73a8eb0881fa", - "timestamp": 1697824102122439975, - "model": [], - "orchestrator": [ - { - "name": "orchestrator", - "type": "redis", - "interface": [ - "ipogif0" + { + "name": "orchestrator_0", + "hostname": "10.128.0.69", + "port": 2424, + "cluster": true, + "conf_file": "nodes-orchestrator_0-2424.conf", + "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", + "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", + "telemetry_metadata": { + "status_dir": 
"/path/to/my-exp/.smartsim/telemetry/my-exp/fd3cd1a8-cb8f-4f61-b847-73a8eb0881fa/database/orchestrator/orchestrator", + "step_id": "4121050.31+2", + "task_id": "25241", + "managed": true + } + } + ] + } + ], + "ensemble": [] + }, + { + "run_id": "d65ae1df-cb5e-45d9-ab09-6fa641755997", + "timestamp": 1697824127962219505, + "model": [], + "orchestrator": [], + "ensemble": [ + { + "name": "my-ens", + "params": { + "START": [ + "spam", + "foo" ], - "shards": [ - { - "name": "orchestrator_1", - "hostname": "10.128.0.70", - "port": 2424, - "cluster": true, - "conf_file": "nodes-orchestrator_1-2424.conf", - "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", - "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/fd3cd1a8-cb8f-4f61-b847-73a8eb0881fa/database/orchestrator/orchestrator", - "step_id": "4121050.31+2", - "task_id": "25241", - "managed": true + "MID": [ + "eggs", + "bar" + ], + "END": [ + "ham", + "baz" + ] + }, + "batch_settings": {}, + "models": [ + { + "name": "my-ens_0", + "path": "/path/to/my-exp/my-ens/my-ens_0", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 } }, - { - "name": "orchestrator_2", - "hostname": "10.128.0.71", - "port": 2424, - "cluster": true, - "conf_file": "nodes-orchestrator_2-2424.conf", - "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", - "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/fd3cd1a8-cb8f-4f61-b847-73a8eb0881fa/database/orchestrator/orchestrator", - "step_id": "4121050.31+2", - "task_id": "25241", - "managed": true + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "eggs", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + 
"Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_0", + "step_id": "4121050.32", + "task_id": "25639", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_0/my-ens_0.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_0/my-ens_0.err" + }, + { + "name": "my-ens_1", + "path": "/path/to/my-exp/my-ens/my-ens_1", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 } }, - { - "name": "orchestrator_0", - "hostname": "10.128.0.69", - "port": 2424, - "cluster": true, - "conf_file": "nodes-orchestrator_0-2424.conf", - "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", - "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/fd3cd1a8-cb8f-4f61-b847-73a8eb0881fa/database/orchestrator/orchestrator", - "step_id": "4121050.31+2", - "task_id": "25241", - "managed": true + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "eggs", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_1", + "step_id": "4121050.33", + "task_id": "25768", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_1/my-ens_1.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_1/my-ens_1.err" + }, + { + "name": "my-ens_2", + "path": "/path/to/my-exp/my-ens/my-ens_2", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 } - } - ] - } - ], - "ensemble": [] - }, - 
{ - "run_id": "d65ae1df-cb5e-45d9-ab09-6fa641755997", - "timestamp": 1697824127962219505, - "model": [], - "orchestrator": [], - "ensemble": [ - { - "name": "my-ens", - "params": { - "START": [ - "spam", - "foo" + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "bar", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_2", + "step_id": "4121050.34", + "task_id": "25817", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_2/my-ens_2.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_2/my-ens_2.err" + }, + { + "name": "my-ens_3", + "path": "/path/to/my-exp/my-ens/my-ens_3", + "exe_args": [ + "yo.py" ], - "MID": [ - "eggs", - "bar" + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "bar", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_3", + "step_id": "4121050.35", + "task_id": "25837", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_3/my-ens_3.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_3/my-ens_3.err" + }, + { + "name": "my-ens_4", + "path": "/path/to/my-exp/my-ens/my-ens_4", + "exe_args": [ + "yo.py" ], - "END": [ - "ham", - "baz" - ] + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "eggs", + "END": "ham" + 
}, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_4", + "step_id": "4121050.36", + "task_id": "25872", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_4/my-ens_4.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_4/my-ens_4.err" }, - "batch_settings": {}, - "models": [ - { - "name": "my-ens_0", - "path": "/path/to/my-exp/my-ens/my-ens_0", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "spam", - "MID": "eggs", - "END": "ham" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_0", - "step_id": "4121050.32", - "task_id": "25639", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_0/my-ens_0.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_0/my-ens_0.err" + { + "name": "my-ens_5", + "path": "/path/to/my-exp/my-ens/my-ens_5", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } }, - { - "name": "my-ens_1", - "path": "/path/to/my-exp/my-ens/my-ens_1", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "spam", - "MID": "eggs", - "END": "baz" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - 
], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_1", - "step_id": "4121050.33", - "task_id": "25768", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_1/my-ens_1.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_1/my-ens_1.err" + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "eggs", + "END": "baz" }, - { - "name": "my-ens_2", - "path": "/path/to/my-exp/my-ens/my-ens_2", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "spam", - "MID": "bar", - "END": "ham" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_2", - "step_id": "4121050.34", - "task_id": "25817", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_2/my-ens_2.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_2/my-ens_2.err" + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] }, - { - "name": "my-ens_3", - "path": "/path/to/my-exp/my-ens/my-ens_3", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "spam", - "MID": "bar", - "END": "baz" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_3", - 
"step_id": "4121050.35", - "task_id": "25837", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_3/my-ens_3.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_3/my-ens_3.err" + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_5", + "step_id": "4121050.37", + "task_id": "25930", + "managed": true }, - { - "name": "my-ens_4", - "path": "/path/to/my-exp/my-ens/my-ens_4", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "foo", - "MID": "eggs", - "END": "ham" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_4", - "step_id": "4121050.36", - "task_id": "25872", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_4/my-ens_4.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_4/my-ens_4.err" + "out_file": "/path/to/my-exp/my-ens/my-ens_5/my-ens_5.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_5/my-ens_5.err" + }, + { + "name": "my-ens_6", + "path": "/path/to/my-exp/my-ens/my-ens_6", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } }, - { - "name": "my-ens_5", - "path": "/path/to/my-exp/my-ens/my-ens_5", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "foo", - "MID": "eggs", - "END": "baz" - }, - 
"files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_5", - "step_id": "4121050.37", - "task_id": "25930", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_5/my-ens_5.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_5/my-ens_5.err" + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "bar", + "END": "ham" }, - { - "name": "my-ens_6", - "path": "/path/to/my-exp/my-ens/my-ens_6", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "foo", - "MID": "bar", - "END": "ham" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_6", - "step_id": "4121050.38", - "task_id": "25945", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_6/my-ens_6.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_6/my-ens_6.err" + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_6", + "step_id": "4121050.38", + "task_id": "25945", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_6/my-ens_6.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_6/my-ens_6.err" + }, + { + "name": "my-ens_7", + "path": "/path/to/my-exp/my-ens/my-ens_7", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": 
"/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "bar", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_7", + "step_id": "4121050.39", + "task_id": "25967", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_7/my-ens_7.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_7/my-ens_7.err" + } + ] + } + ] + }, + { + "run_id": "e41f8e17-c4b2-441d-adf9-707443ee2c72", + "timestamp": 1697835227560376025, + "model": [ + { + "name": "my-model", + "path": "/path/to/my-exp/my-model", + "exe_args": [ + "hello", + "world" + ], + "run_settings": { + "exe": [ + "/usr/bin/echo" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": {}, + "files": { + "Symlink": [], + "Configure": [], + "Copy": [] + }, + "colocated_db": { + "settings": { + "port": 5757, + "ifname": "lo", + "cpus": 1, + "custom_pinning": "0", + "debug": false, + "db_identifier": "COLO", + "rai_args": { + "threads_per_queue": null, + "inter_op_parallelism": null, + "intra_op_parallelism": null }, + "extra_db_args": {} + }, + "scripts": [], + "models": [ { - "name": "my-ens_7", - "path": "/path/to/my-exp/my-ens/my-ens_7", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "foo", - "MID": "bar", - "END": "baz" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": 
"/path/to/my-exp/.smartsim/telemetry/my-exp/d65ae1df-cb5e-45d9-ab09-6fa641755997/ensemble/my-ens/my-ens_7", - "step_id": "4121050.39", - "task_id": "25967", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_7/my-ens_7.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_7/my-ens_7.err" + "cnn": { + "backend": "TORCH", + "device": "CPU" + } } ] - } - ] - }, - { - "run_id": "e41f8e17-c4b2-441d-adf9-707443ee2c72", - "timestamp": 1697835227560376025, - "model": [ - { - "name": "my-model", - "path": "/path/to/my-exp/my-model", - "exe_args": [ - "hello", - "world" - ], - "run_settings": { - "exe": [ - "/usr/bin/echo" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 + }, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/e41f8e17-c4b2-441d-adf9-707443ee2c72/model/my-model", + "step_id": "4121904.0", + "task_id": "28277", + "managed": true + }, + "out_file": "/path/to/my-exp/my-model/my-model.out", + "err_file": "/path/to/my-exp/my-model/my-model.err" + } + ], + "orchestrator": [], + "ensemble": [] + }, + { + "run_id": "b33a5d27-6822-4795-8e0e-cfea18551fa4", + "timestamp": 1697835261956135240, + "model": [], + "orchestrator": [ + { + "name": "orchestrator", + "type": "redis", + "interface": [ + "ipogif0" + ], + "shards": [ + { + "name": "orchestrator_0", + "hostname": "10.128.0.2", + "port": 2424, + "cluster": true, + "conf_file": "nodes-orchestrator_0-2424.conf", + "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", + "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/b33a5d27-6822-4795-8e0e-cfea18551fa4/database/orchestrator/orchestrator", + "step_id": "4121904.1+2", + "task_id": "28289", + "managed": true } }, - "batch_settings": {}, - "params": {}, - "files": { - "Symlink": [], - "Configure": [], - "Copy": [] + { + "name": "orchestrator_2", + "hostname": "10.128.0.4", 
+ "port": 2424, + "cluster": true, + "conf_file": "nodes-orchestrator_2-2424.conf", + "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", + "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/b33a5d27-6822-4795-8e0e-cfea18551fa4/database/orchestrator/orchestrator", + "step_id": "4121904.1+2", + "task_id": "28289", + "managed": true + } }, - "colocated_db": { - "settings": { - "port": 5757, - "ifname": "lo", - "cpus": 1, - "custom_pinning": "0", - "debug": false, - "db_identifier": "COLO", - "rai_args": { - "threads_per_queue": null, - "inter_op_parallelism": null, - "intra_op_parallelism": null - }, - "extra_db_args": {} - }, - "scripts": [], - "models": [ - { - "cnn": { - "backend": "TORCH", - "device": "CPU" - } + { + "name": "orchestrator_1", + "hostname": "10.128.0.3", + "port": 2424, + "cluster": true, + "conf_file": "nodes-orchestrator_1-2424.conf", + "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", + "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/b33a5d27-6822-4795-8e0e-cfea18551fa4/database/orchestrator/orchestrator", + "step_id": "4121904.1+2", + "task_id": "28289", + "managed": true + } + } + ] + } + ], + "ensemble": [] + }, + { + "run_id": "45772df2-fd80-43fd-adf0-d5e319870182", + "timestamp": 1697835287798613875, + "model": [], + "orchestrator": [], + "ensemble": [ + { + "name": "my-ens", + "params": { + "START": [ + "spam", + "foo" + ], + "MID": [ + "eggs", + "bar" + ], + "END": [ + "ham", + "baz" + ] + }, + "batch_settings": {}, + "models": [ + { + "name": "my-ens_0", + "path": "/path/to/my-exp/my-ens/my-ens_0", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 } - ] - }, - "telemetry_metadata": { - 
"status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/e41f8e17-c4b2-441d-adf9-707443ee2c72/model/my-model", - "step_id": "4121904.0", - "task_id": "28277", - "managed": true + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "eggs", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_0", + "step_id": "4121904.2", + "task_id": "28333", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_0/my-ens_0.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_0/my-ens_0.err" }, - "out_file": "/path/to/my-exp/my-model/my-model.out", - "err_file": "/path/to/my-exp/my-model/my-model.err" - } - ], - "orchestrator": [], - "ensemble": [] - }, - { - "run_id": "b33a5d27-6822-4795-8e0e-cfea18551fa4", - "timestamp": 1697835261956135240, - "model": [], - "orchestrator": [ - { - "name": "orchestrator", - "type": "redis", - "interface": [ - "ipogif0" - ], - "shards": [ - { - "name": "orchestrator_0", - "hostname": "10.128.0.2", - "port": 2424, - "cluster": true, - "conf_file": "nodes-orchestrator_0-2424.conf", - "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", - "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/b33a5d27-6822-4795-8e0e-cfea18551fa4/database/orchestrator/orchestrator", - "step_id": "4121904.1+2", - "task_id": "28289", - "managed": true + { + "name": "my-ens_1", + "path": "/path/to/my-exp/my-ens/my-ens_1", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 } }, - { - "name": "orchestrator_2", - "hostname": "10.128.0.4", - "port": 2424, - "cluster": true, - "conf_file": 
"nodes-orchestrator_2-2424.conf", - "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", - "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/b33a5d27-6822-4795-8e0e-cfea18551fa4/database/orchestrator/orchestrator", - "step_id": "4121904.1+2", - "task_id": "28289", - "managed": true + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "eggs", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_1", + "step_id": "4121904.3", + "task_id": "28342", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_1/my-ens_1.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_1/my-ens_1.err" + }, + { + "name": "my-ens_2", + "path": "/path/to/my-exp/my-ens/my-ens_2", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 } }, - { - "name": "orchestrator_1", - "hostname": "10.128.0.3", - "port": 2424, - "cluster": true, - "conf_file": "nodes-orchestrator_1-2424.conf", - "out_file": "/path/to/my-exp/orchestrator/orchestrator.out", - "err_file": "/path/to/my-exp/orchestrator/orchestrator.err", - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/b33a5d27-6822-4795-8e0e-cfea18551fa4/database/orchestrator/orchestrator", - "step_id": "4121904.1+2", - "task_id": "28289", - "managed": true + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "bar", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": 
"/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_2", + "step_id": "4121904.4", + "task_id": "28353", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_2/my-ens_2.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_2/my-ens_2.err" + }, + { + "name": "my-ens_3", + "path": "/path/to/my-exp/my-ens/my-ens_3", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 } - } - ] - } - ], - "ensemble": [] - }, - { - "run_id": "45772df2-fd80-43fd-adf0-d5e319870182", - "timestamp": 1697835287798613875, - "model": [], - "orchestrator": [], - "ensemble": [ - { - "name": "my-ens", - "params": { - "START": [ - "spam", - "foo" + }, + "batch_settings": {}, + "params": { + "START": "spam", + "MID": "bar", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_3", + "step_id": "4121904.5", + "task_id": "28362", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_3/my-ens_3.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_3/my-ens_3.err" + }, + { + "name": "my-ens_4", + "path": "/path/to/my-exp/my-ens/my-ens_4", + "exe_args": [ + "yo.py" ], - "MID": [ - "eggs", - "bar" + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "eggs", + "END": "ham" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": 
"/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_4", + "step_id": "4121904.6", + "task_id": "28371", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_4/my-ens_4.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_4/my-ens_4.err" + }, + { + "name": "my-ens_5", + "path": "/path/to/my-exp/my-ens/my-ens_5", + "exe_args": [ + "yo.py" ], - "END": [ - "ham", - "baz" - ] + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } + }, + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "eggs", + "END": "baz" + }, + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] + }, + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_5", + "step_id": "4121904.7", + "task_id": "28380", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_5/my-ens_5.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_5/my-ens_5.err" }, - "batch_settings": {}, - "models": [ - { - "name": "my-ens_0", - "path": "/path/to/my-exp/my-ens/my-ens_0", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "spam", - "MID": "eggs", - "END": "ham" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_0", - "step_id": "4121904.2", - "task_id": "28333", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_0/my-ens_0.out", - "err_file": 
"/path/to/my-exp/my-ens/my-ens_0/my-ens_0.err" + { + "name": "my-ens_6", + "path": "/path/to/my-exp/my-ens/my-ens_6", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } }, - { - "name": "my-ens_1", - "path": "/path/to/my-exp/my-ens/my-ens_1", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "spam", - "MID": "eggs", - "END": "baz" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_1", - "step_id": "4121904.3", - "task_id": "28342", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_1/my-ens_1.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_1/my-ens_1.err" + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "bar", + "END": "ham" }, - { - "name": "my-ens_2", - "path": "/path/to/my-exp/my-ens/my-ens_2", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "spam", - "MID": "bar", - "END": "ham" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_2", - "step_id": "4121904.4", - "task_id": "28353", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_2/my-ens_2.out", - "err_file": 
"/path/to/my-exp/my-ens/my-ens_2/my-ens_2.err" + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] }, - { - "name": "my-ens_3", - "path": "/path/to/my-exp/my-ens/my-ens_3", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "spam", - "MID": "bar", - "END": "baz" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_3", - "step_id": "4121904.5", - "task_id": "28362", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_3/my-ens_3.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_3/my-ens_3.err" + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_6", + "step_id": "4121904.8", + "task_id": "28389", + "managed": true }, - { - "name": "my-ens_4", - "path": "/path/to/my-exp/my-ens/my-ens_4", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "foo", - "MID": "eggs", - "END": "ham" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_4", - "step_id": "4121904.6", - "task_id": "28371", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_4/my-ens_4.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_4/my-ens_4.err" + 
"out_file": "/path/to/my-exp/my-ens/my-ens_6/my-ens_6.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_6/my-ens_6.err" + }, + { + "name": "my-ens_7", + "path": "/path/to/my-exp/my-ens/my-ens_7", + "exe_args": [ + "yo.py" + ], + "run_settings": { + "exe": [ + "/path/to/my/python3" + ], + "run_command": "/opt/slurm/20.11.5/bin/srun", + "run_args": { + "nodes": 1, + "ntasks": 1 + } }, - { - "name": "my-ens_5", - "path": "/path/to/my-exp/my-ens/my-ens_5", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "foo", - "MID": "eggs", - "END": "baz" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_5", - "step_id": "4121904.7", - "task_id": "28380", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_5/my-ens_5.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_5/my-ens_5.err" + "batch_settings": {}, + "params": { + "START": "foo", + "MID": "bar", + "END": "baz" }, - { - "name": "my-ens_6", - "path": "/path/to/my-exp/my-ens/my-ens_6", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "foo", - "MID": "bar", - "END": "ham" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_6", - "step_id": "4121904.8", - "task_id": "28389", - "managed": true - }, - "out_file": 
"/path/to/my-exp/my-ens/my-ens_6/my-ens_6.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_6/my-ens_6.err" + "files": { + "Symlink": [], + "Configure": [ + "/path/to/yo.py" + ], + "Copy": [] }, - { - "name": "my-ens_7", - "path": "/path/to/my-exp/my-ens/my-ens_7", - "exe_args": [ - "yo.py" - ], - "run_settings": { - "exe": [ - "/path/to/my/python3" - ], - "run_command": "/opt/slurm/20.11.5/bin/srun", - "run_args": { - "nodes": 1, - "ntasks": 1 - } - }, - "batch_settings": {}, - "params": { - "START": "foo", - "MID": "bar", - "END": "baz" - }, - "files": { - "Symlink": [], - "Configure": [ - "/path/to/yo.py" - ], - "Copy": [] - }, - "colocated_db": {}, - "telemetry_metadata": { - "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_7", - "step_id": "4121904.9", - "task_id": "28398", - "managed": true - }, - "out_file": "/path/to/my-exp/my-ens/my-ens_7/my-ens_7.out", - "err_file": "/path/to/my-exp/my-ens/my-ens_7/my-ens_7.err" - } - ] - } - ] - } - ] - } - + "colocated_db": {}, + "telemetry_metadata": { + "status_dir": "/path/to/my-exp/.smartsim/telemetry/my-exp/45772df2-fd80-43fd-adf0-d5e319870182/ensemble/my-ens/my-ens_7", + "step_id": "4121904.9", + "task_id": "28398", + "managed": true + }, + "out_file": "/path/to/my-exp/my-ens/my-ens_7/my-ens_7.out", + "err_file": "/path/to/my-exp/my-ens/my-ens_7/my-ens_7.err" + } + ] + } + ] + } + ] +} diff --git a/tests/test_containers.py b/tests/test_containers.py index 21fe50ad4..5d0f933ff 100644 --- a/tests/test_containers.py +++ b/tests/test_containers.py @@ -32,9 +32,9 @@ import pytest from smartsim import Experiment, status -from smartsim.database import Orchestrator from smartsim.entity import Ensemble from smartsim.settings.containers import Singularity +from smartsim.status import SmartSimStatus # The tests in this file belong to the group_a group pytestmark = pytest.mark.group_a @@ -109,7 +109,7 @@ def test_singularity_basic(fileutils, test_dir): # 
get and confirm status stat = exp.get_status(model)[0] - assert stat == status.STATUS_COMPLETED + assert stat == SmartSimStatus.STATUS_COMPLETED print(exp.summary()) @@ -136,13 +136,13 @@ def test_singularity_args(fileutils, test_dir): # get and confirm status stat = exp.get_status(model)[0] - assert stat == status.STATUS_COMPLETED + assert stat == SmartSimStatus.STATUS_COMPLETED print(exp.summary()) @pytest.mark.skipif(not singularity_exists, reason="Test needs singularity to run") -def test_singularity_smartredis(test_dir, fileutils, wlmutils): +def test_singularity_smartredis(local_experiment, prepare_db, local_db, fileutils): """Run two processes, each process puts a tensor on the DB, then accesses the other process's tensor. Finally, the tensor is used to run a model. @@ -150,18 +150,13 @@ def test_singularity_smartredis(test_dir, fileutils, wlmutils): Note: This is a containerized port of test_smartredis.py """ - exp = Experiment( - "smartredis_ensemble_exchange", exp_path=test_dir, launcher="local" - ) - # create and start a database - orc = Orchestrator(port=wlmutils.get_test_port()) - exp.generate(orc) - exp.start(orc, block=False) + db = prepare_db(local_db).orchestrator + local_experiment.reconnect_orchestrator(db.checkpoint_file) container = Singularity(containerURI) - rs = exp.create_run_settings( + rs = local_experiment.create_run_settings( "python3", "producer.py --exchange", container=container ) params = {"mult": [1, -10]} @@ -178,18 +173,12 @@ def test_singularity_smartredis(test_dir, fileutils, wlmutils): config = fileutils.get_test_conf_path("smartredis") ensemble.attach_generator_files(to_copy=[config]) - exp.generate(ensemble) + local_experiment.generate(ensemble) # start the models - exp.start(ensemble, summary=False) + local_experiment.start(ensemble, summary=False) # get and confirm statuses - statuses = exp.get_status(ensemble) - if not all([stat == status.STATUS_COMPLETED for stat in statuses]): - exp.stop(orc) + statuses = 
local_experiment.get_status(ensemble) + if not all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]): assert False # client ensemble failed - - # stop the orchestrator - exp.stop(orc) - - print(exp.summary()) diff --git a/tests/test_controller_errors.py b/tests/test_controller_errors.py index a02c17678..2d623cdd1 100644 --- a/tests/test_controller_errors.py +++ b/tests/test_controller_errors.py @@ -28,15 +28,28 @@ import pytest from smartsim._core.control import Controller, Manifest +from smartsim._core.launcher.step import Step +from smartsim._core.launcher.step.dragonStep import DragonStep from smartsim.database import Orchestrator from smartsim.entity import Model +from smartsim.entity.ensemble import Ensemble from smartsim.error import SmartSimError, SSUnsupportedError from smartsim.error.errors import SSUnsupportedError -from smartsim.settings import RunSettings +from smartsim.settings import RunSettings, SrunSettings # The tests in this file belong to the group_a group pytestmark = pytest.mark.group_a +entity_settings = SrunSettings("echo", ["spam", "eggs"]) +model_dup_setting = RunSettings("echo", ["spam_1", "eggs_2"]) +model = Model("model_name", run_settings=entity_settings, params={}, path="") +# Model entity slightly different but with same name +model_2 = Model("model_name", run_settings=model_dup_setting, params={}, path="") +ens = Ensemble("ensemble_name", params={}, run_settings=entity_settings, replicas=2) +# Ensemble entity slightly different but with same name +ens_2 = Ensemble("ensemble_name", params={}, run_settings=entity_settings, replicas=3) +orc = Orchestrator(db_nodes=3, batch=True, launcher="slurm", run_command="srun") + def test_finished_entity_orc_error(): """Orchestrators are never 'finished', either run forever or stopped by user""" @@ -108,3 +121,84 @@ def test_bad_orc_checkpoint(): cont = Controller(launcher="local") with pytest.raises(FileNotFoundError): cont.reload_saved_db(checkpoint) + + +class MockStep(Step): + 
"""Mock step to implement any abstract methods so that it can be + instanced for test purposes + """ + + def get_launch_cmd(self): + return ["echo", "spam"] + + +@pytest.mark.parametrize( + "entity", + [ + pytest.param(ens, id="Ensemble_running"), + pytest.param(model, id="Model_running"), + pytest.param(orc, id="Orch_running"), + ], +) +def test_duplicate_running_entity(test_dir, wlmutils, entity): + """This test validates that users cannot reuse entity names + that are running in JobManager.jobs or JobManager.db_jobs + """ + step_settings = RunSettings("echo") + step = MockStep("mock-step", test_dir, step_settings) + test_launcher = wlmutils.get_test_launcher() + controller = Controller(test_launcher) + controller._jobs.add_job(entity.name, job_id="1234", entity=entity) + with pytest.raises(SSUnsupportedError) as ex: + controller._launch_step(step, entity=entity) + assert ex.value.args[0] == "SmartSim entities cannot have duplicate names." + + +@pytest.mark.parametrize( + "entity", + [pytest.param(ens, id="Ensemble_running"), pytest.param(model, id="Model_running")], +) +def test_restarting_entity(test_dir, wlmutils, entity): + """Validate restarting a completed Model/Ensemble job""" + step_settings = RunSettings("echo") + test_launcher = wlmutils.get_test_launcher() + step = MockStep("mock-step", test_dir, step_settings) + step.meta["status_dir"] = test_dir + entity.path = test_dir + controller = Controller(test_launcher) + controller._jobs.add_job(entity.name, job_id="1234", entity=entity) + controller._jobs.move_to_completed(controller._jobs.jobs.get(entity.name)) + controller._launch_step(step, entity=entity) + + +def test_restarting_orch(test_dir, wlmutils): + """Validate restarting a completed Orchestrator job""" + step_settings = RunSettings("echo") + test_launcher = wlmutils.get_test_launcher() + step = MockStep("mock-step", test_dir, step_settings) + step.meta["status_dir"] = test_dir + orc.path = test_dir + controller = Controller(test_launcher) + 
controller._jobs.add_job(orc.name, job_id="1234", entity=orc) + controller._jobs.move_to_completed(controller._jobs.db_jobs.get(orc.name)) + controller._launch_step(step, entity=orc) + + +@pytest.mark.parametrize( + "entity,entity_2", + [ + pytest.param(ens, ens_2, id="Ensemble_running"), + pytest.param(model, model_2, id="Model_running"), + ], +) +def test_starting_entity(test_dir, wlmutils, entity, entity_2): + """Test launching a job of Model/Ensemble with same name in completed""" + step_settings = RunSettings("echo") + step = MockStep("mock-step", test_dir, step_settings) + test_launcher = wlmutils.get_test_launcher() + controller = Controller(test_launcher) + controller._jobs.add_job(entity.name, job_id="1234", entity=entity) + controller._jobs.move_to_completed(controller._jobs.jobs.get(entity.name)) + with pytest.raises(SSUnsupportedError) as ex: + controller._launch_step(step, entity=entity_2) + assert ex.value.args[0] == "SmartSim entities cannot have duplicate names." diff --git a/tests/test_dbnode.py b/tests/test_dbnode.py index 227572ac9..04845344c 100644 --- a/tests/test_dbnode.py +++ b/tests/test_dbnode.py @@ -49,22 +49,12 @@ def test_parse_db_host_error(): orc.entities[0].host -def test_hosts(test_dir, wlmutils): - exp_name = "test_hosts" - exp = Experiment(exp_name, exp_path=test_dir) - - orc = Orchestrator(port=wlmutils.get_test_port(), interface="lo", launcher="local") - orc.set_path(test_dir) - exp.start(orc) - - hosts = [] - try: - hosts = orc.hosts - assert len(hosts) == orc.db_nodes == 1 - finally: - # stop the database even if there is an error raised - exp.stop(orc) - orc.remove_stale_files() +def test_hosts(local_experiment, prepare_db, local_db): + db = prepare_db(local_db).orchestrator + orc = local_experiment.reconnect_orchestrator(db.checkpoint_file) + + hosts = orc.hosts + assert len(hosts) == orc.db_nodes == 1 def _random_shard_info(): diff --git a/tests/test_dragon_backend.py b/tests/test_dragon_backend.py new file mode 100644 index 
000000000..a510f660a --- /dev/null +++ b/tests/test_dragon_backend.py @@ -0,0 +1,453 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import collections +import sys +import textwrap +import time +from unittest.mock import MagicMock + +import pytest + +# The tests in this file belong to the group_b group +pytestmark = pytest.mark.group_a + +try: + import dragon +except ImportError: + pass +else: + pytest.skip( + reason="Using dragon as launcher, not running Dragon unit tests", + allow_module_level=True, + ) + +from smartsim._core.config import CONFIG +from smartsim._core.schemas.dragonRequests import * +from smartsim._core.schemas.dragonResponses import * +from smartsim._core.utils.helpers import create_short_id_str +from smartsim.status import TERMINAL_STATUSES, SmartSimStatus + +if t.TYPE_CHECKING: + from smartsim._core.launcher.dragon.dragonBackend import ( + DragonBackend, + ProcessGroupInfo, + ) + + +class NodeMock(MagicMock): + @property + def hostname(self) -> str: + return create_short_id_str() + + +class GroupStateMock(MagicMock): + def Running(self) -> MagicMock: + running = MagicMock(**{"__str__.return_value": "Running"}) + return running + + def Error(self) -> MagicMock: + error = MagicMock(**{"__str__.return_value": "Error"}) + return error + + +class ProcessGroupMock(MagicMock): + puids = [121, 122] + + +def get_mock_backend(monkeypatch: pytest.MonkeyPatch) -> "DragonBackend": + + process_mock = MagicMock(returncode=0) + process_group_mock = MagicMock(**{"Process.return_value": ProcessGroupMock()}) + process_module_mock = MagicMock() + process_module_mock.Process = process_mock + node_mock = NodeMock() + system_mock = MagicMock(nodes=["node1", "node2", "node3"]) + monkeypatch.setitem( + sys.modules, + "dragon", + MagicMock( + **{ + "native.machine.Node.return_value": node_mock, + "native.machine.System.return_value": system_mock, + "native.group_state": GroupStateMock(), + "native.process_group.ProcessGroup.return_value": ProcessGroupMock(), + } + ), + ) + monkeypatch.setitem( + sys.modules, + "dragon.infrastructure.connection", + MagicMock(), + ) + monkeypatch.setitem( + 
sys.modules, + "dragon.infrastructure.policy", + MagicMock(**{"Policy.return_value": MagicMock()}), + ) + monkeypatch.setitem(sys.modules, "dragon.native.process", process_module_mock) + monkeypatch.setitem(sys.modules, "dragon.native.process_group", process_group_mock) + + monkeypatch.setitem(sys.modules, "dragon.native.group_state", GroupStateMock()) + monkeypatch.setitem( + sys.modules, + "dragon.native.machine", + MagicMock( + **{"System.return_value": system_mock, "Node.return_value": node_mock} + ), + ) + from smartsim._core.launcher.dragon.dragonBackend import DragonBackend + + dragon_backend = DragonBackend(pid=99999) + monkeypatch.setattr( + dragon_backend, "_free_hosts", collections.deque(dragon_backend._hosts) + ) + + return dragon_backend + + +def set_mock_group_infos( + monkeypatch: pytest.MonkeyPatch, dragon_backend: "DragonBackend" +) -> t.Dict[str, "ProcessGroupInfo"]: + dragon_mock = MagicMock() + process_mock = MagicMock() + process_mock.configure_mock(**{"returncode": 0}) + dragon_mock.configure_mock(**{"native.process.Process.return_value": process_mock}) + monkeypatch.setitem(sys.modules, "dragon", dragon_mock) + from smartsim._core.launcher.dragon.dragonBackend import ProcessGroupInfo + + running_group = MagicMock(status="Running") + error_group = MagicMock(status="Error") + hosts = dragon_backend._hosts + + group_infos = { + "abc123-1": ProcessGroupInfo( + SmartSimStatus.STATUS_RUNNING, + running_group, + [123], + [], + hosts[0:1], + MagicMock(), + ), + "del999-2": ProcessGroupInfo( + SmartSimStatus.STATUS_CANCELLED, + error_group, + [124], + [-9], + hosts[1:2], + MagicMock(), + ), + "c101vz-3": ProcessGroupInfo( + SmartSimStatus.STATUS_COMPLETED, + MagicMock(), + [125, 126], + [0], + hosts[1:3], + MagicMock(), + ), + "0ghjk1-4": ProcessGroupInfo( + SmartSimStatus.STATUS_FAILED, + error_group, + [127], + [-1], + hosts[2:3], + MagicMock(), + ), + "ljace0-5": ProcessGroupInfo( + SmartSimStatus.STATUS_NEVER_STARTED, None, [], [], [], None + ), + 
} + + monkeypatch.setattr(dragon_backend, "_group_infos", group_infos) + monkeypatch.setattr(dragon_backend, "_free_hosts", collections.deque(hosts[1:3])) + monkeypatch.setattr(dragon_backend, "_allocated_hosts", {hosts[0]: "abc123-1"}) + monkeypatch.setattr(dragon_backend, "_running_steps", ["abc123-1"]) + + return group_infos + + +def test_handshake_request(monkeypatch: pytest.MonkeyPatch) -> None: + dragon_backend = get_mock_backend(monkeypatch) + + handshake_req = DragonHandshakeRequest() + handshake_resp = dragon_backend.process_request(handshake_req) + + assert isinstance(handshake_resp, DragonHandshakeResponse) + assert handshake_resp.dragon_pid == 99999 + + +def test_run_request(monkeypatch: pytest.MonkeyPatch) -> None: + dragon_backend = get_mock_backend(monkeypatch) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + ) + + run_resp = dragon_backend.process_request(run_req) + assert isinstance(run_resp, DragonRunResponse) + + step_id = run_resp.step_id + assert dragon_backend._queued_steps[step_id] == run_req + + mock_process_group = MagicMock(puids=[123, 124]) + + dragon_backend._group_infos[step_id].process_group = mock_process_group + dragon_backend._group_infos[step_id].puids = [123, 124] + dragon_backend._start_steps() + + assert dragon_backend._running_steps == [step_id] + assert len(dragon_backend._queued_steps) == 0 + assert len(dragon_backend._free_hosts) == 1 + assert dragon_backend._allocated_hosts[dragon_backend.hosts[0]] == step_id + assert dragon_backend._allocated_hosts[dragon_backend.hosts[1]] == step_id + + monkeypatch.setattr( + dragon_backend._group_infos[step_id].process_group, "status", "Running" + ) + + dragon_backend._update() + + assert dragon_backend._running_steps == [step_id] + assert len(dragon_backend._queued_steps) == 0 + assert len(dragon_backend._free_hosts) == 1 + assert 
dragon_backend._allocated_hosts[dragon_backend.hosts[0]] == step_id + assert dragon_backend._allocated_hosts[dragon_backend.hosts[1]] == step_id + + dragon_backend._group_infos[step_id].status = SmartSimStatus.STATUS_CANCELLED + + dragon_backend._update() + assert not dragon_backend._running_steps + + +def test_deny_run_request(monkeypatch: pytest.MonkeyPatch) -> None: + dragon_backend = get_mock_backend(monkeypatch) + + dragon_backend._shutdown_requested = True + + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=2, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + ) + + run_resp = dragon_backend.process_request(run_req) + assert isinstance(run_resp, DragonRunResponse) + assert run_resp.error_message == "Cannot satisfy request, server is shutting down." + step_id = run_resp.step_id + + assert dragon_backend.group_infos[step_id].status == SmartSimStatus.STATUS_FAILED + + +def test_udpate_status_request(monkeypatch: pytest.MonkeyPatch) -> None: + dragon_backend = get_mock_backend(monkeypatch) + + group_infos = set_mock_group_infos(monkeypatch, dragon_backend) + + status_update_request = DragonUpdateStatusRequest(step_ids=list(group_infos.keys())) + + status_update_response = dragon_backend.process_request(status_update_request) + + assert isinstance(status_update_response, DragonUpdateStatusResponse) + assert status_update_response.statuses == { + step_id: (grp_info.status, grp_info.return_codes) + for step_id, grp_info in group_infos.items() + } + + +def test_stop_request(monkeypatch: pytest.MonkeyPatch) -> None: + dragon_backend = get_mock_backend(monkeypatch) + group_infos = set_mock_group_infos(monkeypatch, dragon_backend) + + running_steps = [ + step_id + for step_id, group in group_infos.items() + if group.status == SmartSimStatus.STATUS_RUNNING + ] + + step_id_to_stop = running_steps[0] + + stop_request = DragonStopRequest(step_id=step_id_to_stop) + + stop_response = 
dragon_backend.process_request(stop_request) + + assert isinstance(stop_response, DragonStopResponse) + assert len(dragon_backend._stop_requests) == 1 + + dragon_backend._update() + + assert len(dragon_backend._stop_requests) == 0 + assert ( + dragon_backend._group_infos[step_id_to_stop].status + == SmartSimStatus.STATUS_CANCELLED + ) + + assert len(dragon_backend._allocated_hosts) == 0 + assert len(dragon_backend._free_hosts) == 3 + + +@pytest.mark.parametrize( + "immediate, kill_jobs, frontend_shutdown", + [ + [True, True, True], + [True, True, False], + [True, False, True], + [True, False, False], + [False, True, True], + [False, True, False], + ], +) +def test_shutdown_request( + monkeypatch: pytest.MonkeyPatch, + immediate: bool, + kill_jobs: bool, + frontend_shutdown: bool, +) -> None: + monkeypatch.setenv("SMARTSIM_FLAG_TELEMETRY", "0") + dragon_backend = get_mock_backend(monkeypatch) + monkeypatch.setattr(dragon_backend, "_cooldown_period", 1) + set_mock_group_infos(monkeypatch, dragon_backend) + + if kill_jobs: + for group_info in dragon_backend.group_infos.values(): + if not group_info.status in TERMINAL_STATUSES: + group_info.status = SmartSimStatus.STATUS_FAILED + group_info.return_codes = [-9] + group_info.process_group = None + group_info.redir_workers = None + dragon_backend._running_steps.clear() + + shutdown_req = DragonShutdownRequest( + immediate=immediate, frontend_shutdown=frontend_shutdown + ) + shutdown_resp = dragon_backend.process_request(shutdown_req) + + if not kill_jobs: + stop_request_ids = ( + stop_request.step_id for stop_request in dragon_backend._stop_requests + ) + for step_id, group_info in dragon_backend.group_infos.items(): + if not group_info.status in TERMINAL_STATUSES: + assert step_id in stop_request_ids + + assert isinstance(shutdown_resp, DragonShutdownResponse) + assert dragon_backend._shutdown_requested + assert dragon_backend.frontend_shutdown == frontend_shutdown + + dragon_backend._update() + assert not 
dragon_backend.should_shutdown + time.sleep(dragon_backend._cooldown_period + 0.1) + dragon_backend._update() + + assert dragon_backend._can_shutdown == kill_jobs + assert dragon_backend.should_shutdown == kill_jobs + assert dragon_backend._has_cooled_down == kill_jobs + + +@pytest.mark.parametrize("telemetry_flag", ["0", "1"]) +def test_cooldown_is_set(monkeypatch: pytest.MonkeyPatch, telemetry_flag: str) -> None: + monkeypatch.setenv("SMARTSIM_FLAG_TELEMETRY", telemetry_flag) + dragon_backend = get_mock_backend(monkeypatch) + + expected_cooldown = ( + 2 * CONFIG.telemetry_frequency + 5 if int(telemetry_flag) > 0 else 5 + ) + + if telemetry_flag: + assert dragon_backend.cooldown_period == expected_cooldown + else: + assert dragon_backend.cooldown_period == expected_cooldown + + +def test_heartbeat_and_time(monkeypatch: pytest.MonkeyPatch) -> None: + dragon_backend = get_mock_backend(monkeypatch) + first_heartbeat = dragon_backend.last_heartbeat + assert dragon_backend.current_time > first_heartbeat + dragon_backend._heartbeat() + assert dragon_backend.last_heartbeat > first_heartbeat + + +@pytest.mark.parametrize("num_nodes", [1, 3, 100]) +def test_can_honor(monkeypatch: pytest.MonkeyPatch, num_nodes: int) -> None: + dragon_backend = get_mock_backend(monkeypatch) + run_req = DragonRunRequest( + exe="sleep", + exe_args=["5"], + path="/a/fake/path", + nodes=num_nodes, + tasks=1, + tasks_per_node=1, + env={}, + current_env={}, + pmi_enabled=False, + ) + + assert dragon_backend._can_honor(run_req)[0] == ( + num_nodes <= len(dragon_backend._hosts) + ) + + +def test_get_id(monkeypatch: pytest.MonkeyPatch) -> None: + dragon_backend = get_mock_backend(monkeypatch) + step_id = next(dragon_backend._step_ids) + + assert step_id.endswith("0") + assert step_id != next(dragon_backend._step_ids) + + +def test_view(monkeypatch: pytest.MonkeyPatch) -> None: + dragon_backend = get_mock_backend(monkeypatch) + set_mock_group_infos(monkeypatch, dragon_backend) + hosts = 
dragon_backend.hosts + + expected_message = textwrap.dedent(f"""\ + Dragon server backend update + | Host | Status | + |---------|----------| + | {hosts[0]} | Busy | + | {hosts[1]} | Free | + | {hosts[2]} | Free | + | Step | Status | Hosts | Return codes | Num procs | + |----------|--------------|-----------------|----------------|-------------| + | abc123-1 | Running | {hosts[0]} | | 1 | + | del999-2 | Cancelled | {hosts[1]} | -9 | 1 | + | c101vz-3 | Completed | {hosts[1]},{hosts[2]} | 0 | 2 | + | 0ghjk1-4 | Failed | {hosts[2]} | -1 | 1 | + | ljace0-5 | NeverStarted | | | 0 |""") + + assert dragon_backend.status_message == expected_message diff --git a/tests/test_dragon_installer.py b/tests/test_dragon_installer.py new file mode 100644 index 000000000..b23a1a7ef --- /dev/null +++ b/tests/test_dragon_installer.py @@ -0,0 +1,471 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import pathlib +import sys +import tarfile +import typing as t +from collections import namedtuple + +import pytest +from github.GitReleaseAsset import GitReleaseAsset +from github.Requester import Requester + +import smartsim +import smartsim._core.utils.helpers as helpers +from smartsim._core._cli.scripts.dragon_install import ( + cleanup, + create_dotenv, + install_dragon, + install_package, + retrieve_asset, + retrieve_asset_info, +) +from smartsim.error.errors import SmartSimCLIActionCancelled + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +mock_archive_name = "dragon-0.8-py3.9.4.1-CRAYEX-ac132fe95.tar.gz" +_git_attr = namedtuple("_git_attr", "value") + + +@pytest.fixture +def test_archive(test_dir: str, archive_path: pathlib.Path) -> pathlib.Path: + """Fixture for returning a simple tarfile to test on""" + num_files = 10 + with tarfile.TarFile.open(archive_path, mode="w:gz") as tar: + mock_whl = pathlib.Path(test_dir) / "mock.whl" + mock_whl.touch() + + for i in range(num_files): + content = pathlib.Path(test_dir) / f"{i:04}.txt" + content.write_text(f"i am file {i}\n") + tar.add(content) + return archive_path + + +@pytest.fixture +def archive_path(test_dir: str) -> pathlib.Path: + """Fixture for returning a dir path based on the default mock asset archive name""" + path = pathlib.Path(test_dir) / mock_archive_name + return path + + +@pytest.fixture +def extraction_dir(test_dir: str) -> 
pathlib.Path: + """Fixture for returning a dir path based on the default mock asset archive name""" + path = pathlib.Path(test_dir) / mock_archive_name.replace(".tar.gz", "") + return path + + +@pytest.fixture +def test_assets(monkeypatch: pytest.MonkeyPatch) -> t.Dict[str, GitReleaseAsset]: + requester = Requester( + auth=None, + base_url="https://github.com", + user_agent="mozilla", + per_page=10, + verify=False, + timeout=1, + retry=1, + pool_size=1, + ) + headers = {"mock-header": "mock-value"} + attributes = {"mock-attr": "mock-attr-value"} + completed = True + + assets: t.List[GitReleaseAsset] = [] + mock_archive_name_tpl = "{}-{}.4.1-{}ac132fe95.tar.gz" + + for python_version in ["py3.9", "py3.10", "py3.11"]: + for dragon_version in ["dragon-0.8", "dragon-0.9", "dragon-0.10"]: + for platform in ["", "CRAYEX-"]: + + asset = GitReleaseAsset(requester, headers, attributes, completed) + + archive_name = mock_archive_name_tpl.format( + dragon_version, python_version, platform + ) + + monkeypatch.setattr( + asset, + "_browser_download_url", + _git_attr(value=f"http://foo/{archive_name}"), + ) + monkeypatch.setattr(asset, "_name", _git_attr(value=archive_name)) + assets.append(asset) + + return assets + + +def test_cleanup_no_op(archive_path: pathlib.Path) -> None: + """Ensure that the cleanup method doesn't bomb when called with + missing archive path; simulate a failed download""" + # confirm assets do not exist + assert not archive_path.exists() + + # call cleanup. any exceptions should break test... 
+ cleanup(archive_path) + + +def test_cleanup_archive_exists(test_archive: pathlib.Path) -> None: + """Ensure that the cleanup method removes the archive""" + assert test_archive.exists() + + cleanup(test_archive) + + # verify archive is gone after cleanup + assert not test_archive.exists() + + +def test_retrieve_cached( + test_dir: str, + # archive_path: pathlib.Path, + test_archive: pathlib.Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Verify that a previously retrieved asset archive is re-used""" + with tarfile.TarFile.open(test_archive) as tar: + tar.extractall(test_dir) + + ts1 = test_archive.parent.stat().st_ctime + + requester = Requester( + auth=None, + base_url="https://github.com", + user_agent="mozilla", + per_page=10, + verify=False, + timeout=1, + retry=1, + pool_size=1, + ) + headers = {"mock-header": "mock-value"} + attributes = {"mock-attr": "mock-attr-value"} + completed = True + + asset = GitReleaseAsset(requester, headers, attributes, completed) + + # ensure mocked asset has values that we use... + monkeypatch.setattr(asset, "_browser_download_url", _git_attr(value="http://foo")) + monkeypatch.setattr(asset, "_name", _git_attr(value=mock_archive_name)) + + asset_path = retrieve_asset(test_archive.parent, asset) + ts2 = asset_path.stat().st_ctime + + assert ( + asset_path == test_archive.parent + ) # show that the expected path matches the output path + assert ts1 == ts2 # show that the file wasn't changed... 
+ + +@pytest.mark.parametrize( + "dragon_pin,pyv,is_found,is_crayex", + [ + pytest.param("0.8", "py3.8", False, False, id="0.8,python 3.8"), + pytest.param("0.8", "py3.9", True, False, id="0.8,python 3.9"), + pytest.param("0.8", "py3.10", True, False, id="0.8,python 3.10"), + pytest.param("0.8", "py3.11", True, False, id="0.8,python 3.11"), + pytest.param("0.8", "py3.12", False, False, id="0.8,python 3.12"), + pytest.param("0.8", "py3.8", False, True, id="0.8,python 3.8,CrayEX"), + pytest.param("0.8", "py3.9", True, True, id="0.8,python 3.9,CrayEX"), + pytest.param("0.8", "py3.10", True, True, id="0.8,python 3.10,CrayEX"), + pytest.param("0.8", "py3.11", True, True, id="0.8,python 3.11,CrayEX"), + pytest.param("0.8", "py3.12", False, True, id="0.8,python 3.12,CrayEX"), + pytest.param("0.9", "py3.8", False, False, id="0.9,python 3.8"), + pytest.param("0.9", "py3.9", True, False, id="0.9,python 3.9"), + pytest.param("0.9", "py3.10", True, False, id="0.9,python 3.10"), + pytest.param("0.9", "py3.11", True, False, id="0.9,python 3.11"), + pytest.param("0.9", "py3.12", False, False, id="0.9,python 3.12"), + pytest.param("0.9", "py3.8", False, True, id="0.9,python 3.8,CrayEX"), + pytest.param("0.9", "py3.9", True, True, id="0.9,python 3.9,CrayEX"), + pytest.param("0.9", "py3.10", True, True, id="0.9,python 3.10,CrayEX"), + pytest.param("0.9", "py3.11", True, True, id="0.9,python 3.11,CrayEX"), + pytest.param("0.9", "py3.12", False, True, id="0.9,python 3.12,CrayEX"), + # add a couple variants for a dragon version that isn't in the asset list + pytest.param("0.7", "py3.9", False, False, id="0.7,python 3.9"), + pytest.param("0.7", "py3.9", False, True, id="0.7,python 3.9,CrayEX"), + ], +) +def test_retrieve_asset_info( + test_assets: t.Collection[GitReleaseAsset], + monkeypatch: pytest.MonkeyPatch, + dragon_pin: str, + pyv: str, + is_found: bool, + is_crayex: bool, +) -> None: + """Verify that an information is retrieved correctly based on the python + version, platform 
(e.g. CrayEX, !CrayEx), and target dragon pin""" + + with monkeypatch.context() as ctx: + ctx.setattr( + smartsim._core._cli.scripts.dragon_install, + "python_version", + lambda: pyv, + ) + ctx.setattr( + smartsim._core._cli.scripts.dragon_install, + "is_crayex_platform", + lambda: is_crayex, + ) + ctx.setattr( + smartsim._core._cli.scripts.dragon_install, + "dragon_pin", + lambda: dragon_pin, + ) + # avoid hitting github API + ctx.setattr( + smartsim._core._cli.scripts.dragon_install, + "_get_release_assets", + lambda: test_assets, + ) + + if is_found: + chosen_asset = retrieve_asset_info() + + assert chosen_asset + assert pyv in chosen_asset.name + assert dragon_pin in chosen_asset.name + + if is_crayex: + assert "crayex" in chosen_asset.name.lower() + else: + assert "crayex" not in chosen_asset.name.lower() + else: + with pytest.raises(SmartSimCLIActionCancelled): + retrieve_asset_info() + + +def test_check_for_utility_missing(test_dir: str) -> None: + """Ensure that looking for a missing utility doesn't raise an exception""" + ld_config = pathlib.Path(test_dir) / "ldconfig" + + utility = helpers.check_for_utility(ld_config) + + assert not utility + + +def test_check_for_utility_exists() -> None: + """Ensure that looking for an existing utility returns a non-empty path""" + utility = helpers.check_for_utility("ls") + assert utility + + +def test_is_crayex_missing_ldconfig(monkeypatch: pytest.MonkeyPatch) -> None: + """Ensure the cray ex platform check doesn't fail when ldconfig isn't + available for use""" + + def mock_util_check(util: str) -> str: + if util == "ldconfig": + return "" + return "w00t!" 
+ + with monkeypatch.context() as ctx: + # mock utility existence + ctx.setattr( + helpers, + "check_for_utility", + mock_util_check, + ) + + is_cray = helpers.is_crayex_platform() + assert not is_cray + + +def test_is_crayex_missing_fi_info(monkeypatch: pytest.MonkeyPatch) -> None: + """Ensure the cray ex platform check doesn't fail when fi_info isn't + available for use""" + + def mock_util_check(util: str) -> str: + if util == "fi_info": + return "" + return "w00t!" + + with monkeypatch.context() as ctx: + # mock utility existence + ctx.setattr( + helpers, + "check_for_utility", + mock_util_check, + ) + + is_cray = helpers.is_crayex_platform() + assert not is_cray + + +@pytest.mark.parametrize( + "is_cray,output,return_code", + [ + pytest.param(True, "cray pmi2.so\ncxi\ncray pmi.so\npni.so", 0, id="CrayEX"), + pytest.param(False, "cray pmi2.so\ncxi\npni.so", 0, id="No PMI"), + pytest.param(False, "cxi\ncray pmi.so\npni.so", 0, id="No PMI 2"), + pytest.param(False, "cray pmi2.so\ncray pmi.so\npni.so", 0, id="No CXI"), + pytest.param(False, "pmi.so\ncray pmi2.so\ncxi", 0, id="Non Cray PMI"), + pytest.param(False, "cray pmi.so\npmi2.so\ncxi", 0, id="Non Cray PMI2"), + ], +) +def test_is_cray_ex( + monkeypatch: pytest.MonkeyPatch, is_cray: bool, output: str, return_code: int +) -> None: + """Test that cray ex platform check result is returned as expected""" + + def mock_util_check(util: str) -> bool: + # mock that we have the necessary tools + return True + + with monkeypatch.context() as ctx: + # make it look like the utilies always exist + ctx.setattr( + helpers, + "check_for_utility", + mock_util_check, + ) + # mock + ctx.setattr( + helpers, + "execute_platform_cmd", + lambda x: (output, return_code), + ) + + platform_result = helpers.is_crayex_platform() + assert is_cray == platform_result + + +def test_install_package_no_wheel(extraction_dir: pathlib.Path): + """Verify that a missing wheel does not blow up and has a failure retcode""" + exp_path = 
extraction_dir + + result = install_package(exp_path) + assert result != 0 + + +def test_install_macos(monkeypatch: pytest.MonkeyPatch, extraction_dir: pathlib.Path): + """Verify that installation exits cleanly if installing on unsupported platform""" + with monkeypatch.context() as ctx: + ctx.setattr(sys, "platform", "darwin") + + result = install_dragon(extraction_dir) + assert result == 1 + + +def test_create_dotenv(monkeypatch: pytest.MonkeyPatch, test_dir: str): + """Verify that attempting to create a .env file without any existing + file or container directory works""" + test_path = pathlib.Path(test_dir) + mock_dragon_root = pathlib.Path(test_dir) / "dragon" + exp_env_path = pathlib.Path(test_dir) / "dragon" / ".env" + + with monkeypatch.context() as ctx: + ctx.setattr(smartsim._core.config.CONFIG, "conf_dir", test_path) + + # ensure no .env exists before trying to create it. + assert not exp_env_path.exists() + + create_dotenv(mock_dragon_root) + + # ensure the .env is created as side-effect of create_dotenv + assert exp_env_path.exists() + + +def test_create_dotenv_existing_dir(monkeypatch: pytest.MonkeyPatch, test_dir: str): + """Verify that attempting to create a .env file in an existing + target dir works""" + test_path = pathlib.Path(test_dir) + mock_dragon_root = pathlib.Path(test_dir) / "dragon" + exp_env_path = pathlib.Path(test_dir) / "dragon" / ".env" + + # set up the parent directory that will contain the .env + exp_env_path.parent.mkdir(parents=True) + + with monkeypatch.context() as ctx: + ctx.setattr(smartsim._core.config.CONFIG, "conf_dir", test_path) + + # ensure no .env exists before trying to create it. 
+ assert not exp_env_path.exists() + + create_dotenv(mock_dragon_root) + + # ensure the .env is created as side-effect of create_dotenv + assert exp_env_path.exists() + + +def test_create_dotenv_existing_dotenv(monkeypatch: pytest.MonkeyPatch, test_dir: str): + """Verify that attempting to create a .env file when one exists works as expected""" + test_path = pathlib.Path(test_dir) + mock_dragon_root = pathlib.Path(test_dir) / "dragon" + exp_env_path = pathlib.Path(test_dir) / "dragon" / ".env" + + # set up the parent directory that will contain the .env + exp_env_path.parent.mkdir(parents=True) + + # write something into file to verify it is overwritten + var_name = "DRAGON_BASE_DIR" + exp_env_path.write_text(f"{var_name}=/foo/bar") + + with monkeypatch.context() as ctx: + ctx.setattr(smartsim._core.config.CONFIG, "conf_dir", test_path) + + # ensure .env exists so we can update it + assert exp_env_path.exists() + + create_dotenv(mock_dragon_root) + + # ensure the .env is created as side-effect of create_dotenv + assert exp_env_path.exists() + + # ensure file was overwritten and env vars are not duplicated + dotenv_content = exp_env_path.read_text(encoding="utf-8") + split_content = dotenv_content.split(var_name) + + # split to confirm env var only appars once + assert len(split_content) == 2 + + +def test_create_dotenv_format(monkeypatch: pytest.MonkeyPatch, test_dir: str): + """Verify that created .env files are correctly formatted""" + test_path = pathlib.Path(test_dir) + mock_dragon_root = pathlib.Path(test_dir) / "dragon" + exp_env_path = pathlib.Path(test_dir) / "dragon" / ".env" + + with monkeypatch.context() as ctx: + ctx.setattr(smartsim._core.config.CONFIG, "conf_dir", test_path) + + create_dotenv(mock_dragon_root) + + # ensure the .env is created as side-effect of create_dotenv + content = exp_env_path.read_text(encoding="utf-8") + + # ensure we have values written, but ignore empty lines + lines = [line for line in content.split("\n") if line] + assert 
lines + + # ensure each line is formatted as key=value + for line in lines: + line_split = line.split("=") + assert len(line_split) == 2 diff --git a/tests/test_dragon_launcher.py b/tests/test_dragon_launcher.py new file mode 100644 index 000000000..ee0fcb14b --- /dev/null +++ b/tests/test_dragon_launcher.py @@ -0,0 +1,523 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import logging +import multiprocessing as mp +import os +import pathlib +import sys +import time +import typing as t + +import pytest +import zmq + +import smartsim._core.config +from smartsim._core._cli.scripts.dragon_install import create_dotenv +from smartsim._core.config.config import get_config +from smartsim._core.launcher.dragon.dragonLauncher import DragonConnector +from smartsim._core.launcher.dragon.dragonSockets import ( + get_authenticator, + get_secure_socket, +) +from smartsim._core.schemas.dragonRequests import DragonBootstrapRequest +from smartsim._core.schemas.dragonResponses import DragonHandshakeResponse +from smartsim._core.utils.network import IFConfig, find_free_port +from smartsim._core.utils.security import KeyManager + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +is_mac = sys.platform == "darwin" + + +class MockPopen: + calls = [] + + def __init__(self, *args: t.Any, **kwargs: t.Any) -> None: + self.args = args + self.kwargs = kwargs + + MockPopen.calls.append((args, kwargs)) + + @property + def pid(self) -> int: + return 99999 + + @property + def returncode(self) -> int: + return 0 + + @property + def stdout(self): + return None + + @property + def stderr(self): + return None + + def wait(self, timeout: float) -> None: + time.sleep(timeout) + + +class MockSocket: + def __init__(self, *args: t.Any, **kwargs: t.Any) -> None: + self._bind_address = "" + + def __call__(self, *args: t.Any, **kwds: t.Any) -> t.Any: + return self + + def bind(self, addr: str) -> None: + self._bind_address = addr + + def recv_string(self, flags: int) -> str: + dbr = DragonBootstrapRequest(address=self._bind_address) + return f"bootstrap|{dbr.json()}" + + def close(self) -> None: ... + + def send(self, *args, **kwargs) -> None: ... + + def send_json(self, json: str) -> None: ... + + def send_string(*args, **kwargs) -> None: ... + + def connect(*args, **kwargs) -> None: ... 
+ + @property + def bind_address(self) -> str: + return self._bind_address + + +class MockAuthenticator: + def __init__(self, context: zmq.Context, log: t.Any) -> None: + self.num_starts: int = 0 + self.num_stops: int = 0 + self.num_configure_curves: int = 0 + self.context = context + self.thread = None + + def configure_curve(self, *args, **kwargs) -> None: + self.cfg_args = args + self.cfg_kwargs = kwargs + self.num_configure_curves += 1 + + def start(self) -> None: + self.num_starts += 1 + + def stop(self) -> None: + self.num_stops += 1 + + def is_alive(self) -> bool: + return self.num_starts > 0 and self.num_stops == 0 + + +def mock_dragon_env(test_dir, *args, **kwargs): + """Create a mock dragon environment that can talk to the launcher through ZMQ""" + logger = logging.getLogger(__name__) + config = get_config() + logging.basicConfig(level=logging.DEBUG) + try: + addr = "127.0.0.1" + callback_port = kwargs["port"] + head_port = find_free_port(start=callback_port + 1) + context = zmq.Context.instance() + context.setsockopt(zmq.SNDTIMEO, config.dragon_server_timeout) + context.setsockopt(zmq.RCVTIMEO, config.dragon_server_timeout) + authenticator = get_authenticator(context, -1) + + callback_socket = get_secure_socket(context, zmq.REQ, False) + dragon_head_socket = get_secure_socket(context, zmq.REP, True) + + full_addr = f"{addr}:{callback_port}" + callback_socket.connect(f"tcp://{full_addr}") + + full_head_addr = f"tcp://{addr}:{head_port}" + dragon_head_socket.bind(full_head_addr) + + req = DragonBootstrapRequest(address=full_head_addr) + + msg_sent = False + while not msg_sent: + logger.info("Sending bootstrap request to callback socket") + callback_socket.send_string("bootstrap|" + req.json()) + # hold until bootstrap response is received + logger.info("Receiving bootstrap response from callback socket") + _ = callback_socket.recv() + msg_sent = True + + hand_shaken = False + while not hand_shaken: + # other side should set up a socket and push me a 
`HandshakeRequest` + logger.info("Receiving handshake request through dragon head socket") + _ = dragon_head_socket.recv() + # acknowledge handshake success w/DragonHandshakeResponse + logger.info("Sending handshake response through dragon head socket") + handshake_ack = DragonHandshakeResponse(dragon_pid=os.getpid()) + dragon_head_socket.send_string(f"handshake|{handshake_ack.json()}") + + hand_shaken = True + + shutting_down = False + while not shutting_down: + logger.info("Waiting for shutdown request through dragon head socket") + # any incoming request at this point in test is my shutdown... + try: + message = dragon_head_socket.recv() + logger.info(f"Received final message {message}") + finally: + shutting_down = True + try: + logger.info("Handshake complete. Shutting down mock dragon env.") + authenticator.stop() + finally: + logger.info("Dragon mock env exiting...") + + except Exception as ex: + logger.info(f"exception occurred while configuring mock handshaker: {ex}") + raise ex from None + + +def test_dragon_connect_attributes(monkeypatch: pytest.MonkeyPatch, test_dir: str): + """Test the connection to a dragon environment dynamically selects an open port + in the range supplied and passes the correct environment""" + test_path = pathlib.Path(test_dir) + + with monkeypatch.context() as ctx: + # make sure we don't touch "real keys" during a test + ctx.setenv("SMARTSIM_KEY_PATH", test_dir) + + mock_socket = MockSocket() + + # look at test_dir for dragon config + ctx.setenv("SMARTSIM_DRAGON_SERVER_PATH", test_dir) + # avoid finding real interface + ctx.setattr( + "smartsim._core.launcher.dragon.dragonConnector.get_best_interface_and_address", + lambda: IFConfig(interface="faux_interface", address="127.0.0.1"), + ) + # we need to set the socket value or is_connected returns False + ctx.setattr( + "smartsim._core.launcher.dragon.dragonLauncher.DragonConnector._handshake", + lambda self, address: ..., + ) + # avoid starting a real authenticator thread + 
ctx.setattr("zmq.auth.thread.ThreadAuthenticator", MockAuthenticator) + # avoid starting a real zmq socket + ctx.setattr("zmq.Context.socket", mock_socket) + # avoid starting a real process for dragon entrypoint + ctx.setattr( + "subprocess.Popen", lambda *args, **kwargs: MockPopen(*args, **kwargs) + ) + + # avoid reading "real" config in test... + ctx.setattr(smartsim._core.config.CONFIG, "conf_dir", test_path) + dotenv_path = smartsim._core.config.CONFIG.dragon_dotenv + dotenv_path.parent.mkdir(parents=True) + dotenv_path.write_text("FOO=BAR\nBAZ=BOO") + + dragon_connector = DragonConnector() + dragon_connector.connect_to_dragon() + + chosen_port = int(mock_socket.bind_address.split(":")[-1]) + assert chosen_port >= 5995 + + # grab the kwargs env=xxx from the mocked popen to check what was passed + env = MockPopen.calls[0][1].get("env", None) + + # confirm the environment values were passed from .env file to dragon process + assert "PYTHONUNBUFFERED" in env + assert "FOO" in env + assert "BAZ" in env + + dragon_connector._authenticator.stop() + + +@pytest.mark.parametrize( + "socket_type, is_server", + [ + pytest.param(zmq.REQ, True, id="as-server"), + pytest.param(zmq.REP, False, id="as-client"), + ], +) +def test_secure_socket_authenticator_setup( + test_dir: str, monkeypatch: pytest.MonkeyPatch, socket_type: int, is_server: bool +): + """Ensure the authenticator created by the secure socket factory method + is fully configured and started when returned to a client""" + + with monkeypatch.context() as ctx: + # look at test dir for dragon config + ctx.setenv("SMARTSIM_KEY_PATH", test_dir) + # avoid starting a real authenticator thread + ctx.setattr("zmq.auth.thread.ThreadAuthenticator", MockAuthenticator) + + authenticator = get_authenticator(zmq.Context.instance()) + + km = KeyManager(get_config(), as_server=is_server) + + assert isinstance(authenticator, MockAuthenticator) + + # ensure authenticator was configured + assert authenticator.num_configure_curves > 
0 + # ensure authenticator was started + assert authenticator.num_starts > 0 + assert authenticator.context == zmq.Context.instance() + # ensure authenticator will accept any secured connection + assert authenticator.cfg_kwargs.get("domain", "") == "*" + # ensure authenticator is using the expected set of keys + assert authenticator.cfg_kwargs.get("location", "") == km.client_keys_dir + + authenticator.stop() + + +@pytest.mark.parametrize( + "as_server", + [ + pytest.param(True, id="server-socket"), + pytest.param(False, id="client-socket"), + ], +) +def test_secure_socket_setup( + test_dir: str, monkeypatch: pytest.MonkeyPatch, as_server: bool +): + """Ensure the authenticator created by the secure socket factory method + is fully configured and started when returned to a client""" + + with monkeypatch.context() as ctx: + # look at test dir for dragon config + ctx.setenv("SMARTSIM_KEY_PATH", test_dir) + # avoid starting a real authenticator thread + ctx.setattr("zmq.auth.thread.ThreadAuthenticator", MockAuthenticator) + + context = zmq.Context.instance() + + socket = get_secure_socket(context, zmq.REP, as_server) + + # verify the socket is correctly configured to use curve authentication + assert bool(socket.CURVE_SERVER) == as_server + assert not socket.closed + + socket.close() + + +def test_secure_socket(test_dir: str, monkeypatch: pytest.MonkeyPatch): + """Ensure the authenticator created by the secure socket factory method + is fully configured and started when returned to a client""" + logger = logging.getLogger(__name__) + logging.basicConfig(level=logging.DEBUG) + with monkeypatch.context() as ctx: + # make sure we don't touch "real keys" during a test + ctx.setenv("SMARTSIM_KEY_PATH", test_dir) + context = zmq.Context.instance() + authenticator = get_authenticator(context) + server = get_secure_socket(context, zmq.REP, True) + + ip, port = "127.0.0.1", find_free_port(start=9999) + + try: + server.bind(f"tcp://*:{port}") + + client = 
get_secure_socket(context, zmq.REQ, False) + + client.connect(f"tcp://{ip}:{port}") + + to_send = "you get a foo! you get a foo! everybody gets a foo!" + client.send_string(to_send, flags=zmq.NOBLOCK) + + received_msg = server.recv_string() + assert received_msg == to_send + logger.debug(f"server received: {received_msg}") + finally: + if authenticator: + authenticator.stop() + if client: + client.close() + if server: + server.close() + + +@pytest.mark.skipif(is_mac, reason="unsupported on MacOSX") +def test_dragon_launcher_handshake(monkeypatch: pytest.MonkeyPatch, test_dir: str): + """Test that a real handshake between a launcher & dragon environment + completes successfully using secure sockets""" + addr = "127.0.0.1" + bootstrap_port = find_free_port(start=5995) + + with monkeypatch.context() as ctx: + # make sure we don't touch "real keys" during a test + ctx.setenv("SMARTSIM_KEY_PATH", test_dir) + + # look at test dir for dragon config + ctx.setenv("SMARTSIM_DRAGON_SERVER_PATH", test_dir) + # avoid finding real interface since we may not be on a super + ctx.setattr( + "smartsim._core.launcher.dragon.dragonConnector.get_best_interface_and_address", + lambda: IFConfig("faux_interface", addr), + ) + + ctx.setattr( + "smartsim._core.launcher.dragon.dragonConnector._dragon_cleanup", + lambda server_socket, server_process_pid, server_authenticator: server_authenticator.stop(), + ) + + # start up a faux dragon env that knows how to do the handshake process + # but uses secure sockets for all communication. 
+ mock_dragon = mp.Process( + target=mock_dragon_env, + daemon=True, + kwargs={"port": bootstrap_port, "test_dir": test_dir}, + ) + + def fn(*args, **kwargs): + mock_dragon.start() + return mock_dragon + + ctx.setattr("subprocess.Popen", fn) + + connector = DragonConnector() + + try: + # connect executes the complete handshake and raises an exception if comms fails + connector.connect_to_dragon() + finally: + connector.cleanup() + + +def test_load_env_no_file(monkeypatch: pytest.MonkeyPatch, test_dir: str): + """Ensure an empty dragon .env file doesn't break the launcher""" + test_path = pathlib.Path(test_dir) + # mock_dragon_root = pathlib.Path(test_dir) / "dragon" + # exp_env_path = pathlib.Path(test_dir) / "dragon" / ".env" + + with monkeypatch.context() as ctx: + ctx.setattr(smartsim._core.config.CONFIG, "conf_dir", test_path) + + dragon_conf = smartsim._core.config.CONFIG.dragon_dotenv + # verify config doesn't exist + assert not dragon_conf.exists() + + connector = DragonConnector() + + loaded_env = connector.load_persisted_env() + assert not loaded_env + + +def test_load_env_env_file_created(monkeypatch: pytest.MonkeyPatch, test_dir: str): + """Ensure a populated dragon .env file is loaded correctly by the launcher""" + test_path = pathlib.Path(test_dir) + mock_dragon_root = pathlib.Path(test_dir) / "dragon" + + with monkeypatch.context() as ctx: + ctx.setattr(smartsim._core.config.CONFIG, "conf_dir", test_path) + create_dotenv(mock_dragon_root) + dragon_conf = smartsim._core.config.CONFIG.dragon_dotenv + + # verify config does exist + assert dragon_conf.exists() + + # load config w/launcher + connector = DragonConnector() + + loaded_env = connector.load_persisted_env() + assert loaded_env + + # confirm .env was parsed as expected by inspecting a key + assert "DRAGON_ROOT_DIR" in loaded_env + + +def test_load_env_cached_env(monkeypatch: pytest.MonkeyPatch, test_dir: str): + """Ensure repeated attempts to use dragon env don't hit file system""" + test_path = 
pathlib.Path(test_dir) + mock_dragon_root = pathlib.Path(test_dir) / "dragon" + + with monkeypatch.context() as ctx: + ctx.setattr(smartsim._core.config.CONFIG, "conf_dir", test_path) + create_dotenv(mock_dragon_root) + + # load config w/launcher + connector = DragonConnector() + + loaded_env = connector.load_persisted_env() + assert loaded_env + + # ensure attempting to reload would bomb + ctx.setattr(smartsim._core.config.CONFIG, "conf_dir", None) + + # attempt to load and if it doesn't blow up, it used the cached copy + + loaded_env = connector.load_persisted_env() + assert loaded_env + + +def test_merge_env(monkeypatch: pytest.MonkeyPatch, test_dir: str): + """Ensure that merging dragon .env file into current env has correct precedences""" + test_path = pathlib.Path(test_dir) + mock_dragon_root = pathlib.Path(test_dir) / "dragon" + + with monkeypatch.context() as ctx: + ctx.setattr(smartsim._core.config.CONFIG, "conf_dir", test_path) + create_dotenv(mock_dragon_root) + + # load config w/launcher + connector = DragonConnector() + loaded_env = {**connector.load_persisted_env()} + assert loaded_env + + curr_base_dir = "/foo" + curr_path = "/foo:/bar" + curr_only = "some-value" + + loaded_path = loaded_env.get("PATH", "") + + # ensure some non-dragon value exists in env; we want + # to see that it is in merged output without empty prepending + non_dragon_key = "NON_DRAGON_KEY" + non_dragon_value = "non_dragon_value" + connector._env_vars[non_dragon_key] = non_dragon_value + + curr_env = { + "DRAGON_BASE_DIR": curr_base_dir, # expect overwrite + "PATH": curr_path, # expect prepend + "ONLY_IN_CURRENT": curr_only, # expect pass-through + } + + merged_env = connector.merge_persisted_env(curr_env) + + # any dragon env vars should be overwritten + assert merged_env["DRAGON_BASE_DIR"] != curr_base_dir + + # any non-dragon collisions should result in prepending + assert merged_env["PATH"] == f"{loaded_path}:{curr_path}" + # ensure we actually see a change + assert 
merged_env["PATH"] != loaded_env["PATH"] + + # any keys that were in curr env should still exist, unchanged + assert merged_env["ONLY_IN_CURRENT"] == curr_only + + # any non-dragon keys that didn't exist avoid unnecessary prepending + assert merged_env[non_dragon_key] == non_dragon_value diff --git a/tests/test_experiment.py b/tests/test_experiment.py index 12b2f1579..4bae09e68 100644 --- a/tests/test_experiment.py +++ b/tests/test_experiment.py @@ -24,22 +24,33 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os +import os.path as osp +import pathlib +import shutil +import typing as t import pytest from smartsim import Experiment from smartsim._core.config import CONFIG +from smartsim._core.config.config import Config +from smartsim._core.utils import serialize +from smartsim.database import Orchestrator from smartsim.entity import Model from smartsim.error import SmartSimError from smartsim.error.errors import SSUnsupportedError from smartsim.settings import RunSettings -from smartsim.status import STATUS_NEVER_STARTED +from smartsim.status import SmartSimStatus + +if t.TYPE_CHECKING: + import conftest + # The tests in this file belong to the slow_tests group pytestmark = pytest.mark.slow_tests -def test_model_prefix(test_dir): +def test_model_prefix(test_dir: str) -> None: exp_name = "test_prefix" exp = Experiment(exp_name) @@ -52,24 +63,38 @@ def test_model_prefix(test_dir): assert model._key_prefixing_enabled == True -def test_bad_exp_path(): +def test_model_no_name(): + exp = Experiment("test_model_no_name") + with pytest.raises(AttributeError): + _ = exp.create_model(name=None, run_settings=RunSettings("python")) + + +def test_ensemble_no_name(): + exp = Experiment("test_ensemble_no_name") + with pytest.raises(AttributeError): + _ = exp.create_ensemble( + name=None, run_settings=RunSettings("python"), replicas=2 + ) + + +def test_bad_exp_path() -> 
None: with pytest.raises(NotADirectoryError): exp = Experiment("test", "not-a-directory") -def test_type_exp_path(): +def test_type_exp_path() -> None: with pytest.raises(TypeError): exp = Experiment("test", ["this-is-a-list-dummy"]) -def test_stop_type(): +def test_stop_type() -> None: """Wrong argument type given to stop""" exp = Experiment("name") with pytest.raises(TypeError): exp.stop("model") -def test_finished_new_model(): +def test_finished_new_model() -> None: # finished should fail as this model hasn't been # launched yet. @@ -79,40 +104,40 @@ def test_finished_new_model(): exp.finished(model) -def test_status_typeerror(): +def test_status_typeerror() -> None: exp = Experiment("test") with pytest.raises(TypeError): exp.get_status([]) -def test_status_pre_launch(): +def test_status_pre_launch() -> None: model = Model("name", {}, "./", RunSettings("python")) exp = Experiment("test") - assert exp.get_status(model)[0] == STATUS_NEVER_STARTED + assert exp.get_status(model)[0] == SmartSimStatus.STATUS_NEVER_STARTED -def test_bad_ensemble_init_no_rs(): +def test_bad_ensemble_init_no_rs(test_dir: str) -> None: """params supplied without run settings""" - exp = Experiment("test") + exp = Experiment("test", exp_path=test_dir) with pytest.raises(SmartSimError): exp.create_ensemble("name", {"param1": 1}) -def test_bad_ensemble_init_no_params(): +def test_bad_ensemble_init_no_params(test_dir: str) -> None: """params supplied without run settings""" - exp = Experiment("test") + exp = Experiment("test", exp_path=test_dir) with pytest.raises(SmartSimError): exp.create_ensemble("name", run_settings=RunSettings("python")) -def test_bad_ensemble_init_no_rs_bs(): +def test_bad_ensemble_init_no_rs_bs(test_dir: str) -> None: """ensemble init without run settings or batch settings""" - exp = Experiment("test") + exp = Experiment("test", exp_path=test_dir) with pytest.raises(SmartSimError): exp.create_ensemble("name") -def test_stop_entity(test_dir): +def 
test_stop_entity(test_dir: str) -> None: exp_name = "test_stop_entity" exp = Experiment(exp_name, exp_path=test_dir) m = exp.create_model("model", path=test_dir, run_settings=RunSettings("sleep", "5")) @@ -122,7 +147,7 @@ def test_stop_entity(test_dir): assert exp.finished(m) == True -def test_poll(test_dir): +def test_poll(test_dir: str) -> None: # Ensure that a SmartSimError is not raised exp_name = "test_exp_poll" exp = Experiment(exp_name, exp_path=test_dir) @@ -134,7 +159,7 @@ def test_poll(test_dir): exp.stop(model) -def test_summary(test_dir): +def test_summary(test_dir: str) -> None: exp_name = "test_exp_summary" exp = Experiment(exp_name, exp_path=test_dir) m = exp.create_model( @@ -157,30 +182,189 @@ def test_summary(test_dir): assert 0 == int(row["Returncode"]) -def test_launcher_detection(wlmutils, monkeypatch): +def test_launcher_detection( + wlmutils: "conftest.WLMUtils", monkeypatch: pytest.MonkeyPatch +) -> None: if wlmutils.get_test_launcher() == "pals": pytest.skip(reason="Launcher detection cannot currently detect pbs vs pals") if wlmutils.get_test_launcher() == "local": monkeypatch.setenv("PATH", "") # Remove all WLMs from PATH + if wlmutils.get_test_launcher() == "dragon": + pytest.skip(reason="Launcher detection cannot currently detect dragon") exp = Experiment("test-launcher-detection", launcher="auto") assert exp._launcher == wlmutils.get_test_launcher() -def test_enable_disable_telemtery(monkeypatch): - # TODO: Currently these are implemented by setting an environment variable - # so that ALL experiments instanced in a driver script will begin - # producing telemetry data. 
In the future it is planned to have this - # work on a "per-instance" basis +def test_enable_disable_telemetry( + monkeypatch: pytest.MonkeyPatch, test_dir: str, config: Config +) -> None: + # Global telemetry defaults to `on` and can be modified by + # setting the value of env var SMARTSIM_FLAG_TELEMETRY to 0/1 monkeypatch.setattr(os, "environ", {}) - exp = Experiment("my-exp") - exp.enable_telemetry() - assert CONFIG.telemetry_enabled - exp.disable_telemetry() - assert not CONFIG.telemetry_enabled + exp = Experiment("my-exp", exp_path=test_dir) + exp.telemetry.enable() + assert exp.telemetry.is_enabled + + exp.telemetry.disable() + assert not exp.telemetry.is_enabled + + exp.telemetry.enable() + assert exp.telemetry.is_enabled + + exp.telemetry.disable() + assert not exp.telemetry.is_enabled + + exp.start() + mani_path = ( + pathlib.Path(test_dir) / config.telemetry_subdir / serialize.MANIFEST_FILENAME + ) + assert mani_path.exists() + +def test_telemetry_default( + monkeypatch: pytest.MonkeyPatch, test_dir: str, config: Config +) -> None: + """Ensure the default values for telemetry configuration match expectation + that experiment telemetry is on""" -def test_error_on_cobalt(): + # If env var related to telemetry doesn't exist, experiment should default to True + monkeypatch.setattr(os, "environ", {}) + exp = Experiment("my-exp", exp_path=test_dir) + assert exp.telemetry.is_enabled + + # If telemetry disabled in env, should get False + monkeypatch.setenv("SMARTSIM_FLAG_TELEMETRY", "0") + exp = Experiment("my-exp", exp_path=test_dir) + assert not exp.telemetry.is_enabled + + # If telemetry enabled in env, should get True + monkeypatch.setenv("SMARTSIM_FLAG_TELEMETRY", "1") + exp = Experiment("my-exp", exp_path=test_dir) + assert exp.telemetry.is_enabled + + +def test_error_on_cobalt() -> None: with pytest.raises(SSUnsupportedError): exp = Experiment("cobalt_exp", launcher="cobalt") + + +def test_default_orch_path( + monkeypatch: pytest.MonkeyPatch, test_dir: 
str, wlmutils: "conftest.WLMUtils" +) -> None: + """Ensure the default file structure is created for Orchestrator""" + + exp_name = "default-orch-path" + exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir) + monkeypatch.setattr(exp._control, "start", lambda *a, **kw: ...) + db = exp.create_database( + port=wlmutils.get_test_port(), interface=wlmutils.get_test_interface() + ) + exp.start(db) + orch_path = pathlib.Path(test_dir) / db.name + assert orch_path.exists() + assert db.path == str(orch_path) + + +def test_default_model_path( + monkeypatch: pytest.MonkeyPatch, test_dir: str, wlmutils: "conftest.WLMUtils" +) -> None: + """Ensure the default file structure is created for Model""" + + exp_name = "default-model-path" + exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir) + monkeypatch.setattr(exp._control, "start", lambda *a, **kw: ...) + settings = exp.create_run_settings(exe="echo", exe_args="hello") + model = exp.create_model(name="model_name", run_settings=settings) + exp.start(model) + model_path = pathlib.Path(test_dir) / model.name + assert model_path.exists() + assert model.path == str(model_path) + + +def test_default_ensemble_path( + monkeypatch: pytest.MonkeyPatch, test_dir: str, wlmutils: "conftest.WLMUtils" +) -> None: + """Ensure the default file structure is created for Ensemble""" + + exp_name = "default-ensemble-path" + exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir) + monkeypatch.setattr(exp._control, "start", lambda *a, **kw: ...) 
+ settings = exp.create_run_settings(exe="echo", exe_args="hello") + ensemble = exp.create_ensemble( + name="ensemble_name", run_settings=settings, replicas=2 + ) + exp.start(ensemble) + ensemble_path = pathlib.Path(test_dir) / ensemble.name + assert ensemble_path.exists() + assert ensemble.path == str(ensemble_path) + for member in ensemble.models: + member_path = ensemble_path / member.name + assert member_path.exists() + assert member.path == str(ensemble_path / member.name) + + +def test_user_orch_path( + monkeypatch: pytest.MonkeyPatch, test_dir: str, wlmutils: "conftest.WLMUtils" +) -> None: + """Ensure a relative path is used to created Orchestrator folder""" + + exp_name = "default-orch-path" + exp = Experiment(exp_name, launcher="local", exp_path=test_dir) + monkeypatch.setattr(exp._control, "start", lambda *a, **kw: ...) + db = exp.create_database( + port=wlmutils.get_test_port(), + interface=wlmutils.get_test_interface(), + path="./testing_folder1234", + ) + exp.start(db) + orch_path = pathlib.Path(osp.abspath("./testing_folder1234")) + assert orch_path.exists() + assert db.path == str(orch_path) + shutil.rmtree(orch_path) + + +def test_default_model_with_path( + monkeypatch: pytest.MonkeyPatch, test_dir: str, wlmutils: "conftest.WLMUtils" +) -> None: + """Ensure a relative path is used to created Model folder""" + + exp_name = "default-ensemble-path" + exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir) + monkeypatch.setattr(exp._control, "start", lambda *a, **kw: ...) 
+ settings = exp.create_run_settings(exe="echo", exe_args="hello") + model = exp.create_model( + name="model_name", run_settings=settings, path="./testing_folder1234" + ) + exp.start(model) + model_path = pathlib.Path(osp.abspath("./testing_folder1234")) + assert model_path.exists() + assert model.path == str(model_path) + shutil.rmtree(model_path) + + +def test_default_ensemble_with_path( + monkeypatch: pytest.MonkeyPatch, test_dir: str, wlmutils: "conftest.WLMUtils" +) -> None: + """Ensure a relative path is used to created Ensemble folder""" + + exp_name = "default-ensemble-path" + exp = Experiment(exp_name, launcher=wlmutils.get_test_launcher(), exp_path=test_dir) + monkeypatch.setattr(exp._control, "start", lambda *a, **kw: ...) + settings = exp.create_run_settings(exe="echo", exe_args="hello") + ensemble = exp.create_ensemble( + name="ensemble_name", + run_settings=settings, + path="./testing_folder1234", + replicas=2, + ) + exp.start(ensemble) + ensemble_path = pathlib.Path(osp.abspath("./testing_folder1234")) + assert ensemble_path.exists() + assert ensemble.path == str(ensemble_path) + for member in ensemble.models: + member_path = ensemble_path / member.name + assert member_path.exists() + assert member.path == str(member_path) + shutil.rmtree(ensemble_path) diff --git a/tests/test_fixtures.py b/tests/test_fixtures.py new file mode 100644 index 000000000..ea753374e --- /dev/null +++ b/tests/test_fixtures.py @@ -0,0 +1,56 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import os + +import psutil +import pytest + +from smartsim import Experiment +from smartsim.database import Orchestrator +from smartsim.error import SmartSimError +from smartsim.error.errors import SSUnsupportedError + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +def test_db_fixtures(local_experiment, local_db, prepare_db): + db = prepare_db(local_db).orchestrator + local_experiment.reconnect_orchestrator(db.checkpoint_file) + assert db.is_active() + local_experiment.stop(db) + + +def test_create_new_db_fixture_if_stopped(local_experiment, local_db, prepare_db): + # Run this twice to make sure that there is a stopped database + output = prepare_db(local_db) + local_experiment.reconnect_orchestrator(output.orchestrator.checkpoint_file) + local_experiment.stop(output.orchestrator) + + output = prepare_db(local_db) + assert output.new_db + local_experiment.reconnect_orchestrator(output.orchestrator.checkpoint_file) + assert 
output.orchestrator.is_active() diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 025f53d32..523ed7191 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -24,6 +24,9 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import collections +import signal + import pytest from smartsim._core.utils import helpers @@ -68,3 +71,115 @@ def test_encode_raises_on_empty(): def test_decode_raises_on_empty(): with pytest.raises(ValueError): helpers.decode_cmd("") + + +class MockSignal: + def __init__(self): + self.signal_handlers = collections.defaultdict(lambda: signal.SIG_IGN) + + def signal(self, signalnum, handler): + orig = self.getsignal(signalnum) + self.signal_handlers[signalnum] = handler + return orig + + def getsignal(self, signalnum): + return self.signal_handlers[signalnum] + + +@pytest.fixture +def mock_signal(monkeypatch): + mock_signal = MockSignal() + monkeypatch.setattr(helpers, "signal", mock_signal) + yield mock_signal + + +def test_signal_intercept_stack_will_register_itself_with_callback_fn(mock_signal): + callback = lambda num, frame: ... + stack = helpers.SignalInterceptionStack.get(signal.NSIG) + stack.push(callback) + assert isinstance(stack, helpers.SignalInterceptionStack) + assert stack is mock_signal.signal_handlers[signal.NSIG] + assert len(stack) == 1 + assert list(stack)[0] == callback + + +def test_signal_intercept_stack_keeps_track_of_previous_handlers(mock_signal): + default_handler = lambda num, frame: ... + mock_signal.signal_handlers[signal.NSIG] = default_handler + stack = helpers.SignalInterceptionStack.get(signal.NSIG) + stack.push(lambda n, f: ...) + assert stack._original is default_handler + + +def test_signal_intercept_stacks_are_registered_per_signal_number(mock_signal): + handler = lambda num, frame: ... 
+ stack_1 = helpers.SignalInterceptionStack.get(signal.NSIG) + stack_1.push(handler) + stack_2 = helpers.SignalInterceptionStack.get(signal.NSIG + 1) + stack_2.push(handler) + + assert mock_signal.signal_handlers[signal.NSIG] is stack_1 + assert mock_signal.signal_handlers[signal.NSIG + 1] is stack_2 + assert stack_1 is not stack_2 + assert list(stack_1) == list(stack_2) == [handler] + + +def test_signal_intercept_handlers_will_not_overwrite_if_handler_already_exists( + mock_signal, +): + handler_1 = lambda num, frame: ... + handler_2 = lambda num, frame: ... + stack_1 = helpers.SignalInterceptionStack.get(signal.NSIG) + stack_1.push(handler_1) + stack_2 = helpers.SignalInterceptionStack.get(signal.NSIG) + stack_2.push(handler_2) + assert stack_1 is stack_2 is mock_signal.signal_handlers[signal.NSIG] + assert list(stack_1) == [handler_2, handler_1] + + +def test_signal_intercept_stack_can_add_multiple_instances_of_the_same_handler( + mock_signal, +): + handler = lambda num, frame: ... + stack = helpers.SignalInterceptionStack.get(signal.NSIG) + stack.push(handler) + stack.push(handler) + assert list(stack) == [handler, handler] + + +def test_signal_intercept_stack_enforces_that_unique_push_handlers_are_unique( + mock_signal, +): + handler = lambda num, frame: ... + stack = helpers.SignalInterceptionStack.get(signal.NSIG) + assert stack.push_unique(handler) + assert not helpers.SignalInterceptionStack.get(signal.NSIG).push_unique(handler) + assert list(stack) == [handler] + + +def test_signal_intercept_stack_enforces_that_unique_push_method_handlers_are_unique( + mock_signal, +): + class C: + def fn(num, frame): ... 
+ + c1 = C() + c2 = C() + stack = helpers.SignalInterceptionStack.get(signal.NSIG) + stack.push_unique(c1.fn) + assert helpers.SignalInterceptionStack.get(signal.NSIG).push_unique(c2.fn) + assert not helpers.SignalInterceptionStack.get(signal.NSIG).push_unique(c1.fn) + assert list(stack) == [c2.fn, c1.fn] + + +def test_signal_handler_calls_functions_in_reverse_order(mock_signal): + called_list = [] + default = lambda num, frame: called_list.append("default") + handler_1 = lambda num, frame: called_list.append("handler_1") + handler_2 = lambda num, frame: called_list.append("handler_2") + + mock_signal.signal_handlers[signal.NSIG] = default + helpers.SignalInterceptionStack.get(signal.NSIG).push(handler_1) + helpers.SignalInterceptionStack.get(signal.NSIG).push(handler_2) + mock_signal.signal_handlers[signal.NSIG](signal.NSIG, None) + assert called_list == ["handler_2", "handler_1", "default"] diff --git a/tests/test_indirect.py b/tests/test_indirect.py index 73f381441..814302968 100644 --- a/tests/test_indirect.py +++ b/tests/test_indirect.py @@ -24,15 +24,15 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- import pathlib import sys import psutil import pytest +import conftest from smartsim._core.config import CONFIG -from smartsim._core.entrypoints.indirect import cleanup, get_parser, get_ts, main +from smartsim._core.entrypoints.indirect import cleanup, get_parser, get_ts_ms, main from smartsim._core.utils.helpers import encode_cmd ALL_ARGS = { @@ -141,7 +141,7 @@ def terminate(self) -> None: def test_ts(): """Ensure expected output type""" - ts = get_ts() + ts = get_ts_ms() assert isinstance(ts, int) @@ -182,24 +182,70 @@ def test_indirect_main_cmd_check(capsys, test_dir, monkeypatch): assert "Invalid cmd supplied" in ex.value.args[0] -def test_complete_process(fileutils, test_dir): - """Ensure the happy-path completes and returns a success return code""" +def test_process_failure(fileutils, test_dir: str, monkeypatch: pytest.MonkeyPatch): + """Ensure that a stop event is logged if the process unexpectedly terminates""" + mock_pid = 1122334455 + create_msg = "creating: {0}" + term_msg = "term: {0}" + wait_msg = "wait: {0}" + + class MockProc: + def __init__(self, *args, **kwargs): + print(create_msg.format(mock_pid)) + + @property + def pid(self): + return mock_pid + + def terminate(self): + print(term_msg.format(mock_pid)) + + def wait(self): + print(wait_msg.format(mock_pid)) + raise Exception("You shall not pass!") + script = fileutils.get_test_conf_path("sleep.py") exp_dir = pathlib.Path(test_dir) - raw_cmd = f"{sys.executable} {script} --time=1" + raw_cmd = f"{sys.executable} {script} --time=10" cmd = encode_cmd(raw_cmd.split()) - rc = main(cmd, "application", exp_dir, exp_dir / CONFIG.telemetry_subdir) - assert rc == 0 + mock_track = conftest.CountingCallable() + + with monkeypatch.context() as ctx: + ctx.setattr("smartsim._core.entrypoints.indirect.write_event", mock_track) + ctx.setattr("psutil.pid_exists", lambda pid: True) + ctx.setattr("psutil.Popen", MockProc) + ctx.setattr("psutil.Process", MockProc) # handle the proc.terminate() + 
ctx.setattr("smartsim._core.entrypoints.indirect.STEP_PID", mock_pid) + + rc = main(cmd, "application", exp_dir, exp_dir / CONFIG.telemetry_subdir) + assert rc == -1 + + (args1, _), (args2, kwargs2) = mock_track.details + assert "start" in args1 + assert "stop" in args2 + assert kwargs2.get("returncode", -1) + + +def test_complete_process( + fileutils: conftest.FileUtils, test_dir: str, monkeypatch: pytest.MonkeyPatch +) -> None: + """Ensure the happy-path completes and returns a success return code""" + script = fileutils.get_test_conf_path("sleep.py") - assert exp_dir.exists() + exp_dir = pathlib.Path(test_dir) - # NOTE: don't have a manifest so we're falling back to default event path - data_dir = exp_dir / CONFIG.telemetry_subdir - start_events = list(data_dir.rglob("start.json")) - stop_events = list(data_dir.rglob("stop.json")) + raw_cmd = f"{sys.executable} {script} --time=1" + cmd = encode_cmd(raw_cmd.split()) + + mock_track = conftest.CountingCallable() + with monkeypatch.context() as ctx: + ctx.setattr("smartsim._core.entrypoints.indirect.write_event", mock_track) + rc = main(cmd, "application", exp_dir, exp_dir / CONFIG.telemetry_subdir) + assert rc == 0 - assert start_events - assert stop_events + (args1, _), (args2, _) = mock_track.details + assert "start" in args1 + assert "stop" in args2 diff --git a/tests/test_interrupt.py b/tests/test_interrupt.py index 28c48e0db..c38ae0225 100644 --- a/tests/test_interrupt.py +++ b/tests/test_interrupt.py @@ -63,22 +63,22 @@ def test_interrupt_blocked_jobs(test_dir): replicas=2, run_settings=RunSettings("sleep", "100"), ) - ensemble.set_path(test_dir) num_jobs = 1 + len(ensemble) - try: - pid = os.getpid() - keyboard_interrupt_thread = Thread( - name="sigint_thread", target=keyboard_interrupt, args=(pid,) - ) - keyboard_interrupt_thread.start() + pid = os.getpid() + keyboard_interrupt_thread = Thread( + name="sigint_thread", target=keyboard_interrupt, args=(pid,) + ) + keyboard_interrupt_thread.start() + + with 
pytest.raises(KeyboardInterrupt): exp.start(model, ensemble, block=True, kill_on_interrupt=True) - except KeyboardInterrupt: - time.sleep(2) # allow time for jobs to be stopped - active_jobs = exp._control._jobs.jobs - active_db_jobs = exp._control._jobs.db_jobs - completed_jobs = exp._control._jobs.completed - assert len(active_jobs) + len(active_db_jobs) == 0 - assert len(completed_jobs) == num_jobs + + time.sleep(2) # allow time for jobs to be stopped + active_jobs = exp._control._jobs.jobs + active_db_jobs = exp._control._jobs.db_jobs + completed_jobs = exp._control._jobs.completed + assert len(active_jobs) + len(active_db_jobs) == 0 + assert len(completed_jobs) == num_jobs def test_interrupt_multi_experiment_unblocked_jobs(test_dir): @@ -104,22 +104,23 @@ def test_interrupt_multi_experiment_unblocked_jobs(test_dir): replicas=2, run_settings=RunSettings("sleep", "100"), ) - ensemble.set_path(test_dir) jobs_per_experiment[i] = 1 + len(ensemble) - try: - pid = os.getpid() - keyboard_interrupt_thread = Thread( - name="sigint_thread", target=keyboard_interrupt, args=(pid,) - ) - keyboard_interrupt_thread.start() + + pid = os.getpid() + keyboard_interrupt_thread = Thread( + name="sigint_thread", target=keyboard_interrupt, args=(pid,) + ) + keyboard_interrupt_thread.start() + + with pytest.raises(KeyboardInterrupt): for experiment in experiments: experiment.start(model, ensemble, block=False, kill_on_interrupt=True) - time.sleep(9) # since jobs aren't blocked, wait for SIGINT - except KeyboardInterrupt: - time.sleep(2) # allow time for jobs to be stopped - for i, experiment in enumerate(experiments): - active_jobs = experiment._control._jobs.jobs - active_db_jobs = experiment._control._jobs.db_jobs - completed_jobs = experiment._control._jobs.completed - assert len(active_jobs) + len(active_db_jobs) == 0 - assert len(completed_jobs) == jobs_per_experiment[i] + keyboard_interrupt_thread.join() # since jobs aren't blocked, wait for SIGINT + + time.sleep(2) # allow time 
for jobs to be stopped + for i, experiment in enumerate(experiments): + active_jobs = experiment._control._jobs.jobs + active_db_jobs = experiment._control._jobs.db_jobs + completed_jobs = experiment._control._jobs.completed + assert len(active_jobs) + len(active_db_jobs) == 0 + assert len(completed_jobs) == jobs_per_experiment[i] diff --git a/tests/test_launch_errors.py b/tests/test_launch_errors.py index 0557f3cf4..21b3184e5 100644 --- a/tests/test_launch_errors.py +++ b/tests/test_launch_errors.py @@ -27,18 +27,19 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment from smartsim.database import Orchestrator from smartsim.error import SSUnsupportedError from smartsim.settings import JsrunSettings, RunSettings +from smartsim.status import SmartSimStatus # The tests in this file belong to the group_a group pytestmark = pytest.mark.group_a -def test_unsupported_run_settings(): +def test_unsupported_run_settings(test_dir): exp_name = "test-unsupported-run-settings" - exp = Experiment(exp_name, launcher="slurm") + exp = Experiment(exp_name, launcher="slurm", exp_path=test_dir) bad_settings = JsrunSettings("echo", "hello") model = exp.create_model("bad_rs", bad_settings) @@ -57,7 +58,7 @@ def test_model_failure(fileutils, test_dir): exp.start(M1, block=True) statuses = exp.get_status(M1) - assert all([stat == status.STATUS_FAILED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_FAILED for stat in statuses]) def test_orchestrator_relaunch(test_dir, wlmutils): @@ -65,9 +66,9 @@ def test_orchestrator_relaunch(test_dir, wlmutils): exp_name = "test-orc-on-relaunch" exp = Experiment(exp_name, launcher="local", exp_path=test_dir) - orc = Orchestrator(port=wlmutils.get_test_port()) + orc = Orchestrator(port=wlmutils.get_test_port(), db_identifier="orch_1") orc.set_path(test_dir) - orc_1 = Orchestrator(port=wlmutils.get_test_port() + 1) + orc_1 = Orchestrator(port=wlmutils.get_test_port() + 1, db_identifier="orch_2") 
orc_1.set_path(test_dir) try: exp.start(orc) diff --git a/tests/test_local_launch.py b/tests/test_local_launch.py index 7befff95e..85687e014 100644 --- a/tests/test_local_launch.py +++ b/tests/test_local_launch.py @@ -26,7 +26,8 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment +from smartsim.status import SmartSimStatus # The tests in this file belong to the group_a group pytestmark = pytest.mark.group_a @@ -49,7 +50,7 @@ def test_models(fileutils, test_dir): exp.start(M1, M2, block=True, summary=True) statuses = exp.get_status(M1, M2) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) def test_ensemble(fileutils, test_dir): @@ -60,8 +61,7 @@ def test_ensemble(fileutils, test_dir): settings = exp.create_run_settings("python", f"{script} --time=3") ensemble = exp.create_ensemble("e1", run_settings=settings, replicas=2) - ensemble.set_path(test_dir) exp.start(ensemble, block=True, summary=True) statuses = exp.get_status(ensemble) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) diff --git a/tests/test_local_multi_run.py b/tests/test_local_multi_run.py index 576e290ca..a2c1d70ee 100644 --- a/tests/test_local_multi_run.py +++ b/tests/test_local_multi_run.py @@ -26,7 +26,8 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment +from smartsim.status import SmartSimStatus # The tests in this file belong to the group_a group pytestmark = pytest.mark.group_a @@ -49,9 +50,9 @@ def test_models(fileutils, test_dir): exp.start(M1, block=False) statuses = exp.get_status(M1) - assert all([stat != status.STATUS_FAILED for stat in statuses]) + assert all([stat != SmartSimStatus.STATUS_FAILED for stat in statuses]) # start another while first model is running exp.start(M2, block=True) statuses = exp.get_status(M1, M2) 
- assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) diff --git a/tests/test_local_restart.py b/tests/test_local_restart.py index c59aebd7b..2556c5597 100644 --- a/tests/test_local_restart.py +++ b/tests/test_local_restart.py @@ -26,7 +26,8 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment +from smartsim.status import SmartSimStatus # The tests in this file belong to the group_b group pytestmark = pytest.mark.group_b @@ -48,12 +49,12 @@ def test_restart(fileutils, test_dir): exp.start(M1, block=True) statuses = exp.get_status(M1) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) # restart the model exp.start(M1, block=True) statuses = exp.get_status(M1) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) def test_ensemble(fileutils, test_dir): @@ -64,13 +65,12 @@ def test_ensemble(fileutils, test_dir): settings = exp.create_run_settings("python", f"{script} --time=3") ensemble = exp.create_ensemble("e1", run_settings=settings, replicas=2) - ensemble.set_path(test_dir) exp.start(ensemble, block=True) statuses = exp.get_status(ensemble) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) # restart the ensemble exp.start(ensemble, block=True) statuses = exp.get_status(ensemble) - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) diff --git a/tests/test_logs.py b/tests/test_logs.py index 88c6a738f..a187baa2a 100644 --- a/tests/test_logs.py +++ b/tests/test_logs.py @@ -27,6 +27,7 @@ import io import logging import pathlib +import socket import pytest @@ -91,8 +92,10 
@@ def test_add_exp_loggers(test_dir): logger = logging.getLogger("smartsim_test_add_exp_loggers") logger.addHandler(logging.StreamHandler(faux_out_stream)) - out_file = pathlib.Path(test_dir) / "smartsim.out" - err_file = pathlib.Path(test_dir) / "smartsim.err" + logger.addFilter(smartsim.log.HostnameFilter()) + + out_file = pathlib.Path(test_dir) / "logs/smartsim.out" + err_file = pathlib.Path(test_dir) / "logs/smartsim.err" filter_fn = lambda x: True @@ -210,3 +213,35 @@ def thrower(_self): assert ctx_var.get() == original_ctx_value ctx_var.reset(token) assert ctx_var.get() == "" + + +def test_hostname_filter_results() -> None: + """Ensure the hostname filter returns true for all records, even if not enriched""" + filter = smartsim.log.HostnameFilter("test-filter") + record = logging.LogRecord( + "name", logging.INFO, "/foo/bar", 42, "this is your message", None, None + ) + + # no hostname, will be enriched. + passes_filter = filter.filter(record) + assert passes_filter + + # has hostname, will NOT be enriched. 
+ passes_filter = filter.filter(record) + assert passes_filter + + +def test_hostname_filter() -> None: + """Ensure the hostname filter adds a hostname to the log record""" + filter = smartsim.log.HostnameFilter("test-filter") + + exp_name = socket.gethostname() + record = logging.LogRecord( + "name", logging.INFO, "/foo/bar", 42, "this is your message", None, None + ) + + filter.filter(record) + assert hasattr(record, "hostname") + + name = getattr(record, "hostname") + assert exp_name == name diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 33fc6b163..c26868ebb 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -27,6 +27,7 @@ import os.path from copy import deepcopy +from uuid import uuid4 import pytest @@ -60,7 +61,6 @@ orc = Orchestrator() orc_1 = deepcopy(orc) orc_1.name = "orc2" -model_no_name = exp.create_model(name=None, run_settings=rs) db_script = DBScript("some-script", "def main():\n print('hello world')\n") db_model = DBModel("some-model", "TORCH", b"some-model-bytes") @@ -75,11 +75,6 @@ def test_separate(): assert manifest.dbs[0] == orc -def test_no_name(): - with pytest.raises(AttributeError): - _ = Manifest(model_no_name) - - def test_separate_type(): with pytest.raises(TypeError): _ = Manifest([1, 2, 3]) @@ -159,7 +154,7 @@ def test_launched_manifest_transform_data(): def test_launched_manifest_builder_correctly_maps_data(): - lmb = LaunchedManifestBuilder("name", "path", "launcher name") + lmb = LaunchedManifestBuilder("name", "path", "launcher name", str(uuid4())) lmb.add_model(model, 1) lmb.add_model(model_2, 1) lmb.add_ensemble(ensemble, [i for i in range(len(ensemble.entities))]) @@ -172,7 +167,7 @@ def test_launched_manifest_builder_correctly_maps_data(): def test_launced_manifest_builder_raises_if_lens_do_not_match(): - lmb = LaunchedManifestBuilder("name", "path", "launcher name") + lmb = LaunchedManifestBuilder("name", "path", "launcher name", str(uuid4())) with pytest.raises(ValueError): 
lmb.add_ensemble(ensemble, list(range(123))) with pytest.raises(ValueError): @@ -182,7 +177,7 @@ def test_launced_manifest_builder_raises_if_lens_do_not_match(): def test_launched_manifest_builer_raises_if_attaching_data_to_empty_collection( monkeypatch, ): - lmb = LaunchedManifestBuilder("name", "path", "launcher") + lmb = LaunchedManifestBuilder("name", "path", "launcher", str(uuid4())) monkeypatch.setattr(ensemble, "entities", []) with pytest.raises(ValueError): lmb.add_ensemble(ensemble, []) @@ -190,7 +185,7 @@ def test_launched_manifest_builer_raises_if_attaching_data_to_empty_collection( def test_lmb_and_launched_manifest_have_same_paths_for_launched_metadata(): exp_path = "/path/to/some/exp" - lmb = LaunchedManifestBuilder("exp_name", exp_path, "launcher") + lmb = LaunchedManifestBuilder("exp_name", exp_path, "launcher", str(uuid4())) manifest = lmb.finalize() assert ( lmb.exp_telemetry_subdirectory == manifest.metadata.exp_telemetry_subdirectory diff --git a/tests/test_model.py b/tests/test_model.py index a1b5ba505..64a68b299 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -24,6 +24,8 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+from uuid import uuid4 + import pytest from smartsim import Experiment @@ -114,8 +116,10 @@ def launch_step_nop(self, step, entity): return _monkeypatch_exp_controller -def test_model_with_batch_settings_makes_batch_step(monkeypatch_exp_controller): - exp = Experiment("experiment", launcher="slurm") +def test_model_with_batch_settings_makes_batch_step( + monkeypatch_exp_controller, test_dir +): + exp = Experiment("experiment", launcher="slurm", exp_path=test_dir) bs = SbatchSettings() rs = SrunSettings("python", exe_args="sleep.py") model = exp.create_model("test_model", run_settings=rs, batch_settings=bs) @@ -130,9 +134,9 @@ def test_model_with_batch_settings_makes_batch_step(monkeypatch_exp_controller): def test_model_without_batch_settings_makes_run_step( - monkeypatch, monkeypatch_exp_controller + monkeypatch, monkeypatch_exp_controller, test_dir ): - exp = Experiment("experiment", launcher="slurm") + exp = Experiment("experiment", launcher="slurm", exp_path=test_dir) rs = SrunSettings("python", exe_args="sleep.py") model = exp.create_model("test_model", run_settings=rs) @@ -148,8 +152,10 @@ def test_model_without_batch_settings_makes_run_step( assert isinstance(step, SrunStep) -def test_models_batch_settings_are_ignored_in_ensemble(monkeypatch_exp_controller): - exp = Experiment("experiment", launcher="slurm") +def test_models_batch_settings_are_ignored_in_ensemble( + monkeypatch_exp_controller, test_dir +): + exp = Experiment("experiment", launcher="slurm", exp_path=test_dir) bs_1 = SbatchSettings(nodes=5) rs = SrunSettings("python", exe_args="sleep.py") model = exp.create_model("test_model", run_settings=rs, batch_settings=bs_1) diff --git a/tests/test_multidb.py b/tests/test_multidb.py index af21f5a1e..81f21856a 100644 --- a/tests/test_multidb.py +++ b/tests/test_multidb.py @@ -27,11 +27,12 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment from smartsim.database import Orchestrator from smartsim.entity.entity import 
SmartSimEntity from smartsim.error.errors import SSDBIDConflictError from smartsim.log import get_logger +from smartsim.status import SmartSimStatus # The tests in this file belong to the group_b group pytestmark = pytest.mark.group_b @@ -51,7 +52,7 @@ def make_entity_context(exp: Experiment, entity: SmartSimEntity): try: yield entity finally: - if exp.get_status(entity)[0] == status.STATUS_RUNNING: + if exp.get_status(entity)[0] == SmartSimStatus.STATUS_RUNNING: exp.stop(entity) @@ -65,7 +66,7 @@ def choose_host(wlmutils, index=0): def check_not_failed(exp, *args): statuses = exp.get_status(*args) - assert all(stat is not status.STATUS_FAILED for stat in statuses) + assert all(stat is not SmartSimStatus.STATUS_FAILED for stat in statuses) @pytest.mark.parametrize("db_type", supported_dbs) @@ -152,7 +153,6 @@ def test_db_identifier_colo_then_standard( # Create the SmartSim Model smartsim_model = exp.create_model("colocated_model", colo_settings) - smartsim_model.set_path(test_dir) db_args = { "port": test_port, @@ -227,7 +227,7 @@ def test_db_identifier_standard_twice_not_unique(wlmutils, test_dir): assert orc2.name == "my_db" # CREATE DATABASE with db_identifier - with make_entity_context(exp, orc), make_entity_context(exp, orc2): + with make_entity_context(exp, orc2), make_entity_context(exp, orc): exp.start(orc) with pytest.raises(SSDBIDConflictError) as ex: exp.start(orc2) @@ -325,7 +325,6 @@ def test_multidb_colo_once(fileutils, test_dir, wlmutils, coloutils, db_type): # Create the SmartSim Model smartsim_model = exp.create_model("smartsim_model", run_settings) - smartsim_model.set_path(test_dir) db_args = { "port": test_port + 1, @@ -404,7 +403,9 @@ def test_multidb_colo_then_standard(fileutils, test_dir, wlmutils, coloutils, db # Retrieve parameters from testing environment test_port = wlmutils.get_test_port() - test_script = fileutils.get_test_conf_path("smartredis/multidbid.py") + test_script = fileutils.get_test_conf_path( + 
"smartredis/multidbid_colo_env_vars_only.py" + ) test_interface = wlmutils.get_test_interface() test_launcher = wlmutils.get_test_launcher() @@ -434,8 +435,9 @@ def test_multidb_colo_then_standard(fileutils, test_dir, wlmutils, coloutils, db ) with make_entity_context(exp, db), make_entity_context(exp, smartsim_model): + exp.start(smartsim_model, block=False) exp.start(db) - exp.start(smartsim_model, block=True) + exp.poll(smartsim_model) check_not_failed(exp, db, smartsim_model) diff --git a/tests/test_orc_config_settings.py b/tests/test_orc_config_settings.py index 365596496..74d0c1af2 100644 --- a/tests/test_orc_config_settings.py +++ b/tests/test_orc_config_settings.py @@ -27,6 +27,7 @@ import pytest +from smartsim.database import Orchestrator from smartsim.error import SmartSimError try: @@ -40,14 +41,15 @@ pytestmark = pytest.mark.group_b -def test_config_methods(dbutils, local_db): +def test_config_methods(dbutils, prepare_db, local_db): """Test all configuration file edit methods on an active db""" + db = prepare_db(local_db).orchestrator # test the happy path and ensure all configuration file edit methods # successfully execute when given correct key-value pairs configs = dbutils.get_db_configs() for setting, value in configs.items(): - config_set_method = dbutils.get_config_edit_method(local_db, setting) + config_set_method = dbutils.get_config_edit_method(db, setting) config_set_method(value) # ensure SmartSimError is raised when Orchestrator.set_db_conf @@ -56,7 +58,7 @@ def test_config_methods(dbutils, local_db): for key, value_list in ss_error_configs.items(): for value in value_list: with pytest.raises(SmartSimError): - local_db.set_db_conf(key, value) + db.set_db_conf(key, value) # ensure TypeError is raised when Orchestrator.set_db_conf # is given either a key or a value that is not a string @@ -64,14 +66,14 @@ def test_config_methods(dbutils, local_db): for key, value_list in type_error_configs.items(): for value in value_list: with 
pytest.raises(TypeError): - local_db.set_db_conf(key, value) + db.set_db_conf(key, value) -def test_config_methods_inactive(wlmutils, dbutils): +def test_config_methods_inactive(dbutils): """Ensure a SmartSimError is raised when trying to set configurations on an inactive database """ - db = wlmutils.get_orchestrator() + db = Orchestrator() configs = dbutils.get_db_configs() for setting, value in configs.items(): config_set_method = dbutils.get_config_edit_method(db, setting) diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index f87aa9331..66fb894f7 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -25,6 +25,8 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import typing as t + import psutil import pytest @@ -37,7 +39,11 @@ pytestmark = pytest.mark.slow_tests -def test_orc_parameters(): +if t.TYPE_CHECKING: + import conftest + + +def test_orc_parameters() -> None: threads_per_queue = 2 inter_op_threads = 2 intra_op_threads = 2 @@ -57,45 +63,33 @@ def test_orc_parameters(): assert "INTER_OP_PARALLELISM" in module_str -def test_is_not_active(): +def test_is_not_active() -> None: db = Orchestrator(db_nodes=1) assert not db.is_active() -def test_inactive_orc_get_address(): +def test_inactive_orc_get_address() -> None: db = Orchestrator() with pytest.raises(SmartSimError): db.get_address() -def test_orc_active_functions(test_dir, wlmutils): - exp_name = "test_orc_active_functions" - exp = Experiment(exp_name, launcher="local", exp_path=test_dir) - - db = Orchestrator(port=wlmutils.get_test_port()) - db.set_path(test_dir) - - exp.start(db) - - # check if the orchestrator is active +def test_orc_is_active_functions( + local_experiment, + prepare_db, + local_db, +) -> None: + db = prepare_db(local_db).orchestrator + db = local_experiment.reconnect_orchestrator(db.checkpoint_file) assert db.is_active() # check if the orchestrator can get the address - correct_address = db.get_address() == 
["127.0.0.1:" + str(wlmutils.get_test_port())] - if not correct_address: - exp.stop(db) - assert False + assert db.get_address() == [f"127.0.0.1:{db.ports[0]}"] - exp.stop(db) - assert not db.is_active() - - # check if orchestrator.get_address() raises an exception - with pytest.raises(SmartSimError): - db.get_address() - - -def test_multiple_interfaces(test_dir, wlmutils): +def test_multiple_interfaces( + test_dir: str, wlmutils: t.Type["conftest.WLMUtils"] +) -> None: exp_name = "test_multiple_interfaces" exp = Experiment(exp_name, launcher="local", exp_path=test_dir) @@ -106,7 +100,8 @@ def test_multiple_interfaces(test_dir, wlmutils): net_if_addrs = ["lo", net_if_addrs[0]] - db = Orchestrator(port=wlmutils.get_test_port(), interface=net_if_addrs) + port = wlmutils.get_test_port() + db = Orchestrator(port=port, interface=net_if_addrs) db.set_path(test_dir) exp.start(db) @@ -115,15 +110,16 @@ def test_multiple_interfaces(test_dir, wlmutils): assert db.is_active() # check if the orchestrator can get the address - correct_address = db.get_address() == ["127.0.0.1:" + str(wlmutils.get_test_port())] - if not correct_address: + correct_address = [f"127.0.0.1:{port}"] + + if not correct_address == db.get_address(): exp.stop(db) assert False exp.stop(db) -def test_catch_local_db_errors(): +def test_catch_local_db_errors() -> None: # local database with more than one node not allowed with pytest.raises(SSUnsupportedError): db = Orchestrator(db_nodes=2) @@ -140,7 +136,7 @@ def test_catch_local_db_errors(): ##### PBS ###### -def test_pbs_set_run_arg(wlmutils): +def test_pbs_set_run_arg(wlmutils: t.Type["conftest.WLMUtils"]) -> None: orc = Orchestrator( wlmutils.get_test_port(), db_nodes=3, @@ -159,7 +155,7 @@ def test_pbs_set_run_arg(wlmutils): ) -def test_pbs_set_batch_arg(wlmutils): +def test_pbs_set_batch_arg(wlmutils: t.Type["conftest.WLMUtils"]) -> None: orc = Orchestrator( wlmutils.get_test_port(), db_nodes=3, @@ -188,7 +184,7 @@ def test_pbs_set_batch_arg(wlmutils): 
##### Slurm ###### -def test_slurm_set_run_arg(wlmutils): +def test_slurm_set_run_arg(wlmutils: t.Type["conftest.WLMUtils"]) -> None: orc = Orchestrator( wlmutils.get_test_port(), db_nodes=3, @@ -203,7 +199,7 @@ def test_slurm_set_run_arg(wlmutils): ) -def test_slurm_set_batch_arg(wlmutils): +def test_slurm_set_batch_arg(wlmutils: t.Type["conftest.WLMUtils"]) -> None: orc = Orchestrator( wlmutils.get_test_port(), db_nodes=3, @@ -234,7 +230,7 @@ def test_slurm_set_batch_arg(wlmutils): pytest.param(False, id="Multiple `srun`s"), ], ) -def test_orc_results_in_correct_number_of_shards(single_cmd): +def test_orc_results_in_correct_number_of_shards(single_cmd: bool) -> None: num_shards = 5 orc = Orchestrator( port=12345, @@ -259,7 +255,7 @@ def test_orc_results_in_correct_number_of_shards(single_cmd): ###### LSF ###### -def test_catch_orc_errors_lsf(wlmutils): +def test_catch_orc_errors_lsf(wlmutils: t.Type["conftest.WLMUtils"]) -> None: with pytest.raises(SSUnsupportedError): orc = Orchestrator( wlmutils.get_test_port(), @@ -282,7 +278,7 @@ def test_catch_orc_errors_lsf(wlmutils): orc.set_batch_arg("P", "MYPROJECT") -def test_lsf_set_run_args(wlmutils): +def test_lsf_set_run_args(wlmutils: t.Type["conftest.WLMUtils"]) -> None: orc = Orchestrator( wlmutils.get_test_port(), db_nodes=3, @@ -295,7 +291,7 @@ def test_lsf_set_run_args(wlmutils): assert all(["l" not in db.run_settings.run_args for db in orc.entities]) -def test_lsf_set_batch_args(wlmutils): +def test_lsf_set_batch_args(wlmutils: t.Type["conftest.WLMUtils"]) -> None: orc = Orchestrator( wlmutils.get_test_port(), db_nodes=3, @@ -308,3 +304,24 @@ def test_lsf_set_batch_args(wlmutils): assert orc.batch_settings.batch_args["m"] == '"batch host1 host2"' orc.set_batch_arg("D", "102400000") assert orc.batch_settings.batch_args["D"] == "102400000" + + +def test_orc_telemetry(test_dir: str, wlmutils: t.Type["conftest.WLMUtils"]) -> None: + """Ensure the default behavior for an orchestrator is to disable telemetry""" + 
db = Orchestrator(port=wlmutils.get_test_port()) + db.set_path(test_dir) + + # default is disabled + assert not db.telemetry.is_enabled + + # ensure updating value works as expected + db.telemetry.enable() + assert db.telemetry.is_enabled + + # toggle back + db.telemetry.disable() + assert not db.telemetry.is_enabled + + # toggle one more time + db.telemetry.enable() + assert db.telemetry.is_enabled diff --git a/tests/test_output_files.py b/tests/test_output_files.py new file mode 100644 index 000000000..f3830051c --- /dev/null +++ b/tests/test_output_files.py @@ -0,0 +1,169 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import os +import pathlib + +import pytest + +from smartsim import Experiment +from smartsim._core.config import CONFIG +from smartsim._core.control.controller import Controller, _AnonymousBatchJob +from smartsim._core.launcher.step import Step +from smartsim.database.orchestrator import Orchestrator +from smartsim.entity.ensemble import Ensemble +from smartsim.entity.model import Model +from smartsim.settings.base import RunSettings +from smartsim.settings.slurmSettings import SbatchSettings, SrunSettings + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + +controller = Controller() +slurm_controller = Controller(launcher="slurm") + +rs = RunSettings("echo", ["spam", "eggs"]) +bs = SbatchSettings() +batch_rs = SrunSettings("echo", ["spam", "eggs"]) + +ens = Ensemble("ens", params={}, run_settings=rs, batch_settings=bs, replicas=3) +orc = Orchestrator(db_nodes=3, batch=True, launcher="slurm", run_command="srun") +model = Model("test_model", params={}, path="", run_settings=rs) +batch_model = Model( + "batch_test_model", params={}, path="", run_settings=batch_rs, batch_settings=bs +) +anon_batch_model = _AnonymousBatchJob(batch_model) + + +def test_mutated_model_output(test_dir): + exp_name = "test-mutated-model-output" + exp = Experiment(exp_name, launcher="local", exp_path=test_dir) + + test_model = exp.create_model("test_model", path=test_dir, run_settings=rs) + exp.generate(test_model) + exp.start(test_model, block=True) + + assert pathlib.Path(test_model.path).exists() + assert pathlib.Path(test_model.path, f"{test_model.name}.out").is_symlink() + assert pathlib.Path(test_model.path, f"{test_model.name}.err").is_symlink() + + with open(pathlib.Path(test_model.path, f"{test_model.name}.out"), "r") as file: + log_contents = file.read() + + assert "spam eggs" in log_contents + + first_link = os.readlink(pathlib.Path(test_model.path, f"{test_model.name}.out")) + + test_model.run_settings.exe_args = ["hello", "world"] + 
exp.generate(test_model, overwrite=True) + exp.start(test_model, block=True) + + assert pathlib.Path(test_model.path).exists() + assert pathlib.Path(test_model.path, f"{test_model.name}.out").is_symlink() + assert pathlib.Path(test_model.path, f"{test_model.name}.err").is_symlink() + + with open(pathlib.Path(test_model.path, f"{test_model.name}.out"), "r") as file: + log_contents = file.read() + + assert "hello world" in log_contents + + second_link = os.readlink(pathlib.Path(test_model.path, f"{test_model.name}.out")) + + with open(first_link, "r") as file: + first_historical_log = file.read() + + assert "spam eggs" in first_historical_log + + with open(second_link, "r") as file: + second_historical_log = file.read() + + assert "hello world" in second_historical_log + + +def test_get_output_files_with_create_job_step(test_dir): + """Testing output files through _create_job_step""" + exp_dir = pathlib.Path(test_dir) + status_dir = exp_dir / CONFIG.telemetry_subdir / model.type + step = controller._create_job_step(model, status_dir) + expected_out_path = status_dir / model.name / (model.name + ".out") + expected_err_path = status_dir / model.name / (model.name + ".err") + assert step.get_output_files() == (str(expected_out_path), str(expected_err_path)) + + +@pytest.mark.parametrize( + "entity", + [pytest.param(ens, id="ensemble"), pytest.param(orc, id="orchestrator")], +) +def test_get_output_files_with_create_batch_job_step(entity, test_dir): + """Testing output files through _create_batch_job_step""" + exp_dir = pathlib.Path(test_dir) + status_dir = exp_dir / CONFIG.telemetry_subdir / entity.type + batch_step, substeps = slurm_controller._create_batch_job_step(entity, status_dir) + for step in substeps: + # example output path for a member of an Ensemble is + # .smartsim/telemetry/Ensemble/ens/ens_0/ens_0.out + expected_out_path = ( + status_dir / entity.name / step.entity_name / (step.entity_name + ".out") + ) + expected_err_path = ( + status_dir / entity.name / 
step.entity_name / (step.entity_name + ".err") + ) + assert step.get_output_files() == ( + str(expected_out_path), + str(expected_err_path), + ) + + +def test_model_get_output_files(test_dir): + """Testing model output files with manual step creation""" + exp_dir = pathlib.Path(test_dir) + step = Step(model.name, model.path, model.run_settings) + step.meta["status_dir"] = exp_dir / "output_dir" + expected_out_path = step.meta["status_dir"] / (model.name + ".out") + expected_err_path = step.meta["status_dir"] / (model.name + ".err") + assert step.get_output_files() == (str(expected_out_path), str(expected_err_path)) + + +def test_ensemble_get_output_files(test_dir): + """Testing ensemble output files with manual step creation""" + exp_dir = pathlib.Path(test_dir) + for member in ens.models: + step = Step(member.name, member.path, member.run_settings) + step.meta["status_dir"] = exp_dir / "output_dir" + expected_out_path = step.meta["status_dir"] / (member.name + ".out") + expected_err_path = step.meta["status_dir"] / (member.name + ".err") + assert step.get_output_files() == ( + str(expected_out_path), + str(expected_err_path), + ) + + +def test_get_output_files_no_status_dir(test_dir): + """Test that a step not having a status directory throws a KeyError""" + step_settings = RunSettings("echo") + step = Step("mock-step", test_dir, step_settings) + with pytest.raises(KeyError): + out, err = step.get_output_files() diff --git a/tests/test_pbs_parser.py b/tests/test_pbs_parser.py index f77eb7c93..ae01ffb19 100644 --- a/tests/test_pbs_parser.py +++ b/tests/test_pbs_parser.py @@ -72,3 +72,23 @@ def test_parse_qstat_status(): status = "R" parsed_status = pbsParser.parse_qstat_jobid(output, "1289903.sdb") assert status == parsed_status + + +def test_parse_qstat_status_not_found(): + output = ( + "Job id Name User Time Use S Queue\n" + "---------------- ---------------- ---------------- -------- - -----\n" + "1289903.sdb jobname username 00:00:00 R queue\n" + ) + 
parsed_status = pbsParser.parse_qstat_jobid(output, "9999999.sdb") + + assert parsed_status is None + + +def test_parse_qstat_status_json(fileutils): + """Parse nodes from qsub called with -f -F json""" + file_path = fileutils.get_test_conf_path("qstat.json") + output = Path(file_path).read_text() + status = "R" + parsed_status = pbsParser.parse_qstat_jobid_json(output, "16705.sdb") + assert status == parsed_status diff --git a/tests/test_preview.py b/tests/test_preview.py new file mode 100644 index 000000000..3c7bed6fe --- /dev/null +++ b/tests/test_preview.py @@ -0,0 +1,1330 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2023, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import pathlib +import sys +import typing as t +from os import path as osp + +import jinja2 +import numpy as np +import pytest + +import smartsim +import smartsim._core._cli.utils as _utils +from smartsim import Experiment +from smartsim._core import Manifest, previewrenderer +from smartsim._core.config import CONFIG +from smartsim._core.control.controller import Controller +from smartsim._core.control.job import Job +from smartsim.database import Orchestrator +from smartsim.entity.entity import SmartSimEntity +from smartsim.error.errors import PreviewFormatError +from smartsim.settings import QsubBatchSettings, RunSettings + +pytestmark = pytest.mark.group_b + + +@pytest.fixture +def choose_host(): + def _choose_host(wlmutils, index: int = 0): + hosts = wlmutils.get_test_hostlist() + if hosts: + return hosts[index] + return None + + return _choose_host + + +@pytest.fixture +def preview_object(test_dir) -> t.Dict[str, Job]: + """ + Bare bones orch + """ + rs = RunSettings(exe="echo", exe_args="ifname=lo") + s = SmartSimEntity(name="faux-name", path=test_dir, run_settings=rs) + o = Orchestrator() + o.entity = s + s.db_identifier = "test_db_id" + s.ports = [1235] + s.num_shards = 1 + job = Job("faux-name", "faux-step-id", s, "slurm", True) + active_dbjobs: t.Dict[str, Job] = {"mock_job": job} + return active_dbjobs + + +@pytest.fixture +def preview_object_multidb(test_dir) -> t.Dict[str, Job]: + """ + Bare bones orch + """ + rs = RunSettings(exe="echo", exe_args="ifname=lo") + s = SmartSimEntity(name="faux-name", path=test_dir, run_settings=rs) + o = Orchestrator() + o.entity = s + s.db_identifier = "testdb_reg" + s.ports = [8750] + s.num_shards = 1 + job = Job("faux-name", "faux-step-id", s, "slurm", True) + + rs2 = RunSettings(exe="echo", exe_args="ifname=lo") + s2 = SmartSimEntity(name="faux-name_2", path=test_dir, run_settings=rs) + o2 = Orchestrator() + o2.entity = s2 + s2.db_identifier = "testdb_reg2" + s2.ports = [8752] + s2.num_shards = 1 + job2 = 
Job("faux-name_2", "faux-step-id_2", s2, "slurm", True) + + active_dbjobs: t.Dict[str, Job] = {"mock_job": job, "mock_job2": job2} + return active_dbjobs + + +def add_batch_resources(wlmutils, batch_settings): + if isinstance(batch_settings, QsubBatchSettings): + for key, value in wlmutils.get_batch_resources().items(): + batch_settings.set_resource(key, value) + + +def test_get_ifname_filter(): + """Test get_ifname filter""" + + # Test input and expected output + value_dict = ( + (["+ifname=ib0"], "ib0"), + ("", ""), + ("+ifnameib0", ""), + ("=ib0", ""), + (["_ifname=bad_if_key"], "bad_if_key"), + (["ifname=mock_if_name"], "mock_if_name"), + ("IFname=case_sensitive_key", ""), + ("xfname=not_splittable", ""), + (None, ""), + ) + + template_str = "{{ value | get_ifname }}" + template_dict = {"ts": template_str} + + loader = jinja2.DictLoader(template_dict) + env = jinja2.Environment(loader=loader, autoescape=True) + env.filters["get_ifname"] = previewrenderer.get_ifname + + t = env.get_template("ts") + + for input, expected_output in value_dict: + output = t.render(value=input) + # assert that that filter output matches expected output + assert output == expected_output + + +def test_get_dbtype_filter(): + """Test get_dbtype filter to extract database backend from config""" + + template_str = "{{ config | get_dbtype }}" + template_dict = {"ts": template_str} + loader = jinja2.DictLoader(template_dict) + env = jinja2.Environment(loader=loader, autoescape=True) + env.filters["get_dbtype"] = previewrenderer.get_dbtype + + t = env.get_template("ts") + output = t.render(config=CONFIG.database_cli) + + assert output in CONFIG.database_cli + # Test empty input + test_string = "" + output = t.render(config=test_string) + assert output == "" + # Test empty path + test_string = "SmartSim/smartsim/_core/bin/" + output = t.render(config=test_string) + assert output == "" + # Test no hyphen + test_string = "SmartSim/smartsim/_core/bin/rediscli" + output = 
t.render(config=test_string) + assert output == "" + # Test no LHS + test_string = "SmartSim/smartsim/_core/bin/redis-" + output = t.render(config=test_string) + assert output == "" + # Test no RHS + test_string = "SmartSim/smartsim/_core/bin/-cli" + output = t.render(config=test_string) + assert output == "" + + +def test_experiment_preview(test_dir, wlmutils): + """Test correct preview output fields for Experiment preview""" + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + exp_name = "test_experiment_preview" + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + + # Execute method for template rendering + output = previewrenderer.render(exp, verbosity_level="debug") + + # Evaluate output + summary_lines = output.split("\n") + summary_lines = [item.replace("\t", "").strip() for item in summary_lines[-3:]] + assert 3 == len(summary_lines) + summary_dict = dict(row.split(": ") for row in summary_lines) + assert set(["Experiment Name", "Experiment Path", "Launcher"]).issubset( + summary_dict + ) + + +def test_experiment_preview_properties(test_dir, wlmutils): + """Test correct preview output properties for Experiment preview""" + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + exp_name = "test_experiment_preview_properties" + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + + # Execute method for template rendering + output = previewrenderer.render(exp, verbosity_level="debug") + + # Evaluate output + summary_lines = output.split("\n") + summary_lines = [item.replace("\t", "").strip() for item in summary_lines[-3:]] + assert 3 == len(summary_lines) + summary_dict = dict(row.split(": ") for row in summary_lines) + assert exp.name == summary_dict["Experiment Name"] + assert exp.exp_path == summary_dict["Experiment Path"] + assert exp.launcher == summary_dict["Launcher"] + + +def test_orchestrator_preview_render(test_dir, wlmutils, choose_host): + """Test correct preview output 
properties for Orchestrator preview""" + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + test_interface = wlmutils.get_test_interface() + test_port = wlmutils.get_test_port() + exp_name = "test_orchestrator_preview_properties" + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + # create regular database + orc = exp.create_database( + port=test_port, + interface=test_interface, + hosts=choose_host(wlmutils), + ) + preview_manifest = Manifest(orc) + + # Execute method for template rendering + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Database Identifier" in output + assert "Shards" in output + assert "TCP/IP Port(s)" in output + assert "Network Interface" in output + assert "Type" in output + assert "Executable" in output + + db_path = _utils.get_db_path() + if db_path: + db_type, _ = db_path.name.split("-", 1) + + assert orc.db_identifier in output + assert str(orc.num_shards) in output + assert orc._interfaces[0] in output + assert db_type in output + assert CONFIG.database_exe in output + assert orc.run_command in output + assert str(orc.db_nodes) in output + + +def test_preview_to_file(test_dir, wlmutils): + """ + Test that if an output_filename is given, a file + is rendered for Experiment preview" + """ + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + exp_name = "test_preview_output_filename" + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + filename = "test_preview_output_filename.txt" + path = pathlib.Path(test_dir) / filename + # Execute preview method + exp.preview( + output_format=previewrenderer.Format.PLAINTEXT, + output_filename=str(path), + verbosity_level="debug", + ) + + # Evaluate output + assert path.exists() + assert path.is_file() + + +def test_model_preview(test_dir, wlmutils): + """ + Test correct preview output fields for Model preview + """ + # Prepare entities + exp_name = 
"test_model_preview" + test_launcher = wlmutils.get_test_launcher() + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + model_params = {"port": 6379, "password": "unbreakable_password"} + rs1 = RunSettings("bash", "multi_tags_template.sh") + rs2 = exp.create_run_settings("echo", ["spam", "eggs"]) + + hello_world_model = exp.create_model( + "echo-hello", run_settings=rs1, params=model_params + ) + + spam_eggs_model = exp.create_model("echo-spam", run_settings=rs2) + + preview_manifest = Manifest(hello_world_model, spam_eggs_model) + + # Execute preview method + rendered_preview = previewrenderer.render( + exp, preview_manifest, verbosity_level="debug" + ) + + # Evaluate output + assert "Model Name" in rendered_preview + assert "Executable" in rendered_preview + assert "Executable Arguments" in rendered_preview + assert "Model Parameters" in rendered_preview + + +def test_model_preview_properties(test_dir, wlmutils): + """ + Test correct preview output properties for Model preview + """ + # Prepare entities + exp_name = "test_model_preview_parameters" + test_launcher = wlmutils.get_test_launcher() + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + + hw_name = "echo-hello" + hw_port = 6379 + hw_password = "unbreakable_password" + hw_rs = "multi_tags_template.sh" + model_params = {"port": hw_port, "password": hw_password} + hw_param1 = "bash" + rs1 = RunSettings(hw_param1, hw_rs) + + se_name = "echo-spam" + se_param1 = "echo" + se_param2 = "spam" + se_param3 = "eggs" + rs2 = exp.create_run_settings(se_param1, [se_param2, se_param3]) + + hello_world_model = exp.create_model(hw_name, run_settings=rs1, params=model_params) + spam_eggs_model = exp.create_model(se_name, run_settings=rs2) + + preview_manifest = Manifest(hello_world_model, spam_eggs_model) + + # Execute preview method + rendered_preview = previewrenderer.render( + exp, preview_manifest, verbosity_level="debug" + ) + + # Evaluate output for hello world model + 
assert hw_name in rendered_preview + assert hw_param1 in rendered_preview + assert hw_rs in rendered_preview + assert "port" in rendered_preview + assert "password" in rendered_preview + assert str(hw_port) in rendered_preview + assert hw_password in rendered_preview + + assert hw_name == hello_world_model.name + assert hw_param1 in hello_world_model.run_settings.exe[0] + assert hw_rs == hello_world_model.run_settings.exe_args[0] + assert None == hello_world_model.batch_settings + assert "port" in list(hello_world_model.params.items())[0] + assert hw_port in list(hello_world_model.params.items())[0] + assert "password" in list(hello_world_model.params.items())[1] + assert hw_password in list(hello_world_model.params.items())[1] + + # Evaluate outputfor spam eggs model + assert se_name in rendered_preview + assert se_param1 in rendered_preview + assert se_param2 in rendered_preview + assert se_param3 in rendered_preview + + assert se_name == spam_eggs_model.name + assert se_param1 in spam_eggs_model.run_settings.exe[0] + assert se_param2 == spam_eggs_model.run_settings.exe_args[0] + assert se_param3 == spam_eggs_model.run_settings.exe_args[1] + + +def test_preview_model_tagged_files(fileutils, test_dir, wlmutils): + """ + Test model with tagged files in preview. 
+ """ + # Prepare entities + exp_name = "test_model_preview_parameters" + test_launcher = wlmutils.get_test_launcher() + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + + model_params = {"port": 6379, "password": "unbreakable_password"} + model_settings = RunSettings("bash", "multi_tags_template.sh") + + hello_world_model = exp.create_model( + "echo-hello", run_settings=model_settings, params=model_params + ) + + config = fileutils.get_test_conf_path( + osp.join("generator_files", "multi_tags_template.sh") + ) + hello_world_model.attach_generator_files(to_configure=[config]) + exp.generate(hello_world_model, overwrite=True) + + preview_manifest = Manifest(hello_world_model) + + # Execute preview method + rendered_preview = previewrenderer.render( + exp, preview_manifest, verbosity_level="debug" + ) + + # Evaluate output + assert "Tagged Files for Model Configuration" in rendered_preview + assert "generator_files/multi_tags_template.sh" in rendered_preview + assert "generator_files/multi_tags_template.sh" in hello_world_model.files.tagged[0] + + +def test_model_key_prefixing(test_dir, wlmutils): + """ + Test preview for enabling key prefixing for a Model + """ + # Prepare entities + exp_name = "test_model_key_prefixing" + test_launcher = wlmutils.get_test_launcher() + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + + db = exp.create_database(port=6780, interface="lo") + exp.generate(db, overwrite=True) + rs1 = exp.create_run_settings("echo", ["hello", "world"]) + model = exp.create_model("model_test", run_settings=rs1) + + # enable key prefixing on model + model.enable_key_prefixing() + exp.generate(model, overwrite=True) + + preview_manifest = Manifest(db, model) + + # Execute preview method + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Key Prefix" in output + assert "model_test" in output + assert "Outgoing Key Collision Prevention (Key Prefixing)" 
in output + assert "Tensors: On" in output + assert "Datasets: On" in output + assert "ML Models/Torch Scripts: Off" in output + assert "Aggregation Lists: On" in output + + +def test_ensembles_preview(test_dir, wlmutils): + """ + Test ensemble preview fields are correct in template render + """ + test_launcher = wlmutils.get_test_launcher() + exp = Experiment( + "test-ensembles-preview", exp_path=test_dir, launcher=test_launcher + ) + + # setup ensemble parameter space + learning_rate = list(np.linspace(0.01, 0.5)) + train_params = {"LR": learning_rate} + + # define how each member should run + run = exp.create_run_settings(exe="python", exe_args="./train-model.py") + + ensemble = exp.create_ensemble( + "Training-Ensemble", + params=train_params, + params_as_args=["LR"], + run_settings=run, + perm_strategy="random", + n_models=4, + ) + + preview_manifest = Manifest(ensemble) + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Ensemble Name" in output + assert "Members" in output + assert "Ensemble Parameters" in output + + +def test_preview_models_and_ensembles(test_dir, wlmutils): + """ + Test preview of separate model entity and ensemble entity + """ + exp_name = "test-preview-model-and-ensemble" + test_dir = pathlib.Path(test_dir) / exp_name + test_dir.mkdir(parents=True) + test_launcher = wlmutils.get_test_launcher() + exp = Experiment(exp_name, exp_path=str(test_dir), launcher=test_launcher) + + rs1 = exp.create_run_settings("echo", ["hello", "world"]) + rs2 = exp.create_run_settings("echo", ["spam", "eggs"]) + + hw_name = "echo-hello" + se_name = "echo-spam" + ens_name = "echo-ensemble" + hello_world_model = exp.create_model(hw_name, run_settings=rs1) + spam_eggs_model = exp.create_model(se_name, run_settings=rs2) + hello_ensemble = exp.create_ensemble(ens_name, run_settings=rs1, replicas=3) + + exp.generate(hello_world_model, spam_eggs_model, hello_ensemble) + + preview_manifest = 
Manifest(hello_world_model, spam_eggs_model, hello_ensemble) + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Models" in output + assert hw_name in output + assert se_name in output + + assert "Ensembles" in output + assert ens_name + "_1" in output + assert ens_name + "_2" in output + + +def test_ensemble_preview_client_configuration(test_dir, wlmutils): + """ + Test preview of client configuration and key prefixing in Ensemble preview + """ + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + exp = Experiment( + "test-preview-ensemble-clientconfig", exp_path=test_dir, launcher=test_launcher + ) + # Create Orchestrator + db = exp.create_database(port=6780, interface="lo") + exp.generate(db, overwrite=True) + rs1 = exp.create_run_settings("echo", ["hello", "world"]) + # Create ensemble + ensemble = exp.create_ensemble("fd_simulation", run_settings=rs1, replicas=2) + # enable key prefixing on ensemble + ensemble.enable_key_prefixing() + exp.generate(ensemble, overwrite=True) + rs2 = exp.create_run_settings("echo", ["spam", "eggs"]) + # Create model + ml_model = exp.create_model("tf_training", rs2) + + for sim in ensemble.entities: + ml_model.register_incoming_entity(sim) + + exp.generate(ml_model, overwrite=True) + preview_manifest = Manifest(db, ml_model, ensemble) + + # Call preview renderer for testing output + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Client Configuration" in output + assert "Database Identifier" in output + assert "Database Backend" in output + assert "Type" in output + + +def test_ensemble_preview_client_configuration_multidb(test_dir, wlmutils): + """ + Test preview of client configuration and key prefixing in Ensemble preview + with multiple databases + """ + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + exp = Experiment( + "test-preview-multidb-clinet-config", 
exp_path=test_dir, launcher=test_launcher + ) + # Create Orchestrator + db1_dbid = "db_1" + db1 = exp.create_database(port=6780, interface="lo", db_identifier=db1_dbid) + exp.generate(db1, overwrite=True) + # Create another Orchestrator + db2_dbid = "db_2" + db2 = exp.create_database(port=6784, interface="lo", db_identifier=db2_dbid) + exp.generate(db2, overwrite=True) + + rs1 = exp.create_run_settings("echo", ["hello", "world"]) + # Create ensemble + ensemble = exp.create_ensemble("fd_simulation", run_settings=rs1, replicas=2) + # enable key prefixing on ensemble + ensemble.enable_key_prefixing() + exp.generate(ensemble, overwrite=True) + rs2 = exp.create_run_settings("echo", ["spam", "eggs"]) + # Create model + ml_model = exp.create_model("tf_training", rs2) + for sim in ensemble.entities: + ml_model.register_incoming_entity(sim) + exp.generate(ml_model, overwrite=True) + preview_manifest = Manifest(db1, db2, ml_model, ensemble) + + # Call preview renderer for testing output + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Client Configuration" in output + assert "Database Identifier" in output + assert "Database Backend" in output + assert "TCP/IP Port(s)" in output + assert "Type" in output + + assert db1_dbid in output + assert db2_dbid in output + + +def test_ensemble_preview_attached_files(fileutils, test_dir, wlmutils): + """ + Test the preview of tagged, copy, and symlink files attached + to an ensemble + """ + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + exp = Experiment( + "test-preview-attached-files", exp_path=test_dir, launcher=test_launcher + ) + ensemble = exp.create_ensemble( + "dir_test", replicas=1, run_settings=RunSettings("python", exe_args="sleep.py") + ) + ensemble.entities = [] + params = {"THERMO": [10, 20], "STEPS": [20, 30]} + ensemble = exp.create_ensemble( + "dir_test", + params=params, + run_settings=RunSettings("python", exe_args="sleep.py"), + ) + 
gen_dir = fileutils.get_test_conf_path(osp.join("generator_files", "test_dir")) + symlink_dir = fileutils.get_test_conf_path( + osp.join("generator_files", "to_symlink_dir") + ) + copy_dir = fileutils.get_test_conf_path(osp.join("generator_files", "to_copy_dir")) + + ensemble.attach_generator_files() + ensemble.attach_generator_files( + to_configure=[gen_dir, copy_dir], to_copy=copy_dir, to_symlink=symlink_dir + ) + preview_manifest = Manifest(ensemble) + + # Call preview renderer for testing output + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Tagged Files for Model Configuration" in output + assert "Copy Files" in output + assert "Symlink" in output + assert "Ensemble Parameters" in output + assert "Model Parameters" in output + + assert "generator_files/test_dir" in output + assert "generator_files/to_copy_dir" in output + assert "generator_files/to_symlink_dir" in output + + for model in ensemble: + assert "generator_files/test_dir" in model.files.tagged[0] + for copy in model.files.copy: + assert "generator_files/to_copy_dir" in copy + for link in model.files.link: + assert "generator_files/to_symlink_dir" in link + + +def test_preview_colocated_db_model_ensemble(fileutils, test_dir, wlmutils, mlutils): + """ + Test preview of DBModel on colocated ensembles + """ + + exp_name = "test-preview-colocated-db-model-ensemble" + test_launcher = wlmutils.get_test_launcher() + test_interface = wlmutils.get_test_interface() + test_port = wlmutils.get_test_port() + test_device = mlutils.get_test_device() + test_num_gpus = 1 + + test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py") + + exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir) + colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script) + colo_settings.set_nodes(1) + colo_settings.set_tasks(1) + + # Create the ensemble of two identical SmartSim Model + colo_ensemble = 
exp.create_ensemble( + "colocated_ens", run_settings=colo_settings, replicas=2 + ) + + # Create colocated SmartSim Model + colo_model = exp.create_model("colocated_model", colo_settings) + + # Create and save ML model to filesystem + content = "empty test" + model_path = pathlib.Path(test_dir) / "model1.pt" + model_path.write_text(content) + + # Test adding a model from ensemble + colo_ensemble.add_ml_model( + "cnn", + "TF", + model_path=model_path, + device=test_device, + devices_per_node=test_num_gpus, + first_device=0, + inputs="args_0", + outputs="Identity", + ) + + # Colocate a database with the first ensemble members + for i, entity in enumerate(colo_ensemble): + entity.colocate_db_tcp( + port=test_port + i, db_cpus=1, debug=True, ifname=test_interface + ) + # Add ML models to each ensemble member to make sure they + # do not conflict with other ML models + entity.add_ml_model( + "cnn2", + "TF", + model_path=model_path, + device=test_device, + devices_per_node=test_num_gpus, + first_device=0, + inputs="args_0", + outputs="Identity", + ) + entity.disable_key_prefixing() + + # Add another ensemble member + colo_ensemble.add_model(colo_model) + + # Colocate a database with the new ensemble member + colo_model.colocate_db_tcp( + port=test_port + len(colo_ensemble) - 1, + db_cpus=1, + debug=True, + ifname=test_interface, + ) + # Add a ML model to the new ensemble member + model_inputs = "args_0" + model_outputs = "Identity" + model_name = "cnn2" + model_backend = "TF" + colo_model.add_ml_model( + model_name, + model_backend, + model_path=model_path, + device=test_device, + devices_per_node=test_num_gpus, + first_device=0, + inputs=model_inputs, + outputs=model_outputs, + ) + + exp.generate(colo_ensemble) + + preview_manifest = Manifest(colo_ensemble) + + # Execute preview method + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Models" in output + assert "Name" in output + assert "Backend" in output + 
assert "Path" in output + assert "Device" in output + assert "Devices Per Node" in output + assert "Inputs" in output + assert "Outputs" in output + + assert model_name in output + assert model_backend in output + assert "Path" in output + assert "/model1.pt" in output + assert "CPU" in output + assert model_inputs in output + assert model_outputs in output + + +def test_preview_colocated_db_script_ensemble(fileutils, test_dir, wlmutils, mlutils): + """ + Test preview of DB Scripts on colocated DB from ensemble + """ + + exp_name = "test-preview-colocated-db-script" + + test_launcher = wlmutils.get_test_launcher() + test_interface = wlmutils.get_test_interface() + test_port = wlmutils.get_test_port() + test_device = mlutils.get_test_device() + test_num_gpus = mlutils.get_test_num_gpus() if pytest.test_device == "GPU" else 1 + + expected_torch_script = "torchscript.py" + test_script = fileutils.get_test_conf_path("run_dbscript_smartredis.py") + torch_script = fileutils.get_test_conf_path(expected_torch_script) + + # Create SmartSim Experiment + exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir) + + colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script) + colo_settings.set_nodes(1) + colo_settings.set_tasks(1) + + # Create SmartSim Ensemble with two identical models + colo_ensemble = exp.create_ensemble( + "colocated_ensemble", run_settings=colo_settings, replicas=2 + ) + + # Create a SmartSim model + colo_model = exp.create_model("colocated_model", colo_settings) + + # Colocate a db with each ensemble entity and add a script + # to each entity via file + for i, entity in enumerate(colo_ensemble): + entity.disable_key_prefixing() + entity.colocate_db_tcp( + port=test_port + i, + db_cpus=1, + debug=True, + ifname=test_interface, + ) + + entity.add_script( + "test_script1", + script_path=torch_script, + device=test_device, + devices_per_node=test_num_gpus, + first_device=0, + ) + + # Colocate a db with the non-ensemble 
Model + colo_model.colocate_db_tcp( + port=test_port + len(colo_ensemble), + db_cpus=1, + debug=True, + ifname=test_interface, + ) + + # Add a script to the non-ensemble model + torch_script_str = "def negate(x):\n\treturn torch.neg(x)\n" + cm_name2 = "test_script2" + colo_ensemble.add_script( + cm_name2, + script=torch_script_str, + device=test_device, + devices_per_node=test_num_gpus, + first_device=0, + ) + + # Add the third SmartSim model to the ensemble + colo_ensemble.add_model(colo_model) + + # Add another script via file to the entire ensemble + cm_name1 = "test_script1" + colo_model.add_script( + cm_name1, + script_path=torch_script, + device=test_device, + devices_per_node=test_num_gpus, + first_device=0, + ) + + # Assert we have added one model to the ensemble + assert len(colo_ensemble._db_scripts) == 1 + # Assert we have added both models to each entity + assert all([len(entity._db_scripts) == 2 for entity in colo_ensemble]) + + exp.generate(colo_ensemble) + + preview_manifest = Manifest(colo_ensemble) + + # Execute preview method + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Torch Scripts" in output + assert "Name" in output + assert "Path" in output + assert "Devices Per Node" in output + + assert cm_name2 in output + assert expected_torch_script in output + assert test_device in output + assert cm_name1 in output + + +def test_preview_active_infrastructure(wlmutils, test_dir, preview_object): + """Test active infrastructure without other orchestrators""" + + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + exp_name = "test_active_infrastructure_preview" + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + + # Execute method for template rendering + output = previewrenderer.render( + exp, active_dbjobs=preview_object, verbosity_level="debug" + ) + + assert "Active Infrastructure" in output + assert "Database Identifier" in output + assert 
"Shards" in output + assert "Network Interface" in output + assert "Type" in output + assert "TCP/IP" in output + + +def test_preview_orch_active_infrastructure( + wlmutils, test_dir, choose_host, preview_object +): + """ + Test correct preview output properties for active infrastructure preview + with other orchestrators + """ + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + test_interface = wlmutils.get_test_interface() + test_port = wlmutils.get_test_port() + exp_name = "test_orchestrator_active_infrastructure_preview" + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + + orc2 = exp.create_database( + port=test_port, + interface=test_interface, + hosts=choose_host(wlmutils), + db_identifier="orc_2", + ) + + orc3 = exp.create_database( + port=test_port, + interface=test_interface, + hosts=choose_host(wlmutils), + db_identifier="orc_3", + ) + + preview_manifest = Manifest(orc2, orc3) + + # Execute method for template rendering + output = previewrenderer.render( + exp, preview_manifest, active_dbjobs=preview_object, verbosity_level="debug" + ) + + assert "Active Infrastructure" in output + assert "Database Identifier" in output + assert "Shards" in output + assert "Network Interface" in output + assert "Type" in output + assert "TCP/IP" in output + + +def test_preview_multidb_active_infrastructure( + wlmutils, test_dir, choose_host, preview_object_multidb +): + """multiple started databases active infrastructure""" + + # Retrieve parameters from testing environment + test_launcher = wlmutils.get_test_launcher() + test_interface = wlmutils.get_test_interface() + test_port = wlmutils.get_test_port() + + # start a new Experiment for this section + exp = Experiment( + "test_preview_multidb_active_infrastructure", + exp_path=test_dir, + launcher=test_launcher, + ) + + # Execute method for template rendering + output = previewrenderer.render( + exp, active_dbjobs=preview_object_multidb, verbosity_level="debug" + ) + + assert 
"Active Infrastructure" in output + assert "Database Identifier" in output + assert "Shards" in output + assert "Network Interface" in output + assert "Type" in output + assert "TCP/IP" in output + + assert "testdb_reg" in output + assert "testdb_reg2" in output + assert "Ochestrators" not in output + + +def test_preview_active_infrastructure_orchestrator_error( + wlmutils, test_dir, choose_host, monkeypatch: pytest.MonkeyPatch +): + """Demo error when trying to preview a started orchestrator""" + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + test_interface = wlmutils.get_test_interface() + test_port = wlmutils.get_test_port() + exp_name = "test_active_infrastructure_preview_orch_error" + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + + monkeypatch.setattr( + smartsim.database.orchestrator.Orchestrator, "is_active", lambda x: True + ) + + orc = exp.create_database( + port=test_port, + interface=test_interface, + hosts=choose_host(wlmutils), + db_identifier="orc_1", + ) + + # Retrieve any active jobs + active_dbjobs = exp._control.active_orchestrator_jobs + + preview_manifest = Manifest(orc) + + # Execute method for template rendering + output = previewrenderer.render( + exp, preview_manifest, active_dbjobs=active_dbjobs, verbosity_level="debug" + ) + + assert "WARNING: Cannot preview orc_1, because it is already started" in output + + +def test_active_orchestrator_jobs_property( + wlmutils, + test_dir, + preview_object, +): + """Ensure db_jobs remaines unchanged after deletion + of active_orchestrator_jobs property stays intact when retrieving db_jobs""" + + # Retrieve parameters from testing environment + test_launcher = wlmutils.get_test_launcher() + + # start a new Experiment for this section + exp = Experiment( + "test-active_orchestrator_jobs-property", + exp_path=test_dir, + launcher=test_launcher, + ) + + controller = Controller() + controller._jobs.db_jobs = preview_object + + # Modify the returned job 
collection + active_orchestrator_jobs = exp._control.active_orchestrator_jobs + active_orchestrator_jobs["test"] = "test_value" + + # Verify original collection is not also modified + assert not exp._control.active_orchestrator_jobs.get("test", None) + + +def test_verbosity_info_ensemble(test_dir, wlmutils): + """ + Test preview of separate model entity and ensemble entity + with verbosity level set to info + """ + exp_name = "test-model-and-ensemble" + test_dir = pathlib.Path(test_dir) / exp_name + test_dir.mkdir(parents=True) + test_launcher = wlmutils.get_test_launcher() + exp = Experiment(exp_name, exp_path=str(test_dir), launcher=test_launcher) + + rs1 = exp.create_run_settings("echo", ["hello", "world"]) + rs2 = exp.create_run_settings("echo", ["spam", "eggs"]) + + hw_name = "echo-hello" + se_name = "echo-spam" + ens_name = "echo-ensemble" + hello_world_model = exp.create_model(hw_name, run_settings=rs1) + spam_eggs_model = exp.create_model(se_name, run_settings=rs2) + hello_ensemble = exp.create_ensemble(ens_name, run_settings=rs1, replicas=3) + + exp.generate(hello_world_model, spam_eggs_model, hello_ensemble) + + preview_manifest = Manifest(hello_world_model, spam_eggs_model, hello_ensemble) + output = previewrenderer.render(exp, preview_manifest, verbosity_level="info") + + assert "Executable" not in output + assert "Executable Arguments" not in output + + assert "echo_ensemble_1" not in output + + +def test_verbosity_info_colocated_db_model_ensemble( + fileutils, test_dir, wlmutils, mlutils +): + """Test preview of DBModel on colocated ensembles, first adding the DBModel to the + ensemble, then colocating DB. 
+ """ + + exp_name = "test-colocated-db-model-ensemble-reordered" + test_launcher = wlmutils.get_test_launcher() + test_interface = wlmutils.get_test_interface() + test_port = wlmutils.get_test_port() + test_device = mlutils.get_test_device() + test_num_gpus = 1 + + test_script = fileutils.get_test_conf_path("run_tf_dbmodel_smartredis.py") + + exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir) + colo_settings = exp.create_run_settings(exe=sys.executable, exe_args=test_script) + colo_settings.set_nodes(1) + colo_settings.set_tasks(1) + + # Create the ensemble of two identical SmartSim Model + colo_ensemble = exp.create_ensemble( + "colocated_ens", run_settings=colo_settings, replicas=2 + ) + + # Create colocated SmartSim Model + colo_model = exp.create_model("colocated_model", colo_settings) + + # Create and save ML model to filesystem + content = "empty test" + model_path = pathlib.Path(test_dir) / "model1.pt" + model_path.write_text(content) + + # Test adding a model from ensemble + colo_ensemble.add_ml_model( + "cnn", + "TF", + model_path=model_path, + device=test_device, + devices_per_node=test_num_gpus, + first_device=0, + inputs="args_0", + outputs="Identity", + ) + + # Colocate a database with the first ensemble members + for i, entity in enumerate(colo_ensemble): + entity.colocate_db_tcp( + port=test_port + i, db_cpus=1, debug=True, ifname=test_interface + ) + # Add ML models to each ensemble member to make sure they + # do not conflict with other ML models + entity.add_ml_model( + "cnn2", + "TF", + model_path=model_path, + device=test_device, + devices_per_node=test_num_gpus, + first_device=0, + inputs="args_0", + outputs="Identity", + ) + entity.disable_key_prefixing() + + # Add another ensemble member + colo_ensemble.add_model(colo_model) + + # Colocate a database with the new ensemble member + colo_model.colocate_db_tcp( + port=test_port + len(colo_ensemble) - 1, + db_cpus=1, + debug=True, + ifname=test_interface, + ) + # Add a ML 
model to the new ensemble member + model_inputs = "args_0" + model_outputs = "Identity" + model_name = "cnn2" + model_backend = "TF" + colo_model.add_ml_model( + model_name, + model_backend, + model_path=model_path, + device=test_device, + devices_per_node=test_num_gpus, + first_device=0, + inputs=model_inputs, + outputs=model_outputs, + ) + + exp.generate(colo_ensemble) + + preview_manifest = Manifest(colo_ensemble) + + # Execute preview method + output = previewrenderer.render(exp, preview_manifest, verbosity_level="info") + + assert "Outgoing Key Collision Prevention (Key Prefixing)" not in output + assert "Devices Per Node" not in output + + +def test_verbosity_info_orchestrator(test_dir, wlmutils, choose_host): + """Test correct preview output properties for Orchestrator preview""" + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + test_interface = wlmutils.get_test_interface() + test_port = wlmutils.get_test_port() + exp_name = "test_orchestrator_preview_properties" + exp = Experiment(exp_name, exp_path=test_dir, launcher=test_launcher) + # create regular database + orc = exp.create_database( + port=test_port, + interface=test_interface, + hosts=choose_host(wlmutils), + ) + preview_manifest = Manifest(orc) + + # Execute method for template rendering + output = previewrenderer.render(exp, preview_manifest, verbosity_level="info") + + # Evaluate output + assert "Executable" not in output + assert "Run Command" not in output + + +def test_verbosity_info_ensemble(test_dir, wlmutils): + """ + Test client configuration and key prefixing in Ensemble preview + """ + # Prepare entities + test_launcher = wlmutils.get_test_launcher() + exp = Experiment("key_prefix_test", exp_path=test_dir, launcher=test_launcher) + # Create Orchestrator + db = exp.create_database(port=6780, interface="lo") + exp.generate(db, overwrite=True) + rs1 = exp.create_run_settings("echo", ["hello", "world"]) + # Create ensemble + ensemble = exp.create_ensemble("fd_simulation", 
run_settings=rs1, replicas=2) + # enable key prefixing on ensemble + ensemble.enable_key_prefixing() + exp.generate(ensemble, overwrite=True) + rs2 = exp.create_run_settings("echo", ["spam", "eggs"]) + # Create model + ml_model = exp.create_model("tf_training", rs2) + + for sim in ensemble.entities: + ml_model.register_incoming_entity(sim) + + exp.generate(ml_model, overwrite=True) + preview_manifest = Manifest(db, ml_model, ensemble) + + # Call preview renderer for testing output + output = previewrenderer.render(exp, preview_manifest, verbosity_level="info") + + # Evaluate output + assert "Outgoing Key Collision Prevention (Key Prefixing)" in output + + +def test_check_output_format_error(): + """ + Test error when invalid ouput format is given. + """ + # Prepare entities + exp_name = "test_output_format" + exp = Experiment(exp_name) + + # Execute preview method + with pytest.raises(PreviewFormatError) as ex: + exp.preview(output_format="hello") + assert ( + "The only valid output format currently available is plain_text" + in ex.value.args[0] + ) + + +def test_check_verbosity_level_error(): + """ + Testing that an error does occur when a string verbosity is passed + """ + # Prepare entities + exp_name = "test_verbosity_level_error" + exp = Experiment(exp_name) + + # Execute preview method + with pytest.raises(ValueError) as ex: + exp.preview(verbosity_level="hello") + + +def test_check_verbosity_level(): + """ + Testing that an error doesnt occur when a string verbosity is passed + """ + # Prepare entities + exp_name = "test_verbosity_level" + exp = Experiment(exp_name) + + # Execute preview method + exp.preview(verbosity_level="info") + + +def test_preview_colocated_db_singular_model(wlmutils, test_dir): + """Test preview behavior when a colocated db is only added to + one model. 
The expected behviour is that both models are colocated + """ + + test_launcher = wlmutils.get_test_launcher() + + exp = Experiment("colocated test", exp_path=test_dir, launcher=test_launcher) + + rs = exp.create_run_settings("sleep", ["100"]) + + model_1 = exp.create_model("model_1", run_settings=rs) + model_2 = exp.create_model("model_2", run_settings=rs) + + model_1.colocate_db() + + exp.generate(model_1, model_2, overwrite=True) + + preview_manifest = Manifest(model_1, model_2) + + # Call preview renderer for testing output + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + assert "model_1" in output + assert "model_2" in output + assert "Client Configuration" in output + + +def test_preview_db_script(wlmutils, test_dir): + """ + Test preview of model instance with a torch script. + """ + test_launcher = wlmutils.get_test_launcher() + # Initialize the Experiment and set the launcher to auto + + exp = Experiment("getting-started", launcher=test_launcher) + + # Initialize a RunSettings object + model_settings = exp.create_run_settings(exe="python", exe_args="params.py") + + # Initialize a Model object + model_instance = exp.create_model("model_name", model_settings) + model_instance.colocate_db_tcp() + + # TorchScript string + torch_script_str = "def negate(x):\n\treturn torch.neg(x)\n" + + # Attach TorchScript to Model + model_instance.add_script( + name="example_script", + script=torch_script_str, + device="GPU", + devices_per_node=2, + first_device=0, + ) + preview_manifest = Manifest(model_instance) + + # Call preview renderer for testing output + output = previewrenderer.render(exp, preview_manifest, verbosity_level="debug") + + # Evaluate output + assert "Torch Script" in output diff --git a/tests/test_reconnect_orchestrator.py b/tests/test_reconnect_orchestrator.py index 554e42cbd..6ce93c6f9 100644 --- a/tests/test_reconnect_orchestrator.py +++ b/tests/test_reconnect_orchestrator.py @@ -29,8 +29,9 @@ import pytest -from 
smartsim import Experiment, status +from smartsim import Experiment from smartsim.database import Orchestrator +from smartsim.status import SmartSimStatus # The tests in this file belong to the group_b group pytestmark = pytest.mark.group_b @@ -50,11 +51,11 @@ def test_local_orchestrator(test_dir, wlmutils): first_dir = test_dir orc = Orchestrator(port=wlmutils.get_test_port()) - orc.set_path(test_dir) + orc.set_path(osp.join(test_dir, "orchestrator")) exp.start(orc) statuses = exp.get_status(orc) - assert [stat != status.STATUS_FAILED for stat in statuses] + assert [stat != SmartSimStatus.STATUS_FAILED for stat in statuses] # simulate user shutting down main thread exp._control._jobs.actively_monitoring = False @@ -68,7 +69,7 @@ def test_reconnect_local_orc(test_dir): exp_name = "test-orc-local-reconnect-2nd" exp_2 = Experiment(exp_name, launcher="local", exp_path=test_dir) - checkpoint = osp.join(first_dir, "smartsim_db.dat") + checkpoint = osp.join(first_dir, "orchestrator", "smartsim_db.dat") reloaded_orc = exp_2.reconnect_orchestrator(checkpoint) # let statuses update once @@ -76,7 +77,7 @@ def test_reconnect_local_orc(test_dir): statuses = exp_2.get_status(reloaded_orc) for stat in statuses: - if stat == status.STATUS_FAILED: + if stat == SmartSimStatus.STATUS_FAILED: exp_2.stop(reloaded_orc) assert False exp_2.stop(reloaded_orc) diff --git a/tests/test_run_settings.py b/tests/test_run_settings.py index b9439f41a..056dad64b 100644 --- a/tests/test_run_settings.py +++ b/tests/test_run_settings.py @@ -185,55 +185,34 @@ def test_add_exe_args_list_of_mixed(): settings.add_exe_args(["1", "2", 3]) -def test_add_exe_args_space_delimited_string(): +def test_add_exe_args_list_of_lists(): """Ensure that any non-string exe arg fails validation for all""" settings = RunSettings("python") - expected = ["1", "2", "3"] - settings.add_exe_args("1 2 3") - - assert settings.exe_args == expected - - -def test_add_exe_args_list_of_mixed_lists(): - """Ensure that any non-string 
exe arg fails validation for all""" - settings = RunSettings("python") - with pytest.raises(TypeError) as type_error: - settings.add_exe_args([["1", "2", 3], ["4", "5", 6]]) - - assert "Executable arguments should be a list of str" in type_error.value.args - - -def test_add_exe_args_list_of_mixed_lists_init(): - """Ensure that any non-string exe arg fails validation for all""" - exe_args = [["1", "2", 3], ["4", "5", 6]] - - with pytest.raises(TypeError) as type_error: - settings = RunSettings("python", exe_args=exe_args) - - assert "Executable arguments were not list of str or str" in type_error.value.args + with pytest.raises(TypeError): + settings.add_exe_args(["1", "2", "3"], ["1", "2", "3"]) -def test_add_exe_args_list_of_str_lists_init(): - """Ensure that list[list[str]] pass validation""" +def test_init_exe_args_list_of_lists(): + """Ensure that a list of lists exe arg fails validation""" exe_args = [["1", "2", "3"], ["4", "5", "6"]] + with pytest.raises(TypeError): + _ = RunSettings("python", exe_args=exe_args) - settings = RunSettings("python", exe_args=exe_args) - assert settings.exe_args == exe_args +def test_init_exe_args_list_of_lists_mixed(): + """Ensure that a list of lists exe arg fails validation""" + exe_args = [["1", "2", 3], ["4", "5", 6]] + with pytest.raises(TypeError): + _ = RunSettings("python", exe_args=exe_args) -def test_add_exe_args_list_of_str_lists(): - """Ensure that list[list[str]] fail validation when added via method""" - exe_args = [["1", "2", "3"], ["4", "5", "6"]] - +def test_add_exe_args_space_delimited_string(): + """Ensure that any non-string exe arg fails validation for all""" settings = RunSettings("python") + expected = ["1", "2", "3"] + settings.add_exe_args("1 2 3") - with pytest.raises(TypeError) as type_error: - settings.add_exe_args(exe_args) - - # NOTE that this behavior differs from sending constructor args like - # tested in test_add_exe_args_list_of_str_lists_init where it's allowed - assert "Executable arguments 
should be a list of str" in type_error.value.args + assert settings.exe_args == expected def test_format_run_args(): @@ -360,6 +339,7 @@ def test_set_format_args(set_str, val, key): pytest.param("set_task_map", (3,), id="set_task_map"), pytest.param("set_cpus_per_task", (4,), id="set_cpus_per_task"), pytest.param("set_hostlist", ("hostlist",), id="set_hostlist"), + pytest.param("set_node_feature", ("P100",), id="set_node_feature"), pytest.param( "set_hostlist_from_file", ("~/hostfile",), id="set_hostlist_from_file" ), diff --git a/tests/test_schema_utils.py b/tests/test_schema_utils.py new file mode 100644 index 000000000..78789f8ef --- /dev/null +++ b/tests/test_schema_utils.py @@ -0,0 +1,217 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import collections +import json + +import pydantic +import pytest + +from smartsim._core.schemas.utils import ( + _DEFAULT_MSG_DELIM, + SchemaRegistry, + SocketSchemaTranslator, + _Message, +) + +# The tests in this file belong to the group_b group +pytestmark = pytest.mark.group_b + + +class Person(pydantic.BaseModel): + name: str + age: int + + +class Dog(pydantic.BaseModel): + name: str + age: int + + +class Book(pydantic.BaseModel): + title: str + num_pages: int + + +def test_equivalent_messages_are_equivalent(): + book = Book(title="A Story", num_pages=250) + msg_1 = _Message(book, "header") + msg_2 = _Message(book, "header") + + assert msg_1 is not msg_2 + assert msg_1 == msg_2 + assert str(msg_1) == str(msg_2) + assert msg_1 == _Message.from_str(str(msg_1), Book) + + +def test_schema_registrartion(): + registry = SchemaRegistry() + assert registry._map == {} + + registry.register("person")(Person) + assert registry._map == {"person": Person} + + registry.register("book")(Book) + assert registry._map == {"person": Person, "book": Book} + + +def test_cannot_register_a_schema_under_an_empty_str(): + registry = SchemaRegistry() + with pytest.raises(KeyError, match="Key cannot be the empty string"): + registry.register("") + + +def test_schema_to_string(): + registry = SchemaRegistry() + registry.register("person")(Person) + registry.register("book")(Book) + person = Person(name="Bob", age=36) + book = Book(title="The Greatest Story of 
All Time", num_pages=10_000) + assert registry.to_string(person) == str(_Message(person, "person")) + assert registry.to_string(book) == str(_Message(book, "book")) + + +def test_schemas_with_same_shape_are_mapped_correctly(): + registry = SchemaRegistry() + registry.register("person")(Person) + registry.register("dog")(Dog) + + person = Person(name="Mark", age=34) + dog = Dog(name="Fido", age=5) + + parsed_person = registry.from_string(registry.to_string(person)) + parsed_dog = registry.from_string(registry.to_string(dog)) + + assert isinstance(parsed_person, Person) + assert isinstance(parsed_dog, Dog) + + assert parsed_person == person + assert parsed_dog == dog + + +def test_registry_errors_if_types_overloaded(): + registry = SchemaRegistry() + registry.register("schema")(Person) + + with pytest.raises(KeyError): + registry.register("schema")(Book) + + +def test_registry_errors_if_msg_type_registered_with_delim_present(): + registry = SchemaRegistry() + with pytest.raises(ValueError, match="cannot contain delimiter"): + registry.register(f"some_key_with_the_{_DEFAULT_MSG_DELIM}_as_a_substring") + + +def test_registry_errors_on_unknown_schema(): + registry = SchemaRegistry() + registry.register("person")(Person) + + with pytest.raises(TypeError): + registry.to_string(Book(title="The Shortest Story of All Time", num_pages=1)) + + +def test_registry_correctly_maps_to_expected_type(): + registry = SchemaRegistry() + registry.register("person")(Person) + registry.register("book")(Book) + person = Person(name="Bob", age=36) + book = Book(title="The Most Average Story of All Time", num_pages=500) + assert registry.from_string(str(_Message(person, "person"))) == person + assert registry.from_string(str(_Message(book, "book"))) == book + + +def test_registery_errors_if_type_key_not_recognized(): + registry = SchemaRegistry() + registry.register("person")(Person) + + with pytest.raises(ValueError, match="^No type of value .* registered$"): + 
registry.from_string(str(_Message(Person(name="Grunk", age=5_000), "alien"))) + + +def test_registry_errors_if_type_key_is_missing(): + registry = SchemaRegistry() + registry.register("person")(Person) + + with pytest.raises(ValueError, match="Failed to determine schema type"): + registry.from_string("This string does not contain a delimiter") + + +class MockSocket: + def __init__(self, send_queue, recv_queue): + self.send_queue = send_queue + self.recv_queue = recv_queue + + def send_string(self, str_, *_args, **_kwargs): + assert isinstance(str_, str) + self.send_queue.append(str_) + + def recv_string(self, *_args, **_kwargs): + str_ = self.recv_queue.popleft() + assert isinstance(str_, str) + return str_ + + +class Request(pydantic.BaseModel): ... + + +class Response(pydantic.BaseModel): ... + + +def test_socket_schema_translator_uses_schema_registries(): + server_to_client = collections.deque() + client_to_server = collections.deque() + + server_socket = MockSocket(server_to_client, client_to_server) + client_socket = MockSocket(client_to_server, server_to_client) + + req_reg = SchemaRegistry() + res_reg = SchemaRegistry() + + req_reg.register("message")(Request) + res_reg.register("message")(Response) + + server = SocketSchemaTranslator(server_socket, res_reg, req_reg) + client = SocketSchemaTranslator(client_socket, req_reg, res_reg) + + # Check sockets are able to communicate seamlessly with schemas only + client.send(Request()) + assert len(client_to_server) == 1 + req = server.recv() + assert len(client_to_server) == 0 + assert isinstance(req, Request) + + server.send(Response()) + assert len(server_to_client) == 1 + res = client.recv() + assert len(server_to_client) == 0 + assert isinstance(res, Response) + + # Ensure users cannot send unexpected schemas + with pytest.raises(TypeError, match="Unregistered schema"): + client.send(Response()) + with pytest.raises(TypeError, match="Unregistered schema"): + server.send(Request()) diff --git 
a/tests/test_serialize.py b/tests/test_serialize.py index 9e92a4866..b2dc0b7a7 100644 --- a/tests/test_serialize.py +++ b/tests/test_serialize.py @@ -27,6 +27,7 @@ import json import logging from pathlib import Path +from uuid import uuid4 import pytest @@ -59,7 +60,7 @@ def manifest_json(test_dir, config) -> str: def test_serialize_creates_a_manifest_json_file_if_dne(test_dir, manifest_json): - lmb = LaunchedManifestBuilder("exp", test_dir, "launcher") + lmb = LaunchedManifestBuilder("exp", test_dir, "launcher", str(uuid4())) serialize.save_launch_manifest(lmb.finalize()) assert manifest_json.is_file() @@ -71,28 +72,29 @@ def test_serialize_creates_a_manifest_json_file_if_dne(test_dir, manifest_json): assert len(manifest["runs"]) == 1 -def test_serialize_does_not_write_manifest_json_if_telemetry_monitor_is_off( +def test_serialize_does_write_manifest_json_if_telemetry_monitor_is_off( test_dir, monkeypatch, manifest_json ): + """Ensure that the manifest is written even if telemetry is not collected""" monkeypatch.setattr( smartsim._core.config.config.Config, _CFG_TM_ENABLED_ATTR, property(lambda self: False), ) - lmb = LaunchedManifestBuilder("exp", test_dir, "launcher") + lmb = LaunchedManifestBuilder("exp", test_dir, "launcher", str(uuid4())) serialize.save_launch_manifest(lmb.finalize()) - assert not manifest_json.exists() + assert manifest_json.exists() def test_serialize_appends_a_manifest_json_exists(test_dir, manifest_json): serialize.save_launch_manifest( - LaunchedManifestBuilder("exp", test_dir, "launcher").finalize() + LaunchedManifestBuilder("exp", test_dir, "launcher", str(uuid4())).finalize() ) serialize.save_launch_manifest( - LaunchedManifestBuilder("exp", test_dir, "launcher").finalize() + LaunchedManifestBuilder("exp", test_dir, "launcher", str(uuid4())).finalize() ) serialize.save_launch_manifest( - LaunchedManifestBuilder("exp", test_dir, "launcher").finalize() + LaunchedManifestBuilder("exp", test_dir, "launcher", str(uuid4())).finalize() ) 
assert manifest_json.is_file() @@ -108,7 +110,7 @@ def test_serialize_overwites_file_if_not_json(test_dir, manifest_json): with open(manifest_json, "w") as f: f.write("This is not a json\n") - lmb = LaunchedManifestBuilder("exp", test_dir, "launcher") + lmb = LaunchedManifestBuilder("exp", test_dir, "launcher", str(uuid4())) serialize.save_launch_manifest(lmb.finalize()) with open(manifest_json, "r") as f: assert isinstance(json.load(f), dict) diff --git a/tests/test_slurm_get_alloc.py b/tests/test_slurm_get_alloc.py index aa12ce362..561e3d984 100644 --- a/tests/test_slurm_get_alloc.py +++ b/tests/test_slurm_get_alloc.py @@ -26,7 +26,7 @@ import pytest -from smartsim.slurm import _get_alloc_cmd +from smartsim.wlm.slurm import _get_alloc_cmd # The tests in this file belong to the group_b group pytestmark = pytest.mark.group_b diff --git a/tests/test_slurm_settings.py b/tests/test_slurm_settings.py index aa5b2be11..d9d820244 100644 --- a/tests/test_slurm_settings.py +++ b/tests/test_slurm_settings.py @@ -105,6 +105,7 @@ def test_mpmd_compound_env_exports(): step = SrunStep("teststep", "./", srun) + step.meta["status_dir"] = "" launch_cmd = step.get_launch_cmd() env_cmds = [v for v in launch_cmd if v == "env"] assert "env" in launch_cmd and len(env_cmds) == 1 @@ -164,6 +165,7 @@ def test_mpmd_non_compound_env_exports(): step = SrunStep("teststep", "./", srun) + step.meta["status_dir"] = "" launch_cmd = step.get_launch_cmd() env_cmds = [v for v in launch_cmd if v == "env"] assert "env" not in launch_cmd and len(env_cmds) == 0 @@ -223,6 +225,7 @@ def test_mpmd_non_compound_no_exports(): step = SrunStep("teststep", "./", srun) + step.meta["status_dir"] = "" launch_cmd = step.get_launch_cmd() env_cmds = [v for v in launch_cmd if v == "env"] assert "env" not in launch_cmd and len(env_cmds) == 0 @@ -338,6 +341,21 @@ def test_set_hostlist(): rs.set_hostlist([5]) +def test_set_node_feature(): + rs = SrunSettings("python") + rs.set_node_feature(["P100", "V100"]) + assert 
rs.run_args["C"] == "P100,V100" + + rs.set_node_feature("P100") + assert rs.run_args["C"] == "P100" + + with pytest.raises(TypeError): + rs.set_node_feature(5) + + with pytest.raises(TypeError): + rs.set_node_feature(["P100", 5]) + + def test_set_hostlist_from_file(): rs = SrunSettings("python") rs.set_hostlist_from_file("./path/to/hostfile") diff --git a/tests/test_slurm_validation.py b/tests/test_slurm_validation.py index 02baddce6..fbb6406c6 100644 --- a/tests/test_slurm_validation.py +++ b/tests/test_slurm_validation.py @@ -28,7 +28,11 @@ import pytest from smartsim.error.errors import LauncherError -from smartsim.slurm import _get_system_partition_info, get_default_partition, validate +from smartsim.wlm.slurm import ( + _get_system_partition_info, + get_default_partition, + validate, +) # The tests in this file belong to the group_b group pytestmark = pytest.mark.group_b diff --git a/tests/test_smartredis.py b/tests/test_smartredis.py index 282e708cc..6f7b19934 100644 --- a/tests/test_smartredis.py +++ b/tests/test_smartredis.py @@ -27,10 +27,11 @@ import pytest -from smartsim import Experiment, status +from smartsim import Experiment from smartsim._core.utils import installed_redisai_backends from smartsim.database import Orchestrator from smartsim.entity import Ensemble, Model +from smartsim.status import SmartSimStatus # The tests in this file belong to the group_b group pytestmark = pytest.mark.group_b @@ -59,22 +60,17 @@ ) -def test_exchange(fileutils, test_dir, wlmutils): +def test_exchange(local_experiment, local_db, prepare_db, fileutils): """Run two processes, each process puts a tensor on the DB, then accesses the other process's tensor. Finally, the tensor is used to run a model. 
""" - exp = Experiment( - "smartredis_ensemble_exchange", exp_path=test_dir, launcher="local" - ) - + db = prepare_db(local_db).orchestrator # create and start a database - orc = Orchestrator(port=wlmutils.get_test_port()) - exp.generate(orc) - exp.start(orc, block=False) + local_experiment.reconnect_orchestrator(db.checkpoint_file) - rs = exp.create_run_settings("python", "producer.py --exchange") + rs = local_experiment.create_run_settings("python", "producer.py --exchange") params = {"mult": [1, -10]} ensemble = Ensemble( name="producer", @@ -89,21 +85,17 @@ def test_exchange(fileutils, test_dir, wlmutils): config = fileutils.get_test_conf_path("smartredis") ensemble.attach_generator_files(to_copy=[config]) - exp.generate(ensemble) + local_experiment.generate(ensemble) # start the models - exp.start(ensemble, summary=False) + local_experiment.start(ensemble, summary=False) # get and confirm statuses - statuses = exp.get_status(ensemble) - try: - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) - finally: - # stop the orchestrator - exp.stop(orc) + statuses = local_experiment.get_status(ensemble) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) -def test_consumer(fileutils, test_dir, wlmutils): +def test_consumer(local_experiment, local_db, prepare_db, fileutils): """Run three processes, each one of the first two processes puts a tensor on the DB; the third process accesses the tensors put by the two producers. @@ -111,17 +103,11 @@ def test_consumer(fileutils, test_dir, wlmutils): and the consumer accesses the two results. 
""" - exp = Experiment( - "smartredis_ensemble_consumer", exp_path=test_dir, launcher="local" - ) - - # create and start a database - orc = Orchestrator(port=wlmutils.get_test_port()) - exp.generate(orc) - exp.start(orc, block=False) + db = prepare_db(local_db).orchestrator + local_experiment.reconnect_orchestrator(db.checkpoint_file) - rs_prod = exp.create_run_settings("python", "producer.py") - rs_consumer = exp.create_run_settings("python", "consumer.py") + rs_prod = local_experiment.create_run_settings("python", "producer.py") + rs_consumer = local_experiment.create_run_settings("python", "consumer.py") params = {"mult": [1, -10]} ensemble = Ensemble( name="producer", params=params, run_settings=rs_prod, perm_strat="step" @@ -138,15 +124,11 @@ def test_consumer(fileutils, test_dir, wlmutils): config = fileutils.get_test_conf_path("smartredis") ensemble.attach_generator_files(to_copy=[config]) - exp.generate(ensemble) + local_experiment.generate(ensemble) # start the models - exp.start(ensemble, summary=False) + local_experiment.start(ensemble, summary=False) # get and confirm statuses - statuses = exp.get_status(ensemble) - try: - assert all([stat == status.STATUS_COMPLETED for stat in statuses]) - finally: - # stop the orchestrator - exp.stop(orc) + statuses = local_experiment.get_status(ensemble) + assert all([stat == SmartSimStatus.STATUS_COMPLETED for stat in statuses]) diff --git a/tests/test_step_info.py b/tests/test_step_info.py index ec589ae76..fcccaa9cd 100644 --- a/tests/test_step_info.py +++ b/tests/test_step_info.py @@ -26,8 +26,8 @@ import pytest -from smartsim import status from smartsim._core.launcher.stepInfo import * +from smartsim.status import SmartSimStatus # The tests in this file belong to the group_b group pytestmark = pytest.mark.group_b @@ -35,7 +35,9 @@ def test_str(): step_info = StepInfo( - status=status.STATUS_COMPLETED, launcher_status="COMPLETED", returncode=0 + status=SmartSimStatus.STATUS_COMPLETED, + 
launcher_status="COMPLETED", + returncode=0, ) expected_output = "Status: Completed | Launcher Status COMPLETED | Returncode 0" @@ -45,4 +47,4 @@ def test_str(): def test_default(): step_info = UnmanagedStepInfo() - assert step_info._get_smartsim_status(None) == status.STATUS_FAILED + assert step_info._get_smartsim_status(None) == SmartSimStatus.STATUS_FAILED diff --git a/tests/test_symlinking.py b/tests/test_symlinking.py new file mode 100644 index 000000000..2b70e3e9f --- /dev/null +++ b/tests/test_symlinking.py @@ -0,0 +1,254 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import os +import pathlib + +import pytest + +from smartsim import Experiment +from smartsim._core.config import CONFIG +from smartsim._core.control.controller import Controller, _AnonymousBatchJob +from smartsim.database.orchestrator import Orchestrator +from smartsim.entity.ensemble import Ensemble +from smartsim.entity.model import Model +from smartsim.settings.base import RunSettings +from smartsim.settings.slurmSettings import SbatchSettings, SrunSettings + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + +controller = Controller() +slurm_controller = Controller(launcher="slurm") + +rs = RunSettings("echo", ["spam", "eggs"]) +bs = SbatchSettings() +batch_rs = SrunSettings("echo", ["spam", "eggs"]) + +ens = Ensemble("ens", params={}, run_settings=rs, batch_settings=bs, replicas=3) +orc = Orchestrator(db_nodes=3, batch=True, launcher="slurm", run_command="srun") +model = Model("test_model", params={}, path="", run_settings=rs) +batch_model = Model( + "batch_test_model", params={}, path="", run_settings=batch_rs, batch_settings=bs +) +anon_batch_model = _AnonymousBatchJob(batch_model) + + +@pytest.mark.parametrize( + "entity", + [pytest.param(ens, id="ensemble"), pytest.param(model, id="model")], +) +def test_symlink(test_dir, entity): + """Test symlinking historical output files""" + entity.path = test_dir + if entity.type == Ensemble: + for member in ens.models: + symlink_with_create_job_step(test_dir, member) + else: + symlink_with_create_job_step(test_dir, entity) + + +def symlink_with_create_job_step(test_dir, entity): + """Function that helps cut down on repeated testing code""" + exp_dir = pathlib.Path(test_dir) + entity.path = test_dir + status_dir = exp_dir / CONFIG.telemetry_subdir / entity.type + step = controller._create_job_step(entity, status_dir) + controller.symlink_output_files(step, entity) + assert pathlib.Path(entity.path, f"{entity.name}.out").is_symlink() + assert pathlib.Path(entity.path, 
f"{entity.name}.err").is_symlink() + assert os.readlink(pathlib.Path(entity.path, f"{entity.name}.out")) == str( + status_dir / entity.name / (entity.name + ".out") + ) + assert os.readlink(pathlib.Path(entity.path, f"{entity.name}.err")) == str( + status_dir / entity.name / (entity.name + ".err") + ) + + +@pytest.mark.parametrize( + "entity", + [ + pytest.param(ens, id="ensemble"), + pytest.param(orc, id="orchestrator"), + pytest.param(anon_batch_model, id="model"), + ], +) +def test_batch_symlink(entity, test_dir): + """Test symlinking historical output files""" + exp_dir = pathlib.Path(test_dir) + entity.path = test_dir + status_dir = exp_dir / CONFIG.telemetry_subdir / entity.type + batch_step, substeps = slurm_controller._create_batch_job_step(entity, status_dir) + for step in substeps: + slurm_controller.symlink_output_files(step, entity) + assert pathlib.Path(entity.path, f"{entity.name}.out").is_symlink() + assert pathlib.Path(entity.path, f"{entity.name}.err").is_symlink() + assert os.readlink(pathlib.Path(entity.path, f"{entity.name}.out")) == str( + status_dir / entity.name / step.entity_name / (step.entity_name + ".out") + ) + assert os.readlink(pathlib.Path(entity.path, f"{entity.name}.err")) == str( + status_dir / entity.name / step.entity_name / (step.entity_name + ".err") + ) + + +def test_symlink_error(test_dir): + """Ensure FileNotFoundError is thrown""" + bad_model = Model( + "bad_model", + params={}, + path=pathlib.Path(test_dir, "badpath"), + run_settings=RunSettings("echo"), + ) + telem_dir = pathlib.Path(test_dir, "bad_model_telemetry") + bad_step = controller._create_job_step(bad_model, telem_dir) + with pytest.raises(FileNotFoundError): + controller.symlink_output_files(bad_step, bad_model) + + +def test_failed_model_launch_symlinks(test_dir): + exp_name = "failed-exp" + exp = Experiment(exp_name, exp_path=test_dir) + test_model = exp.create_model( + "test_model", run_settings=batch_rs, batch_settings=bs + ) + exp.generate(test_model) + 
with pytest.raises(TypeError): + exp.start(test_model) + + _should_not_be_symlinked(pathlib.Path(test_model.path)) + assert not pathlib.Path(test_model.path, f"{test_model.name}.out").is_symlink() + assert not pathlib.Path(test_model.path, f"{test_model.name}.err").is_symlink() + + +def test_failed_ensemble_launch_symlinks(test_dir): + exp_name = "failed-exp" + exp = Experiment(exp_name, exp_path=test_dir) + test_ensemble = exp.create_ensemble( + "test_ensemble", params={}, batch_settings=bs, run_settings=batch_rs, replicas=3 + ) + exp.generate(test_ensemble) + with pytest.raises(TypeError): + exp.start(test_ensemble) + + _should_not_be_symlinked(pathlib.Path(test_ensemble.path)) + assert not pathlib.Path( + test_ensemble.path, f"{test_ensemble.name}.out" + ).is_symlink() + assert not pathlib.Path( + test_ensemble.path, f"{test_ensemble.name}.err" + ).is_symlink() + + for i in range(len(test_ensemble.models)): + assert not pathlib.Path( + test_ensemble.path, + f"{test_ensemble.name}_{i}", + f"{test_ensemble.name}_{i}.out", + ).is_symlink() + assert not pathlib.Path( + test_ensemble.path, + f"{test_ensemble.name}_{i}", + f"{test_ensemble.name}_{i}.err", + ).is_symlink() + + +def test_non_batch_ensemble_symlinks(test_dir): + exp_name = "test-non-batch-ensemble" + rs = RunSettings("echo", ["spam", "eggs"]) + exp = Experiment(exp_name, exp_path=test_dir) + test_ensemble = exp.create_ensemble( + "test_ensemble", params={}, run_settings=rs, replicas=3 + ) + exp.generate(test_ensemble) + exp.start(test_ensemble, block=True) + + for i in range(len(test_ensemble.models)): + _should_be_symlinked( + pathlib.Path( + test_ensemble.path, + f"{test_ensemble.name}_{i}", + f"{test_ensemble.name}_{i}.out", + ), + True, + ) + _should_be_symlinked( + pathlib.Path( + test_ensemble.path, + f"{test_ensemble.name}_{i}", + f"{test_ensemble.name}_{i}.err", + ), + False, + ) + + _should_not_be_symlinked(pathlib.Path(exp.exp_path, "smartsim_params.txt")) + + +def 
test_non_batch_model_symlinks(test_dir): + exp_name = "test-non-batch-model" + exp = Experiment(exp_name, exp_path=test_dir) + rs = RunSettings("echo", ["spam", "eggs"]) + + test_model = exp.create_model("test_model", path=test_dir, run_settings=rs) + exp.generate(test_model) + exp.start(test_model, block=True) + + assert pathlib.Path(test_model.path).exists() + + _should_be_symlinked(pathlib.Path(test_model.path, f"{test_model.name}.out"), True) + _should_be_symlinked(pathlib.Path(test_model.path, f"{test_model.name}.err"), False) + _should_not_be_symlinked(pathlib.Path(exp.exp_path, "smartsim_params.txt")) + + +def test_non_batch_orchestrator_symlinks(test_dir): + exp = Experiment("test-non-batch-orc", exp_path=test_dir) + + db = exp.create_database(interface="lo") + exp.generate(db) + exp.start(db, block=True) + exp.stop(db) + + for i in range(db.db_nodes): + _should_be_symlinked(pathlib.Path(db.path, f"{db.name}_{i}.out"), False) + _should_be_symlinked(pathlib.Path(db.path, f"{db.name}_{i}.err"), False) + + _should_not_be_symlinked(pathlib.Path(exp.exp_path, "smartsim_params.txt")) + + +def _should_not_be_symlinked(non_linked_path: pathlib.Path): + """Helper function for assertions about paths that should NOT be symlinked""" + assert non_linked_path.exists() + assert not non_linked_path.is_symlink() + + +def _should_be_symlinked(linked_path: pathlib.Path, open_file: bool): + """Helper function for assertions about paths that SHOULD be symlinked""" + assert linked_path.exists() + assert linked_path.is_symlink() + # ensure the source file exists + assert pathlib.Path(os.readlink(linked_path)).exists() + if open_file: + with open(pathlib.Path(os.readlink(linked_path)), "r") as file: + log_contents = file.read() + assert "spam eggs" in log_contents diff --git a/tests/test_telemetry_monitor.py b/tests/test_telemetry_monitor.py index ac3599d7d..c1bfe2719 100644 --- a/tests/test_telemetry_monitor.py +++ b/tests/test_telemetry_monitor.py @@ -26,6 +26,7 @@ import 
logging +import multiprocessing as mp import pathlib import sys import time @@ -39,32 +40,23 @@ from smartsim import Experiment from smartsim._core.control.job import Job, JobEntity from smartsim._core.control.jobmanager import JobManager -from smartsim._core.entrypoints.telemetrymonitor import ( - ManifestEventHandler, - can_shutdown, - event_loop, - faux_return_code, - get_parser, - get_ts, - hydrate_persistable, - load_manifest, - track_event, -) +from smartsim._core.entrypoints.telemetrymonitor import get_parser from smartsim._core.launcher.launcher import WLMLauncher from smartsim._core.launcher.slurm.slurmLauncher import SlurmLauncher from smartsim._core.launcher.step.step import Step, proxyable_launch_cmd from smartsim._core.launcher.stepInfo import StepInfo from smartsim._core.utils import serialize +from smartsim._core.utils.helpers import get_ts_ms +from smartsim._core.utils.telemetry.manifest import Run, RuntimeManifest +from smartsim._core.utils.telemetry.telemetry import ( + ManifestEventHandler, + TelemetryMonitor, + TelemetryMonitorArgs, +) +from smartsim._core.utils.telemetry.util import map_return_code, write_event from smartsim.error.errors import UnproxyableStepError from smartsim.settings.base import RunSettings -from smartsim.status import ( - STATUS_CANCELLED, - STATUS_COMPLETED, - STATUS_FAILED, - STATUS_NEW, - STATUS_PAUSED, - STATUS_RUNNING, -) +from smartsim.status import SmartSimStatus ALL_ARGS = {"-exp_dir", "-frequency"} PROXY_ENTRY_POINT = "smartsim._core.entrypoints.indirect" @@ -80,8 +72,7 @@ pytest.test_launcher == "local", reason="Test requires WLM" ) - -logger = logging.getLogger() +logger = logging.getLogger(__name__) # The tests in this file belong to the slow_tests group pytestmark = pytest.mark.slow_tests @@ -93,7 +84,21 @@ def turn_on_tm(monkeypatch): yield -def snooze_nonblocking( +def write_stop_file(entity: JobEntity, test_dir: pathlib.Path, duration: int): + time.sleep(duration) + write_event( + get_ts_ms(), + 
entity.task_id, + entity.step_id, + entity.type, + "stop", + test_dir, + "mock stop event", + 0, + ) + + +def snooze_blocking( test_dir: pathlib.Path, max_delay: int = 20, post_data_delay: int = 2 ): # let the non-blocking experiment complete. @@ -151,18 +156,56 @@ def test_parser(): def test_ts(): """Ensure expected output type""" - ts = get_ts() + ts = get_ts_ms() assert isinstance(ts, int) +@pytest.mark.parametrize( + ["freq"], + [ + pytest.param("1", id="1s delay"), + pytest.param("1.0", id="1s (float) freq"), + pytest.param("1.5", id="1.5s (float) freq"), + pytest.param("60", id="upper bound freq"), + pytest.param("60.0", id="upper bound (float) freq"), + ], +) +def test_valid_frequencies(freq: t.Union[int, float], test_dir: str): + """Ensure validation does not raise an exception on values in valid range""" + # check_frequency(float(freq)) + telmon_args = TelemetryMonitorArgs(test_dir, float(freq), 30, logging.DEBUG) + # telmon_args raises ValueError on bad inputs + assert telmon_args is not None + + +@pytest.mark.parametrize( + ["freq"], + [ + pytest.param("-1", id="negative freq"), + pytest.param("0", id="0s freq"), + pytest.param("0.9", id="0.9s freq"), + pytest.param("0.9999", id="lower bound"), + pytest.param("600.0001", id="just over upper"), + pytest.param("3600", id="too high"), + pytest.param("100000", id="bonkers high"), + ], +) +def test_invalid_frequencies(freq: t.Union[int, float], test_dir: str): + """Ensure validation raises an exception on values outside valid range""" + exp_err_msg = "in the range" + with pytest.raises(ValueError) as ex: + TelemetryMonitorArgs(test_dir, float(freq), 30, logging.DEBUG) + assert exp_err_msg in "".join(ex.value.args) + + @pytest.mark.parametrize( ["etype", "task_id", "step_id", "timestamp", "evt_type"], [ - pytest.param("ensemble", "", "123", get_ts(), "start", id="start event"), - pytest.param("ensemble", "", "123", get_ts(), "stop", id="stop event"), + pytest.param("ensemble", "", "123", get_ts_ms(), "start", 
id="start event"), + pytest.param("ensemble", "", "123", get_ts_ms(), "stop", id="stop event"), ], ) -def test_track_event( +def test_write_event( etype: str, task_id: str, step_id: str, @@ -172,7 +215,7 @@ def test_track_event( ): """Ensure that track event writes a file to the expected location""" exp_path = pathlib.Path(test_dir) - track_event(timestamp, task_id, step_id, etype, evt_type, exp_path, logger) + write_event(timestamp, task_id, step_id, etype, evt_type, exp_path) expected_output = exp_path / f"{evt_type}.json" @@ -180,6 +223,59 @@ def test_track_event( assert expected_output.is_file() +@pytest.mark.parametrize( + ["entity_type", "task_id", "step_id", "timestamp", "evt_type"], + [ + pytest.param("ensemble", "", "123", get_ts_ms(), "start", id="start event"), + pytest.param("ensemble", "", "123", get_ts_ms(), "stop", id="stop event"), + ], +) +def test_write_event_overwrite( + entity_type: str, + task_id: str, + step_id: str, + timestamp: int, + evt_type: str, + test_dir: str, +): + """Ensure that `write_event` does not overwrite an existing file if called more than once""" + exp_path = pathlib.Path(test_dir) + write_event(timestamp, task_id, step_id, entity_type, evt_type, exp_path) + + expected_output = exp_path / f"{evt_type}.json" + + assert expected_output.exists() + assert expected_output.is_file() + + # grab whatever is in the file now to compare against + original_content = expected_output.read_text() + + updated_timestamp = get_ts_ms() + updated_task_id = task_id + "xxx" + updated_step_id = step_id + "xxx" + updated_entity = entity_type + "xxx" + + # write to the same location + write_event( + updated_timestamp, + updated_task_id, + updated_step_id, + updated_entity, + evt_type, + exp_path, + ) + + # read in file content after attempted overwrite + with open(expected_output, "r") as validate_fp: + validate_output = validate_fp.read() + + # verify the content matches the old content + assert str(timestamp) in validate_output + assert 
str(updated_timestamp) not in validate_output + assert "xxx" not in validate_output + assert validate_output == original_content + + def test_load_manifest(fileutils: FileUtils, test_dir: str, config: cfg.Config): """Ensure that the runtime manifest loads correctly""" sample_manifest_path = fileutils.get_test_conf_path("telemetry/telemetry.json") @@ -194,7 +290,7 @@ def test_load_manifest(fileutils: FileUtils, test_dir: str, config: cfg.Config): test_manifest = pathlib.Path(test_manifest_path) assert test_manifest.exists() - manifest = load_manifest(test_manifest_path) + manifest = RuntimeManifest.load_manifest(test_manifest_path) assert manifest.name == "my-exp" assert str(manifest.path) == "/path/to/my-exp" assert manifest.launcher == "Slurm" @@ -213,7 +309,7 @@ def test_load_manifest_colo_model(fileutils: FileUtils): sample_manifest = pathlib.Path(sample_manifest_path) assert sample_manifest.exists() - manifest = load_manifest(sample_manifest_path) + manifest = RuntimeManifest.load_manifest(sample_manifest_path) assert manifest.name == "my-exp" assert str(manifest.path) == "/tmp/my-exp" assert manifest.launcher == "Slurm" @@ -229,7 +325,7 @@ def test_load_manifest_serial_models(fileutils: FileUtils): sample_manifest = pathlib.Path(sample_manifest_path) assert sample_manifest.exists() - manifest = load_manifest(sample_manifest_path) + manifest = RuntimeManifest.load_manifest(sample_manifest_path) assert manifest.name == "my-exp" assert str(manifest.path) == "/tmp/my-exp" assert manifest.launcher == "Slurm" @@ -246,7 +342,7 @@ def test_load_manifest_db_and_models(fileutils: FileUtils): sample_manifest = pathlib.Path(sample_manifest_path) assert sample_manifest.exists() - manifest = load_manifest(sample_manifest_path) + manifest = RuntimeManifest.load_manifest(sample_manifest_path) assert manifest.name == "my-exp" assert str(manifest.path) == "/tmp/my-exp" assert manifest.launcher == "Slurm" @@ -255,6 +351,12 @@ def test_load_manifest_db_and_models(fileutils: 
FileUtils): assert len(manifest.runs[0].orchestrators) == 1 assert len(manifest.runs[1].models) == 1 + # verify collector paths from manifest are deserialized to collector config + assert manifest.runs[0].orchestrators[0].collectors["client"] + assert manifest.runs[0].orchestrators[0].collectors["memory"] + # verify collector paths missing from manifest are empty + assert not manifest.runs[0].orchestrators[0].collectors["client_count"] + def test_load_manifest_db_and_models_1run(fileutils: FileUtils): """Ensure that the runtime manifest loads correctly when containing models & @@ -266,7 +368,7 @@ def test_load_manifest_db_and_models_1run(fileutils: FileUtils): sample_manifest = pathlib.Path(sample_manifest_path) assert sample_manifest.exists() - manifest = load_manifest(sample_manifest_path) + manifest = RuntimeManifest.load_manifest(sample_manifest_path) assert manifest.name == "my-exp" assert str(manifest.path) == "/tmp/my-exp" assert manifest.launcher == "Slurm" @@ -289,7 +391,7 @@ def test_persistable_computed_properties( task_id: str, step_id: str, etype: str, exp_isorch: bool, exp_ismanaged: bool ): name = f"test-{etype}-{uuid.uuid4()}" - timestamp = get_ts() + timestamp = get_ts_ms() exp_dir = pathlib.Path("/foo/bar") stored = { "name": name, @@ -300,7 +402,8 @@ def test_persistable_computed_properties( "step_id": step_id, }, } - persistables = hydrate_persistable(etype, stored, exp_dir) + faux_experiment = {"launcher": "local"} + persistables = Run.load_entity(etype, stored, exp_dir, faux_experiment) persistable = persistables[0] if persistables else None assert persistable.is_managed == exp_ismanaged @@ -314,7 +417,7 @@ def test_deserialize_ensemble(fileutils: FileUtils): sample_manifest = pathlib.Path(sample_manifest_path) assert sample_manifest.exists() - manifest = load_manifest(sample_manifest_path) + manifest = RuntimeManifest.load_manifest(sample_manifest_path) assert manifest assert len(manifest.runs) == 1 @@ -324,70 +427,85 @@ def 
test_deserialize_ensemble(fileutils: FileUtils): assert len(manifest.runs[0].models) == 8 -def test_shutdown_conditions(): - """Ensure conditions to shutdown telemetry monitor are correctly evaluated""" +def test_shutdown_conditions__no_monitored_jobs(test_dir: str): + """Show that an event handler w/no monitored jobs can shutdown""" job_entity1 = JobEntity() job_entity1.name = "xyz" job_entity1.step_id = "123" job_entity1.task_id = "" - logger = logging.getLogger() + mani_handler = ManifestEventHandler("xyz") - # show that an event handler w/no monitored jobs can shutdown - mani_handler = ManifestEventHandler("xyz", logger) - assert can_shutdown(mani_handler, logger) + tm_args = TelemetryMonitorArgs(test_dir, 1, 10, logging.DEBUG) + telmon = TelemetryMonitor(tm_args) + telmon._action_handler = mani_handler # replace w/mock handler - # show that an event handler w/a monitored job cannot shutdown - mani_handler = ManifestEventHandler("xyz", logger) + assert telmon._can_shutdown() + + +def test_shutdown_conditions__has_monitored_job(test_dir: str): + """Show that an event handler w/a monitored job cannot shutdown""" + job_entity1 = JobEntity() + job_entity1.name = "xyz" + job_entity1.step_id = "123" + job_entity1.task_id = "" + + mani_handler = ManifestEventHandler("xyz") mani_handler.job_manager.add_job( job_entity1.name, job_entity1.step_id, job_entity1, False ) - assert not can_shutdown(mani_handler, logger) + tm_args = TelemetryMonitorArgs(test_dir, 1, 10, logging.DEBUG) + telmon = TelemetryMonitor(tm_args) + telmon._action_handler = mani_handler + + assert not telmon._can_shutdown() assert not bool(mani_handler.job_manager.db_jobs) assert bool(mani_handler.job_manager.jobs) - # show that an event handler w/a monitored db cannot shutdown - mani_handler = ManifestEventHandler("xyz", logger) - job_entity1.type = "orchestrator" - mani_handler.job_manager.add_job( - job_entity1.name, job_entity1.step_id, job_entity1, False - ) - assert not can_shutdown(mani_handler, 
logger) - assert bool(mani_handler.job_manager.db_jobs) - assert not bool(mani_handler.job_manager.jobs) - # show that an event handler w/a dbs & tasks cannot shutdown - job_entity2 = JobEntity() - job_entity2.name = "xyz" - job_entity2.step_id = "123" - job_entity2.task_id = "" +def test_shutdown_conditions__has_db(test_dir: str): + """Show that an event handler w/a monitored db cannot shutdown""" + job_entity1 = JobEntity() + job_entity1.name = "xyz" + job_entity1.step_id = "123" + job_entity1.task_id = "" + job_entity1.type = "orchestrator" # <---- make entity appear as db - mani_handler = ManifestEventHandler("xyz", logger) - job_entity1.type = "orchestrator" + mani_handler = ManifestEventHandler("xyz") + ## TODO: see next comment and combine an add_job method on manieventhandler + # and _use within_ manieventhandler + # PROBABLY just encapsulating the body of for run in runs: for entity in run.flatten()... mani_handler.job_manager.add_job( job_entity1.name, job_entity1.step_id, job_entity1, False ) - - mani_handler.job_manager.add_job( - job_entity2.name, job_entity2.step_id, job_entity2, False - ) - assert not can_shutdown(mani_handler, logger) - assert bool(mani_handler.job_manager.db_jobs) - assert bool(mani_handler.job_manager.jobs) - - # ... now, show that removing 1 of 2 jobs still doesn't shutdown - mani_handler.job_manager.db_jobs.popitem() - assert not can_shutdown(mani_handler, logger) - - # ... now, show that removing final job will allow shutdown - mani_handler.job_manager.jobs.popitem() - assert can_shutdown(mani_handler, logger) + ## TODO: !!!!!! shouldn't add_job (or something on mani_handler) + # allow me to add a job to "all the places" in one call... even a private one? 
+ mani_handler._tracked_jobs[job_entity1.key] = job_entity1 + tm_args = TelemetryMonitorArgs(test_dir, 1, 10, logging.DEBUG) + telmon = TelemetryMonitor(tm_args) + telmon._action_handler = mani_handler # replace w/mock handler + + assert not telmon._can_shutdown() + assert bool([j for j in mani_handler._tracked_jobs.values() if j.is_db]) + assert not bool(mani_handler.job_manager.jobs) -def test_auto_shutdown(): +@pytest.mark.parametrize( + "expected_duration", + [ + pytest.param(2000, id="2s cooldown"), + pytest.param(3000, id="3s cooldown"), + pytest.param(5000, id="5s cooldown"), + pytest.param(10000, id="10s cooldown"), + ], +) +@pytest.mark.asyncio +async def test_auto_shutdown__no_jobs(test_dir: str, expected_duration: int): """Ensure that the cooldown timer is respected""" class FauxObserver: + """Mock for the watchdog file system event listener""" + def __init__(self): self.stop_count = 0 @@ -400,35 +518,96 @@ def is_alive(self) -> bool: return True - job_entity1 = JobEntity() - job_entity1.name = "xyz" - job_entity1.step_id = "123" - job_entity1.task_id = "" - - frequency = 1 + frequency = 1000 + # monitor_pattern = f"{test_dir}/mock_mani.json" # show that an event handler w/out a monitored task will automatically stop mani_handler = ManifestEventHandler("xyz", logger) observer = FauxObserver() - duration = 2 + expected_duration = 2000 - ts0 = get_ts() - event_loop(observer, mani_handler, frequency, logger, cooldown_duration=duration) - ts1 = get_ts() + ts0 = get_ts_ms() + tm_args = TelemetryMonitorArgs( + test_dir, frequency / 1000, expected_duration / 1000, logging.DEBUG + ) + telmon = TelemetryMonitor(tm_args) + telmon._observer = observer # replace w/mock observer + telmon._action_handler = mani_handler # replace w/mock handler + + # with NO jobs registered, monitor should notice that it can + # shutdown immediately but wait for the cooldown period + await telmon.monitor() # observer, mani_handler, frequency, duration) + ts1 = get_ts_ms() - assert ts1 
- ts0 >= duration + test_duration = ts1 - ts0 + assert test_duration >= expected_duration assert observer.stop_count == 1 - # show that the new cooldown duration is respected + +@pytest.mark.parametrize( + "cooldown_ms, task_duration_ms", + [ + pytest.param(2000, 2000, id="2s task + 2s cooldown"), + pytest.param(3000, 4000, id="3s task + 4s cooldown"), + pytest.param(5000, 5000, id="5s task + 5s cooldown"), + pytest.param(5000, 10000, id="5s task + 10s cooldown"), + ], +) +@pytest.mark.asyncio +async def test_auto_shutdown__has_db( + test_dir: str, cooldown_ms: int, task_duration_ms: int +): + """Ensure that the cooldown timer is respected with a running db""" + + class FauxObserver: + """Mock for the watchdog file system event listener""" + + def __init__(self): + self.stop_count = 0 + + def stop(self): + self.stop_count += 1 + + def is_alive(self) -> bool: + if self.stop_count > 0: + return False + + return True + + entity = JobEntity() + entity.name = "db_0" + entity.step_id = "123" + entity.task_id = "" + entity.type = "orchestrator" + entity.telemetry_on = True + entity.status_dir = test_dir + + p = mp.Process( + target=write_stop_file, + args=(entity, pathlib.Path(test_dir), (task_duration_ms / 1000)), + ) + + frequency = 1000 + + # show that when a monitored task completes,the telmon automatically stops mani_handler = ManifestEventHandler("xyz", logger) observer = FauxObserver() - duration = 5 + expected_duration = (cooldown_ms / 1000) + (task_duration_ms / 1000) - ts0 = get_ts() - event_loop(observer, mani_handler, frequency, logger, cooldown_duration=duration) - ts1 = get_ts() + tm_args = TelemetryMonitorArgs( + test_dir, frequency / 1000, (cooldown_ms / 1000), logging.DEBUG + ) + telmon = TelemetryMonitor(tm_args) + telmon._observer = observer # replace w/mock observer + telmon._action_handler = mani_handler # replace w/mock handler + + ts0 = get_ts_ms() + p.start() # another process write the stop.json and telmon picks it up + await telmon.monitor() + 
ts1 = get_ts_ms() - assert ts1 - ts0 >= duration + test_duration = ts1 - ts0 + assert test_duration >= expected_duration assert observer.stop_count == 1 @@ -455,7 +634,7 @@ def test_telemetry_single_model(fileutils, test_dir, wlmutils, config): smartsim_model = exp.create_model("perroquet", app_settings) exp.generate(smartsim_model) exp.start(smartsim_model, block=True) - assert exp.get_status(smartsim_model)[0] == STATUS_COMPLETED + assert exp.get_status(smartsim_model)[0] == SmartSimStatus.STATUS_COMPLETED telemetry_output_path = pathlib.Path(test_dir) / config.telemetry_subdir start_events = list(telemetry_output_path.rglob("start.json")) @@ -494,9 +673,9 @@ def test_telemetry_single_model_nonblocking( exp.start(smartsim_model) telemetry_output_path = pathlib.Path(test_dir) / config.telemetry_subdir - snooze_nonblocking(telemetry_output_path, max_delay=60, post_data_delay=30) + snooze_blocking(telemetry_output_path, max_delay=10, post_data_delay=1) - assert exp.get_status(smartsim_model)[0] == STATUS_COMPLETED + assert exp.get_status(smartsim_model)[0] == SmartSimStatus.STATUS_COMPLETED start_events = list(telemetry_output_path.rglob("start.json")) stop_events = list(telemetry_output_path.rglob("stop.json")) @@ -534,7 +713,10 @@ def test_telemetry_serial_models(fileutils, test_dir, wlmutils, monkeypatch, con exp.generate(*smartsim_models) exp.start(*smartsim_models, block=True) assert all( - [status == STATUS_COMPLETED for status in exp.get_status(*smartsim_models)] + [ + status == SmartSimStatus.STATUS_COMPLETED + for status in exp.get_status(*smartsim_models) + ] ) telemetry_output_path = pathlib.Path(test_dir) / config.telemetry_subdir @@ -578,10 +760,13 @@ def test_telemetry_serial_models_nonblocking( exp.start(*smartsim_models) telemetry_output_path = pathlib.Path(test_dir) / config.telemetry_subdir - snooze_nonblocking(telemetry_output_path, max_delay=60, post_data_delay=10) + snooze_blocking(telemetry_output_path, max_delay=10, post_data_delay=1) assert 
all( - [status == STATUS_COMPLETED for status in exp.get_status(*smartsim_models)] + [ + status == SmartSimStatus.STATUS_COMPLETED + for status in exp.get_status(*smartsim_models) + ] ) start_events = list(telemetry_output_path.rglob("start.json")) @@ -618,7 +803,7 @@ def test_telemetry_db_only_with_generate(test_dir, wlmutils, monkeypatch, config try: exp.start(orc, block=True) - snooze_nonblocking(telemetry_output_path, max_delay=60, post_data_delay=10) + snooze_blocking(telemetry_output_path, max_delay=10, post_data_delay=1) start_events = list(telemetry_output_path.rglob("start.json")) stop_events = list(telemetry_output_path.rglob("stop.json")) @@ -627,9 +812,9 @@ def test_telemetry_db_only_with_generate(test_dir, wlmutils, monkeypatch, config assert len(stop_events) <= 1 finally: exp.stop(orc) - snooze_nonblocking(telemetry_output_path, max_delay=60, post_data_delay=10) + snooze_blocking(telemetry_output_path, max_delay=10, post_data_delay=1) - assert exp.get_status(orc)[0] == STATUS_CANCELLED + assert exp.get_status(orc)[0] == SmartSimStatus.STATUS_CANCELLED stop_events = list(telemetry_output_path.rglob("stop.json")) assert len(stop_events) == 1 @@ -655,13 +840,12 @@ def test_telemetry_db_only_without_generate(test_dir, wlmutils, monkeypatch, con # create regular database orc = exp.create_database(port=test_port, interface=test_interface) - orc.set_path(test_dir) telemetry_output_path = pathlib.Path(test_dir) / config.telemetry_subdir try: exp.start(orc) - snooze_nonblocking(telemetry_output_path, max_delay=60, post_data_delay=30) + snooze_blocking(telemetry_output_path, max_delay=10, post_data_delay=1) start_events = list(telemetry_output_path.rglob("start.json")) stop_events = list(telemetry_output_path.rglob("stop.json")) @@ -671,8 +855,8 @@ def test_telemetry_db_only_without_generate(test_dir, wlmutils, monkeypatch, con finally: exp.stop(orc) - snooze_nonblocking(telemetry_output_path, max_delay=60, post_data_delay=10) - assert exp.get_status(orc)[0] == 
STATUS_CANCELLED + snooze_blocking(telemetry_output_path, max_delay=10, post_data_delay=1) + assert exp.get_status(orc)[0] == SmartSimStatus.STATUS_CANCELLED stop_events = list(telemetry_output_path.rglob("stop.json")) assert len(stop_events) == 1 @@ -717,10 +901,10 @@ def test_telemetry_db_and_model(fileutils, test_dir, wlmutils, monkeypatch, conf exp.stop(orc) telemetry_output_path = pathlib.Path(test_dir) / config.telemetry_subdir - snooze_nonblocking(telemetry_output_path, max_delay=60, post_data_delay=30) + snooze_blocking(telemetry_output_path, max_delay=10, post_data_delay=1) - assert exp.get_status(orc)[0] == STATUS_CANCELLED - assert exp.get_status(smartsim_model)[0] == STATUS_COMPLETED + assert exp.get_status(orc)[0] == SmartSimStatus.STATUS_CANCELLED + assert exp.get_status(smartsim_model)[0] == SmartSimStatus.STATUS_COMPLETED start_events = list(telemetry_output_path.rglob("database/**/start.json")) stop_events = list(telemetry_output_path.rglob("database/**/stop.json")) @@ -759,10 +943,15 @@ def test_telemetry_ensemble(fileutils, test_dir, wlmutils, monkeypatch, config): ens = exp.create_ensemble("troupeau", run_settings=app_settings, replicas=5) exp.generate(ens) exp.start(ens, block=True) - assert all([status == STATUS_COMPLETED for status in exp.get_status(ens)]) + assert all( + [ + status == SmartSimStatus.STATUS_COMPLETED + for status in exp.get_status(ens) + ] + ) telemetry_output_path = pathlib.Path(test_dir) / config.telemetry_subdir - snooze_nonblocking(telemetry_output_path, max_delay=60, post_data_delay=30) + snooze_blocking(telemetry_output_path, max_delay=10, post_data_delay=1) start_events = list(telemetry_output_path.rglob("start.json")) stop_events = list(telemetry_output_path.rglob("stop.json")) @@ -798,7 +987,10 @@ def test_telemetry_colo(fileutils, test_dir, wlmutils, coloutils, monkeypatch, c exp.generate(smartsim_model) exp.start(smartsim_model, block=True) assert all( - [status == STATUS_COMPLETED for status in 
exp.get_status(smartsim_model)] + [ + status == SmartSimStatus.STATUS_COMPLETED + for status in exp.get_status(smartsim_model) + ] ) telemetry_output_path = pathlib.Path(test_dir) / config.telemetry_subdir @@ -819,7 +1011,12 @@ def test_telemetry_colo(fileutils, test_dir, wlmutils, coloutils, monkeypatch, c ], ) def test_telemetry_autoshutdown( - test_dir, wlmutils, monkeypatch, frequency, cooldown, config + test_dir: str, + wlmutils, + monkeypatch: pytest.MonkeyPatch, + frequency: int, + cooldown: int, + config: cfg.Config, ): """ Ensure that the telemetry monitor process shuts down after the desired @@ -830,6 +1027,8 @@ def test_telemetry_autoshutdown( ctx.setattr(cfg.Config, "telemetry_frequency", frequency) ctx.setattr(cfg.Config, "telemetry_cooldown", cooldown) + cooldown_ms = cooldown * 1000 + # Set experiment name exp_name = "telemetry_ensemble" @@ -839,9 +1038,11 @@ def test_telemetry_autoshutdown( # Create SmartSim Experiment exp = Experiment(exp_name, launcher=test_launcher, exp_path=test_dir) - start_time = get_ts() - stop_time = start_time - exp.start(block=False) + rs = RunSettings("python", exe_args=["sleep.py", "1"]) + model = exp.create_model("model", run_settings=rs) + + start_time = get_ts_ms() + exp.start(model, block=True) telemetry_output_path = pathlib.Path(test_dir) / config.telemetry_subdir empty_mani = list(telemetry_output_path.rglob("manifest.json")) @@ -854,13 +1055,15 @@ def test_telemetry_autoshutdown( # give some leeway during testing for the cooldown to get hit for i in range(10): if popen.poll() is not None: - stop_time = get_ts() print(f"Completed polling for telemetry shutdown after {i} attempts") break - time.sleep(3) + time.sleep(2) + + stop_time = get_ts_ms() + duration = stop_time - start_time assert popen.returncode is not None - assert stop_time >= (start_time + cooldown) + assert duration >= cooldown_ms class MockStep(Step): @@ -935,7 +1138,7 @@ def test_unmanaged_steps_are_proxyed_through_indirect( @for_all_wlm_launchers 
-def test_unmanaged_steps_are_not_proxied_if_the_telemetry_monitor_is_disabled( +def test_unmanaged_steps_are_not_proxyed_if_the_telemetry_monitor_is_disabled( wlm_launcher, mock_step_meta_dict, test_dir, monkeypatch ): monkeypatch.setattr(cfg.Config, CFG_TM_ENABLED_ATTR, False) @@ -1026,52 +1229,53 @@ def test_multistart_experiment( telemetry_output_path = pathlib.Path(test_dir) / config.telemetry_subdir db_start_events = list(telemetry_output_path.rglob("database/**/start.json")) - db_stop_events = list(telemetry_output_path.rglob("database/**/stop.json")) assert len(db_start_events) == 1 - assert len(db_stop_events) == 1 m_start_events = list(telemetry_output_path.rglob("model/**/start.json")) - m_stop_events = list(telemetry_output_path.rglob("model/**/stop.json")) assert len(m_start_events) == 1 - assert len(m_stop_events) == 1 e_start_events = list(telemetry_output_path.rglob("ensemble/**/start.json")) - e_stop_events = list(telemetry_output_path.rglob("ensemble/**/stop.json")) assert len(e_start_events) == 2 - assert len(e_stop_events) == 2 @pytest.mark.parametrize( "status_in, expected_out", [ - pytest.param(STATUS_CANCELLED, 1, id="failure on cancellation"), - pytest.param(STATUS_COMPLETED, 0, id="success on completion"), - pytest.param(STATUS_FAILED, 1, id="failure on failed"), - pytest.param(STATUS_NEW, None, id="failure on new"), - pytest.param(STATUS_PAUSED, None, id="failure on paused"), - pytest.param(STATUS_RUNNING, None, id="failure on running"), + pytest.param(SmartSimStatus.STATUS_CANCELLED, 1, id="failure on cancellation"), + pytest.param(SmartSimStatus.STATUS_COMPLETED, 0, id="success on completion"), + pytest.param(SmartSimStatus.STATUS_FAILED, 1, id="failure on failed"), + pytest.param(SmartSimStatus.STATUS_NEW, None, id="failure on new"), + pytest.param(SmartSimStatus.STATUS_PAUSED, None, id="failure on paused"), + pytest.param(SmartSimStatus.STATUS_RUNNING, None, id="failure on running"), ], ) def test_faux_rc(status_in: str, expected_out: 
t.Optional[int]): """Ensure faux response codes match expectations.""" step_info = StepInfo(status=status_in) - rc = faux_return_code(step_info) + rc = map_return_code(step_info) assert rc == expected_out @pytest.mark.parametrize( "status_in, expected_out, expected_has_jobs", [ - pytest.param(STATUS_CANCELLED, 1, False, id="failure on cancellation"), - pytest.param(STATUS_COMPLETED, 0, False, id="success on completion"), - pytest.param(STATUS_FAILED, 1, False, id="failure on failed"), - pytest.param(STATUS_NEW, None, True, id="failure on new"), - pytest.param(STATUS_PAUSED, None, True, id="failure on paused"), - pytest.param(STATUS_RUNNING, None, True, id="failure on running"), + pytest.param( + SmartSimStatus.STATUS_CANCELLED, 1, False, id="failure on cancellation" + ), + pytest.param( + SmartSimStatus.STATUS_COMPLETED, 0, False, id="success on completion" + ), + pytest.param(SmartSimStatus.STATUS_FAILED, 1, False, id="failure on failed"), + pytest.param(SmartSimStatus.STATUS_NEW, None, True, id="failure on new"), + pytest.param(SmartSimStatus.STATUS_PAUSED, None, True, id="failure on paused"), + pytest.param( + SmartSimStatus.STATUS_RUNNING, None, True, id="failure on running" + ), ], ) -def test_wlm_completion_handling( +@pytest.mark.asyncio +async def test_wlm_completion_handling( test_dir: str, monkeypatch: pytest.MonkeyPatch, status_in: str, @@ -1084,7 +1288,7 @@ def _faux_updates(_self: WLMLauncher, _names: t.List[str]) -> t.List[StepInfo]: return _faux_updates - ts = get_ts() + ts = get_ts_ms() with monkeypatch.context() as ctx: # don't actually start a job manager ctx.setattr(JobManager, "start", lambda x: ...) 
@@ -1107,7 +1311,7 @@ def _faux_updates(_self: WLMLauncher, _names: t.List[str]) -> t.List[StepInfo]: mani_handler._tracked_jobs = {job_entity.key: job_entity} mani_handler.job_manager.jobs[job.name] = job - mani_handler.on_timestep(ts) + await mani_handler.on_timestep(ts) # see that the job queue was properly manipulated has_jobs = bool(mani_handler._tracked_jobs) diff --git a/tests/utils/test_network.py b/tests/utils/test_network.py new file mode 100644 index 000000000..cdc3168ef --- /dev/null +++ b/tests/utils/test_network.py @@ -0,0 +1,30 @@ +import pytest + +from smartsim._core.utils.network import find_free_port + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +def test_find_free_port_no_start(): + """Test that a free port is identified and returned when no + starting port number is specified""" + port = find_free_port() + assert port > 0 + + +@pytest.mark.parametrize( + "start_at", + [ + pytest.param(1000, id="start at 1000"), + pytest.param(2000, id="start at 2000"), + pytest.param(5000, id="start at 5000"), + pytest.param(10000, id="start at 10000"), + pytest.param(16000, id="start at 16000"), + ], +) +def test_find_free_port_range_specified(start_at): + """Test that a free port greater than or equal to the specified + starting port number is identified and returned""" + port = find_free_port(start_at) + assert port >= start_at diff --git a/tests/utils/test_security.py b/tests/utils/test_security.py new file mode 100644 index 000000000..1a7a9586b --- /dev/null +++ b/tests/utils/test_security.py @@ -0,0 +1,234 @@ +# BSD 2-Clause License +# +# Copyright (c) 2021-2024, Hewlett Packard Enterprise +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# 2. 
Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +import pathlib +import stat + +import pytest +from sympy import public + +from smartsim._core.config.config import get_config +from smartsim._core.utils.security import KeyManager, _KeyLocator, _KeyPermissions + +# The tests in this file belong to the group_a group +pytestmark = pytest.mark.group_a + + +def test_keylocator_filename_resolution(test_dir: str) -> None: + """Ensure the key locator resolves filenames as expected.""" + key_path = pathlib.Path(test_dir) + key_category = "mycategory" + key_file = "mykey" + locator = _KeyLocator(key_path, key_file, key_category) + + assert locator.public_filename == f"{key_file}.key", "public mismatch" + assert locator.private_filename == f"{key_file}.key_secret", "private mismatch" + + +def test_keylocator_dir_resolution(test_dir: str) -> None: + """Ensure the key locator resolves paths as expected.""" + key_path = pathlib.Path(test_dir) + key_name = "test" + key_category = "mycategory" + + locator = _KeyLocator(key_path, key_name, 
key_category) + + # we expect a category and pub/priv subdirectory + exp_pub = pathlib.Path(f"{test_dir}/{key_category}/pub").resolve() + assert str(locator.public_dir) == str(exp_pub) + + exp_priv = pathlib.Path(f"{test_dir}/{key_category}/priv").resolve() + assert str(locator.private_dir) == str(exp_priv) + + # and to be explicit... prove pub & priv are not same directory + assert str(locator.private_dir) != str(locator.public_dir) + + +def test_key_manager_dir_preparation( + test_dir: str, monkeypatch: pytest.MonkeyPatch +) -> None: + """Ensure the KeyManager creates the appropriate directory + structure required for public/private key pairs.""" + with monkeypatch.context() as ctx: + ctx.setenv("SMARTSIM_KEY_PATH", test_dir) + + cfg = get_config() + km = KeyManager(cfg) + + km.create_directories() + + # verify the expected paths are created + server_locator = _KeyLocator(pathlib.Path(test_dir), "curve", "server") + client_locator = _KeyLocator(pathlib.Path(test_dir), "curve", "client") + + locators = [server_locator, client_locator] + + for locator in locators: + assert locator.public_dir.exists() + assert locator.private_dir.exists() + + +def test_key_manager_get_existing_keys_only_no_keys_found( + test_dir: str, monkeypatch: pytest.MonkeyPatch +) -> None: + """Ensure the key manager cannot load keys when + directed not to create missing keys.""" + with monkeypatch.context() as ctx: + ctx.setenv("SMARTSIM_KEY_PATH", test_dir) + + cfg = get_config() + km = KeyManager(cfg) + + # use create=False to only load pre-existing keys + server_keys, client_keys = km.get_keys(create=False) + + assert server_keys.empty + assert client_keys.empty + + +def test_key_manager_get_existing_keys_only_existing( + test_dir: str, monkeypatch: pytest.MonkeyPatch +) -> None: + """Ensure the key manager can load keys when + they exist from a previous call.""" + with monkeypatch.context() as ctx: + ctx.setenv("SMARTSIM_KEY_PATH", test_dir) + + cfg = get_config() + + # use a KeyManager to 
create some keys + km = KeyManager(cfg, as_server=True, as_client=True) + old_server_keys, old_client_keys = km.get_keys(create=True) + + # create a new KM to verify keys reload + km = KeyManager(cfg, as_server=True, as_client=True) + + # use create=True to manifest any bugs missing existing keys + server_keys, client_keys = km.get_keys(create=True) + + # ensure we loaded something + assert not server_keys.empty + assert not client_keys.empty + + # and show the old keys were reloaded from disk + assert server_keys == old_server_keys + assert client_keys == old_client_keys + + +def test_key_manager_get_or_create_keys_default( + test_dir: str, monkeypatch: pytest.MonkeyPatch +) -> None: + """Ensure the key manager creates keys when none can be loaded""" + with monkeypatch.context() as ctx: + ctx.setenv("SMARTSIM_KEY_PATH", test_dir) + + cfg = get_config() + km = KeyManager(cfg) + + key_set = km.get_keys() + + # public keys are returned by default + assert key_set[0].public != b"" + assert key_set[1].public != b"" + + # default behavior will only return public keys + assert not key_set[0].private + assert not key_set[1].private + + +@pytest.mark.parametrize( + "as_server, as_client", + [ + pytest.param(False, True, id="as-client"), + pytest.param(True, False, id="as-server"), + pytest.param(True, True, id="as-both"), + pytest.param(False, False, id="public-only"), + ], +) +def test_key_manager_as_context( + as_server: bool, + as_client: bool, + test_dir: str, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Ensure the key manager loads the correct keys + when passed `as_server=True` and `as_client=True`""" + with monkeypatch.context() as ctx: + ctx.setenv("SMARTSIM_KEY_PATH", test_dir) + + cfg = get_config() + km = KeyManager(cfg, as_server=as_server, as_client=as_client) + + server_keyset, client_keyset = km.get_keys() + + assert bool(server_keyset.public) == True + assert bool(server_keyset.private) == as_server + + assert bool(client_keyset.public) == True + 
assert bool(client_keyset.private) == as_client + + +def test_key_manager_applied_permissions( + test_dir: str, monkeypatch: pytest.MonkeyPatch +) -> None: + """Ensure the key manager applies the appropriate file-system + permissions to the keys and directories""" + with monkeypatch.context() as ctx: + ctx.setenv("SMARTSIM_KEY_PATH", test_dir) + + cfg = get_config() + km = KeyManager(cfg, as_client=True, as_server=True) + + server_keys, client_keys = km.get_keys() + + # ensure public dirs are open for reading by others + s_pub_stat = km._server_locator.public_dir.stat() + c_pub_stat = km._client_locator.public_dir.stat() + + assert stat.S_IMODE(s_pub_stat.st_mode) == _KeyPermissions.PUBLIC_DIR + assert stat.S_IMODE(c_pub_stat.st_mode) == _KeyPermissions.PUBLIC_DIR + + # ensure private dirs are open only to owner + s_priv_stat = km._server_locator.private_dir.stat() + c_priv_stat = km._client_locator.private_dir.stat() + + assert stat.S_IMODE(s_priv_stat.st_mode) == _KeyPermissions.PRIVATE_DIR + assert stat.S_IMODE(c_priv_stat.st_mode) == _KeyPermissions.PRIVATE_DIR + + # ensure public files are open for reading by others + s_pub_stat = km._server_locator.public.stat() + c_pub_stat = km._client_locator.public.stat() + + assert stat.S_IMODE(s_pub_stat.st_mode) == _KeyPermissions.PUBLIC_KEY + assert stat.S_IMODE(c_pub_stat.st_mode) == _KeyPermissions.PUBLIC_KEY + + # ensure private files are read-only for owner + s_priv_stat = km._server_locator.private.stat() + c_priv_stat = km._client_locator.private.stat() + + assert stat.S_IMODE(s_priv_stat.st_mode) == _KeyPermissions.PRIVATE_KEY + assert stat.S_IMODE(c_priv_stat.st_mode) == _KeyPermissions.PRIVATE_KEY