diff --git a/.github/workflows/tests.yaml b/.github/workflows/tests.yaml
index ffc0644df2c889..16e180a2c06e05 100644
--- a/.github/workflows/tests.yaml
+++ b/.github/workflows/tests.yaml
@@ -221,8 +221,30 @@ jobs:
                   --tv-app ./out/linux-x64-tv-app-${BUILD_VARIANT}/chip-tv-app \
                   --bridge-app ./out/linux-x64-bridge-${BUILD_VARIANT}/chip-bridge-app \
                "
-          - name: Run Tests using chip-repl
+          - name: Run Tests using chip-repl (skip slow)
            timeout-minutes: 45
+            if: github.event_name == 'pull_request'
+            run: |
+              ./scripts/run_in_build_env.sh \
+               "./scripts/tests/run_test_suite.py \
+                  --run-yamltests-with-chip-repl \
+                  --exclude-tags MANUAL \
+                  --exclude-tags FLAKY \
+                  --exclude-tags IN_DEVELOPMENT \
+                  --exclude-tags SLOW \
+                  run \
+                  --iterations 1 \
+                  --test-timeout-seconds 120 \
+                  --all-clusters-app ./out/linux-x64-all-clusters-${BUILD_VARIANT}/chip-all-clusters-app \
+                  --lock-app ./out/linux-x64-lock-${BUILD_VARIANT}/chip-lock-app \
+                  --ota-provider-app ./out/linux-x64-ota-provider-${BUILD_VARIANT}/chip-ota-provider-app \
+                  --ota-requestor-app ./out/linux-x64-ota-requestor-${BUILD_VARIANT}/chip-ota-requestor-app \
+                  --tv-app ./out/linux-x64-tv-app-${BUILD_VARIANT}/chip-tv-app \
+                  --bridge-app ./out/linux-x64-bridge-${BUILD_VARIANT}/chip-bridge-app \
+               "
+          - name: Run Tests using chip-repl (including slow)
+            timeout-minutes: 45
+            if: github.event_name == 'push'
            run: |
              ./scripts/run_in_build_env.sh \
               "./scripts/tests/run_test_suite.py \
diff --git a/scripts/tests/chiptest/__init__.py b/scripts/tests/chiptest/__init__.py
index aecdf0cf0336f2..eefdee8012e6d0 100644
--- a/scripts/tests/chiptest/__init__.py
+++ b/scripts/tests/chiptest/__init__.py
@@ -22,7 +22,7 @@
 from typing import Iterator, Set

 from . import linux, runner
-from .test_definition import ApplicationPaths, TestDefinition, TestTarget
+from .test_definition import ApplicationPaths, TestDefinition, TestRunTime, TestTag, TestTarget

 _DEFAULT_CHIP_ROOT = os.path.abspath(
     os.path.join(os.path.dirname(__file__), "..", "..", ".."))
@@ -39,60 +39,43 @@ class ManualTest:
 INVALID_TESTS = {
     "tests.yaml",  # certification/tests.yaml is not a real test
     "PICS.yaml",  # certification/PICS.yaml is not a real test
+
+    # The items below are examples and will (likely) never work;
+    # exclude them completely.
+    "Config_Example.yaml",
+    "Config_Variables_Example.yaml",
+    "PICS_Example.yaml",
+    "Response_Example.yaml",
+    "Test_Example.yaml",
 }


-def _LoadManualTestsJson(json_file_path: str) -> Iterator[ManualTest]:
+def _IsValidYamlTest(name: str) -> bool:
+    """Check if the given file name is a valid YAML test.
+
+    Returns False for examples, simulated tests and other files that are not real, runnable tests.
+    """
+
+    # Simulated tests are not runnable by repl tests and need
+    # separate infrastructure. Exclude them completely (they are
+    # not even manual).
+    if name.endswith('_Simulated.yaml'):
+        return False
+
+    return name not in INVALID_TESTS
+
+
+def _LoadManualTestsJson(json_file_path: str) -> Iterator[str]:
     with open(json_file_path, 'rt') as f:
         data = json.load(f)
         for c in data["collection"]:
             for name in data[c]:
-                yield ManualTest(yaml="%s.yaml" % name, reason=json_file_path)
+                yield f"{name}.yaml"


-def _GetManualTests() -> Set[ManualTest]:
+def _GetManualTests() -> Set[str]:
     manualtests = set()
-    # TODO:
-    #
-    # These are NOT manual tests, but rather "tests that fail in yaml and
-    # for this reason are marked as manual".
-    #
-    # We are working to get this list down to 0.
-    manualtests.add(ManualTest(yaml="Test_TC_ACL_2_10.yaml", reason="TODO Event Not Supported Yet"))
-    manualtests.add(ManualTest(yaml="Test_TC_ACL_2_7.yaml", reason="TODO Event Not Supported Yet"))
-    manualtests.add(ManualTest(yaml="Test_TC_ACL_2_8.yaml", reason="TODO Event Not Supported Yet"))
-    manualtests.add(ManualTest(yaml="Test_TC_ACL_2_9.yaml", reason="TODO Event Not Supported Yet"))
-    manualtests.add(ManualTest(yaml="TestEvents.yaml", reason="TODO Event Not Supported Yet"))
-
-    manualtests.add(ManualTest(yaml="Test_TC_ACE_1_1.yaml", reason="TODO GetCommissionerNodeId Not Supported Yet"))
-    manualtests.add(ManualTest(yaml="Test_TC_ACE_1_5.yaml", reason="TODO GetCommissionerNodeId Not Supported Yet"))
-    manualtests.add(ManualTest(yaml="Test_TC_SC_5_1.yaml", reason="TODO GetCommissionerNodeId Not Supported Yet"))
-    manualtests.add(ManualTest(yaml="Test_TC_SC_5_2.yaml", reason="TODO GetCommissionerNodeId Not Supported Yet"))
-    manualtests.add(ManualTest(yaml="TestCommissionerNodeId.yaml", reason="TODO GetCommissionerNodeId Not Supported Yet"))
-
-    manualtests.add(ManualTest(yaml="TestClusterMultiFabric.yaml", reason="TODO Enum Mismatch"))
-    manualtests.add(ManualTest(yaml="TestGroupMessaging.yaml", reason="TODO Group Message Not Supported in chip-repl yet"))
-    manualtests.add(ManualTest(yaml="TestMultiAdmin.yaml", reason="TODO chip-repl hangs on command expected to fail"))
-
-    # Failing, unclear why. Likely repl specific, used to pass however first
-    # failure point seems unrelated. Historically this seems (very?) flaky
-    # in repl.
-    manualtests.add(ManualTest(yaml="Test_TC_OO_2_4.yaml", reason="Flaky"))
-
-    # Examples:
-    #
-    # Currently these are not in ciTests.json, however yaml logic currently
-    # does NOT use allowlist json but rather finds all yaml files.
-    #
-    # This is on purpose for now to make it harder to orphan files, however
-    # we can reconsider as things evolve.
-    manualtests.add(ManualTest(yaml="Config_Example.yaml", reason="Example"))
-    manualtests.add(ManualTest(yaml="Config_Variables_Example.yaml", reason="Example"))
-    manualtests.add(ManualTest(yaml="PICS_Example.yaml", reason="Example"))
-    manualtests.add(ManualTest(yaml="Response_Example.yaml", reason="Example"))
-    manualtests.add(ManualTest(yaml="Test_Example.yaml", reason="Example"))
-
     # Flagged as manual from: src/app/tests/suites/manualTests.json
     for item in _LoadManualTestsJson(os.path.join(_YAML_TEST_SUITE_PATH, "manualTests.json")):
         manualtests.add(item)
@@ -100,6 +83,74 @@ def _GetManualTests() -> Set[ManualTest]:
     return manualtests


+def _GetFlakyTests() -> Set[str]:
+    """List of flaky tests, ideally this list should become empty."""
+    return {
+        "Test_TC_OO_2_4.yaml"
+    }
+
+
+def _GetSlowTests() -> Set[str]:
+    """Generally tests using sleep() a bit too freely.
+
+    10s seems like a good threshold to consider something slow
+    """
+    return {
+        "DL_LockUnlock.yaml",  # ~ 10 seconds
+        "TestSubscribe_AdministratorCommissioning.yaml",  # ~ 15 seconds
+        "Test_TC_CC_5_1.yaml",  # ~ 30 seconds
+        "Test_TC_CC_5_2.yaml",  # ~ 30 seconds
+        "Test_TC_CC_5_3.yaml",  # ~ 25 seconds
+        "Test_TC_CC_6_1.yaml",  # ~ 35 seconds
+        "Test_TC_CC_6_2.yaml",  # ~ 60 seconds
+        "Test_TC_CC_6_3.yaml",  # ~ 50 seconds
+        "Test_TC_CC_7_2.yaml",  # ~ 65 seconds
+        "Test_TC_CC_7_3.yaml",  # ~ 70 seconds
+        "Test_TC_CC_7_4.yaml",  # ~ 25 seconds
+        "Test_TC_CC_8_1.yaml",  # ~ 60 seconds
+        "Test_TC_DRLK_2_4.yaml",  # ~ 60 seconds
+        "Test_TC_I_2_2.yaml",  # ~ 15 seconds
+        "Test_TC_LVL_3_1.yaml",  # ~ 35 seconds
+        "Test_TC_LVL_4_1.yaml",  # ~ 55 seconds
+        "Test_TC_LVL_5_1.yaml",  # ~ 35 seconds
+        "Test_TC_LVL_6_1.yaml",  # ~ 10 seconds
+        "Test_TC_WNCV_3_1.yaml",  # ~ 20 seconds
+        "Test_TC_WNCV_3_2.yaml",  # ~ 20 seconds
+        "Test_TC_WNCV_3_3.yaml",  # ~ 15 seconds
+        "Test_TC_WNCV_3_4.yaml",  # ~ 10 seconds
+        "Test_TC_WNCV_3_5.yaml",  # ~ 10 seconds
+        "Test_TC_WNCV_4_1.yaml",  # ~ 20 seconds
+        "Test_TC_WNCV_4_2.yaml",  # ~ 20 seconds
+        "Test_TC_WNCV_4_5.yaml",  # ~ 12 seconds
+    }
+
+
+def _GetInDevelopmentTests() -> Set[str]:
+    """Tests that fail in YAML for some reason.
+
+    The goal is for this set to become empty.
+    """
+    return {
+        # TODO: Event not yet supported:
+        "Test_TC_ACL_2_10.yaml",
+        "Test_TC_ACL_2_7.yaml",
+        "Test_TC_ACL_2_8.yaml",
+        "Test_TC_ACL_2_9.yaml",
+        "TestEvents.yaml",
+
+        # TODO: CommissionerNodeId not yet supported:
+        "Test_TC_ACE_1_1.yaml",
+        "Test_TC_ACE_1_5.yaml",
+        "Test_TC_SC_5_1.yaml",
+        "Test_TC_SC_5_2.yaml",
+        "TestCommissionerNodeId.yaml",
+
+        "TestClusterMultiFabric.yaml",  # Enum mismatch
+        "TestGroupMessaging.yaml",  # Needs group support in repl
+        "TestMultiAdmin.yaml",  # chip-repl hangs on a command expected to fail
+    }
+
+
 def _AllYamlTests():
     yaml_test_suite_path = Path(_YAML_TEST_SUITE_PATH)

@@ -111,12 +162,6 @@ def _AllYamlTests():
         if not path.is_file():
             continue

-        if path.name.endswith('_Simulated.yaml'):
-            # Simulated tests are not runnable by repl tests, need
-            # separate infrastructure. Exclude theml completely (they are
-            # not even manual)
-            continue
-
         yield path


@@ -142,6 +187,10 @@ def tests_with_command(chip_tool: str, is_manual: bool):
     result = subprocess.run([chip_tool, "tests", cmd], capture_output=True)

+    test_tags = set()
+    if is_manual:
+        test_tags.add(TestTag.MANUAL)
+
     for name in result.stdout.decode("utf8").split("\n"):
         if not name:
             continue
@@ -149,34 +198,49 @@ def tests_with_command(chip_tool: str, is_manual: bool):
         target = target_for_name(name)

         yield TestDefinition(
-            run_name=name, name=name, target=target, is_manual=is_manual
+            run_name=name, name=name, target=target, tags=test_tags
         )


 # TODO We will move away from hardcoded list of yamltests to run all file when yamltests
 # parser/runner reaches parity with the code gen version.
 def _hardcoded_python_yaml_tests():
-    manual_tests = set([b.yaml for b in _GetManualTests()])
+    manual_tests = _GetManualTests()
+    flaky_tests = _GetFlakyTests()
+    slow_tests = _GetSlowTests()
+    in_development_tests = _GetInDevelopmentTests()

     for path in _AllYamlTests():
-        if path.name in INVALID_TESTS:
+        if not _IsValidYamlTest(path.name):
             continue

+        tags = set()
+        if path.name in manual_tests:
+            tags.add(TestTag.MANUAL)
+
+        if path.name in flaky_tests:
+            tags.add(TestTag.FLAKY)
+
+        if path.name in slow_tests:
+            tags.add(TestTag.SLOW)
+
+        if path.name in in_development_tests:
+            tags.add(TestTag.IN_DEVELOPMENT)
+
         yield TestDefinition(
             run_name=str(path),
             name=path.stem,  # `path.stem` converts "some/path/Test_ABC_1.2.yaml" to "Test_ABC.1.2"
             target=target_for_name(path.name),
-            is_manual=path.name in manual_tests,
-            use_chip_repl_yaml_tester=True
+            tags=tags,
         )


-def AllTests(chip_tool: str, run_yamltests_with_chip_repl: bool):
-    if run_yamltests_with_chip_repl:
-        for test in _hardcoded_python_yaml_tests():
-            yield test
-        return
+def AllYamlTests():
+    for test in _hardcoded_python_yaml_tests():
+        yield test
+
+
+def AllChipToolTests(chip_tool: str):
     for test in tests_with_command(chip_tool, is_manual=False):
         yield test

diff --git a/scripts/tests/chiptest/test_definition.py b/scripts/tests/chiptest/test_definition.py
index 6cbfe2abc0c0e2..52238719e7a211 100644
--- a/scripts/tests/chiptest/test_definition.py
+++ b/scripts/tests/chiptest/test_definition.py
@@ -19,7 +19,7 @@
 import threading
 import time
 import typing
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import datetime
 from enum import Enum, auto
 from random import randrange
@@ -210,15 +210,48 @@ def LogContents(self):
         logging.error('================ CAPTURED LOG END ====================')


+class TestTag(Enum):
+    MANUAL = auto()  # requires manual input. Generally not run automatically
+    SLOW = auto()  # test uses Sleep and is generally slow (>=10s is a typical threshold)
+    FLAKY = auto()  # test is considered flaky (usually a bug/time dependent issue)
+    IN_DEVELOPMENT = auto()  # test may not pass or is still undergoing changes
+
+    def to_s(self):
+        for (k, v) in TestTag.__members__.items():
+            if self == v:
+                return k
+        raise Exception("Unknown tag: %r" % self)
+
+
+class TestRunTime(Enum):
+    CHIP_TOOL_BUILTIN = auto()  # run via chip-tool built-in test commands
+    PYTHON_YAML = auto()  # use the python yaml test runner
+
+
 @dataclass
 class TestDefinition:
     name: str
     run_name: str
     target: TestTarget
-    is_manual: bool
-    use_chip_repl_yaml_tester: bool = False
+    tags: typing.Set[TestTag] = field(default_factory=set)
+
+    @property
+    def is_manual(self) -> bool:
+        return TestTag.MANUAL in self.tags
+
+    @property
+    def is_slow(self) -> bool:
+        return TestTag.SLOW in self.tags
+
+    @property
+    def is_flaky(self) -> bool:
+        return TestTag.FLAKY in self.tags
+
+    def tags_str(self) -> str:
+        """Get a human readable list of tags applied to this test"""
+        return ", ".join([t.to_s() for t in self.tags])

-    def Run(self, runner, apps_register, paths: ApplicationPaths, pics_file: str, timeout_seconds: typing.Optional[int], dry_run=False):
+    def Run(self, runner, apps_register, paths: ApplicationPaths, pics_file: str, timeout_seconds: typing.Optional[int], dry_run=False, test_runtime: TestRunTime = TestRunTime.CHIP_TOOL_BUILTIN):
         """
         Executes the given test case using the provided runner for execution.
         """
@@ -280,7 +313,7 @@ def Run(self, runner, apps_register, paths: ApplicationPaths, pics_file: str, ti
         if dry_run:
             logging.info(" ".join(pairing_cmd))
             logging.info(" ".join(test_cmd))
-        elif self.use_chip_repl_yaml_tester:
+        elif test_runtime == TestRunTime.PYTHON_YAML:
             chip_repl_yaml_tester_cmd = paths.chip_repl_yaml_tester_cmd
             python_cmd = chip_repl_yaml_tester_cmd + \
                 ['--setup-code', app.setupCode] + ['--yaml-path', self.run_name] + ["--pics-file", pics_file]
diff --git a/scripts/tests/run_test_suite.py b/scripts/tests/run_test_suite.py
index 3984e641fbcd9d..ba3deb1899f112 100755
--- a/scripts/tests/run_test_suite.py
+++ b/scripts/tests/run_test_suite.py
@@ -20,7 +20,7 @@
 import sys
 import time
 import typing
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path

 import chiptest
@@ -28,6 +28,7 @@
 import coloredlogs
 from chiptest.accessories import AppsRegister
 from chiptest.glob_matcher import GlobMatcher
+from chiptest.test_definition import TestRunTime, TestTag

 DEFAULT_CHIP_ROOT = os.path.abspath(
     os.path.join(os.path.dirname(__file__), '..', '..'))
@@ -67,7 +68,13 @@ class RunContext:
     in_unshare: bool
     chip_tool: str
     dry_run: bool
-    manual_handling: ManualHandling
+    runtime: TestRunTime
+
+    # If not empty, include only the specified test tags
+    include_tags: typing.Set[TestTag] = field(default_factory=set)
+
+    # If not empty, exclude tests tagged with these tags
+    exclude_tags: typing.Set[TestTag] = field(default_factory=set)


 @click.group(chain=True)
@@ -114,11 +121,16 @@ class RunContext:
     help='Internal flag for running inside a unshared environment'
 )
 @click.option(
-    '--manual',
-    type=click.Choice(['skip', 'only', 'include'], case_sensitive=False),
-    default='skip',
-    show_default=True,
-    help='Internal flag to determine how to handle manual tests. ONLY for "all" test choice.',
+    '--include-tags',
+    type=click.Choice(TestTag.__members__.keys(), case_sensitive=False),
+    multiple=True,
+    help='What test tags to include when running. Equivalent to "exclude all except these" for priority purposes.',
+)
+@click.option(
+    '--exclude-tags',
+    type=click.Choice(TestTag.__members__.keys(), case_sensitive=False),
+    multiple=True,
+    help='What test tags to exclude when running. Exclude takes precedence over include.',
+)
 @click.option(
     '--run-yamltests-with-chip-repl',
@@ -130,7 +142,7 @@ class RunContext:
     help='Binary path of chip tool app to use to run the test')
 @click.pass_context
 def main(context, dry_run, log_level, target, target_glob, target_skip_glob,
-         no_log_timestamps, root, internal_inside_unshare, manual, run_yamltests_with_chip_repl, chip_tool):
+         no_log_timestamps, root, internal_inside_unshare, include_tags, exclude_tags, run_yamltests_with_chip_repl, chip_tool):
     # Ensures somewhat pretty logging of what is going on
     log_fmt = '%(asctime)s.%(msecs)03d %(levelname)-7s %(message)s'
     if no_log_timestamps:
@@ -141,21 +153,28 @@ def main(context, dry_run, log_level, target, target_glob, target_skip_glob,
         # non yaml tests REQUIRE chip-tool. Yaml tests should not require chip-tool
         chip_tool = FindBinaryPath('chip-tool')

+    if include_tags:
+        include_tags = set([TestTag.__members__[t] for t in include_tags])
+
+    if exclude_tags:
+        exclude_tags = set([TestTag.__members__[t] for t in exclude_tags])
+
     # Figures out selected test that match the given name(s)
-    all_tests = [test for test in chiptest.AllTests(chip_tool, run_yamltests_with_chip_repl)]
+    if run_yamltests_with_chip_repl:
+        all_tests = [test for test in chiptest.AllYamlTests()]
+    else:
+        all_tests = [test for test in chiptest.AllChipToolTests(chip_tool)]
+
     tests = all_tests

-    # Default to only non-manual tests unless explicit targets are specified.
-    if 'all' in target:
-        if manual == 'skip':
-            manual_handling = ManualHandling.SKIP
-        elif manual == 'only':
-            manual_handling = ManualHandling.ONLY
-        else:
-            manual_handling = ManualHandling.INCLUDE
-    else:
-        manual_handling = ManualHandling.INCLUDE
+    # If only the defaults are given, skip manual, in-development and flaky tests.
+    # An explicitly selected target includes everything.
+    if 'all' in target and not include_tags:
+        exclude_tags = {
+            TestTag.MANUAL,
+            TestTag.IN_DEVELOPMENT,
+            TestTag.FLAKY,
+        }

     if 'all' not in target:
         tests = []
@@ -186,7 +205,9 @@ def main(context, dry_run, log_level, target, target_glob, target_skip_glob,
     context.obj = RunContext(root=root, tests=tests,
                              in_unshare=internal_inside_unshare,
                              chip_tool=chip_tool, dry_run=dry_run,
-                             manual_handling=manual_handling)
+                             runtime=TestRunTime.PYTHON_YAML if run_yamltests_with_chip_repl else TestRunTime.CHIP_TOOL_BUILTIN,
+                             include_tags=include_tags,
+                             exclude_tags=exclude_tags)


 @main.command(
@@ -194,10 +215,11 @@
 @click.pass_context
 def cmd_list(context):
     for test in context.obj.tests:
-        if test.is_manual:
-            print("%s (MANUAL TEST)" % test.name)
-        else:
-            print(test.name)
+        tags = test.tags_str()
+        if tags:
+            tags = f" ({tags})"
+
+        print("%s%s" % (test.name, tags))


 @main.command(
@@ -294,11 +316,14 @@ def cmd_run(context, iterations, all_clusters_app, lock_app, ota_provider_app, o
     for i in range(iterations):
         logging.info("Starting iteration %d" % (i+1))
         for test in context.obj.tests:
-            if test.is_manual:
-                if context.obj.manual_handling == ManualHandling.SKIP:
+            if context.obj.include_tags:
+                if not (test.tags & context.obj.include_tags):
+                    logging.debug("Test %s not included" % test.name)
                     continue
-            else:
-                if context.obj.manual_handling == ManualHandling.ONLY:
+
+            if context.obj.exclude_tags:
+                if test.tags & context.obj.exclude_tags:
+                    logging.debug("Test %s excluded" % test.name)
                     continue

             test_start = time.monotonic()
@@ -308,7 +333,9 @@ def cmd_run(context, iterations, all_clusters_app, lock_app, ota_provider_app, o
                 continue

             logging.info('%-20s - Starting test' % (test.name))
-            test.Run(runner, apps_register, paths, pics_file, test_timeout_seconds, context.obj.dry_run)
+            test.Run(
+                runner, apps_register, paths, pics_file, test_timeout_seconds, context.obj.dry_run,
+                test_runtime=context.obj.runtime)
             test_end = time.monotonic()
             logging.info('%-30s - Completed in %0.2f seconds' % (test.name, (test_end - test_start)))
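
Reviewer note: the selection logic added to cmd_run boils down to set intersections over TestDefinition.tags. The sketch below is illustrative only and not part of the patch (the helper name should_run is made up); it assumes the TestTag and TestDefinition definitions introduced in scripts/tests/chiptest/test_definition.py:

    import typing

    from chiptest.test_definition import TestDefinition, TestTag

    def should_run(test: TestDefinition,
                   include_tags: typing.Set[TestTag],
                   exclude_tags: typing.Set[TestTag]) -> bool:
        # When --include-tags is given, a test must carry at least one of the included tags.
        if include_tags and not (test.tags & include_tags):
            return False
        # Any excluded tag skips the test; exclude takes precedence over include.
        if exclude_tags and (test.tags & exclude_tags):
            return False
        return True

With the default 'all' target and no --include-tags, the runner excludes MANUAL, IN_DEVELOPMENT and FLAKY automatically; the pull-request CI step additionally passes --exclude-tags SLOW, while the push step runs the slow tests as well.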