From 1e5dbd3c7c80e09b34cad6b30bca6e3369a18060 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 17:23:53 +0200 Subject: [PATCH 01/15] Create a module to split test case collection from checks Signed-off-by: Gilles Peskine --- tests/scripts/check_test_cases.py | 1 + tests/scripts/collect_test_cases.py | 4 ++++ 2 files changed, 5 insertions(+) create mode 100644 tests/scripts/collect_test_cases.py diff --git a/tests/scripts/check_test_cases.py b/tests/scripts/check_test_cases.py index 6809dd5e7..a7ad9aebe 100755 --- a/tests/scripts/check_test_cases.py +++ b/tests/scripts/check_test_cases.py @@ -18,6 +18,7 @@ import scripts_path # pylint: disable=unused-import from mbedtls_framework import build_tree +import collect_test_cases class ScriptOutputError(ValueError): """A kind of ValueError that indicates we found diff --git a/tests/scripts/collect_test_cases.py b/tests/scripts/collect_test_cases.py new file mode 100644 index 000000000..7e9031c97 --- /dev/null +++ b/tests/scripts/collect_test_cases.py @@ -0,0 +1,4 @@ +"""Discover all the test cases (unit tests and SSL tests).""" + +# Copyright The Mbed TLS Contributors +# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later From c8c83d03030e2ec1bba35e2953a23a675c3af64b Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 17:35:52 +0200 Subject: [PATCH 02/15] Split test case collection from checks Move the test case collection code out of check_test_cases.py and into its own module. This allows outcome analysis to depend only on the new module and not on check_test_cases.py. Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 4 +- tests/scripts/check_test_cases.py | 164 +--------------------------- tests/scripts/collect_test_cases.py | 163 +++++++++++++++++++++++++++ 3 files changed, 169 insertions(+), 162 deletions(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index 188b68d1d..87efb1b89 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -14,7 +14,7 @@ import os import typing -import check_test_cases +import collect_test_cases # `ComponentOutcomes` is a named tuple which is defined as: @@ -197,7 +197,7 @@ def run(self, results: Results, outcomes: Outcomes) -> None: sys.stderr.write(cp.stdout.decode('utf-8')) results.error("Failed \"make generated_files\" in tests. " "Coverage analysis may be incorrect.") - available = check_test_cases.collect_available_test_cases() + available = collect_test_cases.collect_available_test_cases() for suite_case in available: hit = any(suite_case in comp_outcomes.successes or suite_case in comp_outcomes.failures diff --git a/tests/scripts/check_test_cases.py b/tests/scripts/check_test_cases.py index a7ad9aebe..b00319bad 100755 --- a/tests/scripts/check_test_cases.py +++ b/tests/scripts/check_test_cases.py @@ -10,170 +10,14 @@ # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later import argparse -import glob -import os import re -import subprocess import sys import scripts_path # pylint: disable=unused-import -from mbedtls_framework import build_tree import collect_test_cases -class ScriptOutputError(ValueError): - """A kind of ValueError that indicates we found - the script doesn't list test cases in an expected - pattern. 
- """ - @property - def script_name(self): - return super().args[0] - - @property - def idx(self): - return super().args[1] - - @property - def line(self): - return super().args[2] - -class Results: - """Store file and line information about errors or warnings in test suites.""" - - def __init__(self, options): - self.errors = 0 - self.warnings = 0 - self.ignore_warnings = options.quiet - - def error(self, file_name, line_number, fmt, *args): - sys.stderr.write(('{}:{}:ERROR:' + fmt + '\n'). - format(file_name, line_number, *args)) - self.errors += 1 - - def warning(self, file_name, line_number, fmt, *args): - if not self.ignore_warnings: - sys.stderr.write(('{}:{}:Warning:' + fmt + '\n') - .format(file_name, line_number, *args)) - self.warnings += 1 - -class TestDescriptionExplorer: - """An iterator over test cases with descriptions. - -The test cases that have descriptions are: -* Individual unit tests (entries in a .data file) in test suites. -* Individual test cases in ssl-opt.sh. - -This is an abstract class. To use it, derive a class that implements -the process_test_case method, and call walk_all(). -""" - - def process_test_case(self, per_file_state, - file_name, line_number, description): - """Process a test case. - -per_file_state: an object created by new_per_file_state() at the beginning - of each file. -file_name: a relative path to the file containing the test case. -line_number: the line number in the given file. -description: the test case description as a byte string. -""" - raise NotImplementedError - - def new_per_file_state(self): - """Return a new per-file state object. - -The default per-file state object is None. Child classes that require per-file -state may override this method. -""" - #pylint: disable=no-self-use - return None - - def walk_test_suite(self, data_file_name): - """Iterate over the test cases in the given unit test data file.""" - in_paragraph = False - descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none - with open(data_file_name, 'rb') as data_file: - for line_number, line in enumerate(data_file, 1): - line = line.rstrip(b'\r\n') - if not line: - in_paragraph = False - continue - if line.startswith(b'#'): - continue - if not in_paragraph: - # This is a test case description line. - self.process_test_case(descriptions, - data_file_name, line_number, line) - in_paragraph = True - - def collect_from_script(self, script_name): - """Collect the test cases in a script by calling its listing test cases -option""" - descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none - listed = subprocess.check_output(['sh', script_name, '--list-test-cases']) - # Assume test file is responsible for printing identical format of - # test case description between --list-test-cases and its OUTCOME.CSV - # - # idx indicates the number of test case since there is no line number - # in the script for each test case. - for idx, line in enumerate(listed.splitlines()): - # We are expecting the script to list the test cases in - # `;` pattern. 
- script_outputs = line.split(b';', 1) - if len(script_outputs) == 2: - suite_name, description = script_outputs - else: - raise ScriptOutputError(script_name, idx, line.decode("utf-8")) - - self.process_test_case(descriptions, - suite_name.decode('utf-8'), - idx, - description.rstrip()) - - @staticmethod - def collect_test_directories(): - """Get the relative path for the TLS and Crypto test directories.""" - mbedtls_root = build_tree.guess_mbedtls_root() - directories = [os.path.join(mbedtls_root, 'tests'), - os.path.join(mbedtls_root, 'tf-psa-crypto', 'tests')] - directories = [os.path.relpath(p) for p in directories] - return directories - - def walk_all(self): - """Iterate over all named test cases.""" - test_directories = self.collect_test_directories() - for directory in test_directories: - for data_file_name in glob.glob(os.path.join(directory, 'suites', - '*.data')): - self.walk_test_suite(data_file_name) - - for sh_file in ['ssl-opt.sh', 'compat.sh']: - sh_file = os.path.join(directory, sh_file) - if os.path.isfile(sh_file): - self.collect_from_script(sh_file) - -class TestDescriptions(TestDescriptionExplorer): - """Collect the available test cases.""" - - def __init__(self): - super().__init__() - self.descriptions = set() - - def process_test_case(self, _per_file_state, - file_name, _line_number, description): - """Record an available test case.""" - base_name = re.sub(r'\.[^.]*$', '', re.sub(r'.*/', '', file_name)) - key = ';'.join([base_name, description.decode('utf-8')]) - self.descriptions.add(key) - -def collect_available_test_cases(): - """Collect the available test cases.""" - explorer = TestDescriptions() - explorer.walk_all() - return sorted(explorer.descriptions) - -class DescriptionChecker(TestDescriptionExplorer): +class DescriptionChecker(collect_test_cases.TestDescriptionExplorer): """Check all test case descriptions. * Check that each description is valid (length, allowed character set, etc.). @@ -223,14 +67,14 @@ def main(): help='Show warnings (default: on; undoes --quiet)') options = parser.parse_args() if options.list_all: - descriptions = collect_available_test_cases() + descriptions = collect_test_cases.collect_available_test_cases() sys.stdout.write('\n'.join(descriptions + [''])) return - results = Results(options) + results = collect_test_cases.Results(options) checker = DescriptionChecker(results) try: checker.walk_all() - except ScriptOutputError as e: + except collect_test_cases.ScriptOutputError as e: results.error(e.script_name, e.idx, '"{}" should be listed as ";"', e.line) diff --git a/tests/scripts/collect_test_cases.py b/tests/scripts/collect_test_cases.py index 7e9031c97..c326710b6 100644 --- a/tests/scripts/collect_test_cases.py +++ b/tests/scripts/collect_test_cases.py @@ -2,3 +2,166 @@ # Copyright The Mbed TLS Contributors # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +import glob +import os +import re +import subprocess +import sys + +import scripts_path # pylint: disable=unused-import +from mbedtls_framework import build_tree + + +class ScriptOutputError(ValueError): + """A kind of ValueError that indicates we found + the script doesn't list test cases in an expected + pattern. 
+ """ + + @property + def script_name(self): + return super().args[0] + + @property + def idx(self): + return super().args[1] + + @property + def line(self): + return super().args[2] + +class Results: + """Store file and line information about errors or warnings in test suites.""" + + def __init__(self, options): + self.errors = 0 + self.warnings = 0 + self.ignore_warnings = options.quiet + + def error(self, file_name, line_number, fmt, *args): + sys.stderr.write(('{}:{}:ERROR:' + fmt + '\n'). + format(file_name, line_number, *args)) + self.errors += 1 + + def warning(self, file_name, line_number, fmt, *args): + if not self.ignore_warnings: + sys.stderr.write(('{}:{}:Warning:' + fmt + '\n') + .format(file_name, line_number, *args)) + self.warnings += 1 + +class TestDescriptionExplorer: + """An iterator over test cases with descriptions. + +The test cases that have descriptions are: +* Individual unit tests (entries in a .data file) in test suites. +* Individual test cases in ssl-opt.sh. + +This is an abstract class. To use it, derive a class that implements +the process_test_case method, and call walk_all(). +""" + + def process_test_case(self, per_file_state, + file_name, line_number, description): + """Process a test case. + +per_file_state: an object created by new_per_file_state() at the beginning + of each file. +file_name: a relative path to the file containing the test case. +line_number: the line number in the given file. +description: the test case description as a byte string. +""" + raise NotImplementedError + + def new_per_file_state(self): + """Return a new per-file state object. + +The default per-file state object is None. Child classes that require per-file +state may override this method. +""" + #pylint: disable=no-self-use + return None + + def walk_test_suite(self, data_file_name): + """Iterate over the test cases in the given unit test data file.""" + in_paragraph = False + descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none + with open(data_file_name, 'rb') as data_file: + for line_number, line in enumerate(data_file, 1): + line = line.rstrip(b'\r\n') + if not line: + in_paragraph = False + continue + if line.startswith(b'#'): + continue + if not in_paragraph: + # This is a test case description line. + self.process_test_case(descriptions, + data_file_name, line_number, line) + in_paragraph = True + + def collect_from_script(self, script_name): + """Collect the test cases in a script by calling its listing test cases +option""" + descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none + listed = subprocess.check_output(['sh', script_name, '--list-test-cases']) + # Assume test file is responsible for printing identical format of + # test case description between --list-test-cases and its OUTCOME.CSV + # + # idx indicates the number of test case since there is no line number + # in the script for each test case. + for idx, line in enumerate(listed.splitlines()): + # We are expecting the script to list the test cases in + # `;` pattern. 
+ script_outputs = line.split(b';', 1) + if len(script_outputs) == 2: + suite_name, description = script_outputs + else: + raise ScriptOutputError(script_name, idx, line.decode("utf-8")) + + self.process_test_case(descriptions, + suite_name.decode('utf-8'), + idx, + description.rstrip()) + + @staticmethod + def collect_test_directories(): + """Get the relative path for the TLS and Crypto test directories.""" + mbedtls_root = build_tree.guess_mbedtls_root() + directories = [os.path.join(mbedtls_root, 'tests'), + os.path.join(mbedtls_root, 'tf-psa-crypto', 'tests')] + directories = [os.path.relpath(p) for p in directories] + return directories + + def walk_all(self): + """Iterate over all named test cases.""" + test_directories = self.collect_test_directories() + for directory in test_directories: + for data_file_name in glob.glob(os.path.join(directory, 'suites', + '*.data')): + self.walk_test_suite(data_file_name) + + for sh_file in ['ssl-opt.sh', 'compat.sh']: + sh_file = os.path.join(directory, sh_file) + if os.path.isfile(sh_file): + self.collect_from_script(sh_file) + +class TestDescriptions(TestDescriptionExplorer): + """Collect the available test cases.""" + + def __init__(self): + super().__init__() + self.descriptions = set() + + def process_test_case(self, _per_file_state, + file_name, _line_number, description): + """Record an available test case.""" + base_name = re.sub(r'\.[^.]*$', '', re.sub(r'.*/', '', file_name)) + key = ';'.join([base_name, description.decode('utf-8')]) + self.descriptions.add(key) + +def collect_available_test_cases(): + """Collect the available test cases.""" + explorer = TestDescriptions() + explorer.walk_all() + return sorted(explorer.descriptions) From 9f930e0f9e0df867826cb59d8cdce9201cbaf3b3 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 17:38:32 +0200 Subject: [PATCH 03/15] Create a module to split branch-independent code out of analyze_outcomes.py Signed-off-by: Gilles Peskine --- tests/scripts/outcome_analysis.py | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 tests/scripts/outcome_analysis.py diff --git a/tests/scripts/outcome_analysis.py b/tests/scripts/outcome_analysis.py new file mode 100644 index 000000000..84b83eb81 --- /dev/null +++ b/tests/scripts/outcome_analysis.py @@ -0,0 +1,9 @@ +"""Outcome file analysis code. + +This module is the bulk of the code of tests/scripts/analyze_outcomes.py +in each consuming branch. The consuming script is expected to derive +the classes with branch-specific customizations such as ignore lists. +""" + +# Copyright The Mbed TLS Contributors +# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later From 49c77dd0e4846577aa6bb89f26a7f5389b230f04 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 17:40:55 +0200 Subject: [PATCH 04/15] Remove sample ignore list elements for coverage The ignore list for coverage only has two test cases out of ~10000 that are currently reported as not executed. This is a drop in the sea and not useful. Remove them so that the class can be used generically. A follow-up will construct a comprehensive ignore list. 
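For illustration, a consuming branch is then expected to repopulate the
ignore list with entries of the following shape. This is only a sketch:
the suite name and test case descriptions below are hypothetical, and
the matching rules are those of name_matches_pattern (a string must
equal the description, a regex must match the whole description).

    import re

    class CoverageTask(Task):
        # Hypothetical entries only: map a test suite name to the
        # descriptions of test cases that are expected to never run.
        IGNORED_TESTS = {
            'test_suite_example': [
                'Feature X: not supported',
                re.compile(r'Feature Y: .*'),
            ],
        }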
Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index 87efb1b89..8123ea1ef 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -167,14 +167,6 @@ class CoverageTask(Task): # Test cases whose suite and description are matched by an entry in # IGNORED_TESTS are expected to be never executed. # All other test cases are expected to be executed at least once. - IGNORED_TESTS = { - 'test_suite_psa_crypto_metadata': [ - # Algorithm not supported yet - 'Asymmetric signature: pure EdDSA', - # Algorithm not supported yet - 'Cipher: XTS', - ], - } def __init__(self, options) -> None: super().__init__(options) From 9d78e87b4954c94a23e9e40c9fbb549554642499 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 18:18:33 +0200 Subject: [PATCH 05/15] Missing NotImplementedError in abstract method Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index 8123ea1ef..decc33496 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -135,6 +135,7 @@ def __init__(self, options) -> None: def section_name(self) -> str: """The section name to use in results.""" + raise NotImplementedError def ignored_tests(self, test_suite: str) -> typing.Iterator[IgnoreEntry]: """Generate the ignore list for the specified test suite.""" From ad02d44e013be605910001e22d488d094e3f553e Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 18:27:13 +0200 Subject: [PATCH 06/15] Don't reuse a variable name inside a function Use different names for task name, a task class and a task instance. The interpreter doesn't care, but it's less confusing for both humans and type checkers. 
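For illustration, the naming convention this change applies is roughly
the following (a sketch using the names from the code below):

    task_name = tasks_list[0]            # str: the name given on the command line
    task_class = KNOWN_TASKS[task_name]  # type: the Task subclass
    task_instance = task_class(options)  # object: the task being run

instead of reusing the single name `task` for all three roles.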
Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index decc33496..9f8c2c3c7 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -784,17 +784,17 @@ def main(): options = parser.parse_args() if options.list: - for task in KNOWN_TASKS: - print(task) + for task_name in KNOWN_TASKS: + print(task_name) sys.exit(0) if options.specified_tasks == 'all': tasks_list = KNOWN_TASKS.keys() else: tasks_list = re.split(r'[, ]+', options.specified_tasks) - for task in tasks_list: - if task not in KNOWN_TASKS: - sys.stderr.write('invalid task: {}\n'.format(task)) + for task_name in tasks_list: + if task_name not in KNOWN_TASKS: + sys.stderr.write('invalid task: {}\n'.format(task_name)) sys.exit(2) # If the outcome file exists, parse it once and share the result @@ -806,22 +806,22 @@ def main(): sys.exit(2) task_name = tasks_list[0] - task = KNOWN_TASKS[task_name] - if not issubclass(task, DriverVSReference): + task_class = KNOWN_TASKS[task_name] + if not issubclass(task_class, DriverVSReference): sys.stderr.write("please provide valid outcomes file for {}.\n".format(task_name)) sys.exit(2) execute_reference_driver_tests(main_results, - task.REFERENCE, - task.DRIVER, + task_class.REFERENCE, + task_class.DRIVER, options.outcomes) outcomes = read_outcome_file(options.outcomes) for task_name in tasks_list: task_constructor = KNOWN_TASKS[task_name] - task = task_constructor(options) - main_results.new_section(task.section_name()) - task.run(main_results, outcomes) + task_instance = task_constructor(options) + main_results.new_section(task_instance.section_name()) + task_instance.run(main_results, outcomes) main_results.info("Overall results: {} warnings and {} errors", main_results.warning_count, main_results.error_count) From 005dca6ad8d1292d53d2fc8b5cff0a8bfc0c0edf Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 18:31:38 +0200 Subject: [PATCH 07/15] Typecheck main Always have tasks_list be a list, not potentially some fancier iterable. Bypass mypy's somewhat legitimate complaint about REFERENCE and DRIVER in task_class: they could potentially be instance attributes, but we rely on them being class attributes. Python does normally guarantee their existence as class attributes (unless a derived class explicitly deletes them), but they could be overridden by an instance attribute; that's just something we don't do, so the class attribute's value is legitimate. We can't expect mypy to know that, so work around its complaint. 
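Concretely, the workaround reads the class attributes through getattr()
instead of attribute syntax (a simplified sketch of the change below):

    # task_class is a class derived from DriverVSReference. Access
    # REFERENCE and DRIVER indirectly so that mypy does not flag the
    # attribute access on the class object.
    reference = getattr(task_class, 'REFERENCE')
    driver = getattr(task_class, 'DRIVER')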
Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index 9f8c2c3c7..f681a30ba 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -761,8 +761,7 @@ class DriverVSReference_block_cipher_dispatch(DriverVSReference): 'analyze_block_cipher_dispatch': DriverVSReference_block_cipher_dispatch, } - -def main(): +def main() -> None: main_results = Results() try: @@ -789,7 +788,7 @@ def main(): sys.exit(0) if options.specified_tasks == 'all': - tasks_list = KNOWN_TASKS.keys() + tasks_list = list(KNOWN_TASKS.keys()) else: tasks_list = re.split(r'[, ]+', options.specified_tasks) for task_name in tasks_list: @@ -810,9 +809,16 @@ def main(): if not issubclass(task_class, DriverVSReference): sys.stderr.write("please provide valid outcomes file for {}.\n".format(task_name)) sys.exit(2) + # mypy isn't smart enough to know that REFERENCE and DRIVER + # are *class* attributes of all classes derived from + # DriverVSReference. (It would be smart enough if we had an + # instance of task_class, but we can't construct an instance + # until we have the outcome data, so at this point we only + # have the class.) So we use indirection to access the class + # attributes. execute_reference_driver_tests(main_results, - task_class.REFERENCE, - task_class.DRIVER, + getattr(task_class, 'REFERENCE'), + getattr(task_class, 'DRIVER'), options.outcomes) outcomes = read_outcome_file(options.outcomes) From e41cde57c357e40024ade4efdec04dbc4b302b2a Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 18:36:09 +0200 Subject: [PATCH 08/15] Pass KNOWN_TASKS as an argument to main Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index f681a30ba..8dd8f59a1 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -761,7 +761,7 @@ class DriverVSReference_block_cipher_dispatch(DriverVSReference): 'analyze_block_cipher_dispatch': DriverVSReference_block_cipher_dispatch, } -def main() -> None: +def main(known_tasks: typing.Dict[str, typing.Type[Task]]) -> None: main_results = Results() try: @@ -783,16 +783,16 @@ def main() -> None: options = parser.parse_args() if options.list: - for task_name in KNOWN_TASKS: + for task_name in known_tasks: print(task_name) sys.exit(0) if options.specified_tasks == 'all': - tasks_list = list(KNOWN_TASKS.keys()) + tasks_list = list(known_tasks.keys()) else: tasks_list = re.split(r'[, ]+', options.specified_tasks) for task_name in tasks_list: - if task_name not in KNOWN_TASKS: + if task_name not in known_tasks: sys.stderr.write('invalid task: {}\n'.format(task_name)) sys.exit(2) @@ -805,7 +805,7 @@ def main() -> None: sys.exit(2) task_name = tasks_list[0] - task_class = KNOWN_TASKS[task_name] + task_class = known_tasks[task_name] if not issubclass(task_class, DriverVSReference): sys.stderr.write("please provide valid outcomes file for {}.\n".format(task_name)) sys.exit(2) @@ -824,7 +824,7 @@ def main() -> None: outcomes = read_outcome_file(options.outcomes) for task_name in tasks_list: - task_constructor = KNOWN_TASKS[task_name] + task_constructor = known_tasks[task_name] task_instance = task_constructor(options) main_results.new_section(task_instance.section_name()) 
task_instance.run(main_results, outcomes) @@ -840,4 +840,4 @@ def main() -> None: sys.exit(120) if __name__ == '__main__': - main() + main(KNOWN_TASKS) From 082eadef4e55274d72c138212be00d24e9f16b38 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 18:42:37 +0200 Subject: [PATCH 09/15] Separate code and data of outcome analysis Place the code of outcome analysis (auxiliary functions, tasks, command line entry point) into a separate module, which will be moved to the version-independent framework repository so that it can be shared between maintained branches. Keep the branch-specific list of driver components and ignore lists in the per-repository script. We keep the executable script at `tests/scripts/analyze_outcomes.py`. It's simpler that way, because that path is hard-coded in CI scripts. Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 375 ++---------------------------- tests/scripts/outcome_analysis.py | 353 ++++++++++++++++++++++++++++ 2 files changed, 368 insertions(+), 360 deletions(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index 8dd8f59a1..080a4fb58 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -6,279 +6,13 @@ less likely to be useful. """ -import argparse -import sys -import traceback import re -import subprocess -import os -import typing -import collect_test_cases +import outcome_analysis -# `ComponentOutcomes` is a named tuple which is defined as: -# ComponentOutcomes( -# successes = { -# "", -# ... -# }, -# failures = { -# "", -# ... -# } -# ) -# suite_case = ";" -ComponentOutcomes = typing.NamedTuple('ComponentOutcomes', - [('successes', typing.Set[str]), - ('failures', typing.Set[str])]) - -# `Outcomes` is a representation of the outcomes file, -# which defined as: -# Outcomes = { -# "": ComponentOutcomes, -# ... -# } -Outcomes = typing.Dict[str, ComponentOutcomes] - - -class Results: - """Process analysis results.""" - - def __init__(self): - self.error_count = 0 - self.warning_count = 0 - - def new_section(self, fmt, *args, **kwargs): - self._print_line('\n*** ' + fmt + ' ***\n', *args, **kwargs) - - def info(self, fmt, *args, **kwargs): - self._print_line('Info: ' + fmt, *args, **kwargs) - - def error(self, fmt, *args, **kwargs): - self.error_count += 1 - self._print_line('Error: ' + fmt, *args, **kwargs) - - def warning(self, fmt, *args, **kwargs): - self.warning_count += 1 - self._print_line('Warning: ' + fmt, *args, **kwargs) - - @staticmethod - def _print_line(fmt, *args, **kwargs): - sys.stderr.write((fmt + '\n').format(*args, **kwargs)) - -def execute_reference_driver_tests(results: Results, ref_component: str, driver_component: str, \ - outcome_file: str) -> None: - """Run the tests specified in ref_component and driver_component. Results - are stored in the output_file and they will be used for the following - coverage analysis""" - results.new_section("Test {} and {}", ref_component, driver_component) - - shell_command = "tests/scripts/all.sh --outcome-file " + outcome_file + \ - " " + ref_component + " " + driver_component - results.info("Running: {}", shell_command) - ret_val = subprocess.run(shell_command.split(), check=False).returncode - - if ret_val != 0: - results.error("failed to run reference/driver components") - -IgnoreEntry = typing.Union[str, typing.Pattern] - -def name_matches_pattern(name: str, str_or_re: IgnoreEntry) -> bool: - """Check if name matches a pattern, that may be a string or regex. 
- - If the pattern is a string, name must be equal to match. - - If the pattern is a regex, name must fully match. - """ - # The CI's python is too old for re.Pattern - #if isinstance(str_or_re, re.Pattern): - if not isinstance(str_or_re, str): - return str_or_re.fullmatch(name) is not None - else: - return str_or_re == name - -def read_outcome_file(outcome_file: str) -> Outcomes: - """Parse an outcome file and return an outcome collection. - """ - outcomes = {} - with open(outcome_file, 'r', encoding='utf-8') as input_file: - for line in input_file: - (_platform, component, suite, case, result, _cause) = line.split(';') - # Note that `component` is not unique. If a test case passes on Linux - # and fails on FreeBSD, it'll end up in both the successes set and - # the failures set. - suite_case = ';'.join([suite, case]) - if component not in outcomes: - outcomes[component] = ComponentOutcomes(set(), set()) - if result == 'PASS': - outcomes[component].successes.add(suite_case) - elif result == 'FAIL': - outcomes[component].failures.add(suite_case) - - return outcomes - - -class Task: - """Base class for outcome analysis tasks.""" - - # Override the following in child classes. - # Map test suite names (with the test_suite_prefix) to a list of ignored - # test cases. Each element in the list can be either a string or a regex; - # see the `name_matches_pattern` function. - IGNORED_TESTS = {} #type: typing.Dict[str, typing.List[IgnoreEntry]] - - def __init__(self, options) -> None: - """Pass command line options to the tasks. - - Each task decides which command line options it cares about. - """ - pass - - def section_name(self) -> str: - """The section name to use in results.""" - raise NotImplementedError - - def ignored_tests(self, test_suite: str) -> typing.Iterator[IgnoreEntry]: - """Generate the ignore list for the specified test suite.""" - if test_suite in self.IGNORED_TESTS: - yield from self.IGNORED_TESTS[test_suite] - pos = test_suite.find('.') - if pos != -1: - base_test_suite = test_suite[:pos] - if base_test_suite in self.IGNORED_TESTS: - yield from self.IGNORED_TESTS[base_test_suite] - - def is_test_case_ignored(self, test_suite: str, test_string: str) -> bool: - """Check if the specified test case is ignored.""" - for str_or_re in self.ignored_tests(test_suite): - if name_matches_pattern(test_string, str_or_re): - return True - return False - - def run(self, results: Results, outcomes: Outcomes): - """Run the analysis on the specified outcomes. - - Signal errors via the results objects - """ - raise NotImplementedError - - -class CoverageTask(Task): - """Analyze test coverage.""" - - # Test cases whose suite and description are matched by an entry in - # IGNORED_TESTS are expected to be never executed. - # All other test cases are expected to be executed at least once. - - def __init__(self, options) -> None: - super().__init__(options) - self.full_coverage = options.full_coverage #type: bool - - @staticmethod - def section_name() -> str: - return "Analyze coverage" - - def run(self, results: Results, outcomes: Outcomes) -> None: - """Check that all available test cases are executed at least once.""" - # Make sure that the generated data files are present (and up-to-date). - # This allows analyze_outcomes.py to run correctly on a fresh Git - # checkout. 
- cp = subprocess.run(['make', 'generated_files'], - cwd='tests', - stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - check=False) - if cp.returncode != 0: - sys.stderr.write(cp.stdout.decode('utf-8')) - results.error("Failed \"make generated_files\" in tests. " - "Coverage analysis may be incorrect.") - available = collect_test_cases.collect_available_test_cases() - for suite_case in available: - hit = any(suite_case in comp_outcomes.successes or - suite_case in comp_outcomes.failures - for comp_outcomes in outcomes.values()) - (test_suite, test_description) = suite_case.split(';') - ignored = self.is_test_case_ignored(test_suite, test_description) - - if not hit and not ignored: - if self.full_coverage: - results.error('Test case not executed: {}', suite_case) - else: - results.warning('Test case not executed: {}', suite_case) - elif hit and ignored: - # If a test case is no longer always skipped, we should remove - # it from the ignore list. - if self.full_coverage: - results.error('Test case was executed but marked as ignored for coverage: {}', - suite_case) - else: - results.warning('Test case was executed but marked as ignored for coverage: {}', - suite_case) - - -class DriverVSReference(Task): - """Compare outcomes from testing with and without a driver. - - There are 2 options to use analyze_driver_vs_reference_xxx locally: - 1. Run tests and then analysis: - - tests/scripts/all.sh --outcome-file "$PWD/out.csv" - - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx - 2. Let this script run both automatically: - - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx - """ - - # Override the following in child classes. - # Configuration name (all.sh component) used as the reference. - REFERENCE = '' - # Configuration name (all.sh component) used as the driver. - DRIVER = '' - # Ignored test suites (without the test_suite_ prefix). - IGNORED_SUITES = [] #type: typing.List[str] - - def __init__(self, options) -> None: - super().__init__(options) - self.ignored_suites = frozenset('test_suite_' + x - for x in self.IGNORED_SUITES) - - def section_name(self) -> str: - return f"Analyze driver {self.DRIVER} vs reference {self.REFERENCE}" - - def run(self, results: Results, outcomes: Outcomes) -> None: - """Check that all tests passing in the driver component are also - passing in the corresponding reference component. - Skip: - - full test suites provided in ignored_suites list - - only some specific test inside a test suite, for which the corresponding - output string is provided - """ - ref_outcomes = outcomes.get("component_" + self.REFERENCE) - driver_outcomes = outcomes.get("component_" + self.DRIVER) - - if ref_outcomes is None or driver_outcomes is None: - results.error("required components are missing: bad outcome file?") - return - - if not ref_outcomes.successes: - results.error("no passing test in reference component: bad outcome file?") - return - - for suite_case in ref_outcomes.successes: - # suite_case is like "test_suite_foo.bar;Description of test case" - (full_test_suite, test_string) = suite_case.split(';') - test_suite = full_test_suite.split('.')[0] # retrieve main part of test suite name - - # Immediately skip fully-ignored test suites - if test_suite in self.ignored_suites or \ - full_test_suite in self.ignored_suites: - continue - - # For ignored test cases inside test suites, just remember and: - # don't issue an error if they're skipped with drivers, - # but issue an error if they're not (means we have a bad entry). 
- ignored = self.is_test_case_ignored(full_test_suite, test_string) - - if not ignored and not suite_case in driver_outcomes.successes: - results.error("SKIP/FAIL -> PASS: {}", suite_case) - if ignored and suite_case in driver_outcomes.successes: - results.error("uselessly ignored: {}", suite_case) +class CoverageTask(outcome_analysis.CoverageTask): + pass # We'll populate IGNORED_TESTS soon # The names that we give to classes derived from DriverVSReference do not @@ -288,7 +22,7 @@ def run(self, results: Results, outcomes: Outcomes) -> None: # documentation. #pylint: disable=invalid-name,missing-class-docstring -class DriverVSReference_hash(DriverVSReference): +class DriverVSReference_hash(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_hash_use_psa' DRIVER = 'test_psa_crypto_config_accel_hash_use_psa' IGNORED_SUITES = [ @@ -308,7 +42,7 @@ class DriverVSReference_hash(DriverVSReference): ], } -class DriverVSReference_hmac(DriverVSReference): +class DriverVSReference_hmac(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_hmac' DRIVER = 'test_psa_crypto_config_accel_hmac' IGNORED_SUITES = [ @@ -347,7 +81,7 @@ class DriverVSReference_hmac(DriverVSReference): ], } -class DriverVSReference_cipher_aead_cmac(DriverVSReference): +class DriverVSReference_cipher_aead_cmac(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_cipher_aead_cmac' DRIVER = 'test_psa_crypto_config_accel_cipher_aead_cmac' # Modules replaced by drivers. @@ -414,7 +148,7 @@ class DriverVSReference_cipher_aead_cmac(DriverVSReference): ], } -class DriverVSReference_ecp_light_only(DriverVSReference): +class DriverVSReference_ecp_light_only(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_ecc_ecp_light_only' DRIVER = 'test_psa_crypto_config_accel_ecc_ecp_light_only' IGNORED_SUITES = [ @@ -454,7 +188,7 @@ class DriverVSReference_ecp_light_only(DriverVSReference): ], } -class DriverVSReference_no_ecp_at_all(DriverVSReference): +class DriverVSReference_no_ecp_at_all(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_ecc_no_ecp_at_all' DRIVER = 'test_psa_crypto_config_accel_ecc_no_ecp_at_all' IGNORED_SUITES = [ @@ -492,7 +226,7 @@ class DriverVSReference_no_ecp_at_all(DriverVSReference): ], } -class DriverVSReference_ecc_no_bignum(DriverVSReference): +class DriverVSReference_ecc_no_bignum(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_ecc_no_bignum' DRIVER = 'test_psa_crypto_config_accel_ecc_no_bignum' IGNORED_SUITES = [ @@ -537,7 +271,7 @@ class DriverVSReference_ecc_no_bignum(DriverVSReference): ], } -class DriverVSReference_ecc_ffdh_no_bignum(DriverVSReference): +class DriverVSReference_ecc_ffdh_no_bignum(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_ecc_ffdh_no_bignum' DRIVER = 'test_psa_crypto_config_accel_ecc_ffdh_no_bignum' IGNORED_SUITES = [ @@ -590,7 +324,7 @@ class DriverVSReference_ecc_ffdh_no_bignum(DriverVSReference): ], } -class DriverVSReference_ffdh_alg(DriverVSReference): +class DriverVSReference_ffdh_alg(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_ffdh' DRIVER = 'test_psa_crypto_config_accel_ffdh' IGNORED_SUITES = ['dhm'] @@ -606,7 +340,7 @@ class DriverVSReference_ffdh_alg(DriverVSReference): ], } -class DriverVSReference_tfm_config(DriverVSReference): +class DriverVSReference_tfm_config(outcome_analysis.DriverVSReference): REFERENCE = 
'test_tfm_config_no_p256m' DRIVER = 'test_tfm_config_p256m_driver_accel_ec' IGNORED_SUITES = [ @@ -638,7 +372,7 @@ class DriverVSReference_tfm_config(DriverVSReference): ], } -class DriverVSReference_rsa(DriverVSReference): +class DriverVSReference_rsa(outcome_analysis.DriverVSReference): REFERENCE = 'test_psa_crypto_config_reference_rsa_crypto' DRIVER = 'test_psa_crypto_config_accel_rsa_crypto' IGNORED_SUITES = [ @@ -677,7 +411,7 @@ class DriverVSReference_rsa(DriverVSReference): ], } -class DriverVSReference_block_cipher_dispatch(DriverVSReference): +class DriverVSReference_block_cipher_dispatch(outcome_analysis.DriverVSReference): REFERENCE = 'test_full_block_cipher_legacy_dispatch' DRIVER = 'test_full_block_cipher_psa_dispatch' IGNORED_SUITES = [ @@ -744,7 +478,6 @@ class DriverVSReference_block_cipher_dispatch(DriverVSReference): #pylint: enable=invalid-name,missing-class-docstring - # List of tasks with a function that can handle this task and additional arguments if required KNOWN_TASKS = { 'analyze_coverage': CoverageTask, @@ -761,83 +494,5 @@ class DriverVSReference_block_cipher_dispatch(DriverVSReference): 'analyze_block_cipher_dispatch': DriverVSReference_block_cipher_dispatch, } -def main(known_tasks: typing.Dict[str, typing.Type[Task]]) -> None: - main_results = Results() - - try: - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('outcomes', metavar='OUTCOMES.CSV', - help='Outcome file to analyze') - parser.add_argument('specified_tasks', default='all', nargs='?', - help='Analysis to be done. By default, run all tasks. ' - 'With one or more TASK, run only those. ' - 'TASK can be the name of a single task or ' - 'comma/space-separated list of tasks. ') - parser.add_argument('--list', action='store_true', - help='List all available tasks and exit.') - parser.add_argument('--require-full-coverage', action='store_true', - dest='full_coverage', help="Require all available " - "test cases to be executed and issue an error " - "otherwise. This flag is ignored if 'task' is " - "neither 'all' nor 'analyze_coverage'") - options = parser.parse_args() - - if options.list: - for task_name in known_tasks: - print(task_name) - sys.exit(0) - - if options.specified_tasks == 'all': - tasks_list = list(known_tasks.keys()) - else: - tasks_list = re.split(r'[, ]+', options.specified_tasks) - for task_name in tasks_list: - if task_name not in known_tasks: - sys.stderr.write('invalid task: {}\n'.format(task_name)) - sys.exit(2) - - # If the outcome file exists, parse it once and share the result - # among tasks to improve performance. - # Otherwise, it will be generated by execute_reference_driver_tests. - if not os.path.exists(options.outcomes): - if len(tasks_list) > 1: - sys.stderr.write("mutiple tasks found, please provide a valid outcomes file.\n") - sys.exit(2) - - task_name = tasks_list[0] - task_class = known_tasks[task_name] - if not issubclass(task_class, DriverVSReference): - sys.stderr.write("please provide valid outcomes file for {}.\n".format(task_name)) - sys.exit(2) - # mypy isn't smart enough to know that REFERENCE and DRIVER - # are *class* attributes of all classes derived from - # DriverVSReference. (It would be smart enough if we had an - # instance of task_class, but we can't construct an instance - # until we have the outcome data, so at this point we only - # have the class.) So we use indirection to access the class - # attributes. 
- execute_reference_driver_tests(main_results, - getattr(task_class, 'REFERENCE'), - getattr(task_class, 'DRIVER'), - options.outcomes) - - outcomes = read_outcome_file(options.outcomes) - - for task_name in tasks_list: - task_constructor = known_tasks[task_name] - task_instance = task_constructor(options) - main_results.new_section(task_instance.section_name()) - task_instance.run(main_results, outcomes) - - main_results.info("Overall results: {} warnings and {} errors", - main_results.warning_count, main_results.error_count) - - sys.exit(0 if (main_results.error_count == 0) else 1) - - except Exception: # pylint: disable=broad-except - # Print the backtrace and exit explicitly with our chosen status. - traceback.print_exc() - sys.exit(120) - if __name__ == '__main__': - main(KNOWN_TASKS) + outcome_analysis.main(KNOWN_TASKS) diff --git a/tests/scripts/outcome_analysis.py b/tests/scripts/outcome_analysis.py index 84b83eb81..e2ad3e758 100644 --- a/tests/scripts/outcome_analysis.py +++ b/tests/scripts/outcome_analysis.py @@ -7,3 +7,356 @@ # Copyright The Mbed TLS Contributors # SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later + +import argparse +import sys +import traceback +import re +import subprocess +import os +import typing + +import collect_test_cases + + +# `ComponentOutcomes` is a named tuple which is defined as: +# ComponentOutcomes( +# successes = { +# "", +# ... +# }, +# failures = { +# "", +# ... +# } +# ) +# suite_case = ";" +ComponentOutcomes = typing.NamedTuple('ComponentOutcomes', + [('successes', typing.Set[str]), + ('failures', typing.Set[str])]) + +# `Outcomes` is a representation of the outcomes file, +# which defined as: +# Outcomes = { +# "": ComponentOutcomes, +# ... +# } +Outcomes = typing.Dict[str, ComponentOutcomes] + + +class Results: + """Process analysis results.""" + + def __init__(self): + self.error_count = 0 + self.warning_count = 0 + + def new_section(self, fmt, *args, **kwargs): + self._print_line('\n*** ' + fmt + ' ***\n', *args, **kwargs) + + def info(self, fmt, *args, **kwargs): + self._print_line('Info: ' + fmt, *args, **kwargs) + + def error(self, fmt, *args, **kwargs): + self.error_count += 1 + self._print_line('Error: ' + fmt, *args, **kwargs) + + def warning(self, fmt, *args, **kwargs): + self.warning_count += 1 + self._print_line('Warning: ' + fmt, *args, **kwargs) + + @staticmethod + def _print_line(fmt, *args, **kwargs): + sys.stderr.write((fmt + '\n').format(*args, **kwargs)) + +def execute_reference_driver_tests(results: Results, ref_component: str, driver_component: str, \ + outcome_file: str) -> None: + """Run the tests specified in ref_component and driver_component. Results + are stored in the output_file and they will be used for the following + coverage analysis""" + results.new_section("Test {} and {}", ref_component, driver_component) + + shell_command = "tests/scripts/all.sh --outcome-file " + outcome_file + \ + " " + ref_component + " " + driver_component + results.info("Running: {}", shell_command) + ret_val = subprocess.run(shell_command.split(), check=False).returncode + + if ret_val != 0: + results.error("failed to run reference/driver components") + +IgnoreEntry = typing.Union[str, typing.Pattern] + +def name_matches_pattern(name: str, str_or_re: IgnoreEntry) -> bool: + """Check if name matches a pattern, that may be a string or regex. + - If the pattern is a string, name must be equal to match. + - If the pattern is a regex, name must fully match. 
+ """ + # The CI's python is too old for re.Pattern + #if isinstance(str_or_re, re.Pattern): + if not isinstance(str_or_re, str): + return str_or_re.fullmatch(name) is not None + else: + return str_or_re == name + +def read_outcome_file(outcome_file: str) -> Outcomes: + """Parse an outcome file and return an outcome collection. + """ + outcomes = {} + with open(outcome_file, 'r', encoding='utf-8') as input_file: + for line in input_file: + (_platform, component, suite, case, result, _cause) = line.split(';') + # Note that `component` is not unique. If a test case passes on Linux + # and fails on FreeBSD, it'll end up in both the successes set and + # the failures set. + suite_case = ';'.join([suite, case]) + if component not in outcomes: + outcomes[component] = ComponentOutcomes(set(), set()) + if result == 'PASS': + outcomes[component].successes.add(suite_case) + elif result == 'FAIL': + outcomes[component].failures.add(suite_case) + + return outcomes + + +class Task: + """Base class for outcome analysis tasks.""" + + # Override the following in child classes. + # Map test suite names (with the test_suite_prefix) to a list of ignored + # test cases. Each element in the list can be either a string or a regex; + # see the `name_matches_pattern` function. + IGNORED_TESTS = {} #type: typing.Dict[str, typing.List[IgnoreEntry]] + + def __init__(self, options) -> None: + """Pass command line options to the tasks. + + Each task decides which command line options it cares about. + """ + pass + + def section_name(self) -> str: + """The section name to use in results.""" + raise NotImplementedError + + def ignored_tests(self, test_suite: str) -> typing.Iterator[IgnoreEntry]: + """Generate the ignore list for the specified test suite.""" + if test_suite in self.IGNORED_TESTS: + yield from self.IGNORED_TESTS[test_suite] + pos = test_suite.find('.') + if pos != -1: + base_test_suite = test_suite[:pos] + if base_test_suite in self.IGNORED_TESTS: + yield from self.IGNORED_TESTS[base_test_suite] + + def is_test_case_ignored(self, test_suite: str, test_string: str) -> bool: + """Check if the specified test case is ignored.""" + for str_or_re in self.ignored_tests(test_suite): + if name_matches_pattern(test_string, str_or_re): + return True + return False + + def run(self, results: Results, outcomes: Outcomes): + """Run the analysis on the specified outcomes. + + Signal errors via the results objects + """ + raise NotImplementedError + + +class CoverageTask(Task): + """Analyze test coverage.""" + + # Test cases whose suite and description are matched by an entry in + # IGNORED_TESTS are expected to be never executed. + # All other test cases are expected to be executed at least once. + + def __init__(self, options) -> None: + super().__init__(options) + self.full_coverage = options.full_coverage #type: bool + + @staticmethod + def section_name() -> str: + return "Analyze coverage" + + def run(self, results: Results, outcomes: Outcomes) -> None: + """Check that all available test cases are executed at least once.""" + # Make sure that the generated data files are present (and up-to-date). + # This allows analyze_outcomes.py to run correctly on a fresh Git + # checkout. + cp = subprocess.run(['make', 'generated_files'], + cwd='tests', + stdout=subprocess.PIPE, stderr=subprocess.STDOUT, + check=False) + if cp.returncode != 0: + sys.stderr.write(cp.stdout.decode('utf-8')) + results.error("Failed \"make generated_files\" in tests. 
" + "Coverage analysis may be incorrect.") + available = collect_test_cases.collect_available_test_cases() + for suite_case in available: + hit = any(suite_case in comp_outcomes.successes or + suite_case in comp_outcomes.failures + for comp_outcomes in outcomes.values()) + (test_suite, test_description) = suite_case.split(';') + ignored = self.is_test_case_ignored(test_suite, test_description) + + if not hit and not ignored: + if self.full_coverage: + results.error('Test case not executed: {}', suite_case) + else: + results.warning('Test case not executed: {}', suite_case) + elif hit and ignored: + # If a test case is no longer always skipped, we should remove + # it from the ignore list. + if self.full_coverage: + results.error('Test case was executed but marked as ignored for coverage: {}', + suite_case) + else: + results.warning('Test case was executed but marked as ignored for coverage: {}', + suite_case) + + +class DriverVSReference(Task): + """Compare outcomes from testing with and without a driver. + + There are 2 options to use analyze_driver_vs_reference_xxx locally: + 1. Run tests and then analysis: + - tests/scripts/all.sh --outcome-file "$PWD/out.csv" + - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx + 2. Let this script run both automatically: + - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx + """ + + # Override the following in child classes. + # Configuration name (all.sh component) used as the reference. + REFERENCE = '' + # Configuration name (all.sh component) used as the driver. + DRIVER = '' + # Ignored test suites (without the test_suite_ prefix). + IGNORED_SUITES = [] #type: typing.List[str] + + def __init__(self, options) -> None: + super().__init__(options) + self.ignored_suites = frozenset('test_suite_' + x + for x in self.IGNORED_SUITES) + + def section_name(self) -> str: + return f"Analyze driver {self.DRIVER} vs reference {self.REFERENCE}" + + def run(self, results: Results, outcomes: Outcomes) -> None: + """Check that all tests passing in the driver component are also + passing in the corresponding reference component. + Skip: + - full test suites provided in ignored_suites list + - only some specific test inside a test suite, for which the corresponding + output string is provided + """ + ref_outcomes = outcomes.get("component_" + self.REFERENCE) + driver_outcomes = outcomes.get("component_" + self.DRIVER) + + if ref_outcomes is None or driver_outcomes is None: + results.error("required components are missing: bad outcome file?") + return + + if not ref_outcomes.successes: + results.error("no passing test in reference component: bad outcome file?") + return + + for suite_case in ref_outcomes.successes: + # suite_case is like "test_suite_foo.bar;Description of test case" + (full_test_suite, test_string) = suite_case.split(';') + test_suite = full_test_suite.split('.')[0] # retrieve main part of test suite name + + # Immediately skip fully-ignored test suites + if test_suite in self.ignored_suites or \ + full_test_suite in self.ignored_suites: + continue + + # For ignored test cases inside test suites, just remember and: + # don't issue an error if they're skipped with drivers, + # but issue an error if they're not (means we have a bad entry). 
+ ignored = self.is_test_case_ignored(full_test_suite, test_string) + + if not ignored and not suite_case in driver_outcomes.successes: + results.error("SKIP/FAIL -> PASS: {}", suite_case) + if ignored and suite_case in driver_outcomes.successes: + results.error("uselessly ignored: {}", suite_case) + + +def main(known_tasks: typing.Dict[str, typing.Type[Task]]) -> None: + main_results = Results() + + try: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument('outcomes', metavar='OUTCOMES.CSV', + help='Outcome file to analyze') + parser.add_argument('specified_tasks', default='all', nargs='?', + help='Analysis to be done. By default, run all tasks. ' + 'With one or more TASK, run only those. ' + 'TASK can be the name of a single task or ' + 'comma/space-separated list of tasks. ') + parser.add_argument('--list', action='store_true', + help='List all available tasks and exit.') + parser.add_argument('--require-full-coverage', action='store_true', + dest='full_coverage', help="Require all available " + "test cases to be executed and issue an error " + "otherwise. This flag is ignored if 'task' is " + "neither 'all' nor 'analyze_coverage'") + options = parser.parse_args() + + if options.list: + for task_name in known_tasks: + print(task_name) + sys.exit(0) + + if options.specified_tasks == 'all': + tasks_list = list(known_tasks.keys()) + else: + tasks_list = re.split(r'[, ]+', options.specified_tasks) + for task_name in tasks_list: + if task_name not in known_tasks: + sys.stderr.write('invalid task: {}\n'.format(task_name)) + sys.exit(2) + + # If the outcome file exists, parse it once and share the result + # among tasks to improve performance. + # Otherwise, it will be generated by execute_reference_driver_tests. + if not os.path.exists(options.outcomes): + if len(tasks_list) > 1: + sys.stderr.write("mutiple tasks found, please provide a valid outcomes file.\n") + sys.exit(2) + + task_name = tasks_list[0] + task_class = known_tasks[task_name] + if not issubclass(task_class, DriverVSReference): + sys.stderr.write("please provide valid outcomes file for {}.\n".format(task_name)) + sys.exit(2) + # mypy isn't smart enough to know that REFERENCE and DRIVER + # are *class* attributes of all classes derived from + # DriverVSReference. (It would be smart enough if we had an + # instance of task_class, but we can't construct an instance + # until we have the outcome data, so at this point we only + # have the class.) So we use indirection to access the class + # attributes. + execute_reference_driver_tests(main_results, + getattr(task_class, 'REFERENCE'), + getattr(task_class, 'DRIVER'), + options.outcomes) + + outcomes = read_outcome_file(options.outcomes) + + for task_name in tasks_list: + task_constructor = known_tasks[task_name] + task_instance = task_constructor(options) + main_results.new_section(task_instance.section_name()) + task_instance.run(main_results, outcomes) + + main_results.info("Overall results: {} warnings and {} errors", + main_results.warning_count, main_results.error_count) + + sys.exit(0 if (main_results.error_count == 0) else 1) + + except Exception: # pylint: disable=broad-except + # Print the backtrace and exit explicitly with our chosen status. 
+ traceback.print_exc() + sys.exit(120) From 31467725759752c276abac14598c29e4e79c2c6d Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Thu, 3 Oct 2024 18:52:58 +0200 Subject: [PATCH 10/15] Adjust paths for impending moves to the framework Signed-off-by: Gilles Peskine --- docs/architecture/testing/test-framework.md | 2 +- tests/scripts/analyze_outcomes.py | 3 ++- tests/scripts/check_test_cases.py | 2 +- tests/scripts/components-basic-checks.sh | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/docs/architecture/testing/test-framework.md b/docs/architecture/testing/test-framework.md index 80667df92..a9e3dac47 100644 --- a/docs/architecture/testing/test-framework.md +++ b/docs/architecture/testing/test-framework.md @@ -22,7 +22,7 @@ Each test case has a description which succinctly describes for a human audience * Make the description descriptive. “foo: x=2, y=4” is more descriptive than “foo #2”. “foo: 0 Date: Wed, 9 Oct 2024 13:49:40 +0200 Subject: [PATCH 11/15] Move test case analysis modules to framework repository Move `collect_test_cases.py` (split from `check_test_cases.py`), `check_test_cases.py`, and `outcome_analysis.py` (split from `analyze_outcomes.py`) to the framework repository. Signed-off-by: Gilles Peskine --- tests/scripts/check_test_cases.py | 87 ------- tests/scripts/collect_test_cases.py | 167 ------------- tests/scripts/outcome_analysis.py | 362 ---------------------------- 3 files changed, 616 deletions(-) delete mode 100755 tests/scripts/check_test_cases.py delete mode 100644 tests/scripts/collect_test_cases.py delete mode 100644 tests/scripts/outcome_analysis.py diff --git a/tests/scripts/check_test_cases.py b/tests/scripts/check_test_cases.py deleted file mode 100755 index f1185d2f7..000000000 --- a/tests/scripts/check_test_cases.py +++ /dev/null @@ -1,87 +0,0 @@ -#!/usr/bin/env python3 - -"""Sanity checks for test data. - -This program contains a class for traversing test cases that can be used -independently of the checks. -""" - -# Copyright The Mbed TLS Contributors -# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later - -import argparse -import re -import sys - -import scripts_path # pylint: disable=unused-import -from mbedtls_framework import collect_test_cases - - -class DescriptionChecker(collect_test_cases.TestDescriptionExplorer): - """Check all test case descriptions. - -* Check that each description is valid (length, allowed character set, etc.). -* Check that there is no duplicated description inside of one test suite. 
-""" - - def __init__(self, results): - self.results = results - - def new_per_file_state(self): - """Dictionary mapping descriptions to their line number.""" - return {} - - def process_test_case(self, per_file_state, - file_name, line_number, description): - """Check test case descriptions for errors.""" - results = self.results - seen = per_file_state - if description in seen: - results.error(file_name, line_number, - 'Duplicate description (also line {})', - seen[description]) - return - if re.search(br'[\t;]', description): - results.error(file_name, line_number, - 'Forbidden character \'{}\' in description', - re.search(br'[\t;]', description).group(0).decode('ascii')) - if re.search(br'[^ -~]', description): - results.error(file_name, line_number, - 'Non-ASCII character in description') - if len(description) > 66: - results.warning(file_name, line_number, - 'Test description too long ({} > 66)', - len(description)) - seen[description] = line_number - -def main(): - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('--list-all', - action='store_true', - help='List all test cases, without doing checks') - parser.add_argument('--quiet', '-q', - action='store_true', - help='Hide warnings') - parser.add_argument('--verbose', '-v', - action='store_false', dest='quiet', - help='Show warnings (default: on; undoes --quiet)') - options = parser.parse_args() - if options.list_all: - descriptions = collect_test_cases.collect_available_test_cases() - sys.stdout.write('\n'.join(descriptions + [''])) - return - results = collect_test_cases.Results(options) - checker = DescriptionChecker(results) - try: - checker.walk_all() - except collect_test_cases.ScriptOutputError as e: - results.error(e.script_name, e.idx, - '"{}" should be listed as ";"', - e.line) - if (results.warnings or results.errors) and not options.quiet: - sys.stderr.write('{}: {} errors, {} warnings\n' - .format(sys.argv[0], results.errors, results.warnings)) - sys.exit(1 if results.errors else 0) - -if __name__ == '__main__': - main() diff --git a/tests/scripts/collect_test_cases.py b/tests/scripts/collect_test_cases.py deleted file mode 100644 index c326710b6..000000000 --- a/tests/scripts/collect_test_cases.py +++ /dev/null @@ -1,167 +0,0 @@ -"""Discover all the test cases (unit tests and SSL tests).""" - -# Copyright The Mbed TLS Contributors -# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later - -import glob -import os -import re -import subprocess -import sys - -import scripts_path # pylint: disable=unused-import -from mbedtls_framework import build_tree - - -class ScriptOutputError(ValueError): - """A kind of ValueError that indicates we found - the script doesn't list test cases in an expected - pattern. - """ - - @property - def script_name(self): - return super().args[0] - - @property - def idx(self): - return super().args[1] - - @property - def line(self): - return super().args[2] - -class Results: - """Store file and line information about errors or warnings in test suites.""" - - def __init__(self, options): - self.errors = 0 - self.warnings = 0 - self.ignore_warnings = options.quiet - - def error(self, file_name, line_number, fmt, *args): - sys.stderr.write(('{}:{}:ERROR:' + fmt + '\n'). 
-                         format(file_name, line_number, *args))
-        self.errors += 1
-
-    def warning(self, file_name, line_number, fmt, *args):
-        if not self.ignore_warnings:
-            sys.stderr.write(('{}:{}:Warning:' + fmt + '\n')
-                             .format(file_name, line_number, *args))
-            self.warnings += 1
-
-class TestDescriptionExplorer:
-    """An iterator over test cases with descriptions.
-
-The test cases that have descriptions are:
-* Individual unit tests (entries in a .data file) in test suites.
-* Individual test cases in ssl-opt.sh.
-
-This is an abstract class. To use it, derive a class that implements
-the process_test_case method, and call walk_all().
-"""
-
-    def process_test_case(self, per_file_state,
-                          file_name, line_number, description):
-        """Process a test case.
-
-per_file_state: an object created by new_per_file_state() at the beginning
-                of each file.
-file_name: a relative path to the file containing the test case.
-line_number: the line number in the given file.
-description: the test case description as a byte string.
-"""
-        raise NotImplementedError
-
-    def new_per_file_state(self):
-        """Return a new per-file state object.
-
-The default per-file state object is None. Child classes that require per-file
-state may override this method.
-"""
-        #pylint: disable=no-self-use
-        return None
-
-    def walk_test_suite(self, data_file_name):
-        """Iterate over the test cases in the given unit test data file."""
-        in_paragraph = False
-        descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none
-        with open(data_file_name, 'rb') as data_file:
-            for line_number, line in enumerate(data_file, 1):
-                line = line.rstrip(b'\r\n')
-                if not line:
-                    in_paragraph = False
-                    continue
-                if line.startswith(b'#'):
-                    continue
-                if not in_paragraph:
-                    # This is a test case description line.
-                    self.process_test_case(descriptions,
-                                           data_file_name, line_number, line)
-                in_paragraph = True
-
-    def collect_from_script(self, script_name):
-        """Collect the test cases in a script by calling its test-case
-listing option."""
-        descriptions = self.new_per_file_state() # pylint: disable=assignment-from-none
-        listed = subprocess.check_output(['sh', script_name, '--list-test-cases'])
-        # Assume that the script prints test case descriptions in the same
-        # format in --list-test-cases mode and in the outcome file.
-        #
-        # idx is the test case's position in the listing; it stands in for
-        # a line number, since the script does not report one.
-        for idx, line in enumerate(listed.splitlines()):
-            # We expect the script to list each test case in the
-            # `<suite_name>;<description>` pattern.
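(For illustration, with made-up suite and test case names, the split in the next statement behaves like this:)

```
>>> b'ssl-opt;Default'.split(b';', 1)
[b'ssl-opt', b'Default']
>>> b'no separator here'.split(b';', 1)   # malformed: triggers ScriptOutputError below
[b'no separator here']
```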
-            script_outputs = line.split(b';', 1)
-            if len(script_outputs) == 2:
-                suite_name, description = script_outputs
-            else:
-                raise ScriptOutputError(script_name, idx, line.decode("utf-8"))
-
-            self.process_test_case(descriptions,
-                                   suite_name.decode('utf-8'),
-                                   idx,
-                                   description.rstrip())
-
-    @staticmethod
-    def collect_test_directories():
-        """Get the relative paths of the TLS and Crypto test directories."""
-        mbedtls_root = build_tree.guess_mbedtls_root()
-        directories = [os.path.join(mbedtls_root, 'tests'),
-                       os.path.join(mbedtls_root, 'tf-psa-crypto', 'tests')]
-        directories = [os.path.relpath(p) for p in directories]
-        return directories
-
-    def walk_all(self):
-        """Iterate over all named test cases."""
-        test_directories = self.collect_test_directories()
-        for directory in test_directories:
-            for data_file_name in glob.glob(os.path.join(directory, 'suites',
-                                                         '*.data')):
-                self.walk_test_suite(data_file_name)
-
-            for sh_file in ['ssl-opt.sh', 'compat.sh']:
-                sh_file = os.path.join(directory, sh_file)
-                if os.path.isfile(sh_file):
-                    self.collect_from_script(sh_file)
-
-class TestDescriptions(TestDescriptionExplorer):
-    """Collect the available test cases."""
-
-    def __init__(self):
-        super().__init__()
-        self.descriptions = set()
-
-    def process_test_case(self, _per_file_state,
-                          file_name, _line_number, description):
-        """Record an available test case."""
-        base_name = re.sub(r'\.[^.]*$', '', re.sub(r'.*/', '', file_name))
-        key = ';'.join([base_name, description.decode('utf-8')])
-        self.descriptions.add(key)
-
-def collect_available_test_cases():
-    """Collect the available test cases."""
-    explorer = TestDescriptions()
-    explorer.walk_all()
-    return sorted(explorer.descriptions)
diff --git a/tests/scripts/outcome_analysis.py b/tests/scripts/outcome_analysis.py
deleted file mode 100644
index e2ad3e758..000000000
--- a/tests/scripts/outcome_analysis.py
+++ /dev/null
@@ -1,362 +0,0 @@
-"""Outcome file analysis code.
-
-This module is the bulk of the code of tests/scripts/analyze_outcomes.py
-in each consuming branch. The consuming script is expected to derive
-the classes with branch-specific customizations such as ignore lists.
-"""
-
-# Copyright The Mbed TLS Contributors
-# SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
-
-import argparse
-import sys
-import traceback
-import re
-import subprocess
-import os
-import typing
-
-import collect_test_cases
-
-
-# `ComponentOutcomes` is a named tuple which is defined as:
-# ComponentOutcomes(
-#     successes = {
-#         "<suite_case>",
-#         ...
-#     },
-#     failures = {
-#         "<suite_case>",
-#         ...
-#     }
-# )
-# suite_case = "<suite>;<case>"
-ComponentOutcomes = typing.NamedTuple('ComponentOutcomes',
-                                      [('successes', typing.Set[str]),
-                                       ('failures', typing.Set[str])])
-
-# `Outcomes` is a representation of the outcomes file,
-# which is defined as:
-# Outcomes = {
-#     "<component>": ComponentOutcomes,
-#     ...
-# }
-Outcomes = typing.Dict[str, ComponentOutcomes]
-
-
-class Results:
-    """Process analysis results."""
-
-    def __init__(self):
-        self.error_count = 0
-        self.warning_count = 0
-
-    def new_section(self, fmt, *args, **kwargs):
-        self._print_line('\n*** ' + fmt + ' ***\n', *args, **kwargs)
-
-    def info(self, fmt, *args, **kwargs):
-        self._print_line('Info: ' + fmt, *args, **kwargs)
-
-    def error(self, fmt, *args, **kwargs):
-        self.error_count += 1
-        self._print_line('Error: ' + fmt, *args, **kwargs)
-
-    def warning(self, fmt, *args, **kwargs):
-        self.warning_count += 1
-        self._print_line('Warning: ' + fmt, *args, **kwargs)
-
-    @staticmethod
-    def _print_line(fmt, *args, **kwargs):
-        sys.stderr.write((fmt + '\n').format(*args, **kwargs))
-
-def execute_reference_driver_tests(results: Results, ref_component: str, driver_component: str, \
-                                   outcome_file: str) -> None:
-    """Run the tests specified in ref_component and driver_component.
-    Results are stored in outcome_file; they are used by the subsequent
-    coverage analysis."""
-    results.new_section("Test {} and {}", ref_component, driver_component)
-
-    shell_command = "tests/scripts/all.sh --outcome-file " + outcome_file + \
-                    " " + ref_component + " " + driver_component
-    results.info("Running: {}", shell_command)
-    ret_val = subprocess.run(shell_command.split(), check=False).returncode
-
-    if ret_val != 0:
-        results.error("failed to run reference/driver components")
-
-IgnoreEntry = typing.Union[str, typing.Pattern]
-
-def name_matches_pattern(name: str, str_or_re: IgnoreEntry) -> bool:
-    """Check if name matches a pattern that may be a string or a regex.
-    - If the pattern is a string, name must be equal to it to match.
-    - If the pattern is a regex, name must match it fully.
-    """
-    # The CI's python is too old for re.Pattern
-    #if isinstance(str_or_re, re.Pattern):
-    if not isinstance(str_or_re, str):
-        return str_or_re.fullmatch(name) is not None
-    else:
-        return str_or_re == name
-
-def read_outcome_file(outcome_file: str) -> Outcomes:
-    """Parse an outcome file and return an outcome collection.
-    """
-    outcomes = {}
-    with open(outcome_file, 'r', encoding='utf-8') as input_file:
-        for line in input_file:
-            (_platform, component, suite, case, result, _cause) = line.split(';')
-            # Note that `component` is not unique. If a test case passes on Linux
-            # and fails on FreeBSD, it'll end up in both the successes set and
-            # the failures set.
-            suite_case = ';'.join([suite, case])
-            if component not in outcomes:
-                outcomes[component] = ComponentOutcomes(set(), set())
-            if result == 'PASS':
-                outcomes[component].successes.add(suite_case)
-            elif result == 'FAIL':
-                outcomes[component].failures.add(suite_case)
-
-    return outcomes
-
-
-class Task:
-    """Base class for outcome analysis tasks."""
-
-    # Override the following in child classes.
-    # Map test suite names (with the test_suite_ prefix) to a list of ignored
-    # test cases. Each element in the list can be either a string or a regex;
-    # see the `name_matches_pattern` function.
-    IGNORED_TESTS = {} #type: typing.Dict[str, typing.List[IgnoreEntry]]
-
-    def __init__(self, options) -> None:
-        """Pass command line options to the tasks.
-
-        Each task decides which command line options it cares about.
- """ - pass - - def section_name(self) -> str: - """The section name to use in results.""" - raise NotImplementedError - - def ignored_tests(self, test_suite: str) -> typing.Iterator[IgnoreEntry]: - """Generate the ignore list for the specified test suite.""" - if test_suite in self.IGNORED_TESTS: - yield from self.IGNORED_TESTS[test_suite] - pos = test_suite.find('.') - if pos != -1: - base_test_suite = test_suite[:pos] - if base_test_suite in self.IGNORED_TESTS: - yield from self.IGNORED_TESTS[base_test_suite] - - def is_test_case_ignored(self, test_suite: str, test_string: str) -> bool: - """Check if the specified test case is ignored.""" - for str_or_re in self.ignored_tests(test_suite): - if name_matches_pattern(test_string, str_or_re): - return True - return False - - def run(self, results: Results, outcomes: Outcomes): - """Run the analysis on the specified outcomes. - - Signal errors via the results objects - """ - raise NotImplementedError - - -class CoverageTask(Task): - """Analyze test coverage.""" - - # Test cases whose suite and description are matched by an entry in - # IGNORED_TESTS are expected to be never executed. - # All other test cases are expected to be executed at least once. - - def __init__(self, options) -> None: - super().__init__(options) - self.full_coverage = options.full_coverage #type: bool - - @staticmethod - def section_name() -> str: - return "Analyze coverage" - - def run(self, results: Results, outcomes: Outcomes) -> None: - """Check that all available test cases are executed at least once.""" - # Make sure that the generated data files are present (and up-to-date). - # This allows analyze_outcomes.py to run correctly on a fresh Git - # checkout. - cp = subprocess.run(['make', 'generated_files'], - cwd='tests', - stdout=subprocess.PIPE, stderr=subprocess.STDOUT, - check=False) - if cp.returncode != 0: - sys.stderr.write(cp.stdout.decode('utf-8')) - results.error("Failed \"make generated_files\" in tests. " - "Coverage analysis may be incorrect.") - available = collect_test_cases.collect_available_test_cases() - for suite_case in available: - hit = any(suite_case in comp_outcomes.successes or - suite_case in comp_outcomes.failures - for comp_outcomes in outcomes.values()) - (test_suite, test_description) = suite_case.split(';') - ignored = self.is_test_case_ignored(test_suite, test_description) - - if not hit and not ignored: - if self.full_coverage: - results.error('Test case not executed: {}', suite_case) - else: - results.warning('Test case not executed: {}', suite_case) - elif hit and ignored: - # If a test case is no longer always skipped, we should remove - # it from the ignore list. - if self.full_coverage: - results.error('Test case was executed but marked as ignored for coverage: {}', - suite_case) - else: - results.warning('Test case was executed but marked as ignored for coverage: {}', - suite_case) - - -class DriverVSReference(Task): - """Compare outcomes from testing with and without a driver. - - There are 2 options to use analyze_driver_vs_reference_xxx locally: - 1. Run tests and then analysis: - - tests/scripts/all.sh --outcome-file "$PWD/out.csv" - - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx - 2. Let this script run both automatically: - - tests/scripts/analyze_outcomes.py out.csv analyze_driver_vs_reference_xxx - """ - - # Override the following in child classes. - # Configuration name (all.sh component) used as the reference. 
-    REFERENCE = ''
-    # Configuration name (all.sh component) used as the driver.
-    DRIVER = ''
-    # Ignored test suites (without the test_suite_ prefix).
-    IGNORED_SUITES = [] #type: typing.List[str]
-
-    def __init__(self, options) -> None:
-        super().__init__(options)
-        self.ignored_suites = frozenset('test_suite_' + x
-                                        for x in self.IGNORED_SUITES)
-
-    def section_name(self) -> str:
-        return f"Analyze driver {self.DRIVER} vs reference {self.REFERENCE}"
-
-    def run(self, results: Results, outcomes: Outcomes) -> None:
-        """Check that all tests passing in the driver component are also
-        passing in the corresponding reference component.
-        Skip:
-        - full test suites listed in ignored_suites;
-        - individual test cases inside a test suite whose descriptions match
-          an entry in IGNORED_TESTS.
-        """
-        ref_outcomes = outcomes.get("component_" + self.REFERENCE)
-        driver_outcomes = outcomes.get("component_" + self.DRIVER)
-
-        if ref_outcomes is None or driver_outcomes is None:
-            results.error("required components are missing: bad outcome file?")
-            return
-
-        if not ref_outcomes.successes:
-            results.error("no passing test in reference component: bad outcome file?")
-            return
-
-        for suite_case in ref_outcomes.successes:
-            # suite_case is like "test_suite_foo.bar;Description of test case"
-            (full_test_suite, test_string) = suite_case.split(';')
-            test_suite = full_test_suite.split('.')[0] # retrieve main part of test suite name
-
-            # Immediately skip fully-ignored test suites
-            if test_suite in self.ignored_suites or \
-               full_test_suite in self.ignored_suites:
-                continue
-
-            # For ignored test cases inside test suites, just remember them:
-            # don't issue an error if they're skipped with drivers,
-            # but do issue an error if they're not (that means we have a bad
-            # entry in the ignore list).
-            ignored = self.is_test_case_ignored(full_test_suite, test_string)
-
-            if not ignored and not suite_case in driver_outcomes.successes:
-                results.error("SKIP/FAIL -> PASS: {}", suite_case)
-            if ignored and suite_case in driver_outcomes.successes:
-                results.error("uselessly ignored: {}", suite_case)
-
-
-def main(known_tasks: typing.Dict[str, typing.Type[Task]]) -> None:
-    main_results = Results()
-
-    try:
-        parser = argparse.ArgumentParser(description=__doc__)
-        parser.add_argument('outcomes', metavar='OUTCOMES.CSV',
-                            help='Outcome file to analyze')
-        parser.add_argument('specified_tasks', default='all', nargs='?',
-                            help='Analysis to be done. By default, run all tasks. '
-                                 'With one or more TASK, run only those. '
-                                 'TASK can be the name of a single task or a '
-                                 'comma/space-separated list of tasks.')
-        parser.add_argument('--list', action='store_true',
-                            help='List all available tasks and exit.')
-        parser.add_argument('--require-full-coverage', action='store_true',
-                            dest='full_coverage', help="Require all available "
-                            "test cases to be executed and issue an error "
-                            "otherwise. This flag is ignored if 'task' is "
-                            "neither 'all' nor 'analyze_coverage'")
-        options = parser.parse_args()
-
-        if options.list:
-            for task_name in known_tasks:
-                print(task_name)
-            sys.exit(0)
-
-        if options.specified_tasks == 'all':
-            tasks_list = list(known_tasks.keys())
-        else:
-            tasks_list = re.split(r'[, ]+', options.specified_tasks)
-            for task_name in tasks_list:
-                if task_name not in known_tasks:
-                    sys.stderr.write('invalid task: {}\n'.format(task_name))
-                    sys.exit(2)
-
-        # If the outcome file exists, parse it once and share the result
-        # among tasks to improve performance.
-        # Otherwise, it will be generated by execute_reference_driver_tests.
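(For reference, each outcome file line carries the six semicolon-separated fields, platform;component;suite;case;result;cause, that read_outcome_file above unpacks. A made-up example line with an empty cause field:)

```
Linux-x86_64;component_test_example;test_suite_foo;Test foo basics;PASS;
```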
-        if not os.path.exists(options.outcomes):
-            if len(tasks_list) > 1:
-                sys.stderr.write("multiple tasks found, please provide a valid outcomes file.\n")
-                sys.exit(2)
-
-            task_name = tasks_list[0]
-            task_class = known_tasks[task_name]
-            if not issubclass(task_class, DriverVSReference):
-                sys.stderr.write("please provide a valid outcomes file for {}.\n".format(task_name))
-                sys.exit(2)
-            # mypy isn't smart enough to know that REFERENCE and DRIVER
-            # are *class* attributes of all classes derived from
-            # DriverVSReference. (It would be smart enough if we had an
-            # instance of task_class, but we can't construct an instance
-            # until we have the outcome data, so at this point we only
-            # have the class.) So we use indirection to access the class
-            # attributes.
-            execute_reference_driver_tests(main_results,
-                                           getattr(task_class, 'REFERENCE'),
-                                           getattr(task_class, 'DRIVER'),
-                                           options.outcomes)
-
-        outcomes = read_outcome_file(options.outcomes)
-
-        for task_name in tasks_list:
-            task_constructor = known_tasks[task_name]
-            task_instance = task_constructor(options)
-            main_results.new_section(task_instance.section_name())
-            task_instance.run(main_results, outcomes)
-
-        main_results.info("Overall results: {} warnings and {} errors",
-                          main_results.warning_count, main_results.error_count)
-
-        sys.exit(0 if (main_results.error_count == 0) else 1)
-
-    except Exception: # pylint: disable=broad-except
-        # Print the backtrace and exit explicitly with our chosen status.
-        traceback.print_exc()
-        sys.exit(120)

From 1c5a25272940d4d0389da70402657097a330676b Mon Sep 17 00:00:00 2001
From: Gilles Peskine
Date: Mon, 23 Sep 2024 18:03:10 +0200
Subject: [PATCH 12/15] Upgrade mypy to the last version supporting Python 3.6

Upgrade mypy to 0.971, which is the last version that supports Python 3.6
(the oldest Python version that we currently run on the CI).

This fixes the error
```
framework/scripts/mbedtls_framework/outcome_analysis.py:119: error: Incompatible return value type (got "IO[Any]", expected "TextIO")
framework/scripts/mbedtls_framework/outcome_analysis.py:121: error: Incompatible return value type (got "IO[Any]", expected "TextIO")
```
As far as I can tell, the fix is https://github.com/python/mypy/pull/9275,
which was released in mypy 0.940.

Signed-off-by: Gilles Peskine

---
 scripts/ci.requirements.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/ci.requirements.txt b/scripts/ci.requirements.txt
index d21aa2798..1ef8abd58 100644
--- a/scripts/ci.requirements.txt
+++ b/scripts/ci.requirements.txt
@@ -7,9 +7,9 @@
 # 2.4.4 is the version in Ubuntu 20.04. It supports Python >=3.5.
 pylint == 2.4.4

-# Use the earliest version of mypy that works with our code base.
-# See https://github.com/Mbed-TLS/mbedtls/pull/3953 .
-mypy >= 0.780
+# Use the last version of mypy that is compatible with Python 3.6.
+# Newer versions should be ok too.
+mypy >= 0.971

 # At the time of writing, only needed for tests/scripts/audit-validity-dates.py.
 # It needs >=35.0.0 for correct operation, and that requires Python >=3.6,

From 041a84d1dcb4982d383a97e3a742ca4e4182c468 Mon Sep 17 00:00:00 2001
From: Gilles Peskine
Date: Mon, 23 Sep 2024 19:16:47 +0200
Subject: [PATCH 13/15] Downgrade mypy to a version that works with our code base

mypy >=0.960 rejects macro_collector.py.
https://github.com/Mbed-TLS/mbedtls-framework/issues/50

We currently need mypy >=0.940, <0.960. Pick 0.942, which works, and
is the system version on Ubuntu 22.04.
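(As a quick local check of that version window, one could run something like the following snippet; it is illustrative only and not part of this patch:)

```
# Illustrative check (not part of the patch): verify that the locally
# installed mypy falls in the known-good window discussed above.
import mypy.version

major, minor = (int(x) for x in mypy.version.__version__.split('.')[:2])
assert (0, 940) <= (major, minor) < (0, 960), \
    'unsupported mypy version: ' + mypy.version.__version__
```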
Signed-off-by: Gilles Peskine --- scripts/ci.requirements.txt | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/scripts/ci.requirements.txt b/scripts/ci.requirements.txt index 1ef8abd58..fc10c63b8 100644 --- a/scripts/ci.requirements.txt +++ b/scripts/ci.requirements.txt @@ -7,9 +7,13 @@ # 2.4.4 is the version in Ubuntu 20.04. It supports Python >=3.5. pylint == 2.4.4 -# Use the last version of mypy that is compatible with Python 3.6. -# Newer versions should be ok too. -mypy >= 0.971 +# Use a version of mypy that is compatible with our code base. +# mypy <0.940 is known not to work: see commit +# :/Upgrade mypy to the last version supporting Python 3.6 +# mypy >=0.960 is known not to work: +# https://github.com/Mbed-TLS/mbedtls-framework/issues/50 +# mypy 0.942 is the version in Ubuntu 22.04. +mypy == 0.942 # At the time of writing, only needed for tests/scripts/audit-validity-dates.py. # It needs >=35.0.0 for correct operation, and that requires Python >=3.6, From 96db2cceddcdeb22a016d848045315c55fe9a775 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Fri, 4 Oct 2024 15:52:01 +0200 Subject: [PATCH 14/15] Default to allowing partial test coverage Currently, many test cases are not executed. A follow-up pull request will take care of that. In the meantime, continue allowing partial test coverage. Signed-off-by: Gilles Peskine --- tests/scripts/analyze_outcomes.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/scripts/analyze_outcomes.py b/tests/scripts/analyze_outcomes.py index c40dab55e..72dba99f7 100755 --- a/tests/scripts/analyze_outcomes.py +++ b/tests/scripts/analyze_outcomes.py @@ -13,7 +13,9 @@ class CoverageTask(outcome_analysis.CoverageTask): - pass # We'll populate IGNORED_TESTS soon + # We'll populate IGNORED_TESTS soon. In the meantime, lack of coverage + # is just a warning. + outcome_analysis.FULL_COVERAGE_BY_DEFAULT = False # The names that we give to classes derived from DriverVSReference do not From 8fa4964830584a3a8a384e9e06c9bf66244b80d2 Mon Sep 17 00:00:00 2001 From: Gilles Peskine Date: Wed, 9 Oct 2024 14:07:46 +0200 Subject: [PATCH 15/15] Update framework to the branch with collect_test_cases.py and outcome_analysis.py Signed-off-by: Gilles Peskine --- framework | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/framework b/framework index 33ac13321..1de0641e7 160000 --- a/framework +++ b/framework @@ -1 +1 @@ -Subproject commit 33ac13321737c333f52659ee848ca25746588227 +Subproject commit 1de0641e789d3c38b3ce99d7922002992cbe816c
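With the modules now living in the framework submodule, a consuming branch's tests/scripts/analyze_outcomes.py reduces to a thin wrapper that supplies branch-specific task classes to outcome_analysis.main(). A minimal sketch of such a wrapper follows, assuming the mbedtls_framework import path used in the earlier patches; the DriverVSReference subclass, its component names, and the task names are illustrative, not taken from any real branch:

```
#!/usr/bin/env python3
"""Branch-side wrapper around the framework's outcome analysis (sketch)."""

import scripts_path # pylint: disable=unused-import
from mbedtls_framework import outcome_analysis


class CoverageTask(outcome_analysis.CoverageTask):
    # As in patch 14: tolerate missing coverage until IGNORED_TESTS
    # is populated.
    outcome_analysis.FULL_COVERAGE_BY_DEFAULT = False


class DriverVSReference_example(outcome_analysis.DriverVSReference):
    # Hypothetical component names, for illustration only.
    REFERENCE = 'test_example_reference'
    DRIVER = 'test_example_accel'
    IGNORED_SUITES = []
    IGNORED_TESTS = {}


KNOWN_TASKS = {
    'analyze_coverage': CoverageTask,
    'analyze_driver_vs_reference_example': DriverVSReference_example,
}

if __name__ == '__main__':
    outcome_analysis.main(KNOWN_TASKS)
```

Such a wrapper would be invoked exactly as in the DriverVSReference docstring, e.g. `tests/scripts/analyze_outcomes.py out.csv analyze_coverage`, with all of the shared parsing, ignore-list, and reporting logic coming from the framework module.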