Merge from GO #455
Merged: 29 commits, Jun 25, 2025
4b389f3
Added option to skip detect-sudo, support postfix detection
amd-arsuresh May 29, 2025
dc76ec1
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] May 29, 2025
ffc4e25
Support installation variations in get,llvm
amd-arsuresh Jun 2, 2025
fc772a2
Merge branch 'mlcommons:dev' into dev
arjunsuresh Jun 2, 2025
988924a
Fix tags for nvidia-harness
arjunsuresh Jun 2, 2025
061bf60
Support tar.bz2, 7z and rar in extract-file, export more variables in…
amd-arsuresh Jun 5, 2025
0b55f57
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] Jun 5, 2025
833bb10
Merge branch 'mlcommons:dev' into dev
arjunsuresh Jun 5, 2025
a15e989
Added a parser for system-detail.txt
amd-arsuresh Jun 5, 2025
0d9cdd7
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] Jun 5, 2025
9fa857d
Merge branch 'dev' into dev
arjunsuresh Jun 25, 2025
9fc49f6
Added initial support for mlc doc script
amd-arsuresh Jun 25, 2025
4e285c3
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] Jun 25, 2025
da579d2
Update build_wheels.yml
arjunsuresh Jun 25, 2025
76268ea
Update list_modified_files.py
arjunsuresh Jun 25, 2025
5115c40
[Automated Commit] Format Codebase [skip ci]
github-actions[bot] Jun 25, 2025
09fd954
Update run-tests-on-modified-meta.yml to handle documentation
arjunsuresh Jun 25, 2025
36aab18
Update meta.yaml
arjunsuresh Jun 25, 2025
77ac5bb
Update run-tests-on-modified-meta.yml
arjunsuresh Jun 25, 2025
1d1da6c
Update run-tests-on-modified-meta.yml
arjunsuresh Jun 25, 2025
1742c84
Update doc.py
arjunsuresh Jun 25, 2025
2b86437
Update run-tests-on-modified-meta.yml
arjunsuresh Jun 25, 2025
be6c967
Update run-tests-on-modified-meta.yml
arjunsuresh Jun 25, 2025
8bafe04
Update run-tests-on-modified-meta.yml
arjunsuresh Jun 25, 2025
9c3e29d
Update run-tests-on-modified-meta.yml
arjunsuresh Jun 25, 2025
7e80ef5
Update run-tests-on-modified-meta.yml
arjunsuresh Jun 25, 2025
c5f11d2
Update run-tests-on-modified-meta.yml
arjunsuresh Jun 25, 2025
a098b93
Create document-scripts.yml
arjunsuresh Jun 25, 2025
3656e78
Update run-tests-on-modified-meta.yml
arjunsuresh Jun 25, 2025
15 changes: 14 additions & 1 deletion .github/scripts/list_modified_files.py
@@ -31,11 +31,24 @@ def process_files(files):
]


def get_modified_metas(files):
filenames = files.split(",")
return [
{
"file": file,
"uid": uid,
}
for file in filenames if os.path.basename(file) == 'meta.yaml'
for uid, num_tests in [get_file_info(file)]
]


if __name__ == "__main__":
changed_files = sys.stdin.read().strip()
processed_files = process_files(changed_files)
modified_metas = get_modified_metas(changed_files)
json_processed_files = json.dumps(processed_files)
print(json_processed_files)
with open(os.environ['GITHUB_OUTPUT'], 'a') as f:
f.write(
f"processed_files={json.dumps({'file_info': processed_files})}\n")
f"processed_files={json.dumps({'file_info': processed_files, 'modified_metas': modified_metas})}\n")
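The comprehension in `get_modified_metas` uses a one-element list to destructure the tuple returned by `get_file_info` inside the comprehension itself. A minimal runnable sketch of the pattern, with a stand-in `get_file_info` (the real helper reads the script's meta.yaml):

```python
import json
import os


def get_file_info(path):
    # Stand-in: the real helper parses meta.yaml and returns (uid, num_tests).
    return ("uid-" + os.path.basename(os.path.dirname(path)), 1)


def get_modified_metas(files):
    filenames = files.split(",")
    return [
        {"file": file, "uid": uid}
        # The one-element list lets us call get_file_info once per file
        # and unpack its tuple without a temporary variable.
        for file in filenames if os.path.basename(file) == "meta.yaml"
        for uid, num_tests in [get_file_info(file)]
    ]


metas = get_modified_metas("script/get-llvm/meta.yaml,README.md")
print(json.dumps(metas))
```

Only paths whose basename is exactly `meta.yaml` survive the filter, so unrelated changed files drop out of the matrix.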
1 change: 0 additions & 1 deletion .github/workflows/build_wheels.yml
@@ -5,7 +5,6 @@ on:
types: [published]
workflow_dispatch: {}


jobs:
build_wheels:
if: github.repository_owner == 'mlcommons'
80 changes: 80 additions & 0 deletions .github/workflows/document-scripts.yml
@@ -0,0 +1,80 @@
# This workflow will automatically update the README for any updated MLC script
name: Document script on modified meta

on:
push:
branches: [ "dev" ]
paths:
- 'script/**meta.yaml'

jobs:
get_modified_files:
runs-on: ubuntu-latest
outputs:
processed_files: ${{ steps.modified-files.outputs.processed_files }}

steps:
- name: 'Checkout'
uses: actions/checkout@v4
with:
fetch-depth: 0

- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: '3.x'

- name: Install dependencies
run: |
pip install pyyaml

- name: Get changed files
id: modified-files
env:
filter: ${{ github.event.before }}
run: |
changed_files=$(git diff --name-only $filter | grep -E '.*\.yaml$')
echo "$changed_files" | python3 .github/scripts/list_modified_files.py

document_modified_scripts:
runs-on: ubuntu-latest
needs: get_modified_files
if: ${{ needs.get_modified_files.outputs.processed_files != '[]' && needs.get_modified_files.outputs.processed_files != '' }}
permissions:
contents: write

strategy:
fail-fast: false
matrix:
modified_metas: ${{ fromJSON(needs.get_modified_files.outputs.processed_files).modified_metas }}

steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 0
path: automation-scripts

- name: Set up Git for commit
run: |
git config --global user.name "github-actions[bot]"
git config --global user.email "github-actions[bot]@users.noreply.github.com"

- name: Document meta.yaml file
run: |
echo "Documenting ${{ matrix.modified_metas.file }}"

pip install mlcflow
mlc add repo automation-scripts
mlc doc script ${{ matrix.modified_metas.uid}} --quiet
cd automation-scripts
find . -type f -name README.md -exec git add {} +
# Use the GitHub actor's name and email
git config --global user.name github-actions[bot]
git config --global user.email "github-actions[bot]@users.noreply.github.com"
# Commit changes
git commit -m '[Automated Commit] Document ${{ matrix.modified_metas.file}} [skip ci]'
git push
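The `document_modified_scripts` matrix expands `fromJSON(...).modified_metas`, i.e. the JSON object that `list_modified_files.py` writes to `GITHUB_OUTPUT`. A rough sketch of the payload shape the workflow consumes (the concrete uid values here are illustrative, not from the PR):

```python
import json

# Shape of the processed_files output parsed by fromJSON() in the workflow.
payload = {
    "file_info": [
        {"file": "script/get-llvm/meta.yaml", "uid": "abc123", "num_run": 0},
    ],
    "modified_metas": [
        {"file": "script/get-llvm/meta.yaml", "uid": "abc123"},
    ],
}

# Each matrix job receives one entry of modified_metas as matrix.modified_metas.
for meta in payload["modified_metas"]:
    print(f"Documenting {meta['file']} (uid {meta['uid']})")
```

The job guard checks the output against both `'[]'` and `''` so that pushes touching no meta.yaml file skip the documentation job entirely.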



5 changes: 0 additions & 5 deletions .github/workflows/run-tests-on-modified-meta.yml
@@ -46,11 +46,6 @@ jobs:
file_info: ${{ fromJSON(needs.get_modified_files.outputs.processed_files).file_info }}

steps:
- name: Checkout repository
uses: actions/checkout@v4
with:
fetch-depth: 2

- name: Process meta.yaml file
run: |
echo "Processing ${{ matrix.file_info.file }} with run number ${{ matrix.file_info.num_run }}"
149 changes: 149 additions & 0 deletions automation/script/doc.py
@@ -0,0 +1,149 @@
import os
from mlc import utils
from utils import *
import logging
from pathlib import PureWindowsPath, PurePosixPath
import copy


def generate_doc(self_module, input_params):
"""
Generates the documentation of MLC scripts.

Args:
self_module: Reference to the current module for internal calls.
input_params: Dictionary containing input parameters.

Returns:
Dictionary with the result of the operation. Keys:
- 'return': 0 on success, >0 on error.
- 'error': Error message (if any).
"""

# Extract and handle basic inputs
quiet = input_params.get('quiet', False)
logger = self_module.logger
env = input_params.get('env', {})
generic_inputs = self_module.input_flags_converted_to_env

# Search for matching scripts
search_result = self_module.search(input_params.copy())
if search_result['return'] > 0:
return search_result

scripts_list = search_result['list']
if not scripts_list:
return {'return': 1, 'error': 'No scripts were found'}

env = input_params.get('env', {})
state_data = input_params.get('state', {})
constant_vars = input_params.get('const', {})
constant_state = input_params.get('const_state', {})
tag_values = input_params.get('tags', '').split(",")
variation_tags = [tag[1:] for tag in tag_values if tag.startswith("_")]

# Iterate over scripts and generate documentation
for script in sorted(scripts_list, key=lambda x: x.meta.get('alias', '')):
metadata = script.meta
script_directory = script.path
script_tags = metadata.get("tags", [])
script_alias = metadata.get('alias', '')
script_uid = metadata.get('uid', '')
script_input_mapping = metadata.get('input_mapping', {})
script_input_description = metadata.get('input_description', {})

r = generate_docs(metadata, script_directory, generic_inputs)
if r['return'] > 0:
continue

return {'return': 0}


def generate_docs(metadata, script_path, generic_inputs):
script_name = metadata.get('alias', metadata['uid'])
readme_prefix = f"""This README is automatically generated. Please follow the [script execution document](https://docs.mlcommons.org/mlcflow/targets/script/execution-flow/) to understand more about the MLC script execution.
"""
doc_content = f"""# README for {script_name}
{readme_prefix}
"""

readme_dir = os.path.join(script_path, "docs")

if not os.path.exists(readme_dir):
os.makedirs(readme_dir)

script_tags = metadata.get("tags", [])
script_tags_help = metadata.get("tags_help", '')
if not script_tags_help:
tags_string = ",".join(script_tags)
else:
tags_string = script_tags_help

script_input_mapping = metadata.get('input_mapping', {})
script_input_description = metadata.get('input_description', {})

r = get_run_readme(
tags_string,
script_input_mapping,
script_input_description,
generic_inputs)
if r['return'] > 0:
return r

run_readme = r['run_readme']

doc_content += run_readme

readme_path = os.path.join(readme_dir, "README.md")
with open(readme_path, "w") as f:
f.write(doc_content)
print(f"Readme generated at {readme_path}")

return {'return': 0}


def get_run_readme(tags, input_mapping, input_description, generic_inputs):
run_readme = f"""## Run Commands

```
mlcr {tags}
```
"""

if input_description:
input_description_string = generate_markdown(
"Script Inputs", input_description)
else:
input_description_string = "No script specific inputs"

run_readme += input_description_string

generic_input_string = generate_markdown(
"Generic Script Inputs", generic_inputs)

run_readme += generic_input_string

return {'return': 0, 'run_readme': run_readme}


def infer_type(field):
if "dtype" in field:
return field["dtype"]
elif "default" in field:
return type(field["default"]).__name__
else:
return "str"


def generate_markdown(heading, input_dict):
lines = [
f"### {heading}\n",
"| Name | Description | Default | Type |",
"|------|-------------|---------|------|"]
for key in sorted(
input_dict, key=lambda k: input_dict[k].get("sort", 9999)):
field = input_dict[key]
desc = field.get("desc", "")
default = field.get("default", "")
dtype = infer_type(field)
lines.append(f"| `{key}` | {desc} | `{default}` | {dtype} |")
return "\n".join(lines)
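The Type column of the generated table falls back from an explicit `dtype`, to the Python type of `default`, to `str`. A quick check of `infer_type`'s fallback order:

```python
def infer_type(field):
    # Explicit dtype wins, then the default value's Python type, then "str".
    if "dtype" in field:
        return field["dtype"]
    elif "default" in field:
        return type(field["default"]).__name__
    return "str"


print(infer_type({"dtype": "int", "default": 4}))   # -> int
print(infer_type({"default": False}))               # -> bool
print(infer_type({"desc": "no default given"}))     # -> str
```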
38 changes: 20 additions & 18 deletions automation/script/module.py
@@ -68,22 +68,24 @@ def __init__(self, action_object, automation_file):
'MLC_GIT_*',
'MLC_RENEW_CACHE_ENTRY']

self.input_flags_converted_to_tmp_env = ['path']

self.input_flags_converted_to_env = ['input',
'output',
'outdirname',
'outbasename',
'name',
'extra_cache_tags',
'skip_compile',
'skip_run',
'accept_license',
'skip_system_deps',
'git_ssh',
'gh_token',
'hf_token',
'verify_ssl']
self.input_flags_converted_to_tmp_env = {
'path': {'desc': 'Filesystem path to search for executable', 'default': ''}}

self.input_flags_converted_to_env = {'input': {'desc': 'Input to the script passed using the env key `MLC_INPUT`', 'default': ''},
'output': {'desc': 'Output from the script passed using the env key `MLC_OUTPUT`', 'default': ''},
'outdirname': {'desc': 'The directory to store the script output', 'default': 'cache directory ($HOME/MLC/repos/local/cache/<>) if the script is cacheable or else the current directory'},
'outbasename': {'desc': 'The output file/folder name', 'default': ''},
'name': {},
'extra_cache_tags': {'desc': 'Extra cache tags to be added to the cached entry when the script results are saved', 'default': ''},
'skip_compile': {'desc': 'Skip compilation', 'default': False},
'skip_run': {'desc': 'Skip run', 'default': False},
'accept_license': {'desc': 'Accept the required license requirement to run the script', 'default': False},
'skip_system_deps': {'desc': 'Skip installing any system dependencies', 'default': False},
'git_ssh': {'desc': 'Use SSH for git repos', 'default': False},
'gh_token': {'desc': 'Github Token', 'default': ''},
'hf_token': {'desc': 'Huggingface Token', 'default': ''},
'verify_ssl': {'desc': 'Verify SSL', 'default': False}
}

############################################################

@@ -4461,8 +4463,8 @@ def doc(self, i):

"""

return utils.call_internal_module(
self, __file__, 'module_misc', 'doc', i)
from script.doc import generate_doc
return generate_doc(self, i)

############################################################

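Changing `input_flags_converted_to_env` from a flat list to a dict attaches `desc`/`default` metadata for the doc generator while keeping the same flag names: iterating a dict or testing membership with `in` operates over its keys, so existing callers that treated the value as a list of flag names keep working. A minimal sketch of that compatibility property (two flags shown, values copied from the diff above):

```python
input_flags_converted_to_env = {
    "gh_token": {"desc": "Github Token", "default": ""},
    "verify_ssl": {"desc": "Verify SSL", "default": False},
}

# `in` and iteration on a dict operate over its keys, exactly as they
# did over the old list of flag names.
assert "gh_token" in input_flags_converted_to_env
flags = [flag for flag in input_flags_converted_to_env]
print(flags)
```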
14 changes: 11 additions & 3 deletions script/detect-cpu/customize.py
@@ -172,7 +172,7 @@ def postprocess(i):
else:
env[env_key] = v[1].strip()

if env.get('MLC_HOST_CPU_SOCKETS', '') == '-': # assume as 1
if env.get('MLC_HOST_CPU_SOCKETS', '') in ['-', '']: # assume as 1
env['MLC_HOST_CPU_SOCKETS'] = '1'

if env.get('MLC_HOST_CPU_TOTAL_CORES', '') != '' and env.get(
@@ -184,9 +184,17 @@
env['MLC_HOST_CPU_THREADS_PER_CORE'] = str(int(int(env['MLC_HOST_CPU_TOTAL_LOGICAL_CORES']) //
int(env['MLC_HOST_CPU_TOTAL_PHYSICAL_CORES'])))

if env.get('MLC_HOST_CPU_SOCKETS', '') != '' and env.get('MLC_HOST_CPU_TOTAL_PHYSICAL_CORES',
'') != '' and env.get('MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET', '') == '':
if env.get('MLC_HOST_CPU_TOTAL_PHYSICAL_CORES', '') != '' and env.get(
'MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET', '') == '':
env['MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET'] = str(
int(env['MLC_HOST_CPU_TOTAL_PHYSICAL_CORES']) // int(env['MLC_HOST_CPU_SOCKETS']))

if env.get('MLC_HOST_CPU_TOTAL_PHYSICAL_CORES', '') == '' and env.get(
'MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET', '') != '':
env['MLC_HOST_CPU_TOTAL_PHYSICAL_CORES'] = str(int(
env['MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET']) * int(env['MLC_HOST_CPU_SOCKETS']))

if env.get('MLC_HOST_CPU_TOTAL_PHYSICAL_CORES', '') != '':
env['MLC_HOST_CPU_PHYSICAL_CORES_LIST'] = f"""0-{int(env['MLC_HOST_CPU_TOTAL_PHYSICAL_CORES'])-1}"""

return {'return': 0}
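The new branches let either the total physical core count or the per-socket count be derived from the other, given the socket count (which now also defaults to 1 when missing, not only when reported as `-`). A standalone sketch of the inference, assuming the same env keys as the script:

```python
def fill_cpu_counts(env):
    # Default socket count to 1 when the detected value is missing or '-'.
    if env.get("MLC_HOST_CPU_SOCKETS", "") in ["-", ""]:
        env["MLC_HOST_CPU_SOCKETS"] = "1"
    sockets = int(env["MLC_HOST_CPU_SOCKETS"])
    total = env.get("MLC_HOST_CPU_TOTAL_PHYSICAL_CORES", "")
    per_socket = env.get("MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET", "")
    # Derive whichever of the two counts is missing from the other.
    if total != "" and per_socket == "":
        env["MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET"] = str(int(total) // sockets)
    if total == "" and per_socket != "":
        env["MLC_HOST_CPU_TOTAL_PHYSICAL_CORES"] = str(int(per_socket) * sockets)
    # Publish the 0-based core range, e.g. "0-15" for 16 physical cores.
    if env.get("MLC_HOST_CPU_TOTAL_PHYSICAL_CORES", "") != "":
        n = int(env["MLC_HOST_CPU_TOTAL_PHYSICAL_CORES"])
        env["MLC_HOST_CPU_PHYSICAL_CORES_LIST"] = f"0-{n - 1}"
    return env


env = fill_cpu_counts({"MLC_HOST_CPU_SOCKETS": "2",
                       "MLC_HOST_CPU_PHYSICAL_CORES_PER_SOCKET": "8"})
print(env["MLC_HOST_CPU_TOTAL_PHYSICAL_CORES"],
      env["MLC_HOST_CPU_PHYSICAL_CORES_LIST"])  # -> 16 0-15
```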
30 changes: 30 additions & 0 deletions script/extract-file/customize.py
@@ -77,9 +77,39 @@ def preprocess(i):
else:
env['MLC_EXTRACT_TOOL_OPTIONS'] = ' -xvJf'
env['MLC_EXTRACT_TOOL'] = 'tar '
elif filename.endswith(".tar.bz2"):
if windows:
x = '"' if ' ' in filename else ''
env['MLC_EXTRACT_CMD0'] = 'bzip2 -d ' + x + filename + x
filename = filename[:-4] # leave only .tar
env['MLC_EXTRACT_TOOL_OPTIONS'] = ' -xvf'
env['MLC_EXTRACT_TOOL'] = 'tar '
elif os_info['platform'] == 'darwin':
env['MLC_EXTRACT_TOOL_OPTIONS'] = ' -xvjf '
env['MLC_EXTRACT_TOOL'] = 'tar '
else:
env['MLC_EXTRACT_TOOL_OPTIONS'] = ' --skip-old-files -xvjf '
Contributor comment: Hi @arjunsuresh , would --skip-old-files skip the partially written files also if the previous extraction was not clean?

Collaborator (author) reply: tar will not extract even if the file is only partially written.

env['MLC_EXTRACT_TOOL'] = 'tar '
elif filename.endswith(".tar"):
env['MLC_EXTRACT_TOOL_OPTIONS'] = ' -xvf'
env['MLC_EXTRACT_TOOL'] = 'tar '
elif filename.endswith(".7z"):
if windows:
env['MLC_EXTRACT_TOOL'] = '7z'
env['MLC_EXTRACT_TOOL_OPTIONS'] = ' x -y '
else:
# Assumes p7zip is installed and provides the `7z` or `7zr` binary
env['MLC_EXTRACT_TOOL'] = '7z'
env['MLC_EXTRACT_TOOL_OPTIONS'] = ' x -y '

elif filename.endswith(".rar"):
if windows:
env['MLC_EXTRACT_TOOL'] = 'unrar'
env['MLC_EXTRACT_TOOL_OPTIONS'] = ' x -y '
else:
# unrar or unar may be available on Unix-like systems
env['MLC_EXTRACT_TOOL'] = 'unrar'
env['MLC_EXTRACT_TOOL_OPTIONS'] = ' x -y '
elif filename.endswith(".gz"):
# Check target filename
extracted_filename = env.get('MLC_EXTRACT_EXTRACTED_FILENAME', '')
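The archive handling added in this PR boils down to a suffix-to-command table. A condensed sketch of the Unix-side mapping (Windows takes a slightly different path for .tar.bz2, and macOS uses plain -xvjf without --skip-old-files):

```python
# Suffix -> (tool, options) on Unix-like systems, per the diff above.
EXTRACT_TOOLS = {
    ".tar.bz2": ("tar", "--skip-old-files -xvjf"),
    ".tar": ("tar", "-xvf"),
    ".7z": ("7z", "x -y"),        # assumes p7zip provides the 7z binary
    ".rar": ("unrar", "x -y"),
}


def extract_command(filename):
    # Longest suffix first so ".tar.bz2" wins over ".tar".
    for suffix in sorted(EXTRACT_TOOLS, key=len, reverse=True):
        if filename.endswith(suffix):
            tool, opts = EXTRACT_TOOLS[suffix]
            return f"{tool} {opts} {filename}"
    raise ValueError(f"No extractor for {filename}")


print(extract_command("model.tar.bz2"))
# -> tar --skip-old-files -xvjf model.tar.bz2
```

Per the review thread above, --skip-old-files means tar will not overwrite files left behind by a previous (even partial) extraction.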