Merge branch 'main' into csl/switch_multigpu_runner

pytorch · Mar 26, 2024 · 3d37bd7 · 3d37bd7
2 parents dac83f2 + 9ceac50
commit 3d37bd7
Show file tree

Hide file tree

Showing 36 changed files with 782 additions and 955 deletions.
diff --git a/.gitignore b/.gitignore
@@ -3,7 +3,7 @@ beginner
 intermediate
 advanced
 pytorch_basics
-recipes
+/recipes
 prototype
 
 #data things

diff --git a/.jenkins/build.sh b/.jenkins/build.sh
@@ -15,6 +15,10 @@ sudo apt-get update || sudo apt-get install libgnutls30
 sudo apt-get update
 sudo apt-get install -y --no-install-recommends unzip p7zip-full sox libsox-dev libsox-fmt-all rsync
 
+# Install pandoc (does not install from pypi)
+sudo apt-get update
+sudo apt-get install -y pandoc
+
 # NS: Path to python runtime should already be part of docker container
 # export PATH=/opt/conda/bin:$PATH
 rm -rf src
@@ -57,10 +61,15 @@ if [[ "${JOB_TYPE}" == "worker" ]]; then
   # IMPORTANT NOTE: We assume that each tutorial has a UNIQUE filename.
   FILES_TO_RUN=$(python .jenkins/get_files_to_run.py)
   echo "FILES_TO_RUN: " ${FILES_TO_RUN}
+  # Files to run must be accessible to subprocesses (at least to `download_data.py`)
+  export FILES_TO_RUN
 
   # Step 3: Run `make docs` to generate HTML files and static files for these tutorials
   make docs
 
+  # Step 3.1: Run the post-processing script:
+  python .jenkins/post_process_notebooks.py
+
   # Step 4: If any of the generated files are not related the tutorial files we want to run,
   # then we remove them
   set +x
@@ -138,6 +147,9 @@ elif [[ "${JOB_TYPE}" == "manager" ]]; then
   bash $DIR/remove_invisible_code_block_batch.sh docs
   python .jenkins/validate_tutorials_built.py
 
+  # Step 5.1: Run post-processing script on .ipynb files:
+  python .jenkins/post_process_notebooks.py
+
   # Step 6: Copy generated HTML files and static files to S3
   7z a manager.7z docs
   awsv2 s3 cp manager.7z s3://${BUCKET_NAME}/${COMMIT_ID}/manager.7z

diff --git a/.jenkins/custom_pandoc_filter.py b/.jenkins/custom_pandoc_filter.py
@@ -0,0 +1,139 @@
+from pandocfilters import toJSONFilter, Div, RawBlock, Para, Str, Space, Link, Code, CodeBlock
+import markdown
+import html
+
+def to_markdown(item, skip_octicon=False):
+    # A handler function to process strings, links, code, and code
+    # blocks
+    if item['t'] == 'Str':
+        return item['c']
+    elif item['t'] == 'Space':
+        return ' '
+    elif item['t'] == 'Link':
+        link_text = ''.join(to_markdown(i, skip_octicon) for i in item['c'][1])
+        return f'<a href="{item["c"][2][0]}">{link_text}</a>'
+    elif item['t'] == 'Code':
+        # Need to remove octicons as they don't render in .ipynb
+        if any(value == 'octicon' for key, value in item['c'][0][2]):
+            return ''
+        else:
+            # Escape the code and wrap it in <code> tags
+            return f'<code>{html.escape(item["c"][1])}</code>'
+    elif item['t'] == 'CodeBlock':
+        # Escape the code block and wrap it in <pre><code> tags
+        return f'<pre><code>{html.escape(item["c"][1])}</code></pre>'
+    else:
+        return ''
+
+
+def process_admonitions(key, value, format, meta):
+    # Replace admonitions with proper HTML.
+    if key == 'Div':
+        [[ident, classes, keyvals], contents] = value
+        if 'note' in classes:
+            color = '#54c7ec'
+            label = 'NOTE:'
+        elif 'tip' in classes:
+            color = '#6bcebb'
+            label = 'TIP:'
+        elif 'warning' in classes:
+            color = '#e94f3b'
+            label = 'WARNING:'
+        else:
+            return
+
+        note_content = []
+        for block in contents:
+            if block.get('t') == 'Para':
+                for item in block['c']:
+                    if item['t'] == 'Str':
+                        note_content.append(Str(item['c']))
+                    elif item['t'] == 'Space':
+                        note_content.append(Space())
+                    elif item['t'] == 'Link':
+                        note_content.append(Link(*item['c']))
+                    elif item['t'] == 'Code':
+                        note_content.append(Code(*item['c']))
+            elif block.get('t') == 'CodeBlock':
+                note_content.append(CodeBlock(*block['c']))
+
+        note_content_md = ''.join(to_markdown(item) for item in note_content)
+        html_content = markdown.markdown(note_content_md)
+
+        return [{'t': 'RawBlock', 'c': ['html', f'<div style="background-color: {color}; color: #fff; font-weight: 700; padding-left: 10px; padding-top: 5px; padding-bottom: 5px"><strong>{label}</strong></div>']}, {'t': 'RawBlock', 'c': ['html', '<div style="background-color: #f3f4f7; padding-left: 10px; padding-top: 10px; padding-bottom: 10px; padding-right: 10px">']}, {'t': 'RawBlock', 'c': ['html', html_content]}, {'t': 'RawBlock', 'c': ['html', '</div>']}]
+    elif key == 'RawBlock':
+    # this is needed for the cells that have embedded video.
+    # We add a special tag to those: ``` {.python .jupyter-code-cell}
+    # The post-processing script then finds those and generates separate
+    # code cells that can load video.
+        [format, content] = value
+        if format == 'html' and 'iframe' in content:
+            # Extract the video URL
+            video_url = content.split('src="')[1].split('"')[0]
+            # Create the Python code to display the video
+            python_code = f"""
+from IPython.display import display, HTML
+html_code = \"""
+{content}
+\"""
+display(HTML(html_code))
+"""
+
+            return {'t': 'CodeBlock', 'c': [['', ['python', 'jupyter-code-cell'], []], python_code]}
+
+
+def process_images(key, value, format, meta):
+    # Add https://pytorch.org/tutorials/ to images so that they
+    # load correctly in the notebook.
+    if key != 'Image':
+        return None
+    [ident, classes, keyvals], caption, [src, title] = value
+    if not src.startswith('http'):
+        while src.startswith('../'):
+            src = src[3:]
+        if src.startswith('/_static'):
+            src = src[1:]
+        src = 'https://pytorch.org/tutorials/' + src
+
+    return {'t': 'Image', 'c': [[ident, classes, keyvals], caption, [src, title]]}
+
+
+def process_grids(key, value, format, meta):
+    # Generate side by side grid cards. Only for the two-cards layout
+    # that we use in the tutorial template.
+    if key == 'Div':
+        [[ident, classes, keyvals], contents] = value
+        if 'grid' in classes:
+            columns = ['<div style="width: 45%; float: left; padding: 20px;">',
+                       '<div style="width: 45%; float: right; padding: 20px;">']
+            column_num = 0
+            for block in contents:
+                if 't' in block and block['t'] == 'Div' and 'grid-item-card' in block['c'][0][1]:
+                    item_html = ''
+                    for item in block['c'][1]:
+                        if item['t'] == 'Para':
+                            item_html += '<h2>' + ''.join(to_markdown(i) for i in item['c']) + '</h2>'
+                        elif item['t'] == 'BulletList':
+                            item_html += '<ul>'
+                            for list_item in item['c']:
+                                item_html += '<li>' + ''.join(to_markdown(i) for i in list_item[0]['c']) + '</li>'
+                            item_html += '</ul>'
+                    columns[column_num] += item_html
+                    column_num = (column_num + 1) % 2
+            columns = [column + '</div>' for column in columns]
+            return {'t': 'RawBlock', 'c': ['html', ''.join(columns)]}
+
+def is_code_block(item):
+    return item['t'] == 'Code' and 'octicon' in item['c'][1]
+
+
+def process_all(key, value, format, meta):
+    for transform in [process_admonitions, process_images, process_grids]:
+        new_value = transform(key, value, format, meta)
+        if new_value is not None:
+            break
+    return new_value
+
+
+if __name__ == "__main__":
+    toJSONFilter(process_all)
diff --git a/.jenkins/download_data.py b/.jenkins/download_data.py
@@ -105,6 +105,13 @@ def download_lenet_mnist() -> None:
                          sha256="cb5f8e578aef96d5c1a2cc5695e1aa9bbf4d0fe00d25760eeebaaac6ebc2edcb",
                          )
 
+def download_gpu_quantization_torchao() -> None:
+    # Download SAM model checkpoint for prototype_source/gpu_quantization_torchao_tutorial.py
+    download_url_to_file("https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth",
+                         prefix=PROTOTYPE_DATA_DIR,
+                         dst="sam_vit_h_4b8939.pth",
+                         sha256="a7bf3b02f3ebf1267aba913ff637d9a2d5c33d3173bb679e46d9f338c26f262e",
+                         )
 
 def main() -> None:
     DATA_DIR.mkdir(exist_ok=True)
@@ -122,7 +129,8 @@ def main() -> None:
         download_dcgan_data()
     if FILES_TO_RUN is None or "fgsm_tutorial" in FILES_TO_RUN:
         download_lenet_mnist()
-
+    if FILES_TO_RUN is None or "gpu_quantization_torchao_tutorial" in FILES_TO_RUN:
+        download_gpu_quantization_torchao()
 
 if __name__ == "__main__":
     main()
diff --git a/.jenkins/metadata.json b/.jenkins/metadata.json
@@ -28,10 +28,21 @@
   "intermediate_source/model_parallel_tutorial.py": {
     "needs": "linux.16xlarge.nvidia.gpu"
   },
+  "intermediate_source/torchvision_tutorial.py": {
+    "needs": "linux.g5.4xlarge.nvidia.gpu", 
+    "_comment": "does not require a5g but needs to run before gpu_quantization_torchao_tutorial.py."
+  },
+  "advanced_source/coding_ddpg.py": {
+     "needs": "linux.g5.4xlarge.nvidia.gpu",
+     "_comment": "does not require a5g but needs to run before gpu_quantization_torchao_tutorial.py."
+  },
   "intermediate_source/torch_compile_tutorial.py": {
     "needs": "linux.g5.4xlarge.nvidia.gpu"
   },
   "intermediate_source/scaled_dot_product_attention_tutorial.py": {
     "needs": "linux.g5.4xlarge.nvidia.gpu"
+  },
+  "prototype_source/gpu_quantization_torchao_tutorial.py": {
+    "needs": "linux.g5.4xlarge.nvidia.gpu"
   }
 }
diff --git a/.jenkins/post_process_notebooks.py b/.jenkins/post_process_notebooks.py
@@ -0,0 +1,97 @@
+import nbformat as nbf
+import os
+import re
+
+"""
+This post-processing script needs to run after the .ipynb files are
+generated. The script removes extraneous ```{=html} syntax from the
+admonitions and splits the cells that have video iframe into a 
+separate code cell that can be run to load the video directly
+in the notebook. This script is included in build.sh.
+"""
+
+
+# Pattern to search ``` {.python .jupyter-code-cell}
+pattern = re.compile(r'(.*?)``` {.python .jupyter-code-cell}\n\n(from IPython.display import display, HTML\nhtml_code = """\n.*?\n"""\ndisplay\(HTML\(html_code\)\))\n```(.*)', re.DOTALL)
+
+
+def process_video_cell(notebook_path):
+    """
+    This function finds the code blocks marked with
+    "``` {.python .jupyter-code-cell}" and slices them
+    into a separate code cell (instead of markdown), which allows
+    the video to be loaded in the notebook. The rest of the content is placed
+    in a new markdown cell.
+    """
+    print(f'Processing file: {notebook_path}')
+    notebook = nbf.read(notebook_path, as_version=4)
+
+    # Iterate over markdown cells
+    for i, cell in enumerate(notebook.cells):
+        if cell.cell_type == 'markdown':
+            match = pattern.search(cell.source)
+            if match:
+                print(f'Match found in cell {i}: {match.group(0)[:100]}...')
+                # Extract the parts before and after the video code block
+                before_html_block = match.group(1)
+                code_block = match.group(2)
+
+                # Add a comment to run the cell to display the video 
+                code_block = "# Run this cell to load the video\n" + code_block
+                # Create a new code cell
+                new_code_cell = nbf.v4.new_code_cell(source=code_block)
+
+                # Replace the original markdown cell with the part before the code block
+                cell.source = before_html_block
+
+                # Insert the new code cell after the current one
+                notebook.cells.insert(i+1, new_code_cell)
+                print(f'New code cell created with source: {new_code_cell.source}')
+
+                # If there is content after the HTML code block, create a new markdown cell
+                if len(match.group(3).strip()) > 0:
+                    after_html_block = match.group(3)
+                    new_markdown_cell = nbf.v4.new_markdown_cell(source=after_html_block)
+                    # Create a new markdown cell and add the content after code block there
+                    notebook.cells.insert(i+2, new_markdown_cell)
+
+            else:
+                # Remove ```{=html} from the code block
+                cell.source = remove_html_tag(cell.source)
+
+    nbf.write(notebook, notebook_path)
+
+
+def remove_html_tag(content):
+    """
+    Pandoc adds an extraneous ```{=html} ``` to raw HTML blocks which
+    prevents it from rendering correctly. This function removes
+    ```{=html} that we don't need.
+    """
+    content = re.sub(r'```{=html}\n<div', '<div', content)
+    content = re.sub(r'">\n```', '">', content)
+    content = re.sub(r'<\/div>\n```', '</div>\n', content)
+    content = re.sub(r'```{=html}\n</div>\n```', '</div>\n', content)
+    content = re.sub(r'```{=html}', '', content)
+    content = re.sub(r'</p>\n```', '</p>', content)
+    return content
+
+
+def walk_dir(downloads_dir):
+    """
+    Walk the dir and process all notebook files in
+    the _downloads directory and its subdirectories.
+    """
+    for root, dirs, files in os.walk(downloads_dir):
+        for filename in files:
+            if filename.endswith('.ipynb'):
+                process_video_cell(os.path.join(root, filename))
+
+
+def main():
+    downloads_dir = './docs/_downloads'
+    walk_dir(downloads_dir)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py
@@ -23,13 +23,11 @@
     "beginner_source/examples_autograd/polynomial_autograd",
     "beginner_source/examples_autograd/polynomial_custom_function",
     "beginner_source/t5_tutorial", # re-enable after this is fixed: https://github.com/pytorch/text/issues/1756
-    "intermediate_source/parametrizations",
     "intermediate_source/mnist_train_nas",  # used by ax_multiobjective_nas_tutorial.py
     "intermediate_source/fx_conv_bn_fuser",
     "intermediate_source/_torch_export_nightly_tutorial",  # does not work on release
     "advanced_source/super_resolution_with_onnxruntime",
     "advanced_source/ddp_pipeline",  # requires 4 gpus
-    "advanced_source/usb_semisup_learn", # in the current form takes 140+ minutes to build - can be enabled when the build time is reduced
     "prototype_source/fx_graph_mode_ptq_dynamic",
     "prototype_source/vmap_recipe",
     "prototype_source/torchscript_freezing",

diff --git a/README.md b/README.md
@@ -57,7 +57,13 @@ GALLERY_PATTERN="neural_style_transfer_tutorial.py" sphinx-build . _build
 
 The `GALLERY_PATTERN` variable respects regular expressions.
 
+
 ## About contributing to PyTorch Documentation and Tutorials
 * You can find information about contributing to PyTorch documentation in the 
 PyTorch Repo [README.md](https://github.com/pytorch/pytorch/blob/master/README.md) file. 
 * Additional information can be found in [PyTorch CONTRIBUTING.md](https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md).
+
+
+## License
+
+PyTorch Tutorials is BSD licensed, as found in the LICENSE file.
diff --git a/_static/img/usb_semisup_learn/code.png b/_static/img/usb_semisup_learn/code.png
diff --git a/_templates/layout.html b/_templates/layout.html
@@ -85,11 +85,11 @@
       tutorialTitle: $('h1:first').text(),
       rating: $(this).attr("data-count")
     });
-
     gtag('event', 'click', {
       'event_category': 'Tutorial Rating',
       'event_label': $("h1").first().text(),
-      'value': $(this).attr("data-count")
+      'value': $(this).attr("data-count"),
+      'customEvent:Rating': $(this).attr("data-count") // send to GA custom dimension customEvent:Rating.
     });
    });