Skip to content

Commit

Permalink
Merge branch 'main' into csl/switch_multigpu_runner
Browse files Browse the repository at this point in the history
  • Loading branch information
clee2000 authored Mar 26, 2024
2 parents dac83f2 + 9ceac50 commit 3d37bd7
Show file tree
Hide file tree
Showing 36 changed files with 782 additions and 955 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ beginner
intermediate
advanced
pytorch_basics
recipes
/recipes
prototype

#data things
Expand Down
12 changes: 12 additions & 0 deletions .jenkins/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ sudo apt-get update || sudo apt-get install libgnutls30
sudo apt-get update
sudo apt-get install -y --no-install-recommends unzip p7zip-full sox libsox-dev libsox-fmt-all rsync

# Install pandoc (does not install from pypi)
sudo apt-get update
sudo apt-get install -y pandoc

# NS: Path to python runtime should already be part of docker container
# export PATH=/opt/conda/bin:$PATH
rm -rf src
Expand Down Expand Up @@ -57,10 +61,15 @@ if [[ "${JOB_TYPE}" == "worker" ]]; then
# IMPORTANT NOTE: We assume that each tutorial has a UNIQUE filename.
FILES_TO_RUN=$(python .jenkins/get_files_to_run.py)
echo "FILES_TO_RUN: " ${FILES_TO_RUN}
# Files to run must be accessible to subprocesses (at least to `download_data.py`)
export FILES_TO_RUN

# Step 3: Run `make docs` to generate HTML files and static files for these tutorials
make docs

# Step 3.1: Run the post-processing script:
python .jenkins/post_process_notebooks.py

# Step 4: If any of the generated files are not related to the tutorial files we want to run,
# then we remove them
set +x
Expand Down Expand Up @@ -138,6 +147,9 @@ elif [[ "${JOB_TYPE}" == "manager" ]]; then
bash $DIR/remove_invisible_code_block_batch.sh docs
python .jenkins/validate_tutorials_built.py

# Step 5.1: Run post-processing script on .ipynb files:
python .jenkins/post_process_notebooks.py

# Step 6: Copy generated HTML files and static files to S3
7z a manager.7z docs
awsv2 s3 cp manager.7z s3://${BUCKET_NAME}/${COMMIT_ID}/manager.7z
Expand Down
139 changes: 139 additions & 0 deletions .jenkins/custom_pandoc_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
from pandocfilters import toJSONFilter, Div, RawBlock, Para, Str, Space, Link, Code, CodeBlock
import markdown
import html

def to_markdown(item, skip_octicon=False):
    """Render one pandoc AST node as an HTML/markdown text fragment.

    Supported node types are ``Str``, ``Space``, ``Link``, ``Code`` and
    ``CodeBlock``; any other node type renders as an empty string.
    ``skip_octicon`` is only passed through to the recursive calls that
    render the text of a link.
    """
    node_type = item['t']

    if node_type == 'Str':
        return item['c']

    if node_type == 'Space':
        return ' '

    if node_type == 'Link':
        rendered_children = [
            to_markdown(child, skip_octicon) for child in item['c'][1]
        ]
        link_text = ''.join(rendered_children)
        href = item['c'][2][0]
        return f'<a href="{href}">{link_text}</a>'

    if node_type == 'Code':
        # Octicons don't render in .ipynb, so they are dropped entirely.
        for _, attr_value in item['c'][0][2]:
            if attr_value == 'octicon':
                return ''
        # Escape the code and wrap it in <code> tags.
        escaped_inline = html.escape(item['c'][1])
        return f'<code>{escaped_inline}</code>'

    if node_type == 'CodeBlock':
        # Escape the code block and wrap it in <pre><code> tags.
        escaped_block = html.escape(item['c'][1])
        return f'<pre><code>{escaped_block}</code></pre>'

    return ''


def process_admonitions(key, value, format, meta):
    """Rewrite admonition Divs and embedded-video HTML blocks.

    * ``Div`` nodes whose classes contain ``note``, ``tip`` or
      ``warning`` are replaced by a list of four raw HTML blocks: a
      coloured label bar, an opening wrapper ``<div>``, the rendered
      admonition text, and the closing ``</div>``.
    * ``RawBlock`` nodes in ``html`` format whose content contains
      ``iframe`` are replaced by a ``CodeBlock`` tagged with the classes
      ``python`` and ``jupyter-code-cell``. Its text is a Python snippet
      that displays the original HTML via ``IPython.display``.

    Returns ``None`` (leave the node unchanged) in every other case.
    ``meta`` and the incoming ``format`` argument are not used.
    """
    if key == 'Div':
        [[ident, classes, keyvals], contents] = value
        if 'note' in classes:
            color = '#54c7ec'
            label = 'NOTE:'
        elif 'tip' in classes:
            color = '#6bcebb'
            label = 'TIP:'
        elif 'warning' in classes:
            color = '#e94f3b'
            label = 'WARNING:'
        else:
            return

        # Only Str / Space / Link / Code inlines of paragraphs, and
        # top-level code blocks, are carried over; everything else in
        # the admonition is ignored.
        note_content = []
        for block in contents:
            if block.get('t') == 'Para':
                for item in block['c']:
                    if item['t'] == 'Str':
                        note_content.append(Str(item['c']))
                    elif item['t'] == 'Space':
                        note_content.append(Space())
                    elif item['t'] == 'Link':
                        note_content.append(Link(*item['c']))
                    elif item['t'] == 'Code':
                        note_content.append(Code(*item['c']))
            elif block.get('t') == 'CodeBlock':
                note_content.append(CodeBlock(*block['c']))

        note_content_md = ''.join(to_markdown(item) for item in note_content)
        html_content = markdown.markdown(note_content_md)

        label_bar = (
            f'<div style="background-color: {color}; color: #fff; font-weight: 700; padding-left: 10px; padding-top: 5px; padding-bottom: 5px"><strong>{label}</strong></div>'
        )
        wrapper_open = '<div style="background-color: #f3f4f7; padding-left: 10px; padding-top: 10px; padding-bottom: 10px; padding-right: 10px">'
        return [
            {'t': 'RawBlock', 'c': ['html', label_bar]},
            {'t': 'RawBlock', 'c': ['html', wrapper_open]},
            {'t': 'RawBlock', 'c': ['html', html_content]},
            {'t': 'RawBlock', 'c': ['html', '</div>']},
        ]
    elif key == 'RawBlock':
        # This is needed for the cells that have embedded video.
        # We add a special tag to those: ``` {python, .jupyter-code-cell}
        # The post-processing script then finds those and generates
        # separate code cells that can load video.
        [raw_format, content] = value
        if raw_format == 'html' and 'iframe' in content:
            # The whole iframe markup is embedded as-is. No attempt is made
            # to parse its ``src`` attribute, so iframes written with
            # single-quoted or unquoted attributes are handled as well.
            # NOTE(review): content containing a triple double-quote would
            # break the generated snippet; not handled here.
            python_code = (
                '\n'
                'from IPython.display import display, HTML\n'
                'html_code = """\n'
                f'{content}\n'
                '"""\n'
                'display(HTML(html_code))\n'
            )

            return {'t': 'CodeBlock', 'c': [['', ['python', 'jupyter-code-cell'], []], python_code]}


def process_images(key, value, format, meta):
    """Point relative image sources at https://pytorch.org/tutorials/.

    Nodes other than ``Image`` are left alone (``None`` is returned).
    For an ``Image`` whose source does not start with ``http``, every
    leading ``../`` is stripped, a leading slash in front of ``_static``
    is dropped, and the site prefix is prepended so the image loads
    correctly in the notebook. The image node is always rebuilt and
    returned, whether or not its source was changed.
    """
    if key != 'Image':
        return None

    attrs, caption, target = value
    ident, classes, keyvals = attrs
    src, title = target

    already_absolute = src.startswith('http')
    if not already_absolute:
        # Drop any number of leading parent-directory hops.
        while src.startswith('../'):
            src = src[3:]
        # "/_static/..." becomes "_static/..." before the prefix is added.
        if src.startswith('/_static'):
            src = src[1:]
        src = 'https://pytorch.org/tutorials/' + src

    rebuilt_attrs = [ident, classes, keyvals]
    rebuilt_target = [src, title]
    return {'t': 'Image', 'c': [rebuilt_attrs, caption, rebuilt_target]}


def process_grids(key, value, format, meta):
    """Turn a ``grid`` Div into two side-by-side HTML columns.

    Only meant for the two-card layout used in the tutorial template:
    each ``grid-item-card`` Div found directly inside the grid is
    rendered (paragraphs as ``<h2>``, bullet lists as ``<ul>``) and
    appended to the left and right column alternately. Returns a raw
    HTML block for a grid Div and ``None`` for anything else.
    """
    if key != 'Div':
        return None

    [[ident, classes, keyvals], contents] = value
    if 'grid' not in classes:
        return None

    columns = ['<div style="width: 45%; float: left; padding: 20px;">',
               '<div style="width: 45%; float: right; padding: 20px;">']
    column_num = 0

    for block in contents:
        is_card = ('t' in block and block['t'] == 'Div'
                   and 'grid-item-card' in block['c'][0][1])
        if not is_card:
            continue

        fragments = []
        for element in block['c'][1]:
            element_type = element['t']
            if element_type == 'Para':
                heading = ''.join(to_markdown(node) for node in element['c'])
                fragments.append('<h2>' + heading + '</h2>')
            elif element_type == 'BulletList':
                # Only the first block of each list entry is rendered.
                bullets = [
                    '<li>' + ''.join(to_markdown(node) for node in entry[0]['c']) + '</li>'
                    for entry in element['c']
                ]
                fragments.append('<ul>' + ''.join(bullets) + '</ul>')

        columns[column_num] += ''.join(fragments)
        # Alternate between the left (0) and right (1) column.
        column_num = 1 - column_num

    closed_columns = [column + '</div>' for column in columns]
    return {'t': 'RawBlock', 'c': ['html', ''.join(closed_columns)]}

def is_code_block(item):
    """Return True for an inline ``Code`` node whose text contains 'octicon'."""
    if item['t'] != 'Code':
        return False
    code_text = item['c'][1]
    return 'octicon' in code_text


def process_all(key, value, format, meta):
    """Apply the filter's transforms in order and return the first result.

    The admonition, image and grid handlers are tried in that order; the
    first one that returns something other than ``None`` wins. ``None``
    is returned when no handler produces a replacement.
    """
    handlers = (process_admonitions, process_images, process_grids)
    for handler in handlers:
        replacement = handler(key, value, format, meta)
        if replacement is not None:
            return replacement
    return None


# When executed as a script, hand process_all to pandocfilters'
# toJSONFilter so it is used as the filter action.
if __name__ == "__main__":
    toJSONFilter(process_all)
10 changes: 9 additions & 1 deletion .jenkins/download_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,13 @@ def download_lenet_mnist() -> None:
sha256="cb5f8e578aef96d5c1a2cc5695e1aa9bbf4d0fe00d25760eeebaaac6ebc2edcb",
)

def download_gpu_quantization_torchao() -> None:
    """Fetch the SAM model checkpoint.

    The checkpoint is used by
    prototype_source/gpu_quantization_torchao_tutorial.py and is stored
    under PROTOTYPE_DATA_DIR.
    """
    checkpoint_url = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth"
    checkpoint_name = "sam_vit_h_4b8939.pth"
    checkpoint_sha256 = "a7bf3b02f3ebf1267aba913ff637d9a2d5c33d3173bb679e46d9f338c26f262e"
    download_url_to_file(
        checkpoint_url,
        prefix=PROTOTYPE_DATA_DIR,
        dst=checkpoint_name,
        sha256=checkpoint_sha256,
    )

def main() -> None:
DATA_DIR.mkdir(exist_ok=True)
Expand All @@ -122,7 +129,8 @@ def main() -> None:
download_dcgan_data()
if FILES_TO_RUN is None or "fgsm_tutorial" in FILES_TO_RUN:
download_lenet_mnist()

if FILES_TO_RUN is None or "gpu_quantization_torchao_tutorial" in FILES_TO_RUN:
download_gpu_quantization_torchao()

if __name__ == "__main__":
main()
11 changes: 11 additions & 0 deletions .jenkins/metadata.json
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,21 @@
"intermediate_source/model_parallel_tutorial.py": {
"needs": "linux.16xlarge.nvidia.gpu"
},
"intermediate_source/torchvision_tutorial.py": {
"needs": "linux.g5.4xlarge.nvidia.gpu",
"_comment": "does not require a5g but needs to run before gpu_quantization_torchao_tutorial.py."
},
"advanced_source/coding_ddpg.py": {
"needs": "linux.g5.4xlarge.nvidia.gpu",
"_comment": "does not require a5g but needs to run before gpu_quantization_torchao_tutorial.py."
},
"intermediate_source/torch_compile_tutorial.py": {
"needs": "linux.g5.4xlarge.nvidia.gpu"
},
"intermediate_source/scaled_dot_product_attention_tutorial.py": {
"needs": "linux.g5.4xlarge.nvidia.gpu"
},
"prototype_source/gpu_quantization_torchao_tutorial.py": {
"needs": "linux.g5.4xlarge.nvidia.gpu"
}
}
97 changes: 97 additions & 0 deletions .jenkins/post_process_notebooks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
import nbformat as nbf
import os
import re

"""
This post-processing script needs to run after the .ipynb files are
generated. The script removes extraneous ```{=html} syntax from the
admonitions and splits the cells that have video iframe into a
separate code cell that can be run to load the video directly
in the notebook. This script is included in build.sh.
"""


# Pattern to search for a fenced block tagged ``` {.python .jupyter-code-cell}
# that holds the IPython video-loading snippet.
#   group(1): everything in the cell before the fence
#   group(2): the Python snippet itself
#   group(3): everything after the closing fence
# Braces and dots in the literal parts are escaped so that "." only matches
# a real dot instead of any character.
pattern = re.compile(
    r'(.*?)``` \{\.python \.jupyter-code-cell\}\n\n'
    r'(from IPython\.display import display, HTML\nhtml_code = """\n.*?\n"""\ndisplay\(HTML\(html_code\)\))'
    r'\n```(.*)',
    re.DOTALL,
)


def process_video_cell(notebook_path):
    """
    Post-process one notebook file in place.

    For every markdown cell that contains a
    "``` {.python .jupyter-code-cell}" code block (matched by the
    module-level ``pattern``), the block is sliced out into a separate
    code cell (instead of markdown), which allows the video to be loaded
    in the notebook. The text before the block stays in the original
    markdown cell and any non-blank text after it is placed in a new
    markdown cell. Markdown cells without such a block are passed
    through ``remove_html_tag``. The notebook is then written back to
    ``notebook_path``.
    """
    print(f'Processing file: {notebook_path}')
    notebook = nbf.read(notebook_path, as_version=4)

    # Iterate over markdown cells
    # NOTE(review): cells are inserted into notebook.cells while it is
    # being enumerated, so the inserted cells appear to be visited by
    # later iterations of this same loop - confirm this is intended.
    for i, cell in enumerate(notebook.cells):
        if cell.cell_type == 'markdown':
            match = pattern.search(cell.source)
            if match:
                print(f'Match found in cell {i}: {match.group(0)[:100]}...')
                # Extract the parts before and after the video code block
                before_html_block = match.group(1)
                code_block = match.group(2)

                # Add a comment to run the cell to display the video
                code_block = "# Run this cell to load the video\n" + code_block
                # Create a new code cell
                new_code_cell = nbf.v4.new_code_cell(source=code_block)

                # Replace the original markdown cell with the part before the code block
                # (this text is not passed through remove_html_tag here)
                cell.source = before_html_block

                # Insert the new code cell after the current one
                notebook.cells.insert(i+1, new_code_cell)
                print(f'New code cell created with source: {new_code_cell.source}')

                # If there is content after the HTML code block, create a new markdown cell
                if len(match.group(3).strip()) > 0:
                    after_html_block = match.group(3)
                    new_markdown_cell = nbf.v4.new_markdown_cell(source=after_html_block)
                    # Create a new markdown cell and add the content after code block there
                    notebook.cells.insert(i+2, new_markdown_cell)

            else:
                # Remove ```{=html} from the code block
                cell.source = remove_html_tag(cell.source)

    nbf.write(notebook, notebook_path)


def remove_html_tag(content):
    """
    Strip the ```{=html} fences that Pandoc wraps around raw HTML.

    Pandoc adds an extraneous ```{=html} ``` pair to raw HTML blocks,
    which prevents them from rendering correctly. The substitutions
    below are applied one after another, in the listed order, and the
    cleaned text is returned.
    """
    substitutions = (
        (r'```{=html}\n<div', '<div'),
        (r'">\n```', '">'),
        (r'<\/div>\n```', '</div>\n'),
        (r'```{=html}\n</div>\n```', '</div>\n'),
        (r'```{=html}', ''),
        (r'</p>\n```', '</p>'),
    )
    for regex, replacement in substitutions:
        content = re.sub(regex, replacement, content)
    return content


def walk_dir(downloads_dir):
    """
    Recursively visit ``downloads_dir`` and run ``process_video_cell``
    on every file whose name ends with ``.ipynb``.
    """
    for current_dir, _subdirs, filenames in os.walk(downloads_dir):
        notebook_names = (name for name in filenames if name.endswith('.ipynb'))
        for name in notebook_names:
            process_video_cell(os.path.join(current_dir, name))


def main():
    """Post-process every notebook found under ./docs/_downloads."""
    walk_dir('./docs/_downloads')


# Only run the post-processing when executed as a script, not on import.
if __name__ == "__main__":
    main()
2 changes: 0 additions & 2 deletions .jenkins/validate_tutorials_built.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,11 @@
"beginner_source/examples_autograd/polynomial_autograd",
"beginner_source/examples_autograd/polynomial_custom_function",
"beginner_source/t5_tutorial", # re-enable after this is fixed: https://github.com/pytorch/text/issues/1756
"intermediate_source/parametrizations",
"intermediate_source/mnist_train_nas", # used by ax_multiobjective_nas_tutorial.py
"intermediate_source/fx_conv_bn_fuser",
"intermediate_source/_torch_export_nightly_tutorial", # does not work on release
"advanced_source/super_resolution_with_onnxruntime",
"advanced_source/ddp_pipeline", # requires 4 gpus
"advanced_source/usb_semisup_learn", # in the current form takes 140+ minutes to build - can be enabled when the build time is reduced
"prototype_source/fx_graph_mode_ptq_dynamic",
"prototype_source/vmap_recipe",
"prototype_source/torchscript_freezing",
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,13 @@ GALLERY_PATTERN="neural_style_transfer_tutorial.py" sphinx-build . _build

The `GALLERY_PATTERN` variable respects regular expressions.


## About contributing to PyTorch Documentation and Tutorials
* You can find information about contributing to PyTorch documentation in the
PyTorch Repo [README.md](https://github.com/pytorch/pytorch/blob/master/README.md) file.
* Additional information can be found in [PyTorch CONTRIBUTING.md](https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md).


## License

PyTorch Tutorials is BSD licensed, as found in the LICENSE file.
Binary file added _static/img/usb_semisup_learn/code.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 2 additions & 2 deletions _templates/layout.html
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,11 @@
tutorialTitle: $('h1:first').text(),
rating: $(this).attr("data-count")
});

gtag('event', 'click', {
'event_category': 'Tutorial Rating',
'event_label': $("h1").first().text(),
'value': $(this).attr("data-count")
'value': $(this).attr("data-count"),
'customEvent:Rating': $(this).attr("data-count") // send to GA custom dimension customEvent:Rating.
});
});

Expand Down
Loading

0 comments on commit 3d37bd7

Please sign in to comment.