Commit 0ab3667

Merge branch 'main' into links

svekars authored Jan 28, 2025
2 parents e6c1203 + 15ef015 commit 0ab3667

Showing 36 changed files with 1,109 additions and 663 deletions.
4 changes: 2 additions & 2 deletions .ci/docker/requirements.txt
@@ -28,8 +28,8 @@ tensorboard
 jinja2==3.1.3
 pytorch-lightning
 torchx
-torchrl==0.5.0
-tensordict==0.5.0
+torchrl==0.6.0
+tensordict==0.6.0
 ax-platform>=0.4.0
 nbformat>=5.9.2
 datasets
6 changes: 4 additions & 2 deletions .jenkins/build.sh
@@ -22,8 +22,10 @@ sudo apt-get install -y pandoc
 #Install PyTorch Nightly for test.
 # Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html
 # Install 2.5 to merge all 2.4 PRs - uncomment to install nightly binaries (update the version as needed).
-# pip uninstall -y torch torchvision torchaudio torchtext torchdata
-# pip3 install torch==2.5.0 torchvision torchaudio --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124
+sudo pip uninstall -y torch torchvision torchaudio torchtext torchdata
+sudo pip3 install torch==2.6.0 torchvision --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124
+sudo pip uninstall -y fbgemm-gpu torchrec
+sudo pip3 install fbgemm-gpu==1.1.0 torchrec==1.0.0 --no-cache-dir --index-url https://download.pytorch.org/whl/test/cu124
 
 # Install two language tokenizers for Translation with TorchText tutorial
 python -m spacy download en_core_web_sm
2 changes: 0 additions & 2 deletions .jenkins/validate_tutorials_built.py
@@ -25,7 +25,6 @@
     "intermediate_source/mnist_train_nas",  # used by ax_multiobjective_nas_tutorial.py
     "intermediate_source/fx_conv_bn_fuser",
     "intermediate_source/_torch_export_nightly_tutorial",  # does not work on release
-    "intermediate_source/transformer_building_blocks",  # does not work on release
     "advanced_source/super_resolution_with_onnxruntime",
     "advanced_source/usb_semisup_learn",  # fails with CUDA OOM error, should try on a different worker
     "prototype_source/fx_graph_mode_ptq_dynamic",
@@ -51,7 +50,6 @@
     "intermediate_source/flask_rest_api_tutorial",
     "intermediate_source/text_to_speech_with_torchaudio",
     "intermediate_source/tensorboard_profiler_tutorial",  # reenable after 2.0 release.
-    "intermediate_source/torch_export_tutorial"  # reenable after 2940 is fixed.
 ]
 
 def tutorial_source_dirs() -> List[Path]:
Binary file added _static/img/onnx/custom_addandround.png
Binary file removed _static/img/onnx/custom_addandround_function.png
Binary file removed _static/img/onnx/custom_addandround_model.png
Binary file removed _static/img/onnx/custom_aten_add_function.png
Binary file removed _static/img/onnx/custom_aten_add_model.png
Binary file modified _static/img/onnx/custom_aten_gelu_model.png
2 changes: 1 addition & 1 deletion advanced_source/coding_ddpg.py
@@ -893,7 +893,7 @@ def make_recorder(actor_model_explore, transform_state_dict, record_interval):
         record_frames=1000,
         policy_exploration=actor_model_explore,
         environment=environment,
-        exploration_type=ExplorationType.MEAN,
+        exploration_type=ExplorationType.DETERMINISTIC,
         record_interval=record_interval,
     )
     return recorder_obj
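
For context, torchrl 0.6 renamed the deterministic exploration mode from ``MEAN`` to ``DETERMINISTIC``. A minimal sketch of how the renamed enum is used, assuming torchrl >= 0.6; ``env`` and ``policy`` are placeholders for the tutorial's environment and actor:

    import torch
    from torchrl.envs.utils import ExplorationType, set_exploration_type

    def record_rollout(env, policy, frames=1000):
        # DETERMINISTIC replaces the older MEAN name: the policy emits its
        # deterministic (mean) action instead of sampling from the distribution.
        with set_exploration_type(ExplorationType.DETERMINISTIC), torch.no_grad():
            return env.rollout(max_steps=frames, policy=policy)
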
2 changes: 1 addition & 1 deletion advanced_source/pendulum.py
@@ -604,7 +604,7 @@ def __init__(self, td_params=None, seed=None, device="cpu"):
     env,
     # ``Unsqueeze`` the observations that we will concatenate
     UnsqueezeTransform(
-        unsqueeze_dim=-1,
+        dim=-1,
         in_keys=["th", "thdot"],
         in_keys_inv=["th", "thdot"],
     ),
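
The keyword rename mirrors torchrl 0.6, where ``UnsqueezeTransform(unsqueeze_dim=...)`` became ``UnsqueezeTransform(dim=...)``. A standalone sketch of the renamed argument, assuming torchrl >= 0.6 and a toy observation key; applying the transform directly to a tensordict here is for illustration only:

    import torch
    from tensordict import TensorDict
    from torchrl.envs.transforms import UnsqueezeTransform

    # ``dim`` is the new name for ``unsqueeze_dim``.
    t = UnsqueezeTransform(dim=-1, in_keys=["th"], out_keys=["th"])
    td = TensorDict({"th": torch.zeros(3)}, batch_size=[])
    print(t(td)["th"].shape)  # expected: torch.Size([3, 1])
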
15 changes: 4 additions & 11 deletions beginner_source/basics/buildmodel_tutorial.py
@@ -32,17 +32,10 @@
 #############################################
 # Get Device for Training
 # -----------------------
-# We want to be able to train our model on a hardware accelerator like the GPU or MPS,
-# if available. Let's check to see if `torch.cuda <https://pytorch.org/docs/stable/notes/cuda.html>`_
-# or `torch.backends.mps <https://pytorch.org/docs/stable/notes/mps.html>`_ are available, otherwise we use the CPU.
-
-device = (
-    "cuda"
-    if torch.cuda.is_available()
-    else "mps"
-    if torch.backends.mps.is_available()
-    else "cpu"
-)
+# We want to be able to train our model on an `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.
+
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
 print(f"Using {device} device")
 
 ##############################################
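
Several tutorials in this commit move to the same device-selection pattern, so a compact sketch of the ``torch.accelerator`` API may help; it assumes torch >= 2.6, where the unified accelerator namespace landed, and the tensor is illustrative:

    import torch

    if torch.accelerator.is_available():
        acc = torch.accelerator.current_accelerator()       # e.g. device(type='cuda')
        print(f"{torch.accelerator.device_count()} {acc.type} device(s) available")
        x = torch.ones(2, 2, device=acc)                    # allocate directly on it
    else:
        print("No accelerator detected; running on CPU")
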
14 changes: 4 additions & 10 deletions beginner_source/basics/quickstart_tutorial.py
@@ -84,16 +84,10 @@
 # To define a neural network in PyTorch, we create a class that inherits
 # from `nn.Module <https://pytorch.org/docs/stable/generated/torch.nn.Module.html>`_. We define the layers of the network
 # in the ``__init__`` function and specify how data will pass through the network in the ``forward`` function. To accelerate
-# operations in the neural network, we move it to the GPU or MPS if available.
-
-# Get cpu, gpu or mps device for training.
-device = (
-    "cuda"
-    if torch.cuda.is_available()
-    else "mps"
-    if torch.backends.mps.is_available()
-    else "cpu"
-)
+# operations in the neural network, we move it to the `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.
+
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
 print(f"Using {device} device")
 
 # Define model
16 changes: 8 additions & 8 deletions beginner_source/basics/tensorqs_tutorial.py
@@ -99,20 +99,20 @@
 # Operations on Tensors
 # ~~~~~~~~~~~~~~~~~~~~~~~
 #
-# Over 100 tensor operations, including arithmetic, linear algebra, matrix manipulation (transposing,
+# Over 1200 tensor operations, including arithmetic, linear algebra, matrix manipulation (transposing,
 # indexing, slicing), sampling and more are
 # comprehensively described `here <https://pytorch.org/docs/stable/torch.html>`__.
 #
-# Each of these operations can be run on the GPU (at typically higher speeds than on a
-# CPU). If you’re using Colab, allocate a GPU by going to Runtime > Change runtime type > GPU.
+# Each of these operations can be run on the CPU and `Accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# such as CUDA, MPS, MTIA, or XPU. If you’re using Colab, allocate an accelerator by going to Runtime > Change runtime type > GPU.
 #
-# By default, tensors are created on the CPU. We need to explicitly move tensors to the GPU using
-# ``.to`` method (after checking for GPU availability). Keep in mind that copying large tensors
+# By default, tensors are created on the CPU. We need to explicitly move tensors to the accelerator using
+# ``.to`` method (after checking for accelerator availability). Keep in mind that copying large tensors
 # across devices can be expensive in terms of time and memory!
 
-# We move our tensor to the GPU if available
-if torch.cuda.is_available():
-    tensor = tensor.to("cuda")
+# We move our tensor to the current accelerator if available
+if torch.accelerator.is_available():
+    tensor = tensor.to(torch.accelerator.current_accelerator())
 
 
 ######################################################################
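
To make the cost of the ``.to`` copy concrete, a small round-trip sketch under the same torch >= 2.6 assumption:

    import torch

    tensor = torch.ones(4, 4)                   # created on the CPU by default
    if torch.accelerator.is_available():
        dev = torch.accelerator.current_accelerator()
        on_device = tensor.to(dev)              # host -> accelerator copy
        result = (on_device @ on_device).cpu()  # compute there, copy back to RAM
        print(result.device)                    # cpu
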
12 changes: 7 additions & 5 deletions beginner_source/chatbot_tutorial.py
@@ -108,8 +108,10 @@
 import json
 
 
-USE_CUDA = torch.cuda.is_available()
-device = torch.device("cuda" if USE_CUDA else "cpu")
+# If the current `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__ is available,
+# we will use it. Otherwise, we use the CPU.
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
+print(f"Using {device} device")
 
 
 ######################################################################
@@ -1318,16 +1320,16 @@ def evaluateInput(encoder, decoder, searcher, voc):
 encoder_optimizer.load_state_dict(encoder_optimizer_sd)
 decoder_optimizer.load_state_dict(decoder_optimizer_sd)
 
-# If you have CUDA, configure CUDA to call
+# If you have an accelerator, configure it to call
 for state in encoder_optimizer.state.values():
     for k, v in state.items():
         if isinstance(v, torch.Tensor):
-            state[k] = v.cuda()
+            state[k] = v.to(device)
 
 for state in decoder_optimizer.state.values():
     for k, v in state.items():
         if isinstance(v, torch.Tensor):
-            state[k] = v.cuda()
+            state[k] = v.to(device)
 
 # Run training iterations
 print("Starting Training!")
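
A self-contained sketch of the optimizer-state pattern above: a state dict loaded from a checkpoint keeps the device its tensors were saved on, so each tensor is moved by hand. The model, optimizer, and stand-in "checkpoint" below are illustrative:

    import torch

    device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
    model = torch.nn.Linear(8, 8).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    model(torch.randn(2, 8, device=device)).sum().backward()
    optimizer.step()                                    # creates momentum buffers

    optimizer.load_state_dict(optimizer.state_dict())   # stand-in for a real checkpoint
    for state in optimizer.state.values():
        for k, v in state.items():
            if isinstance(v, torch.Tensor):
                state[k] = v.to(device)                 # relocate each state tensor
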
6 changes: 5 additions & 1 deletion beginner_source/examples_autograd/polynomial_autograd.py
@@ -17,8 +17,12 @@
 import torch
 import math
 
+# We want to be able to train our model on an `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.
+
 dtype = torch.float
-device = "cuda" if torch.cuda.is_available() else "cpu"
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
+print(f"Using {device} device")
 torch.set_default_device(device)
 
 # Create Tensors to hold input and outputs.
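
Since this example also calls ``torch.set_default_device``, a short sketch of its effect (available since torch 2.0): once a default device is set, factory functions allocate there without an explicit ``device=`` argument.

    import torch

    device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
    torch.set_default_device(device)
    x = torch.randn(3)     # lands on ``device`` automatically
    print(x.device)
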
12 changes: 4 additions & 8 deletions beginner_source/fgsm_tutorial.py
@@ -125,14 +125,9 @@
 # `pytorch/examples/mnist <https://github.com/pytorch/examples/tree/master/mnist>`__.
 # For simplicity, download the pretrained model `here <https://drive.google.com/file/d/1HJV2nUHJqclXQ8flKvcWmjZ-OU5DGatl/view?usp=drive_link>`__.
 #
-# -  ``use_cuda`` - boolean flag to use CUDA if desired and available.
-#    Note, a GPU with CUDA is not critical for this tutorial as a CPU will
-#    not take much time.
-#
 
 epsilons = [0, .05, .1, .15, .2, .25, .3]
 pretrained_model = "data/lenet_mnist_model.pth"
-use_cuda=True
 # Set random seed for reproducibility
 torch.manual_seed(42)
 
@@ -184,9 +179,10 @@ def forward(self, x):
         ])),
     batch_size=1, shuffle=True)
 
-# Define what device we are using
-print("CUDA Available: ",torch.cuda.is_available())
-device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu")
+# We want to be able to train our model on an `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# such as CUDA, MPS, MTIA, or XPU. If the current accelerator is available, we will use it. Otherwise, we use the CPU.
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
+print(f"Using {device} device")
 
 # Initialize the network
 model = Net().to(device)
48 changes: 22 additions & 26 deletions beginner_source/introyt/tensors_deeper_tutorial.py
@@ -632,34 +632,33 @@
 # does this *without* changing ``a`` - you can see that when we print
 # ``a`` again at the end, it retains its ``requires_grad=True`` property.
 #
-# Moving to GPU
+# Moving to `Accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
 # -------------
 #
-# One of the major advantages of PyTorch is its robust acceleration on
-# CUDA-compatible Nvidia GPUs. (“CUDA” stands for *Compute Unified Device
-# Architecture*, which is Nvidia’s platform for parallel computing.) So
-# far, everything we’ve done has been on CPU. How do we move to the faster
+# One of the major advantages of PyTorch is its robust acceleration on an
+# `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# such as CUDA, MPS, MTIA, or XPU.
+# So far, everything we’ve done has been on CPU. How do we move to the faster
 # hardware?
 #
-# First, we should check whether a GPU is available, with the
+# First, we should check whether an accelerator is available, with the
 # ``is_available()`` method.
 #
 # .. note::
-#      If you do not have a CUDA-compatible GPU and CUDA drivers
-#      installed, the executable cells in this section will not execute any
-#      GPU-related code.
+#      If you do not have an accelerator, the executable cells in this section will not execute any
+#      accelerator-related code.
 #
 
-if torch.cuda.is_available():
-    print('We have a GPU!')
+if torch.accelerator.is_available():
+    print('We have an accelerator!')
 else:
     print('Sorry, CPU only.')
 
 
 ##########################################################################
-# Once we’ve determined that one or more GPUs is available, we need to put
-# our data someplace where the GPU can see it. Your CPU does computation
-# on data in your computer’s RAM. Your GPU has dedicated memory attached
+# Once we’ve determined that one or more accelerators is available, we need to put
+# our data someplace where the accelerator can see it. Your CPU does computation
+# on data in your computer’s RAM. Your accelerator has dedicated memory attached
 # to it. Whenever you want to perform a computation on a device, you must
 # move *all* the data needed for that computation to memory accessible by
 # that device. (Colloquially, “moving the data to memory accessible by the
@@ -669,34 +668,31 @@
 # may do it at creation time:
 #
 
-if torch.cuda.is_available():
-    gpu_rand = torch.rand(2, 2, device='cuda')
+if torch.accelerator.is_available():
+    gpu_rand = torch.rand(2, 2, device=torch.accelerator.current_accelerator())
     print(gpu_rand)
 else:
     print('Sorry, CPU only.')
 
 
 ##########################################################################
 # By default, new tensors are created on the CPU, so we have to specify
-# when we want to create our tensor on the GPU with the optional
+# when we want to create our tensor on the accelerator with the optional
 # ``device`` argument. You can see when we print the new tensor, PyTorch
 # informs us which device it’s on (if it’s not on CPU).
 #
-# You can query the number of GPUs with ``torch.cuda.device_count()``. If
-# you have more than one GPU, you can specify them by index:
+# You can query the number of accelerators with ``torch.accelerator.device_count()``. If
+# you have more than one accelerator, you can specify them by index, take CUDA for example:
 # ``device='cuda:0'``, ``device='cuda:1'``, etc.
 #
 # As a coding practice, specifying our devices everywhere with string
 # constants is pretty fragile. In an ideal world, your code would perform
-# robustly whether you’re on CPU or GPU hardware. You can do this by
+# robustly whether you’re on CPU or accelerator hardware. You can do this by
 # creating a device handle that can be passed to your tensors instead of a
 # string:
 #
 
-if torch.cuda.is_available():
-    my_device = torch.device('cuda')
-else:
-    my_device = torch.device('cpu')
+my_device = torch.accelerator.current_accelerator() if torch.accelerator.is_available() else torch.device('cpu')
 print('Device: {}'.format(my_device))
 
 x = torch.rand(2, 2, device=my_device)
@@ -718,12 +714,12 @@
 # It is important to know that in order to do computation involving two or
 # more tensors, *all of the tensors must be on the same device*. The
 # following code will throw a runtime error, regardless of whether you
-# have a GPU device available:
+# have an accelerator device available, take CUDA for example:
 #
 # .. code-block:: python
 #
 #     x = torch.rand(2, 2)
-#     y = torch.rand(2, 2, device='gpu')
+#     y = torch.rand(2, 2, device='cuda')
 #     z = x + y  # exception will be thrown
 #
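
A short sketch of how the device mismatch above is typically resolved, using the accelerator API this commit adopts (torch >= 2.6):

    import torch

    if torch.accelerator.is_available():
        dev = torch.accelerator.current_accelerator()
        x = torch.rand(2, 2)                # lives on the CPU
        y = torch.rand(2, 2, device=dev)    # lives on the accelerator
        z = x.to(dev) + y                   # move ``x`` first; both on ``dev`` now
        print(z.device)
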
6 changes: 4 additions & 2 deletions beginner_source/knowledge_distillation_tutorial.py
@@ -37,8 +37,10 @@
 import torchvision.transforms as transforms
 import torchvision.datasets as datasets
 
-# Check if GPU is available, and if not, use the CPU
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Check if the current `accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
+# is available, and if not, use the CPU
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
+print(f"Using {device} device")
 
 ######################################################################
 # Loading CIFAR-10
25 changes: 11 additions & 14 deletions beginner_source/nn_tutorial.py
@@ -132,7 +132,7 @@
 # we'll write `log_softmax` and use it. Remember: although PyTorch
 # provides lots of prewritten loss functions, activation functions, and
 # so forth, you can easily write your own using plain python. PyTorch will
-# even create fast GPU or vectorized CPU code for your function
+# even create fast accelerator or vectorized CPU code for your function
 # automatically.
 
 def log_softmax(x):
@@ -827,38 +827,35 @@ def __iter__(self):
 fit(epochs, model, loss_func, opt, train_dl, valid_dl)
 
 ###############################################################################
-# Using your GPU
+# Using your `Accelerator <https://pytorch.org/docs/stable/torch.html#accelerators>`__
 # ---------------
 #
-# If you're lucky enough to have access to a CUDA-capable GPU (you can
+# If you're lucky enough to have access to an accelerator such as CUDA (you can
 # rent one for about $0.50/hour from most cloud providers) you can
-# use it to speed up your code. First check that your GPU is working in
+# use it to speed up your code. First check that your accelerator is working in
 # Pytorch:
 
-print(torch.cuda.is_available())
+# If the current accelerator is available, we will use it. Otherwise, we use the CPU.
+device = torch.accelerator.current_accelerator().type if torch.accelerator.is_available() else "cpu"
+print(f"Using {device} device")
 
 ###############################################################################
-# And then create a device object for it:
-
-dev = torch.device(
-    "cuda") if torch.cuda.is_available() else torch.device("cpu")
-
-###############################################################################
-# Let's update ``preprocess`` to move batches to the GPU:
+# Let's update ``preprocess`` to move batches to the accelerator:
 
 
 def preprocess(x, y):
-    return x.view(-1, 1, 28, 28).to(dev), y.to(dev)
+    return x.view(-1, 1, 28, 28).to(device), y.to(device)
 
 
 train_dl, valid_dl = get_data(train_ds, valid_ds, bs)
 train_dl = WrappedDataLoader(train_dl, preprocess)
 valid_dl = WrappedDataLoader(valid_dl, preprocess)
 
 ###############################################################################
-# Finally, we can move our model to the GPU.
+# Finally, we can move our model to the accelerator.
 
-model.to(dev)
+model.to(device)
 opt = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
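
``WrappedDataLoader`` is defined earlier in this tutorial and does not appear in the diff; for readers of the commit, a minimal sketch of what it does (a paraphrase, not the committed code): it wraps a DataLoader and applies a function such as ``preprocess`` to every batch.

    class WrappedDataLoader:
        def __init__(self, dl, func):
            self.dl = dl
            self.func = func         # e.g. ``preprocess`` from above

        def __len__(self):
            return len(self.dl)

        def __iter__(self):
            for b in self.dl:
                yield self.func(*b)  # apply to each batch as it streams
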