
Commit

[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
pre-commit-ci[bot] committed Sep 26, 2023
1 parent d2fe147 commit 91b4f97
Showing 16 changed files with 34 additions and 34 deletions.
4 changes: 2 additions & 2 deletions .actions/assistant.py
@@ -462,7 +462,7 @@ def _replace_images(lines: list, local_dir: str) -> list:

@staticmethod
def _is_ipynb_parent_dir(dir_path: str) -> bool:
"""Determine in recursive fasion of a folder is valid notebook file or any of sub-folders is."""
"""Determine in recursive fashion of a folder is valid notebook file or any of sub-folders is."""
if AssistantCLI._find_meta(dir_path):
return True
sub_dirs = [d for d in glob.glob(os.path.join(dir_path, "*")) if os.path.isdir(d)]
@@ -702,7 +702,7 @@ def list_dirs(folder: str = "", include_file_ext: str = "") -> str:
dirs += glob.glob(os.path.join(folder, "**", "*" + include_file_ext))
if include_file_ext:
_ignore_base_dir = lambda p: os.path.sep.join(p.split(os.path.sep)[1:]) # noqa: E731
-# Take the notebook as a folder (notebook are on teh same level as the raw tutorial file mix)
+# Take the notebook as a folder (notebook are on the same level as the raw tutorial file mix)
dirs = [os.path.splitext(_ignore_base_dir(p))[0] for p in dirs]
else:
dirs = [p for p in dirs if os.path.isdir(p)]
2 changes: 1 addition & 1 deletion .github/workflows/ci_block-ipybn.yml
@@ -1,4 +1,4 @@
-name: Prevent adding/chnaging notebooks
+name: Prevent adding/changing notebooks

# see: https://help.github.com/en/actions/reference/events-that-trigger-workflows
on: # Trigger the workflow on PR to master
@@ -735,7 +735,7 @@ def visualize_samples(data, label):
# For instance, for BCE, PyTorch has two modules: `nn.BCELoss()`, `nn.BCEWithLogitsLoss()`.
# While `nn.BCELoss` expects the inputs $x$ to be in the range $[0,1]$, i.e. the output of a sigmoid, `nn.BCEWithLogitsLoss` combines a sigmoid layer and the BCE loss in a single class.
# This version is numerically more stable than using a plain Sigmoid followed by a BCE loss because of the logarithms applied in the loss function.
-# Hence, it is adviced to use loss functions applied on "logits" where possible (remember to not apply a sigmoid on the output of the model in this case!).
+# Hence, it is advised to use loss functions applied on "logits" where possible (remember to not apply a sigmoid on the output of the model in this case!).
# For our model defined above, we therefore use the module `nn.BCEWithLogitsLoss`.

# %%
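
The hunk above recommends computing the loss on logits; a minimal sketch of that pattern (tensor names and shapes are illustrative, not taken from the tutorial):

    import torch
    import torch.nn as nn

    logits = torch.randn(8, 1)                     # raw model outputs, no sigmoid applied
    targets = torch.randint(0, 2, (8, 1)).float()  # binary labels

    loss_on_logits = nn.BCEWithLogitsLoss()(logits, targets)      # fused sigmoid + BCE, numerically stable
    loss_on_probs = nn.BCELoss()(torch.sigmoid(logits), targets)  # same value, computed the less stable way
    print(loss_on_logits.item(), loss_on_probs.item())            # agree up to floating-point error
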
@@ -982,7 +982,7 @@ def visualize_classification(model, data, label):
# Finally, you are all set to start with your own PyTorch project!
# In summary, we have looked at how we can build neural networks in PyTorch, and train and test them on data.
# However, there is still much more to PyTorch we haven't discussed yet.
-# In the comming series of Jupyter notebooks, we will discover more and more functionalities of PyTorch, so that you also get familiar to PyTorch concepts beyond the basics.
+# In the coming series of Jupyter notebooks, we will discover more and more functionalities of PyTorch, so that you also get familiar to PyTorch concepts beyond the basics.
# If you are already interested in learning more of PyTorch, we recommend the official [tutorial website](https://pytorch.org/tutorials/) that contains many tutorials on various topics.
# Especially logging with Tensorboard ([tutorial
# here](https://pytorch.org/tutorials/intermediate/tensorboard_tutorial.html))
@@ -33,7 +33,7 @@
sns.set()

# %% [markdown]
-# Instead of the `set_seed` function as in Tutorial 3, we can use Lightning's build-in function `L.seed_everything`.
+# Instead of the `set_seed` function as in Tutorial 3, we can use Lightning's built-in function `L.seed_everything`.
# We will reuse the path variables `DATASET_PATH` and `CHECKPOINT_PATH` as in Tutorial 3.
# Adjust the paths if necessary.

@@ -416,7 +416,7 @@ def var_init(model, std=0.01):
# Actually, as $b$ is a single element per output neuron and is constant across different inputs, we set it to 0 overall.
#
# Next, we need to calculate the variance with which we need to initialize the weight parameters.
-# Along the calculation, we will need to following variance rule: given two independent variables, the variance of their product is $\text{Var}(X\cdot Y) = \mathbb{E}(Y)^2\text{Var}(X) + \mathbb{E}(X)^2\text{Var}(Y) + \text{Var}(X)\text{Var}(Y) = \mathbb{E}(Y^2)\mathbb{E}(X^2)-\mathbb{E}(Y)^2\mathbb{E}(X)^2$ ($X$ and $Y$ are not refering to $x$ and $y$, but any random variable).
+# Along the calculation, we will need to following variance rule: given two independent variables, the variance of their product is $\text{Var}(X\cdot Y) = \mathbb{E}(Y)^2\text{Var}(X) + \mathbb{E}(X)^2\text{Var}(Y) + \text{Var}(X)\text{Var}(Y) = \mathbb{E}(Y^2)\mathbb{E}(X^2)-\mathbb{E}(Y)^2\mathbb{E}(X)^2$ ($X$ and $Y$ are not referring to $x$ and $y$, but any random variable).
#
# The needed variance of the weights, $\text{Var}(w_{ij})$, is calculated as follows:
#
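
The derivation that follows the last sentence above lies outside the shown diff context. As an aside, the product-variance rule quoted in this hunk can be sanity-checked numerically with two independent zero-mean variables (illustrative snippet, not tutorial code):

    import torch

    x = torch.randn(1_000_000) * 2.0   # E[X] = 0, Var(X) = 4
    y = torch.randn(1_000_000) * 3.0   # E[Y] = 0, Var(Y) = 9
    print((x * y).var().item())        # ~= 36 = Var(X) * Var(Y), since both means are zero
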
@@ -372,7 +372,7 @@ def train_model(model_name, save_name=None, **kwargs):
# Automatically loads the model with the saved hyperparameters
model = CIFARModule.load_from_checkpoint(pretrained_filename)
else:
-L.seed_everything(42) # To be reproducable
+L.seed_everything(42) # To be reproducible
model = CIFARModule(model_name=model_name, **kwargs)
trainer.fit(model, train_loader, val_loader)
model = CIFARModule.load_from_checkpoint(
@@ -203,7 +203,7 @@
#
# One aspect we haven't discussed yet is the scaling factor of $1/\sqrt{d_k}$.
# This scaling factor is crucial to maintain an appropriate variance of attention values after initialization.
-# Remember that we intialize our layers with the intention of having equal variance throughout the model, and hence,
+# Remember that we initialize our layers with the intention of having equal variance throughout the model, and hence,
# $Q$ and $K$ might also have a variance close to $1$.
# However, performing a dot product over two vectors with a variance $\sigma$ results
# in a scalar having $d_k$-times higher variance:
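
The formula itself sits outside the shown context, but the $1/\sqrt{d_k}$ argument quoted above can be illustrated numerically (the head dimension below is an arbitrary example, not the tutorial's):

    import torch

    d_k = 64
    q = torch.randn(100_000, d_k)             # entries with variance ~1
    k = torch.randn(100_000, d_k)
    scores = (q * k).sum(dim=-1)              # one dot product per row
    print(scores.var().item())                # ~ d_k (~64): variance grows with the head dimension
    print((scores / d_k**0.5).var().item())   # ~ 1 after the 1/sqrt(d_k) scaling
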
@@ -703,7 +703,7 @@ def train_model(**kwargs):
# ### Image Generation
#
# Another way of evaluating generative models is by sampling a few generated images.
-# Generative models need to be good at generating realistic images as this truely shows that they have modeled the true data distribution.
+# Generative models need to be good at generating realistic images as this truly shows that they have modeled the true data distribution.
# Thus, let's sample a few images of the model below:

# %%
10 changes: 5 additions & 5 deletions course_UvA-DL/09-normalizing-flows/NF_image_modeling.py
@@ -289,7 +289,7 @@ def encode(self, imgs):
def _get_likelihood(self, imgs, return_ll=False):
"""Given a batch of images, return the likelihood of those.
-If return_ll is True, this function returns the log likelihood of the input. Otherwise, the ouptut metric is
+If return_ll is True, this function returns the log likelihood of the input. Otherwise, the output metric is
bits per dimension (scaled negative log likelihood)
"""
z, ldj = self.encode(imgs)
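
For reference, "bits per dimension" as described in this docstring is just the negative log-likelihood rescaled; a generic sketch (not the tutorial's exact implementation, which operates on tensors):

    import numpy as np

    def bits_per_dim(log_likelihood: float, num_dims: int) -> float:
        # log_likelihood is in nats (natural log); num_dims is the number of input dimensions (e.g. pixels)
        return -log_likelihood / (num_dims * np.log(2))
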
@@ -352,7 +352,7 @@ def test_step(self, batch, batch_idx):

# %% [markdown]
# The `test_step` function differs from the training and validation step in that it makes use of importance sampling.
-# We will discuss the motiviation and details behind this after
+# We will discuss the motivation and details behind this after
# understanding how flows model discrete images in continuous space.

# %% [markdown]
@@ -975,7 +975,7 @@ def train_flow(flow, model_name="MNISTFlow"):
# One disadvantage of normalizing flows is that they operate on the exact same dimensions as the input.
# If the input is high-dimensional, so is the latent space, which requires larger computational cost to learn suitable transformations.
# However, particularly in the image domain, many pixels contain less information in the sense
-# that we could remove them without loosing the semantical information of the image.
+# that we could remove them without losing the semantical information of the image.
#
# Based on this intuition, deep normalizing flows on images commonly apply a multi-scale architecture [1].
# After the first $N$ flow transformations, we split off half of the latent dimensions and directly evaluate them on the prior.
@@ -1208,7 +1208,7 @@ def print_num_params(model):
)

# %% [markdown]
-# As we have intially expected, using variational dequantization improves upon standard dequantization in terms of bits per dimension.
+# As we have initially expected, using variational dequantization improves upon standard dequantization in terms of bits per dimension.
# Although the difference with 0.04bpd doesn't seem impressive first, it is a considerably step for generative models
# (most state-of-the-art models improve upon previous models in a range of 0.02-0.1bpd on CIFAR with three times as high bpd).
# While it takes longer to evaluate the probability of an image due to the variational dequantization,
@@ -1223,7 +1223,7 @@ def print_num_params(model):
# We should note that the samples for variational dequantization and standard dequantization are very similar,
# and hence we visualize here only the ones for variational dequantization and the multi-scale model.
# However, feel free to also test out the `"simple"` model.
-# The seeds are set to obtain reproducable generations and are not cherry picked.
+# The seeds are set to obtain reproducible generations and are not cherry picked.

# %%
L.seed_everything(44)
@@ -69,7 +69,7 @@
L.seed_everything(42)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
-torch.backends.cudnn.determinstic = True
+torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

# Fetching the device that will be used throughout this notebook
@@ -504,7 +504,7 @@ def forward(self, v_stack, h_stack):
#
# Using the gated convolutions, we can now build our PixelCNN model.
# The architecture consists of multiple stacked GatedMaskedConv blocks, where we add an additional dilation factor to a few convolutions.
-# This is used to increase the receptive field of the model and allows to take a larger context into accout during generation.
+# This is used to increase the receptive field of the model and allows to take a larger context into account during generation.
# As a reminder, dilation on a convolution works looks as follows
# (figure credit - [Vincent Dumoulin and Francesco Visin](https://arxiv.org/pdf/1603.07285.pdf)):
#
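
As a side note on the dilation mentioned in the hunk above, a dilated convolution in PyTorch might look like this (channel and image sizes are illustrative, not the tutorial's):

    import torch
    import torch.nn as nn

    # With dilation=2, a 3x3 kernel covers a 5x5 receptive field at the same parameter count;
    # padding=2 keeps the spatial size unchanged.
    conv = nn.Conv2d(16, 16, kernel_size=3, dilation=2, padding=2)
    print(conv(torch.randn(1, 16, 28, 28)).shape)  # torch.Size([1, 16, 28, 28])
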
4 changes: 2 additions & 2 deletions course_UvA-DL/11-vision-transformer/Vision_Transformer.py
@@ -398,7 +398,7 @@ def train_model(**kwargs):
# Automatically loads the model with the saved hyperparameters
model = ViT.load_from_checkpoint(pretrained_filename)
else:
-L.seed_everything(42) # To be reproducable
+L.seed_everything(42) # To be reproducible
model = ViT(**kwargs)
trainer.fit(model, train_loader, val_loader)
# Load best checkpoint after training
@@ -503,7 +503,7 @@ def train_model(**kwargs):
# In this tutorial, we have implemented our own Vision Transformer from scratch and applied it on the task of image classification.
# Vision Transformers work by splitting an image into a sequence of smaller patches, use those as input to a standard Transformer encoder.
# While Vision Transformers achieved outstanding results on large-scale image recognition benchmarks such as ImageNet, they considerably underperform when being trained from scratch on small-scale datasets like CIFAR10.
-# The reason is that in contrast to CNNs, Transformers do not have the inductive biases of translation invariance and the feature hierachy (i.e. larger patterns consist of many smaller patterns).
+# The reason is that in contrast to CNNs, Transformers do not have the inductive biases of translation invariance and the feature hierarchy (i.e. larger patterns consist of many smaller patterns).
# However, these aspects can be learned when enough data is provided, or the model has been pre-trained on other large-scale tasks.
# Considering that Vision Transformers have just been proposed end of 2020, there is likely a lot more to come on Transformers for Computer Vision.
#
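
A rough sketch of the patch-splitting step summarized in the hunk above (batch size, image size, and patch size are made-up values, not the tutorial's implementation):

    import torch

    imgs = torch.randn(8, 3, 32, 32)                # B, C, H, W
    p = 4                                           # patch size
    patches = imgs.unfold(2, p, p).unfold(3, p, p)  # B, C, H/p, W/p, p, p
    patches = patches.permute(0, 2, 3, 1, 4, 5).flatten(1, 2).flatten(2)
    print(patches.shape)                            # torch.Size([8, 64, 48]): 64 patches of 3*4*4 values
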
8 changes: 4 additions & 4 deletions course_UvA-DL/12-meta-learning/Meta_Learning.py
@@ -248,7 +248,7 @@ def dataset_from_labels(imgs, targets, class_set, **kwargs):
#
# This subsection summarizes the code that is needed to create such training batches.
# In PyTorch, we can specify the data sampling procedure by so-called `Sampler` ([documentation](https://pytorch.org/docs/stable/data.html#data-loading-order-and-sampler)).
-# Samplers are iteratable objects that return indices in the order in which the data elements should be sampled.
+# Samplers are iterable objects that return indices in the order in which the data elements should be sampled.
# In our previous notebooks, we usually used the option `shuffle=True` in the `data.DataLoader` objects which creates a sampler returning the data indices in a random order.
# Here, we focus on samplers that return batches of indices that correspond to support and query set batches.
# Below, we implement such a sampler.
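
The few-shot sampler itself lies below the shown context; purely to illustrate the `Sampler` interface described above, a minimal custom sampler could look like this (hypothetical class, not the tutorial's):

    import torch
    from torch.utils.data import Sampler

    class RandomOrderSampler(Sampler):
        """Yields all dataset indices in a random order."""

        def __init__(self, num_items: int):
            self.num_items = num_items

        def __iter__(self):
            return iter(torch.randperm(self.num_items).tolist())

        def __len__(self):
            return self.num_items
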
@@ -575,7 +575,7 @@ def train_model(model_class, train_loader, val_loader, **kwargs):
# Automatically loads the model with the saved hyperparameters
model = model_class.load_from_checkpoint(pretrained_filename)
else:
-L.seed_everything(42) # To be reproducable
+L.seed_everything(42) # To be reproducible
model = model_class(**kwargs)
trainer.fit(model, train_loader, val_loader)
model = model_class.load_from_checkpoint(
@@ -777,7 +777,7 @@ def plot_few_shot(acc_dict, name, color=None, ax=None):

# %% [markdown]
# To obtain gradients for the initial parameters $\theta$ from the optimized model $f_{\theta_i'}$, we actually need second-order gradients, i.e. gradients of gradients, as the support set gradients depend on $\theta$ as well.
-# This makes MAML computationally expensive, especially when using mulitple inner loop steps.
+# This makes MAML computationally expensive, especially when using multiple inner loop steps.
# A simpler, yet almost equally well performing alternative is First-Order MAML (FOMAML) which only uses first-order gradients.
# This means that the second-order gradients are ignored, and we can calculate the outer loop gradients (line 10 in algorithm 2) simply by calculating the gradients with respect to $\theta_i'$, and use those as update to $\theta$.
# Hence, the new update rule becomes:
@@ -1049,7 +1049,7 @@ def collate_fn(item_list):
# We use the same feature space size as for ProtoNet, but can use a higher learning rate since the outer loop gradients are accumulated over 16 batches.
# The inner loop learning rate is set to 0.1, which is much higher than the outer loop lr because we use SGD in the inner loop instead of Adam.
# Commonly, the learning rate for the output layer is higher than the base model is the base model is very deep or pre-trained.
-# However, for our setup, we observed no noticable impact of using a different learning rate than the base model.
+# However, for our setup, we observed no noticeable impact of using a different learning rate than the base model.
# The number of inner loop updates is another crucial hyperparmaeter, and depends on the similarity of our training tasks.
# Since all tasks are on images from the same dataset, we notice that a single inner loop update achieves similar performance as 3 or 5 while training considerably faster.
# However, especially in RL and NLP, larger number of inner loop steps are often needed.
10 changes: 5 additions & 5 deletions course_UvA-DL/13-contrastive-learning/SimCLR.py
@@ -70,7 +70,7 @@
L.seed_everything(42)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
-torch.backends.cudnn.determinstic = True
+torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
@@ -390,7 +390,7 @@ def train_simclr(batch_size, max_epochs=500, **kwargs):
pin_memory=True,
num_workers=NUM_WORKERS,
)
-L.seed_everything(42) # To be reproducable
+L.seed_everything(42) # To be reproducible
model = SimCLR(max_epochs=max_epochs, **kwargs)
trainer.fit(model, train_loader, val_loader)
# Load best checkpoint after training
@@ -566,7 +566,7 @@ def train_logreg(batch_size, train_feats_data, test_feats_data, model_suffix, ma
print(f"Found pretrained model at {pretrained_filename}, loading...")
model = LogisticRegression.load_from_checkpoint(pretrained_filename)
else:
-L.seed_everything(42) # To be reproducable
+L.seed_everything(42) # To be reproducible
model = LogisticRegression(**kwargs)
trainer.fit(model, train_loader, test_loader)
model = LogisticRegression.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
@@ -582,7 +582,7 @@ def train_logreg(batch_size, train_feats_data, test_feats_data, model_suffix, ma
# %% [markdown]
# Despite the training dataset of STL10 already only having 500 labeled images per class, we will perform experiments with even smaller datasets.
# Specifically, we train a Logistic Regression model for datasets with only 10, 20, 50, 100, 200, and all 500 examples per class.
-# This gives us an intuition on how well the representations learned by contrastive learning can be transfered to a image recognition task like this classification.
+# This gives us an intuition on how well the representations learned by contrastive learning can be transferred to a image recognition task like this classification.
# First, let's define a function to create the intended sub-datasets from the full training set:
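
The tutorial's own helper is outside the shown context; a generic sketch of the "N examples per class" sub-setting described above could look like this (names and signature are illustrative):

    import torch
    from torch.utils.data import Subset

    def subset_per_class(dataset, labels: torch.Tensor, n_per_class: int) -> Subset:
        keep = torch.cat([torch.where(labels == c)[0][:n_per_class] for c in labels.unique()])
        return Subset(dataset, keep.tolist())
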


@@ -762,7 +762,7 @@ def train_resnet(batch_size, max_epochs=100, **kwargs):
print("Found pretrained model at %s, loading..." % pretrained_filename)
model = ResNet.load_from_checkpoint(pretrained_filename)
else:
-L.seed_everything(42) # To be reproducable
+L.seed_everything(42) # To be reproducible
model = ResNet(**kwargs)
trainer.fit(model, train_loader, test_loader)
model = ResNet.load_from_checkpoint(trainer.checkpoint_callback.best_model_path)
@@ -75,7 +75,7 @@
# # Predicting
# ## Load the model from a checkpoint
#
-# `TabularClassifier.load_from_checkpoint` supports both url or local_path to a checkpoint. If provided with an url, the checkpoint will first be downloaded and laoded to re-create the model.
+# `TabularClassifier.load_from_checkpoint` supports both url or local_path to a checkpoint. If provided with an url, the checkpoint will first be downloaded and loaded to re-create the model.

# %%
model = TabularClassifier.load_from_checkpoint(
4 changes: 2 additions & 2 deletions lightning_examples/augmentation_kornia/augmentation.py
@@ -52,7 +52,7 @@ def __init__(self, apply_color_jitter: bool = False) -> None:

self.jitter = ColorJitter(0.5, 0.5, 0.5, 0.5)

-@torch.no_grad() # disable gradients for effiency
+@torch.no_grad() # disable gradients for efficiency
def forward(self, x: Tensor) -> Tensor:
x_out = self.transforms(x) # BxCxHxW
if self._apply_color_jitter:
@@ -76,7 +76,7 @@ def forward(self, x: Tensor) -> Tensor:
class Preprocess(nn.Module):
"""Module to perform pre-process using Kornia on torch tensors."""

-@torch.no_grad() # disable gradients for effiency
+@torch.no_grad() # disable gradients for efficiency
def forward(self, x) -> Tensor:
x_tmp: np.ndarray = np.array(x) # HxWxC
x_out: Tensor = image_to_tensor(x_tmp, keepdim=True) # CxHxW