From 9912989380ebe1246a2e35a92488e424d7ae571b Mon Sep 17 00:00:00 2001 From: Anthony Gitter Date: Fri, 30 Aug 2024 19:43:15 -0500 Subject: [PATCH] Colab notebook minor edits --- README.md | 1 + notebooks/README.md | 8 ++++---- notebooks/colab_finetuning.ipynb | 8 ++++---- notebooks/colab_predicting.ipynb | 18 +++++++++--------- 4 files changed, 18 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index 7101db1..590ecae 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ This framework uses [PyTorch Lightning](https://lightning.ai/docs/pytorch/stable - To run our pretrained METL [models](https://zenodo.org/doi/10.5281/zenodo.11051644) in pure PyTorch with minimal software dependencies, see our [metl-pretrained](https://github.com/gitter-lab/metl-pretrained) repository. - To recreate the results from our preprint, see our [metl-pub](https://github.com/gitter-lab/metl-pub) repository and Zenodo [datasets](https://zenodo.org/doi/10.5281/zenodo.10967412). - To run your own molecular simulations, see our [metl-sim](https://github.com/gitter-lab/metl-sim) repository. +- See the [notebooks](notebooks) directory for links to Colab notebooks. For more information, please see our manuscript: diff --git a/notebooks/README.md b/notebooks/README.md index 76b1200..3bef630 100644 --- a/notebooks/README.md +++ b/notebooks/README.md @@ -6,7 +6,7 @@ This directory contains example notebooks that show how to use various aspects o |------------------------------------------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------| | [generate_rosetta_dataset.ipynb](generate_rosetta_dataset.ipynb) | Generate a Rosetta pretraining dataset using molecular simulations data obtained from the [metl-sim](https://github.com/gitter-lab/metl-sim) repository. 
| | [train_test_split.ipynb](train_test_split.ipynb) | Create train, validation, and test splits for experimental datasets. | -| [pretraining.ipynb](pretraining.ipynb) | Pretrain METL models with Rosetta data. | -| [finetuning.ipynb](finetuning.ipynb) | Finetune METL models with experimental data. | -| [colab_finetuning.ipynb](colab_finetuning.ipynb) | Finetune METL models with experimental data on Colab. | -| [colab_predicting.ipynb](colab_predicting.ipynb) | Predict with METL models with on Colab. | \ No newline at end of file +| [pretraining.ipynb](pretraining.ipynb) | Pretrain METL models with Rosetta data locally. | +| [finetuning.ipynb](finetuning.ipynb) | Finetune METL models with experimental data locally. | +| [colab_finetuning.ipynb](https://colab.research.google.com/github/gitter-lab/metl/blob/main/notebooks/colab_finetuning.ipynb) | Finetune METL models with experimental data on Colab. | +| [colab_predicting.ipynb](https://colab.research.google.com/github/gitter-lab/metl/blob/main/notebooks/colab_predicting.ipynb) | Predict with METL models on Colab. | diff --git a/notebooks/colab_finetuning.ipynb b/notebooks/colab_finetuning.ipynb index 7401b7b..99e5ed4 100644 --- a/notebooks/colab_finetuning.ipynb +++ b/notebooks/colab_finetuning.ipynb @@ -40,7 +40,7 @@ } ], "source": [ - "# @title Cloning metl\n", + "# @title Cloning metl repo\n", "!git clone https://github.com/gitter-lab/metl.git\n", "%cd metl" ] @@ -205,7 +205,7 @@ "source": [ "# Training arguments\n", "\n", - "The script for finetuning on experimental data is [train_target_model.py](https://github.com/gitter-lab/metl/blob/main/code/train_target_model.py). This script has a number of arguments you can view by uncommenting and running the below cell. There are additional arguments related to architecture that won't show up if you run the command, but you can view them in [models.py](https://github.com/gitter-lab/metl/tree/main/code/models.py) in the `TransferModel` class." 
+ "The script for finetuning on experimental data is [train_target_model.py](https://github.com/gitter-lab/metl/blob/main/code/train_target_model.py). This script has a number of arguments you can view by uncommenting and running the below cell. There are additional arguments related to the architecture that won't show up if you run the command, but you can view them in [models.py](https://github.com/gitter-lab/metl/tree/main/code/models.py) in the `TransferModel` class." ] }, { @@ -334,7 +334,7 @@ "-1\n", "```\n", "\n", - "The remaining arguments determine the encoding, which should be set to `int_seqs`, whether to use dropout after the backbone cutoff, and the architecture of the new top net. You can leave these values as-is to match what we did for the preprint.\n", + "The remaining arguments determine the encoding, which should be set to `int_seqs`, whether to use dropout after the backbone cutoff, and the architecture of the new top net. You can leave these values as-is to match what we did for the manuscript.\n", "\n", "```\n", "--encoding\n", @@ -695,7 +695,7 @@ }, "source": [ "## Convert to PyTorch\n", - "Lightning checkpoints are compatible with pure pytorch, but they may contain additional items that are not needed for inference. This script loads the checkpoint and saves a smaller checkpoint with just the model weights and hyperparameters." + "Lightning checkpoints are compatible with pure PyTorch, but they may contain additional items that are not needed for inference. This script loads the checkpoint and saves a smaller checkpoint with just the model weights and hyperparameters." 
] }, { diff --git a/notebooks/colab_predicting.ipynb b/notebooks/colab_predicting.ipynb index eb18765..08605e1 100644 --- a/notebooks/colab_predicting.ipynb +++ b/notebooks/colab_predicting.ipynb @@ -6,7 +6,7 @@ "id": "_Om47-OgrLFv" }, "source": [ - "This notebook will show an example on how to use METL models through hugging face to predict on more than the sequences allowed by the demo.\n", + "This notebook will show an example of how to use METL models through Hugging Face to predict on more sequences than the demo allows.\n", "\n", "The example provided through the notebook uses a pretrained METL model to predict GB1 binding affinity.\n", "\n", @@ -241,7 +241,7 @@ "source": [ "The METL 🤗 wrapper requires the loading of the specific METL model after initialization of the `metl` variable above. Use the dropdown below to select a model to use for predicting.\n", "\n", - "The publically available METL models are hosted on [Zenodo](https://zenodo.org/records/11051645). The [metl-pretrained](https://github.com/gitter-lab/metl-pretrained#available-models) repo provides a table describing the available models." + "The publicly available METL models are hosted on [Zenodo](https://zenodo.org/doi/10.5281/zenodo.11051644). The [metl-pretrained](https://github.com/gitter-lab/metl-pretrained#available-models) repo provides a table describing the available models." ] }, { @@ -286,9 +286,9 @@ "id": "UENVWR2srLGD" }, "source": [ - "Depending on the model chosen, different files might be needed. This example is setup to use metl-l-2m-3d-gb1 and will need a few data for prediction.\n", + "Depending on the model chosen, different files might be needed. 
This example is set up to use metl-l-2m-3d-gb1 and will need a few inputs for prediction.\n", "\n", - "Specifically, for this 3d gb1 model we will need:\n", + "Specifically, for this 3d GB1 model we will need:\n", "- a wild type sequnece\n", "- a PDB structure file (as this is a 3d model)\n", "- variants to use with METL" @@ -303,7 +303,7 @@ }, "outputs": [], "source": [ - "# @title Protein wild type\n", + "# @title Protein wild type sequence\n", "# @markdown Enter the wild type of your protein here. The wildtype for gb1 is provided to use with the default model example here.\n", "wildtype = 'MQYKLILNGKTLKGETTTEAVDAATAEKVFKQYANDNGVDGEWTYDDATKTFTVTE' # @param {type:\"string\", placeholder:\"Enter a wildtype here\"}" ] @@ -551,7 +551,7 @@ } ], "source": [ - "# @title Variant Selecting Logic (always run this)\n", + "# @title Variant selecting logic (always run this)\n", "\n", "clear_output()\n", "if len(variant_text.value) > 0:\n", @@ -571,7 +571,7 @@ "id": "YOfEUeNYrLGI" }, "source": [ - "For biologists, one-based indexing is commonly used. However, METL models were designed to used zero-based indexing. If one-based indexing is needed, select it in the dropdown below." + "For biologists, 1-based indexing is commonly used. However, METL models were designed to use 0-based indexing. If 1-based indexing is needed, select it in the dropdown below." ] }, { @@ -583,7 +583,7 @@ }, "outputs": [], "source": [ - "# @title Transform input from 1 based indexing to zero based indexing\n", + "# @title Transform input from 1-based indexing to 0-based indexing\n", "# @markdown Select indexing for residue mutations\n", "indexing = \"0\" # @param ['0', '1']" ] @@ -932,7 +932,7 @@ "id": "pO8jdBssrLGO" }, "source": [ - "Finally, we will save our output. We will save our output as a list of JSON Objects." + "Finally, we will save our output. We will save our output as a list of JSON objects. Access the saved output.json file with the Files icon to the left." ] }, {