docs: improve README and Makefile comments for new coming users with

tips and tricks
rayanramoul · Oct 24, 2024 · acd6f1f · acd6f1f
1 parent 2ef5cac
commit acd6f1f
Show file tree

Hide file tree

Showing 3 changed files with 376 additions and 66 deletions.
diff --git a/Makefile b/Makefile
@@ -1,7 +1,7 @@
 SHELL := /bin/bash
 PYTHON_VERSION = 3.10
 
-# Some variables
+# Variables that will be used mainly for the Docker build
 PLATFORM = linux/arm64
 EXAMPLE_DIR = examples
 DOCKER_RUN_FLAGS = --env NEPTUNE_API_TOKEN $(NEPTUNE_API_TOKEN) --env NEPTUNE_PROJECT $(NEPTUNE_PROJECT) --privileged --network=host --ulimit nofile=65536:65536
@@ -14,38 +14,56 @@ init:
 install: configure-commit-template  # hyphenated to match the target's actual name
 	uv sync -p $(PYTHON_VERSION) && uv lock
 
+# Configure git commit template, this will help to write better commit messages
 configure-commit-template:
 	git config --global commit.template $(realpath commit-template.txt)
 
+# Pre-commit hooks run checks (usually linters) before code is committed, which helps keep the code clean and consistent
+# This command installs the hooks; the misspelled 'configre-pre-commit' name is kept as an alias for existing callers
 configure-pre-commit configre-pre-commit:
 	uvx pre-commit install
 
+# Run pre-commit hooks on all files, it will run the checks on all files in the repository
 format:
 	uvx pre-commit run --all-files
 
+# Run pytest to test the code unit tests under the 'tests/' directory
 test:
 	uv run pytest
 
+# Use uv to run the train script while passing the arguments from the command line
 train:
 	uv run src/train.py ${ARGS}
 
+# Use uv to run the evaluate script while passing the arguments from the command line
 evaluate:
 	uv run src/evaluate.py ${ARGS}
 
+# Build the Docker image with the base dependencies
 build-docker:
 	docker build --target lightning-base -t lightning-base .
 
+# Build the Docker image and jump into the container to test a fresh environment (CPU)
 dev-container-cpu: build-docker
 	docker run $(DOCKER_RUN_FLAGS) -v $(PROJECT_ROOT):/app -it lightning-base:latest /bin/bash
 
+# Build the Docker image and jump into the container to test a fresh environment (GPU)
 dev-container-gpu: build-docker
 	docker run $(DOCKER_RUN_FLAGS_GPU) -v $(PROJECT_ROOT):/app -it lightning-base:latest /bin/bash
 
+# Run the train script using the Docker image (the image is built first through the 'build-docker' target)
 train-docker: build-docker
 	docker run $(DOCKER_RUN_FLAGS) --user root -v $(PROJECT_ROOT):/app lightning-base:latest /bin/bash -i -c "uv run /app/src/train.py ${ARGS}"
 
+# Run the evaluate script using the Docker image (the image is built first through the 'build-docker' target)
 evaluate-docker: build-docker
 	docker run $(DOCKER_RUN_FLAGS) --user root -v $(PROJECT_ROOT):/app lightning-base:latest /bin/bash -i -c "uv run /app/src/evaluate.py ${ARGS}"
 
+# This builds the documentation from the current code ('src/') and the 'docs/' directory and deploys it to the gh-pages branch
+# of your GitHub repository (you then need to set up GitHub Pages to use the gh-pages branch)
 deploy-pages:
 	uv run mkdocs build && uv run mkdocs gh-deploy
+
+# This is to run the documentation locally to see how it looks
+serve-docs:
+	uv run mkdocs build && uv run mkdocs serve
diff --git a/README.md b/README.md
@@ -23,87 +23,177 @@ A template for machine learning or deep learning projects.
 
 ## 🧠 Features
 
-- [x] Easy to implement your own model and dataloader
+- [x] Easy to implement your own model and dataloader through hydra instantiation of datamodules and models
 - [x] Configurable hyperparameters with Hydra
 - [x] Logging with the solution that fits your needs
 - [x] Works on CPU, multi-GPU, and multi-TPUs
-
-## ⚙️ Installation
+- [x] Use bleeding edge UV to manage packages
+- [x] pre-commit hooks to validate code style and quality
+- [x] Hydra instantiation of models and dataloaders
+- [x] torch.compile of models
+- [x] Tensors typing validation with TorchTyping
+- [x] Dockerized project (Dockerfile, run tests and training through docker, optionally docker-compose)
+- [x] Examples of efficient multi-processing using python's pool map
+- [x] Examples using polars for faster and more efficient dataframe processing
+- [x] Example of mock tests using pytest
+- [x] Util scripts to download dataset from kaggle
+- [x] Cloud data retrieval using cloudpathlib (launch your training on AWS, GCP, Azure)
+- [x] Architecture and example of creating the model serving API through LitServe
+- [x] Wiki creation and setup of documentation website with best integrations through Mkdocs
+
+## ⚙️ Steps for Installation
 
 - [ ] Use this repository as a template
 - [ ] Clone your repository
 - [ ] Run `make install` to install the dependencies
 - [ ] Add your model which inherits from `LightningModule` in `src/models`
 - [ ] Add your dataset which inherits from `Datamodule` in `src/data`
 - [ ] Add associated yaml configuration files in `configs/` folder following existing examples
+- [ ] Read the commands in the Makefile to understand the available commands you can use
+
+## 🤠 Tips and Tricks
+
+### 🐍 How does the project work?
+
+The `train.py` or `eval.py` script is the entry point of the project. It uses Hydra to instantiate the model (LightningModule), dataloader (DataModule), and trainer from the configuration composed by Hydra. The model is then trained or evaluated using PyTorch Lightning.
+
+### 👀 Implementing your logic
+
+You don't need to worry about implementing the training loops, support for different hardware, reading of
+configurations, etc. You only need to care about 4 files for each training: your LightningModule (+ its Hydra config)
+and your DataModule (+ its Hydra config).
+
+In the LightningModule, you need to implement the following methods:
+
+- `forward`
+- `training_step`
+- `validation_step`
+- `test_step`
+
+Get inspired by the provided examples in the `src/models` folder.
+
+For the DataModule, you need to implement the following methods:
+
+- `prepare_data`
+- `setup`
+- `train_dataloader`
+- `val_dataloader`
+- `test_dataloader`
+
+Get inspired by the provided examples in the `src/data` folder.
+
+Get to know more about Pytorch Lightning's [LightningModule](https://pytorch-lightning.readthedocs.io/en/0.10.0/lightning_module.html) and [DataModule](https://pytorch-lightning.readthedocs.io/en/0.10.0/datamodules.html) in the Pytorch Lightning documentation.
+
+Finally, in the associated `configs/` folder, you need to implement the yaml configuration files for the model and dataloader.
+
+### 🔍 The power of Hydra
+
+As Hydra is used for configuration, you can easily change the hyperparameters of your model, the dataloader, the trainer, etc. by changing the yaml configuration files in the `configs/` folder. You can also use the `--multirun` option to run multiple experiments with different configurations.
+
+Moreover, as Hydra is also used to instantiate the model and dataloader, you can easily change the model, dataloader, or any other component by changing the yaml configuration files or directly on the command line. This is especially useful when you want to use different models or dataloaders.
+
+For example, you can run the following command to train a model with a different architecture, changing the dataset
+used, and the trainer used:
+
+```bash
+uv run src/train.py model=LeNet datamodule=MNISTDataModule trainer=gpu
+```
+
+Read more about Hydra in the [official documentation](https://hydra.cc/docs/intro/).
 
-## 🌳 Tree Explained
+### 💡 Best practices
+
+- Typing your functions and classes with `TorchTyping` for better type checking (in addition to python's typing module)
+- Docstring your functions and classes, it is even more important as it is used to generate the documentation with Mkdocs
+- Use the `make` commands to run your code, it is easier and faster than writing the full command (and check the Makefile for all available commands 😉)
+- [Use the pre-commit hooks](https://pre-commit.com/) to ensure your code is formatted correctly and is of good quality
+- [UV](https://docs.astral.sh/uv/) is powerful (multi-threaded, package graph solving, Rust backend, etc.); use it as much as you can.
+- If you have a lot of data, use Polars for faster and more efficient dataframe processing.
+- If you have CPU intensive tasks, use multi-processing with python's pool map, you can find an example in the `src/utils/utils.py` file.
+
+### 📚 Documentation
+
+You have the possibility to generate a documentation website using Mkdocs. It will automatically generate the documentation based on both the markdown files in the `docs/` folder and the docstrings in your code.
+To generate and serve the documentation locally:
+
+```bash
+make serve-docs # Documentation will be available at http://localhost:8000
+```
+
+And to deploy it to GitHub Pages (you need to enable Pages in your repository configuration and set it to use the
+gh-pages branch):
+
+```bash
+make deploy-pages # It will create a gh-pages branch and push the documentation to it
+```
+
+### 🎓 Github Templates
+
+This repository uses GitHub templates to help you with issues, pull requests, and discussions. It is a great way to standardize the way your team interacts with the repository. You can customize the templates to fit your needs. They can be found in the [.github](.github) folder.
+
+### 🚀 Use this template as your junior's on-boarding process
+
+This template is perfect for your junior's on-boarding process. It has all the best practices and tools to make them productive from day one. It is also a great way to ensure that your team follows the same best practices and tools.
+For example you can select as a start a training notebook for any dataset on Kaggle, and ask your junior to
+industrialize the notebook into a full-fledged project. It will help them to understand the best practices and tools used in the industry.
+After selecting the dataset and notebook, potential steps for the junior can be:
+
+- Implement the DataModule and the LightningModule
+- Implement the associated yaml configuration files and use Hydra to instantiate important classes
+- Implement the training script
+- Implement the evaluation script
+- Implement unit tests
+- Create a CI/CD pipeline with Github Actions
+- Dockerize the project
+- Create a Makefile with useful commands
+- Implement the documentation with Mkdocs
+
+(All of this while following the best practices and tools provided in the template and PEP8)
+
+If any struggle is encountered, the junior can refer to the provided examples in the project.
+
+### 🌳 Tree Explained
 
 ```
 .
-├── commit-template.txt
-├── configs
-│   ├── callbacks
-│   ├── data
-│   ├── debug
-│   ├── eval.yaml
-│   ├── experiment
-│   ├── extras
-│   ├── hparams_search
-│   ├── local
-│   ├── logger
-│   ├── model
-│   ├── paths
-│   ├── trainer
-│   └── train.yaml
-├── data
-├── Makefile
-├── notebooks
-├── pyproject.toml
-├── README.md
-├── ruff.toml
-├── scripts
+├── commit-template.txt # use this file to set your commit message template, with make configure-commit-template
+├── configs # configuration files for hydra
+│   ├── callbacks # configuration files for callbacks
+│   ├── data # configuration files for datamodules
+│   ├── debug # configuration files for pytorch lightning debuggers
+│   ├── eval.yaml # configuration file for evaluation
+│   ├── experiment # configuration files for experiments
+│   ├── extras # configuration files for extra components
+│   ├── hparams_search # configuration files for hyperparameters search
+│   ├── local # configuration files for local training
+│   ├── logger # configuration files for loggers (neptune, wandb, etc.)
+│   ├── model # configuration files for models (LightningModule)
+│   ├── paths # configuration files for paths
+│   ├── trainer # configuration files for trainers (cpu, gpu, tpu)
+│   └── train.yaml # configuration file for training
+├── data # data folder (to store potentially downloaded datasets)
+├── Makefile # makefile contains useful commands for the project
+├── notebooks # notebooks folder
+├── pyproject.toml # pyproject.toml file for uv package manager
+├── README.md # this file
+├── ruff.toml # ruff.toml file for pre-commit
+├── scripts # scripts folder
 │   └── example_train.sh
-├── src
-│   ├── app.py
-│   ├── data
+├── src # source code folder
+│   ├── data # datamodules folder
 │   │   ├── components
 │   │   └── mnist_datamodule.py
-│   ├── Dockerfile
-│   ├── eval.py
-│   ├── models
-│   │   ├── components
-│   │   │   ├── __init__.py
-│   │   │   └── simple_dense_net.py
-│   │   ├── __init__.py
-│   │   └── mnist_module.py
-│   ├── train.py
-│   └── utils
-│       ├── instantiators.py
-│       ├── logging_utils.py
-│       ├── pylogger.py
-│       ├── rich_utils.py
-│       └── utils.py
-└── tests
-    └── conftest.py
-
-````
-
-## 🔮 Incoming features for this template
-
-- [x] Add support for multi-GPU training
-- [x] UV package manager setup
-- [x] pre-commits hooks
-- [x] Hydra instantiation of models and dataloaders
-- [x] Add torch.compile of models
-- [x] Integrate TorchTyping
-- [x] Dockerize the project (Dockerfile, run tests and training through docker, optionally docker-compose)
-- [x] Add example of efficient multi-processing using pool map
-- [x] Add example using polars
-- [x] Example mock tests
-- [x] Util scripts to download dataset from kaggle for example
-- [x] Cloud oriented scripts (launch your training on AWS, GCP, Azure)
-- [x] Add config architecture and example of LitServe
+│   ├── eval.py # evaluation entry script
+│   ├── models # models folder (LightningModule)
+│   │   └── components # components folder, contains model parts or "nets"
+│   ├── train.py # training entry script
+│   └── utils # utils folder
+│       ├── instantiators.py # instantiators for models and dataloaders
+│       ├── logging_utils.py # logger utils
+│       ├── pylogger.py # multi-process and multi-gpu safe logging
+│       ├── rich_utils.py # rich utils
+│       └── utils.py # general utils like multi-processing, etc.
+└── tests # tests folder
+    ├── conftest.py # fixtures for tests
+    └── mock_test.py # example of mocking tests
+
+```
 
 ## 🤝 Contributing