Improve Makefile and code quality #62

Merged 9 commits on Dec 30, 2022
Changes from 4 commits
20 changes: 20 additions & 0 deletions .github/workflows/tests.yml
@@ -7,7 +7,27 @@ on:
branches: [ master ]

jobs:

check_code_quality:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.7"
Member: See my comment below about considering v3.8 as the minimum version, since this one will be EOL in 6 months.

Member Author: Sounds good.

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .[dev]
- name: Check quality
run: |
black --check --line-length 119 --target-version py36 tests trl
Member: If I'm not mistaken, you can just run `make quality` here. This has the advantage of having a single source of truth for the quality logic.

Member Author: Done.

isort --check-only tests trl
flake8 tests trl

tests:
needs: check_code_quality
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
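A note on the exchange above: at this stage of the PR (changes from 4 of the 9 commits), the workflow still spells out the lint commands inline; per the replies, the merged version presumably calls `make quality` instead and bumps the minimum Python version to 3.8. A rough sketch of what the job might look like under those assumptions (not the exact merged YAML):

```yaml
jobs:
  check_code_quality:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: "3.8"  # assumed bump from 3.7, per the EOL comment above
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install .[dev]
      - name: Check quality
        run: make quality  # delegates to the Makefile target introduced later in this PR
```

Delegating to the Makefile target keeps CI and local checks from drifting apart, which is the "single source of truth" point raised in the review.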
23 changes: 19 additions & 4 deletions CONTRIBUTING.md
@@ -2,9 +2,10 @@

## How to get started

Before anything else, please install the git hooks that run automatic scripts during each commit and merge to strip the notebooks of superfluous metadata (and avoid merge conflicts). After cloning the repository, run the following command inside it:
```
nbdev_install_git_hooks
Before you start contributing make sure you installed all the dev tools:

```bash
pip install "trl[dev]"
```

## Did you find a bug?
@@ -27,7 +28,21 @@ nbdev_install_git_hooks
* Do not turn an already submitted PR into your development playground. If after you submitted PR, you discovered that more work is needed - close the PR, do the required work and then submit a new PR. Otherwise each of your commits requires attention from maintainers of the project.
* If, however, you submitted a PR and received a request for changes, you should proceed with commits inside that PR, so that the maintainer can see the incremental fixes and won't need to review the whole PR again. In the exception case where you realize it'll take many many commits to complete the requests, then it's probably best to close the PR, do the work and then submit it again. Use common sense where you'd choose one way over another.

### Before you submit a PR

First you want to make sure that all the tests pass:

```bash
make test
```

Then before submitting your PR make sure the code quality follows the standards. You can run the following command to format and test:

```bash
make style && make quality
```

## Do you want to contribute to the documentation?

* Docs are automatically created from the notebooks in the nbs folder.
* Docs are are in the `docs/` folder and can be updated there.

38 changes: 9 additions & 29 deletions Makefile
@@ -1,33 +1,13 @@
SRC = $(wildcard ./nbs//*.ipynb)

all: trl docs

trl: $(SRC)
nbdev_build_lib
touch trl

docs_serve: docs
cd docs && bundle exec jekyll serve

docs: $(SRC)
nbdev_build_docs
touch docs
.PHONY: quality style test

test:
pytest tests

format:
black --line-length 119 --target-version py36 tests trl examples
isort tests trl examples

release: pypi
nbdev_bump_version

pypi: dist
twine upload --repository pypi dist/*
python -m pytest -n auto --dist=loadfile -s -v ./tests/

dist: clean
python setup.py sdist bdist_wheel
quality:
black --check --line-length 119 --target-version py36 tests trl
isort --check-only tests trl
flake8 tests trl

clean:
rm -rf dist
style:
black --line-length 119 --target-version py36 tests trl
isort tests trl
15 changes: 15 additions & 0 deletions setup.cfg
@@ -0,0 +1,15 @@
[metadata]
license_file = LICENSE

[isort]
ensure_newline_before_comments = True
force_grid_wrap = 0
include_trailing_comma = True
line_length = 119
lines_after_imports = 2
multi_line_output = 3
use_parentheses = True

[flake8]
ignore = E203, E501, W503
max-line-length = 119
3 changes: 0 additions & 3 deletions tests/test_gpt2_model.py
@@ -1,11 +1,9 @@
# imports
import pytest
import torch
from transformers import GPT2Tokenizer

from trl import AutoModelForCausalLMWithValueHead
from trl.gpt2 import respond_to_batch

from trl.ppo import PPOTrainer


@@ -26,7 +24,6 @@ def test_gpt2_model():
# get model response
response_tensor = respond_to_batch(gpt2_model, query_tensor)
assert response_tensor.shape == (1, 20)
response_txt = gpt2_tokenizer.decode(response_tensor[0, :])

# define a reward for response
# (this could be any reward such as human feedback or output from another model)
59 changes: 33 additions & 26 deletions tests/test_modeling.py
@@ -11,10 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import tempfile
import torch
import unittest

import torch
from transformers import AutoModelForCausalLM

from trl import AutoModelForCausalLMWithValueHead
@@ -30,6 +30,7 @@
"trl-internal-testing/tiny-random-GPT2LMHeadModel",
]


class BaseModelTester:
all_model_names = None
trl_model_class = None
@@ -41,17 +42,17 @@ def test_from_save(self):
for model_name in self.all_model_names:
torch.manual_seed(0)
model = self.trl_model_class.from_pretrained(model_name)

with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir)

torch.manual_seed(0)
model_from_save = self.trl_model_class.from_pretrained(tmp_dir)
# Check if the weights are the same

# Check if the weights are the same
for key in model_from_save.state_dict():
self.assertTrue(torch.allclose(model_from_save.state_dict()[key], model.state_dict()[key]))

def test_from_save_transformers(self):
"""
Test if the model can be saved and loaded using transformers and get the same weights
@@ -60,19 +61,25 @@ def test_from_save_transformers(self):
transformers_model = self.trl_model_class.transformers_parent_class.from_pretrained(model_name)

trl_model = self.trl_model_class.from_pretrained(model_name)

with tempfile.TemporaryDirectory() as tmp_dir:
trl_model.save_pretrained(tmp_dir)
transformers_model_from_save = self.trl_model_class.transformers_parent_class.from_pretrained(tmp_dir)
# Check if the weights are the same

# Check if the weights are the same
for key in transformers_model.state_dict():
self.assertTrue(torch.allclose(transformers_model_from_save.state_dict()[key], transformers_model.state_dict()[key]))
self.assertTrue(
torch.allclose(
transformers_model_from_save.state_dict()[key], transformers_model.state_dict()[key]
)
)


class VHeadModelTester(BaseModelTester, unittest.TestCase):
Member: Not for this PR, but a small nit: being explicit with e.g. `ValueHeadModelTester` is generally best practice and makes it easier for newcomers to understand the codebase. The same nit applies to docstrings that say "v-head" rather than "value head", and to filenames: `modeling_vhead.py` would be better named `modeling_value_head.py`.

Contributor: Addressed in #65!

"""
Testing suite for v-head models.
Testing suite for v-head models.
"""

all_model_names = ALL_CAUSAL_LM_MODELS
trl_model_class = AutoModelForCausalLMWithValueHead

@@ -83,25 +90,25 @@ def test_vhead(self):
for model_name in self.all_model_names:
model = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)
self.assertTrue(hasattr(model, "v_head"))

def test_vhead_nb_classes(self):
Member: Would this be better? Suggested change: rename `def test_vhead_nb_classes(self):` to `def test_vhead_shape(self):`.

Member Author: I'd address this too in a separate PR.

Contributor: Addressed in #65!

r"""
Test if the v-head has the correct shape
"""
for model_name in self.all_model_names:
model = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)
self.assertTrue(model.v_head.summary.weight.shape[0] == 1)

def test_vhead_init_random(self):
r"""
Test if the v-head has been randomly initialized.
We can check that by making sure the bias is different
Test if the v-head has been randomly initialized.
We can check that by making sure the bias is different
than zeros by default.
"""
for model_name in self.all_model_names:
model = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)
self.assertFalse(torch.allclose(model.v_head.summary.bias, torch.zeros_like(model.v_head.summary.bias)))
self.assertFalse(torch.allclose(model.v_head.summary.bias, torch.zeros_like(model.v_head.summary.bias)))

def test_vhead_not_str(self):
r"""
Test if the v-head is added to the model succesfully
@@ -113,8 +120,8 @@ def test_vhead_not_str(self):

def test_inference(self):
r"""
Test if the model can be used for inference and outputs 3 values
- logits, loss, and value states
Test if the model can be used for inference and outputs 3 values
- logits, loss, and value states
"""
EXPECTED_OUTPUT_SIZE = 3

@@ -123,10 +130,10 @@ def test_inference(self):
input_ids = torch.tensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
outputs = model(input_ids)

# Check if the outputs are of the right size - here
# Check if the outputs are of the right size - here
# we always output 3 values - logits, loss, and value states
self.assertEqual(len(outputs), EXPECTED_OUTPUT_SIZE)

def test_dropout_config(self):
r"""
Test if we instantiate a model by adding `summary_drop_prob` to the config
@@ -139,7 +146,7 @@ def test_dropout_config(self):

# Check if v head of the model has the same dropout as the config
self.assertEqual(model.v_head.dropout.p, pretrained_model.config.summary_dropout_prob)

def test_dropout_kwargs(self):
r"""
Test if we instantiate a model by adding `summary_drop_prob` to the config
@@ -157,10 +164,10 @@ def test_dropout_kwargs(self):

# Check if v head of the model has the same dropout as the config
self.assertEqual(model.v_head.dropout.p, 0.5)

def test_generate(self):
r"""
Test if `generate` works for every model
Test if `generate` works for every model
"""
for model_name in self.all_model_names:
model = AutoModelForCausalLMWithValueHead.from_pretrained(model_name)
@@ -175,4 +182,4 @@ def test_raise_error_not_causallm(self):
# This should raise a ValueError
with self.assertRaises(ValueError):
pretrained_model = AutoModelForCausalLM.from_pretrained(model_id)
_ = AutoModelForCausalLMWithValueHead.from_pretrained(pretrained_model.transformer)
_ = AutoModelForCausalLMWithValueHead.from_pretrained(pretrained_model.transformer)
4 changes: 3 additions & 1 deletion trl/__init__.py
@@ -1,3 +1,5 @@
# flake8: noqa

__version__ = "0.1.1"

from .models import AutoModelForCausalLMWithValueHead
from .models import AutoModelForCausalLMWithValueHead