Merge reformat. Changelog
flaviussn committed Jan 20, 2020
1 parent 267fff6 commit f204d0a
Showing 6 changed files with 88 additions and 36 deletions.
10 changes: 10 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,16 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [0.18.10] - 2020-01-20
### Added

- Added Makefile with targets for test dependency installation, running tests, formatting, and type checking.
- Added setup.cfg file with Make configuration
- Added tests for the functionality
- Linted files using flake8
- Formatted files using black
- Ran tests with pytest
- Removed unused variables

## [0.18.9] - 2020-01-20
### Fixed
41 changes: 26 additions & 15 deletions simpletransformers/classification/classification_model.py
@@ -86,7 +86,6 @@


class ClassificationModel:

def __init__(
self,
model_type,
@@ -111,8 +110,7 @@ def __init__(
use_cuda (optional): Use GPU if available. Setting to False will force model to use CPU only.
cuda_device (optional): Specific GPU that should be used. Will use the first available GPU by default.
**kwargs (optional): For providing proxies, force_download, resume_download, cache_dir and other options specific to the 'from_pretrained' implementation where this will be supplied.
""" # noqa: ignore flake8"

""" # noqa: ignore flake8"

MODEL_CLASSES = {
"bert": (BertConfig, BertForSequenceClassification, BertTokenizer),
@@ -143,10 +141,12 @@ def __init__(

config_class, model_class, tokenizer_class = MODEL_CLASSES[model_type]
if num_labels:
self.config = config_class.from_pretrained(model_name, num_labels=num_labels, **kwargs)
self.config = config_class.from_pretrained(
model_name, num_labels=num_labels, **kwargs
)
self.num_labels = num_labels
else:
self.config = config_class.from_pretrained(model_name, **kwargs)
self.config = config_class.from_pretrained(model_name, **kwargs)
self.num_labels = self.config.num_labels
self.weight = weight

@@ -166,9 +166,16 @@ def __init__(

if self.weight:

self.model = model_class.from_pretrained(model_name, config=self.config, weight=torch.Tensor(self.weight).to(self.device), **kwargs)
self.model = model_class.from_pretrained(
model_name,
config=self.config,
weight=torch.Tensor(self.weight).to(self.device),
**kwargs,
)
else:
self.model = model_class.from_pretrained(model_name, config=self.config, **kwargs)
self.model = model_class.from_pretrained(
model_name, config=self.config, **kwargs
)

self.results = {}

@@ -187,15 +194,19 @@ def __init__(
if args:
self.args.update(args)

self.tokenizer = tokenizer_class.from_pretrained(
model_name, do_lower_case=self.args["do_lower_case"], **kwargs
)

self.tokenizer = tokenizer_class.from_pretrained(model_name, do_lower_case=self.args['do_lower_case'], **kwargs)

self.args['model_name'] = model_name
self.args['model_type'] = model_type
self.args["model_name"] = model_name
self.args["model_type"] = model_type

if model_type in ['camembert', 'xlmroberta']:
warnings.warn(f"use_multiprocessing automatically disabled as {model_type} fails when using multiprocessing for feature conversion.")
self.args['use_multiprocessing'] = False
if model_type in ["camembert", "xlmroberta"]:
warnings.warn(
f"use_multiprocessing automatically disabled as {model_type}"
" fails when using multiprocessing for feature conversion."
)
self.args["use_multiprocessing"] = False

self.args["model_name"] = model_name
self.args["model_type"] = model_type
@@ -582,7 +593,7 @@ def train(
) and not os.path.exists(output_dir_current):
os.makedirs(output_dir_current)

if args['save_model_every_epoch']:
if args["save_model_every_epoch"]:

model_to_save = model.module if hasattr(model, "module") else model
model_to_save.save_pretrained(output_dir_current)
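For reference, a minimal usage sketch of the ClassificationModel constructor documented above; the model name, weight list, and args values are hypothetical examples, not part of this commit.

```python
from simpletransformers.classification import ClassificationModel

# Hypothetical example: a binary classifier forced onto CPU (use_cuda=False),
# with optional per-class loss weights passed through as a torch.Tensor.
model = ClassificationModel(
    "bert",
    "bert-base-cased",
    num_labels=2,
    weight=[1.0, 2.0],
    use_cuda=False,
    args={"do_lower_case": False},
)
```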
simpletransformers/classification/multi_label_classification_model.py
@@ -31,8 +31,16 @@


class MultiLabelClassificationModel(ClassificationModel):

def __init__(self, model_type, model_name, num_labels=None, pos_weight=None, args=None, use_cuda=True, **kwargs):
def __init__(
self,
model_type,
model_name,
num_labels=None,
pos_weight=None,
args=None,
use_cuda=True,
**kwargs
):

"""
Initializes a MultiLabelClassification model.
@@ -45,7 +53,7 @@ def __init__(self, model_type, model_name, num_labels=None, pos_weight=None, arg
args (optional): Default args will be used if this parameter is not provided. If provided, it should be a dict containing the args that should be changed in the default args.
use_cuda (optional): Use GPU if available. Setting to False will force model to use CPU only.
**kwargs (optional): For providing proxies, force_download, resume_download, cache_dir and other options specific to the 'from_pretrained' implementation where this will be supplied.
"""# noqa: ignore flake8"
""" # noqa: ignore flake8"

MODEL_CLASSES = {
"bert": (
@@ -79,7 +87,9 @@ def __init__(self, model_type, model_name, num_labels=None, pos_weight=None, arg
config_class, model_class, tokenizer_class = MODEL_CLASSES[model_type]
if num_labels:

self.config = config_class.from_pretrained(model_name, num_labels=num_labels, **kwargs)
self.config = config_class.from_pretrained(
model_name, num_labels=num_labels, **kwargs
)

self.num_labels = num_labels
else:
@@ -99,9 +109,16 @@ def __init__(self, model_type, model_name, num_labels=None, pos_weight=None, arg
self.device = "cpu"

if self.pos_weight:
self.model = model_class.from_pretrained(model_name, config=self.config, pos_weight=torch.Tensor(self.pos_weight).to(self.device), **kwargs)
self.model = model_class.from_pretrained(
model_name,
config=self.config,
pos_weight=torch.Tensor(self.pos_weight).to(self.device),
**kwargs
)
else:
self.model = model_class.from_pretrained(model_name, config=self.config, **kwargs)
self.model = model_class.from_pretrained(
model_name, config=self.config, **kwargs
)

self.results = {}

@@ -120,9 +137,9 @@ def __init__(self, model_type, model_name, num_labels=None, pos_weight=None, arg
if args:
self.args.update(args)


self.tokenizer = tokenizer_class.from_pretrained(model_name, do_lower_case=self.args['do_lower_case'], **kwargs)

self.tokenizer = tokenizer_class.from_pretrained(
model_name, do_lower_case=self.args["do_lower_case"], **kwargs
)

self.args["model_name"] = model_name
self.args["model_type"] = model_type
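Similarly, a minimal usage sketch of the MultiLabelClassificationModel constructor above; the model name and pos_weight values are hypothetical.

```python
from simpletransformers.classification import MultiLabelClassificationModel

# Hypothetical example: six independent labels; pos_weight is passed through
# as a torch.Tensor to the underlying model, as in the constructor above.
model = MultiLabelClassificationModel(
    "roberta",
    "roberta-base",
    num_labels=6,
    pos_weight=[1.0, 2.0, 1.0, 1.0, 3.0, 1.0],
    use_cuda=False,
)
```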
2 changes: 2 additions & 0 deletions simpletransformers/custom_models/models.py
@@ -9,6 +9,7 @@
from transformers.modeling_utils import SequenceSummary, PreTrainedModel
from transformers import RobertaModel
from transformers.configuration_roberta import RobertaConfig
from torch.nn import BCEWithLogitsLoss

from transformers.modeling_albert import (
AlbertConfig,
@@ -19,6 +20,7 @@
from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
from transformers.modeling_distilbert import DISTILBERT_PRETRAINED_MODEL_ARCHIVE_MAP


class BertForMultiLabelSequenceClassification(BertPreTrainedModel):
"""
Bert model adapted for multi-label sequence classification
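The BCEWithLogitsLoss import added above is used by the multi-label classification heads in this file; below is a small self-contained sketch of how it behaves with pos_weight (all values illustrative, not part of the diff).

```python
import torch
from torch.nn import BCEWithLogitsLoss

# Each of the 6 labels is scored independently; a pos_weight entry > 1
# up-weights positive examples for that label.
logits = torch.randn(4, 6)                    # (batch_size, num_labels)
labels = torch.randint(0, 2, (4, 6)).float()  # multi-hot targets
loss_fct = BCEWithLogitsLoss(pos_weight=torch.tensor([1.0, 2.0, 1.0, 1.0, 3.0, 1.0]))
loss = loss_fct(logits, labels)
```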
23 changes: 17 additions & 6 deletions simpletransformers/ner/ner_model.py
@@ -56,7 +56,16 @@


class NERModel:
def __init__(self, model_type, model_name, labels=None, args=None, use_cuda=True, cuda_device=-1, **kwargs):
def __init__(
self,
model_type,
model_name,
labels=None,
args=None,
use_cuda=True,
cuda_device=-1,
**kwargs,
):
"""
Initializes a NERModel
@@ -107,7 +116,9 @@ def __init__(self, model_type, model_name, labels=None, args=None, use_cuda=True

config_class, model_class, tokenizer_class = MODEL_CLASSES[model_type]

self.model = model_class.from_pretrained(model_name, num_labels=self.num_labels, **kwargs)
self.model = model_class.from_pretrained(
model_name, num_labels=self.num_labels, **kwargs
)

if use_cuda:
if torch.cuda.is_available():
@@ -135,9 +146,9 @@ def __init__(self, model_type, model_name, labels=None, args=None, use_cuda=True
if args:
self.args.update(args)


self.tokenizer = tokenizer_class.from_pretrained(model_name, do_lower_case=self.args['do_lower_case'], **kwargs)

self.tokenizer = tokenizer_class.from_pretrained(
model_name, do_lower_case=self.args["do_lower_case"], **kwargs
)

self.args["model_name"] = model_name
self.args["model_type"] = model_type
@@ -466,7 +477,7 @@ def train(self, train_dataset, output_dir, show_running_loss=True, eval_df=None)
) and not os.path.exists(output_dir_current):
os.makedirs(output_dir_current)

if args['save_model_every_epoch']:
if args["save_model_every_epoch"]:

model_to_save = model.module if hasattr(model, "module") else model
model_to_save.save_pretrained(output_dir_current)
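A minimal usage sketch of the NERModel constructor above; the label list is a hypothetical example and may be omitted to use the defaults.

```python
from simpletransformers.ner import NERModel

# Hypothetical example: a custom label list on CPU; cuda_device would select
# a specific GPU when use_cuda=True.
model = NERModel(
    "bert",
    "bert-base-cased",
    labels=["O", "B-PER", "I-PER", "B-LOC", "I-LOC"],
    use_cuda=False,
)
```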
simpletransformers/question_answering/question_answering_model.py
@@ -62,8 +62,9 @@


class QuestionAnsweringModel:

def __init__(self, model_type, model_name, args=None, use_cuda=True, cuda_device=-1, **kwargs):
def __init__(
self, model_type, model_name, args=None, use_cuda=True, cuda_device=-1, **kwargs
):

"""
Initializes a QuestionAnsweringModel model.
@@ -125,9 +126,9 @@ def __init__(self, model_type, model_name, args=None, use_cuda=True, cuda_device
if args:
self.args.update(args)


self.tokenizer = tokenizer_class.from_pretrained(model_name, do_lower_case=self.args['do_lower_case'], **kwargs)

self.tokenizer = tokenizer_class.from_pretrained(
model_name, do_lower_case=self.args["do_lower_case"], **kwargs
)

self.args["model_name"] = model_name
self.args["model_type"] = model_type
@@ -542,7 +543,7 @@ def train(self, train_dataset, output_dir, show_running_loss=True, eval_data=Non
) and not os.path.exists(output_dir_current):
os.makedirs(output_dir_current)

if args['save_model_every_epoch']:
if args["save_model_every_epoch"]:

model_to_save = model.module if hasattr(model, "module") else model
model_to_save.save_pretrained(output_dir_current)
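Finally, a minimal usage sketch of the QuestionAnsweringModel constructor above; the model name is a hypothetical example, not part of this commit.

```python
from simpletransformers.question_answering import QuestionAnsweringModel

# Hypothetical example: a SQuAD-distilled checkpoint on CPU.
model = QuestionAnsweringModel(
    "distilbert",
    "distilbert-base-cased-distilled-squad",
    use_cuda=False,
)
```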
