Added support for ELECTRA NER
ThilinaRajapakse committed Apr 12, 2020
1 parent 777f78d commit 0b325d5
Showing 4 changed files with 28 additions and 4 deletions.
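With this change, `"electra"` becomes a selectable backend for `NERModel`. A minimal usage sketch — the `google/electra-small-discriminator` checkpoint name and the toy CoNLL-style rows are illustrative assumptions, not part of the commit:

```python
# Sketch: training an ELECTRA-based NER model with simpletransformers.
# The checkpoint name and toy dataset below are assumptions for illustration.

# CoNLL-style training data: (sentence_id, word, label) rows.
train_data = [
    (0, "Simple", "B-MISC"),
    (0, "Transformers", "I-MISC"),
    (0, "is", "O"),
    (0, "great", "O"),
]


def train_electra_ner(rows):
    # Imported lazily so the sketch can be read without the libraries installed.
    import pandas as pd
    from simpletransformers.ner import NERModel

    train_df = pd.DataFrame(rows, columns=["sentence_id", "words", "labels"])

    # "electra" now resolves to (ElectraConfig, ElectraForTokenClassification,
    # ElectraTokenizer) via MODEL_CLASSES in ner_model.py.
    model = NERModel("electra", "google/electra-small-discriminator", use_cuda=False)
    model.train_model(train_df)
    return model
```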
10 changes: 9 additions & 1 deletion CHANGELOG.md
@@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## [0.24.6] - 2020-04-12

### Added

- Added support for ELECTRA based NER models.

## [0.24.5] - 2020-04-11

### Fixed
@@ -569,7 +575,9 @@ Model checkpoint is now saved for all epochs again.

- This CHANGELOG file to hopefully serve as an evolving example of a standardized open source project CHANGELOG.

[0.24.5]: https://github.com/ThilinaRajapakse/simpletransformers/compare/8f1daac...HEAD
[0.24.6]: https://github.com/ThilinaRajapakse/simpletransformers/compare/777f78d...HEAD

[0.24.5]: https://github.com/ThilinaRajapakse/simpletransformers/compare/8f1daac...777f78d

[0.24.3]: https://github.com/ThilinaRajapakse/simpletransformers/compare/ce4b925...8f1daac

5 changes: 4 additions & 1 deletion README.md
@@ -56,6 +56,7 @@ Supports
- [Example (Medium Article)](#example-medium-article)
- [Minimal Example For Language Model Training From Scratch](#minimal-example-for-language-model-training-from-scratch)
- [Minimal Example For Language Model Training With ELECTRA](#minimal-example-for-language-model-training-with-electra)
- [Real Dataset Example For Training a Language Model](#real-dataset-example-for-training-a-language-model)
- [LanguageModelingModel](#languagemodelingmodel)
- [Additional attributes for Language Modeling tasks](#additional-attributes-for-language-modeling-tasks)
- [*dataset_type: str*](#datasettype-str)
@@ -1054,7 +1055,9 @@ model.eval_model("wikitext-2/wiki.test.tokens")

```

*A more comprehensive guide will be added here soon*
### Real Dataset Example For Training a Language Model

- [Esperanto Model trained with ELECTRA](https://medium.com/@chaturangarajapakshe/understanding-electra-and-training-an-electra-language-model-3d33e3a9660d?source=friends_link&sk=2b4b4a79954e3d7c84ab863efaea8c65)

### LanguageModelingModel

2 changes: 1 addition & 1 deletion setup.py
@@ -6,7 +6,7 @@

setup(
name="simpletransformers",
version="0.24.5",
version="0.24.6",
author="Thilina Rajapakse",
author_email="chaturangarajapakshe@gmail.com",
description="An easy-to-use wrapper library for the Transformers library.",
15 changes: 14 additions & 1 deletion simpletransformers/ner/ner_model.py
@@ -51,6 +51,11 @@
CamembertForTokenClassification,
CamembertTokenizer,
)
from transformers import (
ElectraConfig,
ElectraForTokenClassification,
ElectraTokenizer,
)
from simpletransformers.config.global_args import global_args

try:
@@ -108,11 +113,19 @@ def __init__(
"distilbert": (DistilBertConfig, DistilBertForTokenClassification, DistilBertTokenizer),
"camembert": (CamembertConfig, CamembertForTokenClassification, CamembertTokenizer),
"xlmroberta": (XLMRobertaConfig, XLMRobertaForTokenClassification, XLMRobertaTokenizer),
"electra": (ElectraConfig, ElectraForTokenClassification, ElectraTokenizer),
}

config_class, model_class, tokenizer_class = MODEL_CLASSES[model_type]

self.model = model_class.from_pretrained(model_name, num_labels=self.num_labels, **kwargs)
if self.num_labels:
    self.config = config_class.from_pretrained(model_name, num_labels=self.num_labels, **kwargs)
else:
    self.config = config_class.from_pretrained(model_name, **kwargs)
    self.num_labels = self.config.num_labels

self.model = model_class.from_pretrained(model_name, config=self.config, **kwargs)

if use_cuda:
if torch.cuda.is_available():
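The config-first loading in this hunk — build the config, optionally overriding `num_labels`, then hand it to the model's `from_pretrained` — can be mimicked with stand-in classes. Every name below is a hypothetical stub for illustration, not the transformers API:

```python
# Sketch of the config-then-model loading pattern above, using stand-in
# classes (all names hypothetical) instead of the real transformers imports.

class StubConfig:
    def __init__(self, num_labels):
        self.num_labels = num_labels

    @classmethod
    def from_pretrained(cls, model_name, num_labels=None, **kwargs):
        # A real config would be read from the checkpoint; default to 2 labels.
        return cls(num_labels if num_labels is not None else 2)


class StubModel:
    def __init__(self, config):
        self.config = config

    @classmethod
    def from_pretrained(cls, model_name, config=None, **kwargs):
        return cls(config)


def load_model(config_class, model_class, model_name, num_labels=None, **kwargs):
    # Mirrors the branch in NERModel.__init__: honor an explicit label count,
    # otherwise fall back to whatever the pretrained config declares.
    if num_labels:
        config = config_class.from_pretrained(model_name, num_labels=num_labels, **kwargs)
    else:
        config = config_class.from_pretrained(model_name, **kwargs)
        num_labels = config.num_labels
    model = model_class.from_pretrained(model_name, config=config, **kwargs)
    return model, num_labels
```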
