
Add new token classification example #8340

Merged
merged 13 commits into from Nov 9, 2020
2 changes: 1 addition & 1 deletion examples/README.md
@@ -37,7 +37,7 @@ git checkout tags/v3.4.0
|---|---|:---:|:---:|:---:|:---:|
| [**`language-modeling`**](https://github.com/huggingface/transformers/tree/master/examples/language-modeling) | Raw text | ✅ | - | ✅ | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/blog/blob/master/notebooks/01_how_to_train.ipynb)
| [**`text-classification`**](https://github.com/huggingface/transformers/tree/master/examples/text-classification) | GLUE, XNLI | ✅ | ✅ | ✅ | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://github.com/huggingface/notebooks/blob/master/examples/text_classification.ipynb)
| [**`token-classification`**](https://github.com/huggingface/transformers/tree/master/examples/token-classification) | CoNLL NER | ✅ | ✅ | - | -
| [**`token-classification`**](https://github.com/huggingface/transformers/tree/master/examples/token-classification) | CoNLL NER | ✅ | ✅ | ✅ | -
Member: ❤️

| [**`multiple-choice`**](https://github.com/huggingface/transformers/tree/master/examples/multiple-choice) | SWAG, RACE, ARC | ✅ | ✅ | - | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/ViktorAlm/notebooks/blob/master/MPC_GPU_Demo_for_TF_and_PT.ipynb)
| [**`question-answering`**](https://github.com/huggingface/transformers/tree/master/examples/question-answering) | SQuAD | ✅ | ✅ | - | -
| [**`text-generation`**](https://github.com/huggingface/transformers/tree/master/examples/text-generation) | - | n/a | n/a | - | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/huggingface/blog/blob/master/notebooks/02_how_to_generate.ipynb)
39 changes: 38 additions & 1 deletion examples/test_examples.py
@@ -28,7 +28,13 @@

SRC_DIRS = [
os.path.join(os.path.dirname(__file__), dirname)
for dirname in ["text-generation", "text-classification", "language-modeling", "question-answering"]
for dirname in [
"text-generation",
"text-classification",
"token-classification",
"language-modeling",
"question-answering",
]
]
sys.path.extend(SRC_DIRS)

@@ -38,6 +44,7 @@
import run_generation
import run_glue
import run_mlm
import run_ner
import run_pl_glue
import run_squad

@@ -185,6 +192,36 @@ def test_run_mlm(self):
result = run_mlm.main()
self.assertLess(result["perplexity"], 42)

def test_run_ner(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)

tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
run_ner.py
--model_name_or_path bert-base-uncased
--train_file tests/fixtures/tests_samples/conll/sample.json
--validation_file tests/fixtures/tests_samples/conll/sample.json
--output_dir {tmp_dir}
--overwrite_output_dir
--do_train
--do_eval
--warmup_steps=2
--learning_rate=2e-4
--per_gpu_train_batch_size=2
--per_gpu_eval_batch_size=2
--num_train_epochs=2
""".split()

if torch_device != "cuda":
testargs.append("--no_cuda")

with patch.object(sys, "argv", testargs):
result = run_ner.main()
self.assertGreaterEqual(result["eval_accuracy_score"], 0.75)
self.assertGreaterEqual(result["eval_precision"], 0.75)
self.assertLess(result["eval_loss"], 0.5)

def test_run_squad(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)
48 changes: 41 additions & 7 deletions examples/token-classification/README.md
@@ -1,6 +1,40 @@
## Named Entity Recognition
## Token classification

Based on the scripts [`run_ner.py`](https://github.com/huggingface/transformers/blob/master/examples/token-classification/run_ner.py) for Pytorch and
Fine-tuning the library models for token classification tasks such as Named Entity Recognition (NER) or part-of-speech
tagging (POS). The main script `run_ner.py` leverages the 🤗 Datasets library and the Trainer API. You can easily
customize it if you need extra processing on your datasets.

It will run either on a dataset hosted on our [hub](https://huggingface.co/datasets) or on your own text files for
training and validation.

The following example fine-tunes BERT on CoNLL-2003:

```bash
python run_ner.py \
--model_name_or_path bert-base-uncased \
--dataset_name conll2003 \
--output_dir /tmp/test-ner \
--do_train \
--do_eval
```

or you can just run the bash script `run.sh`.
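
Both commands fetch CoNLL-2003 through the 🤗 Datasets library. As a rough sketch of that loading step (illustrative only, not the script's actual code):

```python
# Minimal sketch: load CoNLL-2003 with 🤗 Datasets and peek at one example.
# run_ner.py's internals may differ; this only illustrates the data source.
from datasets import load_dataset

raw = load_dataset("conll2003")      # splits: train / validation / test
print(raw["train"][0]["tokens"])     # word-level tokens
print(raw["train"][0]["ner_tags"])   # integer-encoded NER labels
```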

To run on your own training and validation files, use the following command:

```bash
python run_ner.py \
--model_name_or_path bert-base-uncased \
--train_file path_to_train_file \
--validation_file path_to_validation_file \
--output_dir /tmp/test-ner \
--do_train \
--do_eval
```
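
The diff doesn't show the exact schema expected for custom files; assuming it mirrors the conll2003 columns (a `tokens` list plus a tag column — an assumption, the accepted column names may differ), a minimal JSON-lines training file could be generated like this:

```python
# Hypothetical sample data in JSON-lines form, mirroring the conll2003
# column layout ("tokens" and "ner_tags" are assumed names).
import json

examples = [
    {"tokens": ["John", "lives", "in", "Berlin"], "ner_tags": ["B-PER", "O", "O", "B-LOC"]},
    {"tokens": ["ACME", "Corp", "hired", "him"], "ner_tags": ["B-ORG", "I-ORG", "O", "O"]},
]
with open("path_to_train_file", "w") as f:
    for ex in examples:
        f.write(json.dumps(ex) + "\n")
```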

## Old version of the script

Based on the scripts [`run_ner_old.py`](https://github.com/huggingface/transformers/blob/master/examples/token-classification/run_ner_old.py) for PyTorch and
[`run_tf_ner.py`](https://github.com/huggingface/transformers/blob/master/examples/token-classification/run_tf_ner.py) for TensorFlow 2.

The following examples are covered in this section:
@@ -69,7 +103,7 @@ export SEED=1
To start training, just run:

```bash
python3 run_ner.py --data_dir ./ \
python3 run_ner_old.py --data_dir ./ \
--labels ./labels.txt \
--model_name_or_path $BERT_MODEL \
--output_dir $OUTPUT_DIR \
@@ -87,7 +121,7 @@ If your GPU supports half-precision training, just add the `--fp16` flag. After

#### JSON-based configuration file

Instead of passing all parameters via commandline arguments, the `run_ner.py` script also supports reading parameters from a json-based configuration file:
Instead of passing all parameters via commandline arguments, the `run_ner_old.py` script also supports reading parameters from a json-based configuration file:

```json
{
  ...
}
```

It must be saved with a `.json` extension and can be used by running `python3 run_ner.py config.json`.
It must be saved with a `.json` extension and can be used by running `python3 run_ner_old.py config.json`.
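
This works because the example scripts build their arguments with `HfArgumentParser`, which can read a JSON file in place of `sys.argv`. A simplified sketch (the dataclass here is hypothetical; the real scripts define richer argument classes):

```python
# Sketch of the JSON-config mechanism used by the example scripts.
import sys
from dataclasses import dataclass
from transformers import HfArgumentParser

@dataclass
class ExampleArgs:  # hypothetical stand-in for the scripts' argument dataclasses
    model_name_or_path: str
    output_dir: str

parser = HfArgumentParser(ExampleArgs)
if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
    (args,) = parser.parse_json_file(json_file=sys.argv[1])
else:
    (args,) = parser.parse_args_into_dataclasses()
```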

#### Evaluation

@@ -250,7 +284,7 @@ cat data_wnut_17/train.txt data_wnut_17/dev.txt data_wnut_17/test.txt | cut -d "

#### Run the Pytorch version

Fine-tuning with the PyTorch version can be started using the `run_ner.py` script. In this example we use a JSON-based configuration file.
Fine-tuning with the PyTorch version can be started using the `run_ner_old.py` script. In this example we use a JSON-based configuration file.

This configuration file looks like:

@@ -274,7 +308,7 @@

If your GPU supports half-precision training, please set `fp16` to `true`.

Save this JSON-based configuration under `wnut_17.json`. The fine-tuning can be started with `python3 run_ner.py wnut_17.json`.
Save this JSON-based configuration under `wnut_17.json`. The fine-tuning can be started with `python3 run_ner_old.py wnut_17.json`.

#### Evaluation

40 changes: 5 additions & 35 deletions examples/token-classification/run.sh
@@ -1,36 +1,6 @@
## The relevant files are currently on a shared Google
## drive at https://drive.google.com/drive/folders/1kC0I2UGl2ltrluI9NqDjaQJGw5iliw_J
## Monitor for changes and eventually migrate to nlp dataset
curl -L 'https://drive.google.com/uc?export=download&id=1Jjhbal535VVz2ap4v4r_rN1UEHTdLK5P' \
| grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > train.txt.tmp
curl -L 'https://drive.google.com/uc?export=download&id=1ZfRcQThdtAR5PPRjIDtrVP7BtXSCUBbm' \
| grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > dev.txt.tmp
curl -L 'https://drive.google.com/uc?export=download&id=1u9mb7kNJHWQCWyweMDRMuTFoOHOfeBTH' \
| grep -v "^#" | cut -f 2,3 | tr '\t' ' ' > test.txt.tmp

export MAX_LENGTH=128
export BERT_MODEL=bert-base-multilingual-cased
python3 scripts/preprocess.py train.txt.tmp $BERT_MODEL $MAX_LENGTH > train.txt
python3 scripts/preprocess.py dev.txt.tmp $BERT_MODEL $MAX_LENGTH > dev.txt
python3 scripts/preprocess.py test.txt.tmp $BERT_MODEL $MAX_LENGTH > test.txt
cat train.txt dev.txt test.txt | cut -d " " -f 2 | grep -v "^$"| sort | uniq > labels.txt
export OUTPUT_DIR=germeval-model
export BATCH_SIZE=32
export NUM_EPOCHS=3
export SAVE_STEPS=750
export SEED=1

python3 run_ner.py \
--task_type NER \
--data_dir . \
--labels ./labels.txt \
--model_name_or_path $BERT_MODEL \
--output_dir $OUTPUT_DIR \
--max_seq_length $MAX_LENGTH \
--num_train_epochs $NUM_EPOCHS \
--per_gpu_train_batch_size $BATCH_SIZE \
--save_steps $SAVE_STEPS \
--seed $SEED \
--do_train \
--do_eval \
--do_predict
--model_name_or_path bert-base-uncased \
--dataset_name conll2003 \
--output_dir /tmp/test-ner \
--do_train \
--do_eval
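
Once either version of the script has written a fine-tuned model to its output directory, a quick smoke test is possible with the generic `pipeline` API (a sketch; assumes the run above saved its model and tokenizer to `/tmp/test-ner`):

```python
# Smoke-test a fine-tuned NER checkpoint (path assumed from the command above).
from transformers import pipeline

ner = pipeline("ner", model="/tmp/test-ner", tokenizer="/tmp/test-ner")
print(ner("Hugging Face is based in New York City."))
```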
2 changes: 1 addition & 1 deletion examples/token-classification/run_chunk.sh
@@ -21,7 +21,7 @@ export NUM_EPOCHS=3
export SAVE_STEPS=750
export SEED=1

python3 run_ner.py \
python3 run_ner_old.py \
--task_type Chunk \
--data_dir . \
--model_name_or_path $BERT_MODEL \