Fixed datasets error in v2.3.x (intel#119)

sywangyi · Jun 24, 2022 · 45e6a1a · 45e6a1a
1 parent eaf9cfe
commit 45e6a1a
Show file tree

Hide file tree

Showing 4 changed files with 12 additions and 8 deletions.
diff --git a/examples/optimization/tensorflow/huggingface/text-classification/pruning/run_glue.py b/examples/optimization/tensorflow/huggingface/text-classification/pruning/run_glue.py
@@ -467,7 +467,8 @@ def compute_metrics(preds, label_ids):
                 collate_fn=data_collator,
                 drop_remainder=drop_remainder,
                 # `label_cols` is needed for user-defined losses, such as in this example
-                label_cols="label" if "label" in dataset.column_names else None,
+                # datasets v2.3.x needs "labels" rather than "label"; both names are passed below
+                label_cols=["labels", "label"] if "label" in dataset.column_names else None,
             )
             tf_data[key] = data
         # endregion
@@ -593,4 +594,4 @@ def compute_metrics(preds, label_ids):
 
 
 if __name__ == "__main__":
-    main()
+    main()
diff --git a/...ples/optimization/tensorflow/huggingface/text-classification/quantization/inc/run_glue.py b/...ples/optimization/tensorflow/huggingface/text-classification/quantization/inc/run_glue.py
@@ -468,7 +468,8 @@ def compute_metrics(preds, label_ids):
                 collate_fn=data_collator,
                 drop_remainder=drop_remainder,
                 # `label_cols` is needed for user-defined losses, such as in this example
-                label_cols="label" if "label" in dataset.column_names else None,
+                # datasets v2.3.x needs "labels" rather than "label"; both names are passed below
+                label_cols=["labels", "label"] if "label" in dataset.column_names else None,
             )
             tf_data[key] = data
         # endregion
@@ -619,4 +620,4 @@ def compute_metrics(preds, label_ids):
 
 
 if __name__ == "__main__":
-    main()
+    main()
diff --git a/tests/test_tf_pruning.py b/tests/test_tf_pruning.py
@@ -49,7 +49,8 @@ def preprocess_function(examples):
             collate_fn=data_collator,
             drop_remainder=False,
             # `label_cols` is needed for user-defined losses, such as in this example
-            label_cols="label" if "label" in dataset.column_names else None,
+            # datasets v2.3.x needs "labels" rather than "label"; both names are passed below
+            label_cols=["label", "labels"] if "label" in dataset.column_names else None,
         )
         parser = HfArgumentParser(TFTrainingArguments)
         self.args = parser.parse_args_into_dataclasses(args=["--output_dir", "./quantized_model",

diff --git a/tests/test_tf_quantization.py b/tests/test_tf_quantization.py
@@ -1,7 +1,7 @@
 import numpy as np
 import os
 import shutil
-import tensorflow as tf 
+import tensorflow as tf
 import unittest
 from datasets import load_dataset, load_metric
 from nlp_toolkit import (
@@ -43,14 +43,15 @@ def preprocess_function(examples):
         data_collator = DefaultDataCollator(return_tensors="tf")
         dataset = raw_datasets.select(range(10))
         self.dummy_dataset = dataset.to_tf_dataset(
-            columns=[col for col in dataset.column_names if col not in 
+            columns=[col for col in dataset.column_names if col not in
                      set(non_label_column_names + ["label"])],
             shuffle=False,
             batch_size=2,
             collate_fn=data_collator,
             drop_remainder=False,
             # `label_cols` is needed for user-defined losses, such as in this example
-            label_cols="label" if "label" in dataset.column_names else None,
+            # datasets v2.3.x needs "labels" rather than "label"; both names are passed below
+            label_cols=["label", "labels"] if "label" in dataset.column_names else None,
         )