From f631e01d2c78614416655a85955f326636f69825 Mon Sep 17 00:00:00 2001
From: Julien Chaumond
Date: Tue, 3 Mar 2020 15:31:31 -0500
Subject: [PATCH] [ci] Re-run integration ground truth from fairseq

Adopted best practice set by @patrickvonplaten of commenting lines run on
fairseq, for easy comparison

also see #3020
---
 ..._original_pytorch_checkpoint_to_pytorch.py |  4 ++-
 tests/test_modeling_roberta.py                | 31 ++++++++++++++-----
 tests/test_modeling_tf_roberta.py             | 10 +++---
 tests/test_modeling_xlm_roberta.py            | 18 +++++------
 4 files changed, 39 insertions(+), 24 deletions(-)

diff --git a/src/transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py b/src/transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py
index df4c34143608..39e4b82019ef 100644
--- a/src/transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py
+++ b/src/transformers/convert_roberta_original_pytorch_checkpoint_to_pytorch.py
@@ -46,7 +46,9 @@
 SAMPLE_TEXT = "Hello world! cécé herlolip"
 
 
-def convert_roberta_checkpoint_to_pytorch(roberta_checkpoint_path, pytorch_dump_folder_path, classification_head):
+def convert_roberta_checkpoint_to_pytorch(
+    roberta_checkpoint_path: str, pytorch_dump_folder_path: str, classification_head: bool
+):
     """
     Copy/paste/tweak roberta's weights to our BERT structure.
     """
diff --git a/tests/test_modeling_roberta.py b/tests/test_modeling_roberta.py
index 9ea25a186b1d..3e58a0b59766 100644
--- a/tests/test_modeling_roberta.py
+++ b/tests/test_modeling_roberta.py
@@ -329,10 +329,15 @@ def test_inference_masked_lm(self):
         expected_shape = torch.Size((1, 11, 50265))
         self.assertEqual(output.shape, expected_shape)
         # compare the actual values for a slice.
-        expected_slice = torch.Tensor(
-            [[[33.8843, -4.3107, 22.7779], [4.6533, -2.8099, 13.6252], [1.8222, -3.6898, 8.8600]]]
+        expected_slice = torch.tensor(
+            [[[33.8802, -4.3103, 22.7761], [4.6539, -2.8098, 13.6253], [1.8228, -3.6898, 8.8600]]]
         )
-        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
+
+        # roberta = torch.hub.load('pytorch/fairseq', 'roberta.base')
+        # roberta.eval()
+        # expected_slice = roberta.model.forward(input_ids)[0][:, :3, :3].detach()
+
+        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
 
     @slow
     def test_inference_no_head(self):
@@ -341,10 +346,15 @@ def test_inference_no_head(self):
         input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
         output = model(input_ids)[0]
         # compare the actual values for a slice.
-        expected_slice = torch.Tensor(
-            [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0539, -0.0174], [0.0548, 0.0799, 0.1687]]]
+        expected_slice = torch.tensor(
+            [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0540, -0.0175], [0.0548, 0.0799, 0.1687]]]
         )
-        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-3))
+
+        # roberta = torch.hub.load('pytorch/fairseq', 'roberta.base')
+        # roberta.eval()
+        # expected_slice = roberta.extract_features(input_ids)[:, :3, :3].detach()
+
+        self.assertTrue(torch.allclose(output[:, :3, :3], expected_slice, atol=1e-4))
 
     @slow
     def test_inference_classification_head(self):
@@ -354,5 +364,10 @@ def test_inference_classification_head(self):
         output = model(input_ids)[0]
         expected_shape = torch.Size((1, 3))
         self.assertEqual(output.shape, expected_shape)
-        expected_tensor = torch.Tensor([[-0.9469, 0.3913, 0.5118]])
-        self.assertTrue(torch.allclose(output, expected_tensor, atol=1e-3))
+        expected_tensor = torch.tensor([[-0.9469, 0.3913, 0.5118]])
+
+        # roberta = torch.hub.load('pytorch/fairseq', 'roberta.large.mnli')
+        # roberta.eval()
+        # expected_tensor = roberta.predict("mnli", input_ids, return_logits=True).detach()
+
+        self.assertTrue(torch.allclose(output, expected_tensor, atol=1e-4))
diff --git a/tests/test_modeling_tf_roberta.py b/tests/test_modeling_tf_roberta.py
index 5ee598b2dc20..9bc837c4e3ce 100644
--- a/tests/test_modeling_tf_roberta.py
+++ b/tests/test_modeling_tf_roberta.py
@@ -222,9 +222,9 @@ def test_inference_masked_lm(self):
         self.assertEqual(list(output.numpy().shape), expected_shape)
         # compare the actual values for a slice.
         expected_slice = tf.constant(
-            [[[33.8843, -4.3107, 22.7779], [4.6533, -2.8099, 13.6252], [1.8222, -3.6898, 8.8600]]]
+            [[[33.8802, -4.3103, 22.7761], [4.6539, -2.8098, 13.6253], [1.8228, -3.6898, 8.8600]]]
         )
-        self.assertTrue(numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-3))
+        self.assertTrue(numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-4))
 
     @slow
     def test_inference_no_head(self):
@@ -234,9 +234,9 @@ def test_inference_no_head(self):
         output = model(input_ids)[0]
         # compare the actual values for a slice.
         expected_slice = tf.constant(
-            [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0539, -0.0174], [0.0548, 0.0799, 0.1687]]]
+            [[[-0.0231, 0.0782, 0.0074], [-0.1854, 0.0540, -0.0175], [0.0548, 0.0799, 0.1687]]]
         )
-        self.assertTrue(numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-3))
+        self.assertTrue(numpy.allclose(output[:, :3, :3].numpy(), expected_slice.numpy(), atol=1e-4))
 
     @slow
     def test_inference_classification_head(self):
@@ -247,4 +247,4 @@ def test_inference_classification_head(self):
         expected_shape = [1, 3]
         self.assertEqual(list(output.numpy().shape), expected_shape)
         expected_tensor = tf.constant([[-0.9469, 0.3913, 0.5118]])
-        self.assertTrue(numpy.allclose(output.numpy(), expected_tensor.numpy(), atol=1e-3))
+        self.assertTrue(numpy.allclose(output.numpy(), expected_tensor.numpy(), atol=1e-4))
diff --git a/tests/test_modeling_xlm_roberta.py b/tests/test_modeling_xlm_roberta.py
index 8c6bd0069b2a..3d035f48fc85 100644
--- a/tests/test_modeling_xlm_roberta.py
+++ b/tests/test_modeling_xlm_roberta.py
@@ -30,14 +30,13 @@ class XLMRobertaModelIntegrationTest(unittest.TestCase):
     @slow
     def test_xlm_roberta_base(self):
         model = XLMRobertaModel.from_pretrained("xlm-roberta-base")
-        input_ids = torch.tensor([0, 581, 10269, 83, 99942, 136, 60742, 23, 70, 80583, 18276, 2]).unsqueeze(
-            0
-        )  # The dog is cute and lives in the garden house
+        input_ids = torch.tensor([[0, 581, 10269, 83, 99942, 136, 60742, 23, 70, 80583, 18276, 2]])
+        # The dog is cute and lives in the garden house
 
         expected_output_shape = torch.Size((1, 12, 768))  # batch_size, sequence_length, embedding_vector_dim
         expected_output_values_last_dim = torch.tensor(
-            [-0.0101, 0.1218, -0.0803, 0.0801, 0.1327, 0.0776, -0.1215, 0.2383, 0.3338, 0.3106, 0.0300, 0.0252]
-        ).unsqueeze(0)
+            [[-0.0101, 0.1218, -0.0803, 0.0801, 0.1327, 0.0776, -0.1215, 0.2383, 0.3338, 0.3106, 0.0300, 0.0252]]
+        )
         # xlmr = torch.hub.load('pytorch/fairseq', 'xlmr.base')
         # xlmr.eval()
         # expected_output_values_last_dim = xlmr.extract_features(input_ids[0])[:, :, -1]
@@ -50,14 +49,13 @@ def test_xlm_roberta_base(self):
     @slow
     def test_xlm_roberta_large(self):
         model = XLMRobertaModel.from_pretrained("xlm-roberta-large")
-        input_ids = torch.tensor([0, 581, 10269, 83, 99942, 136, 60742, 23, 70, 80583, 18276, 2]).unsqueeze(
-            0
-        )  # The dog is cute and lives in the garden house
+        input_ids = torch.tensor([[0, 581, 10269, 83, 99942, 136, 60742, 23, 70, 80583, 18276, 2]])
+        # The dog is cute and lives in the garden house
 
         expected_output_shape = torch.Size((1, 12, 1024))  # batch_size, sequence_length, embedding_vector_dim
         expected_output_values_last_dim = torch.tensor(
-            [-0.0699, -0.0318, 0.0705, -0.1241, 0.0999, -0.0520, 0.1004, -0.1838, -0.4704, 0.1437, 0.0821, 0.0126]
-        ).unsqueeze(0)
+            [[-0.0699, -0.0318, 0.0705, -0.1241, 0.0999, -0.0520, 0.1004, -0.1838, -0.4704, 0.1437, 0.0821, 0.0126]]
+        )
         # xlmr = torch.hub.load('pytorch/fairseq', 'xlmr.large')
         # xlmr.eval()
         # expected_output_values_last_dim = xlmr.extract_features(input_ids[0])[:, :, -1]
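
--
The commented-out fairseq lines added above can be re-run as a small standalone
script to regenerate the ground-truth values. A minimal sketch for the RoBERTa
base checks, assuming network access and that fairseq still publishes these
checkpoints on torch.hub (the calls mirror the comments in
tests/test_modeling_roberta.py):

    import torch

    # token ids used by the RoBERTa integration tests above
    input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])

    # download the original fairseq checkpoint and disable dropout so the
    # outputs are deterministic
    roberta = torch.hub.load("pytorch/fairseq", "roberta.base")
    roberta.eval()

    # masked-LM head logits, first 3 positions x first 3 vocab entries
    print(roberta.model.forward(input_ids)[0][:, :3, :3].detach())
    # raw hidden states (no head), same slice
    print(roberta.extract_features(input_ids)[:, :3, :3].detach())

Because the expected tensors are regenerated directly from fairseq rather than
copied from earlier runs, the comparison tolerance in the tests could be
tightened from atol=1e-3 to atol=1e-4.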