comment out integration test
lvdongyi committed Nov 10, 2024
1 parent 32a1764 commit 35ea106
Showing 1 changed file with 90 additions and 92 deletions.
182 changes: 90 additions & 92 deletions tests/test_cache_utils.py
@@ -18,14 +18,12 @@
 import numpy as np
 import paddle
 
-from paddlenlp.transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
+from paddlenlp.transformers import (  # AutoModelForCausalLM,; AutoTokenizer,
     DynamicCache,
     LlamaForCausalLM,
 )
 
-from .testing_utils import slow
+# from .testing_utils import slow
 
 
 def set_seed(seed):
@@ -117,91 +115,91 @@ def test_reorder_cache_retrocompatibility(self):
         )
 
 
-@slow
-class CacheIntegrationTest(unittest.TestCase):
-    def test_dynamic_cache_hard(self):
-        tokenizer = AutoTokenizer.from_pretrained(
-            "meta-llama/Llama-2-7b-hf", padding_side="left", from_hf_hub=True, use_fast=True
-        )
-        model = AutoModelForCausalLM.from_pretrained(
-            "meta-llama/Llama-2-7b-hf",
-            dtype=paddle.float16,
-            from_hf_hub=True,
-        )
-        inputs = tokenizer(["Here's everything I know about cats. Cats"], return_tensors="np")
-        for key in inputs:
-            inputs[key] = paddle.to_tensor(inputs[key])
-
-        # DynamicCache and the legacy cache format should be equivalent
-        set_seed(0)
-        gen_out_legacy = model.generate(**inputs, do_sample=True, max_new_tokens=256)
-        set_seed(0)
-        gen_out = model.generate(**inputs, do_sample=True, max_new_tokens=256, past_key_values=DynamicCache())
-        self.assertListEqual(gen_out_legacy[0].tolist(), gen_out[0].tolist())
-        self.assertListEqual(gen_out_legacy[1].tolist(), gen_out[1].tolist())
-
-        decoded = tokenizer.batch_decode(gen_out[0], skip_special_tokens=True)
-
-        expected_text = (
-            "Here's everything I know about cats. Cats are mysterious creatures. They can't talk, and they don't like "
-            "to be held. They don't play fetch, and they don't like to be hugged. But they do like to be petted.\n"
-            "Cats are also very independent. They don't like to be told what to do, and they don't like to be told "
-            "what to eat. They are also very territorial. They don't like to share their food or their toys.\nCats "
-            "are also very curious. They like to explore, and they like to play. They are also very fast. They can "
-            "run very fast, and they can jump very high.\nCats are also very smart. They can learn tricks, and they "
-            "can solve problems. They are also very playful. They like to play with toys, and they like to play with "
-            "other cats.\nCats are also very affectionate. They like to be petted, and they like to be held. They "
-            "also like to be scratched.\nCats are also very clean. They like to groom themselves, and they like to "
-            "clean their litter box.\nCats are also very independent. They don't"
-        )
-        self.assertEqual(decoded[0], expected_text)
-
-    def test_dynamic_cache_batched(self):
-        tokenizer = AutoTokenizer.from_pretrained(
-            "meta-llama/Llama-2-7b-hf", padding_side="left", from_hf_hub=True, use_fast=True
-        )
-        tokenizer.pad_token = tokenizer.eos_token
-        model = AutoModelForCausalLM.from_pretrained(
-            "meta-llama/Llama-2-7b-hf",
-            device_map="auto",
-            dtype=paddle.float16,
-            from_hf_hub=True,
-        )
-        inputs = tokenizer(["A sequence: 1, 2, 3, 4, 5", "A sequence: A, B, C"], padding=True, return_tensors="np").to(
-            model.device
-        )
-        for key in inputs:
-            inputs[key] = paddle.to_tensor(inputs[key])
-
-        gen_out = model.generate(**inputs, do_sample=False, max_new_tokens=10, past_key_values=DynamicCache())
-        decoded = tokenizer.batch_decode(gen_out[0], skip_special_tokens=True)
-        expected_text = ["A sequence: 1, 2, 3, 4, 5, 6, 7, 8,", "A sequence: A, B, C, D, E, F, G, H"]
-        self.assertListEqual(decoded, expected_text)
-
-    def test_dynamic_cache_beam_search(self):
-        tokenizer = AutoTokenizer.from_pretrained(
-            "meta-llama/Llama-2-7b-hf", padding_side="left", from_hf_hub=True, use_fast=True
-        )
-        model = AutoModelForCausalLM.from_pretrained(
-            "meta-llama/Llama-2-7b-hf",
-            device_map="auto",
-            dtype=paddle.float16,
-            from_hf_hub=True,
-        )
-
-        inputs = tokenizer(["The best color is"], return_tensors="np")
-        for key in inputs:
-            inputs[key] = paddle.to_tensor(inputs[key])
-        gen_out = model.generate(
-            **inputs,
-            do_sample=False,
-            max_new_tokens=20,
-            num_beams=2,
-            num_return_sequences=2,
-        )
-        decoded = tokenizer.batch_decode(gen_out[0], skip_special_tokens=True)
-        expected_text = [
-            "The best color is the one that makes you feel good.\nThe best color is the one that makes you feel good",
-            "The best color is the one that suits you.\nThe best color is the one that suits you. The",
-        ]
-        self.assertListEqual(decoded, expected_text)
+# @slow
+# class CacheIntegrationTest(unittest.TestCase):
+#     def test_dynamic_cache_hard(self):
+#         tokenizer = AutoTokenizer.from_pretrained(
+#             "meta-llama/Llama-2-7b-hf", padding_side="left", from_hf_hub=True, use_fast=True
+#         )
+#         model = AutoModelForCausalLM.from_pretrained(
+#             "meta-llama/Llama-2-7b-hf",
+#             dtype=paddle.float16,
+#             from_hf_hub=True,
+#         )
+#         inputs = tokenizer(["Here's everything I know about cats. Cats"], return_tensors="np")
+#         for key in inputs:
+#             inputs[key] = paddle.to_tensor(inputs[key])
+
+#         # DynamicCache and the legacy cache format should be equivalent
+#         set_seed(0)
+#         gen_out_legacy = model.generate(**inputs, do_sample=True, max_new_tokens=256)
+#         set_seed(0)
+#         gen_out = model.generate(**inputs, do_sample=True, max_new_tokens=256, past_key_values=DynamicCache())
+#         self.assertListEqual(gen_out_legacy[0].tolist(), gen_out[0].tolist())
+#         self.assertListEqual(gen_out_legacy[1].tolist(), gen_out[1].tolist())

+#         decoded = tokenizer.batch_decode(gen_out[0], skip_special_tokens=True)

+#         expected_text = (
+#             "Here's everything I know about cats. Cats are mysterious creatures. They can't talk, and they don't like "
+#             "to be held. They don't play fetch, and they don't like to be hugged. But they do like to be petted.\n"
+#             "Cats are also very independent. They don't like to be told what to do, and they don't like to be told "
+#             "what to eat. They are also very territorial. They don't like to share their food or their toys.\nCats "
+#             "are also very curious. They like to explore, and they like to play. They are also very fast. They can "
+#             "run very fast, and they can jump very high.\nCats are also very smart. They can learn tricks, and they "
+#             "can solve problems. They are also very playful. They like to play with toys, and they like to play with "
+#             "other cats.\nCats are also very affectionate. They like to be petted, and they like to be held. They "
+#             "also like to be scratched.\nCats are also very clean. They like to groom themselves, and they like to "
+#             "clean their litter box.\nCats are also very independent. They don't"
+#         )
+#         self.assertEqual(decoded[0], expected_text)

+#     def test_dynamic_cache_batched(self):
+#         tokenizer = AutoTokenizer.from_pretrained(
+#             "meta-llama/Llama-2-7b-hf", padding_side="left", from_hf_hub=True, use_fast=True
+#         )
+#         tokenizer.pad_token = tokenizer.eos_token
+#         model = AutoModelForCausalLM.from_pretrained(
+#             "meta-llama/Llama-2-7b-hf",
+#             device_map="auto",
+#             dtype=paddle.float16,
+#             from_hf_hub=True,
+#         )
+#         inputs = tokenizer(["A sequence: 1, 2, 3, 4, 5", "A sequence: A, B, C"], padding=True, return_tensors="np").to(
+#             model.device
+#         )
+#         for key in inputs:
+#             inputs[key] = paddle.to_tensor(inputs[key])

+#         gen_out = model.generate(**inputs, do_sample=False, max_new_tokens=10, past_key_values=DynamicCache())
+#         decoded = tokenizer.batch_decode(gen_out[0], skip_special_tokens=True)
+#         expected_text = ["A sequence: 1, 2, 3, 4, 5, 6, 7, 8,", "A sequence: A, B, C, D, E, F, G, H"]
+#         self.assertListEqual(decoded, expected_text)

+#     def test_dynamic_cache_beam_search(self):
+#         tokenizer = AutoTokenizer.from_pretrained(
+#             "meta-llama/Llama-2-7b-hf", padding_side="left", from_hf_hub=True, use_fast=True
+#         )
+#         model = AutoModelForCausalLM.from_pretrained(
+#             "meta-llama/Llama-2-7b-hf",
+#             device_map="auto",
+#             dtype=paddle.float16,
+#             from_hf_hub=True,
+#         )

+#         inputs = tokenizer(["The best color is"], return_tensors="np")
+#         for key in inputs:
+#             inputs[key] = paddle.to_tensor(inputs[key])
+#         gen_out = model.generate(
+#             **inputs,
+#             do_sample=False,
+#             max_new_tokens=20,
+#             num_beams=2,
+#             num_return_sequences=2,
+#         )
+#         decoded = tokenizer.batch_decode(gen_out[0], skip_special_tokens=True)
+#         expected_text = [
+#             "The best color is the one that makes you feel good.\nThe best color is the one that makes you feel good",
+#             "The best color is the one that suits you.\nThe best color is the one that suits you. The",
+#         ]
+#         self.assertListEqual(decoded, expected_text)
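
Note: commenting the class out removes CacheIntegrationTest from test collection entirely, so the runner gives no signal that these tests exist. If the disable is meant to be temporary, the standard-library unittest.skip decorator would keep the code importable and report each test as skipped. A minimal sketch, not part of this commit; the skip reason string is illustrative:

import unittest

# A skipped class is still collected: each test is reported as "skipped"
# with the given reason rather than silently disappearing from the suite.
@unittest.skip("integration test: requires downloading meta-llama/Llama-2-7b-hf")
class CacheIntegrationTest(unittest.TestCase):
    def test_dynamic_cache_hard(self):
        ...  # original test body unchanged

unittest.skipIf or unittest.skipUnless could likewise gate the class on an environment flag, so the tests run only where the Llama-2 weights are available.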
