comment out integration test
lvdongyi committed Nov 10, 2024
1 parent 32a1764 commit 35ea106
Showing 1 changed file with 90 additions and 92 deletions.
182 changes: 90 additions & 92 deletions tests/test_cache_utils.py
@@ -18,14 +18,12 @@
 import numpy as np
 import paddle
 
-from paddlenlp.transformers import (
-    AutoModelForCausalLM,
-    AutoTokenizer,
+from paddlenlp.transformers import (  # AutoModelForCausalLM,; AutoTokenizer,
     DynamicCache,
     LlamaForCausalLM,
 )
 
-from .testing_utils import slow
+# from .testing_utils import slow
 
 
 def set_seed(seed):
@@ -117,91 +115,91 @@ def test_reorder_cache_retrocompatibility(self):
         )
 
 
-@slow
-class CacheIntegrationTest(unittest.TestCase):
-    def test_dynamic_cache_hard(self):
-        tokenizer = AutoTokenizer.from_pretrained(
-            "meta-llama/Llama-2-7b-hf", padding_side="left", from_hf_hub=True, use_fast=True
-        )
-        model = AutoModelForCausalLM.from_pretrained(
-            "meta-llama/Llama-2-7b-hf",
-            dtype=paddle.float16,
-            from_hf_hub=True,
-        )
-        inputs = tokenizer(["Here's everything I know about cats. Cats"], return_tensors="np")
-        for key in inputs:
-            inputs[key] = paddle.to_tensor(inputs[key])
-
-        # DynamicCache and the legacy cache format should be equivalent
-        set_seed(0)
-        gen_out_legacy = model.generate(**inputs, do_sample=True, max_new_tokens=256)
-        set_seed(0)
-        gen_out = model.generate(**inputs, do_sample=True, max_new_tokens=256, past_key_values=DynamicCache())
-        self.assertListEqual(gen_out_legacy[0].tolist(), gen_out[0].tolist())
-        self.assertListEqual(gen_out_legacy[1].tolist(), gen_out[1].tolist())
-
-        decoded = tokenizer.batch_decode(gen_out[0], skip_special_tokens=True)
-
-        expected_text = (
-            "Here's everything I know about cats. Cats are mysterious creatures. They can't talk, and they don't like "
-            "to be held. They don't play fetch, and they don't like to be hugged. But they do like to be petted.\n"
-            "Cats are also very independent. They don't like to be told what to do, and they don't like to be told "
-            "what to eat. They are also very territorial. They don't like to share their food or their toys.\nCats "
-            "are also very curious. They like to explore, and they like to play. They are also very fast. They can "
-            "run very fast, and they can jump very high.\nCats are also very smart. They can learn tricks, and they "
-            "can solve problems. They are also very playful. They like to play with toys, and they like to play with "
-            "other cats.\nCats are also very affectionate. They like to be petted, and they like to be held. They "
-            "also like to be scratched.\nCats are also very clean. They like to groom themselves, and they like to "
-            "clean their litter box.\nCats are also very independent. They don't"
-        )
-        self.assertEqual(decoded[0], expected_text)
-
-    def test_dynamic_cache_batched(self):
-        tokenizer = AutoTokenizer.from_pretrained(
-            "meta-llama/Llama-2-7b-hf", padding_side="left", from_hf_hub=True, use_fast=True
-        )
-        tokenizer.pad_token = tokenizer.eos_token
-        model = AutoModelForCausalLM.from_pretrained(
-            "meta-llama/Llama-2-7b-hf",
-            device_map="auto",
-            dtype=paddle.float16,
-            from_hf_hub=True,
-        )
-        inputs = tokenizer(["A sequence: 1, 2, 3, 4, 5", "A sequence: A, B, C"], padding=True, return_tensors="np").to(
-            model.device
-        )
-        for key in inputs:
-            inputs[key] = paddle.to_tensor(inputs[key])
-
-        gen_out = model.generate(**inputs, do_sample=False, max_new_tokens=10, past_key_values=DynamicCache())
-        decoded = tokenizer.batch_decode(gen_out[0], skip_special_tokens=True)
-        expected_text = ["A sequence: 1, 2, 3, 4, 5, 6, 7, 8,", "A sequence: A, B, C, D, E, F, G, H"]
-        self.assertListEqual(decoded, expected_text)
-
-    def test_dynamic_cache_beam_search(self):
-        tokenizer = AutoTokenizer.from_pretrained(
-            "meta-llama/Llama-2-7b-hf", padding_side="left", from_hf_hub=True, use_fast=True
-        )
-        model = AutoModelForCausalLM.from_pretrained(
-            "meta-llama/Llama-2-7b-hf",
-            device_map="auto",
-            dtype=paddle.float16,
-            from_hf_hub=True,
-        )
-
-        inputs = tokenizer(["The best color is"], return_tensors="np")
-        for key in inputs:
-            inputs[key] = paddle.to_tensor(inputs[key])
-        gen_out = model.generate(
-            **inputs,
-            do_sample=False,
-            max_new_tokens=20,
-            num_beams=2,
-            num_return_sequences=2,
-        )
-        decoded = tokenizer.batch_decode(gen_out[0], skip_special_tokens=True)
-        expected_text = [
-            "The best color is the one that makes you feel good.\nThe best color is the one that makes you feel good",
-            "The best color is the one that suits you.\nThe best color is the one that suits you. The",
-        ]
-        self.assertListEqual(decoded, expected_text)
+# @slow
+# class CacheIntegrationTest(unittest.TestCase):
+#     def test_dynamic_cache_hard(self):
+#         tokenizer = AutoTokenizer.from_pretrained(
+#             "meta-llama/Llama-2-7b-hf", padding_side="left", from_hf_hub=True, use_fast=True
+#         )
+#         model = AutoModelForCausalLM.from_pretrained(
+#             "meta-llama/Llama-2-7b-hf",
+#             dtype=paddle.float16,
+#             from_hf_hub=True,
+#         )
+#         inputs = tokenizer(["Here's everything I know about cats. Cats"], return_tensors="np")
+#         for key in inputs:
+#             inputs[key] = paddle.to_tensor(inputs[key])
+
+#         # DynamicCache and the legacy cache format should be equivalent
+#         set_seed(0)
+#         gen_out_legacy = model.generate(**inputs, do_sample=True, max_new_tokens=256)
+#         set_seed(0)
+#         gen_out = model.generate(**inputs, do_sample=True, max_new_tokens=256, past_key_values=DynamicCache())
+#         self.assertListEqual(gen_out_legacy[0].tolist(), gen_out[0].tolist())
+#         self.assertListEqual(gen_out_legacy[1].tolist(), gen_out[1].tolist())

+#         decoded = tokenizer.batch_decode(gen_out[0], skip_special_tokens=True)

+#         expected_text = (
+#             "Here's everything I know about cats. Cats are mysterious creatures. They can't talk, and they don't like "
+#             "to be held. They don't play fetch, and they don't like to be hugged. But they do like to be petted.\n"
+#             "Cats are also very independent. They don't like to be told what to do, and they don't like to be told "
+#             "what to eat. They are also very territorial. They don't like to share their food or their toys.\nCats "
+#             "are also very curious. They like to explore, and they like to play. They are also very fast. They can "
+#             "run very fast, and they can jump very high.\nCats are also very smart. They can learn tricks, and they "
+#             "can solve problems. They are also very playful. They like to play with toys, and they like to play with "
+#             "other cats.\nCats are also very affectionate. They like to be petted, and they like to be held. They "
+#             "also like to be scratched.\nCats are also very clean. They like to groom themselves, and they like to "
+#             "clean their litter box.\nCats are also very independent. They don't"
+#         )
+#         self.assertEqual(decoded[0], expected_text)

+#     def test_dynamic_cache_batched(self):
+#         tokenizer = AutoTokenizer.from_pretrained(
+#             "meta-llama/Llama-2-7b-hf", padding_side="left", from_hf_hub=True, use_fast=True
+#         )
+#         tokenizer.pad_token = tokenizer.eos_token
+#         model = AutoModelForCausalLM.from_pretrained(
+#             "meta-llama/Llama-2-7b-hf",
+#             device_map="auto",
+#             dtype=paddle.float16,
+#             from_hf_hub=True,
+#         )
+#         inputs = tokenizer(["A sequence: 1, 2, 3, 4, 5", "A sequence: A, B, C"], padding=True, return_tensors="np").to(
+#             model.device
+#         )
+#         for key in inputs:
+#             inputs[key] = paddle.to_tensor(inputs[key])

+#         gen_out = model.generate(**inputs, do_sample=False, max_new_tokens=10, past_key_values=DynamicCache())
+#         decoded = tokenizer.batch_decode(gen_out[0], skip_special_tokens=True)
+#         expected_text = ["A sequence: 1, 2, 3, 4, 5, 6, 7, 8,", "A sequence: A, B, C, D, E, F, G, H"]
+#         self.assertListEqual(decoded, expected_text)

+#     def test_dynamic_cache_beam_search(self):
+#         tokenizer = AutoTokenizer.from_pretrained(
+#             "meta-llama/Llama-2-7b-hf", padding_side="left", from_hf_hub=True, use_fast=True
+#         )
+#         model = AutoModelForCausalLM.from_pretrained(
+#             "meta-llama/Llama-2-7b-hf",
+#             device_map="auto",
+#             dtype=paddle.float16,
+#             from_hf_hub=True,
+#         )

+#         inputs = tokenizer(["The best color is"], return_tensors="np")
+#         for key in inputs:
+#             inputs[key] = paddle.to_tensor(inputs[key])
+#         gen_out = model.generate(
+#             **inputs,
+#             do_sample=False,
+#             max_new_tokens=20,
+#             num_beams=2,
+#             num_return_sequences=2,
+#         )
+#         decoded = tokenizer.batch_decode(gen_out[0], skip_special_tokens=True)
+#         expected_text = [
+#             "The best color is the one that makes you feel good.\nThe best color is the one that makes you feel good",
+#             "The best color is the one that suits you.\nThe best color is the one that suits you. The",
+#         ]
+#         self.assertListEqual(decoded, expected_text)
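
Note: commenting the class out removes CacheIntegrationTest from test collection entirely, so the runner gives no signal that these tests exist. If the disable is meant to be temporary, the standard-library unittest.skip decorator would keep the code importable and report each test as skipped. A minimal sketch, not part of this commit; the skip reason string is illustrative:

import unittest

# A skipped class is still collected: each test is reported as "skipped"
# with the given reason rather than silently disappearing from the suite.
@unittest.skip("integration test: requires downloading meta-llama/Llama-2-7b-hf")
class CacheIntegrationTest(unittest.TestCase):
    def test_dynamic_cache_hard(self):
        ...  # original test body unchanged

unittest.skipIf or unittest.skipUnless could likewise gate the class on an environment flag, so the tests run only where the Llama-2 weights are available.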
