diff --git a/tests/generation/test_beam_search.py b/tests/generation/test_beam_search.py index 6731f8b2a7e03a..47d3b4b38a7b5a 100644 --- a/tests/generation/test_beam_search.py +++ b/tests/generation/test_beam_search.py @@ -575,39 +575,3 @@ def test_constrained_beam_scorer_update(self): def test_constrained_beam_scorer_finalize(self): inputs = self.constrained_beam_search_tester.prepare_inputs() self.constrained_beam_search_tester.check_constrained_beam_scorer_finalize(*inputs) - - -@require_torch -@require_torch_gpu -# @slow -class Beams4dTest(unittest.TestCase): - def setUp(self): - device = 'cuda' if torch.cuda.is_available() else 'cpu' - self.model_name = "facebook/opt-125m" - model = transformers.AutoModelForCausalLM.from_pretrained(self.model_name, device_map=device) - - prefix_tokens = torch.tensor([11, 22, 33], device=device) - new_tokens = torch.tensor([44, 55, 66, 77], device=device) - - model_outputs_0 = model.forward(input_ids=prefix_tokens.unsqueeze(0)) # preparing KV cache value - - # regular beam search iteration - kv_cache = - - - # Constant values - EXPECTED_RELATIVE_DIFFERENCE = ( - 2.109659552692574 # This was obtained on a RTX Titan so the number might slightly change - ) - - input_text = "Hello my name is" - EXPECTED_OUTPUTS = set() - EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer. I") - EXPECTED_OUTPUTS.add("Hello my name is John.\nI am a friend of your father.\n") - EXPECTED_OUTPUTS.add("Hello my name is John Doe, I am a student at the University") - MAX_NEW_TOKENS = 10 - - def setUp(self): - # Models and tokenizer - self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) -