diff --git a/tests/generation/test_beam_search.py b/tests/generation/test_beam_search.py
index 6731f8b2a7e03a..47d3b4b38a7b5a 100644
--- a/tests/generation/test_beam_search.py
+++ b/tests/generation/test_beam_search.py
@@ -575,39 +575,3 @@ def test_constrained_beam_scorer_update(self):
     def test_constrained_beam_scorer_finalize(self):
         inputs = self.constrained_beam_search_tester.prepare_inputs()
         self.constrained_beam_search_tester.check_constrained_beam_scorer_finalize(*inputs)
-
-
-@require_torch
-@require_torch_gpu
-# @slow
-class Beams4dTest(unittest.TestCase):
-    def setUp(self):
-        device = 'cuda' if torch.cuda.is_available() else 'cpu'
-        self.model_name = "facebook/opt-125m"
-        model = transformers.AutoModelForCausalLM.from_pretrained(self.model_name, device_map=device)
-
-        prefix_tokens = torch.tensor([11, 22, 33], device=device)
-        new_tokens = torch.tensor([44, 55, 66, 77], device=device)
-
-        model_outputs_0 = model.forward(input_ids=prefix_tokens.unsqueeze(0))  # preparing KV cache value
-
-        # regular beam search iteration
-        kv_cache = 
-
-
-  # Constant values
-    EXPECTED_RELATIVE_DIFFERENCE = (
-        2.109659552692574  # This was obtained on a RTX Titan so the number might slightly change
-    )
-
-    input_text = "Hello my name is"
-    EXPECTED_OUTPUTS = set()
-    EXPECTED_OUTPUTS.add("Hello my name is John and I am a professional photographer. I")
-    EXPECTED_OUTPUTS.add("Hello my name is John.\nI am a friend of your father.\n")
-    EXPECTED_OUTPUTS.add("Hello my name is John Doe, I am a student at the University")
-    MAX_NEW_TOKENS = 10
-
-    def setUp(self):
-        # Models and tokenizer
-        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
-