diff --git a/src/transformers/modeling_tf_t5.py b/src/transformers/modeling_tf_t5.py
index 7bca9e84665ce8..584f6a274c3550 100644
--- a/src/transformers/modeling_tf_t5.py
+++ b/src/transformers/modeling_tf_t5.py
@@ -1136,9 +1136,7 @@ def call(
             training=training,
         )
 
-        past = (
-            (encoder_outputs, decoder_outputs[1]) if cast_bool_to_primitive(use_cache, self.config.use_cache) else None
-        )
+        past = (encoder_outputs, decoder_outputs[1]) if (use_cache) else None
         if not return_dict:
             if past is not None:
                 decoder_outputs = decoder_outputs[:1] + (past,) + decoder_outputs[2:]
diff --git a/tests/test_modeling_tf_common.py b/tests/test_modeling_tf_common.py
index a5be6ad2027b0a..8984ce9f2a57da 100644
--- a/tests/test_modeling_tf_common.py
+++ b/tests/test_modeling_tf_common.py
@@ -76,7 +76,7 @@ class TFModelTesterMixin:
     test_resize_embeddings = True
     is_encoder_decoder = False
 
-    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False):
+    def _prepare_for_class(self, inputs_dict, model_class, return_labels=False) -> dict:
         inputs_dict = copy.deepcopy(inputs_dict)
 
         if model_class in TF_MODEL_FOR_MULTIPLE_CHOICE_MAPPING.values():
diff --git a/tests/test_modeling_tf_lxmert.py b/tests/test_modeling_tf_lxmert.py
index a9072e3dd1769f..0fee989109937a 100644
--- a/tests/test_modeling_tf_lxmert.py
+++ b/tests/test_modeling_tf_lxmert.py
@@ -733,7 +733,11 @@ def test_saved_model_with_attentions_output(self):
                 model = tf.keras.models.load_model(tmpdirname)
                 outputs = model(class_inputs_dict)
 
-                language_attentions, vision_attentions, cross_encoder_attentions = (outputs[-3], outputs[-2], outputs[-1])
+                language_attentions, vision_attentions, cross_encoder_attentions = (
+                    outputs[-3],
+                    outputs[-2],
+                    outputs[-1],
+                )
 
                 self.assertEqual(len(language_attentions), self.model_tester.num_hidden_layers["language"])
                 self.assertEqual(len(vision_attentions), self.model_tester.num_hidden_layers["vision"])
diff --git a/tests/test_modeling_tf_t5.py b/tests/test_modeling_tf_t5.py
index 5a1e7768f283a8..60e6a8218e52b7 100644
--- a/tests/test_modeling_tf_t5.py
+++ b/tests/test_modeling_tf_t5.py
@@ -12,8 +12,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-
+import tempfile
 import unittest
 
 from transformers import T5Config, is_tf_available
@@ -282,6 +281,14 @@ def test_model_from_pretrained(self):
         model = TFT5Model.from_pretrained("t5-small")
         self.assertIsNotNone(model)
 
+    @slow
+    def test_saved_model_with_attentions_output(self):
+        pass
+
+    @slow
+    def test_saved_model_with_hidden_states_output(self):
+        pass
+
 
 @require_tf
 @require_sentencepiece
diff --git a/tests/test_modeling_tf_xlm_roberta.py b/tests/test_modeling_tf_xlm_roberta.py
index b67d42db4e5f70..10485abfe5cd34 100644
--- a/tests/test_modeling_tf_xlm_roberta.py
+++ b/tests/test_modeling_tf_xlm_roberta.py
@@ -39,7 +39,7 @@ def test_output_embeds_base_model(self):
             "attention_mask": tf.convert_to_tensor([[1, 1, 1, 1, 1, 1]], dtype=tf.int32),
         }
 
-        output = model(features)["last_hidden_state"]
+        output = model(features, return_dict=True)["last_hidden_state"]
         expected_shape = tf.TensorShape((1, 6, 768))
         self.assertEqual(output.shape, expected_shape)
         # compare the actual values for a slice.