From 147c8166852db64de12b851b8307f44c9e8fe0dd Mon Sep 17 00:00:00 2001 From: Aditya Kane <64411306+AdityaKane2001@users.noreply.github.com> Date: Wed, 30 Mar 2022 19:36:32 +0530 Subject: [PATCH] Nit: MCSCOCO -> MSCOCO (#16481) --- src/transformers/models/lxmert/modeling_lxmert.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/models/lxmert/modeling_lxmert.py b/src/transformers/models/lxmert/modeling_lxmert.py index 54db04c51d5fdb..c9b2541251e855 100644 --- a/src/transformers/models/lxmert/modeling_lxmert.py +++ b/src/transformers/models/lxmert/modeling_lxmert.py @@ -803,7 +803,7 @@ def _init_weights(self, module): The LXMERT model was proposed in [LXMERT: Learning Cross-Modality Encoder Representations from Transformers](https://arxiv.org/abs/1908.07490) by Hao Tan and Mohit Bansal. It's a vision and language transformer - model, pretrained on a variety of multi-modal datasets comprising of GQA, VQAv2.0, MCSCOCO captions, and Visual + model, pretrained on a variety of multi-modal datasets comprising of GQA, VQAv2.0, MSCOCO captions, and Visual genome, using a combination of masked language modeling, region of interest feature regression, cross entropy loss for question answering attribute prediction, and object tag prediction.