This is a PyTorch implementation of the CoCa model. The repository is still a work in progress and is not yet complete.
@article{Yu2022CoCaCC,
  author        = {Yu, Jiahui and Wang, Zirui and Vasudevan, Vijay and Yeung, Legg and Seyedhosseini, Mojtaba and Wu, Yonghui},
  title         = {{CoCa}: Contrastive Captioners are Image-Text Foundation Models},
  journal       = {Transactions on Machine Learning Research},
  year          = {2022},
  eprint        = {2205.01917},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
}