forked from ChiaXinLiang/MLLM-book
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request ChiaXinLiang#15 from weian312/dev
update ch4 ref
- Loading branch information
Showing
2 changed files
with
323 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,246 @@ | ||
% --- Hallucination-mitigation references (cite1--cite4) ---------------------
% NOTE(review): the original entries had empty author fields; they are
% omitted here (an empty field still triggers a BibTeX warning) and flagged
% via the ignored `internal-note` field. Fill them in from the cited pages.
% arXiv preprints use eprint/archiveprefix instead of a \url in howpublished.

@misc{cite1,
  title         = {Hallucination Augmented Contrastive Learning for Multimodal Large Language Models},
  year          = {2023},
  eprint        = {2312.06968},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2312.06968},
  internal-note = {TODO(review): add author list from the arXiv abstract page},
}

@misc{cite2,
  title         = {{mPLUG-HalOwl}: Multimodal Large Language Model},
  year          = {2023},
  url           = {https://github.com/X-PLUG/mPLUG-HalOwl},
  internal-note = {GitHub repository. TODO(review): add repository maintainers as authors},
}

@misc{cite3,
  title         = {{Img-Diff}: Contrastive Data Synthesis for Multimodal Large Language Models},
  year          = {2024},
  eprint        = {2408.04594},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2408.04594},
  internal-note = {TODO(review): add author list from the arXiv abstract page},
}

@misc{cite4,
  title         = {Multimodality and Large Multimodal Models ({LMMs})},
  year          = {2023},
  url           = {https://huyenchip.com/2023/10/10/multimodal.html},
  internal-note = {blog post; author is presumably Chip Huyen (domain owner) -- confirm before filling in},
}
% --- Survey / application references ----------------------------------------
% NOTE(review): `author = {Author Name}` placeholders removed -- they would
% have been typeset verbatim in the bibliography. `journal = {arXiv}` is an
% anti-pattern; arXiv preprints are @misc with eprint fields. Two pairs below
% appear to be duplicates of the same work (see internal-note fields).

@misc{ComprehensiveSurvey2024,
  title         = {A Comprehensive Survey of Large Language Models and ...},
  year          = {2024},
  eprint        = {2405.08603},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/html/2405.08603v1},
  internal-note = {TODO(review): title was truncated by the scraper -- restore full title and add authors},
}

@inproceedings{ManipLLM2024,
  title         = {{ManipLLM}: Embodied Multimodal Large Language Model for Object-Centric Robotic Manipulation},
  booktitle     = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  year          = {2024},
  url           = {https://openaccess.thecvf.com/content/CVPR2024/papers/Li_ManipLLM_Embodied_Multimodal_Large_Language_Model_for_Object-Centric_Robotic_Manipulation_CVPR_2024_paper.pdf},
  internal-note = {full title reconstructed from the PDF filename; TODO(review): add authors (first author Li, per the same filename) and pages},
}

@misc{OverviewLMM2024,
  title         = {An Overview of Large Multi-modal Models ({LMMs}): Part 1},
  year          = {2024},
  url           = {https://medium.com/@baicenxiao/introduction-to-the-large-multi-modal-models-llms-part-1-07de7e9caf40},
  internal-note = {Medium post, not a journal article; author handle is @baicenxiao -- confirm real name before filling in},
}

@misc{ResearchDevelopment2023,
  title         = {{ManipLLM}: Embodied Multimodal Large Language Model for Object-Centric Robotic Manipulation},
  year          = {2023},
  eprint        = {2312.16217},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/html/2312.16217v1},
  internal-note = {arXiv preprint of ManipLLM2024 -- likely a duplicate; consider citing only the CVPR version},
}

@misc{MaskedVisionLanguage2023,
  title         = {Masked Vision and Language Pre-training with Unimodal and Multimodal Contrastive Losses for Medical Visual Question Answering},
  year          = {2023},
  eprint        = {2307.05314},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2307.05314},
  internal-note = {TODO(review): add authors; same title as MedicalVQA2023 (MICCAI) -- consider deduplicating},
}

@misc{RAMMBiomedicalVQA2023,
  title         = {{RAMM}: Retrieval-augmented Biomedical Visual Question Answering},
  year          = {2023},
  eprint        = {2303.00534},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2303.00534},
  internal-note = {TODO(review): add authors},
}

@inproceedings{MedicalVQA2023,
  title         = {Masked Vision and Language Pre-training with Unimodal and Multimodal Contrastive Losses for Medical Visual Question Answering},
  booktitle     = {Medical Image Computing and Computer Assisted Intervention ({MICCAI} 2023)},
  year          = {2023},
  url           = {https://conferences.miccai.org/2023/papers/401-Paper2138.html},
  internal-note = {TODO(review): add authors, pages, publisher; duplicate of MaskedVisionLanguage2023},
}
@misc{pengfeiliHEU2023,
  title         = {{MUMC}: This repository is made for the paper ...},
  author        = {Li, Pengfei},
  year          = {2023},
  url           = {https://github.com/pengfeiliHEU/MUMC},
  internal-note = {GitHub repository; title was truncated by the scraper ("...") -- replace with the repository's actual description},
}

% NOTE(review): the entry below was typed @article with the hosting site in
% the journal field; it is a blog post, so @misc with a url is the right shape.
@misc{VisoAI2024,
  title         = {Vision Language Models: Exploring Multimodal {AI}},
  year          = {2024},
  url           = {https://viso.ai/deep-learning/vision-language-models/},
  internal-note = {blog post on viso.ai; "Author Name" placeholder removed -- add the real author if known},
}
% NOTE(review): placeholder authors removed throughout this group; arXiv
% preprints converted from @article{journal={arXiv}} to @misc + eprint fields.

@inproceedings{FlexibleVLP2023,
  title         = {Fusion or Defusion? Flexible Vision-and-Language Pre-Training},
  booktitle     = {Findings of the Association for Computational Linguistics: {ACL} 2023},
  year          = {2023},
  url           = {https://aclanthology.org/2023.findings-acl.316},
  internal-note = {venue inferred from the Anthology URL (2023.findings-acl); TODO(review): add authors and pages},
}

@misc{HeterogeneityFederatedVLP2024,
  title         = {Mitigating Heterogeneity in Federated Multimodal Learning with Biomedical {VLP}},
  year          = {2024},
  eprint        = {2404.03854},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/html/2404.03854v1},
  internal-note = {TODO(review): add authors},
}

@misc{FeedbackModalSearch2024,
  title         = {Feedback-based Modal Mutual Search for Attacking Vision-Language Models},
  year          = {2024},
  eprint        = {2409.06726},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/html/2409.06726v1},
  internal-note = {TODO(review): add authors},
}

@misc{TenyksBlogger2024,
  title         = {Multimodal Large Language Models ({MLLMs}) transforming computer vision},
  author        = {{Tenyks Blogger}},
  year          = {2024},
  url           = {https://medium.com/@tenyks_blogger/multimodal-large-language-models-mllms-transforming-computer-vision-76d3c5dd267f},
  internal-note = {Medium post; "Tenyks Blogger" is a publication handle, braced as a corporate author},
}

@misc{EfficientMLLMs2024,
  title         = {Efficient Multimodal Large Language Models: A Survey},
  year          = {2024},
  eprint        = {2405.10739},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/html/2405.10739v1},
  internal-note = {TODO(review): add authors},
}

@misc{MultiwayAdapter2024,
  title         = {{Multiway-Adapter}: Adapting Multimodal Large Language Models for Specific Tasks},
  year          = {2024},
  url           = {https://ieeexplore.ieee.org/document/10446792},
  internal-note = {TODO(review): "IEEE Explore" is the hosting platform, not a venue -- identify the actual conference/journal, authors, and DOI},
}

@misc{RobustInstructionTuning2024,
  title         = {Towards Robust Instruction Tuning on Multimodal Large Language Models},
  year          = {2024},
  eprint        = {2402.14492},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/html/2402.14492v2},
  internal-note = {TODO(review): add authors},
}
% --- Placeholder entries -----------------------------------------------------
% NOTE(review): the eight entries below were pure stubs (author = {Author
% Name}, journal = {Journal Name}, url = {URL}). Those placeholder fields are
% removed so they can never leak into a typeset bibliography; each entry keeps
% its key and title so existing \cite commands still resolve. Replace each
% with real bibliographic data, or delete the keys that are never cited.

@misc{CrossModalTasks2024,
  title         = {Cross-Modal Tasks in Multimodal Large Language Models},
  year          = {2024},
  internal-note = {TODO(review): stub -- locate the actual source},
}

@misc{FewShotZeroShotLearning2024,
  title         = {Few-Shot and Zero-Shot Learning in {MLLMs}},
  year          = {2024},
  internal-note = {TODO(review): stub -- locate the actual source},
}

@misc{FewShotLearning2024,
  title         = {Few-Shot Learning in Multimodal Large Language Models},
  year          = {2024},
  internal-note = {TODO(review): stub -- locate the actual source},
}

@misc{ZeroShotLearning2024,
  title         = {Zero-Shot Learning with {CLIP}},
  year          = {2024},
  internal-note = {TODO(review): stub -- locate the actual source},
}

@misc{TransferLearning2024,
  title         = {Transfer Learning in Multimodal Large Language Models},
  year          = {2024},
  internal-note = {TODO(review): stub -- locate the actual source},
}

@misc{InstructionTuning2024,
  title         = {Instruction Tuning for Multimodal Large Language Models},
  year          = {2024},
  internal-note = {TODO(review): stub -- locate the actual source},
}

@misc{NaturalLanguageInstructions2024,
  title         = {Natural Language Instructions for {MLLMs}},
  year          = {2024},
  internal-note = {TODO(review): stub -- locate the actual source},
}

@misc{MultimodalInstructionTuning2024,
  title         = {Multimodal Instruction Tuning},
  year          = {2024},
  internal-note = {TODO(review): stub -- locate the actual source},
}
% --- Web resources -----------------------------------------------------------
% NOTE(review): URLs moved from howpublished={\url{...}} to a proper url
% field (styles add \url themselves); access dates kept in note and also
% given as biblatex urldate for url-rot tracking.

@misc{LarkSuite,
  title   = {Instruction Tuning},
  year    = {2023},
  url     = {https://www.larksuite.com/en_us/topics/ai-glossary/instruction-tuning},
  urldate = {2024-09-28},
  note    = {Accessed: 2024-09-28},
}

@misc{OpenAICommunity,
  title   = {Can I fine-tune the model without the prompt and answer for the system role?},
  year    = {2023},
  url     = {https://community.openai.com/t/can-i-fine-tune-the-model-without-the-prompt-and-answer-for-the-system-role/550580},
  urldate = {2024-09-28},
  note    = {Accessed: 2024-09-28},
}

@misc{RohitAggarwal,
  author        = {Aggarwal, Rohit},
  title         = {{AI} systems with applications spanning software development, recruitment, and content creation},
  url           = {https://eccles.utah.edu/team/rohit-aggarwal/},
  urldate       = {2024-09-28},
  note          = {Accessed: 2024-09-28},
  internal-note = {profile page at eccles.utah.edu; no year in the original -- add one if the page is dated},
}

@misc{RedditExperience,
  title   = {My experience on starting with fine tuning {LLMs} with custom data},
  year    = {2023},
  url     = {https://www.reddit.com/r/LocalLLaMA/comments/14vnfh2/my_experience_on_starting_with_fine_tuning_llms/},
  urldate = {2024-09-28},
  note    = {Accessed: 2024-09-28},
}
Oops, something went wrong.