Commit b7a4865

Merge branch 'main' of github.com:haotian-liu/LLaVA into main
haotian-liu committed Nov 4, 2023
2 parents 5657a1a + 6c5ab2e commit b7a4865
Showing 23 changed files with 764 additions and 35 deletions.
53 changes: 53 additions & 0 deletions .devcontainer/Dockerfile
@@ -0,0 +1,53 @@
FROM mcr.microsoft.com/devcontainers/base:ubuntu-20.04

SHELL [ "bash", "-c" ]

# update apt and install packages
RUN apt update && \
apt install -yq \
ffmpeg \
dkms \
build-essential

# add user tools
RUN sudo apt install -yq \
jq \
jp \
tree \
tldr

# add git-lfs and install
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash && \
sudo apt-get install -yq git-lfs && \
git lfs install

############################################
# Setup user
############################################

USER vscode

# install azcopy, a tool to copy to/from blob storage
# for more info: https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-blobs-upload#upload-a-file
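# Example upload (hypothetical <account>/<container>/<sas-token> placeholders):
#   azcopy copy ./checkpoints "https://<account>.blob.core.windows.net/<container>/checkpoints?<sas-token>" --recursive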
RUN cd /tmp && \
wget https://azcopyvnext.azureedge.net/release20230123/azcopy_linux_amd64_10.17.0.tar.gz && \
tar xvf azcopy_linux_amd64_10.17.0.tar.gz && \
mkdir -p ~/.local/bin && \
mv azcopy_linux_amd64_10.17.0/azcopy ~/.local/bin && \
chmod +x ~/.local/bin/azcopy && \
rm -rf azcopy_linux_amd64*

# Setup conda
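# (the -b flag runs the installer non-interactively, installing to ~/miniconda3)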
RUN cd /tmp && \
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash ./Miniconda3-latest-Linux-x86_64.sh -b && \
rm ./Miniconda3-latest-Linux-x86_64.sh

# Install dotnet
RUN cd /tmp && \
wget https://dot.net/v1/dotnet-install.sh && \
chmod +x dotnet-install.sh && \
./dotnet-install.sh --channel 7.0 && \
./dotnet-install.sh --channel 3.1 && \
rm ./dotnet-install.sh

2 changes: 2 additions & 0 deletions .devcontainer/devcontainer.env
@@ -0,0 +1,2 @@
SAMPLE_ENV_VAR1="Sample Value"
SAMPLE_ENV_VAR2=332431bf-68bf
71 changes: 71 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,71 @@
{
"name": "LLaVA",
"build": {
"dockerfile": "Dockerfile",
"context": "..",
"args": {}
},
"features": {
"ghcr.io/devcontainers/features/docker-in-docker:2": {},
"ghcr.io/devcontainers/features/azure-cli:1": {},
"ghcr.io/azure/azure-dev/azd:0": {},
"ghcr.io/devcontainers/features/powershell:1": {},
"ghcr.io/devcontainers/features/common-utils:2": {},
"ghcr.io/devcontainers-contrib/features/zsh-plugins:0": {},
},
// "forwardPorts": [],
"postCreateCommand": "bash ./.devcontainer/postCreateCommand.sh",
"customizations": {
"vscode": {
"settings": {
"python.analysis.autoImportCompletions": true,
"python.analysis.autoImportUserSymbols": true,
"python.defaultInterpreterPath": "~/miniconda3/envs/llava/bin/python",
"python.formatting.provider": "yapf",
"python.linting.enabled": true,
"python.linting.flake8Enabled": true,
"isort.check": true,
"dev.containers.copyGitConfig": true,
"terminal.integrated.defaultProfile.linux": "zsh",
"terminal.integrated.profiles.linux": {
"zsh": {
"path": "/usr/bin/zsh"
},
}
},
"extensions": [
"aaron-bond.better-comments",
"eamodio.gitlens",
"EditorConfig.EditorConfig",
"foxundermoon.shell-format",
"GitHub.copilot-chat",
"GitHub.copilot-labs",
"GitHub.copilot",
"lehoanganh298.json-lines-viewer",
"mhutchie.git-graph",
"ms-azuretools.vscode-docker",
"ms-dotnettools.dotnet-interactive-vscode",
"ms-python.flake8",
"ms-python.isort",
"ms-python.python",
"ms-python.vscode-pylance",
"njpwerner.autodocstring",
"redhat.vscode-yaml",
"stkb.rewrap",
"yzhang.markdown-all-in-one",
]
}
},
"mounts": [],
"runArgs": [
"--gpus",
"all",
// "--ipc",
// "host",
"--ulimit",
"memlock=-1",
"--env-file",
".devcontainer/devcontainer.env"
],
// "remoteUser": "root"
}
45 changes: 45 additions & 0 deletions .devcontainer/postCreateCommand.sh
@@ -0,0 +1,45 @@
git config --global safe.directory '*'
git config --global core.editor "code --wait"
git config --global pager.branch false

# Set AZCOPY concurrency to auto
echo "export AZCOPY_CONCURRENCY_VALUE=AUTO" >> ~/.zshrc
echo "export AZCOPY_CONCURRENCY_VALUE=AUTO" >> ~/.bashrc

# Activate conda by default
echo ". /home/vscode/miniconda3/bin/activate" >> ~/.zshrc
echo ". /home/vscode/miniconda3/bin/activate" >> ~/.bashrc

# Use llava environment by default
echo "conda activate llava" >> ~/.zshrc
echo "conda activate llava" >> ~/.bashrc

# Add dotnet to PATH
echo 'export PATH="$PATH:$HOME/.dotnet"' >> ~/.bashrc
echo 'export PATH="$PATH:$HOME/.dotnet"' >> ~/.zshrc

# Create and activate llava environment
source /home/vscode/miniconda3/bin/activate
conda create -y -q -n llava python=3.10
conda activate llava

# Install Nvidia Cuda Compiler
conda install -y -c nvidia cuda-compiler

pip install pre-commit==3.0.2

# Install package locally
pip install --upgrade pip # enable PEP 660 support
pip install -e .

# Install additional packages for training
pip install -e ".[train]"
pip install flash-attn --no-build-isolation

# Download checkpoints to a location outside the repo
git clone https://huggingface.co/liuhaotian/llava-v1.5-7b ~/llava-v1.5-7b

# Commented out: most users are unlikely to have enough local GPU memory to load the 13B model
# git clone https://huggingface.co/liuhaotian/llava-v1.5-13b ~/llava-v1.5-13b

echo "postCreateCommand.sh COMPLETE!"
21 changes: 21 additions & 0 deletions .dockerignore
@@ -0,0 +1,21 @@
# The .dockerignore file excludes files from the container build process.
#
# https://docs.docker.com/engine/reference/builder/#dockerignore-file

# Exclude Git files
.git
.github
.gitignore

# Exclude Python cache files
__pycache__
.mypy_cache
.pytest_cache
.ruff_cache

# Exclude Python virtual environment
/venv

# Exclude some weights
/openai
/liuhaotian
18 changes: 18 additions & 0 deletions .editorconfig
@@ -0,0 +1,18 @@
root = true

# Unix-style newlines with a newline ending every file
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8

# 4 space indentation
[*.{py,json}]
indent_style = space
indent_size = 4

# 2 space indentation
[*.{md,sh,yaml,yml}]
indent_style = space
indent_size = 2
29 changes: 29 additions & 0 deletions .gitattributes
@@ -0,0 +1,29 @@
# https://git-scm.com/docs/gitattributes

# Set the default behavior, in case people don't have core.autocrlf set.
# https://git-scm.com/docs/gitattributes#_end_of_line_conversion
* text=auto

# common python attributes, taken from https://github.com/alexkaratarakis/gitattributes/blob/710900479a2bedeec7003d381719521ffbb18bf8/Python.gitattributes
# Source files
# ============
*.pxd text diff=python
*.py text diff=python
*.py3 text diff=python
*.pyw text diff=python
*.pyx text diff=python
*.pyz text diff=python
*.pyi text diff=python

# Binary files
# ============
*.db binary
*.p binary
*.pkl binary
*.pickle binary
*.pyc binary export-ignore
*.pyo binary export-ignore
*.pyd binary

# Jupyter notebook
*.ipynb text eol=lf
6 changes: 6 additions & 0 deletions .gitignore
@@ -27,3 +27,9 @@ ckpts*

.ipynb_checkpoints
*.ipynb

# DevContainer
!.devcontainer/*

# Demo
serve_images/
49 changes: 48 additions & 1 deletion README.md
@@ -4,7 +4,7 @@

[[Project Page](https://llava-vl.github.io/)] [[Demo](https://llava.hliu.cc/)] [[Data](https://github.com/haotian-liu/LLaVA/blob/main/docs/Data.md)] [[Model Zoo](https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md)]

🤝Community Contributions: [[llama.cpp](https://github.com/ggerganov/llama.cpp/pull/3436)] [[Colab](https://github.com/camenduru/LLaVA-colab)] [[🤗Space](https://huggingface.co/spaces/badayvedat/LLaVA)]
🤝Community Contributions: [[llama.cpp](https://github.com/ggerganov/llama.cpp/pull/3436)] [[Colab](https://github.com/camenduru/LLaVA-colab)] [[🤗Space](https://huggingface.co/spaces/badayvedat/LLaVA)] [[Roboflow Deep Dive](https://blog.roboflow.com/first-impressions-with-llava-1-5/)] [[Replicate](https://replicate.com/yorickvp/llava-13b)]

**Improved Baselines with Visual Instruction Tuning** [[Paper](https://arxiv.org/abs/2310.03744)] <br>
[Haotian Liu](https://hliu.cc), [Chunyuan Li](https://chunyuan.li/), [Yuheng Li](https://yuheng-li.github.io/), [Yong Jae Lee](https://pages.cs.wisc.edu/~yongjaelee/)
@@ -88,6 +88,53 @@ git pull
pip install -e .
```

### Quick Start With HuggingFace

<details>
<summary>Example Code</summary>

```python
from llava.model.builder import load_pretrained_model
from llava.mm_utils import get_model_name_from_path

model_path = "liuhaotian/llava-v1.5-7b"
model_name = get_model_name_from_path(model_path)
model_base = None

tokenizer, model, image_processor, context_len = load_pretrained_model(
model_path=model_path,
model_base=model_base,
model_name=model_name
)
```

Check out the details of the `load_pretrained_model` function in `llava/model/builder.py`.
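
For reference, a minimal generation loop with the objects returned above might look like the sketch below, which mirrors the flow of `eval_model` in `llava/eval/run_llava.py`; the `llava_v1` conversation template and the generation settings are assumptions that suit the v1.5 checkpoints and may need adjusting for other models.

```python
# A minimal generation sketch (assumes tokenizer, model, image_processor from above;
# exact decoding details can vary across LLaVA/transformers versions).
import requests
import torch
from PIL import Image

from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
from llava.conversation import conv_templates
from llava.mm_utils import process_images, tokenizer_image_token

# Fetch and preprocess the image for the vision tower
url = "https://llava-vl.github.io/static/images/view.jpg"
image = Image.open(requests.get(url, stream=True).raw)
image_tensor = process_images([image], image_processor, model.config).to(model.device, dtype=torch.float16)

# Build a prompt that contains the special image token
conv = conv_templates["llava_v1"].copy()
conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\nGive me a short description of this image.")
conv.append_message(conv.roles[1], None)
prompt = conv.get_prompt()

# tokenizer_image_token splices IMAGE_TOKEN_INDEX into the token ids
input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(model.device)
with torch.inference_mode():
    output_ids = model.generate(input_ids, images=image_tensor, do_sample=False, max_new_tokens=128)

# Depending on the version, output_ids may or may not include the prompt tokens
print(tokenizer.decode(output_ids[0], skip_special_tokens=True).strip())
```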

You can also use the `eval_model` function in `llava/eval/run_llava.py` to get the output in a single call. This way, you can run the code directly on Colab after downloading this repository.

```python
from llava.eval.run_llava import eval_model
from llava.mm_utils import get_model_name_from_path

model_path = "liuhaotian/llava-v1.5-7b"
model_name = get_model_name_from_path(model_path)
model_base = None
prompt = "Give me a short description of this image."
image_file = "https://llava-vl.github.io/static/images/view.jpg"

args = type('Args', (), {
    "model_path": model_path,
    "model_base": model_base,
    "model_name": model_name,
    "query": prompt,
    "conv_mode": None,
    "image_file": image_file,
    # generation settings read by eval_model
    "sep": ",",
    "temperature": 0,
    "top_p": None,
    "num_beams": 1,
    "max_new_tokens": 512
})()

eval_model(args)  # prints the model's answer
```
</details>

## LLaVA Weights
Please check out our [Model Zoo](https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md) for all public LLaVA checkpoints, along with instructions on how to use the weights.

37 changes: 37 additions & 0 deletions cog.yaml
@@ -0,0 +1,37 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
gpu: true

python_version: "3.11"

python_packages:
- "torch==2.0.1"
- "accelerate==0.21.0"
- "bitsandbytes==0.41.0"
- "deepspeed==0.9.5"
- "einops-exts==0.0.4"
- "einops==0.6.1"
- "gradio==3.35.2"
- "gradio_client==0.2.9"
- "httpx==0.24.0"
- "markdown2==2.4.10"
- "numpy==1.26.0"
- "peft==0.4.0"
- "scikit-learn==1.2.2"
- "sentencepiece==0.1.99"
- "shortuuid==1.0.11"
- "timm==0.6.13"
- "tokenizers==0.13.3"
- "torch==2.0.1"
- "torchvision==0.15.2"
- "transformers==4.31.0"
- "wandb==0.15.12"
- "wavedrom==2.0.3.post3"
- "Pygments==2.16.1"
run:
- curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.0.3/pget" && chmod +x /usr/local/bin/pget

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
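
For orientation, Cog loads `Predictor` from `predict.py`: a class with a `setup` method that loads the model once at container start and a `predict` method that handles each request. A hypothetical minimal shape (not the repository's actual `predict.py`):

```python
# predict.py -- hypothetical minimal Cog predictor sketch
from cog import BasePredictor, Input, Path


class Predictor(BasePredictor):
    def setup(self):
        # Load model weights once when the container starts
        self.model = None  # placeholder for the real LLaVA model

    def predict(
        self,
        image: Path = Input(description="Input image"),
        prompt: str = Input(default="Describe this image."),
    ) -> str:
        # Run inference here and return the generated answer
        return f"(answer for {image} given prompt: {prompt})"
```
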
25 changes: 25 additions & 0 deletions docs/Evaluation.md
@@ -114,6 +114,7 @@ CUDA_VISIBLE_DEVICES=0 bash scripts/v1_5/eval/mmbench_cn.sh
```
3. Submit the results to the evaluation server: `./playground/data/eval/mmbench/answers_upload/mmbench_dev_cn_20231003`.


### SEED-Bench

1. Follow the official [instructions](https://github.com/AILab-CVC/SEED-Bench/blob/main/DATASET.md) to download the images and videos. Put the images under `./playground/data/eval/seed_bench/SEED-Bench-image`.
@@ -140,3 +141,27 @@ CUDA_VISIBLE_DEVICES=0 bash scripts/v1_5/eval/llavabench.sh
CUDA_VISIBLE_DEVICES=0 bash scripts/v1_5/eval/mmvet.sh
```
3. Evaluate the predictions in `./playground/data/eval/mmvet/results` using the official jupyter notebook.

## More Benchmarks

Below are additional benchmarks for multimodal understanding from the research community that were not part of the original LLaVA-1.5 release.

### Q-Bench

1. Download [`llvisionqa_dev.json`](https://huggingface.co/datasets/nanyangtu/LLVisionQA-QBench/resolve/main/llvisionqa_dev.json) (for `dev`-subset) and [`llvisionqa_test.json`](https://huggingface.co/datasets/nanyangtu/LLVisionQA-QBench/resolve/main/llvisionqa_test.json) (for `test`-subset). Put them under `./playground/data/eval/qbench`.
2. Download and extract [images](https://huggingface.co/datasets/nanyangtu/LLVisionQA-QBench/resolve/main/images_llvisionqa.tar) and put all the images directly under `./playground/data/eval/qbench/images_llviqionqa`.
3. Single-GPU inference (change `dev` to `test` to evaluate on the test set).
```Shell
CUDA_VISIBLE_DEVICES=0 bash scripts/v1_5/eval/qbench.sh dev
```
4. Submit the results following the instructions [here](https://github.com/VQAssessment/Q-Bench#option-1-submit-results): `./playground/data/eval/qbench/llvisionqa_dev_answers.jsonl`.

### Chinese-Q-Bench

1. Download [`质衡-问答-验证集.json`](https://huggingface.co/datasets/nanyangtu/LLVisionQA-QBench/resolve/main/%E8%B4%A8%E8%A1%A1-%E9%97%AE%E7%AD%94-%E9%AA%8C%E8%AF%81%E9%9B%86.json) (for `dev`-subset) and [`质衡-问答-测试集.json`](https://huggingface.co/datasets/nanyangtu/LLVisionQA-QBench/resolve/main/%E8%B4%A8%E8%A1%A1-%E9%97%AE%E7%AD%94-%E6%B5%8B%E8%AF%95%E9%9B%86.json) (for `test`-subset). Put them under `./playground/data/eval/qbench`.
2. Download and extract [images](https://huggingface.co/datasets/nanyangtu/LLVisionQA-QBench/resolve/main/images_llvisionqa.tar) and put all the images directly under `./playground/data/eval/qbench/images_llviqionqa`.
3. Single-GPU inference (change `dev` to `test` to evaluate on the test set).
```Shell
CUDA_VISIBLE_DEVICES=0 bash scripts/v1_5/eval/qbench_zh.sh dev
```
4. Submit the results following the instructions [here](https://github.com/VQAssessment/Q-Bench#option-1-submit-results): `./playground/data/eval/qbench/llvisionqa_zh_dev_answers.jsonl`.