Commit b7a4865

Merge branch 'main' of github.com:haotian-liu/LLaVA into main
haotian-liu committed Nov 4, 2023
2 parents 5657a1a + 6c5ab2e commit b7a4865
Showing 23 changed files with 764 additions and 35 deletions.
53 changes: 53 additions & 0 deletions .devcontainer/Dockerfile
@@ -0,0 +1,53 @@
FROM mcr.microsoft.com/devcontainers/base:ubuntu-20.04

SHELL [ "bash", "-c" ]

# update apt and install packages
RUN apt update && \
apt install -yq \
ffmpeg \
dkms \
build-essential

# add user tools
RUN sudo apt install -yq \
jq \
jp \
tree \
tldr

# add git-lfs and install
RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | sudo bash && \
sudo apt-get install -yq git-lfs && \
git lfs install

############################################
# Setup user
############################################

USER vscode

# install azcopy, a tool to copy to/from blob storage
# for more info: https://learn.microsoft.com/en-us/azure/storage/common/storage-use-azcopy-blobs-upload#upload-a-file
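# Example upload (hypothetical <account>/<container>/<sas-token> placeholders):
#   azcopy copy ./checkpoints "https://<account>.blob.core.windows.net/<container>/checkpoints?<sas-token>" --recursive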
RUN cd /tmp && \
wget https://azcopyvnext.azureedge.net/release20230123/azcopy_linux_amd64_10.17.0.tar.gz && \
tar xvf azcopy_linux_amd64_10.17.0.tar.gz && \
mkdir -p ~/.local/bin && \
mv azcopy_linux_amd64_10.17.0/azcopy ~/.local/bin && \
chmod +x ~/.local/bin/azcopy && \
rm -rf azcopy_linux_amd64*

# Setup conda
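# (the -b flag runs the installer non-interactively, installing to ~/miniconda3)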
RUN cd /tmp && \
wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \
bash ./Miniconda3-latest-Linux-x86_64.sh -b && \
rm ./Miniconda3-latest-Linux-x86_64.sh

# Install dotnet
RUN cd /tmp && \
wget https://dot.net/v1/dotnet-install.sh && \
chmod +x dotnet-install.sh && \
./dotnet-install.sh --channel 7.0 && \
./dotnet-install.sh --channel 3.1 && \
rm ./dotnet-install.sh

2 changes: 2 additions & 0 deletions .devcontainer/devcontainer.env
@@ -0,0 +1,2 @@
SAMPLE_ENV_VAR1="Sample Value"
SAMPLE_ENV_VAR2=332431bf-68bf
71 changes: 71 additions & 0 deletions .devcontainer/devcontainer.json
@@ -0,0 +1,71 @@
{
"name": "LLaVA",
"build": {
"dockerfile": "Dockerfile",
"context": "..",
"args": {}
},
"features": {
"ghcr.io/devcontainers/features/docker-in-docker:2": {},
"ghcr.io/devcontainers/features/azure-cli:1": {},
"ghcr.io/azure/azure-dev/azd:0": {},
"ghcr.io/devcontainers/features/powershell:1": {},
"ghcr.io/devcontainers/features/common-utils:2": {},
"ghcr.io/devcontainers-contrib/features/zsh-plugins:0": {},
},
// "forwardPorts": [],
"postCreateCommand": "bash ./.devcontainer/postCreateCommand.sh",
"customizations": {
"vscode": {
"settings": {
"python.analysis.autoImportCompletions": true,
"python.analysis.autoImportUserSymbols": true,
"python.defaultInterpreterPath": "~/miniconda3/envs/llava/bin/python",
"python.formatting.provider": "yapf",
"python.linting.enabled": true,
"python.linting.flake8Enabled": true,
"isort.check": true,
"dev.containers.copyGitConfig": true,
"terminal.integrated.defaultProfile.linux": "zsh",
"terminal.integrated.profiles.linux": {
"zsh": {
"path": "/usr/bin/zsh"
},
}
},
"extensions": [
"aaron-bond.better-comments",
"eamodio.gitlens",
"EditorConfig.EditorConfig",
"foxundermoon.shell-format",
"GitHub.copilot-chat",
"GitHub.copilot-labs",
"GitHub.copilot",
"lehoanganh298.json-lines-viewer",
"mhutchie.git-graph",
"ms-azuretools.vscode-docker",
"ms-dotnettools.dotnet-interactive-vscode",
"ms-python.flake8",
"ms-python.isort",
"ms-python.python",
"ms-python.vscode-pylance",
"njpwerner.autodocstring",
"redhat.vscode-yaml",
"stkb.rewrap",
"yzhang.markdown-all-in-one",
]
}
},
"mounts": [],
"runArgs": [
"--gpus",
"all",
// "--ipc",
// "host",
"--ulimit",
"memlock=-1",
"--env-file",
".devcontainer/devcontainer.env"
],
// "remoteUser": "root"
}
45 changes: 45 additions & 0 deletions .devcontainer/postCreateCommand.sh
@@ -0,0 +1,45 @@
git config --global safe.directory '*'
git config --global core.editor "code --wait"
git config --global pager.branch false

# Set AZCOPY concurrency to auto
echo "export AZCOPY_CONCURRENCY_VALUE=AUTO" >> ~/.zshrc
echo "export AZCOPY_CONCURRENCY_VALUE=AUTO" >> ~/.bashrc

# Activate conda by default
echo ". /home/vscode/miniconda3/bin/activate" >> ~/.zshrc
echo ". /home/vscode/miniconda3/bin/activate" >> ~/.bashrc

# Use llava environment by default
echo "conda activate llava" >> ~/.zshrc
echo "conda activate llava" >> ~/.bashrc

# Add dotnet to PATH
echo 'export PATH="$PATH:$HOME/.dotnet"' >> ~/.bashrc
echo 'export PATH="$PATH:$HOME/.dotnet"' >> ~/.zshrc

# Create and activate llava environment
source /home/vscode/miniconda3/bin/activate
conda create -y -q -n llava python=3.10
conda activate llava

# Install Nvidia Cuda Compiler
conda install -y -c nvidia cuda-compiler

pip install pre-commit==3.0.2

# Install package locally
pip install --upgrade pip # enable PEP 660 support
pip install -e .

# Install additional packages for training
pip install -e ".[train]"
pip install flash-attn --no-build-isolation

# Download checkpoints to a location outside the repo
git clone https://huggingface.co/liuhaotian/llava-v1.5-7b ~/llava-v1.5-7b

# Commented out: most users are unlikely to have enough local GPU memory to load the 13B model
# git clone https://huggingface.co/liuhaotian/llava-v1.5-13b ~/llava-v1.5-13b

echo "postCreateCommand.sh COMPLETE!"
21 changes: 21 additions & 0 deletions .dockerignore
@@ -0,0 +1,21 @@
# The .dockerignore file excludes files from the container build process.
#
# https://docs.docker.com/engine/reference/builder/#dockerignore-file

# Exclude Git files
.git
.github
.gitignore

# Exclude Python cache files
__pycache__
.mypy_cache
.pytest_cache
.ruff_cache

# Exclude Python virtual environment
/venv

# Exclude some weights
/openai
/liuhaotian
18 changes: 18 additions & 0 deletions .editorconfig
@@ -0,0 +1,18 @@
root = true

# Unix-style newlines with a newline ending every file
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8

# 4 space indentation
[*.{py,json}]
indent_style = space
indent_size = 4

# 2 space indentation
[*.{md,sh,yaml,yml}]
indent_style = space
indent_size = 2
29 changes: 29 additions & 0 deletions .gitattributes
@@ -0,0 +1,29 @@
# https://git-scm.com/docs/gitattributes

# Set the default behavior, in case people don't have core.autocrlf set.
# https://git-scm.com/docs/gitattributes#_end_of_line_conversion
* text=auto

# common python attributes, taken from https://github.com/alexkaratarakis/gitattributes/blob/710900479a2bedeec7003d381719521ffbb18bf8/Python.gitattributes
# Source files
# ============
*.pxd text diff=python
*.py text diff=python
*.py3 text diff=python
*.pyw text diff=python
*.pyx text diff=python
*.pyz text diff=python
*.pyi text diff=python

# Binary files
# ============
*.db binary
*.p binary
*.pkl binary
*.pickle binary
*.pyc binary export-ignore
*.pyo binary export-ignore
*.pyd binary

# Jupyter notebook
*.ipynb text eol=lf
6 changes: 6 additions & 0 deletions .gitignore
@@ -27,3 +27,9 @@ ckpts*

.ipynb_checkpoints
*.ipynb

# DevContainer
!.devcontainer/*

# Demo
serve_images/
49 changes: 48 additions & 1 deletion README.md
@@ -4,7 +4,7 @@

[[Project Page](https://llava-vl.github.io/)] [[Demo](https://llava.hliu.cc/)] [[Data](https://github.com/haotian-liu/LLaVA/blob/main/docs/Data.md)] [[Model Zoo](https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md)]

🤝Community Contributions: [[llama.cpp](https://github.com/ggerganov/llama.cpp/pull/3436)] [[Colab](https://github.com/camenduru/LLaVA-colab)] [[🤗Space](https://huggingface.co/spaces/badayvedat/LLaVA)]
🤝Community Contributions: [[llama.cpp](https://github.com/ggerganov/llama.cpp/pull/3436)] [[Colab](https://github.com/camenduru/LLaVA-colab)] [[🤗Space](https://huggingface.co/spaces/badayvedat/LLaVA)] [[Roboflow Deep Dive](https://blog.roboflow.com/first-impressions-with-llava-1-5/)] [[Replicate](https://replicate.com/yorickvp/llava-13b)]

**Improved Baselines with Visual Instruction Tuning** [[Paper](https://arxiv.org/abs/2310.03744)] <br>
[Haotian Liu](https://hliu.cc), [Chunyuan Li](https://chunyuan.li/), [Yuheng Li](https://yuheng-li.github.io/), [Yong Jae Lee](https://pages.cs.wisc.edu/~yongjaelee/)
@@ -88,6 +88,53 @@ git pull
pip install -e .
```

### Quick Start With HuggingFace

<details>
<summary>Example Code</summary>

```python
from llava.model.builder import load_pretrained_model
from llava.mm_utils import get_model_name_from_path

model_path = "liuhaotian/llava-v1.5-7b"
model_name = get_model_name_from_path(model_path)
model_base = None

tokenizer, model, image_processor, context_len = load_pretrained_model(
model_path=model_path,
model_base=model_base,
model_name=model_name
)
```

Check out the details of the `load_pretrained_model` function in `llava/model/builder.py`.
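
For reference, a minimal generation loop with the objects returned above might look like the sketch below, which mirrors the flow of `eval_model` in `llava/eval/run_llava.py`; the `llava_v1` conversation template and the generation settings are assumptions that suit the v1.5 checkpoints and may need adjusting for other models.

```python
# A minimal generation sketch (assumes tokenizer, model, image_processor from above;
# exact decoding details can vary across LLaVA/transformers versions).
import requests
import torch
from PIL import Image

from llava.constants import DEFAULT_IMAGE_TOKEN, IMAGE_TOKEN_INDEX
from llava.conversation import conv_templates
from llava.mm_utils import process_images, tokenizer_image_token

# Fetch and preprocess the image for the vision tower
url = "https://llava-vl.github.io/static/images/view.jpg"
image = Image.open(requests.get(url, stream=True).raw)
image_tensor = process_images([image], image_processor, model.config).to(model.device, dtype=torch.float16)

# Build a prompt that contains the special image token
conv = conv_templates["llava_v1"].copy()
conv.append_message(conv.roles[0], DEFAULT_IMAGE_TOKEN + "\nGive me a short description of this image.")
conv.append_message(conv.roles[1], None)
prompt = conv.get_prompt()

# tokenizer_image_token splices IMAGE_TOKEN_INDEX into the token ids
input_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX, return_tensors="pt").unsqueeze(0).to(model.device)
with torch.inference_mode():
    output_ids = model.generate(input_ids, images=image_tensor, do_sample=False, max_new_tokens=128)

# Depending on the version, output_ids may or may not include the prompt tokens
print(tokenizer.decode(output_ids[0], skip_special_tokens=True).strip())
```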

You can also use the `eval_model` function in `llava/eval/run_llava.py` to get the output in a single call. This way, you can run the code directly on Colab after downloading this repository.

```python
from llava.eval.run_llava import eval_model
from llava.mm_utils import get_model_name_from_path

model_path = "liuhaotian/llava-v1.5-7b"
model_name = get_model_name_from_path(model_path)
model_base = None
prompt = "Give me a short description of this image."
image_file = "https://llava-vl.github.io/static/images/view.jpg"

args = type('Args', (), {
    "model_path": model_path,
    "model_base": model_base,
    "model_name": model_name,
    "query": prompt,
    "conv_mode": None,
    "image_file": image_file,
    # generation settings read by eval_model
    "sep": ",",
    "temperature": 0,
    "top_p": None,
    "num_beams": 1,
    "max_new_tokens": 512
})()

eval_model(args)  # prints the model's answer
```
</details>

## LLaVA Weights
Please check out our [Model Zoo](https://github.com/haotian-liu/LLaVA/blob/main/docs/MODEL_ZOO.md) for all public LLaVA checkpoints, along with instructions on how to use the weights.

37 changes: 37 additions & 0 deletions cog.yaml
@@ -0,0 +1,37 @@
# Configuration for Cog ⚙️
# Reference: https://github.com/replicate/cog/blob/main/docs/yaml.md

build:
gpu: true

python_version: "3.11"

python_packages:
- "torch==2.0.1"
- "accelerate==0.21.0"
- "bitsandbytes==0.41.0"
- "deepspeed==0.9.5"
- "einops-exts==0.0.4"
- "einops==0.6.1"
- "gradio==3.35.2"
- "gradio_client==0.2.9"
- "httpx==0.24.0"
- "markdown2==2.4.10"
- "numpy==1.26.0"
- "peft==0.4.0"
- "scikit-learn==1.2.2"
- "sentencepiece==0.1.99"
- "shortuuid==1.0.11"
- "timm==0.6.13"
- "tokenizers==0.13.3"
- "torch==2.0.1"
- "torchvision==0.15.2"
- "transformers==4.31.0"
- "wandb==0.15.12"
- "wavedrom==2.0.3.post3"
- "Pygments==2.16.1"
run:
- curl -o /usr/local/bin/pget -L "https://github.com/replicate/pget/releases/download/v0.0.3/pget" && chmod +x /usr/local/bin/pget

# predict.py defines how predictions are run on your model
predict: "predict.py:Predictor"
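
For orientation, Cog loads `Predictor` from `predict.py`: a class with a `setup` method that loads the model once at container start and a `predict` method that handles each request. A hypothetical minimal shape (not the repository's actual `predict.py`):

```python
# predict.py -- hypothetical minimal Cog predictor sketch
from cog import BasePredictor, Input, Path


class Predictor(BasePredictor):
    def setup(self):
        # Load model weights once when the container starts
        self.model = None  # placeholder for the real LLaVA model

    def predict(
        self,
        image: Path = Input(description="Input image"),
        prompt: str = Input(default="Describe this image."),
    ) -> str:
        # Run inference here and return the generated answer
        return f"(answer for {image} given prompt: {prompt})"
```
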
25 changes: 25 additions & 0 deletions docs/Evaluation.md
@@ -114,6 +114,7 @@ CUDA_VISIBLE_DEVICES=0 bash scripts/v1_5/eval/mmbench_cn.sh
```
3. Submit the results to the evaluation server: `./playground/data/eval/mmbench/answers_upload/mmbench_dev_cn_20231003`.


### SEED-Bench

1. Follow the official [instructions](https://github.com/AILab-CVC/SEED-Bench/blob/main/DATASET.md) to download the images and videos. Put the images under `./playground/data/eval/seed_bench/SEED-Bench-image`.
@@ -140,3 +141,27 @@ CUDA_VISIBLE_DEVICES=0 bash scripts/v1_5/eval/llavabench.sh
CUDA_VISIBLE_DEVICES=0 bash scripts/v1_5/eval/mmvet.sh
```
3. Evaluate the predictions in `./playground/data/eval/mmvet/results` using the official jupyter notebook.

## More Benchmarks

Below are additional benchmarks for multimodal understanding from the research community that were not part of the original LLaVA-1.5 release.

### Q-Bench

1. Download [`llvisionqa_dev.json`](https://huggingface.co/datasets/nanyangtu/LLVisionQA-QBench/resolve/main/llvisionqa_dev.json) (for `dev`-subset) and [`llvisionqa_test.json`](https://huggingface.co/datasets/nanyangtu/LLVisionQA-QBench/resolve/main/llvisionqa_test.json) (for `test`-subset). Put them under `./playground/data/eval/qbench`.
2. Download and extract [images](https://huggingface.co/datasets/nanyangtu/LLVisionQA-QBench/resolve/main/images_llvisionqa.tar) and put all the images directly under `./playground/data/eval/qbench/images_llviqionqa`.
3. Single-GPU inference (change `dev` to `test` to evaluate on the test set).
```Shell
CUDA_VISIBLE_DEVICES=0 bash scripts/v1_5/eval/qbench.sh dev
```
4. Submit the results following the instructions [here](https://github.com/VQAssessment/Q-Bench#option-1-submit-results): `./playground/data/eval/qbench/llvisionqa_dev_answers.jsonl`.

### Chinese-Q-Bench

1. Download [`质衡-问答-验证集.json`](https://huggingface.co/datasets/nanyangtu/LLVisionQA-QBench/resolve/main/%E8%B4%A8%E8%A1%A1-%E9%97%AE%E7%AD%94-%E9%AA%8C%E8%AF%81%E9%9B%86.json) (for `dev`-subset) and [`质衡-问答-测试集.json`](https://huggingface.co/datasets/nanyangtu/LLVisionQA-QBench/resolve/main/%E8%B4%A8%E8%A1%A1-%E9%97%AE%E7%AD%94-%E6%B5%8B%E8%AF%95%E9%9B%86.json) (for `test`-subset). Put them under `./playground/data/eval/qbench`.
2. Download and extract [images](https://huggingface.co/datasets/nanyangtu/LLVisionQA-QBench/resolve/main/images_llvisionqa.tar) and put all the images directly under `./playground/data/eval/qbench/images_llviqionqa`.
3. Single-GPU inference (change `dev` to `test` to evaluate on the test set).
```Shell
CUDA_VISIBLE_DEVICES=0 bash scripts/v1_5/eval/qbench_zh.sh dev
```
4. Submit the results following the instructions [here](https://github.com/VQAssessment/Q-Bench#option-1-submit-results): `./playground/data/eval/qbench/llvisionqa_zh_dev_answers.jsonl`.