Merge pull request #7 from gauravgarg17/support_cmake_build
Add cmake build support
ankan-ban authored Oct 6, 2023
2 parents ec8e1a3 + 39a7175 commit 1cdde7e
Showing 2 changed files with 26 additions and 11 deletions.
13 changes: 13 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,13 @@
cmake_minimum_required(VERSION 3.10)
project(llama2_q4 LANGUAGES CXX CUDA)
enable_language(CUDA)

# Detect the compute capability of the installed GPU(s) and convert the
# result into the list format CMake expects (e.g. "8.6" -> "86").
include(FindCUDA/select_compute_arch)
CUDA_DETECT_INSTALLED_GPUS(INSTALLED_GPU_CCS_1)
string(STRIP "${INSTALLED_GPU_CCS_1}" INSTALLED_GPU_CCS_2)
string(REPLACE " " ";" INSTALLED_GPU_CCS_3 "${INSTALLED_GPU_CCS_2}")
string(REPLACE "." "" CUDA_ARCH_LIST "${INSTALLED_GPU_CCS_3}")
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST})

# Host-side weight conversion tool and the CUDA inference binary.
add_executable(weight_packer weight_packer.cpp)
add_executable(llama2_q4 llama2_q4.cu)
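
Note that the auto-detection above needs a visible GPU at configure time. If you are configuring on a machine without one, a minimal fallback (an assumption, not part of this commit) is to hard-code the target architecture in place of the detection block:

```
# Fallback sketch: skip auto-detection and target a fixed architecture.
# Replace 86 with your GPU's compute capability (e.g. 75, 80, 89).
set(CMAKE_CUDA_ARCHITECTURES 86)
```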
24 changes: 13 additions & 11 deletions README.md
@@ -4,7 +4,19 @@ Simple and fast pure CUDA inference for 4-bit [AWQ](https://github.com/mit-han-l

Based on [llama2.c](https://github.com/karpathy/llama2.c)

## Instructions
## Build

```
git clone https://github.com/ankan-ban/llama_cu_awq
cd llama_cu_awq
mkdir build
cd build
cmake ..
cmake --build . --config Release
cd ..
```
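
Depending on the CMake generator, the built binaries land either directly in `build/` (single-config generators such as Unix Makefiles or Ninja) or in a per-configuration subdirectory (multi-config generators such as Visual Studio). A sketch of the likely paths:

```
./build/weight_packer        # single-config generators
./build/Release/llama2_q4    # multi-config generators (e.g. Visual Studio)
```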

## Run

The simpler way is to download a pre-converted model from Hugging Face, but you can also do all the steps yourself.

@@ -18,11 +30,6 @@ You can use one of these models:
Here are the commands for the 7B model:

```
git clone https://github.com/ankan-ban/llama_cu_awq
cd llama_cu_awq
gcc weight_packer.cpp -o weight_packer
nvcc -O3 llama2_q4.cu -o llama2_q4
wget https://huggingface.co/abhinavkulkarni/meta-llama-Llama-2-7b-chat-hf-w4-g128-awq/resolve/main/pytorch_model.bin
wget https://huggingface.co/abhinavkulkarni/meta-llama-Llama-2-7b-chat-hf-w4-g128-awq/resolve/main/config.json
@@ -36,11 +43,6 @@ python3 convert_awq_to_bin.py pytorch_model.bin output
And here are the commands for the 13B model:

```
git clone https://github.com/ankan-ban/llama_cu_awq
cd llama_cu_awq
gcc weight_packer.cpp -o weight_packer
nvcc -O3 llama2_q4.cu -o llama2_q4
wget https://huggingface.co/abhinavkulkarni/meta-llama-Llama-2-13b-chat-hf-w4-g128-awq/resolve/main/config.json
wget https://huggingface.co/abhinavkulkarni/meta-llama-Llama-2-13b-chat-hf-w4-g128-awq/resolve/main/pytorch_model-00001-of-00003.bin
wget https://huggingface.co/abhinavkulkarni/meta-llama-Llama-2-13b-chat-hf-w4-g128-awq/resolve/main/pytorch_model-00002-of-00003.bin
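
For reference, the steps hidden in the collapsed portions of the diff follow the same pattern. A sketch of the full 7B pipeline after building (the weight_packer arguments and llama2_q4 flags here are assumptions, so check the repository README for the exact invocations):

```
python3 convert_awq_to_bin.py pytorch_model.bin output       # unpack the AWQ tensors
./build/weight_packer config.json output llama2-7b-awq.bin   # assumed argument order
./build/llama2_q4 llama2-7b-awq.bin -i "write an essay about GPUs"   # assumed flags
```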
