Merge pull request #7 from gauravgarg17/support_cmake_build
Add cmake build support
ankan-ban authored Oct 6, 2023
2 parents ec8e1a3 + 39a7175 commit 1cdde7e
Showing 2 changed files with 26 additions and 11 deletions.
13 changes: 13 additions & 0 deletions CMakeLists.txt
@@ -0,0 +1,13 @@
cmake_minimum_required(VERSION 3.10)
project(llama2_q4 LANGUAGES CXX CUDA)
enable_language(CUDA)

# Detect the compute capability of the installed GPU(s) and convert the
# result into the list format CMake expects (e.g. "8.6" -> "86").
include(FindCUDA/select_compute_arch)
CUDA_DETECT_INSTALLED_GPUS(INSTALLED_GPU_CCS_1)
string(STRIP "${INSTALLED_GPU_CCS_1}" INSTALLED_GPU_CCS_2)
string(REPLACE " " ";" INSTALLED_GPU_CCS_3 "${INSTALLED_GPU_CCS_2}")
string(REPLACE "." "" CUDA_ARCH_LIST "${INSTALLED_GPU_CCS_3}")
set(CMAKE_CUDA_ARCHITECTURES ${CUDA_ARCH_LIST})

# Host-side weight conversion tool and the CUDA inference binary.
add_executable(weight_packer weight_packer.cpp)
add_executable(llama2_q4 llama2_q4.cu)
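
Note that the auto-detection above needs a visible GPU at configure time. If you are configuring on a machine without one, a minimal fallback (an assumption, not part of this commit) is to hard-code the target architecture in place of the detection block:

```
# Fallback sketch: skip auto-detection and target a fixed architecture.
# Replace 86 with your GPU's compute capability (e.g. 75, 80, 89).
set(CMAKE_CUDA_ARCHITECTURES 86)
```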
24 changes: 13 additions & 11 deletions README.md
@@ -4,7 +4,19 @@ Simple and fast pure CUDA inference for 4-bit [AWQ](https://github.com/mit-han-l

Based on [llama2.c](https://github.com/karpathy/llama2.c)

## Instructions
## Build

```
git clone https://github.com/ankan-ban/llama_cu_awq
cd llama_cu_awq
mkdir build
cd build
cmake ..
cmake --build . --config Release
cd ..
```
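
Depending on the CMake generator, the built binaries land either directly in `build/` (single-config generators such as Unix Makefiles or Ninja) or in a per-configuration subdirectory (multi-config generators such as Visual Studio). A sketch of the likely paths:

```
./build/weight_packer        # single-config generators
./build/Release/llama2_q4    # multi-config generators (e.g. Visual Studio)
```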

## Run

The simpler way is to download a pre-converted model from Hugging Face, but you can also do all the steps yourself.

@@ -18,11 +30,6 @@ You can use one of these models:
Here are the commands for the 7B model:

```
git clone https://github.com/ankan-ban/llama_cu_awq
cd llama_cu_awq
gcc weight_packer.cpp -o weight_packer
nvcc -O3 llama2_q4.cu -o llama2_q4
wget https://huggingface.co/abhinavkulkarni/meta-llama-Llama-2-7b-chat-hf-w4-g128-awq/resolve/main/pytorch_model.bin
wget https://huggingface.co/abhinavkulkarni/meta-llama-Llama-2-7b-chat-hf-w4-g128-awq/resolve/main/config.json
@@ -36,11 +43,6 @@ python3 convert_awq_to_bin.py pytorch_model.bin output
And here are the commands for the 13B model:

```
git clone https://github.com/ankan-ban/llama_cu_awq
cd llama_cu_awq
gcc weight_packer.cpp -o weight_packer
nvcc -O3 llama2_q4.cu -o llama2_q4
wget https://huggingface.co/abhinavkulkarni/meta-llama-Llama-2-13b-chat-hf-w4-g128-awq/resolve/main/config.json
wget https://huggingface.co/abhinavkulkarni/meta-llama-Llama-2-13b-chat-hf-w4-g128-awq/resolve/main/pytorch_model-00001-of-00003.bin
wget https://huggingface.co/abhinavkulkarni/meta-llama-Llama-2-13b-chat-hf-w4-g128-awq/resolve/main/pytorch_model-00002-of-00003.bin
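
For reference, the steps hidden in the collapsed portions of the diff follow the same pattern. A sketch of the full 7B pipeline after building (the weight_packer arguments and llama2_q4 flags here are assumptions, so check the repository README for the exact invocations):

```
python3 convert_awq_to_bin.py pytorch_model.bin output       # unpack the AWQ tensors
./build/weight_packer config.json output llama2-7b-awq.bin   # assumed argument order
./build/llama2_q4 llama2-7b-awq.bin -i "write an essay about GPUs"   # assumed flags
```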
