Merge 'origin/master' into hipblas

SlyEcho committed Apr 28, 2023
2 parents 2ab9d11 + 7f15c5c commit d194586
Showing 20 changed files with 1,092 additions and 25 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -41,3 +41,5 @@ zig-out/
zig-cache/

ppl-*.txt

examples/jeopardy/results.txt
20 changes: 18 additions & 2 deletions CMakeLists.txt
@@ -67,6 +67,7 @@ endif()
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF)
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF)

option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
@@ -169,6 +170,21 @@ if (LLAMA_CUBLAS)
    endif()
endif()

if (LLAMA_CLBLAST)
    find_package(CLBlast)
    if (CLBlast_FOUND)
        message(STATUS "CLBlast found")

        set(GGML_OPENCL_SOURCES ggml-opencl.c ggml-opencl.h)

        add_compile_definitions(GGML_USE_CLBLAST)

        set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} clblast)
    else()
        message(WARNING "CLBlast not found")
    endif()
endif()

if (LLAMA_HIPBLAS)
    list(APPEND CMAKE_PREFIX_PATH /opt/rocm)

@@ -196,7 +212,6 @@ if (LLAMA_HIPBLAS)
    else()
        message(WARNING "hipBLAS or HIP not found. Try setting CMAKE_PREFIX_PATH=/opt/rocm")
    endif()

endif()

if (LLAMA_ALL_WARNINGS)
Expand Down Expand Up @@ -338,7 +353,8 @@ endif()
add_library(ggml OBJECT
            ggml.c
            ggml.h
            ${GGML_CUDA_SOURCES})
            ${GGML_CUDA_SOURCES}
            ${GGML_OPENCL_SOURCES})

target_include_directories(ggml PUBLIC .)
target_compile_features(ggml PUBLIC c_std_11) # don't bump
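The new `LLAMA_CLBLAST` branch mirrors the existing cuBLAS one: when CLBlast is found, `ggml-opencl.c`/`ggml-opencl.h` join the ggml sources, `GGML_USE_CLBLAST` is defined, and `clblast` is added to the link libraries. As a sketch (assuming CLBlast is installed somewhere `find_package` can locate it), a build with the option enabled might look like:

```
mkdir build && cd build
cmake .. -DLLAMA_CLBLAST=ON
cmake --build . --config Release
```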
11 changes: 9 additions & 2 deletions Makefile
@@ -105,14 +105,21 @@ ifdef LLAMA_OPENBLAS
	LDFLAGS += -lopenblas
endif
ifdef LLAMA_CUBLAS
	CFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
	LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
	CFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
	LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
	OBJS    += ggml-cuda.o
	NVCC      = nvcc
	NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
endif
ifdef LLAMA_CLBLAST
	CFLAGS  += -DGGML_USE_CLBLAST
	LDFLAGS += -lclblast -lOpenCL
	OBJS    += ggml-opencl.o
ggml-opencl.o: ggml-opencl.c ggml-opencl.h
	$(CC) $(CFLAGS) -c $< -o $@
endif
ifdef LLAMA_HIPBLAS
	ROCM_PATH ?= /opt/rocm
	CC := $(ROCM_PATH)/llvm/bin/clang
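On the Makefile side, each backend is switched on by defining a make variable. Plausible invocations for the two paths touched above (the exact CUDA location varies per system; `CUDA_PATH` is consulted by the new include and library search paths):

```
# OpenCL backend via CLBlast (links -lclblast -lOpenCL)
make LLAMA_CLBLAST=1

# cuBLAS backend with a non-default CUDA install
make LLAMA_CUBLAS=1 CUDA_PATH=/opt/cuda
```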
6 changes: 2 additions & 4 deletions README.md
@@ -9,10 +9,8 @@ Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++

**Hot topics:**

- [Roadmap May 2023](https://github.com/ggerganov/llama.cpp/discussions/1220)
- [New quantization methods](https://github.com/ggerganov/llama.cpp#quantization)
- [Added LoRA support](https://github.com/ggerganov/llama.cpp/pull/820)
- [Add GPU support to ggml](https://github.com/ggerganov/llama.cpp/discussions/915)
- [Roadmap Apr 2023](https://github.com/ggerganov/llama.cpp/discussions/784)

## Description

@@ -174,7 +172,7 @@ In order to build llama.cpp you have three different options.

- On Windows:

1. Download the latest fortran version of [w64devkit](https://github.com/seeto/w64devkit/releases).
1. Download the latest fortran version of [w64devkit](https://github.com/skeeto/w64devkit/releases).
2. Extract `w64devkit` on your pc.
3. Run `w64devkit.exe`.
4. Use the `cd` command to reach the `llama.cpp` folder.
4 changes: 2 additions & 2 deletions examples/chat-13B.sh
@@ -31,8 +31,6 @@ The transcript only includes text, it does not include markup like HTML and Markdown.
$USER_NAME: Hello, $AI_NAME!
$AI_NAME: Hello $USER_NAME! How may I help you today?
$USER_NAME: What time is it?
$AI_NAME: It is $(date +%H:%M).
$USER_NAME: What year is it?
$AI_NAME: We are in $(date +%Y).
$USER_NAME: Please tell me the largest city in Europe.
@@ -50,4 +48,6 @@ $AI_NAME: The arguments are stored in process.argv.
argv[3] is the second argument passed to the script and so on.
$USER_NAME: Name a color.
$AI_NAME: Blue
$USER_NAME: What time is it?
$AI_NAME: It is $(date +%H:%M).
$USER_NAME:" "$@"
7 changes: 7 additions & 0 deletions examples/common.cpp
@@ -61,6 +61,12 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                break;
            }
            params.prompt = argv[i];
        } else if (arg == "--session") {
            if (++i >= argc) {
                invalid_param = true;
                break;
            }
            params.path_session = argv[i];
        } else if (arg == "-f" || arg == "--file") {
            if (++i >= argc) {
                invalid_param = true;
@@ -228,6 +234,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stderr, " -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
fprintf(stderr, " -p PROMPT, --prompt PROMPT\n");
fprintf(stderr, " prompt to start generation with (default: empty)\n");
fprintf(stderr, " --session FNAME file to cache model state in (may be large!) (default: none)\n");
fprintf(stderr, " --random-prompt start with a randomized prompt.\n");
fprintf(stderr, " --in-prefix STRING string to prefix user inputs with (default: empty)\n");
fprintf(stderr, " -f FNAME, --file FNAME\n");
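The new `--session FNAME` option stores the model's eval state in a file so a later run can pick it up instead of recomputing it. A hypothetical invocation of `main` (model path, session file, and prompt are placeholders):

```
./main -m ./models/7B/ggml-model-q4_0.bin --session ./session.bin -p "Once upon a time"
```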
1 change: 1 addition & 0 deletions examples/common.h
@@ -31,6 +31,7 @@ struct gpt_params {

    std::string model = "models/lamma-7B/ggml-model.bin"; // model path
    std::string prompt = "";
    std::string path_session = ""; // path to file for saving/loading model eval state
    std::string input_prefix = ""; // string to prefix user inputs with
    std::vector<std::string> antiprompt; // string upon seeing which more user input is prompted

21 changes: 21 additions & 0 deletions examples/jeopardy/README.md
@@ -0,0 +1,21 @@
# llama.cpp/examples/jeopardy

This is essentially a straight port of [aigoopy/llm-jeopardy](https://github.com/aigoopy/llm-jeopardy) with an added graph viewer.

The Jeopardy test can be used to compare the factual knowledge of different models against each other. This is in contrast to some other tests, which measure logical deduction, creativity, writing skill, etc.


Step 1: Open jeopardy.sh and modify the following:
```
MODEL=(path to your model)
MODEL_NAME=(name of your model)
prefix=(the user-turn prefix: "Human: " for Vicuna, "User: " or similar for other models)
opts=(add --instruct here if your model needs it, or anything else you want to test out)
```
Step 2: Run `jeopardy.sh` from the llama.cpp folder.

Step 3: Repeat steps 1 and 2 until you have all the results you need.

Step 4: Run `graph.py`, and follow the instructions. At the end, it will generate your final graph.

Note: The Human bar is based on the full, original set of 100 sample questions. If you modify the question count or the questions themselves, the comparison is no longer valid.
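For reference, the defaults shipped in `jeopardy.sh` (reproduced in full further down) target a Vicuna 13B model:

```
MODEL=./models/ggml-vicuna-13b-1.1-q4_0.bin
MODEL_NAME=Vicuna
prefix="Human: "
opts="--temp 0 -n 80"
```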
56 changes: 56 additions & 0 deletions examples/jeopardy/graph.py
@@ -0,0 +1,56 @@
import os
import csv

import matplotlib.pyplot as plt

labels = []
numbers = []
numEntries = 1  # one extra slot for the "Human" reference bar

rows = []


def bar_chart(numbers, labels, pos):
    plt.bar(pos, numbers, color='blue')
    plt.xticks(ticks=pos, labels=labels)
    plt.title("Jeopardy Results by Model")
    plt.xlabel("Model")
    plt.ylabel("Questions Correct")
    plt.show()


def calculatecorrect():
    global numEntries

    # Load the answer sheet; rows[i][2] holds the correct answer for
    # question i (row 0 is the CSV header).
    with open("./examples/jeopardy/qasheet.csv", 'rt') as qasheet:
        for row in csv.reader(qasheet, delimiter=','):
            rows.append(row)

    # Score each model's result file interactively.
    directory = os.fsencode("./examples/jeopardy/results/")
    for listing in os.listdir(directory):
        filename = os.fsdecode(listing)
        if not filename.endswith(".txt"):
            continue
        labels.append(filename[:-4])  # model name = result file name without ".txt"
        numEntries += 1
        i = 1
        totalcorrect = 0
        with open("./examples/jeopardy/results/" + filename, "rt") as file:
            for line in file:
                if line.strip() != "------":
                    print(line)
                else:
                    print("Correct answer: " + rows[i][2] + "\n")
                    i += 1
                    print("Did the AI get the question right? (y/n)")
                    if input() == "y":
                        totalcorrect += 1
        numbers.append(totalcorrect)


if __name__ == '__main__':
    calculatecorrect()
    pos = list(range(numEntries))
    labels.append("Human")      # reference score from the original 100-question set
    numbers.append(48.11)
    bar_chart(numbers, labels, pos)
    print(labels)
    print(numbers)
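Since `graph.py` hard-codes paths relative to the repository root (`./examples/jeopardy/...`), it is meant to be run from there, e.g.:

```
python3 ./examples/jeopardy/graph.py
```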
30 changes: 30 additions & 0 deletions examples/jeopardy/jeopardy.sh
@@ -0,0 +1,30 @@
#!/bin/bash
set -e

MODEL=./models/ggml-vicuna-13b-1.1-q4_0.bin
MODEL_NAME=Vicuna

# exec options
prefix="Human: " # Ex. Vicuna uses "Human: "
opts="--temp 0 -n 80" # additional flags
nl='
'
introduction="You will be playing a game of Jeopardy. Simply answer the question in the correct format (Ex. What is Paris, or Who is George Washington)."

# file options
question_file=./examples/jeopardy/questions.txt
touch ./examples/jeopardy/results/$MODEL_NAME.txt
output_file=./examples/jeopardy/results/$MODEL_NAME.txt

counter=1

echo 'Running'
while IFS= read -r question
do
exe_cmd="./main -p "\"$prefix$introduction$nl$prefix$question\"" "$opts" -m ""\"$MODEL\""" >> ""\"$output_file\""
echo $counter
echo "Current Question: $question"
eval "$exe_cmd"
echo -e "\n------" >> $output_file
counter=$((counter+1))
done < "$question_file"
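The `exe_cmd` string is assembled with escaped quotes and then passed through `eval`, so the multi-word prompt survives a second round of word splitting. An equivalent direct call without `eval` would be a sketch like this (same variables assumed; `$opts` is deliberately left unquoted so its flags split into separate arguments):

```
./main -p "$prefix$introduction$nl$prefix$question" $opts -m "$MODEL" >> "$output_file"
```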