## Copyright (C) 2024, Nicholas Carlini <nicholas@carlini.com>.
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
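
"""Model-dispatch layer for the benchmark: the LLM class below selects a
provider-specific wrapper from the model name, caches responses on disk
under tmp/, and retries failed API requests before giving up."""
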
import os
import pickle
import time

from llms.custom_openai_model import CustomOpenAIModel
from llms.openai_model import OpenAIModel
from llms.anthropic_model import AnthropicModel
from llms.mistral_model import MistralModel
#from llms.vertexai_model import VertexAIModel
from llms.cohere_model import CohereModel
from llms.moonshot_model import MoonshotAIModel

class LLM:
    """Wrapper that picks the right provider-specific model class from the
    model name, and adds an on-disk response cache plus request retries."""

    def __init__(self, name="gpt-3.5-turbo", use_cache=True, override_hparams=None):
        self.name = name
        if 'gpt' in name:
            self.model = OpenAIModel(name)
        elif 'Mistral-7B-Instruct-v0.1-AWQ' in name:
            self.model = CustomOpenAIModel(name)
        elif 'Mixtral-Instruct-AWQ' in name:
            self.model = CustomOpenAIModel(name)
        elif 'openchat_3.5' in name:
            self.model = CustomOpenAIModel(name)
        # elif 'llama' in name:
        #     self.model = LLAMAModel(name)
        elif 'mistral' in name:
            self.model = MistralModel(name)
        # elif 'gemini' in name or 'bison' in name:
        #     self.model = VertexAIModel(name)
        elif 'claude' in name:
            self.model = AnthropicModel(name)
        elif 'moonshot' in name:
            self.model = MoonshotAIModel(name)
        elif 'command' in name:
            self.model = CohereModel(name)
        else:
            raise ValueError(f"Unknown model name: {name}")
        self.model.hparams.update(override_hparams or {})

        self.use_cache = use_cache
        if use_cache:
            try:
                if not os.path.exists("tmp"):
                    os.mkdir("tmp")
                # Cache files are keyed by the last path component of the name.
                with open(f"tmp/cache-{name.split('/')[-1]}.p", "rb") as f:
                    self.cache = pickle.load(f)
            except Exception:
                # No cache file yet (or it is unreadable): start empty.
                self.cache = {}
        else:
            self.cache = {}

    def __call__(self, conversation, add_image=None, max_tokens=None, skip_cache=False):
        if isinstance(conversation, str):
            conversation = [conversation]

        # Key the cache on the full conversation, plus the raw image bytes
        # if one is attached.
        cache_key = tuple(conversation) if add_image is None else tuple(conversation + [add_image.tobytes()])

        if cache_key in self.cache and not skip_cache and self.use_cache:
            print(self.name, "GETCACHE", repr(self.cache[cache_key]))
            if len(self.cache[cache_key]) > 0:
                return self.cache[cache_key]
            else:
                print("Empty cache hit")

        print(self.name, "CACHE MISS", repr(conversation))

        # Retry transient API failures a few times before giving up.
        response = "Model API request failed"
        for _ in range(3):
            try:
                response = self.model.make_request(conversation, add_image=add_image, max_tokens=max_tokens)
                break
            except Exception as e:
                print("RUN FAILED", e)
                time.sleep(10)

        # Only persist successful responses, so failures are retried next run.
        if self.use_cache and response != "Model API request failed":
            self.cache[cache_key] = response
            with open(f"tmp/cache-{self.name.split('/')[-1]}.p", "wb") as f:
                pickle.dump(self.cache, f)

        return response
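
# Module-level model instances used by the rest of the benchmark; uncomment
# a different line to evaluate another model.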
#llm = LLM("Mistral-7B-Instruct-v0.1-AWQ", override_hparams={'temperature': 0.1})
#llm = LLM("Mixtral-Instruct-AWQ", override_hparams={'temperature': 0.1})
#llm = LLM("openchat_3.5", override_hparams={'temperature': 0.1})
#llm = LLM("command")
llm = LLM("gpt-3.5-turbo")
#llm = LLM("gpt-4-1106-preview")
#llm = LLM("claude-instant-1.2")
#llm = LLM("mistral-tiny")
#llm = LLM("gemini-pro", override_hparams={'temperature': 0.3}, use_cache=False)
#eval_llm = LLM("gpt-4-1106-preview")
eval_llm = LLM("gpt-4-0125-preview", override_hparams={'temperature': 0.1})
#eval_llm = LLM("gpt-3.5-turbo", override_hparams={'temperature': 0.1})
vision_eval_llm = LLM("gpt-4-vision-preview", override_hparams={'temperature': 0.1})
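
# Example usage (a sketch, not part of the benchmark itself): calling an LLM
# instance runs the cache/retry logic above. Assumes the provider API key the
# model classes in llms/ expect (e.g. OPENAI_API_KEY) is set in the environment.
if __name__ == "__main__":
    # A bare string is treated as a single-message conversation.
    print(llm("Write a haiku about caching."))
    # A list is a conversation; the model replies to the final turn.
    print(llm(["What is 2+2?", "4", "Now double that."], max_tokens=64))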