-
Notifications
You must be signed in to change notification settings - Fork 0
/
test.py
56 lines (54 loc) · 1.44 KB
/
test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from bsd_evals import BSD_Evals
from eval import Eval
from model import Model
# Roster of model configurations handed to the evaluation harness below.
# Entries that differ only in their version string are generated from a
# tuple of versions; list order matches the order the versions are written.
models = [
    # Anthropic-hosted Claude 3 variants: identical token limit and temperature.
    *(
        Model(
            model_family="Claude",
            model_version=claude_version,
            service="Anthropic",
            max_tokens=4096,
            temperature=1.0,
        )
        for claude_version in (
            "claude-3-haiku-20240307",
            "claude-3-sonnet-20240229",
            "claude-3-opus-20240229",
        )
    ),
    # Gemini through Google AI Studio: only the output-token cap is set here.
    Model(
        model_family="Gemini",
        model_version="gemini-1.0-pro",
        service="Google AI Studio",
        max_output_tokens=2048,
    ),
    # Gemini through Google Cloud: sampling parameters are given explicitly.
    Model(
        model_family="Gemini",
        model_version="gemini-1.0-pro-001",
        service="Google Cloud",
        max_output_tokens=2048,
        temperature=0.8,
        top_k=40,
        top_p=1,
    ),
    # GPT variants: same service label and temperature, no token limit passed.
    *(
        Model(
            model_family="GPT",
            model_version=gpt_version,
            service="Open AI",
            temperature=1.0,
        )
        for gpt_version in (
            "gpt-3.5-turbo",
            "gpt-4-turbo-preview",
            "gpt-4",
        )
    ),
]
# Construct the harness from the model roster and the eval definitions file.
evals = BSD_Evals(
    models=models,
    test_eval_file="./evals/test_evals.json",
)

# Run the harness, then ask it to display results with the "html" argument
# (presumably an output-format selector; confirm against BSD_Evals).
evals.run()
evals.display_results("html")