-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbenchmark_tests.py
92 lines (66 loc) · 2.24 KB
/
benchmark_tests.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
from benchmark import Benchmark, Ranger
import logging as log
# TEST
def local_model_local_data() -> None:
    """Smoke test: benchmark a locally-loaded model on a local CSV dataset."""
    # Build the benchmark first: one CSV-backed dataset plus one assignment
    # mapping the "text" column to prompts and "answer" to expected output.
    bench: Benchmark = Benchmark("my_custom_benchmark")
    bench.add_dataset_from_csv("dummy_dataset", "dummy_data.csv")
    bench.add_assignment(
        "custom_assignment", "dummy_dataset", "text", "answer"
    )

    # Wire the benchmark into a runner backed by a local model, then execute.
    runner: Ranger = Ranger("gpt2")
    runner.add_benchmark(bench)
    runner.run_all()
    print(runner.get_results())
# TEST
def local_model_hf_dataset_() -> None:
    """Benchmark a locally-loaded model on a HuggingFace-hosted dataset."""
    runner: Ranger = Ranger("gpt2")

    bench: Benchmark = Benchmark("my_custom_benchmark")
    bench.add_dataset_from_hf(
        "bugged_dataset", "NeuroDragon/BuggedPythonLeetCode"
    )

    # Custom comparison hook for grading answers.
    def always_match(a: str, b: str) -> bool:
        # TODO make an actual comparison
        return True

    bench.add_assignment(
        "bugged_assignemnt",
        "bugged_dataset",
        "question",
        "answer",
        comparison_function=always_match,
    )

    runner.add_benchmark(bench)
    runner.run_all()
    print(runner.get_results())
# TEST
def cloud_model_local_data() -> None:
    """Benchmark a cloud-hosted model (baseten) on a local CSV dataset.

    Uses placeholder credentials ("aaa"/"bbb") — replace with real values
    before running against an actual baseten deployment.
    """
    model_source = "gpt2"
    data_csv_location = "dummy_data.csv"
    # Same flow as local_model_local_data(), but the Ranger targets a
    # cloud provider and carries an API key and deployment id.
    ranger: Ranger = Ranger(model_source, "baseten", key="aaa", _id="bbb")
    custom_benchmark: Benchmark = Benchmark("my_custom_benchmark")
    custom_benchmark.add_dataset_from_csv("dummy_dataset", data_csv_location)
    custom_benchmark.add_assignment(
        "custom_assignment", "dummy_dataset", "text", "answer"
    )
    ranger.add_benchmark(custom_benchmark)
    ranger.run_all()
    print(ranger.get_results())
# TEST
def cloud_model_hf_data() -> None:
    """Placeholder: cloud-hosted model against a HuggingFace dataset.

    TODO: implement by combining the cloud Ranger setup from
    cloud_model_local_data() with add_dataset_from_hf().
    """
    return
if __name__ == "__main__":
    # Send all log output (DEBUG and up) to a file instead of the console.
    log.basicConfig(level=log.DEBUG, filename="benchmark_runner.log")
    local_model_local_data()
    # NOTE(review): the remaining scenarios are left disabled — presumably
    # they need network access / cloud credentials; enable manually to run.
    # local_model_hf_dataset_()
    # cloud_model_local_data()
    # cloud_model_hf_data()