AB test relavant context.py
import random
from entire_pipeline import *
import gradio as gr

# A/B test for the relevance of context retrieved with the similarity-score method
def get_context(query: str, method: str = 'cs', n_contexts: int = 5):
    """
    Retrieval step of the project pipeline: takes a query and finds the most relevant
    context paragraphs, which can then be given to the OpenAI API to generate an answer.

    :param query: The query to search for
    :param method: The semantic search method to use ('cs' for cosine similarity)
    :param n_contexts: The number of contexts to return
    :return: The best matching context(s)
    """
    # 1. Preprocess the query and compute its embedding
    embedding = get_text_embedding(query)
    # 2. Semantic search
    best_ctx = semantic_search_model(embedding, method, n_contexts)
    # 3. Return the best context
    return best_ctx
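

# For reference, a minimal sketch of what a cosine-similarity ('cs') search could look
# like. This is an illustration only: the real semantic_search_model is imported from
# entire_pipeline, and the corpus/embedding arguments below are assumptions.
import numpy as np


def _cosine_similarity_search_sketch(query_embedding, corpus_embeddings, corpus_texts, n_contexts=5):
    """Return the n_contexts corpus texts whose embeddings are most similar to the query."""
    query = np.asarray(query_embedding, dtype=float)
    corpus = np.asarray(corpus_embeddings, dtype=float)
    # Cosine similarity: dot product of each corpus row with the query, divided by the norms
    scores = corpus @ query / (np.linalg.norm(corpus, axis=1) * np.linalg.norm(query) + 1e-10)
    # Indices of the n_contexts highest-scoring rows, best first
    top_idx = np.argsort(scores)[::-1][:n_contexts]
    return [corpus_texts[i] for i in top_idx]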


def chatbot(input):
    if input:
        context_cs = get_context(input, method='cs')
        # context_wcs = get_context(input, method='weighted_cs')
        # context_ann = get_context(input, method='ann')
        return context_cs


with open("AB_Questions.txt") as f:
    lines = f.readlines()

# Select the questions to use (choose by index into lines).
questions = [q[:-1] for q in lines[31:61]]

outputs = gr.outputs.Textbox()
inputs = []
responses = []  # Track selected responses
for question in questions:
    # Use the pipeline to generate a context paragraph for each question
    context_cs = chatbot(question)
    # Stub answers for testing without calling the API:
    # answer_pipeline_cs, answer_pipeline_wcs, answer_pipeline_ann, answer_chatgpt = "1", "2", "3", "4"
    inputs.append(gr.inputs.Checkbox(label=(question + ":" + context_cs)))
    # responses.append(input_choices)  # Add choices to selected_responses


def evaluate_responses(*preferred_responses):
    print(preferred_responses)
    # Count how many context paragraphs were marked as relevant
    relevant = sum(preferred_responses)
    not_relevant = len(preferred_responses) - relevant
    summary = f"You found {relevant} of the context paragraphs relevant and {not_relevant} of the context paragraphs not relevant."
    return summary
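
# Example of the scoring logic (hypothetical call): evaluate_responses(True, False, True)
# would report 2 relevant and 1 not relevant, since checked boxes count as relevant.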


interface = gr.Interface(
    fn=evaluate_responses,
    inputs=inputs,
    outputs=outputs,
    title="AI Chatbot Evaluation",
    description="Evaluate responses from pipeline and ChatGPT",
    theme="compact",
    allow_flagging="never",
)
interface.launch()