Skip to content

Commit

Permalink
Merge pull request EmergenceAI#96 from EmergenceAI/test.json-improvements
Browse files Browse the repository at this point in the history

test improvements
  • Loading branch information
teaxio authored Aug 21, 2024
2 parents 9ad1ef0 + 4eba555 commit a98aeba
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 23 deletions.
48 changes: 27 additions & 21 deletions test/evaluators.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,31 +161,37 @@ def parse_urls(
ref_urls = [clean_url(url) for url in ref_urls]
matching_rule = task_config["eval"].get("url_note", "GOLD in PRED")
if matching_rule == "GOLD in PRED":
ref_base_paths, ref_queries = parse_urls(ref_urls)
pred_base_paths, pred_query = parse_url(pred)

base_score = float(
any(
[
ref_base_path in pred_base_paths
for ref_base_path in ref_base_paths
]
)
)
query_score = 1.0
for k, possible_values in ref_queries.items():
query_score *= float(
any(
possible_ref_value in pred_query.get(k, [])
for possible_ref_value in possible_values
)
)
score = base_score * query_score
for ref_url in ref_urls:
ref_base_path, ref_query = parse_url(ref_url)
pred_base_paths, pred_query = parse_url(pred)
# Calculate base score for each ref_url
base_score = float(ref_base_path in pred_base_paths)
# Calculate query score for each ref_url
query_score = 1.0
for k, possible_values in ref_query.items(): # type: ignore
if k in pred_query:
query_score *= float(
any(
possible_ref_value in pred_query.get(k, []) # type: ignore
for possible_ref_value in possible_values # type: ignore
)
)
else:
# If the key is not in pred_query, check if the reference URL has no query parameters
if not possible_values:
query_score *= 1.0 # No query parameters to match, so consider it a match
else:
query_score *= 0.0 # Reference URL has query parameters but predicted URL does not
# Calculate final score for each ref_url
score = base_score * query_score
# Return immediately if any score is 1
if score == 1.0:
return {"score": score}

else:
raise ValueError(f"Unknown matching rule: {matching_rule}")

return {"score": score}
return {"score": 0.0}


class HTMLContentEvaluator(Evaluator):
Expand Down
8 changes: 6 additions & 2 deletions test/tasks/test.json
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@
"geolocation": null,
"intent_template": "search {{site}} for {{item}}",
"instantiation_dict": {
"site": "the web",
"site": "on google",
"item": "the library of Alexandria"
},
"intent": "search the web for the library of Alexandria",
Expand Down Expand Up @@ -748,6 +748,8 @@
"battery life",
"too small",
"size",
"slow",
"usability",
"performance",
"functionality",
"speaker quality",
Expand Down Expand Up @@ -789,6 +791,8 @@
"slow",
"sluggish",
"gps",
"performance",
"durability",
"misrepresentation",
"not as advertised"
],
Expand Down Expand Up @@ -915,7 +919,7 @@
"url_match"
],
"reference_answers": null,
"reference_url": "https://www.bbc.com/search?q=china",
"reference_url": "https://www.bbc.com/search?q=china |OR| https://www.bbc.com/news/world/asia/china",
"program_html": null
}
}
Expand Down

0 comments on commit a98aeba

Please sign in to comment.