Merge pull request EmergenceAI#96 from EmergenceAI/test.json-improvements

test improvements
test-zeus-ai · Aug 21, 2024 · a98aeba · a98aeba
2 parents 9ad1ef0 + 4eba555
commit a98aeba
Show file tree

Hide file tree

Showing 2 changed files with 33 additions and 23 deletions.
diff --git a/test/evaluators.py b/test/evaluators.py
@@ -161,31 +161,37 @@ def parse_urls(
         ref_urls = [clean_url(url) for url in ref_urls]
         matching_rule = task_config["eval"].get("url_note", "GOLD in PRED")
         if matching_rule == "GOLD in PRED":
-            ref_base_paths, ref_queries = parse_urls(ref_urls)
-            pred_base_paths, pred_query = parse_url(pred)
-
-            base_score = float(
-                any(
-                    [
-                        ref_base_path in pred_base_paths
-                        for ref_base_path in ref_base_paths
-                    ]
-                )
-            )
-            query_score = 1.0
-            for k, possible_values in ref_queries.items():
-                query_score *= float(
-                    any(
-                        possible_ref_value in pred_query.get(k, [])
-                        for possible_ref_value in possible_values
-                    )
-                )
-            score = base_score * query_score
+            for ref_url in ref_urls:
+                ref_base_path, ref_query = parse_url(ref_url)
+                pred_base_paths, pred_query = parse_url(pred)
+                # Calculate base score for each ref_url
+                base_score = float(ref_base_path in pred_base_paths)
+                # Calculate query score for each ref_url
+                query_score = 1.0
+                for k, possible_values in ref_query.items(): # type: ignore
+                    if k in pred_query:
+                        query_score *= float(
+                            any(
+                                possible_ref_value in pred_query.get(k, []) # type: ignore
+                                for possible_ref_value in possible_values # type: ignore
+                            )
+                        )
+                    else:
+                        # If the key is not in pred_query, check if the reference URL has no query parameters
+                        if not possible_values:
+                            query_score *= 1.0  # No query parameters to match, so consider it a match
+                        else:
+                            query_score *= 0.0  # Reference URL has query parameters but predicted URL does not
+                # Calculate final score for each ref_url
+                score = base_score * query_score
+                # Return immediately if any score is 1
+                if score == 1.0:
+                    return {"score": score}
 
         else:
             raise ValueError(f"Unknown matching rule: {matching_rule}")
 
-        return {"score": score}
+        return {"score": 0.0}
 
 
 class HTMLContentEvaluator(Evaluator):

diff --git a/test/tasks/test.json b/test/tasks/test.json
@@ -305,7 +305,7 @@
         "geolocation": null,
         "intent_template": "search {{site}} for {{item}}",
         "instantiation_dict": {
-            "site": "the web",
+            "site": "on google",
             "item": "the library of Alexandria"
         },
         "intent": "search the web for the library of Alexandria",
@@ -748,6 +748,8 @@
                         "battery life",
                         "too small",
                         "size",
+                        "slow",
+                        "usability",
                         "performance",
                         "functionality",
                         "speaker quality",
@@ -789,6 +791,8 @@
                         "slow",
                         "sluggish",
                         "gps",
+                        "performance",
+                        "durability",
                         "misrepresentation",
                         "not as advertised"
                     ],
@@ -915,7 +919,7 @@
                 "url_match"
             ],
             "reference_answers": null,
-            "reference_url": "https://www.bbc.com/search?q=china",
+            "reference_url": "https://www.bbc.com/search?q=china |OR| https://www.bbc.com/news/world/asia/china",
             "program_html": null
         }
     }