a high performing meta prompt for the tgd optimizer adding prompt en…

…gineering strategy
pgmiso · Jan 5, 2025 · 0ad2490 · 0ad2490
1 parent f0328b0
commit 0ad2490
Show file tree

Hide file tree

Showing 27 changed files with 1,288 additions and 883 deletions.
diff --git a/adalflow/adalflow/core/component.py b/adalflow/adalflow/core/component.py
@@ -553,8 +553,6 @@ def __call__(self, *args, **kwargs):
         #    the difference between training vs. inference.
         from adalflow.optim.parameter import Parameter
 
-        print("has_bicall", self._has_bicall())
-
         if self._has_bicall():
             output = self.bicall(*args, **kwargs)
 

diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py
@@ -699,7 +699,12 @@ def backward(
             else:
                 backward = False
                 for pred in children_params:
-                    if pred.requires_opt and pred.param_type == ParameterType.PROMPT:
+                    if pred.requires_opt and pred.param_type in [
+                        ParameterType.PROMPT,
+                        ParameterType.GENERATOR_OUTPUT,
+                        ParameterType.RETRIEVER_OUTPUT,
+                        ParameterType.OUTPUT,
+                    ]:
                         backward = True
                         break
                 if backward:
@@ -738,9 +743,12 @@ def _backward_through_all_predecessors(
             for k, v in prompt_kwargs.items()
         }
 
+        print(f"gt: {response.get_gt()}")
+
         conversation_prompt_kwargs = {
             "input_value": input_prompt_kwargs,
             "llm_output": response.get_prompt_data(),
+            # "gt": response.get_gt(),
         }
 
         conversation_str = Prompt(
@@ -759,6 +767,9 @@ def _backward_through_all_predecessors(
             conv_ins_template = CONVERSATION_START_INSTRUCTION_CHAIN
             obj_ins_template = OBJECTIVE_INSTRUCTION_CHAIN
             response_gradient = response.get_gradients_str()
+            # response_gradient = response.get_gradients_component_schema(
+            #     skip_correct_sample=False
+            # )
             if not response_gradient:
                 raise ValueError(
                     f"Generator: No gradient found for {response}. Please check the response."
@@ -791,7 +802,7 @@ def _backward_through_all_predecessors(
         backward_engine_prompt_str = backward_engine.get_prompt(
             **backward_engine_prompt_kwargs
         )
-        print(f"Backward engine prompt: {backward_engine_prompt_str}")
+        # print(f"Backward engine prompt: {backward_engine_prompt_str}")
 
         gradient_output: GeneratorOutput = None
         response_gradient_list = [""] * len(children_params)
@@ -830,6 +841,8 @@ def _backward_through_all_predecessors(
                     response_gradient_list = [failure_message] * len(children_params)
                 printc(f"failure_message: {failure_message}", color="red")
 
+        print(f"gradient list: {response_gradient_list}")
+
         # generate the gradient for each child
         for i, pred in enumerate(children_params):
             if not pred.requires_opt or pred.param_type == ParameterType.DEMOS:
@@ -879,9 +892,6 @@ def _backward_through_one_predecessor(
                 f"Generator: Skipping {pred} as it does not require optimization."
             )
             return
-        printc(
-            f"Generator: Backward through {pred}, is_intermediate_node: {is_intermediate_node}"
-        )
 
         if pred.check_if_already_computed_gradient_respect_to(response.id):
             log.debug(
@@ -902,6 +912,7 @@ def _backward_through_one_predecessor(
         conversation_prompt_kwargs = {
             "input_value": input_prompt_kwargs,
             "llm_output": response.get_prompt_data(),
+            "gt": response.get_gt(),
         }
 
         conversation_str = Prompt(
@@ -953,7 +964,7 @@ def _backward_through_one_predecessor(
         backward_engine_prompt_str = backward_engine.get_prompt(
             **backward_engine_prompt_kwargs
         )
-        print(f"Backward engine prompt: {backward_engine_prompt_str}")
+        # print(f"Backward engine prompt: {backward_engine_prompt_str}")
         gradient_output: GeneratorOutput = None
         if (
             backward_pass_setup.compute_grad_for_errors_only

diff --git a/adalflow/adalflow/datasets/hotpot_qa.py b/adalflow/adalflow/datasets/hotpot_qa.py
@@ -1,13 +1,12 @@
 import random
 import os
-import csv
 from typing import Literal
 
 from adalflow.utils.lazy_import import safe_import, OptionalPackages
 
 
 from adalflow.utils.data import Dataset
-from adalflow.utils.file_io import save_csv
+from adalflow.utils.file_io import save_csv, save_json, load_json
 from adalflow.datasets.utils import prepare_dataset_path
 from adalflow.core.base_data_class import DataClass
 from adalflow.datasets.types import HotPotQAData
@@ -29,6 +28,8 @@ def __init__(
         sampled_valset: 3916
         test: 7405
 
+        All answers are a phrase in the supporting context where we can choose supporting facts from the context.
+
         You can specify the size of the dataset to load by setting the size parameter.
         """
         if split not in ["train", "val", "test"]:
@@ -44,7 +45,7 @@ def __init__(
         self.task_name = f"hotpot_qa_{keep_details}"
         data_path = prepare_dataset_path(self.root, self.task_name)
         # download and save
-        split_csv_path = os.path.join(data_path, f"{split}.csv")
+        split_csv_path = os.path.join(data_path, f"{split}.json")
         print(f"split_csv_path: {split_csv_path}")
         self._check_or_download_dataset(
             split_csv_path, split, only_hard_examples, keep_details
@@ -55,12 +56,20 @@ def __init__(
         # created_data_class = DynamicDataClassFactory.from_dict(
         #  "HotPotQAData", {"id": "str", "question": "str", "answer": "str"}
 
-        with open(split_csv_path, newline="") as csvfile:
-            reader = csv.DictReader(csvfile)
-            for i, row in enumerate(reader):
-                if size is not None and i >= size:
-                    break
-                self.data.append(HotPotQAData.from_dict(row))
+        # with open(split_csv_path, newline="") as csvfile:
+        #     reader = csv.DictReader(csvfile)
+        #     for i, row in enumerate(reader):
+        #         if size is not None and i >= size:
+        #             break
+        #         self.data.append(HotPotQAData.from_dict(row))
+
+        self.data = load_json(split_csv_path)
+        if size is not None:
+            # use random seed to make sure the same data is loaded
+            # random.Random(0).shuffle(self.data)
+            self.data = self.data[:size]
+        # convert to dataclass
+        self.data = [HotPotQAData.from_dict(d) for d in self.data]
 
     def _check_or_download_dataset(
         self,
@@ -99,6 +108,24 @@ def _check_or_download_dataset(
         hf_official_dev = load_dataset(
             "hotpot_qa", "fullwiki", split="validation", trust_remote_code=True
         )
+        data_path_dir = os.path.dirname(data_path)
+        # save all the original data
+        all_original_keys = hf_official_train[0].keys()
+        for split, examples in zip(
+            ["hf_official_train", "hf_official_dev"],
+            [hf_official_train, hf_official_dev],
+        ):
+            target_path = os.path.join(data_path_dir, f"{split}.csv")
+            save_csv(examples, f=target_path, fieldnames=all_original_keys)
+            # for example in examples:
+            #     # is answer in the context
+            #     print(f"example: {example}")
+            #     context = str(json.dumps(example["context"]))
+            #     if example["answer"] in context:
+            #         print(f"answer in context")
+            #     else:
+            #         print(f"answer not in context")
+            print(f"saved {split} to {target_path}")
         keys = ["question", "answer"]
         if keep_details == "all":
             keys = [
@@ -110,7 +137,7 @@ def _check_or_download_dataset(
                 "context",
             ]
         elif keep_details == "dev_titles":
-            keys = ["id", "question", "answer", "supporting_facts"]
+            keys = ["id", "question", "answer", "supporting_facts", "context"]
 
         official_train = []  # 15661
         for raw_example in hf_official_train:
@@ -119,19 +146,19 @@ def _check_or_download_dataset(
 
                 if "supporting_facts" in example:
                     example["gold_titles"] = set(example["supporting_facts"]["title"])
-                    del example["supporting_facts"]
+                    # del example["supporting_facts"]
 
                 official_train.append(example)
         print(f"official_train: {len(official_train)}")
 
         rng = random.Random(0)
         rng.shuffle(official_train)
 
-        sampled_trainset = official_train[: len(official_train) * 75 // 100]  # 11745
+        sampled_trainset = official_train[: len(official_train) * 70 // 100]  # 11745
         print(f"sampled_trainset: {len(sampled_trainset)}")
 
         sampled_valset = official_train[  # 3916
-            len(official_train) * 75 // 100 :
+            len(official_train) * 70 // 100 :
         ]  # this is not the official dev set
 
         print(f"sampled_valset: {len(sampled_valset)}")
@@ -141,6 +168,8 @@ def _check_or_download_dataset(
         #         del example["gold_titles"]
 
         test = []  # 7405
+
+        print(f"raw_example: {hf_official_dev[0]}")
         for raw_example in hf_official_dev:
             assert raw_example["level"] == "hard"
             example = {
@@ -149,19 +178,33 @@ def _check_or_download_dataset(
             }
             if "supporting_facts" in example:
                 example["gold_titles"] = set(example["supporting_facts"]["title"])
-                del example["supporting_facts"]
+
+                # del example["supporting_facts"]
             test.append(example)
 
-        keys = ["id", "question", "answer", "gold_titles"]
-        data_path_dir = os.path.dirname(data_path)
+        keys = ["id", "question", "answer", "gold_titles", "context"]
+
+        # split test into val and test
+        # randomly shuffle the test
+        rng.shuffle(test)
+        test_split = test[: len(test) * 50 // 100]  # 3702
+        val_split = test[len(test) * 50 // 100 :]  # 3703
+
         # save to csv
         for split, examples in zip(
             ["train", "val", "test"],
-            [sampled_trainset, sampled_valset, test],
+            [sampled_trainset, val_split, test_split],
         ):
             # target_path = prepare_dataset_path(self.root, task_name, split)
-            target_path = os.path.join(data_path_dir, f"{split}.csv")
-            save_csv(examples, f=target_path, fieldnames=keys)
+            target_path = os.path.join(data_path_dir, f"{split}.json")
+            # filter the examples with only the keys
+            save_examples = []
+            for example in examples:
+                save_example = {k: example[k] for k in keys if k in example}
+                save_examples.append(save_example)
+            save_json(save_examples, f=target_path)
+            if split == "train":
+                print(f"train example: {examples[0]}")
             print(f"saved {split} to {target_path}")
 
         if split == "train":
@@ -190,3 +233,107 @@ def __len__(self):
     print(len(testdataset))
     print(f"valdataset[0]: {valdataset[0]}")
     print(f"testdataset[0]: {testdataset[0]}")
+    # example = {
+    #     "id": "5a8b57f25542995d1e6f1371",
+    #     "question": "Were Scott Derrickson and Ed Wood of the same nationality?",
+    #     "answer": "yes",
+    #     "type": "comparison",
+    #     "level": "hard",
+    #     "supporting_facts": {
+    #         "title": ["Scott Derrickson", "Ed Wood"],
+    #         "sent_id": [0, 0],
+    #     },
+    #     "context": {
+    #         "title": [
+    #             "Adam Collis",
+    #             "Ed Wood (film)",
+    #             "Tyler Bates",
+    #             "Doctor Strange (2016 film)",
+    #             "Hellraiser: Inferno",
+    #             "Sinister (film)",
+    #             "Deliver Us from Evil (2014 film)",
+    #             "Woodson, Arkansas",
+    #             "Conrad Brooks",
+    #             "The Exorcism of Emily Rose",
+    #         ],
+    #         "sentences": [
+    #             [
+    #                 "Adam Collis is an American filmmaker and actor.",
+    #                 " He attended the Duke University from 1986 to 1990 and the University of California, Los Angeles from 2007 to 2010.",
+    #                 " He also studied cinema at the University of Southern California from 1991 to 1997.",
+    #                 ' Collis first work was the assistant director for the Scott Derrickson\'s short "Love in the Ruins" (1995).',
+    #                 ' In 1998, he played "Crankshaft" in Eric Koyanagi\'s "Hundred Percent".',
+    #             ],
+    #             [
+    #                 "Ed Wood is a 1994 American biographical period comedy-drama film directed and produced by Tim Burton, and starring Johnny Depp as cult filmmaker Ed Wood.",
+    #                 " The film concerns the period in Wood's life when he made his best-known films as well as his relationship with actor Bela Lugosi, played by Martin Landau.",
+    #                 " Sarah Jessica Parker, Patricia Arquette, Jeffrey Jones, Lisa Marie, and Bill Murray are among the supporting cast.",
+    #             ],
+    #             [
+    #                 "Tyler Bates (born June 5, 1965) is an American musician, music producer, and composer for films, television, and video games.",
+    #                 ' Much of his work is in the action and horror film genres, with films like "Dawn of the Dead, 300, Sucker Punch," and "John Wick."',
+    #                 " He has collaborated with directors like Zack Snyder, Rob Zombie, Neil Marshall, William Friedkin, Scott Derrickson, and James Gunn.",
+    #                 ' With Gunn, he has scored every one of the director\'s films; including "Guardians of the Galaxy", which became one of the highest grossing domestic movies of 2014, and its 2017 sequel.',
+    #                 ' In addition, he is also the lead guitarist of the American rock band Marilyn Manson, and produced its albums "The Pale Emperor" and "Heaven Upside Down".',
+    #             ],
+    #             [
+    #                 "Doctor Strange is a 2016 American superhero film based on the Marvel Comics character of the same name, produced by Marvel Studios and distributed by Walt Disney Studios Motion Pictures.",
+    #                 " It is the fourteenth film of the Marvel Cinematic Universe (MCU).",
+    #                 " The film was directed by Scott Derrickson, who wrote it with Jon Spaihts and C. Robert Cargill, and stars Benedict Cumberbatch as Stephen Strange, along with Chiwetel Ejiofor, Rachel McAdams, Benedict Wong, Michael Stuhlbarg, Benjamin Bratt, Scott Adkins, Mads Mikkelsen, and Tilda Swinton.",
+    #                 ' In "Doctor Strange", surgeon Strange learns the mystic arts after a career-ending car accident.',
+    #             ],
+    #             [
+    #                 "Hellraiser: Inferno (also known as Hellraiser V: Inferno) is a 2000 American horror film.",
+    #                 ' It is the fifth installment in the "Hellraiser" series and the first "Hellraiser" film to go straight-to-DVD.',
+    #                 " It was directed by Scott Derrickson and released on October 3, 2000.",
+    #                 " The film concerns a corrupt detective who discovers Lemarchand's box at a crime scene.",
+    #                 " The film's reviews were mixed.",
+    #             ],
+    #             [
+    #                 "Sinister is a 2012 supernatural horror film directed by Scott Derrickson and written by Derrickson and C. Robert Cargill.",
+    #                 " It stars Ethan Hawke as fictional true-crime writer Ellison Oswalt who discovers a box of home movies in his attic that puts his family in danger.",
+    #             ],
+    #             [
+    #                 "Deliver Us from Evil is a 2014 American supernatural horror film directed by Scott Derrickson and produced by Jerry Bruckheimer.",
+    #                 ' The film is officially based on a 2001 non-fiction book entitled "Beware the Night" by Ralph Sarchie and Lisa Collier Cool, and its marketing campaign highlighted that it was "inspired by actual accounts".',
+    #                 " The film stars Eric Bana, Édgar Ramírez, Sean Harris, Olivia Munn, and Joel McHale in the main roles and was released on July 2, 2014.",
+    #             ],
+    #             [
+    #                 "Woodson is a census-designated place (CDP) in Pulaski County, Arkansas, in the United States.",
+    #                 " Its population was 403 at the 2010 census.",
+    #                 " It is part of the Little Rock–North Little Rock–Conway Metropolitan Statistical Area.",
+    #                 " Woodson and its accompanying Woodson Lake and Wood Hollow are the namesake for Ed Wood Sr., a prominent plantation owner, trader, and businessman at the turn of the 20th century.",
+    #                 " Woodson is adjacent to the Wood Plantation, the largest of the plantations own by Ed Wood Sr.",
+    #             ],
+    #             [
+    #                 "Conrad Brooks (born Conrad Biedrzycki on January 3, 1931 in Baltimore, Maryland) is an American actor.",
+    #                 " He moved to Hollywood, California in 1948 to pursue a career in acting.",
+    #                 ' He got his start in movies appearing in Ed Wood films such as "Plan 9 from Outer Space", "Glen or Glenda", and "Jail Bait."',
+    #                 " He took a break from acting during the 1960s and 1970s but due to the ongoing interest in the films of Ed Wood, he reemerged in the 1980s and has become a prolific actor.",
+    #                 " He also has since gone on to write, produce and direct several films.",
+    #             ],
+    #             [
+    #                 "The Exorcism of Emily Rose is a 2005 American legal drama horror film directed by Scott Derrickson and starring Laura Linney and Tom Wilkinson.",
+    #                 " The film is loosely based on the story of Anneliese Michel and follows a self-proclaimed agnostic who acts as defense counsel (Linney) representing a parish priest (Wilkinson), accused by the state of negligent homicide after he performed an exorcism.",
+    #             ],
+    #         ],
+    #     },
+    # }
+
+    # # save to csv
+    # keys = ["id", "question", "answer", "gold_titles", "context"]
+    # example["gold_titles"] = set(example["supporting_facts"]["title"])
+
+    # # test, save to hotpotQA
+
+    # data = HotPotQAData.from_dict({k: example[k] for k in keys})
+    # print(f"data: {data}")
+
+    # # save to json
+    # save_json([data.to_dict()], f="test.json")
+
+    # # load from json
+    # loaded_data = load_json("test.json")
+    # # convert to dataclass
+    # data = HotPotQAData.from_dict(loaded_data[0])
+    # print(f"data: {data}")