commit-0
diff --git a/examples/star/inference.py b/examples/star/inference.py
Lines changed: 11 additions & 23 deletions
diff --git a/examples/star/star.py b/examples/star/star.py
Lines changed: 16 additions & 6 deletions
@@ -1,28 +1,29 @@
 from typing import List
 from datasets import Dataset
 from vllm import LLM, SamplingParams
+from utils import generate_prompt
+
 
 def generate_predictions(
-    model_name: str,
-    dataset: Dataset,
-    temperature: float = 1.0,
-    n: int = 1
+    model_name: str, dataset: Dataset, temperature: float = 1.0, n: int = 1
 ) -> List[List[str]]:
-    """
-    Generate predictions for a given dataset using a specified language model and
+    """Generate predictions for a given dataset using a specified language model and
     sampling parameters. The function loads the dataset, constructs prompts from
     each example, and obtains generated predictions. The resulting predictions are
     then added as a new column to the dataset.
 
     Args:
+    ----
         model_name (str): Name of the model to use for generation.
         dataset (Dataset): The Dataset object.
         temperature (float, optional): Temperature setting for the model's
             sampling strategy. Default is 1.0.
         n (int, optional): Number of sampling runs per prompt. Default is 1.
 
     Returns:
+    -------
         predictions (List[List[str]]): Predictions on the dataset.
+
     """
     sampling_params = SamplingParams(n=n, temperature=temperature, max_tokens=512)
     llm = LLM(model=model_name)
@@ -31,19 +32,7 @@ def generate_predictions(
     for example in dataset:
         prompt = example["prompt"]
         test = example["test"]
-        prompt = f"""Write a Python function implementation for the following prompt:
-
-{prompt}
-
-Your code should satisfy these tests:
-
-{test}
-
-Return only the implementation code, no tests or explanations. Be sure to include the relevant import statements:
-```python
-code
-```
-"""
+        prompt = generate_prompt(prompt, test)
         prompts.append(prompt)
 
     outputs = llm.generate(prompts, sampling_params)
@@ -53,7 +42,6 @@ def generate_predictions(
         generated_texts = [one.text for one in output.outputs]
         results.append(generated_texts)
     return results
-    #out_name = dataset_name.split("/")[-1]
-    #out_name = f"wentingzhao/{out_name}_predictions_{n}"
-    #ds.push_to_hub(out_name)
-
+    # out_name = dataset_name.split("/")[-1]
+    # out_name = f"wentingzhao/{out_name}_predictions_{n}"
+    # ds.push_to_hub(out_name)
@@ -1,4 +1,5 @@
 """Main STaR Loop"""
+
 import argparse
 from datasets import Dataset, load_dataset
 from inference import generate_predictions
@@ -8,24 +9,33 @@
 def main():
     parser = argparse.ArgumentParser()
     parser.add_argument("--model_name", type=str, required=True, help="model to use")
-    parser.add_argument("--dataset_name", type=str, required=True, help="dataset to use")
+    parser.add_argument(
+        "--dataset_name", type=str, required=True, help="dataset to use"
+    )
     parser.add_argument("--temperature", type=float, default=1)
     parser.add_argument("-n", type=int, default=1)
     args = parser.parse_args()
 
     ds = load_dataset(args.dataset_name)
     assert "train" in ds
-    all_samples = generate_predictions(args.model_name, ds["train"], args.temperature, args.n)
+    all_samples = generate_predictions(
+        args.model_name, ds["train"], args.temperature, args.n
+    )
     assert len(ds["train"]) == len(all_samples)
     all_traces, all_execution_results = execute_tests(ds["train"], all_samples)
     passed_examples = []
-    for example, execution_results, samples in zip(ds["train"], all_execution_results, all_samples):
+    for example, execution_results, samples in zip(
+        ds["train"], all_execution_results, all_samples
+    ):
         for execution_result, sample in zip(execution_results, samples):
             if execution_result == 0:
-                example['prediction'] = sample
+                example["prediction"] = sample
                 passed_examples.append(example)
                 break
-    print(len(passed_examples)/len(ds["train"]))
+    new_ds = Dataset.from_list(passed_examples)
+    new_ds.to_json("star_training.json")
+    print(len(passed_examples) / len(ds["train"]))
+
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()