Merge pull request #6 from AutoResearch/carlosg/generate

carlosgjs · web-flow · commit d1d6d4cc9197 · 2023-12-06T14:55:32.000-08:00
feat: Generate command
diff --git a/README.md b/README.md
@@ -78,7 +78,7 @@ az storage blob upload  --account-name <account> --container <container> --file
 
 Prediction
 ```sh
-az ml job create -f azureml/predict.yml  --set display_name="Test prediction job" --web
+az ml job create -f azureml/eval.yml  --set display_name="Test prediction job" --web
 ```
 
 Notes:
diff --git a/azureml/eval.yml b/azureml/eval.yml
@@ -1,6 +1,6 @@
 $schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
 command: >
-  python -m autora.doc.pipelines.main predict
+  python -m autora.doc.pipelines.main eval 
   ${{inputs.data_dir}}/data.jsonl
   ${{inputs.model_dir}}/llama-2-7b-chat-hf
   SYS_1
diff --git a/azureml/generate.yml b/azureml/generate.yml
@@ -0,0 +1,18 @@
+$schema: https://azuremlschemas.azureedge.net/latest/commandJob.schema.json
+command: >
+  python -m autora.doc.pipelines.main generate 
+  --model-path ${{inputs.model_dir}}/llama-2-7b-chat-hf
+  --output ./outputs/output.txt
+  autora/doc/pipelines/main.py    
+code: ../src
+inputs:
+  model_dir:
+    type: uri_folder 
+    path: azureml://datastores/workspaceblobstore/paths/base_models    
+environment: 
+  image: mcr.microsoft.com/azureml/curated/acpt-pytorch-2.0-cuda11.7:21
+  conda_file: conda.yml
+display_name: autodoc_prediction
+compute: azureml:v100cluster
+experiment_name: autodoc_prediction
+description: |
diff --git a/pyproject.toml b/pyproject.toml
@@ -16,7 +16,6 @@ classifiers = [
 ]
 dynamic = ["version"]
 dependencies = [
-    "transformers>=4.35.2",
     "typer",
     "scipy",
     # This works, while installing pytorch and cuda from conda does not
@@ -42,17 +41,18 @@ dev = [
     "nbsphinx", # Used to integrate Python notebooks into Sphinx documentation
     "ipython", # Also used in building notebooks into Sphinx
     "matplotlib", # Used in sample notebook intro_notebook.ipynb
-    "numpy", # Used in sample notebook intro_notebook.ipynb
     "ipykernel",
 ]
 train = [
+    "jsonlines",
     "mlflow",
-    "azureml-mlflow",
+]
+azure = [
     "azureml-core",
-    "jsonlines",
+    "azureml-mlflow",
 ]
-
-train_cuda = [
+cuda = [
+    "transformers>=4.35.2",
     "bitsandbytes>=0.41.2.post2",
     "accelerate>=0.24.1",
     "xformers",
diff --git a/src/autora/doc/pipelines/main.py b/src/autora/doc/pipelines/main.py
@@ -2,8 +2,6 @@
 from timeit import default_timer as timer
 from typing import List
 
-import jsonlines
-import mlflow
 import torch
 import typer
 
@@ -19,9 +17,12 @@
 
 
 @app.command()
-def predict(
-    data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts
-) -> List[str]:
+def eval(data_file: str, model_path: str, sys_id: SystemPrompts, instruc_id: InstructionPrompts) -> List[str]:
+    import jsonlines
+    import mlflow
+
+    mlflow.autolog()
+
     run = mlflow.active_run()
 
     sys_prompt = SYS[sys_id]
@@ -33,7 +34,6 @@ def predict(
         logger.info(f"running predict with {data_file}")
         logger.info(f"model path: {model_path}")
 
-        # predictions = []
         with jsonlines.open(data_file) as reader:
             items = [item for item in reader]
             inputs = [item["instruction"] for item in items]
@@ -57,6 +57,26 @@ def predict(
         return predictions
 
 
+@app.command()
+def generate(
+    python_file: str,
+    model_path: str = "meta-llama/llama-2-7b-chat-hf",
+    output: str = "output.txt",
+    sys_id: SystemPrompts = SystemPrompts.SYS_1,
+    instruc_id: InstructionPrompts = InstructionPrompts.INSTR_SWEETP_1,
+) -> None:
+    with open(python_file, "r") as f:
+        inputs = [f.read()]
+    sys_prompt = SYS[sys_id]
+    instr_prompt = INSTR[instruc_id]
+    pred = Predictor(model_path)
+    predictions = pred.predict(sys_prompt, instr_prompt, inputs)
+    assert len(predictions) == 1, f"Expected only one output, got {len(predictions)}"
+    logger.info(f"Writing output to {output}")
+    with open(output, "w") as f:
+        f.write(predictions[0])
+
+
 @app.command()
 def import_model(model_name: str) -> None:
     pass
@@ -65,5 +85,4 @@ def import_model(model_name: str) -> None:
 if __name__ == "__main__":
     logger.info(f"Torch version: {torch.__version__} , Cuda available: {torch.cuda.is_available()}")
 
-    mlflow.autolog()
     app()
diff --git a/src/autora/doc/runtime/predict_hf.py b/src/autora/doc/runtime/predict_hf.py
@@ -39,7 +39,7 @@ def predict(self, sys: str, instr: str, inputs: List[str]) -> List[str]:
             top_k=40,
             num_return_sequences=1,
             eos_token_id=self.tokenizer.eos_token_id,
-            max_length=1000,
+            max_length=2048,
         )
 
         results = [Predictor.trim_prompt(sequence[0]["generated_text"]) for sequence in sequences]
diff --git a/src/autora/doc/runtime/prompts.py b/src/autora/doc/runtime/prompts.py
@@ -24,11 +24,11 @@
 paragraph should explain the purpose and the second one the procedure, but don't use the word 'Paragraph'"""
 
 
-class SystemPrompts(Enum):
+class SystemPrompts(str, Enum):
     SYS_1 = "SYS_1"
 
 
-class InstructionPrompts(Enum):
+class InstructionPrompts(str, Enum):
     INSTR_SWEETP_1 = "INSTR_SWEETP_1"
 
 
diff --git a/tests/test_main.py b/tests/test_main.py
@@ -1,6 +1,6 @@
 from pathlib import Path
 
-from autora.doc.pipelines.main import predict
+from autora.doc.pipelines.main import eval, generate
 from autora.doc.runtime.prompts import InstructionPrompts, SystemPrompts
 
 # dummy HF model for testing
@@ -9,7 +9,17 @@
 
 def test_predict() -> None:
     data = Path(__file__).parent.joinpath("../data/data.jsonl").resolve()
-    outputs = predict(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1)
+    outputs = eval(str(data), TEST_HF_MODEL, SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1)
     assert len(outputs) == 3, "Expected 3 outputs"
     for output in outputs:
         assert len(output) > 0, "Expected non-empty output"
+
+
+def test_generate() -> None:
+    python_file = __file__
+    output = Path("output.txt")
+    output.unlink(missing_ok=True)
+    generate(python_file, TEST_HF_MODEL, str(output), SystemPrompts.SYS_1, InstructionPrompts.INSTR_SWEETP_1)
+    assert output.exists(), f"Expected output file {output} to exist"
+    with open(str(output), "r") as f:
+        assert len(f.read()) > 0, f"Expected non-empty output file {output}"

Original file line number	Diff line number	Diff line change
`@@ -39,7 +39,7 @@ def predict(self, sys: str, instr: str, inputs: List[str]) -> List[str]:`
`39`	`39`	`top_k=40,`
`40`	`40`	`num_return_sequences=1,`
`41`	`41`	`eos_token_id=self.tokenizer.eos_token_id,`
`42`		`- max_length=1000,`
	`42`	`+ max_length=2048,`
`43`	`43`	`)`
`44`	`44`
`45`	`45`	`results = [Predictor.trim_prompt(sequence[0]["generated_text"]) for sequence in sequences]`