
Commit d3105e8

Merge branch 'main' into feature-azure-ml-pipeline-eval-prompts
2 parents: b689592 + 7891902

7 files changed (+187, -55 lines)

.github/actions/deps/action.yaml

Lines changed: 1 addition & 1 deletion
@@ -18,4 +18,4 @@ runs:
       python3 -m pip install --upgrade pip
     - name: Install project
       shell: sh
-      run: pip install ".[dev,train]"
+      run: pip install ".[dev,train,cuda]"

notebooks/generate.ipynb

Lines changed: 2 additions & 5 deletions
@@ -25,12 +25,9 @@
     "# Uncomment to clone and install autodoc from GitHub\n",
     "# !pip uninstall -y autora-doc\n",
     "# !git clone https://github.com/AutoResearch/autodoc.git\n",
-    "# !pip install -e \"./autodoc[cuda,train]\"\n",
+    "# !pip install \"./autodoc[cuda,train]\"\n",
     "\n",
-    "# Login to Huggingface since access to the model repo is private\n",
-    "# 1) Request access through: https://ai.meta.com/resources/models-and-libraries/llama-downloads/\n",
-    "# 2) Get a Huggingface token from: https://huggingface.co/settings/token (use same email as above)\n",
-    "# !huggingface-cli login --token <your HF token>"
+    "# IMPORTANT: Please restart the runtime after running the above commands"
    ]
   },
   {
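If the gated meta-llama/Llama-2-7b-chat-hf base checkpoint is still needed rather than the pre-quantized autora-doc mirror used elsewhere in this commit, authentication can still be done from the notebook; a minimal sketch, assuming huggingface_hub is available in the runtime:

import huggingface_hub

# Opens an interactive login prompt in Jupyter/Colab; the stored token is then
# used by later from_pretrained calls against gated repositories.
huggingface_hub.notebook_login(new_session=False)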

notebooks/import_model.ipynb

Lines changed: 115 additions & 0 deletions
@@ -0,0 +1,115 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from transformers import AutoModelForCausalLM, BitsAndBytesConfig, AutoTokenizer\n",
+    "import torch\n",
+    "import huggingface_hub\n",
+    "\n",
+    "print(torch.cuda.is_available())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_path = \"meta-llama/Llama-2-7b-chat-hf\"\n",
+    "print(model_path)\n",
+    "target_model_path = \"autora-doc/Llama-2-7b-chat-hf-nf4\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the model in 4bit quantization for faster inference on smaller GPUs\n",
+    "conf = BitsAndBytesConfig(\n",
+    "    load_in_4bit=True,\n",
+    "    bnb_4bit_use_double_quant=True,\n",
+    "    bnb_4bit_quant_type=\"nf4\",\n",
+    "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Load the tokenizer and model\n",
+    "tokenizer = AutoTokenizer.from_pretrained(model_path)\n",
+    "model = AutoModelForCausalLM.from_pretrained(model_path, quantization_config=conf, device_map=\"auto\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This will work when running from a Jupyter notebook or Colab.\n",
+    "# For other authentication methods, see https://huggingface.co/docs/huggingface_hub/main/en/quick-start#authentication\n",
+    "huggingface_hub.notebook_login(new_session=False, write_permission=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tokenizer.push_to_hub(target_model_path)\n",
+    "model.push_to_hub(target_model_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Alternatvely, upload to Azure Blob Storage (currently not used)\n",
+    "from azureml.core import Workspace\n",
+    "\n",
+    "# save locally first\n",
+    "tokenizer.save_pretrained(f\"./models/{model_path}\")\n",
+    "model.save_pretrained(f\"./models/{model_path}\")\n",
+    "\n",
+    "# If all goes well, upload to blob storage:\n",
+    "workspace = Workspace.from_config()\n",
+    "ds = workspace.get_default_datastore()\n",
+    "ds.upload(f\"./models/{model_path}\", f\"./base_models/{target_model_path}\", show_progress=True, overwrite=True)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".env",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
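Once pushed, the nf4 checkpoint can be reloaded without rebuilding a BitsAndBytesConfig, since the quantization settings are saved alongside the model; a minimal sketch, assuming access to the autora-doc/Llama-2-7b-chat-hf-nf4 repo and a CUDA device:

from transformers import AutoModelForCausalLM, AutoTokenizer

# The quantization config travels with the uploaded checkpoint (config.json),
# so a plain from_pretrained call restores the 4-bit nf4 weights.
tokenizer = AutoTokenizer.from_pretrained("autora-doc/Llama-2-7b-chat-hf-nf4")
model = AutoModelForCausalLM.from_pretrained("autora-doc/Llama-2-7b-chat-hf-nf4", device_map="auto")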

pyproject.toml

Lines changed: 18 additions & 32 deletions
@@ -1,10 +1,8 @@
 [project]
 name = "autora-doc"
-license = {file = "LICENSE"}
+license = { file = "LICENSE" }
 readme = "README.md"
-authors = [
-    { name = "Carlos Garcia Jurado Suarez", email = "carlosg@uw.edu" }
-]
+authors = [{ name = "Carlos Garcia Jurado Suarez", email = "carlosg@uw.edu" }]
 requires-python = ">=3.8"
 classifiers = [
     "Development Status :: 4 - Beta",
@@ -20,7 +18,7 @@ dependencies = [
     "scipy",
     # This works, while installing from pytorch and cuda from conda does not",
     "torch==2.0.1",
-    "transformers>=4.35.2",
+    "transformers>=4.37.2",
     "nltk",
 ]

@@ -29,36 +27,26 @@ description = "Automatic documentation generator from AutoRA code"
 [project.optional-dependencies]
 dev = [
     "pytest",
-    "pytest-cov", # Used to report total code coverage
-    "pre-commit", # Used to run checks before finalizing a git commit
-    "sphinx", # Used to automatically generate documentation
+    "pytest-cov",        # Used to report total code coverage
+    "pre-commit",        # Used to run checks before finalizing a git commit
+    "sphinx",            # Used to automatically generate documentation
     "sphinx-rtd-theme", # Used to render documentation
-    "sphinx-autoapi", # Used to automatically generate api documentation
-    "black", # Used for static linting of files
-    "mypy", # Used for static type checking of files
+    "sphinx-autoapi",    # Used to automatically generate api documentation
+    "black",             # Used for static linting of files
+    "mypy",              # Used for static type checking of files
     # if you add dependencies here while experimenting in a notebook and you
     # want that notebook to render in your documentation, please add the
     # dependencies to ./docs/requirements.txt as well.
-    "nbconvert", # Needed for pre-commit check to clear output from Python notebooks
-    "nbsphinx", # Used to integrate Python notebooks into Sphinx documentation
-    "ipython", # Also used in building notebooks into Sphinx
-    "matplotlib", # Used in sample notebook intro_notebook.ipynb
+    "nbconvert",  # Needed for pre-commit check to clear output from Python notebooks
+    "nbsphinx",   # Used to integrate Python notebooks into Sphinx documentation
+    "ipython",    # Also used in building notebooks into Sphinx
+    "matplotlib", # Used in sample notebook intro_notebook.ipynb
     "ipykernel",
     "hf_transfer",
 ]
-train = [
-    "jsonlines",
-    "mlflow",
-]
-azure = [
-    "azureml-core",
-    "azureml-mlflow",
-]
-cuda = [
-    "bitsandbytes>=0.41.2.post2",
-    "accelerate>=0.24.1",
-    "xformers",
-]
+train = ["jsonlines", "mlflow"]
+azure = ["azureml-core", "azureml-mlflow"]
+cuda = ["bitsandbytes>=0.42.0", "accelerate>=0.24.1", "xformers"]

 [project.urls]
 Homepage = "https://github.com/AutoResearch/autodoc"
@@ -68,9 +56,7 @@ requires = ["hatchling", "hatch-vcs"]
 build-backend = "hatchling.build"

 [tool.pytest.ini_options]
-testpaths = [
-    "tests",
-]
+testpaths = ["tests"]

 [tool.black]
 line-length = 110
@@ -81,7 +67,7 @@ profile = "black"
 line_length = 110

 [tool.coverage.run]
-omit=["src/autora/doc/_version.py"]
+omit = ["src/autora/doc/_version.py"]

 [tool.hatch]
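Because the cuda extra is now part of the default install in the deps action above, a quick runtime check of the raised version floors can be useful; a hypothetical snippet, assuming the package was installed with pip install ".[dev,train,cuda]":

from importlib.metadata import version

# Floors raised in this diff: transformers>=4.37.2, bitsandbytes>=0.42.0
for pkg, floor in [("transformers", "4.37.2"), ("bitsandbytes", "0.42.0")]:
    print(f"{pkg}: installed {version(pkg)}, required >= {floor}")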

src/autora/doc/pipelines/main.py

Lines changed: 1 addition & 1 deletion
@@ -187,7 +187,7 @@ def generate(
     predictions = pred.predict(prompt, [input], **param_dict)
     assert len(predictions) == 1, f"Expected only one output, got {len(predictions)}"
     logger.info(f"Writing output to {output}")
-    with open(output, "w") as f:
+    with open(output, "w", encoding="utf-8") as f:
         f.write(predictions[0])
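The explicit encoding matters because open() otherwise falls back to the platform's locale encoding (for example cp1252 on Windows), which can raise UnicodeEncodeError when the generated text contains non-ASCII characters. A minimal illustration, hypothetical and not part of the diff:

# On a cp1252 locale this can raise UnicodeEncodeError:
# with open("out.txt", "w") as f:
#     f.write("naïve prompt \u2192 output")

# Explicit UTF-8 writes the same text regardless of platform defaults:
with open("out.txt", "w", encoding="utf-8") as f:
    f.write("naïve prompt \u2192 output")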

src/autora/doc/runtime/predict_hf.py

Lines changed: 24 additions & 15 deletions
@@ -1,5 +1,5 @@
 import logging
-from typing import Dict, Iterable, List
+from typing import Dict, Iterable, List, Tuple

 import torch
 import transformers
@@ -9,6 +9,8 @@

 logger = logging.getLogger(__name__)

+quantized_models = {"meta-llama/Llama-2-7b-chat-hf": "autora-doc/Llama-2-7b-chat-hf-nf4"}
+

 def preprocess_code(code: str) -> str:
     lines: Iterable[str] = code.splitlines()
@@ -21,10 +23,12 @@ def preprocess_code(code: str) -> str:


 class Predictor:
-    def __init__(self, model_path: str):
-        config = self.get_config()
+    def __init__(self, input_model_path: str):
+        model_path, config = Predictor.get_config(input_model_path)
+        if model_path != input_model_path:
+            logger.info(f"Mapped requested model '{input_model_path}' to '{model_path}'")

-        logger.info(f"Loading model from {model_path}")
+        logger.info(f"Loading model from {model_path} using config {config}")
         self.tokenizer = AutoTokenizer.from_pretrained(model_path)
         self.model = AutoModelForCausalLM.from_pretrained(
             model_path,
@@ -84,19 +88,24 @@ def tokenize(self, input: List[str]) -> Dict[str, List[List[int]]]:
         tokens: Dict[str, List[List[int]]] = self.tokenizer(input)
         return tokens

-    def get_config(self) -> Dict[str, str]:
+    @staticmethod
+    def get_config(model_path: str) -> Tuple[str, Dict[str, str]]:
         if torch.cuda.is_available():
             from transformers import BitsAndBytesConfig

+            config = {"device_map": "auto"}
+            mapped_path = quantized_models.get(model_path, None)
+            if mapped_path:
+                # found an already quantized model, so no need to get a new quant config
+                return mapped_path, config
+
             # Load the model in 4bit quantization for faster inference on smaller GPUs
-            return {
-                "quantization_config": BitsAndBytesConfig(
-                    load_in_4bit=True,
-                    bnb_4bit_use_double_quant=True,
-                    bnb_4bit_quant_type="nf4",
-                    bnb_4bit_compute_dtype=torch.bfloat16,
-                ),
-                "device_map": "auto",
-            }
+            config["quantization_config"] = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_compute_dtype=torch.bfloat16,
+            )
+            return model_path, config
         else:
-            return {}
+            return model_path, {}
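To make the new contract concrete, here is a hedged sketch of how a caller can consume the (resolved_path, kwargs) tuple; the loading call mirrors what Predictor.__init__ appears to do, but the exact keyword handling there is an assumption:

from transformers import AutoModelForCausalLM, AutoTokenizer

from autora.doc.runtime.predict_hf import Predictor

# On a CUDA host, a model listed in quantized_models resolves to its pre-built
# nf4 mirror and kwargs carries only device_map; unknown models get a fresh
# BitsAndBytesConfig instead. On CPU-only hosts both values pass through unchanged.
resolved_path, kwargs = Predictor.get_config("meta-llama/Llama-2-7b-chat-hf")

tokenizer = AutoTokenizer.from_pretrained(resolved_path)
model = AutoModelForCausalLM.from_pretrained(resolved_path, **kwargs)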

tests/test_predict_hf.py

Lines changed: 26 additions & 1 deletion
@@ -1,4 +1,10 @@
-from autora.doc.runtime.predict_hf import Predictor
+from unittest import mock
+
+from autora.doc.runtime.predict_hf import Predictor, quantized_models
+
+# Test models with and without available quantized models
+MODEL_NO_QUANTIZED = "hf-internal-testing/tiny-random-FalconForCausalLM"
+MODEL_WITH_QUANTIZED = "meta-llama/Llama-2-7b-chat-hf"


 def test_trim_prompt() -> None:
@@ -14,3 +20,22 @@ def test_trim_prompt() -> None:
     """
     output = Predictor.trim_prompt(with_marker)
     assert output == "output\n"
+
+
+@mock.patch("torch.cuda.is_available", return_value=True)
+def test_get_config_cuda(mock: mock.Mock) -> None:
+    model, config = Predictor.get_config(MODEL_WITH_QUANTIZED)
+    assert model == quantized_models[MODEL_WITH_QUANTIZED]
+    assert "quantization_config" not in config
+
+    model, config = Predictor.get_config(MODEL_NO_QUANTIZED)
+    # no pre-quantized model available
+    assert model == MODEL_NO_QUANTIZED
+    assert "quantization_config" in config
+
+
+@mock.patch("torch.cuda.is_available", return_value=False)
+def test_get_config_nocuda(mock: mock.Mock) -> None:
+    model, config = Predictor.get_config(MODEL_WITH_QUANTIZED)
+    assert model == MODEL_WITH_QUANTIZED
+    assert len(config) == 0
