Skip to content

Commit 8ebbeb6

Browse files
authored
feat: Running notebook in Colab (#28)
1 parent 3c7e0a0 commit 8ebbeb6

File tree

1 file changed

+80
-17
lines changed

1 file changed

+80
-17
lines changed

notebooks/generate.ipynb

Lines changed: 80 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,42 @@
33
{
44
"cell_type": "code",
55
"execution_count": null,
6-
"metadata": {},
6+
"metadata": {
7+
"id": "3q1p1MKYxZei"
8+
},
9+
"outputs": [],
10+
"source": [
11+
"# Uncomment the following to work around an occasional bug in Colab:\n",
12+
"# \"A UTF-8 locale is required. Got ANSI_X3.4-1968\"\n",
13+
"# import locale\n",
14+
"# locale.getpreferredencoding = lambda: \"UTF-8\""
15+
]
16+
},
17+
{
18+
"cell_type": "code",
19+
"execution_count": null,
20+
"metadata": {
21+
"id": "YE9-CnCLUueM"
22+
},
23+
"outputs": [],
24+
"source": [
25+
"# Uncomment to clone and install autodoc from GitHub\n",
26+
"# !pip uninstall -y autora-doc\n",
27+
"# !git clone https://github.com/AutoResearch/autodoc.git\n",
28+
"# !pip install -e \"./autodoc[cuda,train]\"\n",
29+
"\n",
30+
"# Login to Huggingface since access to the model repo is private\n",
31+
"# 1) Request access through: https://ai.meta.com/resources/models-and-libraries/llama-downloads/\n",
32+
"# 2) Get a Huggingface token from: https://huggingface.co/settings/token (use same email as above)\n",
33+
"# !huggingface-cli login --token <your HF token>"
34+
]
35+
},
36+
{
37+
"cell_type": "code",
38+
"execution_count": null,
39+
"metadata": {
40+
"id": "jeu8zXoFUtXM"
41+
},
742
"outputs": [],
843
"source": [
944
"%load_ext autoreload\n",
@@ -17,7 +52,9 @@
1752
{
1853
"cell_type": "code",
1954
"execution_count": null,
20-
"metadata": {},
55+
"metadata": {
56+
"id": "yOJoE_pnUtXN"
57+
},
2158
"outputs": [],
2259
"source": [
2360
"model = \"meta-llama/Llama-2-7b-chat-hf\""
@@ -26,23 +63,29 @@
2663
{
2764
"cell_type": "code",
2865
"execution_count": null,
29-
"metadata": {},
66+
"metadata": {
67+
"id": "l6zK76t5UtXN"
68+
},
3069
"outputs": [],
3170
"source": [
3271
"pred = Predictor(model)"
3372
]
3473
},
3574
{
3675
"cell_type": "markdown",
37-
"metadata": {},
76+
"metadata": {
77+
"id": "uAKbxADnUtXN"
78+
},
3879
"source": [
3980
"## Test generation for the variable declaration only"
4081
]
4182
},
4283
{
4384
"cell_type": "code",
4485
"execution_count": null,
45-
"metadata": {},
86+
"metadata": {
87+
"id": "EfOMJxaFUtXN"
88+
},
4689
"outputs": [],
4790
"source": [
4891
"TEST_VAR_CODE = \"\"\"\n",
@@ -56,7 +99,9 @@
5699
{
57100
"cell_type": "code",
58101
"execution_count": null,
59-
"metadata": {},
102+
"metadata": {
103+
"id": "pc7LAzGoUtXN"
104+
},
60105
"outputs": [],
61106
"source": [
62107
"def test(promptid, code, label):\n",
@@ -70,14 +115,16 @@
70115
" num_ret_seq=1,\n",
71116
" )\n",
72117
" bleu, meteor = evaluate_documentation(output, [label])\n",
73-
" for i, o in enumerate(output[0]):\n",
118+
" for i, o in enumerate(output):\n",
74119
" print(f\"{promptid}\\n******* Output {i} ********. bleu={bleu}, meteor={meteor}\\n{o}\\n*************\\n\")"
75120
]
76121
},
77122
{
78123
"cell_type": "code",
79124
"execution_count": null,
80-
"metadata": {},
125+
"metadata": {
126+
"id": "BJgptog3UtXO"
127+
},
81128
"outputs": [],
82129
"source": [
83130
"# Zero shot test\n",
@@ -87,7 +134,9 @@
87134
{
88135
"cell_type": "code",
89136
"execution_count": null,
90-
"metadata": {},
137+
"metadata": {
138+
"id": "8T5dVHUMUtXO"
139+
},
91140
"outputs": [],
92141
"source": [
93142
"# One shot test\n",
@@ -96,18 +145,24 @@
96145
},
97146
{
98147
"cell_type": "markdown",
99-
"metadata": {},
148+
"metadata": {
149+
"id": "fIeXbZXxUtXO"
150+
},
100151
"source": [
101152
"## One-shot generation for the complete code sample"
102153
]
103154
},
104155
{
105156
"cell_type": "code",
106157
"execution_count": null,
107-
"metadata": {},
158+
"metadata": {
159+
"id": "K0241jrdUtXO"
160+
},
108161
"outputs": [],
109162
"source": [
110163
"data_file = \"../data/autora/data.jsonl\"\n",
164+
"# Use this path if running in Colab and cloning the repo\n",
165+
"# data_file = \"./autodoc/data/autora/data.jsonl\"\n",
111166
"inputs, labels = load_data(data_file)\n",
112167
"# preprocessing removes comments, import statements and empty lines\n",
113168
"inputs = [preprocess_code(i) for i in inputs]\n",
@@ -119,25 +174,33 @@
119174
{
120175
"cell_type": "code",
121176
"execution_count": null,
122-
"metadata": {},
177+
"metadata": {
178+
"id": "ws7LUe7mUtXO"
179+
},
123180
"outputs": [],
124181
"source": [
125182
"out, bleu, meteor = eval_prompt(data_file, pred, prompt, {\"max_new_tokens\": 800.0})\n",
126-
"print(f\"bleu={bleu}, meteor={meteor}\\n{out[0][0]}\\n*************\\n\")"
183+
"print(f\"bleu={bleu}, meteor={meteor}\\n{out[0]}\\n*************\\n\")"
127184
]
128185
},
129186
{
130187
"cell_type": "code",
131188
"execution_count": null,
132-
"metadata": {},
189+
"metadata": {
190+
"id": "5L2RZveeUtXO"
191+
},
133192
"outputs": [],
134193
"source": []
135194
}
136195
],
137196
"metadata": {
197+
"accelerator": "GPU",
198+
"colab": {
199+
"gpuType": "T4",
200+
"provenance": []
201+
},
138202
"kernelspec": {
139-
"display_name": "autodoc",
140-
"language": "python",
203+
"display_name": "Python 3",
141204
"name": "python3"
142205
},
143206
"language_info": {
@@ -154,5 +217,5 @@
154217
}
155218
},
156219
"nbformat": 4,
157-
"nbformat_minor": 2
220+
"nbformat_minor": 0
158221
}

0 commit comments

Comments
 (0)