|
3 | 3 | { |
4 | 4 | "cell_type": "code", |
5 | 5 | "execution_count": null, |
6 | | - "metadata": {}, |
| 6 | + "metadata": { |
| 7 | + "id": "3q1p1MKYxZei" |
| 8 | + }, |
| 9 | + "outputs": [], |
| 10 | + "source": [ |
| 11 | + "# Uncomment the following to work around an occasional bug in Colab:\n",
| 12 | + "# \"A UTF-8 locale is required. Got ANSI_X3.4-1968\"\n", |
| 13 | + "# import locale\n", |
| 14 | + "# locale.getpreferredencoding = lambda: \"UTF-8\"" |
| 15 | + ] |
| 16 | + }, |
| 17 | + { |
| 18 | + "cell_type": "code", |
| 19 | + "execution_count": null, |
| 20 | + "metadata": { |
| 21 | + "id": "YE9-CnCLUueM" |
| 22 | + }, |
| 23 | + "outputs": [], |
| 24 | + "source": [ |
| 25 | + "# Uncomment to clone and install autodoc from GitHub\n", |
| 26 | + "# !pip uninstall -y autora-doc\n", |
| 27 | + "# !git clone https://github.com/AutoResearch/autodoc.git\n", |
| 28 | + "# !pip install -e \"./autodoc[cuda,train]\"\n", |
| 29 | + "\n", |
| 30 | + "# Login to Huggingface since access to the model repo is private\n", |
| 31 | + "# 1) Request access through: https://ai.meta.com/resources/models-and-libraries/llama-downloads/\n", |
| 32 | + "# 2) Get a Huggingface token from: https://huggingface.co/settings/token (use same email as above)\n", |
| 33 | + "# !huggingface-cli login --token <your HF token>" |
| 34 | + ] |
| 35 | + }, |
| 36 | + { |
| 37 | + "cell_type": "code", |
| 38 | + "execution_count": null, |
| 39 | + "metadata": { |
| 40 | + "id": "jeu8zXoFUtXM" |
| 41 | + }, |
7 | 42 | "outputs": [], |
8 | 43 | "source": [ |
9 | 44 | "%load_ext autoreload\n", |
|
17 | 52 | { |
18 | 53 | "cell_type": "code", |
19 | 54 | "execution_count": null, |
20 | | - "metadata": {}, |
| 55 | + "metadata": { |
| 56 | + "id": "yOJoE_pnUtXN" |
| 57 | + }, |
21 | 58 | "outputs": [], |
22 | 59 | "source": [ |
23 | 60 | "model = \"meta-llama/Llama-2-7b-chat-hf\"" |
|
26 | 63 | { |
27 | 64 | "cell_type": "code", |
28 | 65 | "execution_count": null, |
29 | | - "metadata": {}, |
| 66 | + "metadata": { |
| 67 | + "id": "l6zK76t5UtXN" |
| 68 | + }, |
30 | 69 | "outputs": [], |
31 | 70 | "source": [ |
32 | 71 | "pred = Predictor(model)" |
33 | 72 | ] |
34 | 73 | }, |
35 | 74 | { |
36 | 75 | "cell_type": "markdown", |
37 | | - "metadata": {}, |
| 76 | + "metadata": { |
| 77 | + "id": "uAKbxADnUtXN" |
| 78 | + }, |
38 | 79 | "source": [ |
39 | 80 | "## Test generation for the variable declaration only"
40 | 81 | ] |
41 | 82 | }, |
42 | 83 | { |
43 | 84 | "cell_type": "code", |
44 | 85 | "execution_count": null, |
45 | | - "metadata": {}, |
| 86 | + "metadata": { |
| 87 | + "id": "EfOMJxaFUtXN" |
| 88 | + }, |
46 | 89 | "outputs": [], |
47 | 90 | "source": [ |
48 | 91 | "TEST_VAR_CODE = \"\"\"\n", |
|
56 | 99 | { |
57 | 100 | "cell_type": "code", |
58 | 101 | "execution_count": null, |
59 | | - "metadata": {}, |
| 102 | + "metadata": { |
| 103 | + "id": "pc7LAzGoUtXN" |
| 104 | + }, |
60 | 105 | "outputs": [], |
61 | 106 | "source": [ |
62 | 107 | "def test(promptid, code, label):\n", |
|
70 | 115 | " num_ret_seq=1,\n", |
71 | 116 | " )\n", |
72 | 117 | " bleu, meteor = evaluate_documentation(output, [label])\n", |
73 | | - " for i, o in enumerate(output[0]):\n", |
| 118 | + " for i, o in enumerate(output):\n", |
74 | 119 | " print(f\"{promptid}\\n******* Output {i} ********. bleu={bleu}, meteor={meteor}\\n{o}\\n*************\\n\")" |
75 | 120 | ] |
76 | 121 | }, |
77 | 122 | { |
78 | 123 | "cell_type": "code", |
79 | 124 | "execution_count": null, |
80 | | - "metadata": {}, |
| 125 | + "metadata": { |
| 126 | + "id": "BJgptog3UtXO" |
| 127 | + }, |
81 | 128 | "outputs": [], |
82 | 129 | "source": [ |
83 | 130 | "# Zero shot test\n", |
|
87 | 134 | { |
88 | 135 | "cell_type": "code", |
89 | 136 | "execution_count": null, |
90 | | - "metadata": {}, |
| 137 | + "metadata": { |
| 138 | + "id": "8T5dVHUMUtXO" |
| 139 | + }, |
91 | 140 | "outputs": [], |
92 | 141 | "source": [ |
93 | 142 | "# One shot test\n", |
|
96 | 145 | }, |
97 | 146 | { |
98 | 147 | "cell_type": "markdown", |
99 | | - "metadata": {}, |
| 148 | + "metadata": { |
| 149 | + "id": "fIeXbZXxUtXO" |
| 150 | + }, |
100 | 151 | "source": [ |
101 | 152 | "## One-shot generation for the complete code sample" |
102 | 153 | ] |
103 | 154 | }, |
104 | 155 | { |
105 | 156 | "cell_type": "code", |
106 | 157 | "execution_count": null, |
107 | | - "metadata": {}, |
| 158 | + "metadata": { |
| 159 | + "id": "K0241jrdUtXO" |
| 160 | + }, |
108 | 161 | "outputs": [], |
109 | 162 | "source": [ |
110 | 163 | "data_file = \"../data/autora/data.jsonl\"\n", |
| 164 | + "# Use this path if running in Colab and cloning the repo\n", |
| 165 | + "# data_file = \"./autodoc/data/autora/data.jsonl\"\n", |
111 | 166 | "inputs, labels = load_data(data_file)\n", |
112 | 167 | "# preprocessing removes comments, import statements and empty lines\n", |
113 | 168 | "inputs = [preprocess_code(i) for i in inputs]\n", |
|
119 | 174 | { |
120 | 175 | "cell_type": "code", |
121 | 176 | "execution_count": null, |
122 | | - "metadata": {}, |
| 177 | + "metadata": { |
| 178 | + "id": "ws7LUe7mUtXO" |
| 179 | + }, |
123 | 180 | "outputs": [], |
124 | 181 | "source": [ |
125 | 182 | "out, bleu, meteor = eval_prompt(data_file, pred, prompt, {\"max_new_tokens\": 800.0})\n", |
126 | | - "print(f\"bleu={bleu}, meteor={meteor}\\n{out[0][0]}\\n*************\\n\")" |
| 183 | + "print(f\"bleu={bleu}, meteor={meteor}\\n{out[0]}\\n*************\\n\")" |
127 | 184 | ] |
128 | 185 | }, |
129 | 186 | { |
130 | 187 | "cell_type": "code", |
131 | 188 | "execution_count": null, |
132 | | - "metadata": {}, |
| 189 | + "metadata": { |
| 190 | + "id": "5L2RZveeUtXO" |
| 191 | + }, |
133 | 192 | "outputs": [], |
134 | 193 | "source": [] |
135 | 194 | } |
136 | 195 | ], |
137 | 196 | "metadata": { |
| 197 | + "accelerator": "GPU", |
| 198 | + "colab": { |
| 199 | + "gpuType": "T4", |
| 200 | + "provenance": [] |
| 201 | + }, |
138 | 202 | "kernelspec": { |
139 | | - "display_name": "autodoc", |
140 | | - "language": "python", |
| 203 | + "display_name": "Python 3", |
141 | 204 | "name": "python3" |
142 | 205 | }, |
143 | 206 | "language_info": { |
|
154 | 217 | } |
155 | 218 | }, |
156 | 219 | "nbformat": 4, |
157 | | - "nbformat_minor": 2 |
| 220 | + "nbformat_minor": 0 |
158 | 221 | } |
0 commit comments