Skip to content

Commit e074e3a

Browse files
committed
initial commit
1 parent 9c0f005 commit e074e3a

12 files changed

+14637
-187
lines changed

1. Generative AI.ipynb

Lines changed: 2978 additions & 0 deletions
Large diffs are not rendered by default.

2. Prompt Engineering.ipynb

Lines changed: 1097 additions & 0 deletions
Large diffs are not rendered by default.

3. NLP with HuggingFace.ipynb

Lines changed: 1945 additions & 0 deletions
Large diffs are not rendered by default.

4. Whisper.ipynb

Lines changed: 367 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,367 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "markdown",
5+
"id": "1fb4b605",
6+
"metadata": {},
7+
"source": [
8+
"<div style=\"width: 100%; overflow: hidden;\">\n",
9+
" <div style=\"width: 150px; float: left;\"> <img src=\"data/D4Sci_logo_ball.png\" alt=\"Data For Science, Inc\" align=\"left\" border=\"0\" width=150px> </div>\n",
10+
" <div style=\"float: left; margin-left: 10px;\"> <h1>LLMs for Data Science</h1>\n",
11+
" <h1>Audio to Text and Text to Speech with OpenAI</h1>\n",
12+
" <p>Bruno Gonçalves<br/>\n",
13+
" <a href=\"http://www.data4sci.com/\">www.data4sci.com</a><br/>\n",
14+
" @bgoncalves, @data4sci</p></div>\n",
15+
"</div>"
16+
]
17+
},
18+
{
19+
"cell_type": "code",
20+
"execution_count": 2,
21+
"id": "6cf47f33",
22+
"metadata": {},
23+
"outputs": [
24+
{
25+
"name": "stdout",
26+
"output_type": "stream",
27+
"text": [
28+
"The watermark extension is already loaded. To reload it, use:\n",
29+
" %reload_ext watermark\n"
30+
]
31+
}
32+
],
33+
"source": [
34+
"from collections import Counter, defaultdict\n",
35+
"import random\n",
36+
"\n",
37+
"import pandas as pd\n",
38+
"import numpy as np\n",
39+
"\n",
40+
"import matplotlib\n",
41+
"import matplotlib.pyplot as plt \n",
42+
"\n",
43+
"import openai\n",
44+
"from openai import OpenAI\n",
45+
"\n",
46+
"import tqdm as tq\n",
47+
"from tqdm.notebook import tqdm\n",
48+
"\n",
49+
"import watermark\n",
50+
"\n",
51+
"%load_ext watermark\n",
52+
"%matplotlib inline"
53+
]
54+
},
55+
{
56+
"cell_type": "markdown",
57+
"id": "d27ac09b",
58+
"metadata": {},
59+
"source": [
60+
"We start by printing out the versions of the libraries we're using, for future reference."
61+
]
62+
},
63+
{
64+
"cell_type": "code",
65+
"execution_count": 3,
66+
"id": "acecf310",
67+
"metadata": {},
68+
"outputs": [
69+
{
70+
"name": "stdout",
71+
"output_type": "stream",
72+
"text": [
73+
"Python implementation: CPython\n",
74+
"Python version : 3.13.3\n",
75+
"IPython version : 9.7.0\n",
76+
"\n",
77+
"Compiler : Clang 17.0.0 (clang-1700.0.13.3)\n",
78+
"OS : Darwin\n",
79+
"Release : 25.1.0\n",
80+
"Machine : arm64\n",
81+
"Processor : arm\n",
82+
"CPU cores : 16\n",
83+
"Architecture: 64bit\n",
84+
"\n",
85+
"Git hash: 9c0f005b405008a296ca6d92ba14b5bde219449c\n",
86+
"\n",
87+
"watermark : 2.5.0\n",
88+
"pandas : 2.3.3\n",
89+
"openai : 2.8.0\n",
90+
"tqdm : 4.67.1\n",
91+
"numpy : 2.3.5\n",
92+
"matplotlib: 3.10.7\n",
93+
"\n"
94+
]
95+
}
96+
],
97+
"source": [
98+
"%watermark -n -v -m -g -iv"
99+
]
100+
},
101+
{
102+
"cell_type": "markdown",
103+
"id": "94cf9da3",
104+
"metadata": {},
105+
"source": [
106+
"Load default figure style"
107+
]
108+
},
109+
{
110+
"cell_type": "code",
111+
"execution_count": 4,
112+
"id": "03cc2e1f",
113+
"metadata": {},
114+
"outputs": [],
115+
"source": [
116+
"plt.style.use('d4sci.mplstyle')\n",
117+
"colors = plt.rcParams['axes.prop_cycle'].by_key()['color']"
118+
]
119+
},
120+
{
121+
"cell_type": "markdown",
122+
"id": "7309e242",
123+
"metadata": {},
124+
"source": [
125+
"# Audio to Text"
126+
]
127+
},
128+
{
129+
"cell_type": "code",
130+
"execution_count": 5,
131+
"id": "ae0313d2",
132+
"metadata": {},
133+
"outputs": [],
134+
"source": [
135+
"client = OpenAI()"
136+
]
137+
},
138+
{
139+
"cell_type": "markdown",
140+
"id": "49713958",
141+
"metadata": {},
142+
"source": [
143+
"Let us transcribe a small local audio file."
144+
]
145+
},
146+
{
147+
"cell_type": "code",
148+
"execution_count": 6,
149+
"id": "81fc83ad",
150+
"metadata": {},
151+
"outputs": [],
152+
"source": [
153+
"!open data/gettysburg10.wav"
154+
]
155+
},
156+
{
157+
"cell_type": "code",
158+
"execution_count": 8,
159+
"id": "c126e84b",
160+
"metadata": {},
161+
"outputs": [
162+
{
163+
"name": "stdout",
164+
"output_type": "stream",
165+
"text": [
166+
"CPU times: user 10.9 ms, sys: 6.11 ms, total: 17.1 ms\n",
167+
"Wall time: 2.3 s\n"
168+
]
169+
}
170+
],
171+
"source": [
172+
"%%time\n",
173+
"transcription = client.audio.transcriptions.create(\n",
174+
" model=\"gpt-4o-transcribe\", \n",
175+
" file=open(\"data/gettysburg10.wav\", \"rb\"),\n",
176+
" response_format=\"text\",\n",
177+
" language=\"en\"\n",
178+
")"
179+
]
180+
},
181+
{
182+
"cell_type": "markdown",
183+
"id": "d7016614",
184+
"metadata": {},
185+
"source": [
186+
"And the transcript is simply:"
187+
]
188+
},
189+
{
190+
"cell_type": "code",
191+
"execution_count": 11,
192+
"id": "55d4c9ef",
193+
"metadata": {},
194+
"outputs": [
195+
{
196+
"name": "stdout",
197+
"output_type": "stream",
198+
"text": [
199+
"Four score and seven years ago our fathers brought forth on this continent a new nation, conceived in liberty, and dedicated to the proposition that all men are created equal.\n",
200+
"\n"
201+
]
202+
}
203+
],
204+
"source": [
205+
"print(transcription)"
206+
]
207+
},
208+
{
209+
"cell_type": "code",
210+
"execution_count": null,
211+
"id": "b309b6c3-9601-4490-9b40-678fd7054edf",
212+
"metadata": {},
213+
"outputs": [],
214+
"source": []
215+
},
216+
{
217+
"cell_type": "markdown",
218+
"id": "98b9aab5",
219+
"metadata": {},
220+
"source": [
221+
"# Text to Speech"
222+
]
223+
},
224+
{
225+
"cell_type": "markdown",
226+
"id": "fce51d44",
227+
"metadata": {},
228+
"source": [
229+
"Now we take the opposite approach, going from written text to high-quality audio."
230+
]
231+
},
232+
{
233+
"cell_type": "code",
234+
"execution_count": 15,
235+
"id": "6db6fd86",
236+
"metadata": {},
237+
"outputs": [],
238+
"source": [
239+
"quote = \"\"\"\n",
240+
"Scientists have calculated that the chances of something so patently absurd \n",
241+
"actually existing are millions to one.\n",
242+
"But magicians have calculated that million-to-one chances crop up nine times out of ten.\n",
243+
"\"\"\""
244+
]
245+
},
246+
{
247+
"cell_type": "markdown",
248+
"id": "15217f9b",
249+
"metadata": {},
250+
"source": [
251+
"You can learn more about text to speech (and sample the various voices) in the [official documentation](https://platform.openai.com/docs/guides/text-to-speech/quickstart)."
252+
]
253+
},
254+
{
255+
"cell_type": "code",
256+
"execution_count": 16,
257+
"id": "6cbb88b3",
258+
"metadata": {},
259+
"outputs": [
260+
{
261+
"name": "stdout",
262+
"output_type": "stream",
263+
"text": [
264+
"CPU times: user 24.1 ms, sys: 17.7 ms, total: 41.9 ms\n",
265+
"Wall time: 2.46 s\n"
266+
]
267+
}
268+
],
269+
"source": [
270+
"%%time\n",
271+
"audio = client.audio.speech.create(\n",
272+
" input=quote, \n",
273+
" model=\"gpt-4o-mini-tts\", \n",
274+
" voice='fable',\n",
275+
" response_format='mp3')"
276+
]
277+
},
278+
{
279+
"cell_type": "markdown",
280+
"id": "e12f1247",
281+
"metadata": {},
282+
"source": [
283+
"The resulting audio can be written directly to a file:"
284+
]
285+
},
286+
{
287+
"cell_type": "code",
288+
"execution_count": 17,
289+
"id": "a55a56b2",
290+
"metadata": {},
291+
"outputs": [],
292+
"source": [
293+
"audio.write_to_file('data/pratchett.mp3')"
294+
]
295+
},
296+
{
297+
"cell_type": "code",
298+
"execution_count": 18,
299+
"id": "717a1f60",
300+
"metadata": {},
301+
"outputs": [],
302+
"source": [
303+
"!open data/pratchett.mp3"
304+
]
305+
},
306+
{
307+
"cell_type": "markdown",
308+
"id": "36364452",
309+
"metadata": {},
310+
"source": [
311+
"<center>\n",
312+
" <img src=\"data/D4Sci_logo_full.png\" alt=\"Data For Science, Inc\" align=\"center\" border=\"0\" width=300px> \n",
313+
"</center>"
314+
]
315+
}
316+
],
317+
"metadata": {
318+
"kernelspec": {
319+
"display_name": "Python 3 (ipykernel)",
320+
"language": "python",
321+
"name": "python3"
322+
},
323+
"language_info": {
324+
"codemirror_mode": {
325+
"name": "ipython",
326+
"version": 3
327+
},
328+
"file_extension": ".py",
329+
"mimetype": "text/x-python",
330+
"name": "python",
331+
"nbconvert_exporter": "python",
332+
"pygments_lexer": "ipython3",
333+
"version": "3.13.3"
334+
},
335+
"varInspector": {
336+
"cols": {
337+
"lenName": 16,
338+
"lenType": 16,
339+
"lenVar": 40
340+
},
341+
"kernels_config": {
342+
"python": {
343+
"delete_cmd_postfix": "",
344+
"delete_cmd_prefix": "del ",
345+
"library": "var_list.py",
346+
"varRefreshCmd": "print(var_dic_list())"
347+
},
348+
"r": {
349+
"delete_cmd_postfix": ") ",
350+
"delete_cmd_prefix": "rm(",
351+
"library": "var_list.r",
352+
"varRefreshCmd": "cat(var_dic_list()) "
353+
}
354+
},
355+
"types_to_exclude": [
356+
"module",
357+
"function",
358+
"builtin_function_or_method",
359+
"instance",
360+
"_Feature"
361+
],
362+
"window_display": false
363+
}
364+
},
365+
"nbformat": 4,
366+
"nbformat_minor": 5
367+
}

0 commit comments

Comments
 (0)