Skip to content

Commit bb65815

Browse files
committed
feat(python/sdk): add support to list transcripts (#4824)
GitOrigin-RevId: c39fd04618bf1e9548776d73d84f065f7f286a18
1 parent 56dec5e commit bb65815

File tree

8 files changed

+316
-1
lines changed

8 files changed

+316
-1
lines changed

README.md

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,74 @@ print(transcript.text)
192192

193193
</details>
194194

195+
<details>
196+
<summary>Upload a file</summary>
197+
198+
```python
199+
import assemblyai as aai
200+
201+
transcriber = aai.Transcriber()
202+
upload_url = transcriber.upload_file(data)
203+
```
204+
205+
</details>
206+
207+
<details>
208+
<summary>Delete a transcript</summary>
209+
210+
```python
211+
import assemblyai as aai
212+
213+
transcript = aai.Transcriber().transcribe(audio_url)
214+
215+
aai.Transcript.delete_by_id(transcript.id)
216+
```
217+
218+
</details>
219+
220+
<details>
221+
<summary>List transcripts</summary>
222+
223+
This returns a page of transcripts you created.
224+
225+
```python
226+
import assemblyai as aai
227+
228+
transcriber = aai.Transcriber()
229+
230+
page = transcriber.list_transcripts()
231+
print(page.page_details) # Page details
232+
print(page.transcripts) # List of transcripts
233+
```
234+
235+
You can apply filter parameters:
236+
237+
```python
238+
params = aai.ListTranscriptParameters(
239+
limit=3,
240+
status=aai.TranscriptStatus.completed,
241+
)
242+
page = transcriber.list_transcripts(params)
243+
```
244+
245+
You can also paginate over all pages by using the helper property `before_id_of_prev_url`.
246+
247+
The `prev_url` always points to a page with older transcripts. If you extract the `before_id`
248+
from the `prev_url` query parameters, you can paginate over all pages from newest to oldest.
249+
250+
```python
251+
transcriber = aai.Transcriber()
252+
253+
params = aai.ListTranscriptParameters()
254+
255+
page = transcriber.list_transcripts(params)
256+
while page.page_details.before_id_of_prev_url is not None:
257+
params.before_id = page.page_details.before_id_of_prev_url
258+
page = transcriber.list_transcripts(params)
259+
```
260+
261+
</details>
262+
195263
---
196264

197265
### **LeMUR Examples**

assemblyai/__init__.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@
3434
LemurTaskResponse,
3535
LemurTranscriptSource,
3636
LemurUsage,
37+
ListTranscriptParameters,
38+
ListTranscriptResponse,
39+
PageDetails,
3740
Paragraph,
3841
PIIRedactedAudioQuality,
3942
PIIRedactionPolicy,
@@ -57,6 +60,7 @@
5760
Timestamp,
5861
TranscriptError,
5962
TranscriptionConfig,
63+
TranscriptItem,
6064
TranscriptStatus,
6165
Utterance,
6266
UtteranceWord,
@@ -103,6 +107,9 @@
103107
"LemurSummaryResponse",
104108
"LemurTaskResponse",
105109
"LemurUsage",
110+
"ListTranscriptParameters",
111+
"ListTranscriptResponse",
112+
"PageDetails",
106113
"Sentence",
107114
"Sentiment",
108115
"SentimentType",
@@ -117,6 +124,7 @@
117124
"Transcript",
118125
"TranscriptError",
119126
"TranscriptGroup",
127+
"TranscriptItem",
120128
"TranscriptStatus",
121129
"Utterance",
122130
"UtteranceWord",

assemblyai/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "0.29.0"
1+
__version__ = "0.31.0"

assemblyai/api.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,29 @@ def get_paragraphs(
247247
return types.ParagraphsResponse.parse_obj(response.json())
248248

249249

250+
def list_transcripts(
    client: httpx.Client,
    params: Optional[types.ListTranscriptParameters],
) -> types.ListTranscriptResponse:
    """
    Request one page of transcripts from the transcript endpoint.

    Args:
        client: The HTTP client used to issue the GET request.
        params: Optional query parameters. Fields that are `None` are left out of the query.

    Returns: The parsed transcript list response.

    Raises:
        types.AssemblyAIError: If the response status is anything other than OK.
    """
    # Only filters that were explicitly set end up in the query string.
    query = params.dict(exclude_none=True) if params else None
    response = client.get(ENDPOINT_TRANSCRIPT, params=query)

    if response.status_code == httpx.codes.ok:
        return types.ListTranscriptResponse.parse_obj(response.json())

    raise types.AssemblyAIError(
        f"failed to retrieve transcripts: {_get_error_message(response)}"
    )
271+
272+
250273
def lemur_question(
251274
client: httpx.Client,
252275
request: types.LemurQuestionRequest,

assemblyai/transcriber.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -821,6 +821,12 @@ def transcribe_group(
821821

822822
return transcript_group
823823

824+
def list_transcripts(
    self,
    params: Optional[types.ListTranscriptParameters],
) -> types.ListTranscriptResponse:
    """
    Fetch a page of transcripts through the API layer.

    Args:
        params: Optional parameters forwarded unchanged to `api.list_transcripts`.

    Returns: The transcript list response produced by `api.list_transcripts`.
    """
    http_client = self._client.http_client
    return api.list_transcripts(client=http_client, params=params)
829+
824830

825831
class Transcriber:
826832
"""
@@ -1036,6 +1042,45 @@ def transcribe_group_async(
10361042
poll=True,
10371043
)
10381044

1045+
def list_transcripts(
    self,
    params: Optional[types.ListTranscriptParameters] = None,
) -> types.ListTranscriptResponse:
    """
    Fetch one page of the transcripts that were created, ordered from newest to oldest.

    Args:
        params: Optional parameters used to filter the transcript list.

    Returns: A page holding the transcripts together with the details of that page.

    Walking through every page works by copying the `before_id` found in the
    `prev_url` into `ListTranscriptParameters.before_id` before the next call. Example:
    ```
    transcriber = aai.Transcriber()
    params = aai.ListTranscriptParameters()
    page = transcriber.list_transcripts(params)
    while page.page_details.before_id_of_prev_url is not None:
        params.before_id = page.page_details.before_id_of_prev_url
        page = transcriber.list_transcripts(params)
    ```
    """
    page = self._impl.list_transcripts(params=params)
    return page
1069+
1070+
def list_transcripts_async(
    self,
    params: Optional[types.ListTranscriptParameters] = None,
) -> concurrent.futures.Future[types.ListTranscriptResponse]:
    """
    Submit the retrieval of one page of created transcripts to the executor.
    Transcripts are sorted from newest to oldest.

    Args:
        params: Optional parameters used to filter the transcript list.

    Returns: A future resolving to a page with a list of transcripts along with page details.
    """
    fetch_page = self._impl.list_transcripts
    return self._executor.submit(fetch_page, params=params)
1083+
10391084

10401085
class _RealtimeTranscriberImpl:
10411086
def __init__(

assemblyai/types.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from datetime import datetime
22
from enum import Enum, EnumMeta
33
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Sequence, Tuple, Union
4+
from urllib.parse import parse_qs, urlparse
45
from warnings import warn
56

67
if TYPE_CHECKING:
@@ -1736,6 +1737,89 @@ def __init__(self, **data: Any):
17361737
super().__init__(**data)
17371738

17381739

1740+
class ListTranscriptParameters(BaseModel):
    """
    The query parameters when listing transcripts.

    Every field is optional and explicitly defaults to `None`, so the model can be
    created without arguments (`ListTranscriptParameters()`) and filters can be
    assigned afterwards.
    """

    after_id: Optional[str] = None
    "Get transcripts that were created after this transcript ID"

    before_id: Optional[str] = None
    "Get transcripts that were created before this transcript ID"

    created_on: Optional[str] = None
    "Get only transcripts created on this date"

    limit: Optional[int] = None
    "Maximum amount of transcripts to retrieve. Default is 10"

    status: Optional[TranscriptStatus] = None
    "Filter by transcript status"

    throttled_only: Optional[bool] = None
    "Get only throttled transcripts, overrides the status filter"

    class Config:
        use_enum_values = True  # Populate the enum value for the query parameters
1766+
1767+
class PageDetails(BaseModel):
    """
    Details of the transcript page.
    """

    current_url: str
    "The URL used to retrieve the current page of transcripts"

    limit: int
    "The number of results this page is limited to"

    next_url: Optional[str] = None
    "The URL to the next page of transcripts. The next URL always points to a page with newer transcripts."

    prev_url: Optional[str] = None
    "The URL to the previous page of transcripts. The previous URL always points to a page with older transcripts."

    result_count: int
    "The actual number of results in the page"

    @property
    def before_id_of_prev_url(self) -> Optional[str]:
        """
        The `before_id` contained in the `prev_url` query params. Can be used as the
        `ListTranscriptParameters.before_id` for the subsequent `list_transcripts()` call to paginate over results.

        Returns: The first `before_id` value of the `prev_url` query string, or `None`
        when there is no `prev_url` or it carries no `before_id` parameter.
        """
        if not self.prev_url:
            return None
        parsed_query_params = parse_qs(urlparse(self.prev_url).query)
        # `parse_qs` maps each parameter name to a list of values; only the first one is used.
        before_id_list = parsed_query_params.get("before_id")
        return before_id_list[0] if before_id_list else None
1798+
1799+
1800+
class TranscriptItem(BaseModel):
    """
    A single entry of the `transcripts` list in a `ListTranscriptResponse`.

    `completed` and `error` are optional and explicitly default to `None`.
    """

    audio_url: str
    # NOTE(review): presumably a timestamp string, unset until the transcript completes; confirm against the API.
    completed: Optional[str] = None
    created: str
    # NOTE(review): presumably only populated for failed transcripts; confirm against the API.
    error: Optional[str] = None
    id: str
    resource_url: str
    status: TranscriptStatus
1809+
1810+
class ListTranscriptResponse(BaseModel):
    """
    One page of transcripts together with the details describing that page.

    The transcripts are ordered from newest to oldest, and the previous URL
    of the page details always leads to a page with older transcripts.
    """

    page_details: PageDetails
    "Details of the returned transcript page"

    transcripts: List[TranscriptItem]
    "A list of transcripts sorted from newest to oldest"
1821+
1822+
17391823
class LemurSourceType(str, Enum):
17401824
"""
17411825
The source type of the LeMUR request

tests/unit/factories.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,43 @@ class Meta:
204204
model = types.TranscriptRequest
205205

206206

207+
class PageDetails(factory.Factory):
    """Test factory that builds `types.PageDetails` objects."""

    class Meta:
        model = types.PageDetails

    current_url = factory.Faker("url")
    limit = 10
    # By default the generated page links to neither a next nor a previous page.
    next_url = None
    prev_url = None
    result_count = 2
216+
217+
218+
class TranscriptItem(factory.Factory):
    """Test factory that builds `types.TranscriptItem` objects."""

    class Meta:
        model = types.TranscriptItem

    # Required fields are filled with generated fake values.
    audio_url = factory.Faker("url")
    created = factory.Faker("iso8601")
    id = factory.Faker("uuid4")
    resource_url = factory.Faker("url")
    status = aai.TranscriptStatus.completed
    # Optional fields stay unset unless a test overrides them.
    completed = None
    error = None
229+
230+
231+
class ListTranscriptResponse(factory.Factory):
    """Test factory that builds `types.ListTranscriptResponse` objects holding two transcript items."""

    class Meta:
        model = types.ListTranscriptResponse

    page_details = factory.SubFactory(PageDetails)
    # Two independent sub-factory declarations, one per generated transcript item.
    transcripts = factory.List(
        [factory.SubFactory(TranscriptItem) for _ in range(2)]
    )
242+
243+
207244
class LemurUsage(factory.Factory):
208245
class Meta:
209246
model = types.LemurUsage

0 commit comments

Comments
 (0)