Skip to content

Commit 832afe2

Browse files
oconnoobryan-assemblyai
authored and committed
feat(python/sdk): add better error messaging for get_by_id (#5717)
Co-authored-by: Ryan O'Connor <ryan@assemblyai.com>
GitOrigin-RevId: 398e9fe83a9f962bac7012c6b79d2a49d90943ba
1 parent 7cecf71 commit 832afe2

File tree

6 files changed

+263
-97
lines changed

6 files changed

+263
-97
lines changed

assemblyai/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1 +1 @@
1-
__version__ = "0.31.0"
1+
__version__ = "0.32.0"

assemblyai/transcriber.py

Lines changed: 80 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
Iterator,
1818
List,
1919
Optional,
20+
Tuple,
2021
Union,
2122
)
2223
from urllib.parse import urlencode, urlparse
@@ -75,19 +76,11 @@ def wait_for_completion(self) -> Self:
7576
"""
7677

7778
while True:
78-
try:
79-
self.transcript = api.get_transcript(
80-
self._client.http_client,
81-
self.transcript_id,
82-
)
83-
except Exception as exc:
84-
self.transcript = types.TranscriptResponse(
85-
**self.transcript.dict(
86-
exclude_none=True, exclude={"status", "error"}
87-
),
88-
status=types.TranscriptStatus.error,
89-
error=str(exc),
90-
)
79+
# No try-except - if there is an HTTP error then surface it to user
80+
self.transcript = api.get_transcript(
81+
self._client.http_client,
82+
self.transcript_id,
83+
)
9184

9285
if self.transcript.status in (
9386
types.TranscriptStatus.completed,
@@ -563,8 +556,9 @@ def add_transcript(self, transcript: Union[Transcript, str]) -> None:
563556

564557
return self
565558

566-
def wait_for_completion(self) -> None:
559+
def wait_for_completion(self, return_failures) -> Union[None, List[str]]:
567560
transcripts: List[Transcript] = []
561+
failures: List[str] = []
568562

569563
future_transcripts: Dict[concurrent.futures.Future[Transcript], str] = {}
570564

@@ -575,10 +569,16 @@ def wait_for_completion(self) -> None:
575569
finished_futures, _ = concurrent.futures.wait(future_transcripts)
576570

577571
for future in finished_futures:
578-
transcripts.append(future.result())
572+
try:
573+
transcripts.append(future.result())
574+
except types.TranscriptError as e:
575+
failures.append(str(e))
579576

580577
self.transcripts = transcripts
581578

579+
if return_failures:
580+
return failures
581+
582582

583583
class TranscriptGroup:
584584
"""
@@ -669,19 +669,37 @@ def add_transcript(
669669

670670
return self
671671

672-
def wait_for_completion(self) -> Self:
672+
def wait_for_completion(
673+
self,
674+
return_failures: Optional[bool] = False,
675+
) -> Union[Self, Tuple[Self, List[str]]]:
673676
"""
674677
Polls each transcript within the `TranscriptGroup`.
675678
679+
Note - if an HTTP error is encountered when waiting for a Transcript in the TranscriptGroup, it will be popped from the group and added to the list of failures.
680+
You can return this list of failures with `return_failures=True`.
681+
682+
Args:
683+
return_failures: Whether to return a list of errors for transcripts that failed due to HTTP errors.
676684
"""
677-
self._impl.wait_for_completion()
685+
if return_failures:
686+
failures = self._impl.wait_for_completion(return_failures=return_failures)
687+
return self, failures
688+
689+
self._impl.wait_for_completion(return_failures=return_failures)
678690

679691
return self
680692

681693
def wait_for_completion_async(
682694
self,
683-
) -> concurrent.futures.Future[Self]:
684-
return self._executor.submit(self.wait_for_completion)
695+
return_failures: Optional[bool] = False,
696+
) -> Union[
697+
concurrent.futures.Future[Self],
698+
concurrent.futures.Future[Tuple[Self, List[str]]],
699+
]:
700+
return self._executor.submit(
701+
self.wait_for_completion, return_failures=return_failures
702+
)
685703

686704

687705
class _TranscriberImpl:
@@ -722,24 +740,14 @@ def transcribe_url(
722740
audio_url=url,
723741
**config.raw.dict(exclude_none=True),
724742
)
725-
try:
726-
transcript = Transcript.from_response(
727-
client=self._client,
728-
response=api.create_transcript(
729-
client=self._client.http_client,
730-
request=transcript_request,
731-
),
732-
)
733-
except Exception as exc:
734-
return Transcript.from_response(
735-
client=self._client,
736-
response=types.TranscriptResponse(
737-
audio_url=url,
738-
**config.raw.dict(exclude_none=True),
739-
status=types.TranscriptStatus.error,
740-
error=str(exc),
741-
),
742-
)
743+
# No try-except - if there is an HTTP error raise it to the user
744+
transcript = Transcript.from_response(
745+
client=self._client,
746+
response=api.create_transcript(
747+
client=self._client.http_client,
748+
request=transcript_request,
749+
),
750+
)
743751

744752
if poll:
745753
return transcript.wait_for_completion()
@@ -790,7 +798,8 @@ def transcribe_group(
790798
data: List[Union[str, BinaryIO]],
791799
config: Optional[types.TranscriptionConfig],
792800
poll: bool,
793-
) -> TranscriptGroup:
801+
return_failures: Optional[bool] = False,
802+
) -> Union[TranscriptGroup, Tuple[TranscriptGroup, List[str]]]:
794803
if config is None:
795804
config = self.config
796805

@@ -812,14 +821,28 @@ def transcribe_group(
812821
transcript_group = TranscriptGroup(
813822
client=self._client,
814823
)
824+
failures = []
815825

816826
for future in finished_futures:
817-
transcript_group.add_transcript(future.result())
827+
try:
828+
transcript_group.add_transcript(future.result())
829+
except types.TranscriptError as e:
830+
failures.append(f"Error processing {future_transcripts[future]}: {e}")
818831

819-
if poll:
820-
return transcript_group.wait_for_completion()
832+
if poll and return_failures:
833+
transcript_group, completion_failures = (
834+
transcript_group.wait_for_completion(return_failures=return_failures)
835+
)
836+
failures.extend(completion_failures)
837+
elif poll:
838+
transcript_group = transcript_group.wait_for_completion(
839+
return_failures=return_failures
840+
)
821841

822-
return transcript_group
842+
if return_failures:
843+
return transcript_group, failures
844+
else:
845+
return transcript_group
823846

824847
def list_transcripts(
825848
self,
@@ -945,19 +968,22 @@ def submit_group(
945968
self,
946969
data: List[Union[str, BinaryIO]],
947970
config: Optional[types.TranscriptionConfig] = None,
948-
) -> TranscriptGroup:
971+
return_failures: Optional[bool] = False,
972+
) -> Union[TranscriptGroup, Tuple[TranscriptGroup, List[str]]]:
949973
"""
950974
Submits multiple transcription jobs without waiting for their completion.
951975
952976
Args:
953977
data: A list of local paths, URLs, or binary objects (can be mixed).
954978
config: Transcription options and features. If `None` is given, the Transcriber's
955979
default configuration will be used.
980+
return_failures: Whether to include a list of errors for transcriptions that failed due to HTTP errors
956981
"""
957982
return self._impl.transcribe_group(
958983
data=data,
959984
config=config,
960985
poll=False,
986+
return_failures=return_failures,
961987
)
962988

963989
def transcribe(
@@ -1005,41 +1031,50 @@ def transcribe_group(
10051031
self,
10061032
data: List[Union[str, BinaryIO]],
10071033
config: Optional[types.TranscriptionConfig] = None,
1008-
) -> TranscriptGroup:
1034+
return_failures: Optional[bool] = False,
1035+
) -> Union[TranscriptGroup, Tuple[TranscriptGroup, List[str]]]:
10091036
"""
10101037
Transcribes a list of files (as local paths, URLs, or binary objects).
10111038
10121039
Args:
10131040
data: A list of local paths, URLs, or binary objects (can be mixed).
10141041
config: Transcription options and features. If `None` is given, the Transcriber's
10151042
default configuration will be used.
1043+
return_failures: Whether to include a list of errors for transcriptions that failed due to HTTP errors
10161044
"""
10171045

10181046
return self._impl.transcribe_group(
10191047
data=data,
10201048
config=config,
10211049
poll=True,
1050+
return_failures=return_failures,
10221051
)
10231052

10241053
def transcribe_group_async(
10251054
self,
10261055
data: List[Union[str, BinaryIO]],
10271056
config: Optional[types.TranscriptionConfig] = None,
1028-
) -> concurrent.futures.Future[TranscriptGroup]:
1057+
return_failures: Optional[bool] = False,
1058+
) -> Union[
1059+
concurrent.futures.Future[TranscriptGroup],
1060+
concurrent.futures.Future[Tuple[TranscriptGroup, List[str]]],
1061+
]:
10291062
"""
10301063
Transcribes a list of files (as local paths, URLs, or binary objects) asynchronously.
10311064
10321065
Args:
10331066
data: A list of local paths, URLs, or binary objects (can be mixed).
10341067
config: Transcription options and features. If `None` is given, the Transcriber's
10351068
default configuration will be used.
1069+
return_failures: Whether to include a list of errors for transcriptions that failed due to HTTP errors
10361070
"""
10371071

10381072
return self._executor.submit(
10391073
self._impl.transcribe_group,
10401074
data=data,
10411075
config=config,
10421076
poll=True,
1077+
return_failures=return_failures,
10431078
)
10441079

10451080
def list_transcripts(

tests/unit/factories.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -191,7 +191,7 @@ class Meta:
191191
audio_duration = None
192192

193193

194-
class TranscriptErrorResponseFactory(TranscriptProcessingResponseFactory):
194+
class TranscriptErrorResponseFactory(BaseTranscriptFactory):
195195
class Meta:
196196
model = types.TranscriptResponse
197197

tests/unit/test_content_safety.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -196,7 +196,9 @@ def test_content_safety_with_confidence_threshold(httpx_mock: HTTPXMock):
196196
confidence = 40
197197
request, _ = unit_test_utils.submit_mock_transcription_request(
198198
httpx_mock,
199-
mock_response={}, # Response doesn't matter here; we're just testing the request body
199+
mock_response=factories.generate_dict_factory(
200+
factories.TranscriptCompletedResponseFactory
201+
)(),
200202
config=aai.TranscriptionConfig(
201203
content_safety=True, content_safety_confidence=confidence
202204
),

0 commit comments

Comments
 (0)