Skip to content

Commit d2ac671

Browse files
committed
feat(python/sdk): Add multichannel support (#6814)
GitOrigin-RevId: 668f1b8c9e1339b818ef321887ab7e8fe363d58c
1 parent 39b0552 commit d2ac671

File tree

3 files changed

+92
-3
lines changed

3 files changed

+92
-3
lines changed

assemblyai/types.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -477,6 +477,9 @@ class RawTranscriptionConfig(BaseModel):
477477
dual_channel: Optional[bool] = None
478478
"Enable Dual Channel transcription"
479479

480+
multichannel: Optional[bool] = None
481+
"Enable Multichannel transcription"
482+
480483
webhook_url: Optional[str] = None
481484
"The URL we should send webhooks to when your transcript is complete."
482485
webhook_auth_header_name: Optional[str] = None
@@ -578,6 +581,7 @@ def __init__(
578581
punctuate: Optional[bool] = None,
579582
format_text: Optional[bool] = None,
580583
dual_channel: Optional[bool] = None,
584+
multichannel: Optional[bool] = None,
581585
webhook_url: Optional[str] = None,
582586
webhook_auth_header_name: Optional[str] = None,
583587
webhook_auth_header_value: Optional[str] = None,
@@ -617,6 +621,7 @@ def __init__(
617621
punctuate: Enable Automatic Punctuation
618622
format_text: Enable Text Formatting
619623
dual_channel: Enable Dual Channel transcription
624+
multichannel: Enable Multichannel transcription
620625
webhook_url: The URL we should send webhooks to when your transcript is complete.
621626
webhook_auth_header_name: The name of the header that is sent when the `webhook_url` is being called.
622627
webhook_auth_header_value: The value of the `webhook_auth_header_name` that is sent when the `webhook_url` is being called.
@@ -660,6 +665,7 @@ def __init__(
660665
self.punctuate = punctuate
661666
self.format_text = format_text
662667
self.dual_channel = dual_channel
668+
self.multichannel = multichannel
663669
self.set_webhook(
664670
webhook_url,
665671
webhook_auth_header_name,
@@ -760,6 +766,18 @@ def dual_channel(self, enable: Optional[bool]) -> None:
760766

761767
self._raw_transcription_config.dual_channel = enable
762768

769+
@property
770+
def multichannel(self) -> Optional[bool]:
771+
"Returns the status of the Multichannel transcription feature"
772+
773+
return self._raw_transcription_config.multichannel
774+
775+
@multichannel.setter
776+
def multichannel(self, enable: Optional[bool]) -> None:
777+
"Enable Multichannel transcription"
778+
779+
self._raw_transcription_config.multichannel = enable
780+
763781
@property
764782
def webhook_url(self) -> Optional[str]:
765783
"The URL we should send webhooks to when your transcript is complete."
@@ -1391,6 +1409,7 @@ class Word(BaseModel):
13911409
end: int
13921410
confidence: float
13931411
speaker: Optional[str] = None
1412+
channel: Optional[str] = None
13941413

13951414

13961415
class UtteranceWord(Word):
@@ -1485,6 +1504,7 @@ class IABResponse(BaseModel):
14851504
class Sentiment(Word):
14861505
sentiment: SentimentType
14871506
speaker: Optional[str] = None
1507+
channel: Optional[str] = None
14881508

14891509

14901510
class Entity(BaseModel):
@@ -1530,6 +1550,7 @@ class Sentence(Word):
15301550
end: int
15311551
confidence: float
15321552
speaker: Optional[str] = None
1553+
channel: Optional[str] = None
15331554

15341555

15351556
class SentencesResponse(BaseModel):
@@ -1576,6 +1597,11 @@ class BaseTranscript(BaseModel):
15761597
dual_channel: Optional[bool] = None
15771598
"Enable Dual Channel transcription"
15781599

1600+
multichannel: Optional[bool] = None
1601+
"Enable Multichannel transcription"
1602+
audio_channels: Optional[int] = None
1603+
"The number of audio channels in the media file"
1604+
15791605
webhook_url: Optional[str] = None
15801606
"The URL we should send webhooks to when your transcript is complete."
15811607
webhook_auth_header_name: Optional[str] = None
@@ -1694,7 +1720,7 @@ class TranscriptResponse(BaseTranscript):
16941720
"A list of all the individual words transcribed"
16951721

16961722
utterances: Optional[List[Utterance]] = None
1697-
"When `dual_channel` or `speaker_labels` is enabled, a list of turn-by-turn utterances"
1723+
"When `dual_channel`, `multichannel`, or `speaker_labels` is enabled, a list of turn-by-turn utterances"
16981724

16991725
confidence: Optional[float] = None
17001726
"The confidence our model has in the transcribed text, between 0.0 and 1.0"

tests/unit/factories.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,16 @@ class Meta:
3030
start = factory.Faker("pyint")
3131
end = factory.Faker("pyint")
3232
confidence = factory.Faker("pyfloat", min_value=0.0, max_value=1.0)
33+
speaker = "1"
34+
channel = "1"
3335

3436

3537
class UtteranceWordFactory(WordFactory):
3638
class Meta:
3739
model = aai.UtteranceWord
3840

39-
speaker = factory.Faker("name")
41+
speaker = "1"
42+
channel = "1"
4043

4144

4245
class UtteranceFactory(UtteranceWordFactory):
@@ -65,7 +68,8 @@ class Meta:
6568
audio_url = factory.Faker("url")
6669
punctuate = True
6770
format_text = True
68-
dual_channel = True
71+
multichannel = None
72+
dual_channel = None
6973
webhook_url = None
7074
webhook_auth_header_name = None
7175
audio_start_from = None
@@ -119,6 +123,7 @@ class TranscriptDeletedResponseFactory(BaseTranscriptResponseFactory):
119123
punctuate = None
120124
format_text = None
121125
dual_channel = None
126+
multichannel = None
122127
webhook_url = "http://deleted_by_user"
123128
webhook_status_code = None
124129
webhook_auth = False

tests/unit/test_multichannel.py

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
from pytest_httpx import HTTPXMock
2+
3+
import tests.unit.unit_test_utils as unit_test_utils
4+
import assemblyai as aai
5+
from tests.unit import factories
6+
7+
aai.settings.api_key = "test"
8+
9+
10+
class MultichannelResponseFactory(factories.TranscriptCompletedResponseFactory):
11+
multichannel = True
12+
audio_channels = 2
13+
14+
15+
def test_multichannel_disabled_by_default(httpx_mock: HTTPXMock):
16+
"""
17+
Tests that not setting `multichannel=True` in the `TranscriptionConfig`
18+
will result in the default behavior of it being excluded from the request body.
19+
"""
20+
request_body, transcript = unit_test_utils.submit_mock_transcription_request(
21+
httpx_mock,
22+
mock_response=factories.generate_dict_factory(
23+
factories.TranscriptCompletedResponseFactory
24+
)(),
25+
config=aai.TranscriptionConfig(),
26+
)
27+
assert request_body.get("multichannel") is None
28+
assert transcript.json_response.get("multichannel") is None
29+
30+
31+
def test_multichannel_enabled(httpx_mock: HTTPXMock):
32+
"""
33+
Tests that setting `multichannel=True` in the `TranscriptionConfig`
34+
will result in correct `multichannel` in the request body, and that the
35+
response is properly parsed into the `multichannel` and `utterances` fields.
36+
"""
37+
38+
mock_response = factories.generate_dict_factory(MultichannelResponseFactory)()
39+
request_body, transcript = unit_test_utils.submit_mock_transcription_request(
40+
httpx_mock,
41+
mock_response=mock_response,
42+
config=aai.TranscriptionConfig(multichannel=True),
43+
)
44+
45+
# Check that request body was properly defined
46+
multichannel_response = request_body.get("multichannel")
47+
assert multichannel_response is not None
48+
49+
# Check that transcript has no errors and multichannel response is correctly returned
50+
assert transcript.error is None
51+
assert transcript.json_response["multichannel"] == multichannel_response
52+
assert transcript.json_response["audio_channels"] > 1
53+
54+
# Check that utterances are correctly parsed
55+
assert transcript.utterances is not None
56+
assert len(transcript.utterances) > 0
57+
for utterance in transcript.utterances:
58+
assert int(utterance.channel) > 0

0 commit comments

Comments
 (0)