Skip to content

Commit 81015fb

Browse files
feat: Add transcript normalization (#235)
- [ ] Regenerate this pull request now.

Committer: @jameszhao
PiperOrigin-RevId: 391603958

Source-Link: googleapis/googleapis@76bed90
Source-Link: googleapis/googleapis-gen@c7aaaa7
1 parent 1c85a16 commit 81015fb

File tree

4 files changed

+56
-1
lines changed

4 files changed

+56
-1
lines changed

packages/google-cloud-python-speech/google/cloud/speech_v1p1beta1/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -52,6 +52,7 @@
5252
from .types.resource import CustomClass
5353
from .types.resource import PhraseSet
5454
from .types.resource import SpeechAdaptation
55+
from .types.resource import TranscriptNormalization
5556

5657
from google.cloud.speech_v1.helpers import SpeechHelpers
5758

@@ -94,6 +95,7 @@ class SpeechClient(SpeechHelpers, SpeechClient):
9495
"StreamingRecognitionResult",
9596
"StreamingRecognizeRequest",
9697
"StreamingRecognizeResponse",
98+
"TranscriptNormalization",
9799
"TranscriptOutputConfig",
98100
"UpdateCustomClassRequest",
99101
"UpdatePhraseSetRequest",

packages/google-cloud-python-speech/google/cloud/speech_v1p1beta1/types/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -51,6 +51,7 @@
5151
CustomClass,
5252
PhraseSet,
5353
SpeechAdaptation,
54+
TranscriptNormalization,
5455
)
5556

5657
__all__ = (
@@ -87,4 +88,5 @@
8788
"CustomClass",
8889
"PhraseSet",
8990
"SpeechAdaptation",
91+
"TranscriptNormalization",
9092
)

packages/google-cloud-python-speech/google/cloud/speech_v1p1beta1/types/cloud_speech.py

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -265,6 +265,13 @@ class RecognitionConfig(proto.Message):
265265
information, see the `speech
266266
adaptation <https://cloud.google.com/speech-to-text/docs/adaptation>`__
267267
documentation.
268+
transcript_normalization (google.cloud.speech_v1p1beta1.types.TranscriptNormalization):
269+
Use transcription normalization to
270+
automatically replace parts of the transcript
271+
with phrases of your choosing. For
272+
StreamingRecognize, this normalization only
273+
applies to stable partial transcripts (stability
274+
> 0.8) and final transcripts.
268275
speech_contexts (Sequence[google.cloud.speech_v1p1beta1.types.SpeechContext]):
269276
Array of
270277
[SpeechContext][google.cloud.speech.v1p1beta1.SpeechContext].
@@ -427,6 +434,9 @@ class AudioEncoding(proto.Enum):
427434
adaptation = proto.Field(
428435
proto.MESSAGE, number=20, message=resource.SpeechAdaptation,
429436
)
437+
transcript_normalization = proto.Field(
438+
proto.MESSAGE, number=24, message=resource.TranscriptNormalization,
439+
)
430440
speech_contexts = proto.RepeatedField(
431441
proto.MESSAGE, number=6, message="SpeechContext",
432442
)

packages/google-cloud-python-speech/google/cloud/speech_v1p1beta1/types/resource.py

Lines changed: 42 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,12 @@
1818

1919
__protobuf__ = proto.module(
2020
package="google.cloud.speech.v1p1beta1",
21-
manifest={"CustomClass", "PhraseSet", "SpeechAdaptation",},
21+
manifest={
22+
"CustomClass",
23+
"PhraseSet",
24+
"SpeechAdaptation",
25+
"TranscriptNormalization",
26+
},
2227
)
2328

2429

@@ -150,4 +155,40 @@ class SpeechAdaptation(proto.Message):
150155
)
151156

152157

158+
class TranscriptNormalization(proto.Message):
    r"""Transcription normalization configuration. Use transcription
    normalization to automatically replace parts of the transcript
    with phrases of your choosing. For StreamingRecognize, this
    normalization only applies to stable partial transcripts
    (stability > 0.8) and final transcripts.

    Attributes:
        entries (Sequence[google.cloud.speech_v1p1beta1.types.TranscriptNormalization.Entry]):
            A list of replacement entries. We will perform replacement
            with one entry at a time. For example, the second entry in
            ["cat" => "dog", "mountain cat" => "mountain dog"] will
            never be applied because we will always process the first
            entry before it. At most 100 entries.
    """

    # ``Entry`` is declared before ``entries`` because the repeated field
    # below refers to the class object itself (``message=Entry``), not to a
    # string name.
    class Entry(proto.Message):
        r"""A single replacement configuration.

        Attributes:
            search (str):
                What to replace. Max length is 100
                characters.
            replace (str):
                What to replace with. Max length is 100
                characters.
            case_sensitive (bool):
                Whether the search is case sensitive.
        """

        search = proto.Field(proto.STRING, number=1,)
        replace = proto.Field(proto.STRING, number=2,)
        case_sensitive = proto.Field(proto.BOOL, number=3,)

    entries = proto.RepeatedField(proto.MESSAGE, number=1, message=Entry,)
192+
193+
153194
__all__ = tuple(sorted(__protobuf__.manifest))

0 commit comments

Comments
 (0)