77from langchain .utilities .vertexai import get_client_info
88
99if TYPE_CHECKING :
10- from google .api_core .client_options import ClientOptions
1110 from google .cloud .speech_v2 import (
1211 RecognitionConfig ,
1312 SpeechClient ,
1413 )
14+ from google .protobuf .field_mask_pb2 import FieldMask
1515
1616
1717class GoogleSpeechToTextLoader (BaseLoader ):
@@ -26,7 +26,8 @@ class GoogleSpeechToTextLoader(BaseLoader):
2626
2727 Audio files can be specified via a Google Cloud Storage uri or a local file path.
2828
29- For a detailed explanation of Google Cloud Speech-to-Text, refer to the product documentation.
29+ For a detailed explanation of Google Cloud Speech-to-Text, refer to the product
30+ documentation.
3031 https://cloud.google.com/speech-to-text
3132 """
3233
@@ -39,8 +40,8 @@ def __init__(
3940 file_path : str ,
4041 location : str = "global" ,
4142 recognizer_id : str = "_" ,
42- * ,
4343 config : Optional [RecognitionConfig ] = None ,
44+ config_mask : Optional [FieldMask ] = None ,
4445 ):
4546 """
4647 Initializes the GoogleSpeechToTextLoader.
@@ -53,10 +54,20 @@ def __init__(
5354 config: Recognition options and features.
5455 For more information:
5556 https://cloud.google.com/python/docs/reference/speech/latest/google.cloud.speech_v2.types.RecognitionConfig
57+ config_mask: The list of fields in config that override the values in the
58+ ``default_recognition_config`` of the recognizer during this
59+ recognition request.
60+ For more information:
61+ https://cloud.google.com/python/docs/reference/speech/latest/google.cloud.speech_v2.types.RecognizeRequest
5662 """
5763 try :
5864 from google .api_core .client_options import ClientOptions
59- from google .cloud .speech_v2 import SpeechClient
65+ from google .cloud .speech_v2 import (
66+ AutoDetectDecodingConfig ,
67+ RecognitionConfig ,
68+ RecognitionFeatures ,
69+ SpeechClient ,
70+ )
6071 except ImportError as exc :
6172 raise ImportError (
6273 "Could not import google-cloud-speech python package. "
@@ -67,7 +78,17 @@ def __init__(
6778 self .file_path = file_path
6879 self .location = location
6980 self .recognizer_id = recognizer_id
70- self .config = config
81+ # Config must be set in speech recognition request.
82+ self .config = config or RecognitionConfig (
83+ auto_decoding_config = AutoDetectDecodingConfig (),
84+ language_codes = ["en-US" ],
85+ model = "long" ,
86+ features = RecognitionFeatures (
87+ # Automatic punctuation could be useful for language applications
88+ enable_automatic_punctuation = True ,
89+ ),
90+ )
91+ self .config_mask = config_mask
7192
7293 self ._client = SpeechClient (
7394 client_info = get_client_info (module = "speech-to-text" ),
@@ -95,16 +116,18 @@ def load(self) -> List[Document]:
95116 "Please install it with `pip install google-cloud-speech`."
96117 ) from exc
97118
98- request = RecognizeRequest (recognizer = self ._recognizer_path , config = self .config )
119+ request = RecognizeRequest (
120+ recognizer = self ._recognizer_path ,
121+ config = self .config ,
122+ config_mask = self .config_mask ,
123+ )
99124
100125 if "gs://" in self .file_path :
101- request .gcs_uri = self .file_path
126+ request .uri = self .file_path
102127 else :
103- # Reads a file as bytes
104128 with open (self .file_path , "rb" ) as f :
105129 request .content = f .read ()
106130
107- # Transcribes the audio into text
108131 response = self ._client .recognize (request = request )
109132
110133 return [
0 commit comments