46
46
Group ,
47
47
SinglePackEntries ,
48
48
Generics ,
49
+ AudioAnnotation ,
49
50
)
50
51
from forte .data .span import Span
51
52
from forte .data .types import ReplaceOperationsType , DataRequest
@@ -166,6 +167,7 @@ def __init__(self, pack_name: Optional[str] = None):
166
167
self .links : SortedList [Link ] = SortedList ()
167
168
self .groups : SortedList [Group ] = SortedList ()
168
169
self .generics : SortedList [Generics ] = SortedList ()
170
+ self .audio_annotations : SortedList [AudioAnnotation ] = SortedList ()
169
171
170
172
self .__replace_back_operations : ReplaceOperationsType = []
171
173
self .__processed_original_spans : List [Tuple [Span , Span ]] = []
@@ -185,6 +187,7 @@ def __getstate__(self):
185
187
state ["links" ] = list (state ["links" ])
186
188
state ["groups" ] = list (state ["groups" ])
187
189
state ["generics" ] = list (state ["generics" ])
190
+ state ["audio_annotations" ] = list (state ["audio_annotations" ])
188
191
return state
189
192
190
193
def __setstate__ (self , state ):
@@ -212,12 +215,14 @@ def __setstate__(self, state):
212
215
self .links = as_sorted_error_check (self .links )
213
216
self .groups = as_sorted_error_check (self .groups )
214
217
self .generics = as_sorted_error_check (self .generics )
218
+ self .audio_annotations = as_sorted_error_check (self .audio_annotations )
215
219
216
220
self ._index = DataIndex ()
217
221
self ._index .update_basic_index (list (self .annotations ))
218
222
self ._index .update_basic_index (list (self .links ))
219
223
self ._index .update_basic_index (list (self .groups ))
220
224
self ._index .update_basic_index (list (self .generics ))
225
+ self ._index .update_basic_index (list (self .audio_annotations ))
221
226
222
227
for a in self .annotations :
223
228
a .set_pack (self )
@@ -231,11 +236,15 @@ def __setstate__(self, state):
231
236
for a in self .generics :
232
237
a .set_pack (self )
233
238
239
+ for a in self .audio_annotations :
240
+ a .set_pack (self )
241
+
234
242
def __iter__ (self ):
235
243
yield from self .annotations
236
244
yield from self .links
237
245
yield from self .groups
238
246
yield from self .generics
247
+ yield from self .audio_annotations
239
248
240
249
def _init_meta (self , pack_name : Optional [str ] = None ) -> Meta :
241
250
return Meta (pack_name )
@@ -341,6 +350,27 @@ def num_generics_entries(self):
341
350
"""
342
351
return len (self .generics )
343
352
353
@property
def all_audio_annotations(self) -> Iterator[AudioAnnotation]:
    """
    Iterate over all audio annotations stored in this data pack.

    The entries are yielded in the order in which
    ``self.audio_annotations`` produces them. Because the body is a
    generator, every access to this property creates a fresh iterator.

    Returns:
        Iterator of all audio annotations, of
        type :class:`~forte.data.ontology.top.AudioAnnotation`.

    """
    yield from self.audio_annotations
363
+
364
@property
def num_audio_annotations(self) -> int:
    """
    Number of audio annotations in this data pack.

    Returns:
        The length of ``self.audio_annotations``.

    """
    return len(self.audio_annotations)
373
+
344
374
def get_span_text (self , begin : int , end : int ) -> str :
345
375
r"""Get the text in the data pack contained in the span.
346
376
@@ -353,6 +383,23 @@ def get_span_text(self, begin: int, end: int) -> str:
353
383
"""
354
384
return self ._text [begin :end ]
355
385
386
def get_span_audio(self, begin: int, end: int) -> str:
    r"""Get the audio in the data pack contained in the span.

    The audio payload stored in ``self._audio`` is sliced with
    ``[begin:end]``, so ``begin`` is inclusive and ``end`` is exclusive.

    Args:
        begin (int): begin index to query.
        end (int): end index to query.

    Returns:
        The audio within this span.

    Raises:
        ProcessExecutionException: if the audio payload has not been set
            (``self._audio`` is ``None``).
    """
    # NOTE(review): the `-> str` annotation mirrors `get_span_text`; confirm
    # that it matches the actual type of `self._audio`.
    if self._audio is None:
        raise ProcessExecutionException(
            "The audio payload of this DataPack is not set. Please call"
            " method `set_audio` before running `get_span_audio`."
        )
    return self._audio[begin:end]
402
+
356
403
def set_text (
357
404
self ,
358
405
text : str ,
@@ -619,10 +666,13 @@ def __add_entry_with_check(
619
666
target = self .groups
620
667
elif isinstance (entry , Generics ):
621
668
target = self .generics
669
+ elif isinstance (entry , AudioAnnotation ):
670
+ target = self .audio_annotations
622
671
else :
623
672
raise ValueError (
624
673
f"Invalid entry type { type (entry )} . A valid entry "
625
- f"should be an instance of Annotation, Link, Group of Generics."
674
+ f"should be an instance of Annotation, Link, Group, Generics "
675
+ "or AudioAnnotation."
626
676
)
627
677
628
678
if not allow_duplicate :
@@ -664,6 +714,8 @@ def delete_entry(self, entry: EntryType):
664
714
target = self .groups
665
715
elif isinstance (entry , Generics ):
666
716
target = self .generics
717
+ elif isinstance (entry , AudioAnnotation ):
718
+ target = self .audio_annotations
667
719
else :
668
720
raise ValueError (
669
721
f"Invalid entry type { type (entry )} . A valid entry "
@@ -779,6 +831,9 @@ def get_data(
779
831
link_types : Dict [Type [Link ], Union [Dict , List ]] = {}
780
832
group_types : Dict [Type [Group ], Union [Dict , List ]] = {}
781
833
generics_types : Dict [Type [Generics ], Union [Dict , List ]] = {}
834
+ audio_annotation_types : Dict [
835
+ Type [AudioAnnotation ], Union [Dict , List ]
836
+ ] = {}
782
837
783
838
if request is not None :
784
839
for key_ , value in request .items ():
@@ -791,6 +846,8 @@ def get_data(
791
846
group_types [key ] = value
792
847
elif issubclass (key , Generics ):
793
848
generics_types [key ] = value
849
+ elif issubclass (key , AudioAnnotation ):
850
+ audio_annotation_types [key ] = value
794
851
795
852
context_args = annotation_types .get (context_type_ )
796
853
@@ -866,6 +923,12 @@ def get_data(
866
923
"currently not supported."
867
924
)
868
925
926
+ if audio_annotation_types :
927
+ raise NotImplementedError (
928
+ "Querying audio annotation types based on ranges is "
929
+ "currently not supported."
930
+ )
931
+
869
932
yield data
870
933
871
934
def _parse_request_args (self , a_type , a_args ):
@@ -1127,6 +1190,12 @@ def iter_in_range(
1127
1190
for group in self .groups :
1128
1191
if self ._index .in_span (group , range_annotation .span ):
1129
1192
yield group
1193
+ elif issubclass (entry_type , AudioAnnotation ):
1194
+ for audio_annotation in self .audio_annotations :
1195
+ if self ._index .in_span (
1196
+ audio_annotation , range_annotation .span
1197
+ ):
1198
+ yield audio_annotation
1130
1199
1131
1200
def get ( # type: ignore
1132
1201
self ,
@@ -1224,6 +1293,7 @@ def require_annotations() -> bool:
1224
1293
issubclass (entry_type_ , Annotation )
1225
1294
or issubclass (entry_type_ , Link )
1226
1295
or issubclass (entry_type_ , Group )
1296
+ or issubclass (entry_type_ , AudioAnnotation )
1227
1297
):
1228
1298
entry_iter = self .iter_in_range (entry_type_ , range_annotation )
1229
1299
elif issubclass (entry_type_ , Annotation ):
@@ -1232,6 +1302,8 @@ def require_annotations() -> bool:
1232
1302
entry_iter = self .links
1233
1303
elif issubclass (entry_type_ , Group ):
1234
1304
entry_iter = self .groups
1305
+ elif issubclass (entry_type_ , AudioAnnotation ):
1306
+ entry_iter = self .audio_annotations
1235
1307
else :
1236
1308
raise ValueError (
1237
1309
f"The requested type { str (entry_type_ )} is not supported."
@@ -1426,8 +1498,9 @@ def in_span(self, inner_entry: Union[int, Entry], span: Span) -> bool:
1426
1498
r"""Check whether the ``inner entry`` is within the given ``span``. The
1427
1499
criterion are as followed:
1428
1500
1429
- Annotation entries: they are considered in a span if the begin is not
1430
- smaller than `span.begin` and the end is not larger than `span.end`.
1501
+ Annotation/AudioAnnotation entries: they are considered in a span if the
1502
+ begin is not smaller than `span.begin` and the end is not larger than
1503
+ `span.end`.
1431
1504
1432
1505
Link entries: if the parent and child of the links are both
1433
1506
`Annotation` type, this link will be considered in span if both parent
@@ -1463,7 +1536,9 @@ def in_span(self, inner_entry: Union[int, Entry], span: Span) -> bool:
1463
1536
inner_begin = - 1
1464
1537
inner_end = - 1
1465
1538
1466
- if isinstance (inner_entry , Annotation ):
1539
+ if isinstance (inner_entry , Annotation ) or isinstance (
1540
+ inner_entry , AudioAnnotation
1541
+ ):
1467
1542
inner_begin = inner_entry .begin
1468
1543
inner_end = inner_entry .end
1469
1544
elif isinstance (inner_entry , Link ):
0 commit comments