mylibrar
diff --git a/‎forte/data/data_pack.py
+79-4 b/‎forte/data/data_pack.py
+79-4
diff --git a/‎forte/data/ontology/top.py
+129 b/‎forte/data/ontology/top.py
+129
diff --git a/‎forte/ontology_specs/base_ontology.json
+24 b/‎forte/ontology_specs/base_ontology.json
+24
@@ -46,6 +46,7 @@
     Group,
     SinglePackEntries,
     Generics,
+    AudioAnnotation,
 )
 from forte.data.span import Span
 from forte.data.types import ReplaceOperationsType, DataRequest
@@ -166,6 +167,7 @@ def __init__(self, pack_name: Optional[str] = None):
         self.links: SortedList[Link] = SortedList()
         self.groups: SortedList[Group] = SortedList()
         self.generics: SortedList[Generics] = SortedList()
+        self.audio_annotations: SortedList[AudioAnnotation] = SortedList()
 
         self.__replace_back_operations: ReplaceOperationsType = []
         self.__processed_original_spans: List[Tuple[Span, Span]] = []
@@ -185,6 +187,7 @@ def __getstate__(self):
         state["links"] = list(state["links"])
         state["groups"] = list(state["groups"])
         state["generics"] = list(state["generics"])
+        state["audio_annotations"] = list(state["audio_annotations"])
         return state
 
     def __setstate__(self, state):
@@ -212,12 +215,14 @@ def __setstate__(self, state):
         self.links = as_sorted_error_check(self.links)
         self.groups = as_sorted_error_check(self.groups)
         self.generics = as_sorted_error_check(self.generics)
+        self.audio_annotations = as_sorted_error_check(self.audio_annotations)
 
         self._index = DataIndex()
         self._index.update_basic_index(list(self.annotations))
         self._index.update_basic_index(list(self.links))
         self._index.update_basic_index(list(self.groups))
         self._index.update_basic_index(list(self.generics))
+        self._index.update_basic_index(list(self.audio_annotations))
 
         for a in self.annotations:
             a.set_pack(self)
@@ -231,11 +236,15 @@ def __setstate__(self, state):
         for a in self.generics:
             a.set_pack(self)
 
+        for a in self.audio_annotations:
+            a.set_pack(self)
+
     def __iter__(self):
         yield from self.annotations
         yield from self.links
         yield from self.groups
         yield from self.generics
+        yield from self.audio_annotations
 
     def _init_meta(self, pack_name: Optional[str] = None) -> Meta:
         return Meta(pack_name)
@@ -341,6 +350,27 @@ def num_generics_entries(self):
         """
         return len(self.generics)
 
+    @property
+    def all_audio_annotations(self) -> Iterator[AudioAnnotation]:
+        """
+        Iterate over every audio annotation stored in this data pack.
+
+        Returns: An iterator that yields each element of
+        ``self.audio_annotations`` (entries of type
+        :class:`~forte.data.ontology.top.AudioAnnotation`), in the iteration
+        order of that container.
+
+        """
+        for audio_annotation in self.audio_annotations:
+            yield audio_annotation
+
+    @property
+    def num_audio_annotations(self):
+        """
+        Count the audio annotations held by this data pack.
+
+        Returns: The length of ``self.audio_annotations``.
+
+        """
+        audio_entries = self.audio_annotations
+        return len(audio_entries)
+
     def get_span_text(self, begin: int, end: int) -> str:
         r"""Get the text in the data pack contained in the span.
 
@@ -353,6 +383,23 @@ def get_span_text(self, begin: int, end: int) -> str:
         """
         return self._text[begin:end]
 
+    def get_span_audio(self, begin: int, end: int) -> str:
+        r"""Return the ``[begin:end]`` slice of this pack's audio payload.
+
+        Args:
+            begin (int): start index of the slice (inclusive).
+            end (int): end index of the slice (exclusive).
+
+        Returns:
+            The result of slicing the stored audio payload with
+            ``[begin:end]``.
+
+        Raises:
+            ProcessExecutionException: if the audio payload is ``None``,
+                i.e. it has not been set on this pack yet.
+        """
+        # NOTE(review): the `-> str` annotation mirrors `get_span_text`;
+        # confirm it matches the actual type stored in `self._audio`.
+        audio_payload = self._audio
+        if audio_payload is not None:
+            return audio_payload[begin:end]
+        raise ProcessExecutionException(
+            "The audio payload of this DataPack is not set. Please call"
+            " method `set_audio` before running `get_span_audio`."
+        )
+
     def set_text(
         self,
         text: str,
@@ -619,10 +666,13 @@ def __add_entry_with_check(
             target = self.groups
         elif isinstance(entry, Generics):
             target = self.generics
+        elif isinstance(entry, AudioAnnotation):
+            target = self.audio_annotations
         else:
             raise ValueError(
                 f"Invalid entry type {type(entry)}. A valid entry "
-                f"should be an instance of Annotation, Link, Group of Generics."
+                f"should be an instance of Annotation, Link, Group, Generics "
+                "or AudioAnnotation."
             )
 
         if not allow_duplicate:
@@ -664,6 +714,8 @@ def delete_entry(self, entry: EntryType):
             target = self.groups
         elif isinstance(entry, Generics):
             target = self.generics
+        elif isinstance(entry, AudioAnnotation):
+            target = self.audio_annotations
         else:
             raise ValueError(
                 f"Invalid entry type {type(entry)}. A valid entry "
@@ -779,6 +831,9 @@ def get_data(
         link_types: Dict[Type[Link], Union[Dict, List]] = {}
         group_types: Dict[Type[Group], Union[Dict, List]] = {}
         generics_types: Dict[Type[Generics], Union[Dict, List]] = {}
+        audio_annotation_types: Dict[
+            Type[AudioAnnotation], Union[Dict, List]
+        ] = {}
 
         if request is not None:
             for key_, value in request.items():
@@ -791,6 +846,8 @@ def get_data(
                     group_types[key] = value
                 elif issubclass(key, Generics):
                     generics_types[key] = value
+                elif issubclass(key, AudioAnnotation):
+                    audio_annotation_types[key] = value
 
         context_args = annotation_types.get(context_type_)
 
@@ -866,6 +923,12 @@ def get_data(
                     "currently not supported."
                 )
 
+            if audio_annotation_types:
+                raise NotImplementedError(
+                    "Querying audio annotation types based on ranges is "
+                    "currently not supported."
+                )
+
             yield data
 
     def _parse_request_args(self, a_type, a_args):
@@ -1127,6 +1190,12 @@ def iter_in_range(
                 for group in self.groups:
                     if self._index.in_span(group, range_annotation.span):
                         yield group
+            elif issubclass(entry_type, AudioAnnotation):
+                for audio_annotation in self.audio_annotations:
+                    if self._index.in_span(
+                        audio_annotation, range_annotation.span
+                    ):
+                        yield audio_annotation
 
     def get(  # type: ignore
         self,
@@ -1224,6 +1293,7 @@ def require_annotations() -> bool:
                 issubclass(entry_type_, Annotation)
                 or issubclass(entry_type_, Link)
                 or issubclass(entry_type_, Group)
+                or issubclass(entry_type_, AudioAnnotation)
             ):
                 entry_iter = self.iter_in_range(entry_type_, range_annotation)
         elif issubclass(entry_type_, Annotation):
@@ -1232,6 +1302,8 @@ def require_annotations() -> bool:
             entry_iter = self.links
         elif issubclass(entry_type_, Group):
             entry_iter = self.groups
+        elif issubclass(entry_type_, AudioAnnotation):
+            entry_iter = self.audio_annotations
         else:
             raise ValueError(
                 f"The requested type {str(entry_type_)} is not supported."
@@ -1426,8 +1498,9 @@ def in_span(self, inner_entry: Union[int, Entry], span: Span) -> bool:
         r"""Check whether the ``inner entry`` is within the given ``span``. The
         criterion are as followed:
 
-        Annotation entries: they are considered in a span if the begin is not
-        smaller than `span.begin` and the end is not larger than `span.end`.
+        Annotation/AudioAnnotation entries: they are considered in a span if the
+        begin is not smaller than `span.begin` and the end is not larger than
+        `span.end`.
 
         Link entries: if the parent and child of the links are both
         `Annotation` type, this link will be considered in span if both parent
@@ -1463,7 +1536,9 @@ def in_span(self, inner_entry: Union[int, Entry], span: Span) -> bool:
         inner_begin = -1
         inner_end = -1
 
-        if isinstance(inner_entry, Annotation):
+        if isinstance(inner_entry, Annotation) or isinstance(
+            inner_entry, AudioAnnotation
+        ):
             inner_begin = inner_entry.begin
             inner_end = inner_entry.end
         elif isinstance(inner_entry, Link):
 
@@ -38,6 +38,7 @@
     "Query",
     "SinglePackEntries",
     "MultiPackEntries",
+    "AudioAnnotation",
 ]
 
 QueryType = Union[Dict[str, Any], np.ndarray]
@@ -545,5 +546,133 @@ def update_results(self, pid_to_score: Dict[str, float]):
         self.results.update(pid_to_score)
 
 
+@total_ordering
+class AudioAnnotation(Entry):
+    r"""AudioAnnotation type entries, such as "recording" and "audio utterance".
+    Each audio annotation has a :class:`Span` corresponding to its offset
+    in the audio. Most methods in this class are the same as the ones in
+    :class:`Annotation`, except that it replaces property `text` with `audio`.
+
+    Equality and hashing are both based on ``(type, begin, end)``, and
+    ordering is based on ``begin``, then ``end``, then the class name.
+
+    Args:
+        pack (PackType): The container that this audio annotation
+            will be added to.
+        begin (int): The offset of the first sample in the audio annotation.
+        end (int): The offset of the last sample in the audio annotation + 1.
+    """
+
+    def __init__(self, pack: PackType, begin: int, end: int):
+        # The span object is created lazily (see `span`); only the raw
+        # offsets are stored here. They are assigned before the parent
+        # constructor runs, preserving the original initialization order.
+        self._span: Optional[Span] = None
+        self._begin: int = begin
+        self._end: int = end
+        super().__init__(pack)
+
+    @property
+    def audio(self):
+        r"""The audio covered by this annotation, obtained from the pack via
+        ``get_span_audio(begin, end)``.
+
+        Raises:
+            ValueError: if this annotation is not attached to any data pack.
+        """
+        if self.pack is None:
+            raise ValueError(
+                "Cannot get audio because annotation is not "
+                "attached to any data pack."
+            )
+        return self.pack.get_span_audio(self.begin, self.end)
+
+    def __getstate__(self):
+        r"""For serializing AudioAnnotation, we should create Span annotations
+        for compatibility purposes.
+        """
+        # Materialize the span so that it is part of the serialized state;
+        # the raw offsets are dropped and rebuilt from it in `__setstate__`.
+        self._span = Span(self._begin, self._end)
+        state = super().__getstate__()
+        state.pop("_begin")
+        state.pop("_end")
+        return state
+
+    def __setstate__(self, state):
+        """
+        For de-serializing AudioAnnotation, we load the begin, end from Span,
+        for compatibility purposes.
+        """
+        super().__setstate__(state)
+        self._begin = self._span.begin
+        self._end = self._span.end
+
+    @property
+    def span(self) -> Span:
+        r"""The :class:`Span` built from ``begin`` and ``end``; it is created
+        on first access and cached afterwards.
+        """
+        # Delay span creation at usage.
+        if self._span is None:
+            self._span = Span(self._begin, self._end)
+        return self._span
+
+    @property
+    def begin(self):
+        r"""The begin offset given at construction (or restored from the
+        serialized span).
+        """
+        return self._begin
+
+    @property
+    def end(self):
+        r"""The end offset given at construction (or restored from the
+        serialized span).
+        """
+        return self._end
+
+    def __eq__(self, other):
+        r"""The eq function of :class:`AudioAnnotation`.
+        By default, :class:`AudioAnnotation` objects are regarded as the same
+        if they have the same type and the same begin and end offsets.
+
+        Users can define their own eq function by themselves but this must
+        be consistent to :meth:`__hash__`.
+        """
+        # Returning `NotImplemented` for foreign operands (including `None`)
+        # lets Python fall back to its default handling, so `==` evaluates
+        # to `False` instead of raising `AttributeError` on objects that
+        # have no `begin`/`end`.
+        if not isinstance(other, AudioAnnotation):
+            return NotImplemented
+        return (type(self), self.begin, self.end) == (
+            type(other),
+            other.begin,
+            other.end,
+        )
+
+    def __hash__(self):
+        r"""The hash function of :class:`AudioAnnotation`, consistent with
+        :meth:`__eq__`.
+
+        A class that defines ``__eq__`` without ``__hash__`` has its
+        ``__hash__`` implicitly set to ``None``, which would make instances
+        unusable in sets and as dictionary keys.
+        """
+        return hash((type(self), self.begin, self.end))
+
+    def __lt__(self, other):
+        r"""To support total_ordering, `AudioAnnotation` must implement
+        `__lt__`. The ordering is defined in the following way:
+
+        1. If the begin of the audio annotations are different, the one with
+           larger begin will be larger.
+        2. In the case where the begins are the same, the one with larger
+           end will be larger.
+        3. In the case where both offsets are the same, we break the tie using
+           the normal sorting of the class name.
+        """
+        if self.begin == other.begin:
+            if self.end == other.end:
+                return str(type(self)) < str(type(other))
+            return self.end < other.end
+        else:
+            return self.begin < other.begin
+
+    @property
+    def index_key(self) -> int:
+        r"""The key used to index this entry: its ``tid``."""
+        return self.tid
+
+    def get(
+        self,
+        entry_type: Union[str, Type[EntryType]],
+        components: Optional[Union[str, Iterable[str]]] = None,
+        include_sub_type=True,
+    ) -> Iterable[EntryType]:
+        """
+        This function wraps the :meth:`~forte.data.DataPack.get()` method to find
+        entries "covered" by this audio annotation. See that method for more
+        information. For usage details, refer to
+        :meth:`forte.data.ontology.top.Annotation.get()`.
+
+        Args:
+            entry_type (type): The type of entries requested.
+            components (str or list, optional): The component (creator)
+                generating the entries requested. If `None`, will return valid
+                entries generated by any component.
+            include_sub_type (bool): whether to consider the sub types of
+                the provided entry type. Default `True`.
+
+        Yields:
+            Each `Entry` found using this method.
+
+        """
+        # This annotation itself is passed as the range argument of the
+        # pack-level query.
+        yield from self.pack.get(entry_type, self, components, include_sub_type)
+
+
 SinglePackEntries = (Link, Group, Annotation, Generics)
 MultiPackEntries = (MultiPackLink, MultiPackGroup, MultiPackGeneric)
@@ -415,6 +415,30 @@
           "item_type": "ft.onto.base_ontology.Phrase"
         }
       ]
+    },
+    {
+      "entry_name": "ft.onto.base_ontology.Recording",
+      "parent_entry": "forte.data.ontology.top.AudioAnnotation",
+      "description": "A span based annotation `Recording`, normally used to represent a recording.",
+      "attributes": [
+        {
+          "name": "recording_class",
+          "type": "List",
+          "item_type": "str",
+          "description": "A list of class names that the recording belongs to."
+        }
+      ]
+    },
+    {
+      "entry_name": "ft.onto.base_ontology.AudioUtterance",
+      "parent_entry": "forte.data.ontology.top.AudioAnnotation",
+      "description": "A span based annotation `AudioUtterance`, normally used to represent an utterance in dialogue.",
+      "attributes": [
+        {
+          "name": "speaker",
+          "type": "str"
+        }
+      ]
     }
   ]
 }