Skip to content

Datastore entry method #771

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
May 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
b330243
implement all_entries and num_entries methods and their test cases
hepengfe May 4, 2022
5b35334
add docstring for methods
hepengfe May 4, 2022
2f512d7
add count's initial value
hepengfe May 4, 2022
f8b8985
Merge branch 'master' into datastore_entry_method
hepengfe May 4, 2022
3dc434e
correct entry method to get subclasses of requested entry type
hepengfe May 5, 2022
ac612f8
add check for Group entry
hepengfe May 6, 2022
3baf317
exclude link and group types while getting entries
hepengfe May 6, 2022
956ed11
Merge branch 'master' into datastore_entry_method
hepengfe May 6, 2022
1de8c95
add num_entries check after deleting an entry
hepengfe May 7, 2022
902c3f3
Merge branch 'datastore_entry_method' of https://github.com/feipenghe…
hepengfe May 7, 2022
dcaaba3
Merge branch 'master' into datastore_entry_method
hepengfe May 23, 2022
4b8fba9
allow non-annotation-like entries in all_entries and num_entries and m…
hepengfe May 23, 2022
fab1b3d
update implementations of all_entries and num_entries based on the ne…
hepengfe May 23, 2022
844c706
update tests of all_entries and num_entries based on the new DataStor…
hepengfe May 23, 2022
00e8388
Merge branch 'master' into datastore_entry_method
hepengfe May 24, 2022
5c026bd
black
hepengfe May 24, 2022
3b51e27
Merge branch 'master' into datastore_entry_method
hepengfe May 25, 2022
11e2096
add class method _get_all_subclass and edit other methods based on it
hepengfe May 25, 2022
3446fbb
sort self.__elements.keys() in _get_all_subclass
hepengfe May 25, 2022
f47f405
simplify implementation of num_entries based on _get_all_subclass
hepengfe May 25, 2022
3fee8d8
add test cases based on code review
hepengfe May 25, 2022
71f77fb
use iter() in all_entries() to skip None
hepengfe May 25, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 57 additions & 2 deletions forte/data/data_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,6 +470,29 @@ def _is_subclass(
else:
return False

def _get_all_subclass(self, entry_type_name: str, inclusive: bool = False):
    """
    Get the stored entry type names that are subclasses of
    ``entry_type_name``.

    Only type names that are keys of the ``DataStore`` storage are
    considered, and they are visited in sorted order.

    Args:
        entry_type_name (str): the type name of the parent entry type.
        inclusive (bool): if True, ``entry_type_name`` itself is yielded
            when it is a key of the storage. If False, it is never
            yielded.

    Yields:
        The type name of each stored entry type that matches.
    """
    # Resolved on first use only: the class object is needed solely for
    # the subclass check on *other* stored types, and it never changes
    # while iterating.
    parent_class = None
    for entry_type_key in sorted(self.__elements):
        if entry_type_key == entry_type_name:
            # Decide the requested type purely on ``inclusive``. A subclass
            # check that treats a class as its own subclass (as the
            # built-in ``issubclass`` does) would otherwise yield it even
            # when ``inclusive`` is False.
            if inclusive:
                yield entry_type_key
            continue
        if parent_class is None:
            parent_class = get_class(entry_type_name)
        if self._is_subclass(entry_type_key, parent_class):
            yield entry_type_key

def _is_annotation(self, type_name: str) -> bool:
r"""This function takes a type_name and returns whether a type
is an annotation type or not.
Expand All @@ -484,6 +507,40 @@ def _is_annotation(self, type_name: str) -> bool:
entry_class = get_class(type_name)
return issubclass(entry_class, (Annotation, AudioAnnotation))

def all_entries(self, entry_type_name: str) -> Iterator[List]:
    """
    Iterate over the raw data of every stored entry whose type is
    ``entry_type_name`` or one of its subclasses.

    Args:
        entry_type_name (str): the type name of the entries to retrieve.

    Yields:
        The raw data of one entry at a time, in list format.
    """
    matching_types = self._get_all_subclass(entry_type_name, True)
    for type_key in matching_types:
        # Delegate to ``self.iter`` for the per-type traversal.
        for raw_entry in self.iter(type_key):
            yield raw_entry

def num_entries(self, entry_type_name: str) -> int:
    """
    Count the stored entries whose type is ``entry_type_name`` or one of
    its subclasses.

    Args:
        entry_type_name (str): the type name of the entries to count.

    Returns:
        The number of entries of ``entry_type_name`` and of its
        subclasses, with recorded deletions subtracted.
    """
    total = 0
    for type_key in self._get_all_subclass(entry_type_name, True):
        stored = len(self.__elements[type_key])
        # Types that keep a deletion count still hold a slot for each
        # deleted entry, so those slots must not be counted.
        if type_key in self.__deletion_count:
            removed = self.__deletion_count[type_key]
        else:
            removed = 0
        total += stored - removed
    return total

def _add_entry_raw(
self,
entry_type: Type[Entry],
Expand Down Expand Up @@ -924,9 +981,7 @@ def co_iterator_annotation_like(
type_names: a list of string type names

Returns:

An iterator of entry elements.

"""

n = len(type_names)
Expand Down
80 changes: 73 additions & 7 deletions tests/forte/data/data_store_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,8 @@ def setUp(self) -> None:
]

sorting_fn = lambda s: (
s[constants.BEGIN_INDEX], s[constants.END_INDEX],
s[constants.BEGIN_INDEX],
s[constants.END_INDEX],
)
self.data_store._DataStore__elements = {
"ft.onto.base_ontology.Document": SortedList(
Expand Down Expand Up @@ -317,6 +318,61 @@ def test_get_type_info(self):
DataStore._type_attributes = self.reference_type_attributes
# TODO: need more tests for ontology file input

def test_entry_methods(self):
    """Check ``all_entries`` and ``num_entries`` against the fixture data,
    including counts of a parent type after entries are deleted and added.
    """
    sent_type = "ft.onto.base_ontology.Sentence"
    doc_type = "ft.onto.base_ontology.Document"
    ann_type = "forte.data.ontology.top.Annotation"
    group_type = "forte.data.ontology.top.Group"
    sent_list = list(self.data_store._DataStore__elements[sent_type])
    doc_list = list(self.data_store._DataStore__elements[doc_type])
    # Expected entries for the parent Annotation type: its own entries
    # followed by the Document and Sentence entries.
    ann_list = (
        list(self.data_store._DataStore__elements[ann_type])
        + doc_list
        + sent_list
    )
    group_list = list(self.data_store._DataStore__elements[group_type])
    sent_entries = list(self.data_store.all_entries(sent_type))
    doc_entries = list(self.data_store.all_entries(doc_type))
    ann_entries = list(self.data_store.all_entries(ann_type))

    self.assertEqual(sent_list, sent_entries)
    self.assertEqual(doc_list, doc_entries)
    self.assertEqual(ann_list, ann_entries)

    self.assertEqual(self.data_store.num_entries(sent_type), len(sent_list))
    self.assertEqual(self.data_store.num_entries(doc_type), len(doc_list))
    # Compare against the fixture-derived list, like the two assertions
    # above, rather than against output of the code under test.
    self.assertEqual(self.data_store.num_entries(ann_type), len(ann_list))

    # remove two sentences
    self.data_store.delete_entry(9999)
    self.data_store.delete_entry(1234567)
    self.assertEqual(
        self.data_store.num_entries(sent_type), len(sent_list) - 2
    )
    self.assertEqual(
        self.data_store.num_entries(ann_type), len(ann_list) - 2
    )  # test parent entry count
    # add several sentences and count again
    add_count = 5
    for i in range(add_count):
        self.data_store.add_annotation_raw(
            "ft.onto.base_ontology.Sentence", i, i + 1
        )
    self.assertEqual(
        self.data_store.num_entries(sent_type),
        len(sent_list) - 2 + add_count,
    )
    self.assertEqual(
        self.data_store.num_entries(ann_type),
        len(ann_list) - 2 + add_count,
    )  # test parent entry count

    # remove a group
    self.data_store.delete_entry(23456)
    num_group_entries = self.data_store.num_entries(group_type)
    self.assertEqual(num_group_entries, len(group_list) - 1)

def test_co_iterator_annotation_like(self):
type_names = [
"ft.onto.base_ontology.Sentence",
Expand Down Expand Up @@ -545,16 +601,26 @@ def test_add_annotation_raw(self):
tid_sent_duplicate: int = self.data_store.add_annotation_raw(
"ft.onto.base_ontology.Sentence", 5, 8, allow_duplicate=False
)
self.assertEqual(len(self.data_store._DataStore__elements[
"ft.onto.base_ontology.Sentence"
]), num_sent)
self.assertEqual(
len(
self.data_store._DataStore__elements[
"ft.onto.base_ontology.Sentence"
]
),
num_sent,
)
self.assertEqual(tid_sent, tid_sent_duplicate)
self.data_store.add_annotation_raw(
"ft.onto.base_ontology.Sentence", 5, 9, allow_duplicate=False
)
self.assertEqual(len(self.data_store._DataStore__elements[
"ft.onto.base_ontology.Sentence"
]), num_sent + 1)
self.assertEqual(
len(
self.data_store._DataStore__elements[
"ft.onto.base_ontology.Sentence"
]
),
num_sent + 1,
)

# check add annotation raw with tid
tid = 77
Expand Down