Skip to content

Commit 0407b7a

Browse files
authored
Merge branch 'master' into issue#556
2 parents 19f5af9 + edd0f49 commit 0407b7a

File tree

4 files changed

+681
-1
lines changed

4 files changed

+681
-1
lines changed

docs/ontology_generation.md

+10-1
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,13 @@ Let us consider a simple ontology for documents of a pet shop.
7272
{
7373
"name": "pet_type",
7474
"type": "str"
75+
},
76+
{
77+
"name": "price",
78+
"description": "Price for pet. A 2x2 matrix, whose columns are female/male and rows are juvenile/adult.",
79+
"type": "NdArray",
80+
"ndarray_dtype": "float",
81+
"ndarray_shape": [2, 2]
7582
}
7683
]
7784
},
@@ -133,7 +140,7 @@ Each entry definition will define a couple (can be empty) attributes, mimicking
133140
* The `description` keyword is optionally used as the comment to describe the attribute.
134141
* The `type` keyword is used to define the type of the attribute. Currently supported types are:
135142
* Primitive types - `int`, `float`, `str`, `bool`
136-
* Composite types - `List`, `Dict`
143+
* Composite types - `List`, `Dict`, `NdArray`
137144
* Entries defined in the `top` module - The attributes can be of the type base
138145
entries (defined in the `forte.data.ontology.top` module) and can be directly
139146
referred by the class name.
@@ -146,6 +153,8 @@ Each entry definition will define a couple (can be empty) attributes, mimicking
146153
* `key_type` and `value_type`: If the `type` of the property is a `Dict`,
147154
then these two represent the types of the key and value of the dictionary,
148155
currently, only primitive types are supported as the `key_type`.
156+
* `ndarray_dtype: str` and `ndarray_shape: array`: If the `type` of the property is an `NdArray`, then
157+
these two represent the data type and the shape of the array. `NdArray` allows storing an N-dimensional (N-d) array in an entry. For instance, using the simple pet shop ontology above, we can instantiate a `Pet` and name it `dog`. Then, we can assign a matrix to the attribute `price` by `dog.price.data = [[2.99, 1.99], [4.99, 3.99]]`. Internally, this $2 \times 2$ matrix is stored as a NumPy array. When `ndarray_shape`/`ndarray_dtype` is specified, the shape/data type of any array assigned afterwards is checked against it. If both `ndarray_dtype` and `ndarray_shape` are provided, a placeholder will be created by `numpy.ndarray(ndarray_shape, dtype=ndarray_dtype)`.
149158

150159
## Major ontology types, Annotations, Links, Groups and Generics
151160
There are some very frequently used types in NLP:

forte/data/base_store.py

+161
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
# Copyright 2019 The Forte Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from abc import abstractmethod
16+
from typing import (
17+
List,
18+
Type,
19+
Union,
20+
Iterator,
21+
Tuple,
22+
)
23+
24+
from forte.data.ontology.core import EntryType
25+
26+
__all__ = ["BaseStore"]
27+
28+
29+
class BaseStore:
    r"""The base class which will be used by
    :class:`~forte.data.data_store.DataStore`.

    This is a base class for the efficient underlying data structure. A
    current implementation of `BaseStore` is `DataStore`.

    A `BaseStore` contains a collection of Forte entries.
    Each entry type contains some subtypes, which could have
    various fields stored in entry lists.

    Every method declared here raises :class:`NotImplementedError`; concrete
    stores are expected to override all of them.
    """

    # NOTE(review): the class does not derive from `abc.ABC`, so the
    # `@abstractmethod` markers below are informational only -- Python will
    # still allow `BaseStore()` (and incomplete subclasses) to be
    # instantiated. Confirm whether enforcement is wanted before changing the
    # base class, since that could break existing subclasses.

    def __init__(self):
        r"""Initialize the store. The base class sets up no state itself."""

    @abstractmethod
    def add_annotation_raw(
        self, type_id: int, begin: int, end: int
    ) -> Tuple[int, int]:
        r"""Add an annotation entry with `begin` and `end` index to the
        sortedlist at index `type_id` of the array which records all
        sortedlists.

        Args:
            type_id (int): The index of Annotation sortedlist in the array.
            begin (int): begin index of the entry.
            end (int): end index of the entry.

        Returns:
            A pair of integers. The first is presumably the `tid` of the
            newly added entry; the meaning of the second integer is not
            specified here (TODO: confirm against the implementing class).

        """
        raise NotImplementedError

    @abstractmethod
    def set_attr(self, tid: int, attr_id: int, attr_value):
        r"""Locate the entry data with `tid` and set its attribute
        `attr_id` to `attr_value`.

        Args:
            tid (int): Unique id of the entry.
            attr_id (int): id of the attribute.
            attr_value: value of the attribute.

        """
        raise NotImplementedError

    @abstractmethod
    def get_attr(self, tid: int, attr_id: int):
        r"""Locate the entry data with `tid` and get the value of
        `attr_id` of this entry.

        Args:
            tid (int): Unique id of the entry.
            attr_id (int): id of the attribute.

        Returns:
            The value of `attr_id` for the entry with `tid`.

        """
        raise NotImplementedError

    @abstractmethod
    def delete_entry(self, tid: int):
        r"""Locate the entry list with `tid` and remove it from the data
        store.

        Args:
            tid (int): Unique id of the entry.

        """
        raise NotImplementedError

    @abstractmethod
    def get_entry(self, tid: int) -> Tuple[List, int, int]:
        r"""Look up the entry_dict with key `tid`. Find its type_id and its
        index in the `entry_type` sortedlist.

        Args:
            tid (int): Unique id of the entry.

        Returns:
            A 3-tuple whose first element is the entry which `tid`
            corresponds to. The two integers are presumably the entry's
            type_id and its index in the sortedlist (TODO: confirm the
            order against the implementing class).

        """
        raise NotImplementedError

    @abstractmethod
    def get(
        self, entry_type: Union[str, Type[EntryType]], **kwargs
    ) -> Iterator[List]:
        r"""Obtain the entries of type `entry_type`. Implementations of
        this method should provide the actual lookup.

        Args:
            entry_type: The type of the entry to obtain.
            **kwargs: Additional arguments for the lookup; their meaning is
                left to the implementing class.

        Returns:
            An iterator of the entries matching the provided arguments.

        """
        raise NotImplementedError

    @abstractmethod
    def next_entry(self, tid: int) -> List:
        r"""Get the next entry of the same type as the `tid` entry.

        Args:
            tid (int): Unique id of the entry.

        Returns:
            The next entry of the same type as the `tid` entry.

        """
        raise NotImplementedError

    @abstractmethod
    def prev_entry(self, tid: int) -> List:
        r"""Get the previous entry of the same type as the `tid` entry.

        Args:
            tid (int): Unique id of the entry.

        Returns:
            The previous entry of the same type as the `tid` entry.

        """
        raise NotImplementedError

0 commit comments

Comments
 (0)