Skip to content

Commit e83165b

Browse files
mylibrarSuqi Sunhunterhector
authored
Implement Stave as a processor (#440)
* Add time profiling for processors * Add time profiling for Reader component * Add unit test for time profiling * Improve code style. * Add comments. * Update unit test. * Remove unnecessary annotations. * Fix undefined name issue. * Fix some issues in PR. * Use lazy % formatting in logging functions * Fix bugs in unit test. * Format docstring. * Remove trailing whitespace. * Add StaveProcessor. * Add unittest for StaveProcessor * Imporve style. * Update workflow. * Update StaveProcessor path. * Fix PR issues. * Fix latest PR issues. * Update copyright year. * Update unit test. * Improve style. * Reroute datasource to disk. * Fix pylint issue * Fix doctest issue * Fix PR issues * Fix typo * Remove trailing whitespaces * Update doc * Remove trailing whitespace * Update docs * Remove hard coding * Remove hard coding in StaveProcessor * Fix pylint issue * Use StaveSession * Update unit test * Remove requests * Update workflow * Remove full mode stave from StaveProcessor * Fix pylint error * Remove username/password in configs * Fix docs in default_configs Co-authored-by: Suqi Sun <suqi.sun@mbp00482s-MacBook-Pro.local> Co-authored-by: Hector <hunterhector@gmail.com>
1 parent 911c1e9 commit e83165b

File tree

7 files changed

+585
-3
lines changed

7 files changed

+585
-3
lines changed

.github/workflows/main.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ jobs:
6161
rm -rf texar-pytorch
6262
- name: Install Forte
6363
run: |
64-
pip install --use-feature=in-tree-build --progress-bar off .[ner,test,example,ir,wikipedia,augment]
64+
pip install --use-feature=in-tree-build --progress-bar off .[ner,test,example,ir,wikipedia,augment,stave]
6565
- name: Install a few wrappers for testing
6666
run: |
6767
git clone https://github.com/asyml/forte-wrappers.git

forte/processors/stave/__init__.py

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Copyright 2021 The Forte Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from forte.processors.stave.stave_processor import *
+265
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,265 @@
1+
# Copyright 2021 The Forte Authors. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""
16+
A StaveProcessor is introduced to enable immediate visualization of forte
17+
pipeline results. It supports serving a Stave instance with StaveViewer.
18+
Forte users can plug it into the pipeline to easily visualize datapacks
19+
with annotations. It is also highly configurable for users to change port
20+
number, host name, layout, etc.
21+
22+
Package Requirements:
23+
forte
24+
stave
25+
"""
26+
27+
import os
28+
import logging
29+
import collections
30+
from typing import Dict, Set, Any
31+
32+
from nlpviewer_backend.lib.stave_viewer import StaveViewer
33+
from nlpviewer_backend.lib.stave_project import StaveProjectWriter
34+
35+
from forte.common import Resources, ProcessorConfigError
36+
from forte.common.configuration import Config
37+
from forte.data.data_pack import DataPack
38+
from forte.data.ontology.code_generation_objects import search
39+
from forte.processors.base import PackProcessor
40+
41+
# Module-level logger, named after this module so that log records can be
# filtered per processor package.
logger = logging.getLogger(__name__)

# Public API of this module; this is what
# `from forte.processors.stave.stave_processor import *` re-exports.
__all__ = ["StaveProcessor"]
46+
47+
48+
class StaveProcessor(PackProcessor):
    r"""
    ``StaveProcessor`` provides easy visualization for forte users. We can
    visualize datapack with annotations by inserting it into the forte
    pipeline without affecting the original functionalities.

    ``StaveProcessor`` requires an ontology file being passed to the pipeline
    by setting the input parameter ``ontology_file``. Otherwise a
    ``ProcessorConfigError`` will be raised. It then generates default
    configurations based on the input ontology to start a stave instance
    without any additional specification by users.
    Example usage:

    .. code-block:: python

        Pipeline(ontology_file="ontology/path") \
            .set_reader(plaintext_reader(), {"input_path":"some/path"}) \
            .add(StaveProcessor())

    After initialized, ``StaveProcessor`` will create a project directory
    (or use an existing directory specified in ``project_path``). Metadata
    and textpacks will be dumped into the directory.

    ``StaveProcessor`` is also highly customizable for users to set up. Users
    may configure port number, server host, project name, etc.
    Example usage:

    .. code-block:: python

        Pipeline(ontology_file="ontology/path") \
            .set_reader(plaintext_reader(), {"input_path":"some/path"}) \
            .add(StaveProcessor(), configs={
                "port": 8880,
                "project_name": "serialization_pipeline_test"
            })

    Users can modify project configs by changing the ``project_configs`` field.
    Example usage:

    .. code-block:: python

        Pipeline(ontology_file="ontology/path") \
            .set_reader(plaintext_reader(), {"input_path":"some/path"}) \
            .add(StaveProcessor(), configs={
                "port": 8879,
                "project_configs": {
                    # Configure Stave layout. Replace the normal annotation
                    # viewer "default-nlp" with a dialogue box.
                    "layoutConfigs": {
                        "center-middle": "DialogueBox"
                    }
                }
            })
    """

    def __init__(self):
        """
        Create the processor. The viewer and the project writer are only
        declared here; they are constructed in :meth:`initialize`.
        """
        super().__init__()
        self._project_id: int = -1
        self._viewer: StaveViewer
        self._project_writer: StaveProjectWriter

    def initialize(self, resources: Resources, configs: Config):
        """
        Validate the configuration, fill in the default project
        configuration, and build the viewer and the project writer.

        Args:
            resources: Pipeline resources. ``onto_specs_dict`` and
                ``merged_entry_tree`` are read from it.
            configs: Processor configuration, see :meth:`default_configs`.

        Raises:
            ProcessorConfigError: If ``project_type`` is neither
                ``"single_pack"`` nor a ``"multi_pack"`` with
                ``multi_ontology`` set, or if the ontology resources are
                missing.
        """
        super().initialize(resources, configs)

        # Validate multi_pack project config:
        # A `multi_pack` project must have `multi_ontology` set.
        project_type = self.configs.project_type
        is_single_pack = project_type == "single_pack"
        is_valid_multi_pack = (
            project_type == "multi_pack"
            and self.configs.multi_ontology is not None
        )
        if not (is_single_pack or is_valid_multi_pack):
            raise ProcessorConfigError("Invalid project type configuration.")

        # Generate default configurations; user supplied values in
        # `project_configs` are layered over the ontology based defaults.
        self.configs.project_configs = Config(
            hparams=self.configs.project_configs,
            default_hparams=self._default_project_configs()
        )
        self.configs.multi_ontology = \
            self.configs.multi_ontology or Config({}, {})
        # Fall back to a folder named after the project when no explicit
        # path is configured.
        self.configs.project_path = os.path.abspath(
            self.configs.project_path or self.configs.project_name)

        self._viewer = StaveViewer(
            project_path=self.configs.project_path,
            host=self.configs.host,
            port=self.configs.port,
            thread_daemon=self.configs.server_thread_daemon
        )

        # Write meta data to project folder
        self._project_writer = StaveProjectWriter(
            project_path=self.configs.project_path,
            project_name=self.configs.project_name,
            project_type=self.configs.project_type,
            ontology=self.resources.get("onto_specs_dict"),
            project_configs=self.configs.project_configs.todict(),
            multi_ontology=self.configs.multi_ontology.todict()
        )

    def _process(self, input_pack: DataPack):
        """
        Serialize ``input_pack`` into the project folder and make sure the
        viewer is running.

        The server is started on the first call. The pack is only written
        while the server reports itself as started, and the viewer is
        opened when the writer returns textpack id ``0`` (presumably the
        first pack written to the project -- confirm against
        ``StaveProjectWriter``).

        Args:
            input_pack: The data pack to be visualized.
        """
        if not self._viewer.server_started:
            self._viewer.run()

        if self._viewer.server_started:
            textpack_name = (
                input_pack.pack_name
                if self.configs.use_pack_name
                else input_pack.pack_id
            )
            textpack_id = self._project_writer.write_textpack(
                textpack_name, input_pack.serialize())
            if textpack_id == 0:
                self._viewer.open()

    def _default_project_configs(self):
        # pylint: disable=line-too-long
        """
        Create default project configuration based on ontology.
        This is translated from JavaScript function `createDefaultConfig` in
        https://github.com/asyml/stave/blob/d82383de3d74bf09c0d30f33d8a902595f5aff80/src/app/pages/Projects.tsx#L140

        Returns:
            configs: A dictionary with the default config for project.

        Raises:
            ProcessorConfigError: If ``onto_specs_dict`` or
                ``merged_entry_tree`` is not set in resources.
        """
        # pylint: enable=line-too-long

        if not (self.resources.contains("onto_specs_dict") and
                self.resources.contains("merged_entry_tree")):
            raise ProcessorConfigError(
                "onto_specs_dict/merged_entry_tree is not set in resources.")
        ontology = self.resources.get("onto_specs_dict")
        entry_tree = self.resources.get("merged_entry_tree")

        # Create legend configs: every entry is shown but not selected, and
        # entries that declare attributes get a switch (off by default) for
        # each attribute of type "str".
        legend_configs: Dict[str, Any] = {}
        entry_name_set: Set[str] = set()
        for entry in ontology["definitions"]:
            entry_name = entry["entry_name"]
            entry_name_set.add(entry_name)
            legend_configs[entry_name] = {
                "is_selected": False,
                "is_shown": True,
            }
            attributes = entry.get("attributes")
            if attributes:
                legend_configs[entry_name]["attributes"] = {
                    attribute["name"]: False
                    for attribute in attributes
                    if attribute["type"] == "str"
                }

        # Find all subclass of `forte.data.ontology.top.Annotation` and
        # update `scopeConfigs` accordingly.
        scope_configs: Dict[str, Any] = {}
        annotation_root = search(
            entry_tree.root, "forte.data.ontology.top.Annotation")
        # `search` may not find the Annotation node in the merged tree; in
        # that case there is nothing to traverse and `scopeConfigs` stays
        # empty instead of failing on a missing node.
        queue = collections.deque(
            [] if annotation_root is None else [annotation_root])
        # Breadth-first traversal (FIFO) over the Annotation subtree.
        while queue:
            node = queue.popleft()
            if node.name in entry_name_set:
                scope_configs[node.name] = False
            queue.extend(node.children)

        configs: Dict[str, Any] = {
            "legendConfigs": legend_configs,
            "scopeConfigs": scope_configs,
            "layoutConfigs": {
                "center-middle": "default-nlp",
                "left": "default-meta",
                "right": "default-attribute",
                "center-bottom": "disable"
            }
        }
        return configs

    @classmethod
    def default_configs(cls) -> Dict[str, Any]:
        """
        This defines a basic config structure for StaveProcessor.
        Following are the keys for this dictionary:

            - ``project_path``: Path to the project directory for rendering.
              Default to None, which creates a folder using ``project_name``.
            - ``port``: Port number for Stave server. Default value is `8888`.
            - ``host``: Host name for Stave server. Default value is
              `"localhost"`.
            - ``project_type``: `"single_pack"` (default) or `"multi_pack"`.
            - ``project_name``: Project name displayed on Stave. Default name
              is `"Auto generated project"`.
            - ``multi_ontology``: A dictionary for multi_pack ontology.
              Default to `None`.
            - ``project_configs``: Project configurations. Default to `None`.
            - ``server_thread_daemon``: Sets whether the server thread is
              daemonic. Default to `False`.
            - ``use_pack_name``: Use ``pack_name`` to name the textpack being
              saved to project path in viewer mode. If `False`, will use
              ``pack_id`` for naming. Default to False.

        Returns:
            dict: A dictionary with the default config for this processor.
        """
        config = super().default_configs()

        config.update({
            "project_path": None,
            "port": 8888,
            "host": "localhost",
            "project_type": "single_pack",
            "project_name": "Auto generated project",
            "multi_ontology": None,
            "project_configs": None,
            "server_thread_daemon": False,
            "use_pack_name": False
        })

        return config

setup.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,8 @@
4343
'test': ['ddt', 'testfixtures'],
4444
'example': ['termcolor==1.1.0', 'hypothesis==2.0.0'],
4545
'wikipedia': ['rdflib==4.2.2'],
46-
'augment': ['transformers>=3.1']
46+
'augment': ['transformers>=3.1'],
47+
'stave': ['stave']
4748
},
4849
entry_points={
4950
'console_scripts': [

0 commit comments

Comments
 (0)