Skip to content

MultiPack to SinglePack boxer #564

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 41 commits into from
Feb 1, 2022
Merged
Show file tree
Hide file tree
Changes from 35 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
ca91b95
bug435_follow_up
VincentYaoMBZUAI Nov 7, 2021
6c20736
bug435_follow_up_1
VincentYaoMBZUAI Nov 7, 2021
b469614
bug435_follow_up
VincentYaoMBZUAI Nov 7, 2021
5f3096c
bug435_follow_up_line_80_fixed
VincentYaoMBZUAI Nov 7, 2021
5cb510a
bug435_follow_up_line_using_raise_attributeerror
VincentYaoMBZUAI Nov 7, 2021
e85610e
bug435_follow_up_import_logging_fixed
VincentYaoMBZUAI Nov 7, 2021
1883701
bug435_follow_up_line_80_fixed
VincentYaoMBZUAI Nov 7, 2021
fcfa024
bug435_follow_up_using_logging
VincentYaoMBZUAI Nov 7, 2021
68ff87f
bug435_follow_up_using_logging_format_fixed
VincentYaoMBZUAI Nov 7, 2021
a00a7c8
bug435_follow_up_using_logging_line_80
VincentYaoMBZUAI Nov 7, 2021
79a69ca
bug435_follow_up_using_raise_error_line_80
VincentYaoMBZUAI Nov 8, 2021
a39f852
master_clean
VincentYaoMBZUAI Nov 8, 2021
c266b8f
DataPackBoxer_fixed
VincentYaoMBZUAI Nov 21, 2021
d47ed7f
Merge branch 'master' into bug561fix
VincentYaoMBZUAI Nov 28, 2021
b7df51f
allow using pack_index to take a pack, and make the default value to …
VincentYaoMBZUAI Dec 15, 2021
f989586
modification and adding test_multi_pack_to_data_pack_boxer
VincentYaoMBZUAI Dec 16, 2021
8f4eda0
Merge branch 'bug561fix' of https://github.com/VincentYaoMBZUAI/forte…
VincentYaoMBZUAI Dec 20, 2021
135bff3
Merge branch 'master' into bug561fix
VincentYaoMBZUAI Dec 20, 2021
d372cc8
created a new boxer test file
VincentYaoMBZUAI Dec 20, 2021
c24576d
Merge branch 'bug561fix' of https://github.com/VincentYaoMBZUAI/forte…
VincentYaoMBZUAI Dec 20, 2021
e28f791
expected_pack_name modified
VincentYaoMBZUAI Dec 20, 2021
3a4af65
expected_pack_name modified
VincentYaoMBZUAI Dec 20, 2021
a40ccd6
return the pack from the multipack that matches the pack_index
VincentYaoMBZUAI Dec 21, 2021
2aa38fb
line 80 fixed
VincentYaoMBZUAI Dec 21, 2021
b4524b0
line 80 fixed
VincentYaoMBZUAI Dec 21, 2021
f8061f0
line 80 fixed
VincentYaoMBZUAI Dec 21, 2021
fc59f01
using pack.num_pack instead of len(_pack_names)
VincentYaoMBZUAI Jan 4, 2022
434fb2f
line_80
VincentYaoMBZUAI Jan 4, 2022
d3a88cf
line_80
VincentYaoMBZUAI Jan 4, 2022
b67d568
line_80
VincentYaoMBZUAI Jan 4, 2022
853cf48
line_80
VincentYaoMBZUAI Jan 4, 2022
ac90f3d
class type check, IndexError, default_config, check for less <, pack_…
VincentYaoMBZUAI Jan 5, 2022
097b8f9
clean
VincentYaoMBZUAI Jan 5, 2022
39ec1aa
00_1 to 00
VincentYaoMBZUAI Jan 5, 2022
b5cac94
00 to 00_1
VincentYaoMBZUAI Jan 5, 2022
9438894
randomseed
VincentYaoMBZUAI Jan 6, 2022
b7e1d4b
randomseed_cancel
VincentYaoMBZUAI Jan 6, 2022
108740f
using /one_file rather than /00
VincentYaoMBZUAI Jan 11, 2022
a6e71f9
using /one_file rather than /00
VincentYaoMBZUAI Jan 11, 2022
8b233fe
line 80
VincentYaoMBZUAI Jan 11, 2022
bbc763b
Merge branch 'master' into bug561fix
hunterhector Feb 1, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 80 additions & 0 deletions data_samples/ontonotes/00_1/abc_0039.gold_conll
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
#begin document (bn/abc/00/abc_0039); part 000
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can reuse the current dataset without adding a new file.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should remove this file if it is no longer needed.

bn/abc/00/abc_0039 0 0 Some DT (TOP(NP* - - - - * -
bn/abc/00/abc_0039 0 1 overseas JJ * - - - - * -
bn/abc/00/abc_0039 0 2 news NN * news - 1 - * -
bn/abc/00/abc_0039 0 3 . . *)) - - - - * -

bn/abc/00/abc_0039 0 0 Barak NNP (TOP(S(NP(NP*) - - - - (PERSON) * * (ARG1*) * (ARG0* (ARG0* -
bn/abc/00/abc_0039 0 1 , , * - - - - * * * * * * * -
bn/abc/00/abc_0039 0 2 who WP (SBAR(WHNP*) - - - - * * * (R-ARG1*) (ARG0*) * * -
bn/abc/00/abc_0039 0 3 has VBZ (S(VP* have 01 - - * (V*) * * * * * -
bn/abc/00/abc_0039 0 4 been VBN (VP* be 03 - - * * (V*) * * * * -
bn/abc/00/abc_0039 0 5 forced VBN (VP* force 01 1 - * * * (V*) * * * -
bn/abc/00/abc_0039 0 6 to TO (S(VP* - - - - * * * (ARG2* * * * -
bn/abc/00/abc_0039 0 7 call VB (VP* call 03 3 - * * * * (V*) * * -
bn/abc/00/abc_0039 0 8 national JJ (NP* - - - - * * * * (ARG1* * * -
bn/abc/00/abc_0039 0 9 elections NNS *)))))))))) - - - - * * * *) *) *) *) -
bn/abc/00/abc_0039 0 10 , , * - - - - * * * * * * * -
bn/abc/00/abc_0039 0 11 offered VBD (VP* offer 01 3 - * * * * * (V*) * -
bn/abc/00/abc_0039 0 12 to TO (S(VP* - - - - * * * * * (ARG1* * -
bn/abc/00/abc_0039 0 13 recognize VB (VP* recognize 01 3 - * * * * * * (V*) -
bn/abc/00/abc_0039 0 14 a DT (NP* - - - - * * * * * * (ARG1* -
bn/abc/00/abc_0039 0 15 Palestinian JJ * - - - - (NORP) * * * * * * -
bn/abc/00/abc_0039 0 16 state NN *))))) state - 1 - * * * * * *) *) -
bn/abc/00/abc_0039 0 17 . . *)) - - - - * * * * * * * -

bn/abc/00/abc_0039 0 0 There EX (TOP(S(NP*) - - - - * -
bn/abc/00/abc_0039 0 1 's VBZ (VP* be - 3 - * -
bn/abc/00/abc_0039 0 2 one CD (NP* - - - - (CARDINAL) (0
bn/abc/00/abc_0039 0 3 very RB (ADJP* - - - - * -
bn/abc/00/abc_0039 0 4 difficult JJ *) - - - - * -
bn/abc/00/abc_0039 0 5 condition NN *)) condition - 1 - * 0)
bn/abc/00/abc_0039 0 6 . . *)) - - - - * -

bn/abc/00/abc_0039 0 0 There EX (TOP(S(NP*) - - - - * * * -
bn/abc/00/abc_0039 0 1 could MD (VP* - - - - * (ARGM-MOD*) * -
bn/abc/00/abc_0039 0 2 be VB (VP* be 02 5 - * (V*) * -
bn/abc/00/abc_0039 0 3 no DT (NP(NP* - - - - * (ARG1* * (0
bn/abc/00/abc_0039 0 4 discussion NN *) discussion - 2 - * * * -
bn/abc/00/abc_0039 0 5 for IN (PP* - - - - * * * -
bn/abc/00/abc_0039 0 6 now RB (NP*)) - - - - * * * -
bn/abc/00/abc_0039 0 7 about IN (PP* - - - - * * * -
bn/abc/00/abc_0039 0 8 who WP (SBAR(WHNP*) - - - - * * (ARG0*) -
bn/abc/00/abc_0039 0 9 will MD (S(VP* - - - - * * (ARGM-MOD*) -
bn/abc/00/abc_0039 0 10 have VB (VP* have 03 2 - * * (V*) -
bn/abc/00/abc_0039 0 11 what WDT (NP(NP* - - - - * * (ARG1* -
bn/abc/00/abc_0039 0 12 status NN *) status - 1 - * * * -
bn/abc/00/abc_0039 0 13 in IN (PP* - - - - * * * -
bn/abc/00/abc_0039 0 14 Jerusalem NNP (NP*))))))))))) - - - - (GPE) *) *) 0)
bn/abc/00/abc_0039 0 15 . . *)) - - - - * * * -

bn/abc/00/abc_0039 0 0 Palestinians NNPS (TOP(S(NP*) - - - - (NORP) * (ARG0*) -
bn/abc/00/abc_0039 0 1 do VBP (VP* do 01 - - * (V*) * -
bn/abc/00/abc_0039 0 2 n't RB * - - - - * * (ARGM-NEG*) -
bn/abc/00/abc_0039 0 3 like VB (VP* like 01 2 - * * (V*) -
bn/abc/00/abc_0039 0 4 it PRP (NP*))) - - - - * * (ARG1*) (0)
bn/abc/00/abc_0039 0 5 . . *)) - - - - * * * -

bn/abc/00/abc_0039 0 0 In IN (TOP(S(PP* - - - - * * * * * * (ARGM-LOC* -
bn/abc/00/abc_0039 0 1 Bethlehem NNP (NP(NP*) - - - - (GPE) * * * * * * -
bn/abc/00/abc_0039 0 2 , , * - - - - * * * * * * * -
bn/abc/00/abc_0039 0 3 where WRB (SBAR(WHADVP*) - - - - * * * (ARGM-LOC*) * * * -
bn/abc/00/abc_0039 0 4 Christians NNPS (S(NP*) - - - - (NORP) (ARG0*) * * * * * -
bn/abc/00/abc_0039 0 5 believe VBP (VP* believe 01 2 - * (V*) * * * * * -
bn/abc/00/abc_0039 0 6 Jesus NNP (SBAR(S(NP*) - - - - (PERSON) (ARG1* * (ARG1*) * * * -
bn/abc/00/abc_0039 0 7 was VBD (VP* be 03 - - * * (V*) * * * * -
bn/abc/00/abc_0039 0 8 born VBN (VP*))))))))) bear 02 - - * *) * (V*) * * *) -
bn/abc/00/abc_0039 0 9 , , * - - - - * * * * * * * -
bn/abc/00/abc_0039 0 10 Christmas NNP (NP* - - - - (DATE) * * * * * (ARG1* -
bn/abc/00/abc_0039 0 11 plans NNS *) plan - 1 - * * * * * * *) -
bn/abc/00/abc_0039 0 12 have VBP (VP* have 01 - - * * * * (V*) * * -
bn/abc/00/abc_0039 0 13 been VBN (VP* be 03 - - * * * * * (V*) * -
bn/abc/00/abc_0039 0 14 scaled VBN (VP* scale 01 5 - * * * * * * (V*) -
bn/abc/00/abc_0039 0 15 back RP (PRT*) - - - - * * * * * * (ARG5*) -
bn/abc/00/abc_0039 0 16 because IN (PP* - - - - * * * * * * (ARGM-CAU* -
bn/abc/00/abc_0039 0 17 of IN * - - - - * * * * * * * -
bn/abc/00/abc_0039 0 18 the DT (NP* - - - - * * * * * * * -
bn/abc/00/abc_0039 0 19 violence NN *))))) - - - - * * * * * * *) -
bn/abc/00/abc_0039 0 20 . . *)) - - - - * * * * * * * -

#end document
1 change: 1 addition & 0 deletions forte/data/base_pack.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
Iterable,
)


import jsonpickle

from forte.common import ProcessExecutionException, EntryNotFoundError
Expand Down
46 changes: 43 additions & 3 deletions forte/data/caster.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,15 +54,18 @@ class MultiPackBoxer(Caster[DataPack, MultiPack]):

def cast(self, pack: DataPack) -> MultiPack:
    """
    Box a single DataPack into a newly created MultiPack.

    The MultiPack is named after the input pack with a ``_multi`` suffix.
    When the input pack has no (truthy) name, the MultiPack is created
    with ``pack_name=None``. The input pack is then added to the
    MultiPack through ``add_pack_`` together with
    ``self.configs.pack_name``.

    Args:
        pack: The DataPack to be boxed.

    Returns:
        The MultiPack that wraps ``pack``.
    """
    if pack.pack_name:
        multi_name = pack.pack_name + "_multi"
    else:
        multi_name = None

    boxed = MultiPack(pack_name=multi_name)
    boxed.add_pack_(pack, self.configs.pack_name)
    return boxed
Expand All @@ -78,3 +81,40 @@ def input_pack_type():
@staticmethod
def output_pack_type():
    """Return the pack class this caster produces, i.e. ``MultiPack``."""
    return MultiPack


class MultiPackUnboxer(Caster[MultiPack, DataPack]):
    """
    A caster that passes on a single DataPack taken from a MultiPack.

    The pack is selected with the ``pack_index`` configuration value,
    which defaults to ``0``.
    """

    def cast(self, pack: MultiPack) -> DataPack:
        """
        Unbox the MultiPack by returning the DataPack at ``pack_index``.

        Args:
            pack: The MultiPack to be unboxed.

        Returns:
            The DataPack returned by ``pack.get_pack_at`` for the
            configured ``pack_index``.

        Raises:
            IndexError: If the configured ``pack_index`` is not smaller
                than ``pack.num_pack``.
        """
        # NOTE(review): only the upper bound is checked here; a negative
        # pack_index is handed to get_pack_at unchanged -- confirm that
        # this is intended.
        if self.configs.pack_index >= pack.num_pack:
            raise IndexError(
                f"pack_index: {self.configs.pack_index} is not in this multi-pack."
            )
        return pack.get_pack_at(self.configs.pack_index)

    @classmethod
    def default_configs(cls):
        """Return the default configuration, which selects index 0."""
        return {"pack_index": 0}

    @staticmethod
    def input_pack_type():
        """Return the pack class this caster consumes: ``MultiPack``."""
        return MultiPack

    @staticmethod
    def output_pack_type():
        """Return the pack class this caster produces: ``DataPack``."""
        return DataPack
11 changes: 9 additions & 2 deletions tests/forte/data/datapack_type_infer_test.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import unittest
from ddt import data, ddt

from forte.data.caster import MultiPackBoxer
from forte.data.caster import MultiPackBoxer, MultiPackUnboxer
from forte.data.data_pack import DataPack
from forte.data.multi_pack import MultiPack
from forte.data.readers.misc_readers import RawPackReader, RawMultiPackReader
Expand All @@ -12,7 +12,6 @@

@ddt
class DataPackTypeInferTest(unittest.TestCase):

@data(
PlainTextReader,
RawPackReader,
Expand All @@ -37,3 +36,11 @@ def test_multipack_boxer(self, component):
caster = component()
self.assertTrue(caster.input_pack_type() is DataPack)
self.assertTrue(caster.output_pack_type() is MultiPack)

@data(MultiPackUnboxer)
def test_multipack_unboxer(self, component):
    """The unboxer must declare MultiPack as input and DataPack as output."""
    unboxer = component()
    self.assertTrue(unboxer.input_pack_type() is MultiPack)
    self.assertTrue(unboxer.output_pack_type() is DataPack)
82 changes: 82 additions & 0 deletions tests/forte/datapack_boxer_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright 2019 The Forte Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Unit tests for DataPack Boxer.
"""

import os
import unittest
from ddt import ddt, data, unpack
from forte.data.caster import MultiPackBoxer, MultiPackUnboxer
from forte.data.data_pack import DataPack
from forte.data.multi_pack import MultiPack
from forte.pipeline import Pipeline

# Directory containing this test module; both sample roots below are
# resolved relative to it.
_THIS_DIR = os.path.dirname(os.path.realpath(__file__))

# Two directory levels above this module, then into ``data_samples``.
data_samples_root = os.path.abspath(
    os.path.join(_THIS_DIR, os.path.pardir, os.path.pardir, "data_samples")
)

# One directory level above this module, then into the ontology test specs.
onto_specs_samples_root = os.path.abspath(
    os.path.join(
        _THIS_DIR, os.path.pardir, "forte", "data", "ontology", "test_specs"
    )
)


@ddt
class MultiPackUnboxerTest(unittest.TestCase):
    """
    Checks that MultiPackUnboxer recovers the DataPack that
    MultiPackBoxer wrapped.
    """

    def test_multi_pack_to_data_pack_unboxer(self):
        """
        Run one pipeline that only boxes and one that boxes then unboxes,
        and compare the name and type of the pack each one returns.
        """
        from forte.data.readers import OntonotesReader

        pack_name = "test_pack"

        # Pipeline that stops after MultiPackBoxer.
        box_only = Pipeline[DataPack]()
        box_only.set_reader(OntonotesReader())
        box_only.add(MultiPackBoxer(), {"pack_name": pack_name})
        box_only.initialize()

        # Pipeline that boxes and then unboxes with MultiPackUnboxer,
        # using the unboxer's default configuration.
        box_then_unbox = Pipeline[DataPack]()
        box_then_unbox.set_reader(OntonotesReader())
        box_then_unbox.add(MultiPackBoxer(), {"pack_name": pack_name})
        box_then_unbox.add(MultiPackUnboxer())
        box_then_unbox.initialize()

        dataset_path = data_samples_root + "/ontonotes/00_1"
        boxed_name = "bn/abc/00/abc_0039_multi"
        unboxed_name = "bn/abc/00/abc_0039"

        # The boxing-only pipeline must return the MultiPack.
        boxed = box_only.process(dataset_path)
        self.assertEqual(boxed.pack_name, boxed_name)
        self.assertIsInstance(boxed, MultiPack)

        # The box-then-unbox pipeline must return the DataPack again.
        unboxed = box_then_unbox.process(dataset_path)
        self.assertEqual(unboxed.pack_name, unboxed_name)
        self.assertIsInstance(unboxed, DataPack)


# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()