@@ -39,6 +39,7 @@ def __init__(
39
39
base_path : Union [str , Path ] = None ,
40
40
in_memory : bool = True ,
41
41
augment_train : bool = False ,
42
+ ** corpusargs ,
42
43
):
43
44
"""
44
45
SemEval-2010 Task 8 on Multi-Way Classification of Semantic Relations Between Pairs of
@@ -83,6 +84,7 @@ def __init__(
83
84
column_format = {1 : "text" , 2 : "ner" },
84
85
comment_symbol = "# " ,
85
86
in_memory = in_memory ,
87
+ ** corpusargs ,
86
88
)
87
89
88
90
def extract_and_convert_to_conllu (self , data_file , data_folder , augment_train ):
@@ -227,7 +229,7 @@ def _semeval_lines_to_token_list(self, raw_lines, augment_relations):
227
229
228
230
229
231
class RE_ENGLISH_TACRED (ColumnCorpus ):
230
- def __init__ (self , base_path : Union [str , Path ] = None , in_memory : bool = True ):
232
+ def __init__ (self , base_path : Union [str , Path ] = None , in_memory : bool = True , ** corpusargs ):
231
233
"""
232
234
TAC Relation Extraction Dataset with 41 relations from https://nlp.stanford.edu/projects/tacred/.
233
235
Manual download is required for this dataset.
@@ -260,6 +262,7 @@ def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):
260
262
column_format = {1 : "text" , 2 : "ner" },
261
263
comment_symbol = "# " ,
262
264
in_memory = in_memory ,
265
+ ** corpusargs ,
263
266
)
264
267
265
268
def extract_and_convert_to_conllu (self , data_file , data_folder ):
@@ -351,7 +354,7 @@ def _tacred_example_to_token_list(self, example: Dict[str, Any]) -> conllu.Token
351
354
352
355
353
356
class RE_ENGLISH_CONLL04 (ColumnCorpus ):
354
- def __init__ (self , base_path : Union [str , Path ] = None , in_memory : bool = True ):
357
+ def __init__ (self , base_path : Union [str , Path ] = None , in_memory : bool = True , ** corpusargs ):
355
358
if not base_path :
356
359
base_path = flair .cache_root / "datasets"
357
360
else :
@@ -385,6 +388,7 @@ def __init__(self, base_path: Union[str, Path] = None, in_memory: bool = True):
385
388
in_memory = in_memory ,
386
389
column_format = {1 : "text" , 2 : "ner" },
387
390
comment_symbol = "# " ,
391
+ ** corpusargs ,
388
392
)
389
393
390
394
def _parse_incr (self , source_file ) -> Iterable [conllu .TokenList ]:
@@ -536,6 +540,7 @@ def __init__(
536
540
base_path : Union [str , Path ] = None ,
537
541
in_memory : bool = True ,
538
542
sentence_splitter : SentenceSplitter = SegtokSentenceSplitter (),
543
+ ** corpusargs ,
539
544
):
540
545
"""
541
546
DrugProt corpus: Biocreative VII Track 1 from https://zenodo.org/record/5119892#.YSdSaVuxU5k/ on
@@ -570,6 +575,7 @@ def __init__(
570
575
sample_missing_splits = False ,
571
576
column_format = {1 : "text" , 2 : "ner" , 3 : "ner" },
572
577
comment_symbol = "# " ,
578
+ ** corpusargs ,
573
579
)
574
580
575
581
def extract_and_convert_to_conllu (self , data_file , data_folder ):
0 commit comments