Skip to content

Commit e10224c

Browse files
committed
Split on only the first _ in case the dataset name has _ in it (language short names generally don't contain _)
1 parent c21d2c1 commit e10224c

File tree

2 files changed

+2
-2
lines changed

2 files changed

+2
-2
lines changed

stanza/utils/datasets/prepare_depparse_treebank.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def process_treebank(treebank, model_type, paths, args) -> None:
8888
prepare_tokenizer_treebank.copy_conllu_treebank(treebank, model_type, paths, paths["DEPPARSE_DATA_DIR"])
8989
elif args.tag_method is Tags.PREDICTED:
9090
short_name = treebank_to_short_name(treebank)
91-
short_language, dataset = short_name.split("_")
91+
short_language, dataset = short_name.split("_", 1)
9292

9393
# fmt: off
9494
base_args = ["--wordvec_dir", paths["WORDVEC_DIR"],

stanza/utils/training/run_depparse.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def build_model_filename(paths, short_name, command_args, extra_args):
4141

4242
def run_treebank(mode, paths, treebank, short_name,
4343
temp_output_file, command_args, extra_args):
44-
short_language, dataset = short_name.split("_")
44+
short_language, dataset = short_name.split("_", 1)
4545

4646
# TODO: refactor these blocks?
4747
depparse_dir = paths["DEPPARSE_DATA_DIR"]

0 commit comments

Comments
 (0)