Skip to content

Commit

Permalink
allow missing fields
Browse files Browse the repository at this point in the history
Original fix from #33
  • Loading branch information
BramVanroy committed Jul 2, 2024
1 parent 97e28da commit 22a14f7
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 6 deletions.
12 changes: 6 additions & 6 deletions src/spacy_conll/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@
},
)
def create_conll_formatter(
nlp: Language,
name: str,
nlp: Language, # qa: ignore
name: str, # qa: ignore
conversion_maps: Optional[Dict[str, Dict[str, str]]] = None,
ext_names: Optional[Dict[str, str]] = None,
field_names: Dict[str, str] = None,
Expand Down Expand Up @@ -200,12 +200,12 @@ def _set_token_conll(self, token: Token, token_idx: int = 1) -> Token:
token_conll = (
token_idx,
token.text,
token.lemma_,
token.pos_,
token.tag_,
token.lemma_ if token.lemma_ else "_",
token.pos_ if token.pos_ else "_",
token.tag_ if token.tag_ else "_",
str(token.morph) if token.has_morph and str(token.morph) else "_",
head_idx,
token.dep_,
token.dep_ if token.dep_ else "_",
token._.conll_deps_graphs_field,
token._.conll_misc_field,
)
Expand Down
25 changes: 25 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from pathlib import Path

import pytest
from spacy import Vocab
from spacy.tokens import Doc, Token
from spacy.tokens.underscore import Underscore
from spacy_conll import init_parser

Expand Down Expand Up @@ -156,3 +158,26 @@ def conllparser_parse_conllfile(spacy_vanila):
return ConllParser(spacy_vanila).parse_conll_as_spacy(
Path(__file__).parent.joinpath("en_ewt-ud-dev.conllu-sample.txt"), input_encoding="utf-8"
)


@pytest.fixture
def spacy_vocab():
    """Provide a spaCy ``Vocab`` created with the strings "hello" and "world"."""
    known_strings = ["hello", "world"]
    vocab = Vocab(strings=known_strings)
    return vocab


@pytest.fixture
def spacy_doc(spacy_vocab):
    """Provide a three-token ``Doc`` ("hello world!") built on ``spacy_vocab``.

    Only words, trailing-space flags and sentence-start flags are passed to
    the constructor; no lemma, POS, tag, morphology or dependency values are
    supplied here.
    """
    doc_kwargs = {
        "words": ["hello", "world", "!"],
        # Whether each word is followed by a space.
        "spaces": [True, False, False],
        # Only the first token is marked as starting a sentence.
        "sent_starts": [True, False, False],
    }
    return Doc(spacy_vocab, **doc_kwargs)


@pytest.fixture
def spacy_token(spacy_vocab, spacy_doc):
    """Provide the ``Token`` at offset 1 of ``spacy_doc`` (the word "world")."""
    token_offset = 1
    token = Token(spacy_vocab, spacy_doc, token_offset)
    return token
26 changes: 26 additions & 0 deletions tests/test_formatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from collections import OrderedDict

from spacy.tokens import Token
from spacy_conll.formatter import ConllFormatter


def test_set_token_conll(spacy_token: Token):
    """Regression test for https://github.com/BramVanroy/spacy_conll/issues/29

    Checks the ``conll`` and ``conll_str`` extension values that
    ``_set_token_conll`` produces for the ``spacy_token`` fixture.
    """
    expected_fields = OrderedDict(
        [
            ("ID", 1),
            ("FORM", "world"),
            ("LEMMA", "_"),
            ("UPOS", "_"),
            ("XPOS", "_"),
            ("FEATS", "_"),
            ("HEAD", 2),
            ("DEPREL", "_"),
            ("DEPS", "_"),
            ("MISC", "SpaceAfter=No"),
        ]
    )
    expected_line = "1\tworld\t_\t_\t_\t_\t2\t_\t_\tSpaceAfter=No\n"

    formatter = ConllFormatter()

    # Structured representation: one entry per CoNLL column.
    conll_fields = formatter._set_token_conll(spacy_token)._.get("conll")
    assert conll_fields == expected_fields

    # String representation; the formatter is deliberately invoked a second time.
    conll_line = formatter._set_token_conll(spacy_token)._.get("conll_str")
    assert conll_line == expected_line

0 comments on commit 22a14f7

Please sign in to comment.