fix: doc strings and function name
MattGPT-ai committed Feb 4, 2025
1 parent 5784834 commit 082e845
Showing 2 changed files with 13 additions and 12 deletions.
11 changes: 6 additions & 5 deletions flair/training_utils.py
@@ -463,24 +463,25 @@ def create_labeled_sentence_from_tokens(
     return sentence
 
 
-def create_labeled_sentence(
+def create_labeled_sentence_from_entity_offsets(
     text: str,
     entities: list[CharEntity],
     token_limit: float = inf,
 ) -> Sentence:
-    """Chunks and labels a text from a list of entity annotations.
+    """Creates a labeled sentence from a text and a list of entity annotations.
 
     The function explicitly tokenizes the text and labels separately, ensuring entity labels are
-    not partially split across tokens.
+    not partially split across tokens. The sentence is truncated if a token limit is set.
 
     Args:
         text (str): The full text to be tokenized and labeled.
         entities (list of tuples): Ordered non-overlapping entity annotations with each tuple in the
             format (start_char_index, end_char_index, entity_class, entity_text).
-        token_limit: numerical value that determines the maximum size of a chunk. use inf to not perform chunking
+        token_limit: numerical value that determines the maximum token length of the sentence.
+            use inf to not perform chunking
 
     Returns:
-        A list of labeled Sentence objects representing the chunks of the original text
+        A labeled Sentence object representing the text and entity annotations.
     """
     tokens: list[Token] = []
     current_index = 0
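
For context, a minimal usage sketch of the renamed function. This is an illustration under stated assumptions, not code from the commit: it assumes CharEntity can be constructed positionally as (start_char_index, end_char_index, entity_class, entity_text), matching the format the docstring describes.

    from math import inf

    from flair.training_utils import CharEntity, create_labeled_sentence_from_entity_offsets

    text = "Jane Doe worked at Acme Corp."
    # Hypothetical entities; offsets follow the documented
    # (start_char_index, end_char_index, entity_class, entity_text) format.
    entities = [
        CharEntity(0, 8, "PERSON", "Jane Doe"),
        CharEntity(19, 28, "ORG", "Acme Corp"),
    ]

    # token_limit=inf (the default) disables truncation.
    sentence = create_labeled_sentence_from_entity_offsets(text, entities, token_limit=inf)
    for label in sentence.get_labels():
        print(label)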
14 changes: 7 additions & 7 deletions tests/test_sentence_labeling.py
@@ -3,7 +3,7 @@
 import pytest
 
 from flair.data import Sentence
-from flair.training_utils import CharEntity, TokenEntity, create_labeled_sentence
+from flair.training_utils import CharEntity, TokenEntity, create_labeled_sentence_from_entity_offsets
 
 
 @pytest.fixture(params=["resume1.txt"])
@@ -63,7 +63,7 @@ def small_token_limit_response() -> list[Sentence]:
 
 class TestChunking:
     def test_empty_string(self):
-        sentences = create_labeled_sentence("", [])
+        sentences = create_labeled_sentence_from_entity_offsets("", [])
         assert len(sentences) == 0
 
     def check_tokens(self, sentence: Sentence, expected_tokens: list[str]):
@@ -101,11 +101,11 @@ def check_split_entities(self, entity_labels, sentence: Sentence):
     )
     def test_short_text(self, test_text: str, expected_text: str):
         """Short texts that should fit nicely into a single chunk."""
-        chunks = create_labeled_sentence(test_text, [])
+        chunks = create_labeled_sentence_from_entity_offsets(test_text, [])
         assert chunks.text == expected_text
 
     def test_create_labeled_sentence(self, parsed_resume_dict: dict):
-        create_labeled_sentence(parsed_resume_dict["raw_text"], parsed_resume_dict["entities"])
+        create_labeled_sentence_from_entity_offsets(parsed_resume_dict["raw_text"], parsed_resume_dict["entities"])
 
     @pytest.mark.parametrize(
         "test_text, entities, expected_tokens, expected_labels",
@@ -161,7 +161,7 @@ def test_create_labeled_sentence(self, parsed_resume_dict: dict):
     def test_contractions_and_hyphens(
         self, test_text: str, entities: list[CharEntity], expected_tokens: list[str], expected_labels: list[TokenEntity]
     ):
-        sentence = create_labeled_sentence(test_text, entities)
+        sentence = create_labeled_sentence_from_entity_offsets(test_text, entities)
         self.check_tokens(sentence, expected_tokens)
         self.check_token_entities(sentence, expected_labels)
 
@@ -176,7 +176,7 @@ def test_contractions_and_hyphens(
     )
     def test_long_text(self, test_text: str, entities: list[CharEntity]):
         """Test for handling long texts that should be split into multiple chunks."""
-        create_labeled_sentence(test_text, entities)
+        create_labeled_sentence_from_entity_offsets(test_text, entities)
 
     @pytest.mark.parametrize(
         "test_text, entities, expected_labels",
@@ -201,5 +201,5 @@ def test_long_text(self, test_text: str, entities: list[CharEntity]):
     def test_text_with_punctuation(
         self, test_text: str, entities: list[CharEntity], expected_labels: list[TokenEntity]
    ):
-        sentence = create_labeled_sentence(test_text, entities)
+        sentence = create_labeled_sentence_from_entity_offsets(test_text, entities)
         self.check_token_entities(sentence, expected_labels)
