Skip to content

Add Synchronous processing #32

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 14, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Previous commit
Next commit
try to fix black errors
  • Loading branch information
sroy9675 committed Jul 13, 2024
commit f88a09a0e8c227ff7e32b44f0acff424771af92d
14 changes: 9 additions & 5 deletions datafog/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,13 @@
logger = logging.getLogger("datafog_logger")
logger.setLevel(logging.INFO)


class DataFog:
def __init__(
self,
image_service = ImageService(),
text_service = TextService(),
spark_service = None,
image_service=ImageService(),
text_service=TextService(),
spark_service=None,
operations: List[OperationType] = [OperationType.ANNOTATE_PII],
):
self.image_service = image_service
Expand Down Expand Up @@ -44,7 +45,7 @@
)

if OperationType.ANNOTATE_PII in self.operations:
annotated_text = await self.text_service.batch_annotate_text_async(
annotated_text = await self.text_service.batch_annotate_text_async(
extracted_text
)
self.logger.info(
Expand All @@ -62,14 +63,16 @@
try:
self.logger.info(f"Starting text pipeline with {len(str_list)} texts.")
if OperationType.ANNOTATE_PII in self.operations:
annotated_text = await self.text_service.batch_annotate_text_async(str_list)
annotated_text = await self.text_service.batch_annotate_text_async(
str_list
)
self.logger.info(
f"Text annotation completed with {len(annotated_text)} annotations."
)
return annotated_text

self.logger.info("No annotation operation found; returning original texts.")
return str_list

Check warning on line 75 in datafog/main.py

View check run for this annotation

Codecov / codecov/patch

datafog/main.py#L75

Added line #L75 was not covered by tests
except Exception as e:
self.logger.error(f"Error in run_text_pipeline: {str(e)}")
raise
Expand All @@ -85,17 +88,18 @@
)
return annotated_text

self.logger.info("No annotation operation found; returning original texts.")
return str_list
except Exception as e:
self.logger.error(f"Error in run_text_pipeline: {str(e)}")
raise

Check warning on line 95 in datafog/main.py

View check run for this annotation

Codecov / codecov/patch

datafog/main.py#L91-L95

Added lines #L91 - L95 were not covered by tests

def _add_attributes(self, attributes: dict):
"""Add multiple attributes."""
for key, value in attributes.items():

Check warning on line 99 in datafog/main.py

View check run for this annotation

Codecov / codecov/patch

datafog/main.py#L99

Added line #L99 was not covered by tests
pass


class TextPIIAnnotator:
def __init__(self):
self.text_annotator = SpacyPIIAnnotator.create()
Expand Down
2 changes: 2 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ def test_textpii_annotator():
# annotated_text = await ocr_annotator.run([image_url])
# assert "Satya Nadella" in annotated_text[0].get("PER", []), "PII not annotated correctly."


def test_datafog_text_annotation_sync():
"""Test DataFog class for synchronous text annotation."""
text = ["Joe Biden is the President of the United States."]
Expand All @@ -49,6 +50,7 @@ def test_datafog_text_annotation_sync():
annotated_text, "the United States"
), "United States not found in annotated results."


@pytest.mark.asyncio
async def test_datafog_text_annotation():
"""Test DataFog class for text annotation."""
Expand Down
Loading