Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: enforce kwarg logging #7207

Merged
merged 17 commits into from
Feb 29, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion haystack/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import haystack.logging
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Explanation for the import order change: https://github.com/deepset-ai/haystack/pull/7207/files#r1505884720

from haystack.core.component import component
from haystack.core.errors import ComponentError, DeserializationError
from haystack.core.pipeline import Pipeline
from haystack.core.serialization import default_from_dict, default_to_dict
from haystack.dataclasses import Answer, Document, ExtractedAnswer, GeneratedAnswer
import haystack.logging

# Initialize the logging configuration
# This is a no-op unless `structlog` is installed
Expand Down
6 changes: 2 additions & 4 deletions haystack/components/audio/whisper_local.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from typing import List, Optional, Dict, Any, Union, Literal, get_args

import logging
import tempfile
from pathlib import Path
from typing import Any, Dict, List, Literal, Optional, Union, get_args

from haystack import component, Document, default_to_dict, ComponentError, default_from_dict
from haystack import ComponentError, Document, component, default_from_dict, default_to_dict, logging
from haystack.dataclasses import ByteStream
from haystack.lazy_imports import LazyImport
from haystack.utils import ComponentDevice
Expand Down
3 changes: 1 addition & 2 deletions haystack/components/audio/whisper_remote.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import io
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

from openai import OpenAI

from haystack import Document, component, default_from_dict, default_to_dict
from haystack import Document, component, default_from_dict, default_to_dict, logging
from haystack.dataclasses import ByteStream
from haystack.utils import Secret, deserialize_secrets_inplace

Expand Down
4 changes: 2 additions & 2 deletions haystack/components/builders/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from haystack.components.builders.answer_builder import AnswerBuilder
from haystack.components.builders.prompt_builder import PromptBuilder
from haystack.components.builders.dynamic_prompt_builder import DynamicPromptBuilder
from haystack.components.builders.dynamic_chat_prompt_builder import DynamicChatPromptBuilder
from haystack.components.builders.dynamic_prompt_builder import DynamicPromptBuilder
from haystack.components.builders.prompt_builder import PromptBuilder

__all__ = ["AnswerBuilder", "PromptBuilder", "DynamicPromptBuilder", "DynamicChatPromptBuilder"]
10 changes: 5 additions & 5 deletions haystack/components/builders/answer_builder.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import logging
import re
from typing import List, Dict, Any, Optional

from haystack import component, GeneratedAnswer, Document
from typing import Any, Dict, List, Optional

from haystack import Document, GeneratedAnswer, component, logging

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -99,7 +97,9 @@ def run(
try:
referenced_docs.append(documents[idx])
except IndexError:
logger.warning("Document index '%s' referenced in Generator output is out of range. ", idx + 1)
logger.warning(
"Document index '{index}' referenced in Generator output is out of range. ", index=idx + 1
)

answer_string = AnswerBuilder._extract_answer_string(reply, pattern)
answer = GeneratedAnswer(data=answer_string, query=query, documents=referenced_docs, meta=metadata)
Expand Down
5 changes: 2 additions & 3 deletions haystack/components/builders/dynamic_chat_prompt_builder.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import logging
from typing import Dict, Any, Optional, List, Set
from typing import Any, Dict, List, Optional, Set

from jinja2 import Template, meta

from haystack import component
from haystack import component, logging
from haystack.dataclasses.chat_message import ChatMessage, ChatRole

logger = logging.getLogger(__name__)
Expand Down
5 changes: 2 additions & 3 deletions haystack/components/builders/dynamic_prompt_builder.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import logging
from typing import Dict, Any, Optional, List, Set
from typing import Any, Dict, List, Optional, Set

from jinja2 import Template, meta

from haystack import component
from haystack import component, logging

logger = logging.getLogger(__name__)

Expand Down
10 changes: 3 additions & 7 deletions haystack/components/caching/cache_checker.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,9 @@
from typing import List, Dict, Any

import importlib
from typing import Any, Dict, List

import logging

from haystack import component, Document, default_from_dict, default_to_dict, DeserializationError
from haystack import DeserializationError, Document, component, default_from_dict, default_to_dict, logging
from haystack.document_stores.types import DocumentStore


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -44,7 +40,7 @@ def from_dict(cls, data: Dict[str, Any]) -> "CacheChecker":

try:
module_name, type_ = init_params["document_store"]["type"].rsplit(".", 1)
logger.debug("Trying to import %s", module_name)
logger.debug("Trying to import module '{module}'", module=module_name)
module = importlib.import_module(module_name)
except (ImportError, DeserializationError) as e:
raise DeserializationError(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
from typing import List, Dict, Optional
from typing import Dict, List, Optional

from haystack import component, Document
from haystack import Document, component, logging
from haystack.lazy_imports import LazyImport

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -96,6 +95,8 @@ def _detect_language(self, document: Document) -> Optional[str]:
try:
language = langdetect.detect(document.content)
except langdetect.LangDetectException:
logger.warning("Langdetect cannot detect the language of Document with id: %s", document.id)
logger.warning(
"Langdetect cannot detect the language of Document with id: {document_id}", document_id=document.id
)
language = None
return language
7 changes: 3 additions & 4 deletions haystack/components/connectors/openapi_service.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import json
import logging
from collections import defaultdict
from copy import copy
from typing import List, Dict, Any, Optional, Union
from typing import Any, Dict, List, Optional, Union

from haystack import component
from haystack import component, logging
from haystack.dataclasses import ChatMessage, ChatRole
from haystack.lazy_imports import LazyImport

Expand Down Expand Up @@ -179,7 +178,7 @@ def _invoke_method(self, openapi_service: OpenAPI, method_invocation_descriptor:
:rtype: Any
:raises RuntimeError: If the method is not found or invocation fails.
"""
name = method_invocation_descriptor.get("name", None)
name = method_invocation_descriptor.get("name")
invocation_arguments = copy(method_invocation_descriptor.get("arguments", {}))
if not name or not invocation_arguments:
raise ValueError(
Expand Down
6 changes: 3 additions & 3 deletions haystack/components/converters/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
from haystack.components.converters.txt import TextFileToDocument
from haystack.components.converters.tika import TikaDocumentConverter
from haystack.components.converters.azure import AzureOCRDocumentConverter
from haystack.components.converters.pypdf import PyPDFToDocument
from haystack.components.converters.html import HTMLToDocument
from haystack.components.converters.markdown import MarkdownToDocument
from haystack.components.converters.openapi_functions import OpenAPIServiceToFunctions
from haystack.components.converters.output_adapter import OutputAdapter
from haystack.components.converters.pypdf import PyPDFToDocument
from haystack.components.converters.tika import TikaDocumentConverter
from haystack.components.converters.txt import TextFileToDocument

__all__ = [
"TextFileToDocument",
Expand Down
13 changes: 6 additions & 7 deletions haystack/components/converters/azure.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
from pathlib import Path
from typing import List, Union, Dict, Any, Optional
import logging
from typing import Any, Dict, List, Optional, Union

from haystack.lazy_imports import LazyImport
from haystack import component, Document, default_to_dict, default_from_dict
from haystack.dataclasses import ByteStream
from haystack import Document, component, default_from_dict, default_to_dict, logging
from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata
from haystack.dataclasses import ByteStream
from haystack.lazy_imports import LazyImport
from haystack.utils import Secret, deserialize_secrets_inplace

logger = logging.getLogger(__name__)

with LazyImport(message="Run 'pip install \"azure-ai-formrecognizer>=3.2.0b2\"'") as azure_import:
from azure.ai.formrecognizer import DocumentAnalysisClient, AnalyzeResult
from azure.ai.formrecognizer import AnalyzeResult, DocumentAnalysisClient
from azure.core.credentials import AzureKeyCredential


Expand Down Expand Up @@ -83,7 +82,7 @@ def run(self, sources: List[Union[str, Path, ByteStream]], meta: Optional[List[D
try:
bytestream = get_bytestream_from_source(source=source)
except Exception as e:
logger.warning("Could not read %s. Skipping it. Error: %s", source, e)
logger.warning("Could not read {source}. Skipping it. Error: {error}", source=source, error=e)
continue

poller = self.document_analysis_client.begin_analyze_document(
Expand Down
16 changes: 10 additions & 6 deletions haystack/components/converters/html.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Union, Literal
from typing import Any, Dict, List, Literal, Optional, Union

from boilerpy3 import extractors

from haystack import Document, component, default_from_dict, default_to_dict
from haystack.dataclasses import ByteStream
from haystack import Document, component, default_from_dict, default_to_dict, logging
from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata
from haystack.dataclasses import ByteStream

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -84,13 +84,17 @@ def run(
try:
bytestream = get_bytestream_from_source(source=source)
except Exception as e:
logger.warning("Could not read %s. Skipping it. Error: %s", source, e)
logger.warning("Could not read {source}. Skipping it. Error: {error}", source=source, error=e)
continue
try:
file_content = bytestream.data.decode("utf-8")
text = extractor.get_content(file_content)
except Exception as conversion_e:
logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
logger.warning(
"Failed to extract text from {source}. Skipping it. Error: {error}",
source=source,
error=conversion_e,
)
continue

merged_metadata = {**bytestream.meta, **metadata}
Expand Down
13 changes: 8 additions & 5 deletions haystack/components/converters/markdown.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import logging
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

from tqdm import tqdm

from haystack import Document, component
from haystack import Document, component, logging
from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata
from haystack.dataclasses import ByteStream
from haystack.lazy_imports import LazyImport
from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata

with LazyImport("Run 'pip install markdown-it-py mdit_plain'") as markdown_conversion_imports:
from markdown_it import MarkdownIt
Expand Down Expand Up @@ -77,13 +76,17 @@ def run(
try:
bytestream = get_bytestream_from_source(source)
except Exception as e:
logger.warning("Could not read %s. Skipping it. Error: %s", source, e)
logger.warning("Could not read {source}. Skipping it. Error: {error}", source=source, error=e)
continue
try:
file_content = bytestream.data.decode("utf-8")
text = parser.render(file_content)
except Exception as conversion_e:
logger.warning("Failed to extract text from %s. Skipping it. Error: %s", source, conversion_e)
logger.warning(
"Failed to extract text from {source}. Skipping it. Error: {error}",
source=source,
error=conversion_e,
)
continue

merged_metadata = {**bytestream.meta, **metadata}
Expand Down
21 changes: 13 additions & 8 deletions haystack/components/converters/openapi_functions.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
import json
import logging
import os
from pathlib import Path
from typing import List, Dict, Any, Union, Optional
from typing import Any, Dict, List, Optional, Union

import requests
import yaml
from requests import RequestException

from haystack import component, Document
from haystack import Document, component, logging
from haystack.dataclasses.byte_stream import ByteStream
from haystack.lazy_imports import LazyImport

Expand Down Expand Up @@ -80,7 +79,9 @@ def run(
elif isinstance(source, ByteStream):
openapi_spec_content = source.data.decode("utf-8")
else:
logger.warning("Invalid source type %s. Only str, Path, and ByteStream are supported.", type(source))
logger.warning(
"Invalid source type {source}. Only str, Path, and ByteStream are supported.", source=type(source)
)
continue

if openapi_spec_content:
Expand All @@ -94,7 +95,9 @@ def run(
doc = Document(content=json.dumps(function), meta=meta)
documents.append(doc)
except Exception as e:
logger.error("Error processing OpenAPI specification from source %s: %s", source, e)
logger.error(
"Error processing OpenAPI specification from source {source}: {error}", source=source, error=e
)

return {"documents": documents}

Expand Down Expand Up @@ -167,7 +170,9 @@ def _parse_endpoint_spec(self, resolved_spec: Dict[str, Any]) -> Optional[Dict[s
if function_name and description and schema["properties"]:
return {"name": function_name, "description": description, "parameters": schema}
else:
logger.warning("Invalid OpenAPI spec format provided. Could not extract function from %s", resolved_spec)
logger.warning(
"Invalid OpenAPI spec format provided. Could not extract function from {spec}", spec=resolved_spec
)
return {}

def _parse_property_attributes(
Expand Down Expand Up @@ -246,7 +251,7 @@ def _read_from_file(self, path: Union[str, Path]) -> Optional[str]:
with open(path, "r") as f:
return f.read()
except IOError as e:
logger.warning("IO error reading file: %s. Error: %s", path, e)
logger.warning("IO error reading file: {path}. Error: {error}", path=path, error=e)
return None

def _read_from_url(self, url: str) -> Optional[str]:
Expand All @@ -261,5 +266,5 @@ def _read_from_url(self, url: str) -> Optional[str]:
response.raise_for_status()
return response.text
except RequestException as e:
logger.warning("Error fetching URL: %s. Error: %s", url, e)
logger.warning("Error fetching URL: {url}. Error: {error}", url=url, error=e)
return None
6 changes: 3 additions & 3 deletions haystack/components/converters/output_adapter.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Optional, Dict, Any, Set, Callable
from typing import Any, Callable, Dict, Optional, Set

import jinja2.runtime
from jinja2 import TemplateSyntaxError, meta
from jinja2.nativetypes import NativeEnvironment
from typing_extensions import TypeAlias

from haystack import component, default_to_dict, default_from_dict
from haystack.utils import serialize_callable, deserialize_callable, serialize_type, deserialize_type
from haystack import component, default_from_dict, default_to_dict
from haystack.utils import deserialize_callable, deserialize_type, serialize_callable, serialize_type


class OutputAdaptationException(Exception):
Expand Down
13 changes: 7 additions & 6 deletions haystack/components/converters/pypdf.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import io
import logging
from typing import List, Union, Protocol, Dict, Any, Optional
from pathlib import Path
from typing import Any, Dict, List, Optional, Protocol, Union

from haystack import Document, component, default_to_dict, logging
from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata
from haystack.dataclasses import ByteStream
from haystack.lazy_imports import LazyImport
from haystack import Document, component, default_to_dict
from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata

with LazyImport("Run 'pip install pypdf'") as pypdf_import:
from pypdf import PdfReader
Expand Down Expand Up @@ -105,13 +104,15 @@ def run(
try:
bytestream = get_bytestream_from_source(source)
except Exception as e:
logger.warning("Could not read %s. Skipping it. Error: %s", source, e)
logger.warning("Could not read {source}. Skipping it. Error: {error}", source=source, error=e)
continue
try:
pdf_reader = PdfReader(io.BytesIO(bytestream.data))
document = self._converter.convert(pdf_reader)
except Exception as e:
logger.warning("Could not read %s and convert it to Document, skipping. %s", source, e)
logger.warning(
"Could not read {source} and convert it to Document, skipping. {error}", source=source, error=e
)
continue

merged_metadata = {**bytestream.meta, **metadata}
Expand Down
Loading
Loading