Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 15 additions & 6 deletions projects/pgai/pgai/vectorizer/loading.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from dataclasses import dataclass
from io import BytesIO
from typing import Any, Literal
from urllib.parse import urlparse

from filetype import filetype # type: ignore
from pydantic import BaseModel
Expand All @@ -15,13 +16,21 @@ class LoadedDocument:


def guess_filetype(file_like: BytesIO, file_path: str | None = None) -> str | None:
guess = filetype.guess(file_like) # type: ignore
guess = filetype.guess(file_like) # type: ignore[reportUnknownArgumentType,reportUnknownMemberType]
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This type-checker error wasn't showing up previously, but now it does, so I'm just suppressing it here. The alternative would be to add explicit casts.

file_like.seek(0)
if guess is None:
if file_path is None:
return None
return file_path.split(".")[-1]
return guess.extension
if guess is not None:
return guess.extension

if file_path is None:
return None

try:
parsed = urlparse(file_path)
_, ext = os.path.splitext(parsed.path)
except Exception:
return None

return ext[1:].lower() if ext else None
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added the .lower() call so that we always return lowercase extensions, just for consistency.



class ColumnLoading(BaseModel):
Expand Down
Loading