Strip NUL characters from the text of documents produced by a specific reader
in _load_file_to_documents; per the original change, Postgres cannot store them.

NOTE(review): the early `return string_reader.load_data(...)` shown as context
below exits before this sanitisation runs - confirm whether that path needs it.
NOTE(review): indentation was reconstructed from a whitespace-collapsed copy of
this patch, and the post-image blob id on the index line is carried over from
the original revision - regenerate the patch if either matters.

diff --git a/private_gpt/components/ingest/ingest_helper.py b/private_gpt/components/ingest/ingest_helper.py
index a11090702..da62568bc 100644
--- a/private_gpt/components/ingest/ingest_helper.py
+++ b/private_gpt/components/ingest/ingest_helper.py
@@ -92,7 +92,13 @@ def _load_file_to_documents(file_name: str, file_data: Path) -> list[Document]:
             return string_reader.load_data([file_data.read_text()])
 
         logger.debug("Specific reader found for extension=%s", extension)
-        return reader_cls().load_data(file_data)
+        documents = reader_cls().load_data(file_data)
+
+        # Sanitize NUL bytes in text which can't be stored in Postgres
+        for document in documents:
+            document.text = document.text.replace("\u0000", "")
+
+        return documents
 
     @staticmethod
     def _exclude_metadata(documents: list[Document]) -> None: