Running the below with Python 3.13, docling==2.74.0, docling-core==2.65.1, docling-parse==5.3.2, and fpdf2==2.8.6:
import os
import tempfile
from docling.document_converter import DocumentConverter
from docling.exceptions import ConversionError
from fpdf import FPDF
def make_pdf_with_bare_email_link(path: str | os.PathLike) -> None:
    """Write a one-page PDF to *path* that links to a bare email address.

    The page gets a "Contact: " cell and a second cell whose text and link
    target are both "author@example.com". The link target deliberately
    carries no "mailto:" scheme.
    """
    bare_email = "author@example.com"  # bare email, no mailto: prefix

    doc = FPDF()
    doc.add_page()
    doc.set_font("Helvetica", size=12)
    doc.cell(text="Contact: ")
    doc.cell(text=bare_email, link=bare_email)
    doc.output(path)
def main() -> None:
    """Generate the sample PDF and run it through docling's converter.

    Prints the conversion status when ``convert`` returns normally.

    Raises:
        AssertionError: If ``convert`` raises ``ConversionError``; the
            original exception is chained as the cause.
    """
    # Write to a path inside a temporary directory instead of to a
    # NamedTemporaryFile that is still open: reopening such a file by name
    # is platform-dependent and fails on Windows with the default settings.
    with tempfile.TemporaryDirectory() as tmp_dir:
        pdf_path = os.path.join(tmp_dir, "bare_email_link.pdf")
        make_pdf_with_bare_email_link(pdf_path)
        converter = DocumentConverter()
        try:
            result = converter.convert(pdf_path)
        except ConversionError as exc:
            raise AssertionError(
                "A bare email hyperlink URI ('user@example.com' without 'mailto:')"
                " causes PdfHyperlink Pydantic validation to fail, crashing the page."
            ) from exc
        print(f"Successfully converted PDF with status {result.status}.")
# Run the reproduction only when this file is executed as a script.
if __name__ == "__main__":
    main()
This script blows up with:
ConversionStatus.FAILURE. Errors: Page 1: 1 validation error for PdfHyperlink
uri
Input should be a valid URL, relative URL without a base [type=url_parsing, input_value='author@example.com', input_type=str]
For further information visit https://errors.pydantic.dev/2.12/v/url_parsing
This is a failure to handle a PDF page containing a hyperlink whose URI is a bare email address without the mailto: scheme.
With docling==2.73.1 and docling-parse==4.7.3 the code runs fine.
Running the below with Python 3.13,
docling==2.74.0, docling-core==2.65.1, docling-parse==5.3.2, and fpdf2==2.8.6:
This script blows up with:
This is a failure to handle a PDF page containing a hyperlink whose URI is a bare email address without the mailto: scheme.
With docling==2.73.1 and docling-parse==4.7.3 the code runs fine.