diff --git a/.buildinfo b/.buildinfo new file mode 100644 index 0000000000..a9b75733dd --- /dev/null +++ b/.buildinfo @@ -0,0 +1,4 @@ +# Sphinx build info version 1 +# This file records the configuration used when building these files. When it is not found, a full rebuild will be done. +config: c971fbf16817456587a7ef112b0f2c02 +tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/.doctrees/changelog.doctree b/.doctrees/changelog.doctree new file mode 100644 index 0000000000..5ba1cdd317 Binary files /dev/null and b/.doctrees/changelog.doctree differ diff --git a/.doctrees/community/resources.doctree b/.doctrees/community/resources.doctree new file mode 100644 index 0000000000..483d62c1b8 Binary files /dev/null and b/.doctrees/community/resources.doctree differ diff --git a/.doctrees/community/tools.doctree b/.doctrees/community/tools.doctree new file mode 100644 index 0000000000..68461843db Binary files /dev/null and b/.doctrees/community/tools.doctree differ diff --git a/.doctrees/contributing/code_of_conduct.doctree b/.doctrees/contributing/code_of_conduct.doctree new file mode 100644 index 0000000000..9c05e8f74a Binary files /dev/null and b/.doctrees/contributing/code_of_conduct.doctree differ diff --git a/.doctrees/contributing/contributing.doctree b/.doctrees/contributing/contributing.doctree new file mode 100644 index 0000000000..d27732368b Binary files /dev/null and b/.doctrees/contributing/contributing.doctree differ diff --git a/.doctrees/environment.pickle b/.doctrees/environment.pickle new file mode 100644 index 0000000000..f1f004fb59 Binary files /dev/null and b/.doctrees/environment.pickle differ diff --git a/.doctrees/getting_started/installing.doctree b/.doctrees/getting_started/installing.doctree new file mode 100644 index 0000000000..c584d0b60b Binary files /dev/null and b/.doctrees/getting_started/installing.doctree differ diff --git a/.doctrees/index.doctree b/.doctrees/index.doctree new file mode 100644 index 0000000000..38ad3e041a Binary files /dev/null and b/.doctrees/index.doctree differ diff --git a/.doctrees/modules/contrib.doctree b/.doctrees/modules/contrib.doctree new file mode 100644 index 0000000000..f109d02918 Binary files /dev/null and b/.doctrees/modules/contrib.doctree differ diff --git a/.doctrees/modules/datasets.doctree b/.doctrees/modules/datasets.doctree new file mode 100644 index 0000000000..b67af63173 Binary files /dev/null and b/.doctrees/modules/datasets.doctree differ diff --git a/.doctrees/modules/io.doctree b/.doctrees/modules/io.doctree new file mode 100644 index 0000000000..c91bcf5e06 Binary files /dev/null and b/.doctrees/modules/io.doctree differ diff --git a/.doctrees/modules/models.doctree b/.doctrees/modules/models.doctree new file mode 100644 index 0000000000..af12498626 Binary files /dev/null and b/.doctrees/modules/models.doctree differ diff --git a/.doctrees/modules/transforms.doctree b/.doctrees/modules/transforms.doctree new file mode 100644 index 0000000000..1c570ca31b Binary files /dev/null and b/.doctrees/modules/transforms.doctree differ diff --git a/.doctrees/modules/utils.doctree b/.doctrees/modules/utils.doctree new file mode 100644 index 0000000000..77694347ab Binary files /dev/null and b/.doctrees/modules/utils.doctree differ diff --git a/.doctrees/notebooks.doctree b/.doctrees/notebooks.doctree new file mode 100644 index 0000000000..c2254b1839 Binary files /dev/null and b/.doctrees/notebooks.doctree differ diff --git a/.doctrees/using_doctr/custom_models_training.doctree 
b/.doctrees/using_doctr/custom_models_training.doctree new file mode 100644 index 0000000000..3581eaaeca Binary files /dev/null and b/.doctrees/using_doctr/custom_models_training.doctree differ diff --git a/.doctrees/using_doctr/running_on_aws.doctree b/.doctrees/using_doctr/running_on_aws.doctree new file mode 100644 index 0000000000..bdf145eef2 Binary files /dev/null and b/.doctrees/using_doctr/running_on_aws.doctree differ diff --git a/.doctrees/using_doctr/sharing_models.doctree b/.doctrees/using_doctr/sharing_models.doctree new file mode 100644 index 0000000000..dc85831b3f Binary files /dev/null and b/.doctrees/using_doctr/sharing_models.doctree differ diff --git a/.doctrees/using_doctr/using_contrib_modules.doctree b/.doctrees/using_doctr/using_contrib_modules.doctree new file mode 100644 index 0000000000..c2c9d5447b Binary files /dev/null and b/.doctrees/using_doctr/using_contrib_modules.doctree differ diff --git a/.doctrees/using_doctr/using_datasets.doctree b/.doctrees/using_doctr/using_datasets.doctree new file mode 100644 index 0000000000..175d4503da Binary files /dev/null and b/.doctrees/using_doctr/using_datasets.doctree differ diff --git a/.doctrees/using_doctr/using_model_export.doctree b/.doctrees/using_doctr/using_model_export.doctree new file mode 100644 index 0000000000..894ecbb38a Binary files /dev/null and b/.doctrees/using_doctr/using_model_export.doctree differ diff --git a/.doctrees/using_doctr/using_models.doctree b/.doctrees/using_doctr/using_models.doctree new file mode 100644 index 0000000000..657cb68c98 Binary files /dev/null and b/.doctrees/using_doctr/using_models.doctree differ diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml deleted file mode 100644 index a6771b95f1..0000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ /dev/null @@ -1,63 +0,0 @@ -name: 🐛 Bug report -description: Create a report to help us improve the library -labels: 'type: bug' - -body: -- type: markdown - attributes: - value: > - #### Before reporting a bug, please check that the issue hasn't already been addressed in [the existing and past issues](https://github.com/mindee/doctr/issues?q=is%3Aissue). -- type: textarea - attributes: - label: Bug description - description: | - A clear and concise description of what the bug is. - - Please explain the result you observed and the behavior you were expecting. - placeholder: | - A clear and concise description of what the bug is. - validations: - required: true - -- type: textarea - attributes: - label: Code snippet to reproduce the bug - description: | - Sample code to reproduce the problem. - - Please wrap your code snippet with ```` ```triple quotes blocks``` ```` for readability. - placeholder: | - ```python - Sample code to reproduce the problem - ``` - validations: - required: true -- type: textarea - attributes: - label: Error traceback - description: | - The error message you received running the code snippet, with the full traceback. - - Please wrap your error message with ```` ```triple quotes blocks``` ```` for readability. - placeholder: | - ``` - The error message you got, with the full traceback. - ``` - validations: - required: true -- type: textarea - attributes: - label: Environment - description: | - Please run the following command and paste the output below. - ```sh - wget https://raw.githubusercontent.com/mindee/doctr/main/scripts/collect_env.py - # For security purposes, please check the contents of collect_env.py before running it. 
- python collect_env.py - ``` - validations: - required: true -- type: markdown - attributes: - value: > - Thanks for helping us improve the library! diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index 7670faa78d..0000000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,5 +0,0 @@ -blank_issues_enabled: true -contact_links: - - name: Usage questions - url: https://github.com/mindee/doctr/discussions - about: Ask questions and discuss with other docTR community members diff --git a/.github/ISSUE_TEMPLATE/feature_request.yml b/.github/ISSUE_TEMPLATE/feature_request.yml deleted file mode 100644 index dface9d3c2..0000000000 --- a/.github/ISSUE_TEMPLATE/feature_request.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: 🚀 Feature request -description: Submit a proposal/request for a new feature for docTR -labels: 'type: enhancement' - -body: -- type: textarea - attributes: - label: 🚀 The feature - description: > - A clear and concise description of the feature proposal - validations: - required: true -- type: textarea - attributes: - label: Motivation, pitch - description: > - Please outline the motivation for the proposal. Is your feature request related to a specific problem? e.g., *"I'm working on X and would like Y to be possible"*. If this is related to another GitHub issue, please link here too. - validations: - required: true -- type: textarea - attributes: - label: Alternatives - description: > - A description of any alternative solutions or features you've considered, if any. -- type: textarea - attributes: - label: Additional context - description: > - Add any other context or screenshots about the feature request. -- type: markdown - attributes: - value: > - Thanks for contributing 🎉 diff --git a/.github/dependabot.yml b/.github/dependabot.yml deleted file mode 100644 index 7d3ef31202..0000000000 --- a/.github/dependabot.yml +++ /dev/null @@ -1,30 +0,0 @@ -version: 2 -updates: - - package-ecosystem: "pip" - directory: "/" - open-pull-requests-limit: 10 - target-branch: "main" - labels: ["topic: build"] - schedule: - interval: weekly - day: sunday - reviewers: - - "charlesmindee" - - "felixdittrich92" - - "odulcy-mindee" - - package-ecosystem: "github-actions" - directory: "/" - open-pull-requests-limit: 10 - target-branch: "main" - labels: ["topic: ci"] - schedule: - interval: weekly - day: sunday - reviewers: - - "charlesmindee" - - "felixdittrich92" - - "odulcy-mindee" - groups: - github-actions: - patterns: - - "*" diff --git a/.github/release.yml b/.github/release.yml deleted file mode 100644 index 2efdfdfcf6..0000000000 --- a/.github/release.yml +++ /dev/null @@ -1,24 +0,0 @@ -changelog: - exclude: - labels: - - ignore-for-release - categories: - - title: Breaking Changes 🛠 - labels: - - "type: breaking change" - # NEW FEATURES - - title: New Features - labels: - - "type: new feature" - # BUG FIXES - - title: Bug Fixes - labels: - - "type: bug" - # IMPROVEMENTS - - title: Improvements - labels: - - "type: enhancement" - # MISC - - title: Miscellaneous - labels: - - "type: misc" diff --git a/.github/verify_pr_labels.py b/.github/verify_pr_labels.py deleted file mode 100644 index e149e3c4e6..0000000000 --- a/.github/verify_pr_labels.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (C) 2021-2026, Mindee. - -# This program is licensed under the Apache License 2.0. -# See LICENSE or go to for full license details. 
- -"""Borrowed & adapted from https://github.com/pytorch/vision/blob/main/.github/process_commit.py -This script finds the merger responsible for labeling a PR by a commit SHA. It is used by the workflow in -'.github/workflows/pr-labels.yml'. If there exists no PR associated with the commit or the PR is properly labeled, -this script is a no-op. -Note: we ping the merger only, not the reviewers, as the reviewers can sometimes be external to torchvision -with no labeling responsibility, so we don't want to bother them. -""" - -from typing import Any - -import requests - -# For a PR to be properly labeled it should have one primary label and one secondary label - -# Should specify the type of change -PRIMARY_LABELS = { - "type: new feature", - "type: bug", - "type: enhancement", - "type: misc", -} - -# Should specify what has been modified -SECONDARY_LABELS = { - "topic: documentation", - "module: datasets", - "module: io", - "module: models", - "module: transforms", - "module: utils", - "ext: api", - "ext: demo", - "ext: docs", - "ext: notebooks", - "ext: references", - "ext: scripts", - "ext: tests", - "topic: build", - "topic: ci", - "topic: docker", -} - -GH_ORG = "mindee" -GH_REPO = "doctr" - - -def query_repo(cmd: str, *, accept) -> Any: - response = requests.get(f"https://api.github.com/repos/{GH_ORG}/{GH_REPO}/{cmd}", headers=dict(Accept=accept)) - return response.json() - - -def get_pr_merger_and_labels(pr_number: int) -> tuple[str, set[str]]: - # See https://docs.github.com/en/rest/reference/pulls#get-a-pull-request - data = query_repo(f"pulls/{pr_number}", accept="application/vnd.github.v3+json") - merger = data.get("merged_by", {}).get("login") - labels = {label["name"] for label in data["labels"]} - return merger, labels - - -def main(args): - merger, labels = get_pr_merger_and_labels(args.pr) - is_properly_labeled = bool(PRIMARY_LABELS.intersection(labels) and SECONDARY_LABELS.intersection(labels)) - if isinstance(merger, str) and not is_properly_labeled: - print(f"@{merger}") - - -def parse_args(): - import argparse - - parser = argparse.ArgumentParser( - description="PR label checker", formatter_class=argparse.ArgumentDefaultsHelpFormatter - ) - - parser.add_argument("pr", type=int, help="PR number") - args = parser.parse_args() - - return args - - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/.github/workflows/builds.yml b/.github/workflows/builds.yml deleted file mode 100644 index 23971ec364..0000000000 --- a/.github/workflows/builds.yml +++ /dev/null @@ -1,43 +0,0 @@ -name: builds - -on: - push: - branches: main - pull_request: - branches: main - -jobs: - build: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - python: ["3.10", "3.11"] - steps: - - uses: actions/checkout@v6 - - if: matrix.os == 'macos-latest' - name: Install MacOS prerequisites - run: brew install cairo pango gdk-pixbuf libffi - - name: Set up Python - uses: actions/setup-python@v6 - with: - # MacOS issue ref.: https://github.com/actions/setup-python/issues/855 & https://github.com/actions/setup-python/issues/865 - python-version: ${{ matrix.os == 'macos-latest' && matrix.python == '3.10' && '3.11' || matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} - - name: Install package - run: | - python -m pip install --upgrade pip - if [ "${{ runner.os }}" 
= "Windows" ]; then - pip install -e .[viz] --upgrade - else - pip install -e .[viz,html] --upgrade - fi - shell: bash # Ensures shell is consistent across OSes - - name: Import package - run: python -c "import doctr; print(doctr.__version__)" diff --git a/.github/workflows/clear_caches.yml b/.github/workflows/clear_caches.yml deleted file mode 100644 index 1f5510f529..0000000000 --- a/.github/workflows/clear_caches.yml +++ /dev/null @@ -1,15 +0,0 @@ -name: Clear GitHub runner caches - -on: - workflow_dispatch: - schedule: - - cron: '0 0 * * *' # Runs once a day - -jobs: - clear: - name: Clear caches - runs-on: ubuntu-latest - steps: - - uses: MyAlbum/purge-cache@v2 - with: - max-age: 172800 # Caches older than 2 days are deleted diff --git a/.github/workflows/demo.yml b/.github/workflows/demo.yml deleted file mode 100644 index 60e6633708..0000000000 --- a/.github/workflows/demo.yml +++ /dev/null @@ -1,89 +0,0 @@ -name: demo - -on: - # Run 'test-demo' on every pull request to the main branch - pull_request: - branches: [main] - - # Run 'test-demo' on every push to the main branch or both jobs when a new version tag is pushed - push: - branches: - - main - tags: - - 'v*' - - # Run 'sync-to-hub' on a scheduled cron job - schedule: - - cron: '0 2 10 * *' # At 02:00 on day-of-month 10 (every month) - - # Allow manual triggering of the workflow - workflow_dispatch: - -jobs: - test-demo: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - if: matrix.os == 'macos-latest' - name: Install MacOS prerequisites - run: brew install cairo pango gdk-pixbuf libffi - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements.txt') }}-${{ hashFiles('demo/pt-requirements.txt') }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - pip install -r demo/pt-requirements.txt - - name: Run demo - run: | - streamlit --version - screen -dm streamlit run demo/app.py - sleep 10 - curl http://localhost:8501/docs - - # This job only runs when a new version tag is pushed or during the cron job or when manually triggered - sync-to-hub: - if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' - needs: test-demo - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - with: - fetch-depth: 0 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Install huggingface_hub - run: pip install huggingface-hub - - name: Upload folder to Hugging Face - # Only keep the requirements.txt file for the demo (PyTorch) - run: | - mv demo/pt-requirements.txt demo/requirements.txt - - python -c " - from huggingface_hub import HfApi - api = HfApi(token='${{ secrets.HF_TOKEN }}') - repo_id = 'mindee/doctr' - api.upload_folder(repo_id=repo_id, repo_type='space', folder_path='demo/') - api.restart_space(repo_id=repo_id, factory_reboot=True) - " diff --git a/.github/workflows/doc-status.yml b/.github/workflows/doc-status.yml deleted file mode 100644 index 318f0e344a..0000000000 --- 
a/.github/workflows/doc-status.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: doc-status -on: - page_build - -jobs: - see-page-build-payload: - runs-on: ubuntu-latest - steps: - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: "3.10" - architecture: x64 - - name: check status - run: | - import os - status, errormsg = os.getenv('STATUS'), os.getenv('ERROR') - if status != 'built': raise AssertionError(f"There was an error building the page on GitHub pages.\n\nStatus: {status}\n\nError messsage: {errormsg}") - shell: python - env: - STATUS: ${{ github.event.build.status }} - ERROR: ${{ github.event.build.error.message }} diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml deleted file mode 100644 index 97242c8eae..0000000000 --- a/.github/workflows/docker.yml +++ /dev/null @@ -1,36 +0,0 @@ -name: docker - -on: - push: - branches: main - pull_request: - branches: main - -jobs: - docker-package: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - name: Build docker image - run: docker build -t doctr-py3.10-slim --build-arg SYSTEM=cpu . - - name: Run docker container - run: docker run doctr-py3.10-slim python3 -c 'import doctr' - - pytest-api: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Build & run docker - run: cd api && make lock && make run - - name: Ping server - run: wget --spider --tries=12 http://localhost:8080/docs - - name: Run docker test - run: cd api && make test diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml deleted file mode 100644 index 7fabb5ca35..0000000000 --- a/.github/workflows/docs.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: docs -on: - push: - branches: main - -jobs: - docs-deploy: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - with: - persist-credentials: false - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-docs - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - pip install -e .[docs] - - - name: Build documentation - run: cd docs && bash build.sh - - - name: Documentation sanity check - run: test -e docs/build/index.html || exit - - - name: Install SSH Client 🔑 - uses: webfactory/ssh-agent@v0.9.1 - with: - ssh-private-key: ${{ secrets.SSH_DEPLOY_KEY }} - - - name: Deploy to Github Pages - uses: JamesIves/github-pages-deploy-action@v4.8.0 - with: - BRANCH: gh-pages - FOLDER: 'docs/build' - COMMIT_MESSAGE: '[skip ci] Documentation updates' - CLEAN: true - SSH: true diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml deleted file mode 100644 index 956230bc6d..0000000000 --- a/.github/workflows/main.yml +++ /dev/null @@ -1,90 +0,0 @@ -name: tests - -on: - push: - branches: main - pull_request: - branches: main - -jobs: - pytest-common: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - 
name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-tests - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - pip install -e .[testing] - - name: Run unittests - run: | - coverage run -m pytest tests/common/ -rs - coverage xml -o coverage-common.xml - - uses: actions/upload-artifact@v6 - with: - name: coverage-common - path: ./coverage-common.xml - if-no-files-found: error - - - pytest-torch: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-tests - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - pip install -e .[testing] - - - name: Run unittests - run: | - coverage run -m pytest tests/pytorch/ -rs - coverage xml -o coverage-pt.xml - - - uses: actions/upload-artifact@v6 - with: - name: coverage-pytorch - path: ./coverage-pt.xml - if-no-files-found: error - - codecov-upload: - runs-on: ubuntu-latest - needs: [ pytest-common, pytest-torch ] - steps: - - uses: actions/checkout@v6 - - uses: actions/download-artifact@v7 - - name: Upload coverage to Codecov - uses: codecov/codecov-action@v5 - with: - flags: unittests - fail_ci_if_error: true - token: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/pr-labels.yml b/.github/workflows/pr-labels.yml deleted file mode 100644 index 3b52b82a90..0000000000 --- a/.github/workflows/pr-labels.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: pr-labels - -on: - pull_request: - branches: main - types: closed - -jobs: - is-properly-labeled: - if: github.event.pull_request.merged == true - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v6 - - name: Set up python - uses: actions/setup-python@v6 - - name: Install requests - run: pip install requests - - name: Process commit and find merger responsible for labeling - id: commit - run: echo "::set-output name=merger::$(python .github/verify_pr_labels.py ${{ github.event.pull_request.number }})" - - name: 'Comment PR' - uses: actions/github-script@v8.0.0 - if: ${{ steps.commit.outputs.merger != '' }} - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - script: | - const { issue: { number: issue_number }, repo: { owner, repo } } = context; - github.rest.issues.createComment({ issue_number, owner, repo, body: 'Hey ${{ steps.commit.outputs.merger }} 👋\nYou merged this PR, but it is not correctly labeled. 
The list of valid labels is available at https://github.com/mindee/doctr/blob/main/.github/verify_pr_labels.py' }); diff --git a/.github/workflows/public_docker_images.yml b/.github/workflows/public_docker_images.yml deleted file mode 100644 index 7d0c51f334..0000000000 --- a/.github/workflows/public_docker_images.yml +++ /dev/null @@ -1,91 +0,0 @@ -# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages -# -name: Docker image on ghcr.io - -on: - push: - tags: - - 'v*' - pull_request: - branches: main - schedule: - - cron: '0 2 1 */3 *' # At 02:00 on the 1st day of every 3rd month - -env: - REGISTRY: ghcr.io - -jobs: - build-and-push-image: - runs-on: ubuntu-latest - - strategy: - fail-fast: false - matrix: - # Must match version at https://www.python.org/ftp/python/ - python: ["3.10.13", "3.11.8", "3.12.7"] - # NOTE: Since docTR 1.0.0 torch doesn't exist as a seperate install option it's only to keep the naming convention - framework: ["torch", "torch,viz,html,contrib"] - - # Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job. - permissions: - contents: read - packages: write - - steps: - - name: Checkout repository - uses: actions/checkout@v6 - - - name: Log in to the Container registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Sanitize docker tag - run: | - PREFIX_DOCKER_TAG="${{ matrix.framework }}-py${{ matrix.python }}-" - PREFIX_DOCKER_TAG=$(echo ${PREFIX_DOCKER_TAG}|sed 's/,/-/g') - echo PREFIX_DOCKER_TAG=${PREFIX_DOCKER_TAG} >> $GITHUB_ENV - echo $PREFIX_DOCKER_TAG - - - name: Extract metadata (tags, labels) for Docker - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/${{ github.repository }} - tags: | - # used only on schedule event - type=schedule,pattern={{date 'YYYY-MM'}},prefix=${{ env.PREFIX_DOCKER_TAG }} - # used only if a tag following semver is published - type=semver,pattern={{raw}},prefix=${{ env.PREFIX_DOCKER_TAG }} - - - name: Build Docker image - id: build - uses: docker/build-push-action@v6 - with: - context: . - build-args: | - FRAMEWORK=${{ matrix.framework }} - PYTHON_VERSION=${{ matrix.python }} - DOCTR_REPO=${{ github.repository }} - DOCTR_VERSION=${{ github.sha }} - push: false # push only if `import doctr` works - tags: ${{ steps.meta.outputs.tags }} - - - name: Check if `import doctr` works - run: docker run ${{ steps.build.outputs.imageid }} python3 -c 'import doctr' - - - name: Push Docker image - # Push only if the CI is not triggered by "PR on main" - if: ${{ (github.ref == 'refs/heads/main' && github.event_name != 'pull_request') || (startsWith(github.ref, 'refs/tags') && github.event_name == 'push') }} - uses: docker/build-push-action@v6 - with: - context: . 
- build-args: | - FRAMEWORK=${{ matrix.framework }} - PYTHON_VERSION=${{ matrix.python }} - DOCTR_REPO=${{ github.repository }} - DOCTR_VERSION=${{ github.sha }} - push: true - tags: ${{ steps.meta.outputs.tags }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml deleted file mode 100644 index e9670b0b11..0000000000 --- a/.github/workflows/publish.yml +++ /dev/null @@ -1,65 +0,0 @@ -name: publish - -on: - release: - types: [published] - -jobs: - pypi: - if: "!github.event.release.prerelease" - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10"] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install setuptools wheel twine --upgrade - - name: Get release tag - id: release_tag - run: echo "VERSION=${GITHUB_REF/refs\/tags\//}" >> $GITHUB_ENV - - name: Build and publish - env: - TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} - TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} - VERSION: ${{ env.VERSION }} - run: | - BUILD_VERSION=$VERSION python setup.py sdist bdist_wheel - twine check dist/* - twine upload dist/* - - pypi-check: - needs: pypi - if: "!github.event.release.prerelease" - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10"] - runs-on: ${{ matrix.os }} - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Install package - run: | - python -m pip install --upgrade pip - pip install python-doctr - python -c "from importlib.metadata import version; print(version('python-doctr'))" diff --git a/.github/workflows/pull_requests.yml b/.github/workflows/pull_requests.yml deleted file mode 100644 index f530c93771..0000000000 --- a/.github/workflows/pull_requests.yml +++ /dev/null @@ -1,32 +0,0 @@ -name: pull_requests - -on: - pull_request: - branches: main - -jobs: - docs-build: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: "3.10" - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-docs - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - pip install -e .[docs] - - - name: Build documentation - run: cd docs && bash build.sh - - - name: Documentation sanity check - run: test -e docs/build/index.html || exit diff --git a/.github/workflows/references.yml b/.github/workflows/references.yml deleted file mode 100644 index e4c9503136..0000000000 --- a/.github/workflows/references.yml +++ /dev/null @@ -1,253 +0,0 @@ -name: references - -on: - push: - branches: main - pull_request: - branches: main - -jobs: - train-char-classification: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - - name: 
Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-${{ hashFiles('references/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}- - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - pip install -r references/requirements.txt - sudo apt-get update && sudo apt-get install fonts-freefont-ttf -y - - name: Train for a short epoch - run: python references/classification/train_character.py vit_s -b 32 --val-samples 1 --train-samples 1 --epochs 1 - - train-orientation-classification: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-${{ hashFiles('references/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}- - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - pip install -r references/requirements.txt - - name: Download and extract detection toy set - run: | - wget https://github.com/mindee/doctr/releases/download/v0.3.1/toy_detection_set-bbbb4243.zip - sudo apt-get update && sudo apt-get install unzip -y - unzip toy_detection_set-bbbb4243.zip -d det_set - - name: Download and extract recognition toy set - run: | - wget https://github.com/mindee/doctr/releases/download/v0.3.1/toy_recogition_set-036a4d80.zip - sudo apt-get update && sudo apt-get install unzip -y - unzip toy_recogition_set-036a4d80.zip -d reco_set - - name: Train for a short epoch (document orientation) - run: python references/classification/train_orientation.py resnet18 --type page --train_path ./det_set --val_path ./det_set -b 2 --epochs 1 - - name: Train for a short epoch (crop orientation) - run: python references/classification/train_orientation.py resnet18 --type crop --train_path ./reco_set --val_path ./reco_set -b 4 --epochs 1 - - train-text-recognition: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-${{ hashFiles('references/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}- - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - pip install -r references/requirements.txt - - name: Download and extract toy set - run: | - wget https://github.com/mindee/doctr/releases/download/v0.3.1/toy_recogition_set-036a4d80.zip - sudo apt-get update && sudo apt-get install unzip -y - unzip toy_recogition_set-036a4d80.zip -d reco_set - - name: Train for a short epoch - run: python 
references/recognition/train.py crnn_mobilenet_v3_small --train_path ./reco_set --val_path ./reco_set -b 4 --epochs 1 - - evaluate-text-recognition: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - - name: Evaluate text recognition - run: python references/recognition/evaluate.py crnn_mobilenet_v3_small --dataset SVT -b 32 - - latency-text-recognition: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - - name: Benchmark latency - run: python references/recognition/latency.py crnn_mobilenet_v3_small --it 5 - - train-text-detection: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}-${{ hashFiles('references/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }}- - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - pip install -r references/requirements.txt - - name: Download and extract toy set - run: | - wget https://github.com/mindee/doctr/releases/download/v0.3.1/toy_detection_set-bbbb4243.zip - sudo apt-get update && sudo apt-get install unzip -y - unzip toy_detection_set-bbbb4243.zip -d det_set - - name: Train for a short epoch - run: python references/detection/train.py db_mobilenet_v3_large --train_path ./det_set --val_path ./det_set -b 2 --epochs 1 - - evaluate-text-detection: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - pip install -r references/requirements.txt - - name: Evaluate text detection - run: python references/detection/evaluate.py db_mobilenet_v3_large - - latency-text-detection: - runs-on: ${{ matrix.os }} 
- strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('requirements-pt.txt') }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - - name: Benchmark latency - run: python references/detection/latency.py db_mobilenet_v3_large --it 5 --size 512 diff --git a/.github/workflows/scripts.yml b/.github/workflows/scripts.yml deleted file mode 100644 index 28eac27ec9..0000000000 --- a/.github/workflows/scripts.yml +++ /dev/null @@ -1,121 +0,0 @@ -name: scripts - -on: - push: - branches: main - pull_request: - branches: main - -jobs: - test-analyze: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10", "3.11"] - steps: - - if: matrix.os == 'macos-latest' - name: Install MacOS prerequisites - run: brew install cairo pango gdk-pixbuf libffi - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} - - name: Install package - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - - - name: Run analysis script - run: | - wget https://github.com/mindee/doctr/releases/download/v0.1.0/sample.pdf - python scripts/analyze.py sample.pdf --noblock --detection db_mobilenet_v3_large - - test-detect-text: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10", "3.11"] - steps: - - if: matrix.os == 'macos-latest' - name: Install MacOS prerequisites - run: brew install cairo pango gdk-pixbuf libffi - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} - - name: Install package - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - - - name: Run detection script - run: | - wget https://github.com/mindee/doctr/releases/download/v0.1.0/sample.pdf - python scripts/detect_text.py sample.pdf --detection db_mobilenet_v3_large - - test-evaluate: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python: ["3.10", "3.11"] - steps: - - if: matrix.os == 'macos-latest' - name: Install MacOS prerequisites - run: brew install cairo pango gdk-pixbuf libffi - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }} - - name: Install package - run: | - python -m pip install --upgrade pip - pip install -e .[viz,html] --upgrade - - name: Run evaluation script - run: | - python scripts/evaluate.py db_resnet50 crnn_vgg16_bn 
--samples 10 - python scripts/evaluate_kie.py db_resnet50 crnn_vgg16_bn --samples 10 - - test-collectenv: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - python: ["3.10", "3.11"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - # MacOS issue ref.: https://github.com/actions/setup-python/issues/855 & https://github.com/actions/setup-python/issues/865 - python-version: ${{ matrix.os == 'macos-latest' && matrix.python == '3.10' && '3.11' || matrix.python }} - architecture: x64 - - name: Run environment collection script - run: python scripts/collect_env.py diff --git a/.github/workflows/style.yml b/.github/workflows/style.yml deleted file mode 100644 index 7bf645c9f4..0000000000 --- a/.github/workflows/style.yml +++ /dev/null @@ -1,55 +0,0 @@ -name: style - -on: - push: - branches: main - pull_request: - branches: main - -jobs: - ruff: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Run ruff - run: | - pip install ruff --upgrade - ruff --version - ruff check --diff . - - mypy: - runs-on: ${{ matrix.os }} - strategy: - matrix: - os: [ubuntu-latest] - python: ["3.10"] - steps: - - uses: actions/checkout@v6 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: ${{ matrix.python }} - architecture: x64 - - name: Cache python modules - uses: actions/cache@v5 - with: - path: ~/.cache/pip - key: ${{ runner.os }}-pkg-deps-${{ matrix.python }}-${{ hashFiles('pyproject.toml') }}-style - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[dev] --upgrade - pip install mypy --upgrade - - name: Run mypy - run: | - mypy --version - mypy diff --git a/.gitignore b/.gitignore deleted file mode 100644 index 1c285ec6bb..0000000000 --- a/.gitignore +++ /dev/null @@ -1,140 +0,0 @@ -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. 
-#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Temp files -doctr/version.py -logs/ -wandb/ -.idea/ - -# Checkpoints -*.pt -*.pb -*.index diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000000..8b13789179 --- /dev/null +++ b/.nojekyll @@ -0,0 +1 @@ + diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index e885088d2d..0000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,23 +0,0 @@ -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v6.0.0 - hooks: - - id: check-ast - - id: check-yaml - exclude: .conda - - id: check-toml - - id: check-json - - id: check-added-large-files - exclude: docs/images/ - - id: end-of-file-fixer - - id: trailing-whitespace - - id: debug-statements - - id: check-merge-conflict - - id: no-commit-to-branch - args: ['--branch', 'main'] - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.14 - hooks: - - id: ruff - args: [ --fix ] - - id: ruff-format diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 1a2bd38a59..0000000000 --- a/Dockerfile +++ /dev/null @@ -1,46 +0,0 @@ -FROM nvidia/cuda:12.2.0-base-ubuntu22.04 - -ENV DEBIAN_FRONTEND=noninteractive -ENV LANG=C.UTF-8 -ENV PYTHONUNBUFFERED=1 -ENV PYTHONDONTWRITEBYTECODE=1 - - -RUN apt-get update && apt-get install -y --no-install-recommends \ - # - Other packages - build-essential \ - pkg-config \ - curl \ - wget \ - software-properties-common \ - unzip \ - git \ - # - Packages to build Python - tar make gcc zlib1g-dev libffi-dev libssl-dev liblzma-dev libbz2-dev libsqlite3-dev \ - # - Packages for docTR - libgl1-mesa-dev libsm6 libxext6 libxrender-dev libpangocairo-1.0-0 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -# Install Python -ARG PYTHON_VERSION=3.10.13 - -RUN wget http://www.python.org/ftp/python/$PYTHON_VERSION/Python-$PYTHON_VERSION.tgz && \ - tar -zxf Python-$PYTHON_VERSION.tgz && \ - cd Python-$PYTHON_VERSION && \ - mkdir /opt/python/ && \ - ./configure --prefix=/opt/python && \ - make && \ - make install && \ - cd .. && \ - rm Python-$PYTHON_VERSION.tgz && \ - rm -r Python-$PYTHON_VERSION - -ENV PATH=/opt/python/bin:$PATH - -# Install docTR -ARG FRAMEWORK=torch -ARG DOCTR_REPO='mindee/doctr' -ARG DOCTR_VERSION=main -RUN pip3 install -U pip setuptools wheel && \ - pip3 install "python-doctr[$FRAMEWORK]@git+https://github.com/$DOCTR_REPO.git@$DOCTR_VERSION" diff --git a/LICENSE b/LICENSE deleted file mode 100644 index 63d22de4a6..0000000000 --- a/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. 
- - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2022 Mindee - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
diff --git a/Makefile b/Makefile deleted file mode 100644 index fed26fb5f3..0000000000 --- a/Makefile +++ /dev/null @@ -1,29 +0,0 @@ -.PHONY: quality style test test-common test-tf test-torch docs-single-version docs -# this target runs checks on all files -quality: - ruff check . - mypy doctr/ - -# this target runs checks on all files and potentially modifies some of them -style: - ruff format . - ruff check --fix . - -# Run tests for the library -test: - coverage run -m pytest tests/common/ -rs - coverage run -m pytest tests/pytorch/ -rs - -test-common: - coverage run -m pytest tests/common/ -rs - -test-torch: - coverage run -m pytest tests/pytorch/ -rs - -# Check that docs can build -docs-single-version: - sphinx-build docs/source docs/_build -a - -# Check that docs can build -docs: - cd docs && bash build.sh diff --git a/README.md b/README.md deleted file mode 100644 index 9c20065e31..0000000000 --- a/README.md +++ /dev/null @@ -1,356 +0,0 @@ -

- -

- -[![Slack Icon](https://img.shields.io/badge/Slack-Community-4A154B?style=flat-square&logo=slack&logoColor=white)](https://slack.mindee.com) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) ![Build Status](https://github.com/mindee/doctr/workflows/builds/badge.svg) [![Docker Images](https://img.shields.io/badge/Docker-4287f5?style=flat&logo=docker&logoColor=white)](https://github.com/mindee/doctr/pkgs/container/doctr) [![codecov](https://codecov.io/gh/mindee/doctr/branch/main/graph/badge.svg?token=577MO567NM)](https://codecov.io/gh/mindee/doctr) [![CodeFactor](https://www.codefactor.io/repository/github/mindee/doctr/badge?s=bae07db86bb079ce9d6542315b8c6e70fa708a7e)](https://www.codefactor.io/repository/github/mindee/doctr) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/340a76749b634586a498e1c0ab998f08)](https://app.codacy.com/gh/mindee/doctr?utm_source=github.com&utm_medium=referral&utm_content=mindee/doctr&utm_campaign=Badge_Grade) [![Doc Status](https://github.com/mindee/doctr/workflows/doc-status/badge.svg)](https://mindee.github.io/doctr) [![Pypi](https://img.shields.io/badge/pypi-v1.0.1-blue.svg)](https://pypi.org/project/python-doctr/) [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mindee/notebooks/blob/main/doctr/quicktour.ipynb) [![Gurubase](https://img.shields.io/badge/Gurubase-Ask%20docTR%20Guru-006BFF)](https://gurubase.io/g/doctr) - - -**Optical Character Recognition made seamless & accessible to anyone, powered by PyTorch** - -What you can expect from this repository: - -- efficient ways to parse textual information (localize and identify each word) from your documents -- guidance on how to integrate this in your current architecture - -![OCR_example](https://github.com/mindee/doctr/raw/main/docs/images/ocr.png) - -## Quick Tour - -### Getting your pretrained model - -End-to-End OCR is achieved in docTR using a two-stage approach: text detection (localizing words), then text recognition (identify all characters in the word). -As such, you can select the architecture used for [text detection](https://mindee.github.io/doctr/latest/modules/models.html#doctr-models-detection), and the one for [text recognition](https://mindee.github.io/doctr/latest//modules/models.html#doctr-models-recognition) from the list of available implementations. 
- -```python -from doctr.models import ocr_predictor - -model = ocr_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True) -``` - -### Reading files - -Documents can be interpreted from PDF or images: - -```python -from doctr.io import DocumentFile -# PDF -pdf_doc = DocumentFile.from_pdf("path/to/your/doc.pdf") -# Image -single_img_doc = DocumentFile.from_images("path/to/your/img.jpg") -# Webpage (requires `weasyprint` to be installed) -webpage_doc = DocumentFile.from_url("https://www.yoursite.com") -# Multiple page images -multi_img_doc = DocumentFile.from_images(["path/to/page1.jpg", "path/to/page2.jpg"]) -``` - -### Putting it together - -Let's use the default pretrained model for an example: - -```python -from doctr.io import DocumentFile -from doctr.models import ocr_predictor - -model = ocr_predictor(pretrained=True) -# PDF -doc = DocumentFile.from_pdf("path/to/your/doc.pdf") -# Analyze -result = model(doc) -``` - -### Dealing with rotated documents - -Should you use docTR on documents that include rotated pages, or pages with multiple box orientations, -you have multiple options to handle it: - -- If you only use straight document pages with straight words (horizontal, same reading direction), -consider passing `assume_straight_pages=True` to the ocr_predictor. It will directly fit straight boxes -on your page and return straight boxes, which makes it the fastest option. - -- If you want the predictor to output straight boxes (no matter the orientation of your pages, the final localizations -will be converted to straight boxes), you need to pass `export_as_straight_boxes=True` in the predictor. Otherwise, if `assume_straight_pages=False`, it will return rotated bounding boxes (potentially with an angle of 0°). - -If both options are set to False, the predictor will always fit and return rotated boxes. - -To interpret your model's predictions, you can visualize them interactively as follows: - -```python -# Display the result (requires matplotlib & mplcursors to be installed) -result.show() -``` - -![Visualization sample](https://github.com/mindee/doctr/raw/main/docs/images/doctr_example_script.gif) - -Or even rebuild the original document from its predictions: - -```python -import matplotlib.pyplot as plt - -synthetic_pages = result.synthesize() -plt.imshow(synthetic_pages[0]); plt.axis('off'); plt.show() -``` - -![Synthesis sample](https://github.com/mindee/doctr/raw/main/docs/images/synthesized_sample.png) - -The `ocr_predictor` returns a `Document` object with a nested structure (with `Page`, `Block`, `Line`, `Word`, `Artefact`). -To get a better understanding of our document model, check our [documentation](https://mindee.github.io/doctr/modules/io.html#document-structure): - -You can also export them as a nested dict, more appropriate for JSON format: - -```python -json_output = result.export() -``` - -### Use the KIE predictor - -The KIE predictor is a more flexible predictor compared to OCR as your detection model can detect multiple classes in a document. For example, you can have a detection model to detect just dates and addresses in a document. - -The KIE predictor makes it possible to use detector with multiple classes with a recognition model and to have the whole pipeline already setup for you. 
- -```python -from doctr.io import DocumentFile -from doctr.models import kie_predictor - -# Model -model = kie_predictor(det_arch='db_resnet50', reco_arch='crnn_vgg16_bn', pretrained=True) -# PDF -doc = DocumentFile.from_pdf("path/to/your/doc.pdf") -# Analyze -result = model(doc) - -predictions = result.pages[0].predictions -for class_name in predictions.keys(): - list_predictions = predictions[class_name] - for prediction in list_predictions: - print(f"Prediction for {class_name}: {prediction}") -``` - -The KIE predictor results per page are in a dictionary format with each key representing a class name and it's value are the predictions for that class. - -### If you are looking for support from the Mindee team - -[![Bad OCR test detection image asking the developer if they need help](https://github.com/mindee/doctr/raw/main/docs/images/doctr-need-help.png)](https://mindee.com/product/doctr) - -## Installation - -### Prerequisites - -Python 3.10 (or higher) and [pip](https://pip.pypa.io/en/stable/) are required to install docTR. - -### Latest release - -You can then install the latest release of the package using [pypi](https://pypi.org/project/python-doctr/) as follows: - -```shell -pip install python-doctr -``` - -We try to keep extra dependencies to a minimum. You can install specific builds as follows: - -```shell -# standard build -pip install python-doctr -# optional dependencies for visualization, html, and contrib modules can be installed as follows: -pip install "python-doctr[viz,html,contrib]" -``` - -### Developer mode - -Alternatively, you can install it from source, which will require you to install [Git](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git). -First clone the project repository: - -```shell -git clone https://github.com/mindee/doctr.git -pip install -e doctr/. -``` - -Again, if you prefer to avoid the risk of missing dependencies, you can install the build: - -```shell -pip install -e doctr/. -``` - -## Models architectures - -Credits where it's due: this repository is implementing, among others, architectures from published research papers. - -### Text Detection - -- DBNet: [Real-time Scene Text Detection with Differentiable Binarization](https://arxiv.org/pdf/1911.08947.pdf). -- LinkNet: [LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation](https://arxiv.org/pdf/1707.03718.pdf) -- FAST: [FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation](https://arxiv.org/pdf/2111.02394.pdf) - -### Text Recognition - -- CRNN: [An End-to-End Trainable Neural Network for Image-based Sequence Recognition and Its Application to Scene Text Recognition](https://arxiv.org/pdf/1507.05717.pdf). -- SAR: [Show, Attend and Read:A Simple and Strong Baseline for Irregular Text Recognition](https://arxiv.org/pdf/1811.00751.pdf). -- MASTER: [MASTER: Multi-Aspect Non-local Network for Scene Text Recognition](https://arxiv.org/pdf/1910.02562.pdf). -- ViTSTR: [Vision Transformer for Fast and Efficient Scene Text Recognition](https://arxiv.org/pdf/2105.08582.pdf). -- PARSeq: [Scene Text Recognition with Permuted Autoregressive Sequence Models](https://arxiv.org/pdf/2207.06966). -- VIPTR: [A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition](https://arxiv.org/abs/2401.10110). - -## More goodies - -### Documentation - -The full package documentation is available [here](https://mindee.github.io/doctr/) for detailed specifications. 
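To make the architecture list above concrete, here is a small hypothetical pairing of one listed detector with one listed recognizer. This is a sketch, not part of the original README: `fast_base` and `parseq` are assumed to be valid identifiers in the installed docTR version; fall back to the Quick Tour defaults (`db_resnet50`, `crnn_vgg16_bn`) if they are not.

```python
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

# Assumed architecture names; swap in any detection/recognition pair listed above
predictor = ocr_predictor(det_arch="fast_base", reco_arch="parseq", pretrained=True)
doc = DocumentFile.from_images("path/to/your/img.jpg")
result = predictor(doc)
json_output = result.export()  # nested dict, as described in the Quick Tour
```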
- -### Demo app - -A minimal demo app is provided for you to play with our end-to-end OCR models! - -![Demo app](https://github.com/mindee/doctr/raw/main/docs/images/demo_update.png) - -#### Live demo - -Courtesy of :hugs: [Hugging Face](https://huggingface.co/) :hugs:, docTR has now a fully deployed version available on [Spaces](https://huggingface.co/spaces)! -Check it out [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/mindee/doctr) - -#### Running it locally - -If you prefer to use it locally, there is an extra dependency ([Streamlit](https://streamlit.io/)) that is required. - -```shell -pip install -r demo/pt-requirements.txt -``` - -Then run your app in your default browser with: - -```shell -streamlit run demo/app.py -``` - -### Docker container - -We offer Docker container support for easy testing and deployment. [Here are the available docker tags.](https://github.com/mindee/doctr/pkgs/container/doctr). - -#### Using GPU with docTR Docker Images - -The docTR Docker images are GPU-ready and based on CUDA `12.2`. Make sure your host is **at least `12.2`**, otherwise Torch won't be able to initialize the GPU. -Please ensure that Docker is configured to use your GPU. - -To verify and configure GPU support for Docker, please follow the instructions provided in the [NVIDIA Container Toolkit Installation Guide](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html). - -Once Docker is configured to use GPUs, you can run docTR Docker containers with GPU support: - -```shell -docker run -it --gpus all ghcr.io/mindee/doctr:torch-py3.9.18-2024-10 bash -``` - -#### Available Tags - -The Docker images for docTR follow a specific tag nomenclature: `-py-`. Here's a breakdown of the tag structure: - -- ``: `torch`, `torch-viz-html-contrib`. -- ``: `3.9.18`, `3.10.13` or `3.11.8`. -- ``: a tag >= `v0.11.0` -- ``: e.g. `2014-10` - -Here are examples of different image tags: - -| Tag | Description | -|----------------------------|---------------------------------------------------| -| `torch-viz-html-contrib-py3.11.8-2024-10` | Torch with extra dependencies version `3.11.8` from latest commit on `main` in `2024-10`. | -| `torch-py3.11.8-2024-10`| PyTorch version `3.11.8` from latest commit on `main` in `2024-10`. | - -#### Building Docker Images Locally - -You can also build docTR Docker images locally on your computer. - -```shell -docker build -t doctr . -``` - -You can specify custom Python versions and docTR versions using build arguments. For example, to build a docTR image with PyTorch, Python version `3.9.10`, and docTR version `v0.7.0`, run the following command: - -```shell -docker build -t doctr --build-arg FRAMEWORK=torch --build-arg PYTHON_VERSION=3.9.10 --build-arg DOCTR_VERSION=v0.7.0 . -``` - -### Example script - -An example script is provided for a simple documentation analysis of a PDF or image file: - -```shell -python scripts/analyze.py path/to/your/doc.pdf -``` - -All script arguments can be checked using `python scripts/analyze.py --help` - -### Minimal API integration - -Looking to integrate docTR into your API? Here is a template to get you started with a fully working API using the wonderful [FastAPI](https://github.com/tiangolo/fastapi) framework. 
- -#### Deploy your API locally - -Specific dependencies are required to run the API template, which you can install as follows: - -```shell -cd api/ -pip install poetry -make lock -pip install -r requirements.txt -``` - -You can now run your API locally: - -```shell -uvicorn --reload --workers 1 --host 0.0.0.0 --port=8002 --app-dir api/ app.main:app -``` - -Alternatively, you can run the same server on a docker container if you prefer using: - -```shell -PORT=8002 docker-compose up -d --build -``` - -#### What you have deployed - -Your API should now be running locally on port 8002. Access your automatically-built documentation at [http://localhost:8002/redoc](http://localhost:8002/redoc) and enjoy your four functional routes ("/detection", "/recognition", "/ocr", "/kie"). Here is an example with Python to send a request to the OCR route: - -```python -import requests - -params = {"det_arch": "db_resnet50", "reco_arch": "crnn_vgg16_bn"} - -with open('/path/to/your/doc.jpg', 'rb') as f: - files = [ # application/pdf, image/jpeg, image/png supported - ("files", ("doc.jpg", f.read(), "image/jpeg")), - ] -print(requests.post("http://localhost:8002/ocr", params=params, files=files).json()) -``` - -### Example notebooks - -Looking for more illustrations of docTR features? You might want to check the [Jupyter notebooks](https://github.com/mindee/doctr/tree/main/notebooks) designed to give you a broader overview. - -## Supported By - -This project is supported by [t2k GmbH](https://www.text2knowledge.de/de), -

- -

- -## Citation - -If you wish to cite this project, feel free to use this [BibTeX](http://www.bibtex.org/) reference: - -```bibtex -@misc{doctr2021, - title={docTR: Document Text Recognition}, - author={Mindee}, - year={2021}, - publisher = {GitHub}, - howpublished = {\url{https://github.com/mindee/doctr}} -} -``` - -## Contributing - -If you scrolled down to this section, you most likely appreciate open source. Do you feel like extending the range of our supported characters? Or perhaps submitting a paper implementation? Or contributing in any other way? - -You're in luck, we compiled a short guide (cf. [`CONTRIBUTING`](https://mindee.github.io/doctr/contributing/contributing.html)) for you to easily do so! - -## License - -Distributed under the Apache 2.0 License. See [`LICENSE`](https://github.com/mindee/doctr?tab=Apache-2.0-1-ov-file#readme) for more information. diff --git a/_modules/doctr/contrib/artefacts.html b/_modules/doctr/contrib/artefacts.html new file mode 100644 index 0000000000..7d4c23100f --- /dev/null +++ b/_modules/doctr/contrib/artefacts.html @@ -0,0 +1,463 @@ + doctr.contrib.artefacts - docTR documentation
Source code for doctr.contrib.artefacts

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from typing import Any
+
+import cv2
+import numpy as np
+
+from doctr.file_utils import requires_package
+
+from .base import _BasePredictor
+
+__all__ = ["ArtefactDetector"]
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "yolov8_artefact": {
+        "input_shape": (3, 1024, 1024),
+        "labels": ["bar_code", "qr_code", "logo", "photo"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/yolo_artefact-f9d66f14.onnx&src=0",
+    },
+}
+
+
+
+[docs] +class ArtefactDetector(_BasePredictor): + """ + A class to detect artefacts in images + + >>> from doctr.io import DocumentFile + >>> from doctr.contrib.artefacts import ArtefactDetector + >>> doc = DocumentFile.from_images(["path/to/image.jpg"]) + >>> detector = ArtefactDetector() + >>> results = detector(doc) + + Args: + arch: the architecture to use + batch_size: the batch size to use + model_path: the path to the model to use + labels: the labels to use + input_shape: the input shape to use + mask_labels: the mask labels to use + conf_threshold: the confidence threshold to use + iou_threshold: the intersection over union threshold to use + **kwargs: additional arguments to be passed to `download_from_url` + """ + + def __init__( + self, + arch: str = "yolov8_artefact", + batch_size: int = 2, + model_path: str | None = None, + labels: list[str] | None = None, + input_shape: tuple[int, int, int] | None = None, + conf_threshold: float = 0.5, + iou_threshold: float = 0.5, + **kwargs: Any, + ) -> None: + super().__init__(batch_size=batch_size, url=default_cfgs[arch]["url"], model_path=model_path, **kwargs) + self.labels = labels or default_cfgs[arch]["labels"] + self.input_shape = input_shape or default_cfgs[arch]["input_shape"] + self.conf_threshold = conf_threshold + self.iou_threshold = iou_threshold + + def preprocess(self, img: np.ndarray) -> np.ndarray: + return np.transpose(cv2.resize(img, (self.input_shape[2], self.input_shape[1])), (2, 0, 1)) / np.array(255.0) + + def postprocess(self, output: list[np.ndarray], input_images: list[list[np.ndarray]]) -> list[list[dict[str, Any]]]: + results = [] + + for batch in zip(output, input_images): + for out, img in zip(batch[0], batch[1]): + org_height, org_width = img.shape[:2] + width_scale, height_scale = org_width / self.input_shape[2], org_height / self.input_shape[1] + for res in out: + sample_results = [] + for row in np.transpose(np.squeeze(res)): + classes_scores = row[4:] + max_score = np.amax(classes_scores) + if max_score >= self.conf_threshold: + class_id = np.argmax(classes_scores) + x, y, w, h = row[0], row[1], row[2], row[3] + # to rescaled xmin, ymin, xmax, ymax + xmin = int((x - w / 2) * width_scale) + ymin = int((y - h / 2) * height_scale) + xmax = int((x + w / 2) * width_scale) + ymax = int((y + h / 2) * height_scale) + + sample_results.append({ + "label": self.labels[class_id], + "confidence": float(max_score), + "box": [xmin, ymin, xmax, ymax], + }) + + # Filter out overlapping boxes + boxes = [res["box"] for res in sample_results] + scores = [res["confidence"] for res in sample_results] + keep_indices = cv2.dnn.NMSBoxes(boxes, scores, self.conf_threshold, self.iou_threshold) # type: ignore[arg-type] + sample_results = [sample_results[i] for i in keep_indices] + + results.append(sample_results) + + self._results = results + return results + + def show(self, **kwargs: Any) -> None: + """ + Display the results + + Args: + **kwargs: additional keyword arguments to be passed to `plt.show` + """ + requires_package("matplotlib", "`.show()` requires matplotlib installed") + import matplotlib.pyplot as plt + from matplotlib.patches import Rectangle + + # visualize the results with matplotlib + if self._results and self._inputs: + for img, res in zip(self._inputs, self._results): + plt.figure(figsize=(10, 10)) + plt.imshow(img) + for obj in res: + xmin, ymin, xmax, ymax = obj["box"] + label = obj["label"] + plt.text(xmin, ymin, f"{label} {obj['confidence']:.2f}", color="red") + plt.gca().add_patch( + Rectangle((xmin, 
ymin), xmax - xmin, ymax - ymin, fill=False, edgecolor="red", linewidth=2) + ) + plt.show(**kwargs)
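A short usage sketch for the listing above (paths are placeholders, and the optional ONNX runtime dependency used by the contrib predictors is assumed to be installed). The result layout follows `postprocess`, which returns one list of detections per input image.

```python
from doctr.io import DocumentFile
from doctr.contrib.artefacts import ArtefactDetector

doc = DocumentFile.from_images(["path/to/page.jpg"])
detector = ArtefactDetector(conf_threshold=0.6)  # stricter than the 0.5 default
results = detector(doc)

# Each detection is a dict with "label", "confidence" and an [xmin, ymin, xmax, ymax] box
for detection in results[0]:
    print(detection["label"], round(detection["confidence"], 2), detection["box"])

detector.show()  # optional, requires matplotlib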
\ No newline at end of file diff --git a/_modules/doctr/datasets/coco_text.html b/_modules/doctr/datasets/coco_text.html new file mode 100644 index 0000000000..2454b27405 --- /dev/null +++ b/_modules/doctr/datasets/coco_text.html @@ -0,0 +1,473 @@ + doctr.datasets.coco_text - docTR documentation
Source code for doctr.datasets.coco_text

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import AbstractDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["COCOTEXT"]
+
+
+
+[docs] +class COCOTEXT(AbstractDataset): + """ + COCO-Text dataset from `"COCO-Text: Dataset and Benchmark for Text Detection and Recognition in Natural Images" + <https://arxiv.org/pdf/1601.07140v2>`_ | + `"homepage" <https://bgshih.github.io/cocotext/>`_. + + >>> # NOTE: You need to download the dataset first. + >>> from doctr.datasets import COCOTEXT + >>> train_set = COCOTEXT(train=True, img_folder="/path/to/coco_text/train2014/", + >>> label_path="/path/to/coco_text/cocotext.v2.json") + >>> img, target = train_set[0] + >>> test_set = COCOTEXT(train=False, img_folder="/path/to/coco_text/train2014/", + >>> label_path = "/path/to/coco_text/cocotext.v2.json") + >>> img, target = test_set[0] + + Args: + img_folder: folder with all the images of the dataset + label_path: path to the annotations file of the dataset + train: whether the subset should be the training one + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + recognition_task: whether the dataset should be used for recognition task + detection_task: whether the dataset should be used for detection task + **kwargs: keyword arguments from `AbstractDataset`. + """ + + def __init__( + self, + img_folder: str, + label_path: str, + train: bool = True, + use_polygons: bool = False, + recognition_task: bool = False, + detection_task: bool = False, + **kwargs: Any, + ) -> None: + super().__init__( + img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs + ) + # Task check + if recognition_task and detection_task: + raise ValueError( + " 'recognition' and 'detection task' cannot be set to True simultaneously. " + + " To get the whole dataset with boxes and labels leave both parameters to False " + ) + + # File existence check + if not os.path.exists(label_path) or not os.path.exists(img_folder): + raise FileNotFoundError(f"unable to find {label_path if not os.path.exists(label_path) else img_folder}") + + tmp_root = img_folder + self.train = train + np_dtype = np.float32 + self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] + + with open(label_path, "r") as file: + data = json.load(file) + + # Filter images based on the set + img_items = [img for img in data["imgs"].items() if (img[1]["set"] == "train") == train] + box: list[float] | np.ndarray + + for img_id, img_info in tqdm(img_items, desc="Preparing and Loading COCOTEXT", total=len(img_items)): + img_path = os.path.join(img_folder, img_info["file_name"]) + + # File existence check + if not os.path.exists(img_path): # pragma: no cover + raise FileNotFoundError(f"Unable to locate {img_path}") + + # Get annotations for the current image (only legible text) + annotations = [ + ann + for ann in data["anns"].values() + if ann["image_id"] == int(img_id) and ann["legibility"] == "legible" + ] + + # Some images have no annotations with readable text + if not annotations: # pragma: no cover + continue + + _targets = [] + + for annotation in annotations: + x, y, w, h = annotation["bbox"] + if use_polygons: + # (x, y) coordinates of top left, top right, bottom right, bottom left corners + box = np.array( + [ + [x, y], + [x + w, y], + [x + w, y + h], + [x, y + h], + ], + dtype=np_dtype, + ) + else: + # (xmin, ymin, xmax, ymax) coordinates + box = [x, y, x + w, y + h] + _targets.append((annotation["utf8_string"], box)) + text_targets, box_targets = zip(*_targets) + + if recognition_task: + crops = crop_bboxes_from_image( + img_path=os.path.join(tmp_root, img_path), 
geoms=np.asarray(box_targets, dtype=int).clip(min=0) + ) + for crop, label in zip(crops, list(text_targets)): + if label and " " not in label: + self.data.append((crop, label)) + + elif detection_task: + self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0))) + else: + self.data.append(( + img_path, + dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)), + )) + + self.root = tmp_root + + def extra_repr(self) -> str: + return f"train={self.train}"
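As a sketch of the `recognition_task` branch above, COCO-Text can be turned into (word crop, transcription) pairs directly. The paths are placeholders and must point to a locally downloaded copy of the dataset, as noted in the docstring.

```python
from doctr.datasets import COCOTEXT

reco_set = COCOTEXT(
    train=True,
    img_folder="/path/to/coco_text/train2014/",
    label_path="/path/to/coco_text/cocotext.v2.json",
    recognition_task=True,
)
crop, word = reco_set[0]  # a cropped word image and its transcription
print(type(crop), word)
```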
\ No newline at end of file diff --git a/_modules/doctr/datasets/cord.html b/_modules/doctr/datasets/cord.html new file mode 100644 index 0000000000..4572516b88 --- /dev/null +++ b/_modules/doctr/datasets/cord.html @@ -0,0 +1,466 @@ + doctr.datasets.cord - docTR documentation
Source code for doctr.datasets.cord

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import VisionDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["CORD"]
+
+
+
+[docs] +class CORD(VisionDataset): + """CORD dataset from `"CORD: A Consolidated Receipt Dataset forPost-OCR Parsing" + <https://openreview.net/pdf?id=SJl3z659UH>`_. + + .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/cord-grid.png&src=0 + :align: center + + >>> from doctr.datasets import CORD + >>> train_set = CORD(train=True, download=True) + >>> img, target = train_set[0] + + Args: + train: whether the subset should be the training one + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + recognition_task: whether the dataset should be used for recognition task + detection_task: whether the dataset should be used for detection task + **kwargs: keyword arguments from `VisionDataset`. + """ + + TRAIN = ( + "https://doctr-static.mindee.com/models?id=v0.1.1/cord_train.zip&src=0", + "45f9dc77f126490f3e52d7cb4f70ef3c57e649ea86d19d862a2757c9c455d7f8", + "cord_train.zip", + ) + + TEST = ( + "https://doctr-static.mindee.com/models?id=v0.1.1/cord_test.zip&src=0", + "8c895e3d6f7e1161c5b7245e3723ce15c04d84be89eaa6093949b75a66fb3c58", + "cord_test.zip", + ) + + def __init__( + self, + train: bool = True, + use_polygons: bool = False, + recognition_task: bool = False, + detection_task: bool = False, + **kwargs: Any, + ) -> None: + url, sha256, name = self.TRAIN if train else self.TEST + super().__init__( + url, + name, + sha256, + True, + pre_transforms=convert_target_to_relative if not recognition_task else None, + **kwargs, + ) + if recognition_task and detection_task: + raise ValueError( + "`recognition_task` and `detection_task` cannot be set to True simultaneously. " + + "To get the whole dataset with boxes and labels leave both parameters to False." + ) + + # list images + tmp_root = os.path.join(self.root, "image") + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] + self.train = train + np_dtype = np.float32 + for img_path in tqdm( + iterable=os.listdir(tmp_root), desc="Preparing and Loading CORD", total=len(os.listdir(tmp_root)) + ): + # File existence check + if not os.path.exists(os.path.join(tmp_root, img_path)): + raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_path)}") + + stem = Path(img_path).stem + _targets = [] + with open(os.path.join(self.root, "json", f"{stem}.json"), "rb") as f: + label = json.load(f) + for line in label["valid_line"]: + for word in line["words"]: + if len(word["text"]) > 0: + x = word["quad"]["x1"], word["quad"]["x2"], word["quad"]["x3"], word["quad"]["x4"] + y = word["quad"]["y1"], word["quad"]["y2"], word["quad"]["y3"], word["quad"]["y4"] + box: list[float] | np.ndarray + if use_polygons: + # (x, y) coordinates of top left, top right, bottom right, bottom left corners + box = np.array( + [ + [x[0], y[0]], + [x[1], y[1]], + [x[2], y[2]], + [x[3], y[3]], + ], + dtype=np_dtype, + ) + else: + # Reduce 8 coords to 4 -> xmin, ymin, xmax, ymax + box = [min(x), min(y), max(x), max(y)] + _targets.append((word["text"], box)) + + text_targets, box_targets = zip(*_targets) + + if recognition_task: + crops = crop_bboxes_from_image( + img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0) + ) + for crop, label in zip(crops, list(text_targets)): + if " " not in label: + self.data.append((crop, label)) + elif detection_task: + self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0))) + else: + self.data.append(( + img_path, + dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), 
labels=list(text_targets)), + )) + + self.root = tmp_root + + def extra_repr(self) -> str: + return f"train={self.train}"
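The `use_polygons` flag changes the geometry of the targets built above. A minimal sketch of the expected shapes, assuming the default target pipeline (the download flag comes from `VisionDataset`, as in the docstring):

```python
from doctr.datasets import CORD

straight = CORD(train=True, download=True)
_, target = straight[0]
print(target["boxes"].shape)  # (num_words, 4): xmin, ymin, xmax, ymax

rotated = CORD(train=True, download=True, use_polygons=True)
_, target = rotated[0]
print(target["boxes"].shape)  # (num_words, 4, 2): one (x, y) point per corner
```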
\ No newline at end of file diff --git a/_modules/doctr/datasets/detection.html b/_modules/doctr/datasets/detection.html new file mode 100644 index 0000000000..4ee032717d --- /dev/null +++ b/_modules/doctr/datasets/detection.html @@ -0,0 +1,429 @@ + doctr.datasets.detection - docTR documentation
Source code for doctr.datasets.detection

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from typing import Any
+
+import numpy as np
+
+from doctr.file_utils import CLASS_NAME
+
+from .datasets import AbstractDataset
+from .utils import pre_transform_multiclass
+
+__all__ = ["DetectionDataset"]
+
+
+
+[docs] +class DetectionDataset(AbstractDataset): + """Implements a text detection dataset + + >>> from doctr.datasets import DetectionDataset + >>> train_set = DetectionDataset(img_folder="/path/to/images", + >>> label_path="/path/to/labels.json") + >>> img, target = train_set[0] + + Args: + img_folder: folder with all the images of the dataset + label_path: path to the annotations of each image + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + **kwargs: keyword arguments from `AbstractDataset`. + """ + + def __init__( + self, + img_folder: str, + label_path: str, + use_polygons: bool = False, + **kwargs: Any, + ) -> None: + super().__init__( + img_folder, + pre_transforms=pre_transform_multiclass, + **kwargs, + ) + + # File existence check + self._class_names: list = [] + if not os.path.exists(label_path): + raise FileNotFoundError(f"unable to locate {label_path}") + with open(label_path, "rb") as f: + labels = json.load(f) + + self.data: list[tuple[str, tuple[np.ndarray, list[str]]]] = [] + np_dtype = np.float32 + for img_name, label in labels.items(): + # File existence check + if not os.path.exists(os.path.join(self.root, img_name)): + raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}") + + geoms, polygons_classes = self.format_polygons(label["polygons"], use_polygons, np_dtype) + + self.data.append((img_name, (np.asarray(geoms, dtype=np_dtype), polygons_classes))) + + def format_polygons( + self, polygons: list | dict, use_polygons: bool, np_dtype: type + ) -> tuple[np.ndarray, list[str]]: + """Format polygons into an array + + Args: + polygons: the bounding boxes + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + np_dtype: dtype of array + + Returns: + geoms: bounding boxes as np array + polygons_classes: list of classes for each bounding box + """ + if isinstance(polygons, list): + self._class_names += [CLASS_NAME] + polygons_classes = [CLASS_NAME for _ in polygons] + _polygons: np.ndarray = np.asarray(polygons, dtype=np_dtype) + elif isinstance(polygons, dict): + self._class_names += list(polygons.keys()) + polygons_classes = [k for k, v in polygons.items() for _ in v] + _polygons = np.concatenate([np.asarray(poly, dtype=np_dtype) for poly in polygons.values() if poly], axis=0) + else: + raise TypeError(f"polygons should be a dictionary or list, it was {type(polygons)}") + geoms = _polygons if use_polygons else np.concatenate((_polygons.min(axis=1), _polygons.max(axis=1)), axis=1) + return geoms, polygons_classes + + @property + def class_names(self): + return sorted(set(self._class_names))
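`format_polygons` accepts either a plain list of polygons (a single default class) or a mapping from class name to polygons. Below is a hypothetical `labels.json` illustrating both layouts; file names, class names and coordinates are placeholders, and the coordinate convention should follow your own training setup.

```python
import json

labels = {
    # Single-class entry: a list of 4-point polygons
    "img_1.jpg": {"polygons": [[[10, 10], [120, 10], [120, 40], [10, 40]]]},
    # Multi-class entry: one polygon list per class name
    "img_2.jpg": {
        "polygons": {
            "words": [[[15, 20], [90, 20], [90, 45], [15, 45]]],
            "dates": [[[200, 30], [260, 30], [260, 55], [200, 55]]],
        }
    },
}
with open("labels.json", "w") as f:
    json.dump(labels, f)

# The referenced images must exist under img_folder before loading:
# from doctr.datasets import DetectionDataset
# ds = DetectionDataset(img_folder="/path/to/images", label_path="labels.json", use_polygons=True)
```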
\ No newline at end of file diff --git a/_modules/doctr/datasets/doc_artefacts.html b/_modules/doctr/datasets/doc_artefacts.html new file mode 100644 index 0000000000..b11940aa85 --- /dev/null +++ b/_modules/doctr/datasets/doc_artefacts.html @@ -0,0 +1,415 @@ + doctr.datasets.doc_artefacts - docTR documentation
Source code for doctr.datasets.doc_artefacts

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from typing import Any
+
+import numpy as np
+
+from .datasets import VisionDataset
+
+__all__ = ["DocArtefacts"]
+
+
+
+[docs] +class DocArtefacts(VisionDataset): + """Object detection dataset for non-textual elements in documents. + The dataset includes a variety of synthetic document pages with non-textual elements. + + .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/artefacts-grid.png&src=0 + :align: center + + >>> from doctr.datasets import DocArtefacts + >>> train_set = DocArtefacts(train=True, download=True) + >>> img, target = train_set[0] + + Args: + train: whether the subset should be the training one + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + **kwargs: keyword arguments from `VisionDataset`. + """ + + URL = "https://doctr-static.mindee.com/models?id=v0.4.0/artefact_detection-13fab8ce.zip&src=0" + SHA256 = "13fab8ced7f84583d9dccd0c634f046c3417e62a11fe1dea6efbbaba5052471b" + CLASSES = ["background", "qr_code", "bar_code", "logo", "photo"] + + def __init__( + self, + train: bool = True, + use_polygons: bool = False, + **kwargs: Any, + ) -> None: + super().__init__(self.URL, None, self.SHA256, True, **kwargs) + self.train = train + + # Update root + self.root = os.path.join(self.root, "train" if train else "val") + # List images + tmp_root = os.path.join(self.root, "images") + with open(os.path.join(self.root, "labels.json"), "rb") as f: + labels = json.load(f) + self.data: list[tuple[str, dict[str, Any]]] = [] + img_list = os.listdir(tmp_root) + if len(labels) != len(img_list): + raise AssertionError("the number of images and labels do not match") + np_dtype = np.float32 + for img_name, label in labels.items(): + # File existence check + if not os.path.exists(os.path.join(tmp_root, img_name)): + raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_name)}") + + # xmin, ymin, xmax, ymax + boxes: np.ndarray = np.asarray([obj["geometry"] for obj in label], dtype=np_dtype) + classes: np.ndarray = np.asarray([self.CLASSES.index(obj["label"]) for obj in label], dtype=np.int64) + if use_polygons: + # (x, y) coordinates of top left, top right, bottom right, bottom left corners + boxes = np.stack( + [ + np.stack([boxes[:, 0], boxes[:, 1]], axis=-1), + np.stack([boxes[:, 2], boxes[:, 1]], axis=-1), + np.stack([boxes[:, 2], boxes[:, 3]], axis=-1), + np.stack([boxes[:, 0], boxes[:, 3]], axis=-1), + ], + axis=1, + ) + self.data.append((img_name, dict(boxes=boxes, labels=classes))) + self.root = tmp_root + + def extra_repr(self) -> str: + return f"train={self.train}"
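The targets built above pair bounding boxes with integer labels indexing into `CLASSES`. A small sketch, assuming the default pipeline, to map them back to class names:

```python
from doctr.datasets import DocArtefacts

ds = DocArtefacts(train=True, download=True)
img, target = ds[0]
# target["labels"] holds indices into DocArtefacts.CLASSES
names = [DocArtefacts.CLASSES[idx] for idx in target["labels"]]
print(list(zip(names, target["boxes"].tolist()))[:3])
```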
\ No newline at end of file diff --git a/_modules/doctr/datasets/funsd.html b/_modules/doctr/datasets/funsd.html new file mode 100644 index 0000000000..821c8c94d1 --- /dev/null +++ b/_modules/doctr/datasets/funsd.html @@ -0,0 +1,457 @@ + doctr.datasets.funsd - docTR documentation
Source code for doctr.datasets.funsd

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import VisionDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["FUNSD"]
+
+
+
+[docs] +class FUNSD(VisionDataset): + """FUNSD dataset from `"FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents" + <https://arxiv.org/pdf/1905.13538.pdf>`_. + + .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/funsd-grid.png&src=0 + :align: center + + >>> from doctr.datasets import FUNSD + >>> train_set = FUNSD(train=True, download=True) + >>> img, target = train_set[0] + + Args: + train: whether the subset should be the training one + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + recognition_task: whether the dataset should be used for recognition task + detection_task: whether the dataset should be used for detection task + **kwargs: keyword arguments from `VisionDataset`. + """ + + URL = "https://guillaumejaume.github.io/FUNSD/dataset.zip" + SHA256 = "c31735649e4f441bcbb4fd0f379574f7520b42286e80b01d80b445649d54761f" + FILE_NAME = "funsd.zip" + + def __init__( + self, + train: bool = True, + use_polygons: bool = False, + recognition_task: bool = False, + detection_task: bool = False, + **kwargs: Any, + ) -> None: + super().__init__( + self.URL, + self.FILE_NAME, + self.SHA256, + True, + pre_transforms=convert_target_to_relative if not recognition_task else None, + **kwargs, + ) + if recognition_task and detection_task: + raise ValueError( + "`recognition_task` and `detection_task` cannot be set to True simultaneously. " + + "To get the whole dataset with boxes and labels leave both parameters to False." + ) + + self.train = train + np_dtype = np.float32 + + # Use the subset + subfolder = os.path.join("dataset", "training_data" if train else "testing_data") + + # # list images + tmp_root = os.path.join(self.root, subfolder, "images") + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] + for img_path in tqdm( + iterable=os.listdir(tmp_root), desc="Preparing and Loading FUNSD", total=len(os.listdir(tmp_root)) + ): + # File existence check + if not os.path.exists(os.path.join(tmp_root, img_path)): + raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_path)}") + + stem = Path(img_path).stem + with open(os.path.join(self.root, subfolder, "annotations", f"{stem}.json"), "rb") as f: + data = json.load(f) + + _targets = [ + (word["text"], word["box"]) + for block in data["form"] + for word in block["words"] + if len(word["text"]) > 0 + ] + text_targets, box_targets = zip(*_targets) + if use_polygons: + # xmin, ymin, xmax, ymax -> (x, y) coordinates of top left, top right, bottom right, bottom left corners + box_targets = [ # type: ignore[assignment] + [ + [box[0], box[1]], + [box[2], box[1]], + [box[2], box[3]], + [box[0], box[3]], + ] + for box in box_targets + ] + + if recognition_task: + crops = crop_bboxes_from_image( + img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=np_dtype) + ) + for crop, label in zip(crops, list(text_targets)): + # filter labels with unknown characters + if not any(char in label for char in ["☑", "☐", "\u03bf", "\uf703", "\uf702", " "]): + self.data.append((crop, label.replace("–", "-"))) + elif detection_task: + self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype))) + else: + self.data.append(( + img_path, + dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(text_targets)), + )) + + self.root = tmp_root + + def extra_repr(self) -> str: + return f"train={self.train}"
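With `detection_task=True`, the branch above keeps only the box arrays, which is convenient for a detection-only pipeline. A brief sketch, assuming the archive download completes:

```python
from doctr.datasets import FUNSD

det_set = FUNSD(train=True, download=True, detection_task=True)
img, boxes = det_set[0]
print(boxes.shape)  # (num_words, 4), or (num_words, 4, 2) with use_polygons=True
```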
\ No newline at end of file diff --git a/_modules/doctr/datasets/generator/pytorch.html b/_modules/doctr/datasets/generator/pytorch.html new file mode 100644 index 0000000000..8ea690df77 --- /dev/null +++ b/_modules/doctr/datasets/generator/pytorch.html @@ -0,0 +1,389 @@ + doctr.datasets.generator.pytorch - docTR documentation
Source code for doctr.datasets.generator.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from torch.utils.data._utils.collate import default_collate
+
+from .base import _CharacterGenerator, _WordGenerator
+
+__all__ = ["CharacterGenerator", "WordGenerator"]
+
+
+
+[docs] +class CharacterGenerator(_CharacterGenerator): + """Implements a character image generation dataset + + >>> from doctr.datasets import CharacterGenerator + >>> ds = CharacterGenerator(vocab='abdef', num_samples=100) + >>> img, target = ds[0] + + Args: + vocab: vocabulary to take the character from + num_samples: number of samples that will be generated iterating over the dataset + cache_samples: whether generated images should be cached firsthand + font_family: font to use to generate the text images + img_transforms: composable transformations that will be applied to each image + sample_transforms: composable transformations that will be applied to both the image and the target + """ + + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + setattr(self, "collate_fn", default_collate)
+ + + +
+[docs] +class WordGenerator(_WordGenerator): + """Implements a character image generation dataset + + >>> from doctr.datasets import WordGenerator + >>> ds = WordGenerator(vocab='abdef', min_chars=1, max_chars=32, num_samples=100) + >>> img, target = ds[0] + + Args: + vocab: vocabulary to take the character from + min_chars: minimum number of characters in a word + max_chars: maximum number of characters in a word + num_samples: number of samples that will be generated iterating over the dataset + cache_samples: whether generated images should be cached firsthand + font_family: font to use to generate the text images + img_transforms: composable transformations that will be applied to each image + sample_transforms: composable transformations that will be applied to both the image and the target + """ + + pass
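Since `CharacterGenerator` wires `default_collate` as its `collate_fn`, it can be batched with a vanilla PyTorch `DataLoader`. This is a sketch, assuming the default pipeline yields fixed-size tensors and that a default font is available on the system; targets are expected to index into the vocab.

```python
from torch.utils.data import DataLoader

from doctr.datasets import CharacterGenerator

ds = CharacterGenerator(vocab="abcdef", num_samples=100)
loader = DataLoader(ds, batch_size=8, collate_fn=ds.collate_fn)  # collate_fn set in __init__ above
images, targets = next(iter(loader))
print(images.shape, targets[:8])  # assumed: targets are vocab indices
```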
\ No newline at end of file diff --git a/_modules/doctr/datasets/ic03.html b/_modules/doctr/datasets/ic03.html new file mode 100644 index 0000000000..b1cc497f23 --- /dev/null +++ b/_modules/doctr/datasets/ic03.html @@ -0,0 +1,469 @@ + doctr.datasets.ic03 - docTR documentation
Source code for doctr.datasets.ic03

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import os
+from typing import Any
+
+import defusedxml.ElementTree as ET
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import VisionDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["IC03"]
+
+
+
+[docs] +class IC03(VisionDataset): + """IC03 dataset from `"ICDAR 2003 Robust Reading Competitions: Entries, Results and Future Directions" + <http://www.iapr-tc11.org/mediawiki/index.php?title=ICDAR_2003_Robust_Reading_Competitions>`_. + + .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/ic03-grid.png&src=0 + :align: center + + >>> from doctr.datasets import IC03 + >>> train_set = IC03(train=True, download=True) + >>> img, target = train_set[0] + + Args: + train: whether the subset should be the training one + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + recognition_task: whether the dataset should be used for recognition task + detection_task: whether the dataset should be used for detection task + **kwargs: keyword arguments from `VisionDataset`. + """ + + TRAIN = ( + "http://www.iapr-tc11.org/dataset/ICDAR2003_RobustReading/TrialTrain/scene.zip", + "9d86df514eb09dd693fb0b8c671ef54a0cfe02e803b1bbef9fc676061502eb94", + "ic03_train.zip", + ) + TEST = ( + "http://www.iapr-tc11.org/dataset/ICDAR2003_RobustReading/TrialTest/scene.zip", + "dbc4b5fd5d04616b8464a1b42ea22db351ee22c2546dd15ac35611857ea111f8", + "ic03_test.zip", + ) + + def __init__( + self, + train: bool = True, + use_polygons: bool = False, + recognition_task: bool = False, + detection_task: bool = False, + **kwargs: Any, + ) -> None: + url, sha256, file_name = self.TRAIN if train else self.TEST + super().__init__( + url, + file_name, + sha256, + True, + pre_transforms=convert_target_to_relative if not recognition_task else None, + **kwargs, + ) + if recognition_task and detection_task: + raise ValueError( + "`recognition_task` and `detection_task` cannot be set to True simultaneously. " + + "To get the whole dataset with boxes and labels leave both parameters to False." 
+ ) + + self.train = train + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] + np_dtype = np.float32 + + # Load xml data + tmp_root = ( + os.path.join(self.root, "SceneTrialTrain" if self.train else "SceneTrialTest") if sha256 else self.root + ) + xml_tree = ET.parse(os.path.join(tmp_root, "words.xml")) + xml_root = xml_tree.getroot() + + for image in tqdm(iterable=xml_root, desc="Preparing and Loading IC03", total=len(xml_root)): + name, _resolution, rectangles = image + + # File existence check + if not os.path.exists(os.path.join(tmp_root, name.text)): + raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, name.text)}") + + if use_polygons: + # (x, y) coordinates of top left, top right, bottom right, bottom left corners + _boxes = [ + [ + [float(rect.attrib["x"]), float(rect.attrib["y"])], + [float(rect.attrib["x"]) + float(rect.attrib["width"]), float(rect.attrib["y"])], + [ + float(rect.attrib["x"]) + float(rect.attrib["width"]), + float(rect.attrib["y"]) + float(rect.attrib["height"]), + ], + [float(rect.attrib["x"]), float(rect.attrib["y"]) + float(rect.attrib["height"])], + ] + for rect in rectangles + ] + else: + # x_min, y_min, x_max, y_max + _boxes = [ + [ + float(rect.attrib["x"]), # type: ignore[list-item] + float(rect.attrib["y"]), # type: ignore[list-item] + float(rect.attrib["x"]) + float(rect.attrib["width"]), # type: ignore[list-item] + float(rect.attrib["y"]) + float(rect.attrib["height"]), # type: ignore[list-item] + ] + for rect in rectangles + ] + + # filter images without boxes + if len(_boxes) > 0: + boxes: np.ndarray = np.asarray(_boxes, dtype=np_dtype) + # Get the labels + labels = [lab.text for rect in rectangles for lab in rect if lab.text] + + if recognition_task: + crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, name.text), geoms=boxes) + for crop, label in zip(crops, labels): + if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0 and " " not in label: + self.data.append((crop, label)) + elif detection_task: + self.data.append((name.text, boxes)) + else: + self.data.append((name.text, dict(boxes=boxes, labels=labels))) + + self.root = tmp_root + + def extra_repr(self) -> str: + return f"train={self.train}"
\ No newline at end of file diff --git a/_modules/doctr/datasets/ic13.html b/_modules/doctr/datasets/ic13.html new file mode 100644 index 0000000000..35292538dd --- /dev/null +++ b/_modules/doctr/datasets/ic13.html @@ -0,0 +1,442 @@ + doctr.datasets.ic13 - docTR documentation
Source code for doctr.datasets.ic13

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import csv
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import AbstractDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["IC13"]
+
+
+
+[docs] +class IC13(AbstractDataset): + """IC13 dataset from `"ICDAR 2013 Robust Reading Competition" <https://rrc.cvc.uab.es/>`_. + + .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/ic13-grid.png&src=0 + :align: center + + >>> # NOTE: You need to download both image and label parts from Focused Scene Text challenge Task2.1 2013-2015. + >>> from doctr.datasets import IC13 + >>> train_set = IC13(img_folder="/path/to/Challenge2_Training_Task12_Images", + >>> label_folder="/path/to/Challenge2_Training_Task1_GT") + >>> img, target = train_set[0] + >>> test_set = IC13(img_folder="/path/to/Challenge2_Test_Task12_Images", + >>> label_folder="/path/to/Challenge2_Test_Task1_GT") + >>> img, target = test_set[0] + + Args: + img_folder: folder with all the images of the dataset + label_folder: folder with all annotation files for the images + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + recognition_task: whether the dataset should be used for recognition task + detection_task: whether the dataset should be used for detection task + **kwargs: keyword arguments from `AbstractDataset`. + """ + + def __init__( + self, + img_folder: str, + label_folder: str, + use_polygons: bool = False, + recognition_task: bool = False, + detection_task: bool = False, + **kwargs: Any, + ) -> None: + super().__init__( + img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs + ) + if recognition_task and detection_task: + raise ValueError( + "`recognition_task` and `detection_task` cannot be set to True simultaneously. " + + "To get the whole dataset with boxes and labels leave both parameters to False." + ) + + # File existence check + if not os.path.exists(label_folder) or not os.path.exists(img_folder): + raise FileNotFoundError( + f"unable to locate {label_folder if not os.path.exists(label_folder) else img_folder}" + ) + + self.data: list[tuple[Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] + np_dtype = np.float32 + + img_names = os.listdir(img_folder) + + for img_name in tqdm(iterable=img_names, desc="Preparing and Loading IC13", total=len(img_names)): + img_path = Path(img_folder, img_name) + label_path = Path(label_folder, "gt_" + Path(img_name).stem + ".txt") + + with open(label_path, newline="\n") as f: + _lines = [ + [val[:-1] if val.endswith(",") else val for val in row] + for row in csv.reader(f, delimiter=" ", quotechar="'") + ] + labels = [line[-1].replace('"', "") for line in _lines] + # xmin, ymin, xmax, ymax + box_targets: np.ndarray = np.array([list(map(int, line[:4])) for line in _lines], dtype=np_dtype) + if use_polygons: + # (x, y) coordinates of top left, top right, bottom right, bottom left corners + box_targets = np.array( + [ + [ + [coords[0], coords[1]], + [coords[2], coords[1]], + [coords[2], coords[3]], + [coords[0], coords[3]], + ] + for coords in box_targets + ], + dtype=np_dtype, + ) + + if recognition_task: + crops = crop_bboxes_from_image(img_path=img_path, geoms=box_targets) + for crop, label in zip(crops, labels): + if " " not in label: + self.data.append((crop, label)) + elif detection_task: + self.data.append((img_path, box_targets)) + else: + self.data.append((img_path, dict(boxes=box_targets, labels=labels)))
\ No newline at end of file diff --git a/_modules/doctr/datasets/iiit5k.html b/_modules/doctr/datasets/iiit5k.html new file mode 100644 index 0000000000..ff4304ee35 --- /dev/null +++ b/_modules/doctr/datasets/iiit5k.html @@ -0,0 +1,451 @@

Source code for doctr.datasets.iiit5k

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import os
+from typing import Any
+
+import numpy as np
+import scipy.io as sio
+from PIL import Image
+from tqdm import tqdm
+
+from .datasets import VisionDataset
+from .utils import convert_target_to_relative
+
+__all__ = ["IIIT5K"]
+
+
+
+[docs] +class IIIT5K(VisionDataset): + """IIIT-5K character-level localization dataset from + `"BMVC 2012 Scene Text Recognition using Higher Order Language Priors" + <https://cdn.iiit.ac.in/cdn/cvit.iiit.ac.in/images/Projects/SceneTextUnderstanding/home/mishraBMVC12.pdf>`_. + + .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/iiit5k-grid.png&src=0 + :align: center + + >>> # NOTE: this dataset is for character-level localization + >>> from doctr.datasets import IIIT5K + >>> train_set = IIIT5K(train=True, download=True) + >>> img, target = train_set[0] + + Args: + train: whether the subset should be the training one + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + recognition_task: whether the dataset should be used for recognition task + detection_task: whether the dataset should be used for detection task + **kwargs: keyword arguments from `VisionDataset`. + """ + + URL = "https://cvit.iiit.ac.in/images/Projects/SceneTextUnderstanding/IIIT5K-Word_V3.0.tar.gz" + SHA256 = "7872c9efbec457eb23f3368855e7738f72ce10927f52a382deb4966ca0ffa38e" + + def __init__( + self, + train: bool = True, + use_polygons: bool = False, + recognition_task: bool = False, + detection_task: bool = False, + **kwargs: Any, + ) -> None: + super().__init__( + self.URL, + None, + file_hash=self.SHA256, + extract_archive=True, + pre_transforms=convert_target_to_relative if not recognition_task else None, + **kwargs, + ) + if recognition_task and detection_task: + raise ValueError( + "`recognition_task` and `detection_task` cannot be set to True simultaneously. " + + "To get the whole dataset with boxes and labels leave both parameters to False." + ) + + self.train = train + + # Load mat data + tmp_root = os.path.join(self.root, "IIIT5K") if self.SHA256 else self.root + mat_file = "trainCharBound" if self.train else "testCharBound" + mat_data = sio.loadmat(os.path.join(tmp_root, f"{mat_file}.mat"))[mat_file][0] + + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] + np_dtype = np.float32 + + for img_path, label, box_targets in tqdm( + iterable=mat_data, desc="Preparing and Loading IIIT5K", total=len(mat_data) + ): + _raw_path = img_path[0] + _raw_label = label[0] + + # File existence check + if not os.path.exists(os.path.join(tmp_root, _raw_path)): + raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, _raw_path)}") + + if use_polygons: + # (x, y) coordinates of top left, top right, bottom right, bottom left corners + box_targets = [ + [ + [box[0], box[1]], + [box[0] + box[2], box[1]], + [box[0] + box[2], box[1] + box[3]], + [box[0], box[1] + box[3]], + ] + for box in box_targets + ] + else: + # xmin, ymin, xmax, ymax + box_targets = [[box[0], box[1], box[0] + box[2], box[1] + box[3]] for box in box_targets] + + if recognition_task: + if " " not in _raw_label: + with Image.open(os.path.join(tmp_root, _raw_path)) as pil_img: + self.data.append((np.array(pil_img.convert("RGB")), _raw_label)) + elif detection_task: + self.data.append((_raw_path, np.asarray(box_targets, dtype=np_dtype))) + else: + # label are casted to list where each char corresponds to the character's bounding box + self.data.append(( + _raw_path, + dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=list(_raw_label)), + )) + + self.root = tmp_root + + def extra_repr(self) -> str: + return f"train={self.train}"
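A short illustrative sketch of the same optional modes for IIIT5K (editorial, not upstream documentation; `download=True` comes from `VisionDataset`, as in the docstring example):
>>> from doctr.datasets import IIIT5K
>>> # word crops and transcriptions (character boxes are dropped)
>>> reco_set = IIIT5K(train=True, download=True, recognition_task=True)
>>> crop, word = reco_set[0]
>>> # character polygons only
>>> det_set = IIIT5K(train=False, download=True, use_polygons=True, detection_task=True)
>>> img, polygons = det_set[0]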
\ No newline at end of file diff --git a/_modules/doctr/datasets/iiithws.html b/_modules/doctr/datasets/iiithws.html new file mode 100644 index 0000000000..9ebeaf462f --- /dev/null +++ b/_modules/doctr/datasets/iiithws.html @@ -0,0 +1,408 @@

Source code for doctr.datasets.iiithws

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import os
+from random import sample
+from typing import Any
+
+from tqdm import tqdm
+
+from .datasets import AbstractDataset
+
+__all__ = ["IIITHWS"]
+
+
+
+[docs] +class IIITHWS(AbstractDataset): + """IIITHWS dataset from `"Generating Synthetic Data for Text Recognition" + <https://arxiv.org/pdf/1608.04224.pdf>`_ | `"repository" <https://github.com/kris314/hwnet>`_ | + `"website" <https://cvit.iiit.ac.in/research/projects/cvit-projects/matchdocimgs>`_. + + >>> # NOTE: This is a pure recognition dataset without bounding box labels. + >>> # NOTE: You need to download the dataset. + >>> from doctr.datasets import IIITHWS + >>> train_set = IIITHWS(img_folder="/path/to/iiit-hws/Images_90K_Normalized", + >>> label_path="/path/to/IIIT-HWS-90K.txt", + >>> train=True) + >>> img, target = train_set[0] + >>> test_set = IIITHWS(img_folder="/path/to/iiit-hws/Images_90K_Normalized", + >>> label_path="/path/to/IIIT-HWS-90K.txt", + >>> train=False) + >>> img, target = test_set[0] + + Args: + img_folder: folder with all the images of the dataset + label_path: path to the file with the labels + train: whether the subset should be the training one + **kwargs: keyword arguments from `AbstractDataset`. + """ + + def __init__( + self, + img_folder: str, + label_path: str, + train: bool = True, + **kwargs: Any, + ) -> None: + super().__init__(img_folder, **kwargs) + + # File existence check + if not os.path.exists(label_path) or not os.path.exists(img_folder): + raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}") + + self.data: list[tuple[str, str]] = [] + self.train = train + + with open(label_path) as f: + annotations = f.readlines() + + # Shuffle the dataset otherwise the test set will contain the same labels n times + annotations = sample(annotations, len(annotations)) + train_samples = int(len(annotations) * 0.9) + set_slice = slice(train_samples) if self.train else slice(train_samples, None) + + for annotation in tqdm( + iterable=annotations[set_slice], desc="Preparing and Loading IIITHWS", total=len(annotations[set_slice]) + ): + img_path, label = annotation.split()[0:2] + img_path = os.path.join(img_folder, img_path) + + self.data.append((img_path, label)) + + def extra_repr(self) -> str: + return f"train={self.train}"
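Illustrative usage (editorial sketch; placeholder paths as above). Note that the 90/10 split above relies on an unseeded `random.sample`, so train and test subsets built in separate runs are not guaranteed to be complementary:
>>> from doctr.datasets import IIITHWS
>>> train_set = IIITHWS(img_folder="/path/to/iiit-hws/Images_90K_Normalized",
>>>                     label_path="/path/to/IIIT-HWS-90K.txt", train=True)
>>> test_set = IIITHWS(img_folder="/path/to/iiit-hws/Images_90K_Normalized",
>>>                    label_path="/path/to/IIIT-HWS-90K.txt", train=False)
>>> img, word = train_set[0]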
\ No newline at end of file diff --git a/_modules/doctr/datasets/imgur5k.html b/_modules/doctr/datasets/imgur5k.html new file mode 100644 index 0000000000..72b67f171c --- /dev/null +++ b/_modules/doctr/datasets/imgur5k.html @@ -0,0 +1,498 @@

Source code for doctr.datasets.imgur5k

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import glob
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+import cv2
+import numpy as np
+from PIL import Image
+from tqdm import tqdm
+
+from .datasets import AbstractDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["IMGUR5K"]
+
+
+
+[docs] +class IMGUR5K(AbstractDataset): + """IMGUR5K dataset from `"TextStyleBrush: Transfer of Text Aesthetics from a Single Example" + <https://arxiv.org/abs/2106.08385>`_ | + `repository <https://github.com/facebookresearch/IMGUR5K-Handwriting-Dataset>`_. + + .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/imgur5k-grid.png&src=0 + :align: center + :width: 630 + :height: 400 + + >>> # NOTE: You need to download/generate the dataset from the repository. + >>> from doctr.datasets import IMGUR5K + >>> train_set = IMGUR5K(train=True, img_folder="/path/to/IMGUR5K-Handwriting-Dataset/images", + >>> label_path="/path/to/IMGUR5K-Handwriting-Dataset/dataset_info/imgur5k_annotations.json") + >>> img, target = train_set[0] + >>> test_set = IMGUR5K(train=False, img_folder="/path/to/IMGUR5K-Handwriting-Dataset/images", + >>> label_path="/path/to/IMGUR5K-Handwriting-Dataset/dataset_info/imgur5k_annotations.json") + >>> img, target = test_set[0] + + Args: + img_folder: folder with all the images of the dataset + label_path: path to the annotations file of the dataset + train: whether the subset should be the training one + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + recognition_task: whether the dataset should be used for recognition task + detection_task: whether the dataset should be used for detection task + **kwargs: keyword arguments from `AbstractDataset`. + """ + + def __init__( + self, + img_folder: str, + label_path: str, + train: bool = True, + use_polygons: bool = False, + recognition_task: bool = False, + detection_task: bool = False, + **kwargs: Any, + ) -> None: + super().__init__( + img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs + ) + if recognition_task and detection_task: + raise ValueError( + "`recognition_task` and `detection_task` cannot be set to True simultaneously. " + + "To get the whole dataset with boxes and labels leave both parameters to False." 
+ ) + + # File existence check + if not os.path.exists(label_path) or not os.path.exists(img_folder): + raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}") + + self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] + self.train = train + np_dtype = np.float32 + + img_names = os.listdir(img_folder) + train_samples = int(len(img_names) * 0.9) + set_slice = slice(train_samples) if self.train else slice(train_samples, None) + + # define folder to write IMGUR5K recognition dataset + reco_folder_name = "IMGUR5K_recognition_train" if self.train else "IMGUR5K_recognition_test" + reco_folder_name = "Poly_" + reco_folder_name if use_polygons else reco_folder_name + reco_folder_path = os.path.join(os.path.dirname(self.root), reco_folder_name) + reco_images_counter = 0 + + if recognition_task and os.path.isdir(reco_folder_path): + self._read_from_folder(reco_folder_path) + return + elif recognition_task and not os.path.isdir(reco_folder_path): + os.makedirs(reco_folder_path, exist_ok=False) + + with open(label_path) as f: + annotation_file = json.load(f) + + for img_name in tqdm( + iterable=img_names[set_slice], desc="Preparing and Loading IMGUR5K", total=len(img_names[set_slice]) + ): + img_path = Path(img_folder, img_name) + img_id = img_name.split(".")[0] + + # File existence check + if not os.path.exists(os.path.join(self.root, img_name)): + raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}") + + # some files have no annotations which are marked with only a dot in the 'word' key + # ref: https://github.com/facebookresearch/IMGUR5K-Handwriting-Dataset/blob/main/README.md + if img_id not in annotation_file["index_to_ann_map"].keys(): + continue + ann_ids = annotation_file["index_to_ann_map"][img_id] + annotations = [annotation_file["ann_id"][a_id] for a_id in ann_ids] + + labels = [ann["word"] for ann in annotations if ann["word"] != "."] + # x_center, y_center, width, height, angle + _boxes = [ + list(map(float, ann["bounding_box"].strip("[ ]").split(", "))) + for ann in annotations + if ann["word"] != "." 
+ ] + # (x, y) coordinates of top left, top right, bottom right, bottom left corners + box_targets = [cv2.boxPoints(((box[0], box[1]), (box[2], box[3]), box[4])) for box in _boxes] + + if not use_polygons: + # xmin, ymin, xmax, ymax + box_targets = [np.concatenate((points.min(0), points.max(0)), axis=-1) for points in box_targets] + + # filter images without boxes + if len(box_targets) > 0: + if recognition_task: + crops = crop_bboxes_from_image( + img_path=os.path.join(self.root, img_name), geoms=np.asarray(box_targets, dtype=np_dtype) + ) + for crop, label in zip(crops, labels): + if ( + crop.shape[0] > 0 + and crop.shape[1] > 0 + and len(label) > 0 + and len(label) < 30 + and " " not in label + ): + # write data to disk + with open(os.path.join(reco_folder_path, f"{reco_images_counter}.txt"), "w") as f: + f.write(label) + tmp_img = Image.fromarray(crop) + tmp_img.save(os.path.join(reco_folder_path, f"{reco_images_counter}.png")) + reco_images_counter += 1 + elif detection_task: + self.data.append((img_path, np.asarray(box_targets, dtype=np_dtype))) + else: + self.data.append((img_path, dict(boxes=np.asarray(box_targets, dtype=np_dtype), labels=labels))) + + if recognition_task: + self._read_from_folder(reco_folder_path) + + def extra_repr(self) -> str: + return f"train={self.train}" + + def _read_from_folder(self, path: str) -> None: + img_paths = glob.glob(os.path.join(path, "*.png")) + for img_path in tqdm(iterable=img_paths, desc="Preparing and Loading IMGUR5K", total=len(img_paths)): + with open(os.path.join(path, f"{os.path.basename(img_path)[:-4]}.txt"), "r") as f: + self.data.append((img_path, f.read()))
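As the code above shows, the first recognition-mode instantiation crops every word and writes the crops to an `IMGUR5K_recognition_*` folder next to the image root; later instantiations reload that folder directly. An illustrative sketch (editorial; placeholder paths as in the docstring):
>>> from doctr.datasets import IMGUR5K
>>> reco_train = IMGUR5K(img_folder="/path/to/IMGUR5K-Handwriting-Dataset/images",
>>>                      label_path="/path/to/IMGUR5K-Handwriting-Dataset/dataset_info/imgur5k_annotations.json",
>>>                      train=True, recognition_task=True)  # first call materializes crops on disk
>>> crop, word = reco_train[0]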
\ No newline at end of file diff --git a/_modules/doctr/datasets/mjsynth.html b/_modules/doctr/datasets/mjsynth.html new file mode 100644 index 0000000000..3b1e5fe64d --- /dev/null +++ b/_modules/doctr/datasets/mjsynth.html @@ -0,0 +1,441 @@

Source code for doctr.datasets.mjsynth

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import os
+from typing import Any
+
+from tqdm import tqdm
+
+from .datasets import AbstractDataset
+
+__all__ = ["MJSynth"]
+
+
+
+[docs] +class MJSynth(AbstractDataset): + """MJSynth dataset from `"Synthetic Data and Artificial Neural Networks for Natural Scene Text Recognition" + <https://www.robots.ox.ac.uk/~vgg/data/text/>`_. + + >>> # NOTE: This is a pure recognition dataset without bounding box labels. + >>> # NOTE: You need to download the dataset. + >>> from doctr.datasets import MJSynth + >>> train_set = MJSynth(img_folder="/path/to/mjsynth/mnt/ramdisk/max/90kDICT32px", + >>> label_path="/path/to/mjsynth/mnt/ramdisk/max/90kDICT32px/imlist.txt", + >>> train=True) + >>> img, target = train_set[0] + >>> test_set = MJSynth(img_folder="/path/to/mjsynth/mnt/ramdisk/max/90kDICT32px", + >>> label_path="/path/to/mjsynth/mnt/ramdisk/max/90kDICT32px/imlist.txt", + >>> train=False) + >>> img, target = test_set[0] + + Args: + img_folder: folder with all the images of the dataset + label_path: path to the file with the labels + train: whether the subset should be the training one + **kwargs: keyword arguments from `AbstractDataset`. + """ + + # filter corrupted or missing images + BLACKLIST = [ + "./1881/4/225_Marbling_46673.jpg\n", + "./2069/4/192_whittier_86389.jpg\n", + "./869/4/234_TRIASSIC_80582.jpg\n", + "./173/2/358_BURROWING_10395.jpg\n", + "./913/4/231_randoms_62372.jpg\n", + "./596/2/372_Ump_81662.jpg\n", + "./936/2/375_LOCALITIES_44992.jpg\n", + "./2540/4/246_SQUAMOUS_73902.jpg\n", + "./1332/4/224_TETHERED_78397.jpg\n", + "./627/6/83_PATRIARCHATE_55931.jpg\n", + "./2013/2/370_refract_63890.jpg\n", + "./2911/6/77_heretical_35885.jpg\n", + "./1730/2/361_HEREON_35880.jpg\n", + "./2194/2/334_EFFLORESCENT_24742.jpg\n", + "./2025/2/364_SNORTERS_72304.jpg\n", + "./368/4/232_friar_30876.jpg\n", + "./275/6/96_hackle_34465.jpg\n", + "./384/4/220_bolts_8596.jpg\n", + "./905/4/234_Postscripts_59142.jpg\n", + "./2749/6/101_Chided_13155.jpg\n", + "./495/6/81_MIDYEAR_48332.jpg\n", + "./2852/6/60_TOILSOME_79481.jpg\n", + "./554/2/366_Teleconferences_77948.jpg\n", + "./1696/4/211_Queened_61779.jpg\n", + "./2128/2/369_REDACTED_63458.jpg\n", + "./2557/2/351_DOWN_23492.jpg\n", + "./2489/4/221_snored_72290.jpg\n", + "./1650/2/355_stony_74902.jpg\n", + "./1863/4/223_Diligently_21672.jpg\n", + "./264/2/362_FORETASTE_30276.jpg\n", + "./429/4/208_Mainmasts_46140.jpg\n", + "./1817/2/363_actuating_904.jpg\n", + ] + + def __init__( + self, + img_folder: str, + label_path: str, + train: bool = True, + **kwargs: Any, + ) -> None: + super().__init__(img_folder, **kwargs) + + # File existence check + if not os.path.exists(label_path) or not os.path.exists(img_folder): + raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}") + + self.data: list[tuple[str, str]] = [] + self.train = train + + with open(label_path) as f: + img_paths = f.readlines() + + train_samples = int(len(img_paths) * 0.9) + set_slice = slice(train_samples) if self.train else slice(train_samples, None) + + for path in tqdm( + iterable=img_paths[set_slice], desc="Preparing and Loading MJSynth", total=len(img_paths[set_slice]) + ): + if path not in self.BLACKLIST: + label = path.split("_")[1] + img_path = os.path.join(img_folder, path[2:]).strip() + + self.data.append((img_path, label)) + + def extra_repr(self) -> str: + return f"train={self.train}"
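An illustrative sketch (editorial, not upstream documentation): the transcription is parsed from each file name, so only the image list is needed as a label file.
>>> from doctr.datasets import MJSynth
>>> train_set = MJSynth(img_folder="/path/to/mjsynth/mnt/ramdisk/max/90kDICT32px",
>>>                     label_path="/path/to/mjsynth/mnt/ramdisk/max/90kDICT32px/imlist.txt",
>>>                     train=True)
>>> img, word = train_set[0]  # the token between the first two underscores of the file name is the label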
\ No newline at end of file diff --git a/_modules/doctr/datasets/ocr.html b/_modules/doctr/datasets/ocr.html new file mode 100644 index 0000000000..5310978159 --- /dev/null +++ b/_modules/doctr/datasets/ocr.html @@ -0,0 +1,404 @@

Source code for doctr.datasets.ocr

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+
+from .datasets import AbstractDataset
+
+__all__ = ["OCRDataset"]
+
+
+
+[docs] +class OCRDataset(AbstractDataset): + """Implements an OCR dataset + + >>> from doctr.datasets import OCRDataset + >>> train_set = OCRDataset(img_folder="/path/to/images", + >>> label_file="/path/to/labels.json") + >>> img, target = train_set[0] + + Args: + img_folder: local path to image folder (all jpg at the root) + label_file: local path to the label file + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + **kwargs: keyword arguments from `AbstractDataset`. + """ + + def __init__( + self, + img_folder: str, + label_file: str, + use_polygons: bool = False, + **kwargs: Any, + ) -> None: + super().__init__(img_folder, **kwargs) + + # List images + self.data: list[tuple[Path, dict[str, Any]]] = [] + np_dtype = np.float32 + with open(label_file, "rb") as f: + data = json.load(f) + + for img_name, annotations in data.items(): + # Get image path + img_name = Path(img_name) + # File existence check + if not os.path.exists(os.path.join(self.root, img_name)): + raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}") + + # handle empty images + if len(annotations["typed_words"]) == 0: + self.data.append((img_name, dict(boxes=np.zeros((0, 4), dtype=np_dtype), labels=[]))) + continue + # Unpack the straight boxes (xmin, ymin, xmax, ymax) + geoms = [list(map(float, obj["geometry"][:4])) for obj in annotations["typed_words"]] + if use_polygons: + # (x, y) coordinates of top left, top right, bottom right, bottom left corners + geoms = [ + [geom[:2], [geom[2], geom[1]], geom[2:], [geom[0], geom[3]]] # type: ignore[list-item] + for geom in geoms + ] + + text_targets = [obj["value"] for obj in annotations["typed_words"]] + + self.data.append((img_name, dict(boxes=np.asarray(geoms, dtype=np_dtype), labels=text_targets)))
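For reference, a minimal `labels.json` compatible with the parsing above could look as follows (file name, coordinates and words are purely illustrative; geometries are read as xmin, ymin, xmax, ymax):
>>> # {
>>> #   "sample.jpg": {
>>> #     "typed_words": [
>>> #       {"geometry": [0.10, 0.30, 0.25, 0.35], "value": "Hello"},
>>> #       {"geometry": [0.40, 0.30, 0.60, 0.35], "value": "world"}
>>> #     ]
>>> #   }
>>> # }
>>> from doctr.datasets import OCRDataset
>>> ds = OCRDataset(img_folder="/path/to/images", label_file="/path/to/labels.json", use_polygons=True)
>>> img, target = ds[0]  # target: dict(boxes=(N, 4, 2) float32 array, labels=list of strings)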
\ No newline at end of file diff --git a/_modules/doctr/datasets/recognition.html b/_modules/doctr/datasets/recognition.html new file mode 100644 index 0000000000..74b44887ef --- /dev/null +++ b/_modules/doctr/datasets/recognition.html @@ -0,0 +1,389 @@

Source code for doctr.datasets.recognition

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+from .datasets import AbstractDataset
+
+__all__ = ["RecognitionDataset"]
+
+
+
+[docs] +class RecognitionDataset(AbstractDataset): + """Dataset implementation for text recognition tasks + + >>> from doctr.datasets import RecognitionDataset + >>> train_set = RecognitionDataset(img_folder="/path/to/images", + >>> labels_path="/path/to/labels.json") + >>> img, target = train_set[0] + + Args: + img_folder: path to the images folder + labels_path: path to the json file containing all labels (character sequences) + **kwargs: keyword arguments from `AbstractDataset`. + """ + + def __init__( + self, + img_folder: str, + labels_path: str, + **kwargs: Any, + ) -> None: + super().__init__(img_folder, **kwargs) + + self.data: list[tuple[str, str]] = [] + with open(labels_path, encoding="utf-8") as f: + labels = json.load(f) + + for img_name, label in labels.items(): + if not os.path.exists(os.path.join(self.root, img_name)): + raise FileNotFoundError(f"unable to locate {os.path.join(self.root, img_name)}") + + self.data.append((img_name, label)) + + def merge_dataset(self, ds: AbstractDataset) -> None: + # Update data with new root for self + self.data = [(str(Path(self.root).joinpath(img_path)), label) for img_path, label in self.data] + # Define new root + self.root = Path("/") + # Merge with ds data + for img_path, label in ds.data: + self.data.append((str(Path(ds.root).joinpath(img_path)), label))
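An illustrative sketch of `merge_dataset` (editorial; paths and file names are placeholders): the labels file is a flat mapping from image name to transcription, and merging rewrites stored paths against each dataset's root so one object can serve both folders.
>>> from doctr.datasets import RecognitionDataset
>>> ds_a = RecognitionDataset(img_folder="/path/to/crops_a", labels_path="/path/to/labels_a.json")
>>> ds_b = RecognitionDataset(img_folder="/path/to/crops_b", labels_path="/path/to/labels_b.json")
>>> ds_a.merge_dataset(ds_b)  # ds_a now also serves ds_b's samples
>>> img, word = ds_a[0]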
\ No newline at end of file diff --git a/_modules/doctr/datasets/sroie.html b/_modules/doctr/datasets/sroie.html new file mode 100644 index 0000000000..3c6350622f --- /dev/null +++ b/_modules/doctr/datasets/sroie.html @@ -0,0 +1,448 @@

Source code for doctr.datasets.sroie

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import csv
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import VisionDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["SROIE"]
+
+
+
+[docs] +class SROIE(VisionDataset): + """SROIE dataset from `"ICDAR2019 Competition on Scanned Receipt OCR and Information Extraction" + <https://arxiv.org/pdf/2103.10213.pdf>`_. + + .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/sroie-grid.png&src=0 + :align: center + + >>> from doctr.datasets import SROIE + >>> train_set = SROIE(train=True, download=True) + >>> img, target = train_set[0] + + Args: + train: whether the subset should be the training one + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + recognition_task: whether the dataset should be used for recognition task + detection_task: whether the dataset should be used for detection task + **kwargs: keyword arguments from `VisionDataset`. + """ + + TRAIN = ( + "https://doctr-static.mindee.com/models?id=v0.1.1/sroie2019_train_task1.zip&src=0", + "d4fa9e60abb03500d83299c845b9c87fd9c9430d1aeac96b83c5d0bb0ab27f6f", + "sroie2019_train_task1.zip", + ) + TEST = ( + "https://doctr-static.mindee.com/models?id=v0.1.1/sroie2019_test.zip&src=0", + "41b3c746a20226fddc80d86d4b2a903d43b5be4f521dd1bbe759dbf8844745e2", + "sroie2019_test.zip", + ) + + def __init__( + self, + train: bool = True, + use_polygons: bool = False, + recognition_task: bool = False, + detection_task: bool = False, + **kwargs: Any, + ) -> None: + url, sha256, name = self.TRAIN if train else self.TEST + super().__init__( + url, + name, + sha256, + True, + pre_transforms=convert_target_to_relative if not recognition_task else None, + **kwargs, + ) + if recognition_task and detection_task: + raise ValueError( + "`recognition_task` and `detection_task` cannot be set to True simultaneously. " + + "To get the whole dataset with boxes and labels leave both parameters to False." + ) + + self.train = train + + tmp_root = os.path.join(self.root, "images") + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] + np_dtype = np.float32 + + for img_path in tqdm( + iterable=os.listdir(tmp_root), desc="Preparing and Loading SROIE", total=len(os.listdir(tmp_root)) + ): + # File existence check + if not os.path.exists(os.path.join(tmp_root, img_path)): + raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_path)}") + + stem = Path(img_path).stem + with open(os.path.join(self.root, "annotations", f"{stem}.txt"), encoding="latin") as f: + _rows = [row for row in list(csv.reader(f, delimiter=",")) if len(row) > 0] + + labels = [",".join(row[8:]) for row in _rows] + # reorder coordinates (8 -> (4,2) -> + # (x, y) coordinates of top left, top right, bottom right, bottom left corners) and filter empty lines + coords: np.ndarray = np.stack( + [np.array(list(map(int, row[:8])), dtype=np_dtype).reshape((4, 2)) for row in _rows], axis=0 + ) + + if not use_polygons: + # xmin, ymin, xmax, ymax + coords = np.concatenate((coords.min(axis=1), coords.max(axis=1)), axis=1) + + if recognition_task: + crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, img_path), geoms=coords) + for crop, label in zip(crops, labels): + if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0: + self.data.append((crop, label)) + elif detection_task: + self.data.append((img_path, coords)) + else: + self.data.append((img_path, dict(boxes=coords, labels=labels))) + + self.root = tmp_root + + def extra_repr(self) -> str: + return f"train={self.train}"
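An illustrative sketch of the optional task modes (editorial, not upstream documentation):
>>> from doctr.datasets import SROIE
>>> # receipt-word crops and transcriptions
>>> reco_set = SROIE(train=True, download=True, recognition_task=True)
>>> crop, text = reco_set[0]
>>> # rotated boxes only
>>> det_set = SROIE(train=False, download=True, use_polygons=True, detection_task=True)
>>> img, polygons = det_set[0]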
\ No newline at end of file diff --git a/_modules/doctr/datasets/svhn.html b/_modules/doctr/datasets/svhn.html new file mode 100644 index 0000000000..83e8ff4e32 --- /dev/null +++ b/_modules/doctr/datasets/svhn.html @@ -0,0 +1,476 @@

Source code for doctr.datasets.svhn

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import os
+from typing import Any
+
+import h5py
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import VisionDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["SVHN"]
+
+
+
+[docs] +class SVHN(VisionDataset): + """SVHN dataset from `"The Street View House Numbers (SVHN) Dataset" + <http://ufldl.stanford.edu/housenumbers/>`_. + + .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/svhn-grid.png&src=0 + :align: center + + >>> from doctr.datasets import SVHN + >>> train_set = SVHN(train=True, download=True) + >>> img, target = train_set[0] + + Args: + train: whether the subset should be the training one + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + recognition_task: whether the dataset should be used for recognition task + detection_task: whether the dataset should be used for detection task + **kwargs: keyword arguments from `VisionDataset`. + """ + + TRAIN = ( + "http://ufldl.stanford.edu/housenumbers/train.tar.gz", + "4b17bb33b6cd8f963493168f80143da956f28ec406cc12f8e5745a9f91a51898", + "svhn_train.tar", + ) + + TEST = ( + "http://ufldl.stanford.edu/housenumbers/test.tar.gz", + "57ac9ceb530e4aa85b55d991be8fc49c695b3d71c6f6a88afea86549efde7fb5", + "svhn_test.tar", + ) + + def __init__( + self, + train: bool = True, + use_polygons: bool = False, + recognition_task: bool = False, + detection_task: bool = False, + **kwargs: Any, + ) -> None: + url, sha256, name = self.TRAIN if train else self.TEST + super().__init__( + url, + file_name=name, + file_hash=sha256, + extract_archive=True, + pre_transforms=convert_target_to_relative if not recognition_task else None, + **kwargs, + ) + if recognition_task and detection_task: + raise ValueError( + "`recognition_task` and `detection_task` cannot be set to True simultaneously. " + + "To get the whole dataset with boxes and labels leave both parameters to False." + ) + + self.train = train + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] + np_dtype = np.float32 + + tmp_root = os.path.join(self.root, "train" if train else "test") + + # Load mat data (matlab v7.3 - can not be loaded with scipy) + with h5py.File(os.path.join(tmp_root, "digitStruct.mat"), "r") as f: + img_refs = f["digitStruct/name"] + box_refs = f["digitStruct/bbox"] + for img_ref, box_ref in tqdm( + iterable=zip(img_refs, box_refs), desc="Preparing and Loading SVHN", total=len(img_refs) + ): + # convert ascii matrix to string + img_name = "".join(map(chr, f[img_ref[0]][()].flatten())) + + # File existence check + if not os.path.exists(os.path.join(tmp_root, img_name)): + raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_name)}") + + # Unpack the information + box = f[box_ref[0]] + if box["left"].shape[0] == 1: + box_dict = {k: [int(vals[0][0])] for k, vals in box.items()} + else: + box_dict = {k: [int(f[v[0]][()].item()) for v in vals] for k, vals in box.items()} + + # Convert it to the right format + coords: np.ndarray = np.array( + [box_dict["left"], box_dict["top"], box_dict["width"], box_dict["height"]], dtype=np_dtype + ).transpose() + label_targets = list(map(str, box_dict["label"])) + + if use_polygons: + # (x, y) coordinates of top left, top right, bottom right, bottom left corners + box_targets: np.ndarray = np.stack( + [ + np.stack([coords[:, 0], coords[:, 1]], axis=-1), + np.stack([coords[:, 0] + coords[:, 2], coords[:, 1]], axis=-1), + np.stack([coords[:, 0] + coords[:, 2], coords[:, 1] + coords[:, 3]], axis=-1), + np.stack([coords[:, 0], coords[:, 1] + coords[:, 3]], axis=-1), + ], + axis=1, + ) + else: + # x, y, width, height -> xmin, ymin, xmax, ymax + box_targets = np.stack( + [ + coords[:, 0], + coords[:, 1], + coords[:, 
0] + coords[:, 2], + coords[:, 1] + coords[:, 3], + ], + axis=-1, + ) + + if recognition_task: + crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, img_name), geoms=box_targets) + for crop, label in zip(crops, label_targets): + if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0 and " " not in label: + self.data.append((crop, label)) + elif detection_task: + self.data.append((img_name, box_targets)) + else: + self.data.append((img_name, dict(boxes=box_targets, labels=label_targets))) + + self.root = tmp_root + + def extra_repr(self) -> str: + return f"train={self.train}"
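An illustrative sketch of the optional task modes (editorial; in recognition mode each crop carries a single digit, as the per-digit labels above show):
>>> from doctr.datasets import SVHN
>>> reco_set = SVHN(train=True, download=True, recognition_task=True)
>>> crop, digit = reco_set[0]
>>> det_set = SVHN(train=False, download=True, use_polygons=True, detection_task=True)
>>> img, polygons = det_set[0]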
\ No newline at end of file diff --git a/_modules/doctr/datasets/svt.html b/_modules/doctr/datasets/svt.html new file mode 100644 index 0000000000..ed735687c2 --- /dev/null +++ b/_modules/doctr/datasets/svt.html @@ -0,0 +1,460 @@

Source code for doctr.datasets.svt

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import os
+from typing import Any
+
+import defusedxml.ElementTree as ET
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import VisionDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["SVT"]
+
+
+
+[docs] +class SVT(VisionDataset): + """SVT dataset from `"The Street View Text Dataset - UCSD Computer Vision" + <http://vision.ucsd.edu/~kai/svt/>`_. + + .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/svt-grid.png&src=0 + :align: center + + >>> from doctr.datasets import SVT + >>> train_set = SVT(train=True, download=True) + >>> img, target = train_set[0] + + Args: + train: whether the subset should be the training one + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + recognition_task: whether the dataset should be used for recognition task + detection_task: whether the dataset should be used for detection task + **kwargs: keyword arguments from `VisionDataset`. + """ + + URL = "http://www.iapr-tc11.org/dataset/SVT/svt.zip" + SHA256 = "63b3d55e6b6d1e036e2a844a20c034fe3af3c32e4d914d6e0c4a3cd43df3bebf" + + def __init__( + self, + train: bool = True, + use_polygons: bool = False, + recognition_task: bool = False, + detection_task: bool = False, + **kwargs: Any, + ) -> None: + super().__init__( + self.URL, + None, + self.SHA256, + True, + pre_transforms=convert_target_to_relative if not recognition_task else None, + **kwargs, + ) + if recognition_task and detection_task: + raise ValueError( + "`recognition_task` and `detection_task` cannot be set to True simultaneously. " + + "To get the whole dataset with boxes and labels leave both parameters to False." + ) + + self.train = train + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] + np_dtype = np.float32 + + # Load xml data + tmp_root = os.path.join(self.root, "svt1") if self.SHA256 else self.root + xml_tree = ( + ET.parse(os.path.join(tmp_root, "train.xml")) + if self.train + else ET.parse(os.path.join(tmp_root, "test.xml")) + ) + xml_root = xml_tree.getroot() + + for image in tqdm(iterable=xml_root, desc="Preparing and Loading SVT", total=len(xml_root)): + name, _, _, _resolution, rectangles = image + + # File existence check + if not os.path.exists(os.path.join(tmp_root, name.text)): + raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, name.text)}") + + if use_polygons: + # (x, y) coordinates of top left, top right, bottom right, bottom left corners + _boxes = [ + [ + [float(rect.attrib["x"]), float(rect.attrib["y"])], + [float(rect.attrib["x"]) + float(rect.attrib["width"]), float(rect.attrib["y"])], + [ + float(rect.attrib["x"]) + float(rect.attrib["width"]), + float(rect.attrib["y"]) + float(rect.attrib["height"]), + ], + [float(rect.attrib["x"]), float(rect.attrib["y"]) + float(rect.attrib["height"])], + ] + for rect in rectangles + ] + else: + # x_min, y_min, x_max, y_max + _boxes = [ + [ + float(rect.attrib["x"]), # type: ignore[list-item] + float(rect.attrib["y"]), # type: ignore[list-item] + float(rect.attrib["x"]) + float(rect.attrib["width"]), # type: ignore[list-item] + float(rect.attrib["y"]) + float(rect.attrib["height"]), # type: ignore[list-item] + ] + for rect in rectangles + ] + + boxes: np.ndarray = np.asarray(_boxes, dtype=np_dtype) + # Get the labels + labels = [lab.text for rect in rectangles for lab in rect] + + if recognition_task: + crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, name.text), geoms=boxes) + for crop, label in zip(crops, labels): + if crop.shape[0] > 0 and crop.shape[1] > 0 and len(label) > 0 and " " not in label: + self.data.append((crop, label)) + elif detection_task: + self.data.append((name.text, boxes)) + else: + self.data.append((name.text, dict(boxes=boxes, 
labels=labels))) + + self.root = tmp_root + + def extra_repr(self) -> str: + return f"train={self.train}"
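An illustrative sketch of the optional task modes (editorial, not upstream documentation):
>>> from doctr.datasets import SVT
>>> reco_set = SVT(train=True, download=True, recognition_task=True)
>>> crop, word = reco_set[0]
>>> det_set = SVT(train=False, download=True, use_polygons=True, detection_task=True)
>>> img, polygons = det_set[0]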
\ No newline at end of file diff --git a/_modules/doctr/datasets/synthtext.html b/_modules/doctr/datasets/synthtext.html new file mode 100644 index 0000000000..c4a31863e1 --- /dev/null +++ b/_modules/doctr/datasets/synthtext.html @@ -0,0 +1,484 @@

Source code for doctr.datasets.synthtext

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import glob
+import os
+from typing import Any
+
+import numpy as np
+from PIL import Image
+from scipy import io as sio
+from tqdm import tqdm
+
+from .datasets import VisionDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["SynthText"]
+
+
+
+[docs] +class SynthText(VisionDataset): + """SynthText dataset from `"Synthetic Data for Text Localisation in Natural Images" + <https://arxiv.org/abs/1604.06646>`_ | `"repository" <https://github.com/ankush-me/SynthText>`_ | + `"website" <https://www.robots.ox.ac.uk/~vgg/data/scenetext/>`_. + + .. image:: https://doctr-static.mindee.com/models?id=v0.5.0/svt-grid.png&src=0 + :align: center + + >>> from doctr.datasets import SynthText + >>> train_set = SynthText(train=True, download=True) + >>> img, target = train_set[0] + + Args: + train: whether the subset should be the training one + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + recognition_task: whether the dataset should be used for recognition task + detection_task: whether the dataset should be used for detection task + **kwargs: keyword arguments from `VisionDataset`. + """ + + URL = "https://thor.robots.ox.ac.uk/~vgg/data/scenetext/SynthText.zip" + SHA256 = "28ab030485ec8df3ed612c568dd71fb2793b9afbfa3a9d9c6e792aef33265bf1" + + # filter corrupted or missing images + BLACKLIST = ( + "67/fruits_129_", + "194/window_19_", + ) + + def __init__( + self, + train: bool = True, + use_polygons: bool = False, + recognition_task: bool = False, + detection_task: bool = False, + **kwargs: Any, + ) -> None: + super().__init__( + self.URL, + None, + file_hash=None, + extract_archive=True, + pre_transforms=convert_target_to_relative if not recognition_task else None, + **kwargs, + ) + if recognition_task and detection_task: + raise ValueError( + "`recognition_task` and `detection_task` cannot be set to True simultaneously. " + + "To get the whole dataset with boxes and labels leave both parameters to False." + ) + + self.train = train + self.data: list[tuple[str | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] + np_dtype = np.float32 + + # Load mat data + tmp_root = os.path.join(self.root, "SynthText") if self.SHA256 else self.root + # define folder to write SynthText recognition dataset + reco_folder_name = "SynthText_recognition_train" if self.train else "SynthText_recognition_test" + reco_folder_name = "Poly_" + reco_folder_name if use_polygons else reco_folder_name + reco_folder_path = os.path.join(tmp_root, reco_folder_name) + reco_images_counter = 0 + + if recognition_task and os.path.isdir(reco_folder_path): + self._read_from_folder(reco_folder_path) + return + elif recognition_task and not os.path.isdir(reco_folder_path): + os.makedirs(reco_folder_path, exist_ok=False) + + mat_data = sio.loadmat(os.path.join(tmp_root, "gt.mat")) + train_samples = int(len(mat_data["imnames"][0]) * 0.9) + set_slice = slice(train_samples) if self.train else slice(train_samples, None) + paths = mat_data["imnames"][0][set_slice] + boxes = mat_data["wordBB"][0][set_slice] + labels = mat_data["txt"][0][set_slice] + del mat_data + + for img_path, word_boxes, txt in tqdm( + iterable=zip(paths, boxes, labels), desc="Preparing and Loading SynthText", total=len(paths) + ): + # File existence check + if not os.path.exists(os.path.join(tmp_root, img_path[0])): + raise FileNotFoundError(f"unable to locate {os.path.join(tmp_root, img_path[0])}") + + labels = [elt for word in txt.tolist() for elt in word.split()] + # (x, y) coordinates of top left, top right, bottom right, bottom left corners + word_boxes = ( + word_boxes.transpose(2, 1, 0) + if word_boxes.ndim == 3 + else np.expand_dims(word_boxes.transpose(1, 0), axis=0) + ) + + if not use_polygons: + # xmin, ymin, xmax, ymax + word_boxes = 
np.concatenate((word_boxes.min(axis=1), word_boxes.max(axis=1)), axis=1) + + if recognition_task: + crops = crop_bboxes_from_image(img_path=os.path.join(tmp_root, img_path[0]), geoms=word_boxes) + for crop, label in zip(crops, labels): + if ( + crop.shape[0] > 0 + and crop.shape[1] > 0 + and len(label) > 0 + and len(label) < 30 + and " " not in label + ): + # write data to disk + with open(os.path.join(reco_folder_path, f"{reco_images_counter}.txt"), "w") as f: + f.write(label) + tmp_img = Image.fromarray(crop) + tmp_img.save(os.path.join(reco_folder_path, f"{reco_images_counter}.png")) + reco_images_counter += 1 + elif detection_task: + self.data.append((img_path[0], np.asarray(word_boxes, dtype=np_dtype))) + else: + self.data.append((img_path[0], dict(boxes=np.asarray(word_boxes, dtype=np_dtype), labels=labels))) + + if recognition_task: + self._read_from_folder(reco_folder_path) + + self.root = tmp_root + + def extra_repr(self) -> str: + return f"train={self.train}" + + def _read_from_folder(self, path: str) -> None: + img_paths = glob.glob(os.path.join(path, "*.png")) + for img_path in tqdm(iterable=img_paths, desc="Preparing and Loading SynthText", total=len(img_paths)): + with open(os.path.join(path, f"{os.path.basename(img_path)[:-4]}.txt"), "r") as f: + self.data.append((img_path, f.read()))
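As with IMGUR5K, recognition mode crops words on first use, caches them in a `SynthText_recognition_*` folder inside the extracted archive, and reloads that folder on later runs. An illustrative sketch (editorial):
>>> from doctr.datasets import SynthText
>>> reco_set = SynthText(train=True, download=True, recognition_task=True)  # first call writes crops to disk
>>> crop, word = reco_set[0]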
\ No newline at end of file diff --git a/_modules/doctr/datasets/utils.html b/_modules/doctr/datasets/utils.html new file mode 100644 index 0000000000..5074632ce2 --- /dev/null +++ b/_modules/doctr/datasets/utils.html @@ -0,0 +1,580 @@

Source code for doctr.datasets.utils

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import string
+import unicodedata
+from collections.abc import Sequence
+from collections.abc import Sequence as SequenceType
+from functools import partial
+from pathlib import Path
+from typing import Any, TypeVar
+
+import numpy as np
+from PIL import Image
+
+from doctr.io.image import get_img_shape
+from doctr.utils.geometry import convert_to_relative_coords, extract_crops, extract_rcrops
+
+from .vocabs import VOCABS
+
+__all__ = [
+    "translate",
+    "encode_string",
+    "decode_sequence",
+    "encode_sequences",
+    "pre_transform_multiclass",
+    "crop_bboxes_from_image",
+    "convert_target_to_relative",
+]
+
+ImageTensor = TypeVar("ImageTensor")
+
+
+
+[docs] +def translate( + input_string: str, + vocab_name: str, + unknown_char: str = "■", +) -> str: + """Translate a string input in a given vocabulary + + Args: + input_string: input string to translate + vocab_name: vocabulary to use (french, latin, ...) + unknown_char: unknown character for non-translatable characters + + Returns: + A string translated in a given vocab + """ + if VOCABS.get(vocab_name) is None: + raise KeyError("output vocabulary must be in vocabs dictionary") + + translated = "" + for char in input_string: + if char not in VOCABS[vocab_name]: + # we need to translate char into a vocab char + if char in string.whitespace: + # remove whitespaces + continue + # normalize character if it is not in vocab + char = unicodedata.normalize("NFD", char).encode("ascii", "ignore").decode("ascii") + if char == "" or char not in VOCABS[vocab_name]: + # if normalization fails or char still not in vocab, return unknown character + char = unknown_char + translated += char + return translated
+ + + +
+[docs] +def encode_string( + input_string: str, + vocab: str, +) -> list[int]: + """Given a predefined mapping, encode the string to a sequence of numbers + + Args: + input_string: string to encode + vocab: vocabulary (string), the encoding is given by the indexing of the character sequence + + Returns: + A list encoding the input_string + """ + try: + return list(map(vocab.index, input_string)) + except ValueError as e: + missing_chars = [char for char in input_string if char not in vocab] + raise ValueError( + f"Some characters cannot be found in 'vocab': {set(missing_chars)}.\n" + f"Please check the input string `{input_string}` and the vocabulary `{vocab}`" + ) from e
+ + + +
+[docs] +def decode_sequence( + input_seq: np.ndarray | SequenceType[int], + mapping: str, +) -> str: + """Given a predefined mapping, decode the sequence of numbers to a string + + Args: + input_seq: array to decode + mapping: vocabulary (string), the encoding is given by the indexing of the character sequence + + Returns: + A string, decoded from input_seq + """ + if not isinstance(input_seq, (Sequence, np.ndarray)): + raise TypeError("Invalid sequence type") + if isinstance(input_seq, np.ndarray) and (input_seq.dtype != np.int_ or input_seq.max() >= len(mapping)): + raise AssertionError("Input must be an array of int, with max less than mapping size") + + return "".join(map(mapping.__getitem__, input_seq))
+ + + +
+[docs] +def encode_sequences( + sequences: list[str], + vocab: str, + target_size: int | None = None, + eos: int = -1, + sos: int | None = None, + pad: int | None = None, + dynamic_seq_length: bool = False, +) -> np.ndarray: + """Encode character sequences using a given vocab as mapping + + Args: + sequences: the list of character sequences of size N + vocab: the ordered vocab to use for encoding + target_size: maximum length of the encoded data + eos: encoding of End Of String + sos: optional encoding of Start Of String + pad: optional encoding for padding. In case of padding, all sequences are followed by 1 EOS then PAD + dynamic_seq_length: if `target_size` is specified, uses it as upper bound and enables dynamic sequence size + + Returns: + the padded encoded data as a tensor + """ + if 0 <= eos < len(vocab): + raise ValueError("argument 'eos' needs to be outside of vocab possible indices") + + if not isinstance(target_size, int) or dynamic_seq_length: + # Maximum string length + EOS + max_length = max(len(w) for w in sequences) + 1 + if isinstance(sos, int): + max_length += 1 + if isinstance(pad, int): + max_length += 1 + target_size = max_length if not isinstance(target_size, int) else min(max_length, target_size) + + # Pad all sequences + if isinstance(pad, int): # pad with padding symbol + if 0 <= pad < len(vocab): + raise ValueError("argument 'pad' needs to be outside of vocab possible indices") + # In that case, add EOS at the end of the word before padding + default_symbol = pad + else: # pad with eos symbol + default_symbol = eos + encoded_data: np.ndarray = np.full([len(sequences), target_size], default_symbol, dtype=np.int32) + + # Encode the strings + for idx, seq in enumerate(map(partial(encode_string, vocab=vocab), sequences)): + if isinstance(pad, int): # add eos at the end of the sequence + seq.append(eos) + encoded_data[idx, : min(len(seq), target_size)] = seq[: min(len(seq), target_size)] + + if isinstance(sos, int): # place sos symbol at the beginning of each sequence + if 0 <= sos < len(vocab): + raise ValueError("argument 'sos' needs to be outside of vocab possible indices") + encoded_data = np.roll(encoded_data, 1) + encoded_data[:, 0] = sos + + return encoded_data
+ + + +
+[docs] +def convert_target_to_relative( + img: ImageTensor, target: np.ndarray | dict[str, Any] +) -> tuple[ImageTensor, dict[str, Any] | np.ndarray]: + """Converts target to relative coordinates + + Args: + img: tf.Tensor or torch.Tensor representing the image + target: target to convert to relative coordinates (boxes (N, 4) or polygons (N, 4, 2)) + + Returns: + The image and the target in relative coordinates + """ + if isinstance(target, np.ndarray): + target = convert_to_relative_coords(target, get_img_shape(img)) # type: ignore[arg-type] + else: + target["boxes"] = convert_to_relative_coords(target["boxes"], get_img_shape(img)) # type: ignore[arg-type] + return img, target
+ + + +
+[docs] +def crop_bboxes_from_image(img_path: str | Path, geoms: np.ndarray) -> list[np.ndarray]: + """Crop a set of bounding boxes from an image + + Args: + img_path: path to the image + geoms: a array of polygons of shape (N, 4, 2) or of straight boxes of shape (N, 4) + + Returns: + a list of cropped images + """ + with Image.open(img_path) as pil_img: + img: np.ndarray = np.asarray(pil_img.convert("RGB")) + # Polygon + if geoms.ndim == 3 and geoms.shape[1:] == (4, 2): + return extract_rcrops(img, geoms.astype(dtype=int)) + if geoms.ndim == 2 and geoms.shape[1] == 4: + return extract_crops(img, geoms.astype(dtype=int)) + raise ValueError("Invalid geometry format")
+ + + +
+[docs] +def pre_transform_multiclass(img, target: tuple[np.ndarray, list]) -> tuple[np.ndarray, dict[str, list]]: + """Converts multiclass target to relative coordinates. + + Args: + img: Image + target: tuple of target polygons and their classes names + + Returns: + Image and dictionary of boxes, with class names as keys + """ + boxes = convert_to_relative_coords(target[0], get_img_shape(img)) + boxes_classes = target[1] + boxes_dict: dict = {k: [] for k in sorted(set(boxes_classes))} + for k, poly in zip(boxes_classes, boxes): + boxes_dict[k].append(poly) + boxes_dict = {k: np.stack(v, axis=0) for k, v in boxes_dict.items()} + return img, boxes_dict
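A small round-trip sketch for the encoding helpers above (editorial; the toy vocabulary is made up for illustration, real callers would pick an entry from `VOCABS`):
>>> from doctr.datasets.utils import encode_string, decode_sequence, encode_sequences
>>> vocab = "abcdefghijklmnopqrstuvwxyz"
>>> encode_string("cat", vocab)
[2, 0, 19]
>>> decode_sequence([2, 0, 19], vocab)
'cat'
>>> encode_sequences(["cat", "dog"], vocab, eos=len(vocab)).shape  # padded with the EOS index
(2, 4)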
\ No newline at end of file diff --git a/_modules/doctr/datasets/wildreceipt.html b/_modules/doctr/datasets/wildreceipt.html new file mode 100644 index 0000000000..e012910eb0 --- /dev/null +++ b/_modules/doctr/datasets/wildreceipt.html @@ -0,0 +1,460 @@

Source code for doctr.datasets.wildreceipt

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import json
+import os
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from tqdm import tqdm
+
+from .datasets import AbstractDataset
+from .utils import convert_target_to_relative, crop_bboxes_from_image
+
+__all__ = ["WILDRECEIPT"]
+
+
+
+[docs] +class WILDRECEIPT(AbstractDataset): + """ + WildReceipt dataset from `"Spatial Dual-Modality Graph Reasoning for Key Information Extraction" + <https://arxiv.org/abs/2103.14470v1>`_ | + `"repository" <https://download.openmmlab.com/mmocr/data/wildreceipt.tar>`_. + + .. image:: https://doctr-static.mindee.com/models?id=v0.7.0/wildreceipt-dataset.jpg&src=0 + :align: center + + >>> # NOTE: You need to download the dataset first. + >>> from doctr.datasets import WILDRECEIPT + >>> train_set = WILDRECEIPT(train=True, img_folder="/path/to/wildreceipt/", + >>> label_path="/path/to/wildreceipt/train.txt") + >>> img, target = train_set[0] + >>> test_set = WILDRECEIPT(train=False, img_folder="/path/to/wildreceipt/", + >>> label_path="/path/to/wildreceipt/test.txt") + >>> img, target = test_set[0] + + Args: + img_folder: folder with all the images of the dataset + label_path: path to the annotations file of the dataset + train: whether the subset should be the training one + use_polygons: whether polygons should be considered as rotated bounding box (instead of straight ones) + recognition_task: whether the dataset should be used for recognition task + detection_task: whether the dataset should be used for detection task + **kwargs: keyword arguments from `AbstractDataset`. + """ + + def __init__( + self, + img_folder: str, + label_path: str, + train: bool = True, + use_polygons: bool = False, + recognition_task: bool = False, + detection_task: bool = False, + **kwargs: Any, + ) -> None: + super().__init__( + img_folder, pre_transforms=convert_target_to_relative if not recognition_task else None, **kwargs + ) + # Task check + if recognition_task and detection_task: + raise ValueError( + "`recognition_task` and `detection_task` cannot be set to True simultaneously. " + + "To get the whole dataset with boxes and labels leave both parameters to False." 
+ ) + + # File existence check + if not os.path.exists(label_path) or not os.path.exists(img_folder): + raise FileNotFoundError(f"unable to locate {label_path if not os.path.exists(label_path) else img_folder}") + + tmp_root = img_folder + self.train = train + np_dtype = np.float32 + self.data: list[tuple[str | Path | np.ndarray, str | dict[str, Any] | np.ndarray]] = [] + + with open(label_path, "r") as file: + data = file.read() + # Split the text file into separate JSON strings + json_strings = data.strip().split("\n") + box: list[float] | np.ndarray + + for json_string in tqdm( + iterable=json_strings, desc="Preparing and Loading WILDRECEIPT", total=len(json_strings) + ): + _targets = [] + json_data = json.loads(json_string) + img_path = json_data["file_name"] + annotations = json_data["annotations"] + for annotation in annotations: + coordinates = annotation["box"] + if use_polygons: + # (x, y) coordinates of top left, top right, bottom right, bottom left corners + box = np.array( + [ + [coordinates[0], coordinates[1]], + [coordinates[2], coordinates[3]], + [coordinates[4], coordinates[5]], + [coordinates[6], coordinates[7]], + ], + dtype=np_dtype, + ) + else: + x, y = coordinates[::2], coordinates[1::2] + box = [min(x), min(y), max(x), max(y)] + _targets.append((annotation["text"], box)) + text_targets, box_targets = zip(*_targets) + + if recognition_task: + crops = crop_bboxes_from_image( + img_path=os.path.join(tmp_root, img_path), geoms=np.asarray(box_targets, dtype=int).clip(min=0) + ) + for crop, label in zip(crops, list(text_targets)): + if label and " " not in label: + self.data.append((crop, label)) + elif detection_task: + self.data.append((img_path, np.asarray(box_targets, dtype=int).clip(min=0))) + else: + self.data.append(( + img_path, + dict(boxes=np.asarray(box_targets, dtype=int).clip(min=0), labels=list(text_targets)), + )) + self.root = tmp_root + + def extra_repr(self) -> str: + return f"train={self.train}"
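A short usage sketch of the class above (the paths are hypothetical; the archive must be downloaded and extracted beforehand, as the docstring notes):

from doctr.datasets import WILDRECEIPT

train_set = WILDRECEIPT(
    img_folder="/path/to/wildreceipt/",           # hypothetical location
    label_path="/path/to/wildreceipt/train.txt",  # hypothetical location
    recognition_task=True,  # yields (crop, word) pairs; labels containing spaces are skipped
)
print(len(train_set))
crop, word = train_set[0]
print(word)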
diff --git a/_modules/doctr/io/elements.html b/_modules/doctr/io/elements.html
new file mode 100644
index 0000000000..479802cb83
--- /dev/null
+++ b/_modules/doctr/io/elements.html
@@ -0,0 +1,1024 @@
Source code for doctr.io.elements

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from typing import Any
+
+from defusedxml import defuse_stdlib
+
+defuse_stdlib()
+from xml.etree import ElementTree as ET
+from xml.etree.ElementTree import Element as ETElement
+from xml.etree.ElementTree import SubElement
+
+import numpy as np
+
+import doctr
+from doctr.file_utils import requires_package
+from doctr.utils.common_types import BoundingBox
+from doctr.utils.geometry import resolve_enclosing_bbox, resolve_enclosing_rbbox
+from doctr.utils.reconstitution import synthesize_kie_page, synthesize_page
+from doctr.utils.repr import NestedObject
+
+try:  # optional dependency for visualization
+    from doctr.utils.visualization import visualize_kie_page, visualize_page
+except ModuleNotFoundError:
+    pass
+
+__all__ = ["Element", "Word", "Artefact", "Line", "Prediction", "Block", "Page", "KIEPage", "Document"]
+
+
+class Element(NestedObject):
+    """Implements an abstract document element with exporting and text rendering capabilities"""
+
+    _children_names: list[str] = []
+    _exported_keys: list[str] = []
+
+    def __init__(self, **kwargs: Any) -> None:
+        for k, v in kwargs.items():
+            if k in self._children_names:
+                setattr(self, k, v)
+            else:
+                raise KeyError(f"{self.__class__.__name__} object does not have any attribute named '{k}'")
+
+    def export(self) -> dict[str, Any]:
+        """Exports the object into a nested dict format"""
+        export_dict = {k: getattr(self, k) for k in self._exported_keys}
+        for children_name in self._children_names:
+            if children_name in ["predictions"]:
+                export_dict[children_name] = {
+                    k: [item.export() for item in c] for k, c in getattr(self, children_name).items()
+                }
+            else:
+                export_dict[children_name] = [c.export() for c in getattr(self, children_name)]
+
+        return export_dict
+
+    @classmethod
+    def from_dict(cls, save_dict: dict[str, Any], **kwargs):
+        raise NotImplementedError
+
+    def render(self) -> str:
+        raise NotImplementedError
+
+
+
+[docs] +class Word(Element): + """Implements a word element + + Args: + value: the text string of the word + confidence: the confidence associated with the text prediction + geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to + the page's size + objectness_score: the objectness score of the detection + crop_orientation: the general orientation of the crop in degrees and its confidence + """ + + _exported_keys: list[str] = ["value", "confidence", "geometry", "objectness_score", "crop_orientation"] + _children_names: list[str] = [] + + def __init__( + self, + value: str, + confidence: float, + geometry: BoundingBox | np.ndarray, + objectness_score: float, + crop_orientation: dict[str, Any], + ) -> None: + super().__init__() + self.value = value + self.confidence = confidence + self.geometry = geometry + self.objectness_score = objectness_score + self.crop_orientation = crop_orientation + + def render(self) -> str: + """Renders the full text of the element""" + return self.value + + def extra_repr(self) -> str: + return f"value='{self.value}', confidence={self.confidence:.2}" + + @classmethod + def from_dict(cls, save_dict: dict[str, Any], **kwargs): + kwargs = {k: save_dict[k] for k in cls._exported_keys} + return cls(**kwargs)
+ + + +
+[docs] +class Artefact(Element): + """Implements a non-textual element + + Args: + artefact_type: the type of artefact + confidence: the confidence of the type prediction + geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to + the page's size. + """ + + _exported_keys: list[str] = ["geometry", "type", "confidence"] + _children_names: list[str] = [] + + def __init__(self, artefact_type: str, confidence: float, geometry: BoundingBox) -> None: + super().__init__() + self.geometry = geometry + self.type = artefact_type + self.confidence = confidence + + def render(self) -> str: + """Renders the full text of the element""" + return f"[{self.type.upper()}]" + + def extra_repr(self) -> str: + return f"type='{self.type}', confidence={self.confidence:.2}" + + @classmethod + def from_dict(cls, save_dict: dict[str, Any], **kwargs): + kwargs = {k: save_dict[k] for k in cls._exported_keys} + return cls(**kwargs)
+ + + +
+[docs] +class Line(Element): + """Implements a line element as a collection of words + + Args: + words: list of word elements + geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to + the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing + all words in it. + """ + + _exported_keys: list[str] = ["geometry", "objectness_score"] + _children_names: list[str] = ["words"] + words: list[Word] = [] + + def __init__( + self, + words: list[Word], + geometry: BoundingBox | np.ndarray | None = None, + objectness_score: float | None = None, + ) -> None: + # Compute the objectness score of the line + if objectness_score is None: + objectness_score = float(np.mean([w.objectness_score for w in words])) + # Resolve the geometry using the smallest enclosing bounding box + if geometry is None: + # Check whether this is a rotated or straight box + box_resolution_fn = resolve_enclosing_rbbox if len(words[0].geometry) == 4 else resolve_enclosing_bbox + geometry = box_resolution_fn([w.geometry for w in words]) # type: ignore[misc] + + super().__init__(words=words) + self.geometry = geometry + self.objectness_score = objectness_score + + def render(self) -> str: + """Renders the full text of the element""" + return " ".join(w.render() for w in self.words) + + @classmethod + def from_dict(cls, save_dict: dict[str, Any], **kwargs): + kwargs = {k: save_dict[k] for k in cls._exported_keys} + kwargs.update({ + "words": [Word.from_dict(_dict) for _dict in save_dict["words"]], + }) + return cls(**kwargs)
+ + + +class Prediction(Word): + """Implements a prediction element""" + + def render(self) -> str: + """Renders the full text of the element""" + return self.value + + def extra_repr(self) -> str: + return f"value='{self.value}', confidence={self.confidence:.2}, bounding_box={self.geometry}" + + +
+[docs] +class Block(Element): + """Implements a block element as a collection of lines and artefacts + + Args: + lines: list of line elements + artefacts: list of artefacts + geometry: bounding box of the word in format ((xmin, ymin), (xmax, ymax)) where coordinates are relative to + the page's size. If not specified, it will be resolved by default to the smallest bounding box enclosing + all lines and artefacts in it. + """ + + _exported_keys: list[str] = ["geometry", "objectness_score"] + _children_names: list[str] = ["lines", "artefacts"] + lines: list[Line] = [] + artefacts: list[Artefact] = [] + + def __init__( + self, + lines: list[Line] = [], + artefacts: list[Artefact] = [], + geometry: BoundingBox | np.ndarray | None = None, + objectness_score: float | None = None, + ) -> None: + # Compute the objectness score of the line + if objectness_score is None: + objectness_score = float(np.mean([w.objectness_score for line in lines for w in line.words])) + # Resolve the geometry using the smallest enclosing bounding box + if geometry is None: + line_boxes = [word.geometry for line in lines for word in line.words] + artefact_boxes = [artefact.geometry for artefact in artefacts] + box_resolution_fn = ( + resolve_enclosing_rbbox if isinstance(lines[0].geometry, np.ndarray) else resolve_enclosing_bbox + ) + geometry = box_resolution_fn(line_boxes + artefact_boxes) # type: ignore + + super().__init__(lines=lines, artefacts=artefacts) + self.geometry = geometry + self.objectness_score = objectness_score + + def render(self, line_break: str = "\n") -> str: + """Renders the full text of the element""" + return line_break.join(line.render() for line in self.lines) + + @classmethod + def from_dict(cls, save_dict: dict[str, Any], **kwargs): + kwargs = {k: save_dict[k] for k in cls._exported_keys} + kwargs.update({ + "lines": [Line.from_dict(_dict) for _dict in save_dict["lines"]], + "artefacts": [Artefact.from_dict(_dict) for _dict in save_dict["artefacts"]], + }) + return cls(**kwargs)
+ + + +
+[docs] +class Page(Element): + """Implements a page element as a collection of blocks + + Args: + page: image encoded as a numpy array in uint8 + blocks: list of block elements + page_idx: the index of the page in the input raw document + dimensions: the page size in pixels in format (height, width) + orientation: a dictionary with the value of the rotation angle in degress and confidence of the prediction + language: a dictionary with the language value and confidence of the prediction + """ + + _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"] + _children_names: list[str] = ["blocks"] + blocks: list[Block] = [] + + def __init__( + self, + page: np.ndarray, + blocks: list[Block], + page_idx: int, + dimensions: tuple[int, int], + orientation: dict[str, Any] | None = None, + language: dict[str, Any] | None = None, + ) -> None: + super().__init__(blocks=blocks) + self.page = page + self.page_idx = page_idx + self.dimensions = dimensions + self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None) + self.language = language if isinstance(language, dict) else dict(value=None, confidence=None) + + def render(self, block_break: str = "\n\n") -> str: + """Renders the full text of the element""" + return block_break.join(b.render() for b in self.blocks) + + def extra_repr(self) -> str: + return f"dimensions={self.dimensions}" + +
+[docs] + def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None: + """Overlay the result on a given image + + Args: + interactive: whether the display should be interactive + preserve_aspect_ratio: pass True if you passed True to the predictor + **kwargs: additional keyword arguments passed to the matplotlib.pyplot.show method + """ + requires_package("matplotlib", "`.show()` requires matplotlib & mplcursors installed") + requires_package("mplcursors", "`.show()` requires matplotlib & mplcursors installed") + import matplotlib.pyplot as plt + + visualize_page(self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio) + plt.show(**kwargs)
+ + + def synthesize(self, **kwargs) -> np.ndarray: + """Synthesize the page from the predictions + + Args: + **kwargs: keyword arguments passed to the `synthesize_page` method + + Returns: + synthesized page + """ + return synthesize_page(self.export(), **kwargs) + + def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]: + """Export the page as XML (hOCR-format) + convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md + + Args: + file_title: the title of the XML file + + Returns: + a tuple of the XML byte string, and its ElementTree + """ + p_idx = self.page_idx + block_count: int = 1 + line_count: int = 1 + word_count: int = 1 + height, width = self.dimensions + language = self.language if "language" in self.language.keys() else "en" + # Create the XML root element + page_hocr = ETElement("html", attrib={"xmlns": "http://www.w3.org/1999/xhtml", "xml:lang": str(language)}) + # Create the header / SubElements of the root element + head = SubElement(page_hocr, "head") + SubElement(head, "title").text = file_title + SubElement(head, "meta", attrib={"http-equiv": "Content-Type", "content": "text/html; charset=utf-8"}) + SubElement( + head, + "meta", + attrib={"name": "ocr-system", "content": f"python-doctr {doctr.__version__}"}, # type: ignore[attr-defined] + ) + SubElement( + head, + "meta", + attrib={"name": "ocr-capabilities", "content": "ocr_page ocr_carea ocr_par ocr_line ocrx_word"}, + ) + # Create the body + body = SubElement(page_hocr, "body") + page_div = SubElement( + body, + "div", + attrib={ + "class": "ocr_page", + "id": f"page_{p_idx + 1}", + "title": f"image; bbox 0 0 {width} {height}; ppageno 0", + }, + ) + # iterate over the blocks / lines / words and create the XML elements in body line by line with the attributes + for block in self.blocks: + if len(block.geometry) != 2: + raise TypeError("XML export is only available for straight bounding boxes for now.") + (xmin, ymin), (xmax, ymax) = block.geometry + block_div = SubElement( + page_div, + "div", + attrib={ + "class": "ocr_carea", + "id": f"block_{block_count}", + "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \ + {int(round(xmax * width))} {int(round(ymax * height))}", + }, + ) + paragraph = SubElement( + block_div, + "p", + attrib={ + "class": "ocr_par", + "id": f"par_{block_count}", + "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \ + {int(round(xmax * width))} {int(round(ymax * height))}", + }, + ) + block_count += 1 + for line in block.lines: + (xmin, ymin), (xmax, ymax) = line.geometry + # NOTE: baseline, x_size, x_descenders, x_ascenders is currently initalized to 0 + line_span = SubElement( + paragraph, + "span", + attrib={ + "class": "ocr_line", + "id": f"line_{line_count}", + "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \ + {int(round(xmax * width))} {int(round(ymax * height))}; \ + baseline 0 0; x_size 0; x_descenders 0; x_ascenders 0", + }, + ) + line_count += 1 + for word in line.words: + (xmin, ymin), (xmax, ymax) = word.geometry + conf = word.confidence + word_div = SubElement( + line_span, + "span", + attrib={ + "class": "ocrx_word", + "id": f"word_{word_count}", + "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \ + {int(round(xmax * width))} {int(round(ymax * height))}; \ + x_wconf {int(round(conf * 100))}", + }, + ) + # set the text + word_div.text = word.value + word_count += 1 + + return (ET.tostring(page_hocr, encoding="utf-8", method="xml"), 
ET.ElementTree(page_hocr)) + + @classmethod + def from_dict(cls, save_dict: dict[str, Any], **kwargs): + kwargs = {k: save_dict[k] for k in cls._exported_keys} + kwargs.update({"blocks": [Block.from_dict(block_dict) for block_dict in save_dict["blocks"]]}) + return cls(**kwargs)
+ + + +class KIEPage(Element): + """Implements a KIE page element as a collection of predictions + + Args: + predictions: Dictionary with list of block elements for each detection class + page: image encoded as a numpy array in uint8 + page_idx: the index of the page in the input raw document + dimensions: the page size in pixels in format (height, width) + orientation: a dictionary with the value of the rotation angle in degress and confidence of the prediction + language: a dictionary with the language value and confidence of the prediction + """ + + _exported_keys: list[str] = ["page_idx", "dimensions", "orientation", "language"] + _children_names: list[str] = ["predictions"] + predictions: dict[str, list[Prediction]] = {} + + def __init__( + self, + page: np.ndarray, + predictions: dict[str, list[Prediction]], + page_idx: int, + dimensions: tuple[int, int], + orientation: dict[str, Any] | None = None, + language: dict[str, Any] | None = None, + ) -> None: + super().__init__(predictions=predictions) + self.page = page + self.page_idx = page_idx + self.dimensions = dimensions + self.orientation = orientation if isinstance(orientation, dict) else dict(value=None, confidence=None) + self.language = language if isinstance(language, dict) else dict(value=None, confidence=None) + + def render(self, prediction_break: str = "\n\n") -> str: + """Renders the full text of the element""" + return prediction_break.join( + f"{class_name}: {p.render()}" for class_name, predictions in self.predictions.items() for p in predictions + ) + + def extra_repr(self) -> str: + return f"dimensions={self.dimensions}" + + def show(self, interactive: bool = True, preserve_aspect_ratio: bool = False, **kwargs) -> None: + """Overlay the result on a given image + + Args: + interactive: whether the display should be interactive + preserve_aspect_ratio: pass True if you passed True to the predictor + **kwargs: keyword arguments passed to the matplotlib.pyplot.show method + """ + requires_package("matplotlib", "`.show()` requires matplotlib & mplcursors installed") + requires_package("mplcursors", "`.show()` requires matplotlib & mplcursors installed") + import matplotlib.pyplot as plt + + visualize_kie_page( + self.export(), self.page, interactive=interactive, preserve_aspect_ratio=preserve_aspect_ratio + ) + plt.show(**kwargs) + + def synthesize(self, **kwargs) -> np.ndarray: + """Synthesize the page from the predictions + + Args: + **kwargs: keyword arguments passed to the `synthesize_kie_page` method + + Returns: + synthesized page + """ + return synthesize_kie_page(self.export(), **kwargs) + + def export_as_xml(self, file_title: str = "docTR - XML export (hOCR)") -> tuple[bytes, ET.ElementTree]: + """Export the page as XML (hOCR-format) + convention: https://github.com/kba/hocr-spec/blob/master/1.2/spec.md + + Args: + file_title: the title of the XML file + + Returns: + a tuple of the XML byte string, and its ElementTree + """ + p_idx = self.page_idx + prediction_count: int = 1 + height, width = self.dimensions + language = self.language if "language" in self.language.keys() else "en" + # Create the XML root element + page_hocr = ETElement("html", attrib={"xmlns": "http://www.w3.org/1999/xhtml", "xml:lang": str(language)}) + # Create the header / SubElements of the root element + head = SubElement(page_hocr, "head") + SubElement(head, "title").text = file_title + SubElement(head, "meta", attrib={"http-equiv": "Content-Type", "content": "text/html; charset=utf-8"}) + SubElement( + head, + "meta", + attrib={"name": 
"ocr-system", "content": f"python-doctr {doctr.__version__}"}, # type: ignore[attr-defined] + ) + SubElement( + head, + "meta", + attrib={"name": "ocr-capabilities", "content": "ocr_page ocr_carea ocr_par ocr_line ocrx_word"}, + ) + # Create the body + body = SubElement(page_hocr, "body") + SubElement( + body, + "div", + attrib={ + "class": "ocr_page", + "id": f"page_{p_idx + 1}", + "title": f"image; bbox 0 0 {width} {height}; ppageno 0", + }, + ) + # iterate over the blocks / lines / words and create the XML elements in body line by line with the attributes + for class_name, predictions in self.predictions.items(): + for prediction in predictions: + if len(prediction.geometry) != 2: + raise TypeError("XML export is only available for straight bounding boxes for now.") + (xmin, ymin), (xmax, ymax) = prediction.geometry + prediction_div = SubElement( + body, + "div", + attrib={ + "class": "ocr_carea", + "id": f"{class_name}_prediction_{prediction_count}", + "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \ + {int(round(xmax * width))} {int(round(ymax * height))}", + }, + ) + # NOTE: ocr_par, ocr_line and ocrx_word are the same because the KIE predictions contain only words + # This is a workaround to make it PDF/A compatible + par_div = SubElement( + prediction_div, + "p", + attrib={ + "class": "ocr_par", + "id": f"{class_name}_par_{prediction_count}", + "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \ + {int(round(xmax * width))} {int(round(ymax * height))}", + }, + ) + line_span = SubElement( + par_div, + "span", + attrib={ + "class": "ocr_line", + "id": f"{class_name}_line_{prediction_count}", + "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \ + {int(round(xmax * width))} {int(round(ymax * height))}; \ + baseline 0 0; x_size 0; x_descenders 0; x_ascenders 0", + }, + ) + word_div = SubElement( + line_span, + "span", + attrib={ + "class": "ocrx_word", + "id": f"{class_name}_word_{prediction_count}", + "title": f"bbox {int(round(xmin * width))} {int(round(ymin * height))} \ + {int(round(xmax * width))} {int(round(ymax * height))}; \ + x_wconf {int(round(prediction.confidence * 100))}", + }, + ) + word_div.text = prediction.value + prediction_count += 1 + + return ET.tostring(page_hocr, encoding="utf-8", method="xml"), ET.ElementTree(page_hocr) + + @classmethod + def from_dict(cls, save_dict: dict[str, Any], **kwargs): + kwargs = {k: save_dict[k] for k in cls._exported_keys} + kwargs.update({ + "predictions": [Prediction.from_dict(predictions_dict) for predictions_dict in save_dict["predictions"]] + }) + return cls(**kwargs) + + +
+[docs] +class Document(Element): + """Implements a document element as a collection of pages + + Args: + pages: list of page elements + """ + + _children_names: list[str] = ["pages"] + pages: list[Page] = [] + + def __init__( + self, + pages: list[Page], + ) -> None: + super().__init__(pages=pages) + + def render(self, page_break: str = "\n\n\n\n") -> str: + """Renders the full text of the element""" + return page_break.join(p.render() for p in self.pages) + +
+[docs] + def show(self, **kwargs) -> None: + """Overlay the result on a given image""" + for result in self.pages: + result.show(**kwargs)
+ + + def synthesize(self, **kwargs) -> list[np.ndarray]: + """Synthesize all pages from their predictions + + Args: + **kwargs: keyword arguments passed to the `Page.synthesize` method + + Returns: + list of synthesized pages + """ + return [page.synthesize(**kwargs) for page in self.pages] + + def export_as_xml(self, **kwargs) -> list[tuple[bytes, ET.ElementTree]]: + """Export the document as XML (hOCR-format) + + Args: + **kwargs: additional keyword arguments passed to the Page.export_as_xml method + + Returns: + list of tuple of (bytes, ElementTree) + """ + return [page.export_as_xml(**kwargs) for page in self.pages] + + @classmethod + def from_dict(cls, save_dict: dict[str, Any], **kwargs): + kwargs = {k: save_dict[k] for k in cls._exported_keys} + kwargs.update({"pages": [Page.from_dict(page_dict) for page_dict in save_dict["pages"]]}) + return cls(**kwargs)
+ + + +class KIEDocument(Document): + """Implements a document element as a collection of pages + + Args: + pages: list of page elements + """ + + _children_names: list[str] = ["pages"] + pages: list[KIEPage] = [] # type: ignore[assignment] + + def __init__( + self, + pages: list[KIEPage], + ) -> None: + super().__init__(pages=pages) # type: ignore[arg-type] +
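To see how these element classes are consumed in practice, here is a hedged end-to-end sketch (the image path is hypothetical and pretrained weights are downloaded on first use):

from doctr.io import DocumentFile
from doctr.models import ocr_predictor

doc = DocumentFile.from_images("path/to/receipt.jpg")  # hypothetical file
predictor = ocr_predictor(pretrained=True)
result = predictor(doc)                 # -> Document

print(result.render())                  # plain-text reconstruction of all pages
json_export = result.export()           # nested dict: pages -> blocks -> lines -> words
xml_outputs = result.export_as_xml()    # list of (bytes, ElementTree) in hOCR format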
diff --git a/_modules/doctr/io/html.html b/_modules/doctr/io/html.html
new file mode 100644
index 0000000000..890419175e
--- /dev/null
+++ b/_modules/doctr/io/html.html
@@ -0,0 +1,360 @@
Source code for doctr.io.html

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from typing import Any
+
+__all__ = ["read_html"]
+
+
+
+[docs]
+def read_html(url: str, **kwargs: Any) -> bytes:
+    """Read a web page and convert it into a PDF document as a bytes stream
+
+    >>> from doctr.io import read_html
+    >>> doc = read_html("https://www.yoursite.com")
+
+    Args:
+        url: URL of the target web page
+        **kwargs: keyword arguments from `weasyprint.HTML`
+
+    Returns:
+        decoded PDF file as a bytes stream
+    """
+    from weasyprint import HTML
+
+    return HTML(url, **kwargs).write_pdf()
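A small sketch of how the returned PDF bytes are typically consumed (the URL is a placeholder; weasyprint must be installed):

from doctr.io import read_html, read_pdf

pdf_bytes = read_html("https://www.yoursite.com")  # placeholder URL
pages = read_pdf(pdf_bytes)                        # one numpy array per rendered page
print(len(pages), pages[0].shape)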
diff --git a/_modules/doctr/io/image/base.html b/_modules/doctr/io/image/base.html
new file mode 100644
index 0000000000..14f514cc6e
--- /dev/null
+++ b/_modules/doctr/io/image/base.html
@@ -0,0 +1,387 @@
Source code for doctr.io.image.base

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from pathlib import Path
+
+import cv2
+import numpy as np
+
+from doctr.utils.common_types import AbstractFile
+
+__all__ = ["read_img_as_numpy"]
+
+
+
+[docs] +def read_img_as_numpy( + file: AbstractFile, + output_size: tuple[int, int] | None = None, + rgb_output: bool = True, +) -> np.ndarray: + """Read an image file into numpy format + + >>> from doctr.io import read_img_as_numpy + >>> page = read_img_as_numpy("path/to/your/doc.jpg") + + Args: + file: the path to the image file + output_size: the expected output size of each page in format H x W + rgb_output: whether the output ndarray channel order should be RGB instead of BGR. + + Returns: + the page decoded as numpy ndarray of shape H x W x 3 + """ + if isinstance(file, (str, Path)): + if not Path(file).is_file(): + raise FileNotFoundError(f"unable to access {file}") + img = cv2.imread(str(file), cv2.IMREAD_COLOR) + elif isinstance(file, bytes): + _file: np.ndarray = np.frombuffer(file, np.uint8) + img = cv2.imdecode(_file, cv2.IMREAD_COLOR) + else: + raise TypeError("unsupported object type for argument 'file'") + + # Validity check + if img is None: + raise ValueError("unable to read file.") + # Resizing + if isinstance(output_size, tuple): + img = cv2.resize(img, output_size[::-1], interpolation=cv2.INTER_LINEAR) + # Switch the channel order + if rgb_output: + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) + return img
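Usage sketch (the path is hypothetical; `output_size` is expressed as H x W):

from doctr.io import read_img_as_numpy

page = read_img_as_numpy("path/to/your/doc.jpg", output_size=(1024, 768))  # hypothetical file
print(page.shape, page.dtype)  # (1024, 768, 3) uint8, RGB channel order by default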
diff --git a/_modules/doctr/io/image/pytorch.html b/_modules/doctr/io/image/pytorch.html
new file mode 100644
index 0000000000..07e1496598
--- /dev/null
+++ b/_modules/doctr/io/image/pytorch.html
@@ -0,0 +1,435 @@
Source code for doctr.io.image.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from io import BytesIO
+
+import numpy as np
+import torch
+from PIL import Image
+from torchvision.transforms.functional import to_tensor
+
+from doctr.utils.common_types import AbstractPath
+
+__all__ = ["tensor_from_pil", "read_img_as_tensor", "decode_img_as_tensor", "tensor_from_numpy", "get_img_shape"]
+
+
+def tensor_from_pil(pil_img: Image.Image, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+    """Convert a PIL Image to a PyTorch tensor
+
+    Args:
+        pil_img: a PIL image
+        dtype: the output tensor data type
+
+    Returns:
+        decoded image as tensor
+    """
+    if dtype == torch.float32:
+        img = to_tensor(pil_img)
+    else:
+        img = tensor_from_numpy(np.array(pil_img, np.uint8, copy=True), dtype)
+
+    return img
+
+
+
+[docs]
+def read_img_as_tensor(img_path: AbstractPath, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+    """Read an image file as a PyTorch tensor
+
+    Args:
+        img_path: location of the image file
+        dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
+
+    Returns:
+        decoded image as a tensor
+    """
+    if dtype not in (torch.uint8, torch.float16, torch.float32):
+        raise ValueError("unsupported value for dtype")
+
+    with Image.open(img_path, mode="r") as pil_img:
+        return tensor_from_pil(pil_img.convert("RGB"), dtype)
+
+
+[docs]
+def decode_img_as_tensor(img_content: bytes, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+    """Read a byte stream as a PyTorch tensor
+
+    Args:
+        img_content: bytes of a decoded image
+        dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
+
+    Returns:
+        decoded image as a tensor
+    """
+    if dtype not in (torch.uint8, torch.float16, torch.float32):
+        raise ValueError("unsupported value for dtype")
+
+    with Image.open(BytesIO(img_content), mode="r") as pil_img:
+        return tensor_from_pil(pil_img.convert("RGB"), dtype)
+
+
+def tensor_from_numpy(npy_img: np.ndarray, dtype: torch.dtype = torch.float32) -> torch.Tensor:
+    """Convert a numpy array into a PyTorch tensor
+
+    Args:
+        npy_img: image encoded as a numpy array of shape (H, W, C) in np.uint8
+        dtype: the desired data type of the output tensor. If it is float-related, values will be divided by 255.
+
+    Returns:
+        same image as a tensor of shape (C, H, W)
+    """
+    if dtype not in (torch.uint8, torch.float16, torch.float32):
+        raise ValueError("unsupported value for dtype")
+
+    if dtype == torch.float32:
+        img = to_tensor(npy_img)
+    else:
+        img = torch.from_numpy(npy_img)
+        # put it from HWC to CHW format
+        img = img.permute((2, 0, 1)).contiguous()
+        if dtype == torch.float16:
+            # Switch to FP16
+            img = img.to(dtype=torch.float16).div(255)
+
+    return img
+
+
+def get_img_shape(img: torch.Tensor) -> tuple[int, int]:
+    """Get the shape of an image"""
+    return img.shape[-2:]  # type: ignore[return-value]
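Usage sketch for these helpers, assuming they are re-exported by `doctr.io` / `doctr.io.image` (the image path is hypothetical):

import numpy as np
import torch
from doctr.io import read_img_as_tensor
from doctr.io.image import tensor_from_numpy  # assumed re-export of the backend helper

img = read_img_as_tensor("path/to/your/doc.jpg", dtype=torch.float32)  # hypothetical file
print(img.shape, img.dtype)  # torch.Size([3, H, W]) torch.float32, values in [0, 1]

npy = np.zeros((32, 128, 3), dtype=np.uint8)
print(tensor_from_numpy(npy, dtype=torch.uint8).shape)  # torch.Size([3, 32, 128])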
diff --git a/_modules/doctr/io/pdf.html b/_modules/doctr/io/pdf.html
new file mode 100644
index 0000000000..f6fb02c880
--- /dev/null
+++ b/_modules/doctr/io/pdf.html
@@ -0,0 +1,377 @@
Source code for doctr.io.pdf

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from typing import Any
+
+import numpy as np
+import pypdfium2 as pdfium
+
+from doctr.utils.common_types import AbstractFile
+
+__all__ = ["read_pdf"]
+
+
+
+[docs] +def read_pdf( + file: AbstractFile, + scale: int = 2, + rgb_mode: bool = True, + password: str | None = None, + **kwargs: Any, +) -> list[np.ndarray]: + """Read a PDF file and convert it into an image in numpy format + + >>> from doctr.io import read_pdf + >>> doc = read_pdf("path/to/your/doc.pdf") + + Args: + file: the path to the PDF file + scale: rendering scale (1 corresponds to 72dpi) + rgb_mode: if True, the output will be RGB, otherwise BGR + password: a password to unlock the document, if encrypted + **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render` + + Returns: + the list of pages decoded as numpy ndarray of shape H x W x C + """ + # Rasterise pages to numpy ndarrays with pypdfium2 + pdf = pdfium.PdfDocument(file, password=password) + try: + return [page.render(scale=scale, rev_byteorder=rgb_mode, **kwargs).to_numpy() for page in pdf] + finally: + pdf.close()
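Usage sketch (the path is hypothetical; since a scale of 1 corresponds to 72 dpi, `scale=2` renders pages at roughly 144 dpi):

from doctr.io import read_pdf

pages = read_pdf("path/to/your/doc.pdf", scale=2)  # hypothetical file
print(len(pages), pages[0].shape, pages[0].dtype)  # e.g. 3 (H, W, 3) uint8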
diff --git a/_modules/doctr/io/reader.html b/_modules/doctr/io/reader.html
new file mode 100644
index 0000000000..30a511ab8b
--- /dev/null
+++ b/_modules/doctr/io/reader.html
@@ -0,0 +1,422 @@
Source code for doctr.io.reader

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from collections.abc import Sequence
+from pathlib import Path
+
+import numpy as np
+
+from doctr.file_utils import requires_package
+from doctr.utils.common_types import AbstractFile
+
+from .html import read_html
+from .image import read_img_as_numpy
+from .pdf import read_pdf
+
+__all__ = ["DocumentFile"]
+
+
+
+[docs] +class DocumentFile: + """Read a document from multiple extensions""" + +
+[docs] + @classmethod + def from_pdf(cls, file: AbstractFile, **kwargs) -> list[np.ndarray]: + """Read a PDF file + + >>> from doctr.io import DocumentFile + >>> doc = DocumentFile.from_pdf("path/to/your/doc.pdf") + + Args: + file: the path to the PDF file or a binary stream + **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render` + + Returns: + the list of pages decoded as numpy ndarray of shape H x W x 3 + """ + return read_pdf(file, **kwargs)
+ + +
+[docs] + @classmethod + def from_url(cls, url: str, **kwargs) -> list[np.ndarray]: + """Interpret a web page as a PDF document + + >>> from doctr.io import DocumentFile + >>> doc = DocumentFile.from_url("https://www.yoursite.com") + + Args: + url: the URL of the target web page + **kwargs: additional parameters to :meth:`pypdfium2.PdfPage.render` + + Returns: + the list of pages decoded as numpy ndarray of shape H x W x 3 + """ + requires_package( + "weasyprint", + "`.from_url` requires weasyprint installed.\n" + + "Installation instructions: https://doc.courtbouillon.org/weasyprint/stable/first_steps.html#installation", + ) + pdf_stream = read_html(url) + return cls.from_pdf(pdf_stream, **kwargs)
+ + +
+[docs] + @classmethod + def from_images(cls, files: Sequence[AbstractFile] | AbstractFile, **kwargs) -> list[np.ndarray]: + """Read an image file (or a collection of image files) and convert it into an image in numpy format + + >>> from doctr.io import DocumentFile + >>> pages = DocumentFile.from_images(["path/to/your/page1.png", "path/to/your/page2.png"]) + + Args: + files: the path to the image file or a binary stream, or a collection of those + **kwargs: additional parameters to :meth:`doctr.io.image.read_img_as_numpy` + + Returns: + the list of pages decoded as numpy ndarray of shape H x W x 3 + """ + if isinstance(files, (str, Path, bytes)): + files = [files] + + return [read_img_as_numpy(file, **kwargs) for file in files]
+
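A combined usage sketch of the three constructors (all inputs are placeholders; `from_url` additionally requires weasyprint):

from doctr.io import DocumentFile

pdf_pages = DocumentFile.from_pdf("path/to/your/doc.pdf")                          # placeholder
img_pages = DocumentFile.from_images(["path/to/page1.png", "path/to/page2.png"])  # placeholders
web_pages = DocumentFile.from_url("https://www.yoursite.com")                      # needs weasyprint
print(len(pdf_pages), len(img_pages), len(web_pages))  # each page is an H x W x 3 numpy array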
diff --git a/_modules/doctr/models/classification/magc_resnet/pytorch.html b/_modules/doctr/models/classification/magc_resnet/pytorch.html
new file mode 100644
index 0000000000..dd6475530b
--- /dev/null
+++ b/_modules/doctr/models/classification/magc_resnet/pytorch.html
@@ -0,0 +1,507 @@
Source code for doctr.models.classification.magc_resnet.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+
+import math
+from copy import deepcopy
+from functools import partial
+from typing import Any
+
+import torch
+from torch import nn
+
+from doctr.datasets import VOCABS
+
+from ..resnet import ResNet
+
+__all__ = ["magc_resnet31"]
+
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "magc_resnet31": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.4.1/magc_resnet31-857391d8.pt&src=0",
+    },
+}
+
+
+class MAGC(nn.Module):
+    """Implements the Multi-Aspect Global Context Attention, as described in
+    <https://arxiv.org/pdf/1910.02562.pdf>`_.
+
+    Args:
+        inplanes: input channels
+        headers: number of headers to split channels
+        attn_scale: if True, re-scale attention to counteract the variance distributions
+        ratio: bottleneck ratio
+        **kwargs
+    """
+
+    def __init__(
+        self,
+        inplanes: int,
+        headers: int = 8,
+        attn_scale: bool = False,
+        ratio: float = 0.0625,  # bottleneck ratio of 1/16 as described in paper
+        cfg: dict[str, Any] | None = None,
+    ) -> None:
+        super().__init__()
+
+        self.headers = headers
+        self.inplanes = inplanes
+        self.attn_scale = attn_scale
+        self.planes = int(inplanes * ratio)
+
+        self.single_header_inplanes = int(inplanes / headers)
+
+        self.conv_mask = nn.Conv2d(self.single_header_inplanes, 1, kernel_size=1)
+        self.softmax = nn.Softmax(dim=1)
+
+        self.transform = nn.Sequential(
+            nn.Conv2d(self.inplanes, self.planes, kernel_size=1),
+            nn.LayerNorm([self.planes, 1, 1]),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(self.planes, self.inplanes, kernel_size=1),
+        )
+
+    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
+        batch, _, height, width = inputs.size()
+        # (N * headers, C / headers, H , W)
+        x = inputs.contiguous().view(batch * self.headers, self.single_header_inplanes, height, width)
+        shortcut = x
+        # (N * headers, C / headers, H * W)
+        shortcut = shortcut.view(batch * self.headers, self.single_header_inplanes, height * width)
+
+        # (N * headers, 1, H, W)
+        context_mask = self.conv_mask(x)
+        # (N * headers, H * W)
+        context_mask = context_mask.view(batch * self.headers, -1)
+
+        # scale variance
+        if self.attn_scale and self.headers > 1:
+            context_mask = context_mask / math.sqrt(self.single_header_inplanes)
+
+        # (N * headers, H * W)
+        context_mask = self.softmax(context_mask)
+
+        # (N * headers, C / headers)
+        context = (shortcut * context_mask.unsqueeze(1)).sum(-1)
+
+        # (N, C, 1, 1)
+        context = context.view(batch, self.headers * self.single_header_inplanes, 1, 1)
+
+        # Transform: B, C, 1, 1 ->  B, C, 1, 1
+        transformed = self.transform(context)
+        return inputs + transformed
+
+
+def _magc_resnet(
+    arch: str,
+    pretrained: bool,
+    num_blocks: list[int],
+    output_channels: list[int],
+    stage_stride: list[int],
+    stage_conv: list[bool],
+    stage_pooling: list[tuple[int, int] | None],
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> ResNet:
+    kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
+    kwargs["classes"] = kwargs.get("classes", default_cfgs[arch]["classes"])
+
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["num_classes"] = kwargs["num_classes"]
+    _cfg["classes"] = kwargs["classes"]
+    kwargs.pop("classes")
+
+    # Build the model
+    model = ResNet(
+        num_blocks,
+        output_channels,
+        stage_stride,
+        stage_conv,
+        stage_pooling,
+        attn_module=partial(MAGC, headers=8, attn_scale=True),
+        cfg=_cfg,
+        **kwargs,
+    )
+    # Load pretrained parameters
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    return model
+
+
+
+[docs]
+def magc_resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet:
+    """Resnet31 architecture with Multi-Aspect Global Context Attention as described in
+    `"MASTER: Multi-Aspect Non-local Network for Scene Text Recognition",
+    <https://arxiv.org/pdf/1910.02562.pdf>`_.
+
+    >>> import torch
+    >>> from doctr.models import magc_resnet31
+    >>> model = magc_resnet31(pretrained=False)
+    >>> input_tensor = torch.rand((1, 3, 224, 224), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the ResNet architecture
+
+    Returns:
+        A feature extractor model
+    """
+    return _magc_resnet(
+        "magc_resnet31",
+        pretrained,
+        [1, 2, 5, 3],
+        [256, 256, 512, 512],
+        [1, 1, 1, 1],
+        [True] * 4,
+        [(2, 2), (2, 1), None, None],
+        origin_stem=False,
+        stem_channels=128,
+        ignore_keys=["13.weight", "13.bias"],
+        **kwargs,
+    )
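A quick shape-check sketch for the MAGC-augmented backbone (random input, no pretrained weights downloaded):

import torch
from doctr.models import magc_resnet31

model = magc_resnet31(pretrained=False)
x = torch.rand((1, 3, 32, 32), dtype=torch.float32)  # matches the default input_shape
with torch.no_grad():
    out = model(x)
print(out.shape)  # (1, num_classes), the French vocab size by default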
diff --git a/_modules/doctr/models/classification/mobilenet/pytorch.html b/_modules/doctr/models/classification/mobilenet/pytorch.html
new file mode 100644
index 0000000000..763d2a762c
--- /dev/null
+++ b/_modules/doctr/models/classification/mobilenet/pytorch.html
@@ -0,0 +1,629 @@
Source code for doctr.models.classification.mobilenet.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+# Greatly inspired by https://github.com/pytorch/vision/blob/master/torchvision/models/mobilenetv3.py
+
+import types
+from copy import deepcopy
+from typing import Any
+
+from torchvision.models import mobilenetv3
+from torchvision.models.mobilenetv3 import MobileNetV3
+
+from doctr.datasets import VOCABS
+
+from ...utils import load_pretrained_params
+
+__all__ = [
+    "MobileNetV3",
+    "mobilenet_v3_small",
+    "mobilenet_v3_small_r",
+    "mobilenet_v3_large",
+    "mobilenet_v3_large_r",
+    "mobilenet_v3_small_crop_orientation",
+    "mobilenet_v3_small_page_orientation",
+]
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "mobilenet_v3_large": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.4.1/mobilenet_v3_large-11fc8cb9.pt&src=0",
+    },
+    "mobilenet_v3_large_r": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.4.1/mobilenet_v3_large_r-74a22066.pt&src=0",
+    },
+    "mobilenet_v3_small": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.4.1/mobilenet_v3_small-6a4bfa6b.pt&src=0",
+    },
+    "mobilenet_v3_small_r": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.4.1/mobilenet_v3_small_r-1a8a3530.pt&src=0",
+    },
+    "mobilenet_v3_small_crop_orientation": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 256, 256),
+        "classes": [0, -90, 180, 90],
+        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/mobilenet_v3_small_crop_orientation-f0847a18.pt&src=0",
+    },
+    "mobilenet_v3_small_page_orientation": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 512, 512),
+        "classes": [0, -90, 180, 90],
+        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/mobilenet_v3_small_page_orientation-8e60325c.pt&src=0",
+    },
+}
+
+
+def _mobilenet_v3(
+    arch: str,
+    pretrained: bool,
+    rect_strides: list[str] | None = None,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> mobilenetv3.MobileNetV3:
+    kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
+    kwargs["classes"] = kwargs.get("classes", default_cfgs[arch]["classes"])
+
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["num_classes"] = kwargs["num_classes"]
+    _cfg["classes"] = kwargs["classes"]
+    kwargs.pop("classes")
+
+    if arch.startswith("mobilenet_v3_small"):
+        model = mobilenetv3.mobilenet_v3_small(**kwargs, weights=None)
+    else:
+        model = mobilenetv3.mobilenet_v3_large(**kwargs, weights=None)
+
+    # Rectangular strides
+    if isinstance(rect_strides, list):
+        for layer_name in rect_strides:
+            m = model
+            for child in layer_name.split("."):
+                m = getattr(m, child)
+            m.stride = (2, 1)
+
+    # monkeypatch the model to allow for loading pretrained parameters
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:  # noqa: D417
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    # Bind method to the instance
+    model.from_pretrained = types.MethodType(from_pretrained, model)
+
+    # Load pretrained parameters
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    model.cfg = _cfg
+
+    return model
+
+
+
+[docs]
+def mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.MobileNetV3:
+    """MobileNetV3-Small architecture as described in
+    `"Searching for MobileNetV3",
+    <https://arxiv.org/pdf/1905.02244.pdf>`_.
+
+    >>> import torch
+    >>> from doctr.models import mobilenet_v3_small
+    >>> model = mobilenet_v3_small(pretrained=False)
+    >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the MobileNetV3 architecture
+
+    Returns:
+        a torch.nn.Module
+    """
+    return _mobilenet_v3(
+        "mobilenet_v3_small", pretrained, ignore_keys=["classifier.3.weight", "classifier.3.bias"], **kwargs
+    )
+ + + +
+[docs] +def mobilenet_v3_small_r(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.MobileNetV3: + """MobileNetV3-Small architecture as described in + `"Searching for MobileNetV3", + <https://arxiv.org/pdf/1905.02244.pdf>`_, with rectangular pooling. + + >>> import torch + >>> from doctr.models import mobilenet_v3_small_r + >>> model = mobilenet_v3_small_r(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained: boolean, True if model is pretrained + **kwargs: keyword arguments of the MobileNetV3 architecture + + Returns: + a torch.nn.Module + """ + return _mobilenet_v3( + "mobilenet_v3_small_r", + pretrained, + ["features.2.block.1.0", "features.4.block.1.0", "features.9.block.1.0"], + ignore_keys=["classifier.3.weight", "classifier.3.bias"], + **kwargs, + )
+ + + +
+[docs] +def mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.MobileNetV3: + """MobileNetV3-Large architecture as described in + `"Searching for MobileNetV3", + <https://arxiv.org/pdf/1905.02244.pdf>`_. + + >>> import torch + >>> from doctr.models import mobilenet_v3_large + >>> model = mobilenet_v3_large(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained: boolean, True if model is pretrained + **kwargs: keyword arguments of the MobileNetV3 architecture + + Returns: + a torch.nn.Module + """ + return _mobilenet_v3( + "mobilenet_v3_large", + pretrained, + ignore_keys=["classifier.3.weight", "classifier.3.bias"], + **kwargs, + )
+ + + +
+[docs] +def mobilenet_v3_large_r(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.MobileNetV3: + """MobileNetV3-Large architecture as described in + `"Searching for MobileNetV3", + <https://arxiv.org/pdf/1905.02244.pdf>`_, with rectangular pooling. + + >>> import torch + >>> from doctr.models import mobilenet_v3_large_r + >>> model = mobilenet_v3_large_r(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained: boolean, True if model is pretrained + **kwargs: keyword arguments of the MobileNetV3 architecture + + Returns: + a torch.nn.Module + """ + return _mobilenet_v3( + "mobilenet_v3_large_r", + pretrained, + ["features.4.block.1.0", "features.7.block.1.0", "features.13.block.1.0"], + ignore_keys=["classifier.3.weight", "classifier.3.bias"], + **kwargs, + )
+ + + +
+[docs] +def mobilenet_v3_small_crop_orientation(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.MobileNetV3: + """MobileNetV3-Small architecture as described in + `"Searching for MobileNetV3", + <https://arxiv.org/pdf/1905.02244.pdf>`_. + + >>> import torch + >>> from doctr.models import mobilenet_v3_small_crop_orientation + >>> model = mobilenet_v3_small_crop_orientation(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained: boolean, True if model is pretrained + **kwargs: keyword arguments of the MobileNetV3 architecture + + Returns: + a torch.nn.Module + """ + return _mobilenet_v3( + "mobilenet_v3_small_crop_orientation", + pretrained, + ignore_keys=["classifier.3.weight", "classifier.3.bias"], + **kwargs, + )
+ + + +
+[docs] +def mobilenet_v3_small_page_orientation(pretrained: bool = False, **kwargs: Any) -> mobilenetv3.MobileNetV3: + """MobileNetV3-Small architecture as described in + `"Searching for MobileNetV3", + <https://arxiv.org/pdf/1905.02244.pdf>`_. + + >>> import torch + >>> from doctr.models import mobilenet_v3_small_page_orientation + >>> model = mobilenet_v3_small_page_orientation(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained: boolean, True if model is pretrained + **kwargs: keyword arguments of the MobileNetV3 architecture + + Returns: + a torch.nn.Module + """ + return _mobilenet_v3( + "mobilenet_v3_small_page_orientation", + pretrained, + ignore_keys=["classifier.3.weight", "classifier.3.bias"], + **kwargs, + )
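As a sketch of the orientation heads specifically (random input, no pretrained weights; the classifier predicts one of [0, -90, 180, 90] degrees):

import torch
from doctr.models import mobilenet_v3_small_crop_orientation

model = mobilenet_v3_small_crop_orientation(pretrained=False)
x = torch.rand((1, 3, 256, 256), dtype=torch.float32)  # default input_shape for this head
with torch.no_grad():
    logits = model(x)
print(logits.shape)  # torch.Size([1, 4])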
diff --git a/_modules/doctr/models/classification/resnet/pytorch.html b/_modules/doctr/models/classification/resnet/pytorch.html
new file mode 100644
index 0000000000..80b0608818
--- /dev/null
+++ b/_modules/doctr/models/classification/resnet/pytorch.html
@@ -0,0 +1,722 @@
Source code for doctr.models.classification.resnet.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+
+import types
+from collections.abc import Callable
+from copy import deepcopy
+from typing import Any
+
+from torch import nn
+from torchvision.models.resnet import BasicBlock
+from torchvision.models.resnet import ResNet as TVResNet
+from torchvision.models.resnet import resnet18 as tv_resnet18
+from torchvision.models.resnet import resnet34 as tv_resnet34
+from torchvision.models.resnet import resnet50 as tv_resnet50
+
+from doctr.datasets import VOCABS
+
+from ...utils import conv_sequence_pt, load_pretrained_params
+
+__all__ = ["ResNet", "resnet18", "resnet31", "resnet34", "resnet50", "resnet34_wide", "resnet_stage"]
+
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "resnet18": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.4.1/resnet18-244bf390.pt&src=0",
+    },
+    "resnet31": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.4.1/resnet31-1056cc5c.pt&src=0",
+    },
+    "resnet34": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.5.0/resnet34-bd8725db.pt&src=0",
+    },
+    "resnet50": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.5.0/resnet50-1a6c155e.pt&src=0",
+    },
+    "resnet34_wide": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.6.0/resnet34_wide-b4b3e39e.pt&src=0",
+    },
+}
+
+
+def resnet_stage(in_channels: int, out_channels: int, num_blocks: int, stride: int) -> list[nn.Module]:
+    """Build a ResNet stage"""
+    _layers: list[nn.Module] = []
+
+    in_chan = in_channels
+    s = stride
+    for _ in range(num_blocks):
+        downsample = None
+        if in_chan != out_channels:
+            downsample = nn.Sequential(*conv_sequence_pt(in_chan, out_channels, False, True, kernel_size=1, stride=s))
+
+        _layers.append(BasicBlock(in_chan, out_channels, stride=s, downsample=downsample))
+        in_chan = out_channels
+        # Only the first block can have stride != 1
+        s = 1
+
+    return _layers
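+
+# Editor's note (illustrative sketch, not part of the upstream module): `resnet_stage`
+# returns a flat list of BasicBlocks; only the first block may change the channel count
+# or stride, in which case it carries a 1x1 conv "downsample" projection. Hypothetically:
+#   blocks = resnet_stage(64, 128, num_blocks=2, stride=2)
+#   blocks[0].downsample is not None  # 1x1 conv, 64 -> 128, stride 2
+#   blocks[1].downsample is None      # channels already match, stride 1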
+
+
+class ResNet(nn.Sequential):
+    """Implements a ResNet-31 architecture from `"Show, Attend and Read:A Simple and Strong Baseline for Irregular
+    Text Recognition" <https://arxiv.org/pdf/1811.00751.pdf>`_.
+
+    Args:
+        num_blocks: number of resnet block in each stage
+        output_channels: number of channels in each stage
+        stage_conv: whether to add a conv_sequence after each stage
+        stage_pooling: pooling to add after each stage (if None, no pooling)
+        origin_stem: whether to use the orginal ResNet stem or ResNet-31's
+        stem_channels: number of output channels of the stem convolutions
+        attn_module: attention module to use in each stage
+        include_top: whether the classifier head should be instantiated
+        num_classes: number of output classes
+    """
+
+    def __init__(
+        self,
+        num_blocks: list[int],
+        output_channels: list[int],
+        stage_stride: list[int],
+        stage_conv: list[bool],
+        stage_pooling: list[tuple[int, int] | None],
+        origin_stem: bool = True,
+        stem_channels: int = 64,
+        attn_module: Callable[[int], nn.Module] | None = None,
+        include_top: bool = True,
+        num_classes: int = 1000,
+        cfg: dict[str, Any] | None = None,
+    ) -> None:
+        _layers: list[nn.Module]
+        if origin_stem:
+            _layers = [
+                *conv_sequence_pt(3, stem_channels, True, True, kernel_size=7, padding=3, stride=2),
+                nn.MaxPool2d(kernel_size=3, stride=2, padding=1),
+            ]
+        else:
+            _layers = [
+                *conv_sequence_pt(3, stem_channels // 2, True, True, kernel_size=3, padding=1),
+                *conv_sequence_pt(stem_channels // 2, stem_channels, True, True, kernel_size=3, padding=1),
+                nn.MaxPool2d(2),
+            ]
+        in_chans = [stem_channels] + output_channels[:-1]
+        for n_blocks, in_chan, out_chan, stride, conv, pool in zip(
+            num_blocks, in_chans, output_channels, stage_stride, stage_conv, stage_pooling
+        ):
+            _stage = resnet_stage(in_chan, out_chan, n_blocks, stride)
+            if attn_module is not None:
+                _stage.append(attn_module(out_chan))
+            if conv:
+                _stage.extend(conv_sequence_pt(out_chan, out_chan, True, True, kernel_size=3, padding=1))
+            if pool is not None:
+                _stage.append(nn.MaxPool2d(pool))
+            _layers.append(nn.Sequential(*_stage))
+
+        if include_top:
+            _layers.extend([
+                nn.AdaptiveAvgPool2d(1),
+                nn.Flatten(1),
+                nn.Linear(output_channels[-1], num_classes, bias=True),
+            ])
+
+        super().__init__(*_layers)
+        self.cfg = cfg
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+
+def _resnet(
+    arch: str,
+    pretrained: bool,
+    num_blocks: list[int],
+    output_channels: list[int],
+    stage_stride: list[int],
+    stage_conv: list[bool],
+    stage_pooling: list[tuple[int, int] | None],
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> ResNet:
+    kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
+    kwargs["classes"] = kwargs.get("classes", default_cfgs[arch]["classes"])
+
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["num_classes"] = kwargs["num_classes"]
+    _cfg["classes"] = kwargs["classes"]
+    kwargs.pop("classes")
+
+    # Build the model
+    model = ResNet(num_blocks, output_channels, stage_stride, stage_conv, stage_pooling, cfg=_cfg, **kwargs)
+    # Load pretrained parameters
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    return model
+
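+# Editor's note (illustrative, not part of the upstream module): the `ignore_keys` plumbing
+# above only applies when the caller overrides `num_classes`. For instance, a hypothetical
+#   model = resnet31(pretrained=True, num_classes=10)
+# loads the checkpoint but skips the classifier weights ("13.weight", "13.bias"), whose
+# shape no longer matches the 10-class head; with the default vocabulary size the full
+# state dict is loaded unchanged (`_ignore_keys` stays None).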
+
+def _tv_resnet(
+    arch: str,
+    pretrained: bool,
+    arch_fn,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> TVResNet:
+    kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
+    kwargs["classes"] = kwargs.get("classes", default_cfgs[arch]["classes"])
+
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["num_classes"] = kwargs["num_classes"]
+    _cfg["classes"] = kwargs["classes"]
+    kwargs.pop("classes")
+
+    # Build the model
+    model = arch_fn(**kwargs, weights=None)
+
+    # monkeypatch the model to allow for loading pretrained parameters
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:  # noqa: D417
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    # Bind method to the instance
+    model.from_pretrained = types.MethodType(from_pretrained, model)
+
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    model.cfg = _cfg
+
+    return model
+
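+# Editor's note (illustrative, not part of the upstream module): `types.MethodType` binds a
+# plain function to one specific instance, so the torchvision ResNet gains a
+# `from_pretrained` method without subclassing. A self-contained sketch of the pattern:
+#   import types
+#   class Box:
+#       pass
+#   b = Box()
+#   b.describe = types.MethodType(lambda self: type(self).__name__, b)
+#   assert b.describe() == "Box"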
+
+
+[docs] +def resnet18(pretrained: bool = False, **kwargs: Any) -> TVResNet: + """ResNet-18 architecture as described in `"Deep Residual Learning for Image Recognition", + <https://arxiv.org/pdf/1512.03385.pdf>`_. + + >>> import torch + >>> from doctr.models import resnet18 + >>> model = resnet18(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained: boolean, True if model is pretrained + **kwargs: keyword arguments of the ResNet architecture + + Returns: + A resnet18 model + """ + return _tv_resnet( + "resnet18", + pretrained, + tv_resnet18, + ignore_keys=["fc.weight", "fc.bias"], + **kwargs, + )
+ + + +
+def resnet31(pretrained: bool = False, **kwargs: Any) -> ResNet:
+    """ResNet-31 architecture with rectangular pooling windows as described in
+    `"Show, Attend and Read: A Simple and Strong Baseline for Irregular Text Recognition",
+    <https://arxiv.org/pdf/1811.00751.pdf>`_. Downsizing: (H, W) --> (H/8, W/4)
+
+    >>> import torch
+    >>> from doctr.models import resnet31
+    >>> model = resnet31(pretrained=False)
+    >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the ResNet architecture
+
+    Returns:
+        A resnet31 model
+    """
+    return _resnet(
+        "resnet31",
+        pretrained,
+        [1, 2, 5, 3],
+        [256, 256, 512, 512],
+        [1, 1, 1, 1],
+        [True] * 4,
+        [(2, 2), (2, 1), None, None],
+        origin_stem=False,
+        stem_channels=128,
+        ignore_keys=["13.weight", "13.bias"],
+        **kwargs,
+    )
+ + + +
+[docs] +def resnet34(pretrained: bool = False, **kwargs: Any) -> TVResNet: + """ResNet-34 architecture as described in `"Deep Residual Learning for Image Recognition", + <https://arxiv.org/pdf/1512.03385.pdf>`_. + + >>> import torch + >>> from doctr.models import resnet34 + >>> model = resnet34(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained: boolean, True if model is pretrained + **kwargs: keyword arguments of the ResNet architecture + + Returns: + A resnet34 model + """ + return _tv_resnet( + "resnet34", + pretrained, + tv_resnet34, + ignore_keys=["fc.weight", "fc.bias"], + **kwargs, + )
+ + + +def resnet34_wide(pretrained: bool = False, **kwargs: Any) -> ResNet: + """ResNet-34 architecture as described in `"Deep Residual Learning for Image Recognition", + <https://arxiv.org/pdf/1512.03385.pdf>`_ with twice as many output channels. + + >>> import torch + >>> from doctr.models import resnet34_wide + >>> model = resnet34_wide(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained: boolean, True if model is pretrained + **kwargs: keyword arguments of the ResNet architecture + + Returns: + A resnet34_wide model + """ + return _resnet( + "resnet34_wide", + pretrained, + [3, 4, 6, 3], + [128, 256, 512, 1024], + [1, 2, 2, 2], + [False] * 4, + [None] * 4, + origin_stem=True, + stem_channels=128, + ignore_keys=["10.weight", "10.bias"], + **kwargs, + ) + + +
+[docs] +def resnet50(pretrained: bool = False, **kwargs: Any) -> TVResNet: + """ResNet-50 architecture as described in `"Deep Residual Learning for Image Recognition", + <https://arxiv.org/pdf/1512.03385.pdf>`_. + + >>> import torch + >>> from doctr.models import resnet50 + >>> model = resnet50(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained: boolean, True if model is pretrained + **kwargs: keyword arguments of the ResNet architecture + + Returns: + A resnet50 model + """ + return _tv_resnet( + "resnet50", + pretrained, + tv_resnet50, + ignore_keys=["fc.weight", "fc.bias"], + **kwargs, + )
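Beyond the classification use shown in the docstrings above, these backbones are typically consumed headless inside docTR's detection and recognition models. A minimal sketch (hedged: `include_top` is forwarded to the `ResNet` constructor via `**kwargs`, so this only applies to the non-torchvision variants such as `resnet31`):

>>> import torch
>>> from doctr.models import resnet31
>>> backbone = resnet31(pretrained=False, include_top=False)
>>> feats = backbone(torch.rand((1, 3, 32, 128), dtype=torch.float32))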
\ No newline at end of file
diff --git a/_modules/doctr/models/classification/textnet/pytorch.html b/_modules/doctr/models/classification/textnet/pytorch.html
new file mode 100644
index 0000000000..3787161f27
--- /dev/null
+++ b/_modules/doctr/models/classification/textnet/pytorch.html
@@ -0,0 +1,617 @@

Source code for doctr.models.classification.textnet.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+
+from copy import deepcopy
+from typing import Any
+
+from torch import nn
+
+from doctr.datasets import VOCABS
+
+from ...modules.layers import FASTConvLayer
+from ...utils import conv_sequence_pt, load_pretrained_params
+
+__all__ = ["textnet_tiny", "textnet_small", "textnet_base"]
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "textnet_tiny": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/textnet_tiny-27288d12.pt&src=0",
+    },
+    "textnet_small": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/textnet_small-43166ee6.pt&src=0",
+    },
+    "textnet_base": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/textnet_base-7f68d7e0.pt&src=0",
+    },
+}
+
+
+class TextNet(nn.Sequential):
+    """Implements TextNet architecture from `"FAST: Faster Arbitrarily-Shaped Text Detector with
+    Minimalist Kernel Representation" <https://arxiv.org/abs/2111.02394>`_.
+    Implementation based on the official PyTorch implementation: `<https://github.com/czczup/FAST>`_.
+
+    Args:
+        stages (list[dict[str, list[int]]]): list of dictionaries containing the parameters of each stage.
+        input_shape (tuple[int, int, int], optional): shape of the input images. Defaults to (3, 32, 32).
+        include_top (bool, optional): Whether to include the classifier head. Defaults to True.
+        num_classes (int, optional): Number of output classes. Defaults to 1000.
+        cfg (dict[str, Any], optional): Additional configuration. Defaults to None.
+    """
+
+    def __init__(
+        self,
+        stages: list[dict[str, list[int]]],
+        input_shape: tuple[int, int, int] = (3, 32, 32),
+        num_classes: int = 1000,
+        include_top: bool = True,
+        cfg: dict[str, Any] | None = None,
+    ) -> None:
+        _layers: list[nn.Module] = [
+            *conv_sequence_pt(
+                in_channels=3, out_channels=64, relu=True, bn=True, kernel_size=3, stride=2, padding=(1, 1)
+            ),
+            *[
+                nn.Sequential(*[
+                    FASTConvLayer(**params)  # type: ignore[arg-type]
+                    for params in [{key: stage[key][i] for key in stage} for i in range(len(stage["in_channels"]))]
+                ])
+                for stage in stages
+            ],
+        ]
+
+        if include_top:
+            _layers.append(
+                nn.Sequential(
+                    nn.AdaptiveAvgPool2d(1),
+                    nn.Flatten(1),
+                    nn.Linear(stages[-1]["out_channels"][-1], num_classes),
+                )
+            )
+
+        super().__init__(*_layers)
+        self.cfg = cfg
+
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+
+def _textnet(
+    arch: str,
+    pretrained: bool,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> TextNet:
+    kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
+    kwargs["classes"] = kwargs.get("classes", default_cfgs[arch]["classes"])
+
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["num_classes"] = kwargs["num_classes"]
+    _cfg["classes"] = kwargs["classes"]
+    kwargs.pop("classes")
+
+    # Build the model
+    model = TextNet(**kwargs)
+    # Load pretrained parameters
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    model.cfg = _cfg
+
+    return model
+
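+# Editor's note (illustrative, not part of the upstream module): each stage passed to
+# `TextNet` is a "dict of lists" that the constructor expands into one kwargs dict per
+# FASTConvLayer. For example, a hypothetical stage
+#   {"in_channels": [64, 64], "out_channels": [64, 64], "kernel_size": [(3, 3)] * 2, "stride": [1, 2]}
+# is expanded by
+#   [{key: stage[key][i] for key in stage} for i in range(len(stage["in_channels"]))]
+# into
+#   [{"in_channels": 64, "out_channels": 64, "kernel_size": (3, 3), "stride": 1},
+#    {"in_channels": 64, "out_channels": 64, "kernel_size": (3, 3), "stride": 2}]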
+
+
+[docs] +def textnet_tiny(pretrained: bool = False, **kwargs: Any) -> TextNet: + """Implements TextNet architecture from `"FAST: Faster Arbitrarily-Shaped Text Detector with + Minimalist Kernel Representation" <https://arxiv.org/abs/2111.02394>`_. + Implementation based on the official Pytorch implementation: <https://github.com/czczup/FAST>`_. + + >>> import torch + >>> from doctr.models import textnet_tiny + >>> model = textnet_tiny(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained: boolean, True if model is pretrained + **kwargs: keyword arguments of the TextNet architecture + + Returns: + A textnet tiny model + """ + return _textnet( + "textnet_tiny", + pretrained, + stages=[ + {"in_channels": [64] * 3, "out_channels": [64] * 3, "kernel_size": [(3, 3)] * 3, "stride": [1, 2, 1]}, + { + "in_channels": [64, 128, 128, 128], + "out_channels": [128] * 4, + "kernel_size": [(3, 3), (1, 3), (3, 3), (3, 1)], + "stride": [2, 1, 1, 1], + }, + { + "in_channels": [128, 256, 256, 256], + "out_channels": [256] * 4, + "kernel_size": [(3, 3), (3, 3), (3, 1), (1, 3)], + "stride": [2, 1, 1, 1], + }, + { + "in_channels": [256, 512, 512, 512], + "out_channels": [512] * 4, + "kernel_size": [(3, 3), (3, 1), (1, 3), (3, 3)], + "stride": [2, 1, 1, 1], + }, + ], + ignore_keys=["7.2.weight", "7.2.bias"], + **kwargs, + )
+ + + +
+[docs] +def textnet_small(pretrained: bool = False, **kwargs: Any) -> TextNet: + """Implements TextNet architecture from `"FAST: Faster Arbitrarily-Shaped Text Detector with + Minimalist Kernel Representation" <https://arxiv.org/abs/2111.02394>`_. + Implementation based on the official Pytorch implementation: <https://github.com/czczup/FAST>`_. + + >>> import torch + >>> from doctr.models import textnet_small + >>> model = textnet_small(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained: boolean, True if model is pretrained + **kwargs: keyword arguments of the TextNet architecture + + Returns: + A TextNet small model + """ + return _textnet( + "textnet_small", + pretrained, + stages=[ + {"in_channels": [64] * 2, "out_channels": [64] * 2, "kernel_size": [(3, 3)] * 2, "stride": [1, 2]}, + { + "in_channels": [64, 128, 128, 128, 128, 128, 128, 128], + "out_channels": [128] * 8, + "kernel_size": [(3, 3), (1, 3), (3, 3), (3, 1), (3, 3), (3, 1), (1, 3), (3, 3)], + "stride": [2, 1, 1, 1, 1, 1, 1, 1], + }, + { + "in_channels": [128, 256, 256, 256, 256, 256, 256, 256], + "out_channels": [256] * 8, + "kernel_size": [(3, 3), (3, 3), (1, 3), (3, 1), (3, 3), (1, 3), (3, 1), (3, 3)], + "stride": [2, 1, 1, 1, 1, 1, 1, 1], + }, + { + "in_channels": [256, 512, 512, 512, 512], + "out_channels": [512] * 5, + "kernel_size": [(3, 3), (3, 1), (1, 3), (1, 3), (3, 1)], + "stride": [2, 1, 1, 1, 1], + }, + ], + ignore_keys=["7.2.weight", "7.2.bias"], + **kwargs, + )
+ + + +
+[docs] +def textnet_base(pretrained: bool = False, **kwargs: Any) -> TextNet: + """Implements TextNet architecture from `"FAST: Faster Arbitrarily-Shaped Text Detector with + Minimalist Kernel Representation" <https://arxiv.org/abs/2111.02394>`_. + Implementation based on the official Pytorch implementation: <https://github.com/czczup/FAST>`_. + + >>> import torch + >>> from doctr.models import textnet_base + >>> model = textnet_base(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained: boolean, True if model is pretrained + **kwargs: keyword arguments of the TextNet architecture + + Returns: + A TextNet base model + """ + return _textnet( + "textnet_base", + pretrained, + stages=[ + { + "in_channels": [64] * 10, + "out_channels": [64] * 10, + "kernel_size": [(3, 3), (3, 3), (3, 1), (3, 3), (3, 1), (3, 3), (3, 3), (1, 3), (3, 3), (3, 3)], + "stride": [1, 2, 1, 1, 1, 1, 1, 1, 1, 1], + }, + { + "in_channels": [64, 128, 128, 128, 128, 128, 128, 128, 128, 128], + "out_channels": [128] * 10, + "kernel_size": [(3, 3), (1, 3), (3, 3), (3, 1), (3, 3), (3, 3), (3, 1), (3, 1), (3, 3), (3, 3)], + "stride": [2, 1, 1, 1, 1, 1, 1, 1, 1, 1], + }, + { + "in_channels": [128, 256, 256, 256, 256, 256, 256, 256], + "out_channels": [256] * 8, + "kernel_size": [(3, 3), (3, 3), (3, 3), (1, 3), (3, 3), (3, 1), (3, 3), (3, 1)], + "stride": [2, 1, 1, 1, 1, 1, 1, 1], + }, + { + "in_channels": [256, 512, 512, 512, 512], + "out_channels": [512] * 5, + "kernel_size": [(3, 3), (1, 3), (3, 1), (3, 1), (1, 3)], + "stride": [2, 1, 1, 1, 1], + }, + ], + ignore_keys=["7.2.weight", "7.2.bias"], + **kwargs, + )
+ +
+
+
+
+ + +
+
+ + Made with Sphinx and @pradyunsg's + + Furo + +
+
+ +
+
+ +
+
+ +
+
+ + + + + + + + \ No newline at end of file diff --git a/_modules/doctr/models/classification/vgg/pytorch.html b/_modules/doctr/models/classification/vgg/pytorch.html new file mode 100644 index 0000000000..ba26b3411a --- /dev/null +++ b/_modules/doctr/models/classification/vgg/pytorch.html @@ -0,0 +1,442 @@ + + + + + + + + + + + + + doctr.models.classification.vgg.pytorch - docTR documentation + + + + + + + + + + + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark, in light mode + + + + + + + + + + + + + + + Auto light/dark, in dark mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Skip to content + + + +
+
+
+ +
+ +
+
+ +
+ +
+
+ +
+
+
+ + + + + Back to top + +
+
+ +
+ +
+
+

Source code for doctr.models.classification.vgg.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import types
+from copy import deepcopy
+from typing import Any
+
+from torch import nn
+from torchvision.models import vgg as tv_vgg
+
+from doctr.datasets import VOCABS
+
+from ...utils import load_pretrained_params
+
+__all__ = ["vgg16_bn_r"]
+
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "vgg16_bn_r": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.4.1/vgg16_bn_r-d108c19c.pt&src=0",
+    },
+}
+
+
+def _vgg(
+    arch: str,
+    pretrained: bool,
+    tv_arch: str,
+    num_rect_pools: int = 3,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> tv_vgg.VGG:
+    kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
+    kwargs["classes"] = kwargs.get("classes", default_cfgs[arch]["classes"])
+
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["num_classes"] = kwargs["num_classes"]
+    _cfg["classes"] = kwargs["classes"]
+    kwargs.pop("classes")
+
+    # Build the model
+    model = tv_vgg.__dict__[tv_arch](**kwargs, weights=None)
+    # list the MaxPool2d
+    pool_idcs = [idx for idx, m in enumerate(model.features) if isinstance(m, nn.MaxPool2d)]
+    # Replace their kernel with rectangular ones
+    for idx in pool_idcs[-num_rect_pools:]:
+        model.features[idx] = nn.MaxPool2d((2, 1))
+    # Patch average pool & classification head
+    model.avgpool = nn.AdaptiveAvgPool2d((1, 1))
+    model.classifier = nn.Linear(512, kwargs["num_classes"])
+
+    # monkeypatch the model to allow for loading pretrained parameters
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:  # noqa: D417
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    # Bind method to the instance
+    model.from_pretrained = types.MethodType(from_pretrained, model)
+
+    # Load pretrained parameters
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    model.cfg = _cfg
+
+    return model
+
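+# Editor's note (illustrative, not part of the upstream module): swapping the last
+# `num_rect_pools` pooling layers for nn.MaxPool2d((2, 1)) halves the height but keeps the
+# width at those stages. Assuming torchvision's vgg16_bn with its five pooling stages, a
+# hypothetical 3 x 32 x 128 text crop comes out of `model.features` at roughly 512 x 1 x 32
+# (instead of 512 x 1 x 4), preserving horizontal resolution for downstream recognition;
+# the classification head here still collapses it through AdaptiveAvgPool2d((1, 1)).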
+
+
+[docs] +def vgg16_bn_r(pretrained: bool = False, **kwargs: Any) -> tv_vgg.VGG: + """VGG-16 architecture as described in `"Very Deep Convolutional Networks for Large-Scale Image Recognition" + <https://arxiv.org/pdf/1409.1556.pdf>`_, modified by adding batch normalization, rectangular pooling and a simpler + classification head. + + >>> import torch + >>> from doctr.models import vgg16_bn_r + >>> model = vgg16_bn_r(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 512, 512), dtype=torch.float32) + >>> out = model(input_tensor) + + Args: + pretrained (bool): If True, returns a model pre-trained on ImageNet + **kwargs: keyword arguments of the VGG architecture + + Returns: + VGG feature extractor + """ + return _vgg( + "vgg16_bn_r", + pretrained, + "vgg16_bn", + 3, + ignore_keys=["classifier.weight", "classifier.bias"], + **kwargs, + )
\ No newline at end of file
diff --git a/_modules/doctr/models/classification/vip/pytorch.html b/_modules/doctr/models/classification/vip/pytorch.html
new file mode 100644
index 0000000000..7259797796
--- /dev/null
+++ b/_modules/doctr/models/classification/vip/pytorch.html
@@ -0,0 +1,842 @@

Source code for doctr.models.classification.vip.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from copy import deepcopy
+from typing import Any
+
+import torch
+import torch.nn as nn
+
+from doctr.datasets import VOCABS
+from doctr.models.modules.layers import AdaptiveAvgPool2d
+
+from ...utils import load_pretrained_params
+from .layers import (
+    CrossShapedWindowAttention,
+    MultiHeadSelfAttention,
+    OSRABlock,
+    PatchEmbed,
+    PatchMerging,
+    PermuteLayer,
+    SqueezeLayer,
+)
+
+__all__ = ["vip_tiny", "vip_base"]
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "vip_tiny": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.11.0/vip_tiny-033ed51c.pt&src=0",
+    },
+    "vip_base": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.11.0/vip_base-f6ea2ff5.pt&src=0",
+    },
+}
+
+
+class ClassifierHead(nn.Module):
+    """Classification head which averages the features and applies a linear layer."""
+
+    def __init__(self, in_features: int, out_features: int):
+        super().__init__()
+        self.fc = nn.Linear(in_features, out_features)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return self.fc(x.mean(dim=1))
+
+
+class VIPBlock(nn.Module):
+    """Unified block for Local, Global, and Mixed feature mixing in VIP architecture."""
+
+    def __init__(
+        self,
+        embed_dim: int,
+        local_unit: nn.ModuleList,
+        global_unit: nn.ModuleList | None = None,
+        proj: nn.Module | None = None,
+        downsample: bool = False,
+        out_dim: int | None = None,
+    ):
+        """
+        Args:
+            embed_dim: dimension of embeddings
+            local_unit: local mixing block(s)
+            global_unit: global mixing block(s)
+            proj: projection layer used for mixed mixing
+            downsample: whether to downsample at the end
+            out_dim: out channels if downsampling
+        """
+        super().__init__()
+        if downsample and out_dim is None:  # pragma: no cover
+            raise ValueError("`out_dim` must be specified if `downsample=True`")
+
+        self.local_unit = local_unit
+        self.global_unit = global_unit
+        self.proj = proj
+        self.downsample = PatchMerging(dim=embed_dim, out_dim=out_dim) if downsample else None  # type: ignore[arg-type]
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """
+        Forward pass for VIPBlock.
+
+        Args:
+            x: input tensor (B, H, W, C)
+
+        Returns:
+            Transformed tensor
+        """
+        b, h, w, C = x.shape
+
+        # Local or Mixed
+        if self.global_unit is None:
+            # local or global only
+            for blk in self.local_unit:
+                # Flatten to (B, H*W, C)
+                x = x.reshape(b, -1, C)
+                x = blk(x, (h, w))
+                x = x.reshape(b, h, w, -1)
+        else:
+            # Mixed
+            for lblk, gblk in zip(self.local_unit, self.global_unit):
+                x = x.reshape(b, -1, C)
+                # chunk into two halves
+                x1, x2 = torch.chunk(x, chunks=2, dim=2)
+                x1 = lblk(x1, (h, w))
+                x2 = gblk(x2, (h, w))
+                x = torch.cat([x1, x2], dim=2)
+                x = x.transpose(1, 2).contiguous().reshape(b, -1, h, w)
+                x = self.proj(x) + x  # type: ignore[misc]
+                x = x.permute(0, 2, 3, 1).contiguous()
+
+        if isinstance(self.downsample, nn.Module):
+            x = self.downsample(x)
+
+        return x
+
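+# Editor's note (illustrative, not part of the upstream module): in the mixed branch above,
+# the (B, H, W, C) input is flattened to (B, H*W, C), split channel-wise into two
+# (B, H*W, C/2) halves (cross-shaped window attention on one, OSRA global attention on the
+# other), concatenated back, reshaped to (B, C, H, W) for the residual conv projection
+# (`proj`), then permuted back to (B, H, W, C) before the optional PatchMerging.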
+
+class VIPNet(nn.Sequential):
+    """
+    VIP (Vision Permutable) encoder architecture, adapted for text recognition.
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_dim: int,
+        embed_dims: list[int],
+        depths: list[int],
+        num_heads: list[int],
+        mlp_ratios: list[int],
+        split_sizes: list[int],
+        sr_ratios: list[int],
+        input_shape: tuple[int, int, int] = (3, 32, 32),
+        num_classes: int = 1000,
+        include_top: bool = True,
+        cfg: dict[str, Any] | None = None,
+    ) -> None:
+        """
+        Args:
+            in_channels: number of input channels
+            out_dim: final embedding dimension
+            embed_dims: list of embedding dims per stage
+            depths: number of blocks per stage
+            num_heads: number of heads for attention blocks
+            mlp_ratios: ratio for MLP expansion
+            split_sizes: local window split sizes
+            sr_ratios: used for some global block adjustments
+            input_shape: (C, H, W)
+            num_classes: number of output classes
+            include_top: if True, append a classification head
+            cfg: optional config dictionary
+        """
+        self.cfg = cfg
+
+        dpr = [x.item() for x in torch.linspace(0, 0.1, sum(depths))]
+        drop_paths = [dpr[sum(depths[:i]) : sum(depths[: i + 1])] for i in range(len(depths))]
+        layers: list[Any] = [PatchEmbed(in_channels=in_channels, embed_dim=embed_dims[0])]
+
+        # Construct mixers
+        # e.g. local, mixed, global
+        mixer_functions = [
+            _vip_local_mixer,
+            _vip_mixed_mixer,
+            _vip_global_mha_mixer,
+        ]
+
+        for i, mixer_fn in enumerate(mixer_functions):
+            embed_dim = embed_dims[i]
+            depth_i = depths[i]
+            num_head = num_heads[i]
+            mlp_ratio = mlp_ratios[i]
+            sp_size = split_sizes[i]
+            sr_ratio = sr_ratios[i]
+            drop_path = drop_paths[i]
+
+            next_dim = embed_dims[i + 1] if i < len(embed_dims) - 1 else None
+
+            block = mixer_fn(
+                embed_dim=embed_dim,
+                depth=depth_i,
+                num_heads=num_head,
+                mlp_ratio=mlp_ratio,
+                split_size=sp_size,
+                sr_ratio=sr_ratio,
+                drop_path=drop_path,
+                downsample=(next_dim is not None),
+                out_dim=next_dim,
+            )
+            layers.append(block)
+
+        # LN -> permute -> GAP -> squeeze -> MLP
+        layers.append(
+            nn.Sequential(
+                nn.LayerNorm(embed_dims[-1], eps=1e-6),
+                PermuteLayer((0, 2, 3, 1)),
+                AdaptiveAvgPool2d((embed_dims[-1], 1)),
+                SqueezeLayer(dim=3),
+            )
+        )
+
+        mlp_head = nn.Sequential(
+            nn.Linear(embed_dims[-1], out_dim, bias=False),
+            nn.Hardswish(),
+            nn.Dropout(p=0.1),
+        )
+        layers.append(mlp_head)
+        if include_top:
+            layers.append(ClassifierHead(out_dim, num_classes))
+
+        super().__init__(*layers)
+
+        self.apply(self._init_weights)
+
+    def _init_weights(self, m):
+        if isinstance(m, nn.Linear):
+            nn.init.trunc_normal_(m.weight, std=0.02)
+            if m.bias is not None:
+                nn.init.constant_(m.bias, 0)
+        elif isinstance(m, nn.Conv2d):
+            nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+        elif isinstance(m, (nn.LayerNorm, nn.BatchNorm2d)):
+            nn.init.constant_(m.bias, 0)
+            nn.init.constant_(m.weight, 1.0)
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+
+
+def vip_tiny(pretrained: bool = False, **kwargs: Any) -> VIPNet:
+    """
+    VIP-Tiny encoder architecture. Corresponds to the SVIPTRv2-T variant in the paper (VIPTRv2 function
+    in the official implementation:
+    https://github.com/cxfyxl/VIPTR/blob/main/modules/VIPTRv2.py)
+
+    Args:
+        pretrained: whether to load pretrained weights
+        **kwargs: optional arguments
+
+    Returns:
+        VIPNet model
+    """
+    return _vip(
+        "vip_tiny",
+        pretrained,
+        in_channels=3,
+        out_dim=192,
+        embed_dims=[64, 128, 256],
+        depths=[3, 3, 3],
+        num_heads=[2, 4, 8],
+        mlp_ratios=[3, 4, 4],
+        split_sizes=[1, 2, 4],
+        sr_ratios=[4, 2, 2],
+        ignore_keys=["6.fc.weight", "6.fc.bias"],
+        **kwargs,
+    )
+ + + +
+[docs] +def vip_base(pretrained: bool = False, **kwargs: Any) -> VIPNet: + """ + VIP-Base encoder architecture. Corresponds to SVIPTRv2-B variant in the paper (VIPTRv2B function + in the official implementation: + https://github.com/cxfyxl/VIPTR/blob/main/modules/VIPTRv2.py) + + Args: + pretrained: whether to load pretrained weights + **kwargs: optional arguments + + Returns: + VIPNet model + """ + return _vip( + "vip_base", + pretrained, + in_channels=3, + out_dim=256, + embed_dims=[128, 256, 384], + depths=[3, 6, 9], + num_heads=[4, 8, 12], + mlp_ratios=[4, 4, 4], + split_sizes=[1, 2, 4], + sr_ratios=[4, 2, 2], + ignore_keys=["6.fc.weight", "6.fc.bias"], + **kwargs, + )
+ + + +def _vip( + arch: str, + pretrained: bool, + ignore_keys: list[str], + **kwargs: Any, +) -> VIPNet: + """ + Internal constructor for the VIPNet models. + + Args: + arch: architecture key + pretrained: load pretrained weights? + ignore_keys: layer keys to ignore + **kwargs: arguments passed to VIPNet + + Returns: + VIPNet instance + """ + kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"])) + kwargs["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"]) + kwargs["classes"] = kwargs.get("classes", default_cfgs[arch]["classes"]) + + _cfg = deepcopy(default_cfgs[arch]) + _cfg["num_classes"] = kwargs["num_classes"] + _cfg["input_shape"] = kwargs["input_shape"] + _cfg["classes"] = kwargs["classes"] + kwargs.pop("classes") + + model = VIPNet(cfg=_cfg, **kwargs) + if pretrained: + # The number of classes is not the same as the number of classes in the pretrained model => + # remove the last layer weights + _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None + model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys) + return model + + +############################################ +# _vip_local_mixer +############################################ +def _vip_local_mixer( + embed_dim: int, + depth: int, + num_heads: int, + mlp_ratio: float, + drop_path: list[float], + split_size: int = 1, + sr_ratio: int = 1, + downsample: bool = False, + out_dim: int | None = None, +) -> nn.Module: + """Builds a VIPBlock performing local (cross-shaped) window attention. + + Args: + embed_dim: embedding dimension. + depth: number of attention blocks in this stage. + num_heads: number of attention heads. + mlp_ratio: ratio used to expand the hidden dimension in MLP. + split_size: size of the local window splits. + sr_ratio: parameter needed for cross-compatibility between different mixers + drop_path: list of per-block drop path rates. + downsample: whether to apply PatchMerging at the end. + out_dim: output embedding dimension if downsampling. + + Returns: + A VIPBlock (local attention) for one stage of the VIP network. + """ + blocks = nn.ModuleList([ + CrossShapedWindowAttention( + dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=True, + split_size=split_size, + drop_path=drop_path[i], + ) + for i in range(depth) + ]) + return VIPBlock(embed_dim, local_unit=blocks, downsample=downsample, out_dim=out_dim) + + +############################################ +# _vip_global_mha_mixer +############################################ +def _vip_global_mha_mixer( + embed_dim: int, + depth: int, + num_heads: int, + mlp_ratio: float, + drop_path: list[float], + split_size: int = 1, + sr_ratio: int = 1, + downsample: bool = False, + out_dim: int | None = None, +) -> nn.Module: + """Builds a VIPBlock performing global multi-head self-attention. + + Args: + embed_dim: embedding dimension. + depth: number of attention blocks in this stage. + num_heads: number of attention heads. + mlp_ratio: ratio used to expand the hidden dimension in MLP. + drop_path: list of per-block drop path rates. + split_size: parameter needed for cross-compatibility between different mixers + sr_ratio: parameter needed for cross-compatibility between different mixers + downsample: whether to apply PatchMerging at the end. + out_dim: output embedding dimension if downsampling. + + Returns: + A VIPBlock (global MHA) for one stage of the VIP network. 
+ """ + blocks = nn.ModuleList([ + MultiHeadSelfAttention( + dim=embed_dim, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=True, + drop_path_rate=drop_path[i], + ) + for i in range(depth) + ]) + return VIPBlock( + embed_dim, + local_unit=blocks, # In this context, they are "global" blocks but stored in local_unit + downsample=downsample, + out_dim=out_dim, + ) + + +############################################ +# _vip_mixed_mixer +############################################ +def _vip_mixed_mixer( + embed_dim: int, + depth: int, + num_heads: int, + mlp_ratio: float, + drop_path: list[float], + split_size: int = 1, + sr_ratio: int = 1, + downsample: bool = False, + out_dim: int | None = None, +) -> nn.Module: + """Builds a VIPBlock performing mixed local+global attention. + + Args: + embed_dim: embedding dimension. + depth: number of attention blocks in this stage. + num_heads: total number of attention heads. + mlp_ratio: ratio used to expand the hidden dimension in MLP. + drop_path: list of per-block drop path rates. + split_size: size of the local window splits (for the local half). + sr_ratio: reduce spatial resolution in the global half (OSRA). + downsample: whether to apply PatchMerging at the end. + out_dim: output embedding dimension if downsampling. + + Returns: + A VIPBlock (mixed local+global) for one stage of the VIP network. + """ + # an inner dimension for the conv-projection + inner_dim = max(16, embed_dim // 8) + proj = nn.Sequential( + nn.Conv2d(embed_dim, embed_dim, kernel_size=3, padding=1, groups=embed_dim), + nn.GELU(), + nn.BatchNorm2d(embed_dim), + nn.Conv2d(embed_dim, inner_dim, kernel_size=1), + nn.GELU(), + nn.BatchNorm2d(inner_dim), + nn.Conv2d(inner_dim, embed_dim, kernel_size=1), + nn.BatchNorm2d(embed_dim), + ) + + # local half blocks + local_unit = nn.ModuleList([ + CrossShapedWindowAttention( + dim=embed_dim // 2, + num_heads=num_heads, + mlp_ratio=mlp_ratio, + qkv_bias=True, + split_size=split_size, + drop_path=drop_path[i], + ) + for i in range(depth) + ]) + + # global half blocks + global_unit = nn.ModuleList([ + OSRABlock( + dim=embed_dim // 2, + sr_ratio=sr_ratio, + num_heads=num_heads // 2, + mlp_ratio=mlp_ratio, + drop_path=drop_path[i], + ) + for i in range(depth) + ]) + + return VIPBlock( + embed_dim, + local_unit=local_unit, + global_unit=global_unit, + proj=proj, + downsample=downsample, + out_dim=out_dim, + ) +
\ No newline at end of file
diff --git a/_modules/doctr/models/classification/vit/pytorch.html b/_modules/doctr/models/classification/vit/pytorch.html
new file mode 100644
index 0000000000..28430389e9
--- /dev/null
+++ b/_modules/doctr/models/classification/vit/pytorch.html
@@ -0,0 +1,535 @@

Source code for doctr.models.classification.vit.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from copy import deepcopy
+from typing import Any
+
+import torch
+from torch import nn
+
+from doctr.datasets import VOCABS
+from doctr.models.modules.transformer import EncoderBlock
+from doctr.models.modules.vision_transformer import PatchEmbedding
+
+from ...utils import load_pretrained_params
+
+__all__ = ["vit_s", "vit_b"]
+
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "vit_s": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.6.0/vit_s-5d05442d.pt&src=0",
+    },
+    "vit_b": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 32),
+        "classes": list(VOCABS["french"]),
+        "url": "https://doctr-static.mindee.com/models?id=v0.6.0/vit_b-0fbef167.pt&src=0",
+    },
+}
+
+
+class ClassifierHead(nn.Module):
+    """Classifier head for Vision Transformer
+
+    Args:
+        in_channels: number of input channels
+        num_classes: number of output classes
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        num_classes: int,
+    ) -> None:
+        super().__init__()
+
+        self.head = nn.Linear(in_channels, num_classes)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # (batch_size, num_classes) cls token
+        return self.head(x[:, 0])
+
+
+class VisionTransformer(nn.Sequential):
+    """VisionTransformer architecture as described in
+    `"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale",
+    <https://arxiv.org/pdf/2010.11929.pdf>`_.
+
+    Args:
+        d_model: dimension of the transformer layers
+        num_layers: number of transformer layers
+        num_heads: number of attention heads
+        ffd_ratio: multiplier for the hidden dimension of the feedforward layer
+        patch_size: size of the patches
+        input_shape: size of the input image
+        dropout: dropout rate
+        num_classes: number of output classes
+        include_top: whether the classifier head should be instantiated
+    """
+
+    def __init__(
+        self,
+        d_model: int,
+        num_layers: int,
+        num_heads: int,
+        ffd_ratio: int,
+        patch_size: tuple[int, int] = (4, 4),
+        input_shape: tuple[int, int, int] = (3, 32, 32),
+        dropout: float = 0.0,
+        num_classes: int = 1000,
+        include_top: bool = True,
+        cfg: dict[str, Any] | None = None,
+    ) -> None:
+        _layers: list[nn.Module] = [
+            PatchEmbedding(input_shape, d_model, patch_size),
+            EncoderBlock(num_layers, num_heads, d_model, d_model * ffd_ratio, dropout, nn.GELU()),
+        ]
+        if include_top:
+            _layers.append(ClassifierHead(d_model, num_classes))
+
+        super().__init__(*_layers)
+        self.cfg = cfg
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+
+def _vit(
+    arch: str,
+    pretrained: bool,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> VisionTransformer:
+    kwargs["num_classes"] = kwargs.get("num_classes", len(default_cfgs[arch]["classes"]))
+    kwargs["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
+    kwargs["classes"] = kwargs.get("classes", default_cfgs[arch]["classes"])
+
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["num_classes"] = kwargs["num_classes"]
+    _cfg["input_shape"] = kwargs["input_shape"]
+    _cfg["classes"] = kwargs["classes"]
+    kwargs.pop("classes")
+
+    # Build the model
+    model = VisionTransformer(cfg=_cfg, **kwargs)
+    # Load pretrained parameters
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if kwargs["num_classes"] != len(default_cfgs[arch]["classes"]) else None
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    return model
+
+
+
+def vit_s(pretrained: bool = False, **kwargs: Any) -> VisionTransformer:
+    """VisionTransformer-S architecture as described in
+    `"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale",
+    <https://arxiv.org/pdf/2010.11929.pdf>`_. Patches: (H, W) -> (H/8, W/8)
+
+    NOTE: unofficial config used in ViTSTR and ParSeq
+
+    >>> import torch
+    >>> from doctr.models import vit_s
+    >>> model = vit_s(pretrained=False)
+    >>> input_tensor = torch.rand((1, 3, 32, 32), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the VisionTransformer architecture
+
+    Returns:
+        A feature extractor model
+    """
+    return _vit(
+        "vit_s",
+        pretrained,
+        d_model=384,
+        num_layers=12,
+        num_heads=6,
+        ffd_ratio=4,
+        ignore_keys=["2.head.weight", "2.head.bias"],
+        **kwargs,
+    )
+ + + +
+def vit_b(pretrained: bool = False, **kwargs: Any) -> VisionTransformer:
+    """VisionTransformer-B architecture as described in
+    `"An Image is Worth 16x16 Words: Transformers for Image Recognition at Scale",
+    <https://arxiv.org/pdf/2010.11929.pdf>`_. Patches: (H, W) -> (H/8, W/8)
+
+    >>> import torch
+    >>> from doctr.models import vit_b
+    >>> model = vit_b(pretrained=False)
+    >>> input_tensor = torch.rand((1, 3, 32, 32), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained: boolean, True if model is pretrained
+        **kwargs: keyword arguments of the VisionTransformer architecture
+
+    Returns:
+        A feature extractor model
+    """
+    return _vit(
+        "vit_b",
+        pretrained,
+        d_model=768,
+        num_layers=12,
+        num_heads=12,
+        ffd_ratio=4,
+        ignore_keys=["2.head.weight", "2.head.bias"],
+        **kwargs,
+    )
+ +
+
+
+
+ + +
+
+ + Made with Sphinx and @pradyunsg's + + Furo + +
+
+ +
+
+ +
+
+ +
+
+ + + + + + + + \ No newline at end of file diff --git a/_modules/doctr/models/classification/zoo.html b/_modules/doctr/models/classification/zoo.html new file mode 100644 index 0000000000..43b721c9d5 --- /dev/null +++ b/_modules/doctr/models/classification/zoo.html @@ -0,0 +1,451 @@ + + + + + + + + + + + + + doctr.models.classification.zoo - docTR documentation + + + + + + + + + + + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark, in light mode + + + + + + + + + + + + + + + Auto light/dark, in dark mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Skip to content + + + +
+
+
+ +
+ +
+
+ +
+ +
+
+ +
+
+
+ + + + + Back to top + +
+
+ +
+ +
+
+

Source code for doctr.models.classification.zoo

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from typing import Any
+
+from doctr.models.utils import _CompiledModule
+
+from .. import classification
+from ..preprocessor import PreProcessor
+from .predictor import OrientationPredictor
+
+__all__ = ["crop_orientation_predictor", "page_orientation_predictor"]
+
+ARCHS: list[str] = [
+    "magc_resnet31",
+    "mobilenet_v3_small",
+    "mobilenet_v3_small_r",
+    "mobilenet_v3_large",
+    "mobilenet_v3_large_r",
+    "resnet18",
+    "resnet31",
+    "resnet34",
+    "resnet50",
+    "resnet34_wide",
+    "textnet_tiny",
+    "textnet_small",
+    "textnet_base",
+    "vgg16_bn_r",
+    "vit_s",
+    "vit_b",
+    "vip_tiny",
+    "vip_base",
+]
+
+ORIENTATION_ARCHS: list[str] = ["mobilenet_v3_small_crop_orientation", "mobilenet_v3_small_page_orientation"]
+
+
+def _orientation_predictor(
+    arch: Any, pretrained: bool, model_type: str, disabled: bool = False, **kwargs: Any
+) -> OrientationPredictor:
+    if disabled:
+        # Case where the orientation predictor is disabled
+        return OrientationPredictor(None, None)
+
+    if isinstance(arch, str):
+        if arch not in ORIENTATION_ARCHS:
+            raise ValueError(f"unknown architecture '{arch}'")
+
+        # Load the classifier directly from the backbone
+        _model = classification.__dict__[arch](pretrained=pretrained)
+    else:
+        # Adding the type for torch compiled models to the allowed architectures
+        allowed_archs = [classification.MobileNetV3, _CompiledModule]
+
+        if not isinstance(arch, tuple(allowed_archs)):
+            raise ValueError(f"unknown architecture: {type(arch)}")
+        _model = arch
+
+    kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
+    kwargs["std"] = kwargs.get("std", _model.cfg["std"])
+    kwargs["batch_size"] = kwargs.get("batch_size", 128 if model_type == "crop" else 4)
+    input_shape = _model.cfg["input_shape"][1:]
+    predictor = OrientationPredictor(
+        PreProcessor(input_shape, preserve_aspect_ratio=True, symmetric_pad=True, **kwargs), _model
+    )
+    return predictor
+
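+# Editor's note (illustrative, not part of the upstream module): passing `disabled=True`
+# short-circuits the factory and returns OrientationPredictor(None, None), i.e. no
+# pre-processor and no classifier, which the calling predictors treat as a no-op.
+# A hypothetical direct call:
+#   predictor = _orientation_predictor(
+#       "mobilenet_v3_small_page_orientation", pretrained=False, model_type="page", disabled=True
+#   )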
+
+
+[docs] +def crop_orientation_predictor( + arch: Any = "mobilenet_v3_small_crop_orientation", pretrained: bool = False, batch_size: int = 128, **kwargs: Any +) -> OrientationPredictor: + """Crop orientation classification architecture. + + >>> import numpy as np + >>> from doctr.models import crop_orientation_predictor + >>> model = crop_orientation_predictor(arch='mobilenet_v3_small_crop_orientation', pretrained=True) + >>> input_crop = (255 * np.random.rand(256, 256, 3)).astype(np.uint8) + >>> out = model([input_crop]) + + Args: + arch: name of the architecture to use (e.g. 'mobilenet_v3_small_crop_orientation') + pretrained: If True, returns a model pre-trained on our recognition crops dataset + batch_size: number of samples the model processes in parallel + **kwargs: keyword arguments to be passed to the OrientationPredictor + + Returns: + OrientationPredictor + """ + return _orientation_predictor(arch=arch, pretrained=pretrained, batch_size=batch_size, model_type="crop", **kwargs)
+ + + +
+[docs] +def page_orientation_predictor( + arch: Any = "mobilenet_v3_small_page_orientation", pretrained: bool = False, batch_size: int = 4, **kwargs: Any +) -> OrientationPredictor: + """Page orientation classification architecture. + + >>> import numpy as np + >>> from doctr.models import page_orientation_predictor + >>> model = page_orientation_predictor(arch='mobilenet_v3_small_page_orientation', pretrained=True) + >>> input_page = (255 * np.random.rand(512, 512, 3)).astype(np.uint8) + >>> out = model([input_page]) + + Args: + arch: name of the architecture to use (e.g. 'mobilenet_v3_small_page_orientation') + pretrained: If True, returns a model pre-trained on our recognition crops dataset + batch_size: number of samples the model processes in parallel + **kwargs: keyword arguments to be passed to the OrientationPredictor + + Returns: + OrientationPredictor + """ + return _orientation_predictor(arch=arch, pretrained=pretrained, batch_size=batch_size, model_type="page", **kwargs)
\ No newline at end of file
diff --git a/_modules/doctr/models/detection/differentiable_binarization/pytorch.html b/_modules/doctr/models/detection/differentiable_binarization/pytorch.html
new file mode 100644
index 0000000000..5966839050
--- /dev/null
+++ b/_modules/doctr/models/detection/differentiable_binarization/pytorch.html
@@ -0,0 +1,778 @@

Source code for doctr.models.detection.differentiable_binarization.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from collections.abc import Callable
+from typing import Any
+
+import numpy as np
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torchvision.models import resnet34, resnet50
+from torchvision.models._utils import IntermediateLayerGetter
+from torchvision.ops.deform_conv import DeformConv2d
+
+from doctr.file_utils import CLASS_NAME
+
+from ...classification import mobilenet_v3_large
+from ...utils import _bf16_to_float32, load_pretrained_params
+from .base import DBPostProcessor, _DBNet
+
+__all__ = ["DBNet", "db_resnet50", "db_resnet34", "db_mobilenet_v3_large"]
+
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "db_resnet50": {
+        "input_shape": (3, 1024, 1024),
+        "mean": (0.798, 0.785, 0.772),
+        "std": (0.264, 0.2749, 0.287),
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/db_resnet50-79bd7d70.pt&src=0",
+    },
+    "db_resnet34": {
+        "input_shape": (3, 1024, 1024),
+        "mean": (0.798, 0.785, 0.772),
+        "std": (0.264, 0.2749, 0.287),
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/db_resnet34-cb6aed9e.pt&src=0",
+    },
+    "db_mobilenet_v3_large": {
+        "input_shape": (3, 1024, 1024),
+        "mean": (0.798, 0.785, 0.772),
+        "std": (0.264, 0.2749, 0.287),
+        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/db_mobilenet_v3_large-21748dd0.pt&src=0",
+    },
+}
+
+
+class FeaturePyramidNetwork(nn.Module):
+    def __init__(
+        self,
+        in_channels: list[int],
+        out_channels: int,
+        deform_conv: bool = False,
+    ) -> None:
+        super().__init__()
+
+        out_chans = out_channels // len(in_channels)
+
+        conv_layer = DeformConv2d if deform_conv else nn.Conv2d
+
+        self.in_branches = nn.ModuleList([
+            nn.Sequential(
+                conv_layer(chans, out_channels, 1, bias=False),
+                nn.BatchNorm2d(out_channels),
+                nn.ReLU(inplace=True),
+            )
+            for idx, chans in enumerate(in_channels)
+        ])
+        self.upsample = nn.Upsample(scale_factor=2, mode="bilinear", align_corners=True)
+        self.out_branches = nn.ModuleList([
+            nn.Sequential(
+                conv_layer(out_channels, out_chans, 3, padding=1, bias=False),
+                nn.BatchNorm2d(out_chans),
+                nn.ReLU(inplace=True),
+                nn.Upsample(scale_factor=2**idx, mode="bilinear", align_corners=True),
+            )
+            for idx, chans in enumerate(in_channels)
+        ])
+
+    def forward(self, x: list[torch.Tensor]) -> torch.Tensor:
+        if len(x) != len(self.out_branches):
+            raise AssertionError
+        # Conv1x1 to get the same number of channels
+        _x: list[torch.Tensor] = [branch(t) for branch, t in zip(self.in_branches, x)]
+        out: list[torch.Tensor] = [_x[-1]]
+        for t in _x[:-1][::-1]:
+            out.append(self.upsample(out[-1]) + t)
+
+        # Conv and final upsampling
+        out = [branch(t) for branch, t in zip(self.out_branches, out[::-1])]
+
+        return torch.cat(out, dim=1)
+
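As a quick sanity check of the channel arithmetic above (each of the `len(in_channels)` branches contributes `out_channels // len(in_channels)` channels, all upsampled to the finest resolution before concatenation), here is a sketch; it assumes a PyTorch install of docTR and imports this internal, non-public class directly from the module:

```python
import torch

from doctr.models.detection.differentiable_binarization.pytorch import FeaturePyramidNetwork

# Feature maps at strides 4, 8, 16, 32 of a 512x512 input, with ResNet-50 style channel counts
feats = [
    torch.rand(1, 256, 128, 128),
    torch.rand(1, 512, 64, 64),
    torch.rand(1, 1024, 32, 32),
    torch.rand(1, 2048, 16, 16),
]
fpn = FeaturePyramidNetwork(in_channels=[256, 512, 1024, 2048], out_channels=256)
out = fpn(feats)
print(out.shape)  # torch.Size([1, 256, 128, 128]): 4 branches x 64 channels at the finest map
```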
+
+class DBNet(_DBNet, nn.Module):
+    """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
+    <https://arxiv.org/pdf/1911.08947.pdf>`_.
+
+    Args:
+        feat_extractor: the backbone serving as feature extractor
+        head_chans: the number of channels in the head
+        deform_conv: whether to use deformable convolution
+        bin_thresh: threshold for binarization
+        box_thresh: minimal objectness score to consider a box
+        assume_straight_pages: if True, fit straight bounding boxes only
+        exportable: onnx exportable returns only logits
+        cfg: the configuration dict of the model
+        class_names: list of class names
+    """
+
+    def __init__(
+        self,
+        feat_extractor: IntermediateLayerGetter,
+        head_chans: int = 256,
+        deform_conv: bool = False,
+        bin_thresh: float = 0.3,
+        box_thresh: float = 0.1,
+        assume_straight_pages: bool = True,
+        exportable: bool = False,
+        cfg: dict[str, Any] | None = None,
+        class_names: list[str] = [CLASS_NAME],
+    ) -> None:
+        super().__init__()
+        self.class_names = class_names
+        num_classes: int = len(self.class_names)
+        self.cfg = cfg
+
+        conv_layer = DeformConv2d if deform_conv else nn.Conv2d
+
+        self.exportable = exportable
+        self.assume_straight_pages = assume_straight_pages
+
+        self.feat_extractor = feat_extractor
+        # Identify the number of channels for the head initialization
+        _is_training = self.feat_extractor.training
+        self.feat_extractor = self.feat_extractor.eval()
+        with torch.no_grad():
+            out = self.feat_extractor(torch.zeros((1, 3, 224, 224)))
+            fpn_channels = [v.shape[1] for _, v in out.items()]
+
+        if _is_training:
+            self.feat_extractor = self.feat_extractor.train()
+
+        self.fpn = FeaturePyramidNetwork(fpn_channels, head_chans, deform_conv)
+        # Convolutional heads mapping the FPN output to per-class probability / threshold maps
+
+        self.prob_head = nn.Sequential(
+            conv_layer(head_chans, head_chans // 4, 3, padding=1, bias=False),
+            nn.BatchNorm2d(head_chans // 4),
+            nn.ReLU(inplace=True),
+            nn.ConvTranspose2d(head_chans // 4, head_chans // 4, 2, stride=2, bias=False),
+            nn.BatchNorm2d(head_chans // 4),
+            nn.ReLU(inplace=True),
+            nn.ConvTranspose2d(head_chans // 4, num_classes, 2, stride=2),
+        )
+        self.thresh_head = nn.Sequential(
+            conv_layer(head_chans, head_chans // 4, 3, padding=1, bias=False),
+            nn.BatchNorm2d(head_chans // 4),
+            nn.ReLU(inplace=True),
+            nn.ConvTranspose2d(head_chans // 4, head_chans // 4, 2, stride=2, bias=False),
+            nn.BatchNorm2d(head_chans // 4),
+            nn.ReLU(inplace=True),
+            nn.ConvTranspose2d(head_chans // 4, num_classes, 2, stride=2),
+        )
+
+        self.postprocessor = DBPostProcessor(
+            assume_straight_pages=assume_straight_pages, bin_thresh=bin_thresh, box_thresh=box_thresh
+        )
+
+        for n, m in self.named_modules():
+            # Don't override the initialization of the backbone
+            if n.startswith("feat_extractor."):
+                continue
+            if isinstance(m, (nn.Conv2d, DeformConv2d)):
+                nn.init.kaiming_normal_(m.weight.data, mode="fan_out", nonlinearity="relu")
+                if m.bias is not None:
+                    m.bias.data.zero_()
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1.0)
+                m.bias.data.zero_()
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
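For example, `from_pretrained` can be used to resume from a locally fine-tuned checkpoint instead of the published URL (the path below is hypothetical):

```python
from doctr.models import db_resnet50

model = db_resnet50(pretrained=False)
# Hypothetical local checkpoint produced by a fine-tuning run
model.from_pretrained("path/to/db_resnet50_finetuned.pt")
```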
+    def forward(
+        self,
+        x: torch.Tensor,
+        target: list[np.ndarray] | None = None,
+        return_model_output: bool = False,
+        return_preds: bool = False,
+    ) -> dict[str, torch.Tensor]:
+        # Extract feature maps at different stages
+        feats = self.feat_extractor(x)
+        feats = [feats[str(idx)] for idx in range(len(feats))]
+        # Pass through the FPN
+        feat_concat = self.fpn(feats)
+        logits = self.prob_head(feat_concat)
+
+        out: dict[str, Any] = {}
+        if self.exportable:
+            out["logits"] = logits
+            return out
+
+        if return_model_output or target is None or return_preds:
+            prob_map = _bf16_to_float32(torch.sigmoid(logits))
+
+        if return_model_output:
+            out["out_map"] = prob_map
+
+        if target is None or return_preds:
+            # Disable for torch.compile compatibility
+            @torch.compiler.disable
+            def _postprocess(prob_map: torch.Tensor) -> list[dict[str, Any]]:
+                return [
+                    dict(zip(self.class_names, preds))
+                    for preds in self.postprocessor(prob_map.detach().cpu().permute((0, 2, 3, 1)).numpy())
+                ]
+
+            # Post-process boxes (keep only text predictions)
+            out["preds"] = _postprocess(prob_map)
+
+        if target is not None:
+            thresh_map = self.thresh_head(feat_concat)
+            loss = self.compute_loss(logits, thresh_map, target)
+            out["loss"] = loss
+
+        return out
+
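The dictionary returned by `forward` therefore depends on the flags and on whether a target is provided. A minimal inference sketch (random weights, so the predictions are meaningless, but the keys and shapes are representative):

```python
import torch

from doctr.models import db_resnet50

model = db_resnet50(pretrained=False).eval()
with torch.no_grad():
    out = model(torch.rand(2, 3, 1024, 1024), return_model_output=True, return_preds=True)

print(out["out_map"].shape)  # sigmoid probability map, (2, num_classes, 1024, 1024)
print(len(out["preds"]))     # one {class_name: boxes} dict per input image
```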
+    def compute_loss(
+        self,
+        out_map: torch.Tensor,
+        thresh_map: torch.Tensor,
+        target: list[np.ndarray],
+        gamma: float = 2.0,
+        alpha: float = 0.5,
+        eps: float = 1e-8,
+    ) -> torch.Tensor:
+        """Compute a batch of gts, masks, thresh_gts, thresh_masks from a list of boxes
+        and a list of masks for each image. From there it computes the loss with the model output
+
+        Args:
+            out_map: output feature map of the model of shape (N, C, H, W)
+            thresh_map: threshold map of shape (N, C, H, W)
+            target: list of dictionaries, each with a `boxes` and a `flags` entry
+            gamma: modulating factor in the focal loss formula
+            alpha: balancing factor in the focal loss formula
+            eps: epsilon factor in dice loss
+
+        Returns:
+            A loss tensor
+        """
+        if gamma < 0:
+            raise ValueError("Value of gamma should be greater than or equal to zero.")
+
+        prob_map = torch.sigmoid(out_map)
+        thresh_map = torch.sigmoid(thresh_map)
+
+        targets = self.build_target(target, out_map.shape[1:])  # type: ignore[arg-type]
+
+        seg_target, seg_mask = torch.from_numpy(targets[0]), torch.from_numpy(targets[1])
+        seg_target, seg_mask = seg_target.to(out_map.device), seg_mask.to(out_map.device)
+        thresh_target, thresh_mask = torch.from_numpy(targets[2]), torch.from_numpy(targets[3])
+        thresh_target, thresh_mask = thresh_target.to(out_map.device), thresh_mask.to(out_map.device)
+
+        if torch.any(seg_mask):
+            # Focal loss
+            focal_scale = 10.0
+            bce_loss = F.binary_cross_entropy_with_logits(out_map, seg_target, reduction="none")
+
+            p_t = prob_map * seg_target + (1 - prob_map) * (1 - seg_target)
+            alpha_t = alpha * seg_target + (1 - alpha) * (1 - seg_target)
+            # Unreduced version
+            focal_loss = alpha_t * (1 - p_t) ** gamma * bce_loss
+            # Class reduced
+            focal_loss = (seg_mask * focal_loss).sum((0, 1, 2, 3)) / seg_mask.sum((0, 1, 2, 3))
+
+            # Compute dice loss for each class or for approx binary_map
+            if len(self.class_names) > 1:
+                dice_map = torch.softmax(out_map, dim=1)
+            else:
+                # compute binary map instead
+                dice_map = 1 / (1 + torch.exp(-50.0 * (prob_map - thresh_map)))
+            # Class reduced
+            inter = (seg_mask * dice_map * seg_target).sum((0, 2, 3))
+            cardinality = (seg_mask * (dice_map + seg_target)).sum((0, 2, 3))
+            dice_loss = (1 - 2 * inter / (cardinality + eps)).mean()
+
+        # Compute l1 loss for thresh_map
+        if torch.any(thresh_mask):
+            l1_loss = (torch.abs(thresh_map - thresh_target) * thresh_mask).sum() / (thresh_mask.sum() + eps)
+
+        return l1_loss + focal_scale * focal_loss + dice_loss
+
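In other words, the total objective returned above combines the three terms, with the focal term weighted by `focal_scale = 10`:

```latex
\mathcal{L}_{\text{DBNet}} = \mathcal{L}_{1}^{\text{thresh}} + 10 \cdot \mathcal{L}_{\text{focal}} + \mathcal{L}_{\text{dice}}
```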
+
+def _dbnet(
+    arch: str,
+    pretrained: bool,
+    backbone_fn: Callable[[bool], nn.Module],
+    fpn_layers: list[str],
+    backbone_submodule: str | None = None,
+    pretrained_backbone: bool = True,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> DBNet:
+    pretrained_backbone = pretrained_backbone and not pretrained
+
+    # Feature extractor
+    backbone = (
+        backbone_fn(pretrained_backbone)
+        if not arch.split("_")[1].startswith("resnet")
+        # Starting with ImageNet pretrained params introduces some NaNs in layer3 & layer4 of resnet50
+        else backbone_fn(weights=None)  # type: ignore[call-arg]
+    )
+    if isinstance(backbone_submodule, str):
+        backbone = getattr(backbone, backbone_submodule)
+    feat_extractor = IntermediateLayerGetter(
+        backbone,
+        {layer_name: str(idx) for idx, layer_name in enumerate(fpn_layers)},
+    )
+
+    if not kwargs.get("class_names", None):
+        kwargs["class_names"] = default_cfgs[arch].get("class_names", [CLASS_NAME])
+    else:
+        kwargs["class_names"] = sorted(kwargs["class_names"])
+    # Build the model
+    model = DBNet(feat_extractor, cfg=default_cfgs[arch], **kwargs)
+    # Load pretrained parameters
+    if pretrained:
+        # The number of class_names is not the same as the number of classes in the pretrained model =>
+        # remove the layer weights
+        _ignore_keys = (
+            ignore_keys if kwargs["class_names"] != default_cfgs[arch].get("class_names", [CLASS_NAME]) else None
+        )
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    return model
+
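As a consequence of the `class_names` handling above, requesting custom classes together with pretrained weights keeps the backbone and FPN weights but drops and re-initializes the head layers listed in `ignore_keys`. A sketch (assuming the checkpoint can be downloaded):

```python
from doctr.models import db_resnet50

# Two detection classes: the `ignore_keys` head weights are dropped and re-initialized
model = db_resnet50(pretrained=True, class_names=["stamps", "words"])
print(model.class_names)  # ['stamps', 'words'] (sorted)
```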
+
+def db_resnet34(pretrained: bool = False, **kwargs: Any) -> DBNet:
+    """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
+    <https://arxiv.org/pdf/1911.08947.pdf>`_, using a ResNet-34 backbone.
+
+    >>> import torch
+    >>> from doctr.models import db_resnet34
+    >>> model = db_resnet34(pretrained=True)
+    >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text detection dataset
+        **kwargs: keyword arguments of the DBNet architecture
+
+    Returns:
+        text detection architecture
+    """
+    return _dbnet(
+        "db_resnet34",
+        pretrained,
+        resnet34,
+        ["layer1", "layer2", "layer3", "layer4"],
+        None,
+        ignore_keys=[
+            "prob_head.6.weight",
+            "prob_head.6.bias",
+            "thresh_head.6.weight",
+            "thresh_head.6.bias",
+        ],
+        **kwargs,
+    )
+
+
+
+def db_resnet50(pretrained: bool = False, **kwargs: Any) -> DBNet:
+    """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
+    <https://arxiv.org/pdf/1911.08947.pdf>`_, using a ResNet-50 backbone.
+
+    >>> import torch
+    >>> from doctr.models import db_resnet50
+    >>> model = db_resnet50(pretrained=True)
+    >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text detection dataset
+        **kwargs: keyword arguments of the DBNet architecture
+
+    Returns:
+        text detection architecture
+    """
+    return _dbnet(
+        "db_resnet50",
+        pretrained,
+        resnet50,
+        ["layer1", "layer2", "layer3", "layer4"],
+        None,
+        ignore_keys=[
+            "prob_head.6.weight",
+            "prob_head.6.bias",
+            "thresh_head.6.weight",
+            "thresh_head.6.bias",
+        ],
+        **kwargs,
+    )
+
+
+def db_mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> DBNet:
+    """DBNet as described in `"Real-time Scene Text Detection with Differentiable Binarization"
+    <https://arxiv.org/pdf/1911.08947.pdf>`_, using a MobileNet V3 Large backbone.
+
+    >>> import torch
+    >>> from doctr.models import db_mobilenet_v3_large
+    >>> model = db_mobilenet_v3_large(pretrained=True)
+    >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text detection dataset
+        **kwargs: keyword arguments of the DBNet architecture
+
+    Returns:
+        text detection architecture
+    """
+    return _dbnet(
+        "db_mobilenet_v3_large",
+        pretrained,
+        mobilenet_v3_large,
+        ["3", "6", "12", "16"],
+        "features",
+        ignore_keys=[
+            "prob_head.6.weight",
+            "prob_head.6.bias",
+            "thresh_head.6.weight",
+            "thresh_head.6.bias",
+        ],
+        **kwargs,
+    )
\ No newline at end of file
diff --git a/_modules/doctr/models/detection/fast/pytorch.html b/_modules/doctr/models/detection/fast/pytorch.html
new file mode 100644
index 0000000000..626e66b3b6
--- /dev/null
+++ b/_modules/doctr/models/detection/fast/pytorch.html
@@ -0,0 +1,784 @@

Source code for doctr.models.detection.fast.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from collections.abc import Callable
+from typing import Any
+
+import numpy as np
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torchvision.models._utils import IntermediateLayerGetter
+
+from doctr.file_utils import CLASS_NAME
+
+from ...classification import textnet_base, textnet_small, textnet_tiny
+from ...modules.layers import FASTConvLayer
+from ...utils import _bf16_to_float32, load_pretrained_params
+from .base import _FAST, FASTPostProcessor
+
+__all__ = ["FAST", "fast_tiny", "fast_small", "fast_base", "reparameterize"]
+
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "fast_tiny": {
+        "input_shape": (3, 1024, 1024),
+        "mean": (0.798, 0.785, 0.772),
+        "std": (0.264, 0.2749, 0.287),
+        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/fast_tiny-1acac421.pt&src=0",
+    },
+    "fast_small": {
+        "input_shape": (3, 1024, 1024),
+        "mean": (0.798, 0.785, 0.772),
+        "std": (0.264, 0.2749, 0.287),
+        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/fast_small-10952cc1.pt&src=0",
+    },
+    "fast_base": {
+        "input_shape": (3, 1024, 1024),
+        "mean": (0.798, 0.785, 0.772),
+        "std": (0.264, 0.2749, 0.287),
+        "url": "https://doctr-static.mindee.com/models?id=v0.8.1/fast_base-688a8b34.pt&src=0",
+    },
+}
+
+
+class FastNeck(nn.Module):
+    """Neck of the FAST architecture, composed of a series of 3x3 convolutions and upsampling layers.
+
+    Args:
+        in_channels: number of input channels
+        out_channels: number of output channels
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        out_channels: int = 128,
+    ) -> None:
+        super().__init__()
+        self.reduction = nn.ModuleList([
+            FASTConvLayer(in_channels * scale, out_channels, kernel_size=3) for scale in [1, 2, 4, 8]
+        ])
+
+    def _upsample(self, x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
+        return F.interpolate(x, size=y.shape[-2:], mode="bilinear")
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        f1, f2, f3, f4 = x
+        f1, f2, f3, f4 = [reduction(f) for reduction, f in zip(self.reduction, (f1, f2, f3, f4))]
+        f2, f3, f4 = [self._upsample(f, f1) for f in (f2, f3, f4)]
+        f = torch.cat((f1, f2, f3, f4), 1)
+        return f
+
+
+class FastHead(nn.Sequential):
+    """Head of the FAST architecture
+
+    Args:
+        in_channels: number of input channels
+        num_classes: number of output classes
+        out_channels: number of output channels
+        dropout: dropout probability
+    """
+
+    def __init__(
+        self,
+        in_channels: int,
+        num_classes: int,
+        out_channels: int = 128,
+        dropout: float = 0.1,
+    ) -> None:
+        _layers: list[nn.Module] = [
+            FASTConvLayer(in_channels, out_channels, kernel_size=3),
+            nn.Dropout(dropout),
+            nn.Conv2d(out_channels, num_classes, kernel_size=1, bias=False),
+        ]
+        super().__init__(*_layers)
+
+
+class FAST(_FAST, nn.Module):
+    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
+    <https://arxiv.org/pdf/2111.02394.pdf>`_.
+
+    Args:
+        feat_extractor: the backbone serving as feature extractor
+        bin_thresh: threshold for binarization
+        box_thresh: minimal objectness score to consider a box
+        dropout_prob: dropout probability
+        pooling_size: size of the pooling layer
+        assume_straight_pages: if True, fit straight bounding boxes only
+        exportable: onnx exportable returns only logits
+        cfg: the configuration dict of the model
+        class_names: list of class names
+    """
+
+    def __init__(
+        self,
+        feat_extractor: IntermediateLayerGetter,
+        bin_thresh: float = 0.1,
+        box_thresh: float = 0.1,
+        dropout_prob: float = 0.1,
+        pooling_size: int = 4,  # differs from the paper: performs better on dense, text-rich images
+        assume_straight_pages: bool = True,
+        exportable: bool = False,
+        cfg: dict[str, Any] = {},
+        class_names: list[str] = [CLASS_NAME],
+    ) -> None:
+        super().__init__()
+        self.class_names = class_names
+        num_classes: int = len(self.class_names)
+        self.cfg = cfg
+
+        self.exportable = exportable
+        self.assume_straight_pages = assume_straight_pages
+
+        self.feat_extractor = feat_extractor
+        # Identify the number of channels for the neck & head initialization
+        _is_training = self.feat_extractor.training
+        self.feat_extractor = self.feat_extractor.eval()
+        with torch.no_grad():
+            out = self.feat_extractor(torch.zeros((1, 3, 32, 32)))
+            feat_out_channels = [v.shape[1] for _, v in out.items()]
+
+        if _is_training:
+            self.feat_extractor = self.feat_extractor.train()
+
+        # Initialize neck & head
+        self.neck = FastNeck(feat_out_channels[0], feat_out_channels[1])
+        self.prob_head = FastHead(feat_out_channels[-1], num_classes, feat_out_channels[1], dropout_prob)
+
+        # NOTE: The post-processing from the paper does not work well on text-rich images,
+        # so we use a modified version adapted from DBNet
+        self.postprocessor = FASTPostProcessor(
+            assume_straight_pages=assume_straight_pages, bin_thresh=bin_thresh, box_thresh=box_thresh
+        )
+
+        # Pooling layer acting as the erosion reversal described in the paper
+        self.pooling = nn.MaxPool2d(kernel_size=pooling_size // 2 + 1, stride=1, padding=(pooling_size // 2) // 2)
+
+        for n, m in self.named_modules():
+            # Don't override the initialization of the backbone
+            if n.startswith("feat_extractor."):
+                continue
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight.data, mode="fan_out", nonlinearity="relu")
+                if m.bias is not None:
+                    m.bias.data.zero_()
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1.0)
+                m.bias.data.zero_()
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        target: list[np.ndarray] | None = None,
+        return_model_output: bool = False,
+        return_preds: bool = False,
+    ) -> dict[str, torch.Tensor]:
+        # Extract feature maps at different stages
+        feats = self.feat_extractor(x)
+        feats = [feats[str(idx)] for idx in range(len(feats))]
+        # Pass through the Neck & Head & Upsample
+        feat_concat = self.neck(feats)
+        logits = F.interpolate(self.prob_head(feat_concat), size=x.shape[-2:], mode="bilinear")
+
+        out: dict[str, Any] = {}
+        if self.exportable:
+            out["logits"] = logits
+            return out
+
+        if return_model_output or target is None or return_preds:
+            prob_map = _bf16_to_float32(torch.sigmoid(self.pooling(logits)))
+
+        if return_model_output:
+            out["out_map"] = prob_map
+
+        if target is None or return_preds:
+            # Disable for torch.compile compatibility
+            @torch.compiler.disable
+            def _postprocess(prob_map: torch.Tensor) -> list[dict[str, Any]]:
+                return [
+                    dict(zip(self.class_names, preds))
+                    for preds in self.postprocessor(prob_map.detach().cpu().permute((0, 2, 3, 1)).numpy())
+                ]
+
+            # Post-process boxes (keep only text predictions)
+            out["preds"] = _postprocess(prob_map)
+
+        if target is not None:
+            loss = self.compute_loss(logits, target)
+            out["loss"] = loss
+
+        return out
+
+    def compute_loss(
+        self,
+        out_map: torch.Tensor,
+        target: list[np.ndarray],
+        eps: float = 1e-6,
+    ) -> torch.Tensor:
+        """Compute fast loss, 2 x Dice loss where the text kernel loss is scaled by 0.5.
+
+        Args:
+            out_map: output feature map of the model of shape (N, num_classes, H, W)
+            target: list of dictionaries, each with a `boxes` and a `flags` entry
+            eps: epsilon factor in dice loss
+
+        Returns:
+            A loss tensor
+        """
+        targets = self.build_target(target, out_map.shape[1:])  # type: ignore[arg-type]
+
+        seg_target, seg_mask = torch.from_numpy(targets[0]), torch.from_numpy(targets[1])
+        shrunken_kernel = torch.from_numpy(targets[2]).to(out_map.device)
+        seg_target, seg_mask = seg_target.to(out_map.device), seg_mask.to(out_map.device)
+
+        def ohem_sample(score: torch.Tensor, gt: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
+            masks = []
+            for class_idx in range(gt.shape[0]):
+                pos_num = int(torch.sum(gt[class_idx] > 0.5)) - int(
+                    torch.sum((gt[class_idx] > 0.5) & (mask[class_idx] <= 0.5))
+                )
+                neg_num = int(torch.sum(gt[class_idx] <= 0.5))
+                neg_num = int(min(pos_num * 3, neg_num))
+
+                if neg_num == 0 or pos_num == 0:
+                    masks.append(mask[class_idx])
+                    continue
+
+                neg_score_sorted, _ = torch.sort(-score[class_idx][gt[class_idx] <= 0.5])
+                threshold = -neg_score_sorted[neg_num - 1]
+
+                selected_mask = ((score[class_idx] >= threshold) | (gt[class_idx] > 0.5)) & (mask[class_idx] > 0.5)
+                masks.append(selected_mask)
+            # stack the per-class masks into a single tensor of shape (1, num_classes, H, W)
+            return torch.stack(masks).unsqueeze(0).float()
+
+        if len(self.class_names) > 1:
+            kernels = torch.softmax(out_map, dim=1)
+            prob_map = torch.softmax(self.pooling(out_map), dim=1)
+        else:
+            kernels = torch.sigmoid(out_map)
+            prob_map = torch.sigmoid(self.pooling(out_map))
+
+        # As described in the paper, use a Dice loss on the text segmentation map, scaled by 0.5
+        selected_masks = torch.cat(
+            [ohem_sample(score, gt, mask) for score, gt, mask in zip(prob_map, seg_target, seg_mask)], 0
+        ).float()
+        inter = (selected_masks * prob_map * seg_target).sum((0, 2, 3))
+        cardinality = (selected_masks * (prob_map + seg_target)).sum((0, 2, 3))
+        text_loss = (1 - 2 * inter / (cardinality + eps)).mean() * 0.5
+
+        # As described in the paper, we use the Dice loss for the text kernel map.
+        selected_masks = seg_target * seg_mask
+        inter = (selected_masks * kernels * shrunken_kernel).sum((0, 2, 3))  # noqa
+        cardinality = (selected_masks * (kernels + shrunken_kernel)).sum((0, 2, 3))  # noqa
+        kernel_loss = (1 - 2 * inter / (cardinality + eps)).mean()
+
+        return text_loss + kernel_loss
+
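Summarizing the two Dice terms computed above:

```latex
\mathcal{L}_{\text{FAST}} = 0.5 \cdot \mathcal{L}_{\text{dice}}^{\text{text}} + \mathcal{L}_{\text{dice}}^{\text{kernel}}
```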
+
+def reparameterize(model: FAST | nn.Module) -> FAST:
+    """Fuse batchnorm and conv layers and reparameterize the model
+
+    Args:
+        model: the FAST model to reparameterize
+
+    Returns:
+        the reparameterized model
+    """
+    last_conv = None
+    last_conv_name = None
+
+    for module in model.modules():
+        if hasattr(module, "reparameterize_layer"):
+            module.reparameterize_layer()  # type: ignore[operator]
+
+    for name, child in model.named_children():
+        if isinstance(child, nn.BatchNorm2d):
+            # fuse the batchnorm only if it directly follows a conv layer
+            if last_conv is None:
+                continue
+            conv_w = last_conv.weight
+            conv_b = last_conv.bias if last_conv.bias is not None else torch.zeros_like(child.running_mean)  # type: ignore[arg-type]
+
+            factor = child.weight / torch.sqrt(child.running_var + child.eps)  # type: ignore
+            last_conv.weight = nn.Parameter(conv_w * factor.reshape([last_conv.out_channels, 1, 1, 1]))
+            last_conv.bias = nn.Parameter((conv_b - child.running_mean) * factor + child.bias)  # type: ignore[operator]
+            model._modules[last_conv_name] = last_conv  # type: ignore[index]
+            model._modules[name] = nn.Identity()
+            last_conv = None
+        elif isinstance(child, nn.Conv2d):
+            last_conv = child
+            last_conv_name = name
+        else:
+            reparameterize(child)
+
+    return model  # type: ignore[return-value]
+
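A typical use is to fuse the model once after loading and before inference; a short sketch (random weights for brevity):

```python
import torch

from doctr.models import fast_base
from doctr.models.detection.fast import reparameterize

model = fast_base(pretrained=False).eval()
model = reparameterize(model)  # fold BatchNorm layers into the preceding convolutions
with torch.no_grad():
    out = model(torch.rand(1, 3, 1024, 1024))
print(out["preds"][0])  # one {class_name: boxes} dict per image
```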
+
+def _fast(
+    arch: str,
+    pretrained: bool,
+    backbone_fn: Callable[[bool], nn.Module],
+    feat_layers: list[str],
+    pretrained_backbone: bool = True,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> FAST:
+    pretrained_backbone = pretrained_backbone and not pretrained
+
+    # Build the feature extractor
+    feat_extractor = IntermediateLayerGetter(
+        backbone_fn(pretrained_backbone),
+        {layer_name: str(idx) for idx, layer_name in enumerate(feat_layers)},
+    )
+
+    if not kwargs.get("class_names", None):
+        kwargs["class_names"] = default_cfgs[arch].get("class_names", [CLASS_NAME])
+    else:
+        kwargs["class_names"] = sorted(kwargs["class_names"])
+    # Build the model
+    model = FAST(feat_extractor, cfg=default_cfgs[arch], **kwargs)
+    # Load pretrained parameters
+    if pretrained:
+        # The number of class_names is not the same as the number of classes in the pretrained model =>
+        # remove the layer weights
+        _ignore_keys = (
+            ignore_keys if kwargs["class_names"] != default_cfgs[arch].get("class_names", [CLASS_NAME]) else None
+        )
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    return model
+
+
+
+def fast_tiny(pretrained: bool = False, **kwargs: Any) -> FAST:
+    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
+    <https://arxiv.org/pdf/2111.02394.pdf>`_, using a tiny TextNet backbone.
+
+    >>> import torch
+    >>> from doctr.models import fast_tiny
+    >>> model = fast_tiny(pretrained=True)
+    >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text detection dataset
+        **kwargs: keyword arguments of the FAST architecture
+
+    Returns:
+        text detection architecture
+    """
+    return _fast(
+        "fast_tiny",
+        pretrained,
+        textnet_tiny,
+        ["3", "4", "5", "6"],
+        ignore_keys=["prob_head.2.weight"],
+        **kwargs,
+    )
+
+
+def fast_small(pretrained: bool = False, **kwargs: Any) -> FAST:
+    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
+    <https://arxiv.org/pdf/2111.02394.pdf>`_, using a small TextNet backbone.
+
+    >>> import torch
+    >>> from doctr.models import fast_small
+    >>> model = fast_small(pretrained=True)
+    >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text detection dataset
+        **kwargs: keyword arguments of the FAST architecture
+
+    Returns:
+        text detection architecture
+    """
+    return _fast(
+        "fast_small",
+        pretrained,
+        textnet_small,
+        ["3", "4", "5", "6"],
+        ignore_keys=["prob_head.2.weight"],
+        **kwargs,
+    )
+
+
+def fast_base(pretrained: bool = False, **kwargs: Any) -> FAST:
+    """FAST as described in `"FAST: Faster Arbitrarily-Shaped Text Detector with Minimalist Kernel Representation"
+    <https://arxiv.org/pdf/2111.02394.pdf>`_, using a base TextNet backbone.
+
+    >>> import torch
+    >>> from doctr.models import fast_base
+    >>> model = fast_base(pretrained=True)
+    >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text detection dataset
+        **kwargs: keyword arguments of the FAST architecture
+
+    Returns:
+        text detection architecture
+    """
+    return _fast(
+        "fast_base",
+        pretrained,
+        textnet_base,
+        ["3", "4", "5", "6"],
+        ignore_keys=["prob_head.2.weight"],
+        **kwargs,
+    )
\ No newline at end of file
diff --git a/_modules/doctr/models/detection/linknet/pytorch.html b/_modules/doctr/models/detection/linknet/pytorch.html
new file mode 100644
index 0000000000..7fbef6d7e7
--- /dev/null
+++ b/_modules/doctr/models/detection/linknet/pytorch.html
@@ -0,0 +1,726 @@

Source code for doctr.models.detection.linknet.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from collections.abc import Callable
+from typing import Any
+
+import numpy as np
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torchvision.models._utils import IntermediateLayerGetter
+
+from doctr.file_utils import CLASS_NAME
+from doctr.models.classification import resnet18, resnet34, resnet50
+
+from ...utils import _bf16_to_float32, load_pretrained_params
+from .base import LinkNetPostProcessor, _LinkNet
+
+__all__ = ["LinkNet", "linknet_resnet18", "linknet_resnet34", "linknet_resnet50"]
+
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "linknet_resnet18": {
+        "input_shape": (3, 1024, 1024),
+        "mean": (0.798, 0.785, 0.772),
+        "std": (0.264, 0.2749, 0.287),
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/linknet_resnet18-e47a14dc.pt&src=0",
+    },
+    "linknet_resnet34": {
+        "input_shape": (3, 1024, 1024),
+        "mean": (0.798, 0.785, 0.772),
+        "std": (0.264, 0.2749, 0.287),
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/linknet_resnet34-9ca2df3e.pt&src=0",
+    },
+    "linknet_resnet50": {
+        "input_shape": (3, 1024, 1024),
+        "mean": (0.798, 0.785, 0.772),
+        "std": (0.264, 0.2749, 0.287),
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/linknet_resnet50-6cf565c1.pt&src=0",
+    },
+}
+
+
+class LinkNetFPN(nn.Module):
+    def __init__(self, layer_shapes: list[tuple[int, int, int]]) -> None:
+        super().__init__()
+        strides = [
+            1 if (in_shape[-1] == out_shape[-1]) else 2
+            for in_shape, out_shape in zip(layer_shapes[:-1], layer_shapes[1:])
+        ]
+
+        chans = [shape[0] for shape in layer_shapes]
+
+        _decoder_layers = [
+            self.decoder_block(ochan, ichan, stride) for ichan, ochan, stride in zip(chans[:-1], chans[1:], strides)
+        ]
+
+        self.decoders = nn.ModuleList(_decoder_layers)
+
+    @staticmethod
+    def decoder_block(in_chan: int, out_chan: int, stride: int) -> nn.Sequential:
+        """Creates a LinkNet decoder block"""
+        mid_chan = in_chan // 4
+        return nn.Sequential(
+            nn.Conv2d(in_chan, mid_chan, kernel_size=1, bias=False),
+            nn.BatchNorm2d(mid_chan),
+            nn.ReLU(inplace=True),
+            nn.ConvTranspose2d(mid_chan, mid_chan, 3, padding=1, output_padding=stride - 1, stride=stride, bias=False),
+            nn.BatchNorm2d(mid_chan),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(mid_chan, out_chan, kernel_size=1, bias=False),
+            nn.BatchNorm2d(out_chan),
+            nn.ReLU(inplace=True),
+        )
+
+    def forward(self, feats: list[torch.Tensor]) -> torch.Tensor:
+        out = feats[-1]
+        for decoder, fmap in zip(self.decoders[::-1], feats[:-1][::-1]):
+            out = decoder(out) + fmap
+
+        out = self.decoders[0](out)
+
+        return out
+
+
+class LinkNet(nn.Module, _LinkNet):
+    """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
+    <https://arxiv.org/pdf/1707.03718.pdf>`_.
+
+    Args:
+        feat_extractor: the backbone serving as feature extractor
+        bin_thresh: threshold for binarization of the output feature map
+        box_thresh: minimal objectness score to consider a box
+        head_chans: number of channels in the head layers
+        assume_straight_pages: if True, fit straight bounding boxes only
+        exportable: onnx exportable returns only logits
+        cfg: the configuration dict of the model
+        class_names: list of class names
+    """
+
+    def __init__(
+        self,
+        feat_extractor: IntermediateLayerGetter,
+        bin_thresh: float = 0.1,
+        box_thresh: float = 0.1,
+        head_chans: int = 32,
+        assume_straight_pages: bool = True,
+        exportable: bool = False,
+        cfg: dict[str, Any] | None = None,
+        class_names: list[str] = [CLASS_NAME],
+    ) -> None:
+        super().__init__()
+        self.class_names = class_names
+        num_classes: int = len(self.class_names)
+        self.cfg = cfg
+        self.exportable = exportable
+        self.assume_straight_pages = assume_straight_pages
+
+        self.feat_extractor = feat_extractor
+        # Identify the number of channels for the FPN initialization
+        self.feat_extractor.eval()
+        with torch.no_grad():
+            in_shape = (3, 512, 512)
+            out = self.feat_extractor(torch.zeros((1, *in_shape)))
+            # Get the shapes of the extracted feature maps
+            _shapes = [v.shape[1:] for _, v in out.items()]
+            # Prepend the expected shapes of the first encoder
+            _shapes = [(_shapes[0][0], in_shape[1] // 4, in_shape[2] // 4)] + _shapes
+        self.feat_extractor.train()
+
+        self.fpn = LinkNetFPN(_shapes)
+
+        self.classifier = nn.Sequential(
+            nn.ConvTranspose2d(
+                _shapes[0][0], head_chans, kernel_size=3, padding=1, output_padding=1, stride=2, bias=False
+            ),
+            nn.BatchNorm2d(head_chans),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(head_chans, head_chans, kernel_size=3, padding=1, bias=False),
+            nn.BatchNorm2d(head_chans),
+            nn.ReLU(inplace=True),
+            nn.ConvTranspose2d(head_chans, num_classes, kernel_size=2, stride=2),
+        )
+
+        self.postprocessor = LinkNetPostProcessor(
+            assume_straight_pages=self.assume_straight_pages, bin_thresh=bin_thresh, box_thresh=box_thresh
+        )
+
+        for n, m in self.named_modules():
+            # Don't override the initialization of the backbone
+            if n.startswith("feat_extractor."):
+                continue
+            if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
+                nn.init.kaiming_normal_(m.weight.data, mode="fan_out", nonlinearity="relu")
+                if m.bias is not None:
+                    m.bias.data.zero_()
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1.0)
+                m.bias.data.zero_()
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        target: list[np.ndarray] | None = None,
+        return_model_output: bool = False,
+        return_preds: bool = False,
+        **kwargs: Any,
+    ) -> dict[str, Any]:
+        feats = self.feat_extractor(x)
+        logits = self.fpn([feats[str(idx)] for idx in range(len(feats))])
+        logits = self.classifier(logits)
+
+        out: dict[str, Any] = {}
+        if self.exportable:
+            out["logits"] = logits
+            return out
+
+        if return_model_output or target is None or return_preds:
+            prob_map = _bf16_to_float32(torch.sigmoid(logits))
+        if return_model_output:
+            out["out_map"] = prob_map
+
+        if target is None or return_preds:
+            # Disable for torch.compile compatibility
+            @torch.compiler.disable
+            def _postprocess(prob_map: torch.Tensor) -> list[dict[str, Any]]:
+                return [
+                    dict(zip(self.class_names, preds))
+                    for preds in self.postprocessor(prob_map.detach().cpu().permute((0, 2, 3, 1)).numpy())
+                ]
+
+            # Post-process boxes (keep only text predictions)
+            out["preds"] = _postprocess(prob_map)
+
+        if target is not None:
+            loss = self.compute_loss(logits, target)
+            out["loss"] = loss
+
+        return out
+
+    def compute_loss(
+        self,
+        out_map: torch.Tensor,
+        target: list[np.ndarray],
+        gamma: float = 2.0,
+        alpha: float = 0.5,
+        eps: float = 1e-8,
+    ) -> torch.Tensor:
+        """Compute linknet loss, BCE with boosted box edges or focal loss. Focal loss implementation based on
+        <https://github.com/tensorflow/addons/>`_.
+
+        Args:
+            out_map: output feature map of the model of shape (N, num_classes, H, W)
+            target: list of dictionaries, each with a `boxes` and a `flags` entry
+            gamma: modulating factor in the focal loss formula
+            alpha: balancing factor in the focal loss formula
+            eps: epsilon factor in dice loss
+
+        Returns:
+            A loss tensor
+        """
+        _target, _mask = self.build_target(target, out_map.shape[1:])  # type: ignore[arg-type]
+
+        seg_target, seg_mask = torch.from_numpy(_target).to(dtype=out_map.dtype), torch.from_numpy(_mask)
+        seg_target, seg_mask = seg_target.to(out_map.device), seg_mask.to(out_map.device)
+        seg_mask = seg_mask.to(dtype=torch.float32)
+
+        bce_loss = F.binary_cross_entropy_with_logits(out_map, seg_target, reduction="none")
+        proba_map = torch.sigmoid(out_map)
+
+        # Focal loss
+        if gamma < 0:
+            raise ValueError("Value of gamma should be greater than or equal to zero.")
+        p_t = proba_map * seg_target + (1 - proba_map) * (1 - seg_target)
+        alpha_t = alpha * seg_target + (1 - alpha) * (1 - seg_target)
+        # Unreduced version
+        focal_loss = alpha_t * (1 - p_t) ** gamma * bce_loss
+        # Class reduced
+        focal_loss = (seg_mask * focal_loss).sum((0, 1, 2, 3)) / seg_mask.sum((0, 1, 2, 3))
+
+        # Compute dice loss for each class
+        dice_map = torch.softmax(out_map, dim=1) if len(self.class_names) > 1 else proba_map
+        # Class reduced
+        inter = (seg_mask * dice_map * seg_target).sum((0, 2, 3))
+        cardinality = (seg_mask * (dice_map + seg_target)).sum((0, 2, 3))
+        dice_loss = (1 - 2 * inter / (cardinality + eps)).mean()
+
+        # Return the full loss (equal sum of focal loss and dice loss)
+        return focal_loss + dice_loss
+
+
+def _linknet(
+    arch: str,
+    pretrained: bool,
+    backbone_fn: Callable[[bool], nn.Module],
+    fpn_layers: list[str],
+    pretrained_backbone: bool = True,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> LinkNet:
+    pretrained_backbone = pretrained_backbone and not pretrained
+
+    # Build the feature extractor
+    backbone = backbone_fn(pretrained_backbone)
+    feat_extractor = IntermediateLayerGetter(
+        backbone,
+        {layer_name: str(idx) for idx, layer_name in enumerate(fpn_layers)},
+    )
+    if not kwargs.get("class_names", None):
+        kwargs["class_names"] = default_cfgs[arch].get("class_names", [CLASS_NAME])
+    else:
+        kwargs["class_names"] = sorted(kwargs["class_names"])
+
+    # Build the model
+    model = LinkNet(feat_extractor, cfg=default_cfgs[arch], **kwargs)
+    # Load pretrained parameters
+    if pretrained:
+        # The number of class_names is not the same as the number of classes in the pretrained model =>
+        # remove the layer weights
+        _ignore_keys = (
+            ignore_keys if kwargs["class_names"] != default_cfgs[arch].get("class_names", [CLASS_NAME]) else None
+        )
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    return model
+
+
+
+def linknet_resnet18(pretrained: bool = False, **kwargs: Any) -> LinkNet:
+    """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
+    <https://arxiv.org/pdf/1707.03718.pdf>`_.
+
+    >>> import torch
+    >>> from doctr.models import linknet_resnet18
+    >>> model = linknet_resnet18(pretrained=True).eval()
+    >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text detection dataset
+        **kwargs: keyword arguments of the LinkNet architecture
+
+    Returns:
+        text detection architecture
+    """
+    return _linknet(
+        "linknet_resnet18",
+        pretrained,
+        resnet18,
+        ["layer1", "layer2", "layer3", "layer4"],
+        ignore_keys=[
+            "classifier.6.weight",
+            "classifier.6.bias",
+        ],
+        **kwargs,
+    )
+
+
+def linknet_resnet34(pretrained: bool = False, **kwargs: Any) -> LinkNet:
+    """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
+    <https://arxiv.org/pdf/1707.03718.pdf>`_.
+
+    >>> import torch
+    >>> from doctr.models import linknet_resnet34
+    >>> model = linknet_resnet34(pretrained=True).eval()
+    >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text detection dataset
+        **kwargs: keyword arguments of the LinkNet architecture
+
+    Returns:
+        text detection architecture
+    """
+    return _linknet(
+        "linknet_resnet34",
+        pretrained,
+        resnet34,
+        ["layer1", "layer2", "layer3", "layer4"],
+        ignore_keys=[
+            "classifier.6.weight",
+            "classifier.6.bias",
+        ],
+        **kwargs,
+    )
+
+
+def linknet_resnet50(pretrained: bool = False, **kwargs: Any) -> LinkNet:
+    """LinkNet as described in `"LinkNet: Exploiting Encoder Representations for Efficient Semantic Segmentation"
+    <https://arxiv.org/pdf/1707.03718.pdf>`_.
+
+    >>> import torch
+    >>> from doctr.models import linknet_resnet50
+    >>> model = linknet_resnet50(pretrained=True).eval()
+    >>> input_tensor = torch.rand((1, 3, 1024, 1024), dtype=torch.float32)
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text detection dataset
+        **kwargs: keyword arguments of the LinkNet architecture
+
+    Returns:
+        text detection architecture
+    """
+    return _linknet(
+        "linknet_resnet50",
+        pretrained,
+        resnet50,
+        ["layer1", "layer2", "layer3", "layer4"],
+        ignore_keys=[
+            "classifier.6.weight",
+            "classifier.6.bias",
+        ],
+        **kwargs,
+    )
\ No newline at end of file
diff --git a/_modules/doctr/models/detection/zoo.html b/_modules/doctr/models/detection/zoo.html
new file mode 100644
index 0000000000..8afe6b66ee
--- /dev/null
+++ b/_modules/doctr/models/detection/zoo.html
@@ -0,0 +1,440 @@

Source code for doctr.models.detection.zoo

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from typing import Any
+
+from doctr.models.utils import _CompiledModule
+
+from .. import detection
+from ..detection.fast import reparameterize
+from ..preprocessor import PreProcessor
+from .predictor import DetectionPredictor
+
+__all__ = ["detection_predictor"]
+
+ARCHS: list[str]
+
+ARCHS = [
+    "db_resnet34",
+    "db_resnet50",
+    "db_mobilenet_v3_large",
+    "linknet_resnet18",
+    "linknet_resnet34",
+    "linknet_resnet50",
+    "fast_tiny",
+    "fast_small",
+    "fast_base",
+]
+
+
+def _predictor(arch: Any, pretrained: bool, assume_straight_pages: bool = True, **kwargs: Any) -> DetectionPredictor:
+    if isinstance(arch, str):
+        if arch not in ARCHS:
+            raise ValueError(f"unknown architecture '{arch}'")
+
+        _model = detection.__dict__[arch](
+            pretrained=pretrained,
+            pretrained_backbone=kwargs.get("pretrained_backbone", True),
+            assume_straight_pages=assume_straight_pages,
+        )
+        # Reparameterize FAST models by default to lower inference latency and memory usage
+        if isinstance(_model, detection.FAST):
+            _model = reparameterize(_model)
+    else:
+        # Add the torch-compiled model type to the allowed architectures
+        allowed_archs = [detection.DBNet, detection.LinkNet, detection.FAST, _CompiledModule]
+
+        if not isinstance(arch, tuple(allowed_archs)):
+            raise ValueError(f"unknown architecture: {type(arch)}")
+
+        _model = arch
+        _model.assume_straight_pages = assume_straight_pages
+        _model.postprocessor.assume_straight_pages = assume_straight_pages
+
+    kwargs.pop("pretrained_backbone", None)
+
+    kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
+    kwargs["std"] = kwargs.get("std", _model.cfg["std"])
+    kwargs["batch_size"] = kwargs.get("batch_size", 2)
+    predictor = DetectionPredictor(
+        PreProcessor(_model.cfg["input_shape"][1:], **kwargs),
+        _model,
+    )
+    return predictor
+
+
+
+def detection_predictor(
+    arch: Any = "fast_base",
+    pretrained: bool = False,
+    assume_straight_pages: bool = True,
+    preserve_aspect_ratio: bool = True,
+    symmetric_pad: bool = True,
+    batch_size: int = 2,
+    **kwargs: Any,
+) -> DetectionPredictor:
+    """Text detection architecture.
+
+    >>> import numpy as np
+    >>> from doctr.models import detection_predictor
+    >>> model = detection_predictor(arch='db_resnet50', pretrained=True)
+    >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
+    >>> out = model([input_page])
+
+    Args:
+        arch: name of the architecture or model itself to use (e.g. 'db_resnet50')
+        pretrained: If True, returns a model pre-trained on our text detection dataset
+        assume_straight_pages: If True, fit straight boxes to the page
+        preserve_aspect_ratio: If True, pad the input document image to preserve the aspect ratio before
+            running the detection model on it
+        symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right
+        batch_size: number of samples the model processes in parallel
+        **kwargs: optional keyword arguments passed to the architecture
+
+    Returns:
+        Detection predictor
+    """
+    return _predictor(
+        arch=arch,
+        pretrained=pretrained,
+        assume_straight_pages=assume_straight_pages,
+        preserve_aspect_ratio=preserve_aspect_ratio,
+        symmetric_pad=symmetric_pad,
+        batch_size=batch_size,
+        **kwargs,
+    )
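Besides architecture names, `arch` accepts a model instance, including a `torch.compile`d one (handled via `_CompiledModule` in `_predictor` above). A usage sketch, assuming a PyTorch version with `torch.compile` and downloadable checkpoints:

```python
import numpy as np
import torch

from doctr.models import db_resnet50, detection_predictor

# By architecture name
predictor = detection_predictor(arch="fast_base", pretrained=True)

# Or by passing a (possibly compiled) model instance
model = db_resnet50(pretrained=True)
predictor = detection_predictor(arch=torch.compile(model))

page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
out = predictor([page])
```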
\ No newline at end of file
diff --git a/_modules/doctr/models/factory/hub.html b/_modules/doctr/models/factory/hub.html
new file mode 100644
index 0000000000..97021a05d6
--- /dev/null
+++ b/_modules/doctr/models/factory/hub.html
@@ -0,0 +1,558 @@

Source code for doctr.models.factory.hub

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+# Inspired by: https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/hub.py
+
+import json
+import logging
+import subprocess
+import tempfile
+import textwrap
+from pathlib import Path
+from typing import Any
+
+import torch
+from huggingface_hub import (
+    HfApi,
+    get_token,
+    hf_hub_download,
+    login,
+)
+
+from doctr import models
+
+__all__ = ["login_to_hub", "push_to_hf_hub", "from_hub", "_save_model_and_config_for_hf_hub"]
+
+
+AVAILABLE_ARCHS = {
+    "classification": models.classification.zoo.ARCHS + models.classification.zoo.ORIENTATION_ARCHS,
+    "detection": models.detection.zoo.ARCHS,
+    "recognition": models.recognition.zoo.ARCHS,
+}
+
+
+
+[docs] +def login_to_hub() -> None: # pragma: no cover + """Login to huggingface hub""" + access_token = get_token() + if access_token is not None: + logging.info("Huggingface Hub token found and valid") + login(token=access_token) + else: + login() + # check if git lfs is installed + try: + subprocess.call(["git", "lfs", "version"]) + except FileNotFoundError: + raise OSError( + "Looks like you do not have git-lfs installed, please install. \ + You can install from https://git-lfs.github.com/. \ + Then run `git lfs install` (you only have to do this once)." + )
+ + + +def _save_model_and_config_for_hf_hub(model: Any, save_dir: str, arch: str, task: str) -> None: + """Save model and config to disk for pushing to huggingface hub + + Args: + model: PyTorch model to be saved + save_dir: directory to save model and config + arch: architecture name + task: task name + """ + save_directory = Path(save_dir) + weights_path = save_directory / "pytorch_model.bin" + torch.save(model.state_dict(), weights_path) + + config_path = save_directory / "config.json" + + # add model configuration + model_config = model.cfg + model_config["arch"] = arch + model_config["task"] = task + + with config_path.open("w") as f: + json.dump(model_config, f, indent=2, ensure_ascii=False) + + +
+[docs] +def push_to_hf_hub(model: Any, model_name: str, task: str, **kwargs) -> None: # pragma: no cover + """Save model and its configuration on HF hub + + >>> from doctr.models import login_to_hub, push_to_hf_hub + >>> from doctr.models.recognition import crnn_mobilenet_v3_small + >>> login_to_hub() + >>> model = crnn_mobilenet_v3_small(pretrained=True) + >>> push_to_hf_hub(model, 'my-model', 'recognition', arch='crnn_mobilenet_v3_small') + + Args: + model: PyTorch model to be saved + model_name: name of the model which is also the repository name + task: task name + **kwargs: keyword arguments for push_to_hf_hub + """ + run_config = kwargs.get("run_config", None) + arch = kwargs.get("arch", None) + + if run_config is None and arch is None: + raise ValueError("run_config or arch must be specified") + if task not in ["classification", "detection", "recognition"]: + raise ValueError("task must be one of classification, detection, recognition") + + # default readme + readme = textwrap.dedent( + f""" + + language: en + + + <p align="center"> + <img src="https://doctr-static.mindee.com/models?id=v0.3.1/Logo_doctr.gif&src=0" width="60%"> + </p> + + **Optical Character Recognition made seamless & accessible to anyone, powered by PyTorch** + + ## Task: {task} + + https://github.com/mindee/doctr + + ### Example usage: + + ```python + >>> from doctr.io import DocumentFile + >>> from doctr.models import ocr_predictor, from_hub + + >>> img = DocumentFile.from_images(['<image_path>']) + >>> # Load your model from the hub + >>> model = from_hub('mindee/my-model') + + >>> # Pass it to the predictor + >>> # If your model is a recognition model: + >>> predictor = ocr_predictor(det_arch='db_mobilenet_v3_large', + >>> reco_arch=model, + >>> pretrained=True) + + >>> # If your model is a detection model: + >>> predictor = ocr_predictor(det_arch=model, + >>> reco_arch='crnn_mobilenet_v3_small', + >>> pretrained=True) + + >>> # Get your predictions + >>> res = predictor(img) + ``` + """ + ) + + # add run configuration to readme if available + if run_config is not None: + arch = run_config.arch + readme += textwrap.dedent( + f"""### Run Configuration + \n{json.dumps(vars(run_config), indent=2, ensure_ascii=False)}""" + ) + + if arch not in AVAILABLE_ARCHS[task]: + raise ValueError( + f"Architecture: {arch} for task: {task} not found.\ + \nAvailable architectures: {AVAILABLE_ARCHS}" + ) + + commit_message = f"Add {model_name} model" + + # Create repository + api = HfApi() + api.create_repo(model_name, token=get_token(), exist_ok=False) + + # Save model files to a temporary directory + with tempfile.TemporaryDirectory() as tmp_dir: + _save_model_and_config_for_hf_hub(model, tmp_dir, arch=arch, task=task) + readme_path = Path(tmp_dir) / "README.md" + readme_path.write_text(readme) + + # Upload all files to the hub + api.upload_folder( + folder_path=tmp_dir, + repo_id=model_name, + commit_message=commit_message, + token=get_token(), + )
+ + + +
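+
+# Illustrative sketch (not part of the original module): `push_to_hf_hub` accepts either an
+# explicit `arch` or a `run_config` object exposing an `arch` attribute (it is serialised
+# with `vars()` into the model card). The helper name and the namespace below are
+# hypothetical; a prior `login_to_hub()` is required.
+def _example_push_recognition_model() -> None:  # pragma: no cover
+    from types import SimpleNamespace
+
+    from doctr.models import crnn_mobilenet_v3_small
+
+    model = crnn_mobilenet_v3_small(pretrained=True)
+    run_config = SimpleNamespace(arch="crnn_mobilenet_v3_small", epochs=10, lr=0.001)
+    push_to_hf_hub(model, model_name="my-crnn-model", task="recognition", run_config=run_config)
+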
+[docs] +def from_hub(repo_id: str, **kwargs: Any): + """Instantiate & load a pretrained model from HF hub. + + >>> from doctr.models import from_hub + >>> model = from_hub("mindee/fasterrcnn_mobilenet_v3_large_fpn") + + Args: + repo_id: HuggingFace model hub repo + kwargs: kwargs of `hf_hub_download` or `snapshot_download` + + Returns: + Model loaded with the checkpoint + """ + # Get the config + with open(hf_hub_download(repo_id, filename="config.json", **kwargs), "rb") as f: + cfg = json.load(f) + + arch = cfg["arch"] + task = cfg["task"] + cfg.pop("arch") + cfg.pop("task") + + if task == "classification": + model = models.classification.__dict__[arch]( + pretrained=False, classes=cfg["classes"], num_classes=cfg["num_classes"] + ) + elif task == "detection": + model = models.detection.__dict__[arch](pretrained=False) + elif task == "recognition": + model = models.recognition.__dict__[arch](pretrained=False, input_shape=cfg["input_shape"], vocab=cfg["vocab"]) + + # update model cfg + model.cfg = cfg + # load the weights + weights = hf_hub_download(repo_id, filename="pytorch_model.bin", **kwargs) + model.from_pretrained(weights) + + return model
\ No newline at end of file diff --git a/_modules/doctr/models/recognition/crnn/pytorch.html b/_modules/doctr/models/recognition/crnn/pytorch.html new file mode 100644 index 0000000000..1115905ff5 --- /dev/null +++ b/_modules/doctr/models/recognition/crnn/pytorch.html @@ -0,0 +1,680 @@

Source code for doctr.models.recognition.crnn.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from collections.abc import Callable
+from copy import deepcopy
+from itertools import groupby
+from typing import Any
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+from doctr.datasets import VOCABS, decode_sequence
+
+from ...classification import mobilenet_v3_large_r, mobilenet_v3_small_r, vgg16_bn_r
+from ...utils import load_pretrained_params
+from ..core import RecognitionModel, RecognitionPostProcessor
+
+__all__ = ["CRNN", "crnn_vgg16_bn", "crnn_mobilenet_v3_small", "crnn_mobilenet_v3_large"]
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "crnn_vgg16_bn": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 128),
+        "vocab": VOCABS["french"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.12.0/crnn_vgg16_bn-0417f351.pt&src=0",
+    },
+    "crnn_mobilenet_v3_small": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 128),
+        "vocab": VOCABS["french"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.3.1/crnn_mobilenet_v3_small_pt-3b919a02.pt&src=0",
+    },
+    "crnn_mobilenet_v3_large": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 128),
+        "vocab": VOCABS["french"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.3.1/crnn_mobilenet_v3_large_pt-f5259ec2.pt&src=0",
+    },
+}
+
+
+class CTCPostProcessor(RecognitionPostProcessor):
+    """Postprocess raw prediction of the model (logits) to a list of words using CTC decoding
+
+    Args:
+        vocab: string containing the ordered sequence of supported characters
+    """
+
+    @staticmethod
+    def ctc_best_path(
+        logits: torch.Tensor,
+        vocab: str = VOCABS["french"],
+        blank: int = 0,
+    ) -> list[tuple[str, float]]:
+        """Implements best path decoding as shown by Graves (Dissertation, p63), highly inspired from
+        <https://github.com/githubharald/CTCDecoder>`_.
+
+        Args:
+            logits: model output, shape: N x T x C
+            vocab: vocabulary to use
+            blank: index of blank label
+
+        Returns:
+            A list of tuples: (word, confidence)
+        """
+        # Gather the most confident characters, and assign the smallest conf among those to the sequence prob
+        probs = F.softmax(logits, dim=-1).max(dim=-1).values.min(dim=1).values
+
+        # collapse best path (using itertools.groupby), map to chars, join char list to string
+        words = [
+            decode_sequence([k for k, _ in groupby(seq.tolist()) if k != blank], vocab)
+            for seq in torch.argmax(logits, dim=-1)
+        ]
+
+        return list(zip(words, probs.tolist()))
+
+    def __call__(self, logits: torch.Tensor) -> list[tuple[str, float]]:
+        """Performs decoding of raw output with CTC and decoding of CTC predictions
+        with label_to_idx mapping dictionary
+
+        Args:
+            logits: raw output of the model, shape (N, C + 1, seq_len)
+
+        Returns:
+            A tuple of 2 lists: a list of str (words) and a list of float (probs)
+
+        """
+        # Decode CTC
+        return self.ctc_best_path(logits=logits, vocab=self.vocab, blank=len(self.vocab))
+
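+
+# Illustrative sketch (not part of the original module): how CTC best-path decoding collapses
+# repeated indices and drops the blank symbol. The helper name, the toy vocabulary and the
+# hand-crafted logits below are made up for demonstration purposes.
+def _example_ctc_best_path() -> None:
+    vocab = "ab"  # the blank index is len(vocab) == 2
+    # One sequence of 5 timesteps over 3 classes ("a", "b", blank)
+    logits = torch.full((1, 5, 3), -10.0)
+    # Best path a, a, blank, b, b collapses to "ab"
+    for t, cls in enumerate([0, 0, 2, 1, 1]):
+        logits[0, t, cls] = 10.0
+    word, confidence = CTCPostProcessor.ctc_best_path(logits, vocab=vocab, blank=2)[0]
+    assert word == "ab" and confidence > 0.99
+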
+
+class CRNN(RecognitionModel, nn.Module):
+    """Implements a CRNN architecture as described in `"An End-to-End Trainable Neural Network for Image-based
+    Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_.
+
+    Args:
+        feature_extractor: the backbone serving as feature extractor
+        vocab: vocabulary used for encoding
+        rnn_units: number of units in the LSTM layers
+        input_shape: size of the image inputs
+        exportable: if True, the model is ONNX exportable and returns only the logits
+        cfg: configuration dictionary
+    """
+
+    _children_names: list[str] = ["feat_extractor", "decoder", "linear", "postprocessor"]
+
+    def __init__(
+        self,
+        feature_extractor: nn.Module,
+        vocab: str,
+        rnn_units: int = 128,
+        input_shape: tuple[int, int, int] = (3, 32, 128),
+        exportable: bool = False,
+        cfg: dict[str, Any] | None = None,
+    ) -> None:
+        super().__init__()
+        self.vocab = vocab
+        self.cfg = cfg
+        self.max_length = 32
+        self.exportable = exportable
+        self.feat_extractor = feature_extractor
+
+        # Resolve the input_size of the LSTM
+        with torch.inference_mode():
+            out_shape = self.feat_extractor(torch.zeros((1, *input_shape))).shape
+        lstm_in = out_shape[1] * out_shape[2]
+
+        self.decoder = nn.LSTM(
+            input_size=lstm_in,
+            hidden_size=rnn_units,
+            batch_first=True,
+            num_layers=2,
+            bidirectional=True,
+        )
+
+        # feature units = 2 * rnn_units because of the bidirectional layers
+        self.linear = nn.Linear(in_features=2 * rnn_units, out_features=len(vocab) + 1)
+
+        self.postprocessor = CTCPostProcessor(vocab=vocab)
+
+        for n, m in self.named_modules():
+            # Don't override the initialization of the backbone
+            if n.startswith("feat_extractor."):
+                continue
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight.data, mode="fan_out", nonlinearity="relu")
+                if m.bias is not None:
+                    m.bias.data.zero_()
+            elif isinstance(m, nn.BatchNorm2d):
+                m.weight.data.fill_(1.0)
+                m.bias.data.zero_()
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    def compute_loss(
+        self,
+        model_output: torch.Tensor,
+        target: list[str],
+    ) -> torch.Tensor:
+        """Compute CTC loss for the model.
+
+        Args:
+            model_output: predicted logits of the model
+            target: list of target strings
+
+        Returns:
+            The loss of the model on the batch
+        """
+        gt, seq_len = self.build_target(target)
+        batch_len = model_output.shape[0]
+        input_length = model_output.shape[1] * torch.ones(size=(batch_len,), dtype=torch.int32)
+        # N x T x C -> T x N x C
+        logits = model_output.permute(1, 0, 2)
+        probs = F.log_softmax(logits, dim=-1)
+        ctc_loss = F.ctc_loss(
+            probs,
+            torch.from_numpy(gt),
+            input_length,
+            torch.tensor(seq_len, dtype=torch.int),
+            len(self.vocab),
+            zero_infinity=True,
+        )
+
+        return ctc_loss
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        target: list[str] | None = None,
+        return_model_output: bool = False,
+        return_preds: bool = False,
+    ) -> dict[str, Any]:
+        if self.training and target is None:
+            raise ValueError("Need to provide labels during training")
+
+        features = self.feat_extractor(x)
+        # B x C x H x W --> B x C*H x W --> B x W x C*H
+        c, h, w = features.shape[1], features.shape[2], features.shape[3]
+        features_seq = torch.reshape(features, shape=(-1, h * c, w))
+        features_seq = torch.transpose(features_seq, 1, 2)
+        logits, _ = self.decoder(features_seq)
+        logits = self.linear(logits)
+
+        out: dict[str, Any] = {}
+        if self.exportable:
+            out["logits"] = logits
+            return out
+
+        if return_model_output:
+            out["out_map"] = logits
+
+        if target is None or return_preds:
+            # Disable for torch.compile compatibility
+            @torch.compiler.disable
+            def _postprocess(logits: torch.Tensor) -> list[tuple[str, float]]:
+                return self.postprocessor(logits)
+
+            # Post-process boxes
+            out["preds"] = _postprocess(logits)
+
+        if target is not None:
+            out["loss"] = self.compute_loss(logits, target)
+
+        return out
+
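+
+# Illustrative sketch (not part of the original module): shape bookkeeping of the CTC loss
+# used in `CRNN.compute_loss`. The helper name and the dummy tensors below only show how
+# (N, T, C) logits are fed to `F.ctc_loss` with the blank mapped to the last class.
+def _example_ctc_loss_shapes() -> torch.Tensor:
+    N, T, C = 2, 32, 10 + 1  # batch size, timesteps, vocab size + blank
+    logits = torch.randn(N, T, C)
+    targets = torch.randint(0, C - 1, (N, 8))  # padded, encoded ground-truth indices
+    target_lengths = torch.tensor([8, 5], dtype=torch.int32)
+    input_lengths = T * torch.ones(N, dtype=torch.int32)
+    # N x T x C -> T x N x C, as expected by F.ctc_loss
+    log_probs = F.log_softmax(logits.permute(1, 0, 2), dim=-1)
+    return F.ctc_loss(log_probs, targets, input_lengths, target_lengths, blank=C - 1, zero_infinity=True)
+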
+
+def _crnn(
+    arch: str,
+    pretrained: bool,
+    backbone_fn: Callable[[Any], nn.Module],
+    pretrained_backbone: bool = True,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> CRNN:
+    pretrained_backbone = pretrained_backbone and not pretrained
+
+    # Feature extractor
+    feat_extractor = backbone_fn(pretrained=pretrained_backbone).features  # type: ignore[call-arg]
+
+    kwargs["vocab"] = kwargs.get("vocab", default_cfgs[arch]["vocab"])
+    kwargs["input_shape"] = kwargs.get("input_shape", default_cfgs[arch]["input_shape"])
+
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["vocab"] = kwargs["vocab"]
+    _cfg["input_shape"] = kwargs["input_shape"]
+
+    # Build the model
+    model = CRNN(feat_extractor, cfg=_cfg, **kwargs)  # type: ignore[arg-type]
+    # Load pretrained parameters
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if _cfg["vocab"] != default_cfgs[arch]["vocab"] else None
+        model.from_pretrained(_cfg["url"], ignore_keys=_ignore_keys)
+
+    return model
+
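+
+# Illustrative sketch (not part of the original module): passing a custom `vocab` together with
+# `pretrained=True` makes the factories below reload the checkpoint with `ignore_keys`, so the
+# classification head (of size len(vocab) + 1) is re-initialised instead of shape-mismatching.
+# The helper name and the digits-only vocabulary are arbitrary examples.
+def _example_custom_vocab_crnn() -> CRNN:
+    digits_only = "0123456789"
+    # Backbone and LSTM weights come from the checkpoint, the final linear layer is re-initialised
+    model = crnn_vgg16_bn(pretrained=True, vocab=digits_only)
+    assert model.linear.out_features == len(digits_only) + 1
+    return model
+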
+
+
+[docs] +def crnn_vgg16_bn(pretrained: bool = False, **kwargs: Any) -> CRNN: + """CRNN with a VGG-16 backbone as described in `"An End-to-End Trainable Neural Network for Image-based + Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_. + + >>> import torch + >>> from doctr.models import crnn_vgg16_bn + >>> model = crnn_vgg16_bn(pretrained=True) + >>> input_tensor = torch.rand(1, 3, 32, 128) + >>> out = model(input_tensor) + + Args: + pretrained (bool): If True, returns a model pre-trained on our text recognition dataset + **kwargs: keyword arguments of the CRNN architecture + + Returns: + text recognition architecture + """ + return _crnn("crnn_vgg16_bn", pretrained, vgg16_bn_r, ignore_keys=["linear.weight", "linear.bias"], **kwargs)
+ + + +
+[docs] +def crnn_mobilenet_v3_small(pretrained: bool = False, **kwargs: Any) -> CRNN: + """CRNN with a MobileNet V3 Small backbone as described in `"An End-to-End Trainable Neural Network for Image-based + Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_. + + >>> import torch + >>> from doctr.models import crnn_mobilenet_v3_small + >>> model = crnn_mobilenet_v3_small(pretrained=True) + >>> input_tensor = torch.rand(1, 3, 32, 128) + >>> out = model(input_tensor) + + Args: + pretrained (bool): If True, returns a model pre-trained on our text recognition dataset + **kwargs: keyword arguments of the CRNN architecture + + Returns: + text recognition architecture + """ + return _crnn( + "crnn_mobilenet_v3_small", + pretrained, + mobilenet_v3_small_r, + ignore_keys=["linear.weight", "linear.bias"], + **kwargs, + )
+ + + +
+[docs] +def crnn_mobilenet_v3_large(pretrained: bool = False, **kwargs: Any) -> CRNN: + """CRNN with a MobileNet V3 Large backbone as described in `"An End-to-End Trainable Neural Network for Image-based + Sequence Recognition and Its Application to Scene Text Recognition" <https://arxiv.org/pdf/1507.05717.pdf>`_. + + >>> import torch + >>> from doctr.models import crnn_mobilenet_v3_large + >>> model = crnn_mobilenet_v3_large(pretrained=True) + >>> input_tensor = torch.rand(1, 3, 32, 128) + >>> out = model(input_tensor) + + Args: + pretrained (bool): If True, returns a model pre-trained on our text recognition dataset + **kwargs: keyword arguments of the CRNN architecture + + Returns: + text recognition architecture + """ + return _crnn( + "crnn_mobilenet_v3_large", + pretrained, + mobilenet_v3_large_r, + ignore_keys=["linear.weight", "linear.bias"], + **kwargs, + )
\ No newline at end of file diff --git a/_modules/doctr/models/recognition/master/pytorch.html b/_modules/doctr/models/recognition/master/pytorch.html new file mode 100644 index 0000000000..cecb6ca759 --- /dev/null +++ b/_modules/doctr/models/recognition/master/pytorch.html @@ -0,0 +1,679 @@

Source code for doctr.models.recognition.master.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from collections.abc import Callable
+from copy import deepcopy
+from typing import Any
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torchvision.models._utils import IntermediateLayerGetter
+
+from doctr.datasets import VOCABS
+from doctr.models.classification import magc_resnet31
+from doctr.models.modules.transformer import Decoder, PositionalEncoding
+
+from ...utils import _bf16_to_float32, load_pretrained_params
+from .base import _MASTER, _MASTERPostProcessor
+
+__all__ = ["MASTER", "master"]
+
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "master": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 128),
+        "vocab": VOCABS["french"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/master-fde31e4a.pt&src=0",
+    },
+}
+
+
+class MASTER(_MASTER, nn.Module):
+    """Implements MASTER as described in paper: <https://arxiv.org/pdf/1910.02562.pdf>`_.
+    Implementation based on the official Pytorch implementation: <https://github.com/wenwenyu/MASTER-pytorch>`_.
+
+    Args:
+        feature_extractor: the backbone serving as feature extractor
+        vocab: vocabulary (without EOS, SOS, PAD)
+        d_model: dimension of the transformer decoder
+        dff: hidden dimension of the position-wise feed-forward layer
+        num_heads: number of heads for the multi-head attention module
+        num_layers: number of decoder layers to stack
+        max_length: maximum length of character sequence handled by the model
+        dropout: dropout probability of the decoder
+        input_shape: size of the image inputs
+        exportable: if True, the model is ONNX exportable and returns only the logits
+        cfg: dictionary containing information about the model
+    """
+
+    def __init__(
+        self,
+        feature_extractor: nn.Module,
+        vocab: str,
+        d_model: int = 512,
+        dff: int = 2048,
+        num_heads: int = 8,  # number of heads in the transformer decoder
+        num_layers: int = 3,
+        max_length: int = 50,
+        dropout: float = 0.2,
+        input_shape: tuple[int, int, int] = (3, 32, 128),  # different from the paper
+        exportable: bool = False,
+        cfg: dict[str, Any] | None = None,
+    ) -> None:
+        super().__init__()
+
+        self.exportable = exportable
+        self.max_length = max_length
+        self.d_model = d_model
+        self.vocab = vocab
+        self.cfg = cfg
+        self.vocab_size = len(vocab)
+
+        self.feat_extractor = feature_extractor
+        self.positional_encoding = PositionalEncoding(self.d_model, dropout, max_len=input_shape[1] * input_shape[2])
+
+        self.decoder = Decoder(
+            num_layers=num_layers,
+            d_model=self.d_model,
+            num_heads=num_heads,
+            vocab_size=self.vocab_size + 3,  # EOS, SOS, PAD
+            dff=dff,
+            dropout=dropout,
+            maximum_position_encoding=self.max_length,
+        )
+
+        self.linear = nn.Linear(self.d_model, self.vocab_size + 3)
+        self.postprocessor = MASTERPostProcessor(vocab=self.vocab)
+
+        for n, m in self.named_modules():
+            # Don't override the initialization of the backbone
+            if n.startswith("feat_extractor."):
+                continue
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+    def make_source_and_target_mask(
+        self, source: torch.Tensor, target: torch.Tensor
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        # borrowed and slightly modified from  https://github.com/wenwenyu/MASTER-pytorch
+        # NOTE: nn.TransformerDecoder takes the inverse from this implementation
+        # [True, True, True, ..., False, False, False] -> False is masked
+        # (N, 1, 1, max_length)
+        target_pad_mask = (target != self.vocab_size + 2).unsqueeze(1).unsqueeze(1)
+        target_length = target.size(1)
+        # subsequent (causal) mask: True = attend, False = masked, shape (max_length, max_length)
+        # NOTE: onnxruntime tril/triu works only with float currently (onnxruntime 1.11.1 - opset 14)
+        target_sub_mask = torch.tril(torch.ones((target_length, target_length), device=source.device), diagonal=0).to(
+            dtype=torch.bool
+        )
+        # source mask filled with ones (max_length, positional_encoded_seq_len)
+        source_mask = torch.ones((target_length, source.size(1)), dtype=torch.uint8, device=source.device)
+        # combine the two masks into one (N, 1, max_length, max_length)
+        target_mask = target_pad_mask & target_sub_mask
+        return source_mask, target_mask.int()
+
+    @staticmethod
+    def compute_loss(
+        model_output: torch.Tensor,
+        gt: torch.Tensor,
+        seq_len: torch.Tensor,
+    ) -> torch.Tensor:
+        """Compute categorical cross-entropy loss for the model.
+        Sequences are masked after the EOS character.
+
+        Args:
+            gt: the encoded tensor with gt labels
+            model_output: predicted logits of the model
+            seq_len: lengths of each gt word inside the batch
+
+        Returns:
+            The loss of the model on the batch
+        """
+        # Input length : number of timesteps
+        input_len = model_output.shape[1]
+        # Add one for the additional <eos> token (the <sos> token disappears in the shift!)
+        seq_len = seq_len + 1
+        # Compute loss: don't forget to shift gt! Otherwise the model learns to output the gt[t-1]!
+        # The "masked" first gt char is <sos>. Delete last logit of the model output.
+        cce = F.cross_entropy(model_output[:, :-1, :].permute(0, 2, 1), gt[:, 1:], reduction="none")
+        # Compute mask, remove 1 timestep here as well
+        mask_2d = torch.arange(input_len - 1, device=model_output.device)[None, :] >= seq_len[:, None]
+        cce[mask_2d] = 0
+
+        ce_loss = cce.sum(1) / seq_len.to(dtype=model_output.dtype)
+        return ce_loss.mean()
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        target: list[str] | None = None,
+        return_model_output: bool = False,
+        return_preds: bool = False,
+    ) -> dict[str, Any]:
+        """Call function for training
+
+        Args:
+            x: images
+            target: list of str labels
+            return_model_output: if True, return logits
+            return_preds: if True, decode logits
+
+        Returns:
+            A dictionary optionally containing the loss, the logits and the predictions.
+        """
+        # Encode
+        features = self.feat_extractor(x)["features"]
+        b, c, h, w = features.shape
+        # (N, C, H, W) --> (N, H * W, C)
+        features = features.view(b, c, h * w).permute((0, 2, 1))
+        # add positional encoding to features
+        encoded = self.positional_encoding(features)
+
+        out: dict[str, Any] = {}
+
+        if self.training and target is None:
+            raise ValueError("Need to provide labels during training")
+
+        if target is not None:
+            # Compute target: tensor of gts and sequence lengths
+            _gt, _seq_len = self.build_target(target)
+            gt, seq_len = torch.from_numpy(_gt).to(dtype=torch.long), torch.tensor(_seq_len)
+            gt, seq_len = gt.to(x.device), seq_len.to(x.device)
+
+            # Compute source mask and target mask
+            source_mask, target_mask = self.make_source_and_target_mask(encoded, gt)
+            output = self.decoder(gt, encoded, source_mask, target_mask)
+            # Compute logits
+            logits = self.linear(output)
+        else:
+            logits = self.decode(encoded)
+
+        logits = _bf16_to_float32(logits)
+
+        if self.exportable:
+            out["logits"] = logits
+            return out
+
+        if target is not None:
+            out["loss"] = self.compute_loss(logits, gt, seq_len)
+
+        if return_model_output:
+            out["out_map"] = logits
+
+        if return_preds:
+            # Disable for torch.compile compatibility
+            @torch.compiler.disable
+            def _postprocess(logits: torch.Tensor) -> list[tuple[str, float]]:
+                return self.postprocessor(logits)
+
+            # Post-process boxes
+            out["preds"] = _postprocess(logits)
+
+        return out
+
+    def decode(self, encoded: torch.Tensor) -> torch.Tensor:
+        """Decode function for prediction
+
+        Args:
+            encoded: input tensor
+
+        Returns:
+            The decoded logits, of shape (N, max_length, vocab_size + 3)
+        """
+        b = encoded.size(0)
+
+        # Padding symbol + SOS at the beginning
+        ys = torch.full((b, self.max_length), self.vocab_size + 2, dtype=torch.long, device=encoded.device)  # pad
+        ys[:, 0] = self.vocab_size + 1  # sos
+
+        # Final dimension include EOS/SOS/PAD
+        for i in range(self.max_length - 1):
+            source_mask, target_mask = self.make_source_and_target_mask(encoded, ys)
+            output = self.decoder(ys, encoded, source_mask, target_mask)
+            logits = self.linear(output)
+            prob = torch.softmax(logits, dim=-1)
+            next_token = torch.max(prob, dim=-1).indices
+            # update ys with the next token and ignore the first token (SOS)
+            ys[:, i + 1] = next_token[:, i]
+
+        # Shape (N, max_length, vocab_size + 3)
+        return logits
+
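+
+# Illustrative sketch (not part of the original module): the teacher-forcing shift and padding
+# mask used in `MASTER.compute_loss`. The helper name and the toy batch below only illustrate
+# how every timestep located after the (shifted) EOS position is zeroed out.
+def _example_master_loss_mask() -> torch.Tensor:
+    input_len, batch = 6, 2
+    # word lengths (without special tokens); +1 accounts for the EOS token
+    seq_len = torch.tensor([3, 1]) + 1
+    # one cross-entropy value per remaining timestep after dropping the last logit
+    cce = torch.ones(batch, input_len - 1)
+    mask_2d = torch.arange(input_len - 1)[None, :] >= seq_len[:, None]
+    cce[mask_2d] = 0
+    # sample 0 keeps 4 terms, sample 1 keeps 2 terms
+    assert cce.sum(1).tolist() == [4.0, 2.0]
+    return cce.sum(1) / seq_len.to(dtype=cce.dtype)
+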
+
+class MASTERPostProcessor(_MASTERPostProcessor):
+    """Post processor for MASTER architectures"""
+
+    def __call__(
+        self,
+        logits: torch.Tensor,
+    ) -> list[tuple[str, float]]:
+        # compute pred with argmax for attention models
+        out_idxs = logits.argmax(-1)
+        # N x L
+        probs = torch.gather(torch.softmax(logits, -1), -1, out_idxs.unsqueeze(-1)).squeeze(-1)
+        # Take the minimum confidence of the sequence
+        probs = probs.min(dim=1).values.detach().cpu()
+
+        # Manual decoding
+        word_values = [
+            "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0]
+            for encoded_seq in out_idxs.cpu().numpy()
+        ]
+
+        return list(zip(word_values, probs.numpy().clip(0, 1).tolist()))
+
+
+def _master(
+    arch: str,
+    pretrained: bool,
+    backbone_fn: Callable[[bool], nn.Module],
+    layer: str,
+    pretrained_backbone: bool = True,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> MASTER:
+    pretrained_backbone = pretrained_backbone and not pretrained
+
+    # Patch the config
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"])
+    _cfg["vocab"] = kwargs.get("vocab", _cfg["vocab"])
+
+    kwargs["vocab"] = _cfg["vocab"]
+    kwargs["input_shape"] = _cfg["input_shape"]
+
+    # Build the model
+    feat_extractor = IntermediateLayerGetter(
+        backbone_fn(pretrained_backbone),
+        {layer: "features"},
+    )
+    model = MASTER(feat_extractor, cfg=_cfg, **kwargs)
+    # Load pretrained parameters
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if _cfg["vocab"] != default_cfgs[arch]["vocab"] else None
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    return model
+
+
+
+[docs] +def master(pretrained: bool = False, **kwargs: Any) -> MASTER: + """MASTER as described in the paper: `<https://arxiv.org/pdf/1910.02562.pdf>`_. + + >>> import torch + >>> from doctr.models import master + >>> model = master(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 32, 128)) + >>> out = model(input_tensor) + + Args: + pretrained (bool): If True, returns a model pre-trained on our text recognition dataset + **kwargs: keyword arguments passed to the MASTER architecture + + Returns: + text recognition architecture + """ + return _master( + "master", + pretrained, + magc_resnet31, + "10", + ignore_keys=[ + "decoder.embed.weight", + "linear.weight", + "linear.bias", + ], + **kwargs, + )
\ No newline at end of file diff --git a/_modules/doctr/models/recognition/parseq/pytorch.html b/_modules/doctr/models/recognition/parseq/pytorch.html new file mode 100644 index 0000000000..10e4188019 --- /dev/null +++ b/_modules/doctr/models/recognition/parseq/pytorch.html @@ -0,0 +1,834 @@

Source code for doctr.models.recognition.parseq.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import math
+from collections.abc import Callable
+from copy import deepcopy
+from itertools import permutations
+from typing import Any
+
+import numpy as np
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torchvision.models._utils import IntermediateLayerGetter
+
+from doctr.datasets import VOCABS
+from doctr.models.modules.transformer import MultiHeadAttention, PositionwiseFeedForward
+
+from ...classification import vit_s
+from ...utils import _bf16_to_float32, load_pretrained_params
+from .base import _PARSeq, _PARSeqPostProcessor
+
+__all__ = ["PARSeq", "parseq"]
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "parseq": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 128),
+        "vocab": VOCABS["french"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/parseq-56125471.pt&src=0",
+    },
+}
+
+
+class CharEmbedding(nn.Module):
+    """Implements the character embedding module
+
+    Args:
+        vocab_size: size of the vocabulary
+        d_model: dimension of the model
+    """
+
+    def __init__(self, vocab_size: int, d_model: int):
+        super().__init__()
+        self.embedding = nn.Embedding(vocab_size, d_model)
+        self.d_model = d_model
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        return math.sqrt(self.d_model) * self.embedding(x)
+
+
+class PARSeqDecoder(nn.Module):
+    """Implements decoder module of the PARSeq model
+
+    Args:
+        d_model: dimension of the model
+        num_heads: number of attention heads
+        ffd: dimension of the feed forward layer
+        ffd_ratio: depth multiplier for the feed forward layer
+        dropout: dropout rate
+    """
+
+    def __init__(
+        self,
+        d_model: int,
+        num_heads: int = 12,
+        ffd: int = 2048,
+        ffd_ratio: int = 4,
+        dropout: float = 0.1,
+    ):
+        super().__init__()
+        self.attention = MultiHeadAttention(num_heads, d_model, dropout=dropout)
+        self.cross_attention = MultiHeadAttention(num_heads, d_model, dropout=dropout)
+        self.position_feed_forward = PositionwiseFeedForward(d_model, ffd * ffd_ratio, dropout, nn.GELU())
+
+        self.query_norm = nn.LayerNorm(d_model, eps=1e-5)
+        self.content_norm = nn.LayerNorm(d_model, eps=1e-5)
+        self.feed_forward_norm = nn.LayerNorm(d_model, eps=1e-5)
+        self.output_norm = nn.LayerNorm(d_model, eps=1e-5)
+        self.attention_dropout = nn.Dropout(dropout)
+        self.cross_attention_dropout = nn.Dropout(dropout)
+        self.feed_forward_dropout = nn.Dropout(dropout)
+
+    def forward(
+        self,
+        target,
+        content,
+        memory,
+        target_mask: torch.Tensor | None = None,
+    ):
+        query_norm = self.query_norm(target)
+        content_norm = self.content_norm(content)
+        target = target.clone() + self.attention_dropout(
+            self.attention(query_norm, content_norm, content_norm, mask=target_mask)
+        )
+        target = target.clone() + self.cross_attention_dropout(
+            self.cross_attention(self.query_norm(target), memory, memory)
+        )
+        target = target.clone() + self.feed_forward_dropout(self.position_feed_forward(self.feed_forward_norm(target)))
+        return self.output_norm(target)
+
+
+class PARSeq(_PARSeq, nn.Module):
+    """Implements a PARSeq architecture as described in `"Scene Text Recognition
+    with Permuted Autoregressive Sequence Models" <https://arxiv.org/pdf/2207.06966>`_.
+    Slightly modified implementation based on the official PyTorch implementation: `<https://github.com/baudm/parseq/tree/main>`_.
+
+    Args:
+        feature_extractor: the backbone serving as feature extractor
+        vocab: vocabulary used for encoding
+        embedding_units: number of embedding units
+        max_length: maximum word length handled by the model
+        dropout_prob: dropout probability for the decoder
+        dec_num_heads: number of attention heads in the decoder
+        dec_ff_dim: dimension of the feed forward layer in the decoder
+        dec_ffd_ratio: depth multiplier for the feed forward layer in the decoder
+        input_shape: input shape of the image
+        exportable: if True, the model is ONNX exportable and returns only the logits
+        cfg: dictionary containing information about the model
+    """
+
+    def __init__(
+        self,
+        feature_extractor,
+        vocab: str,
+        embedding_units: int,
+        max_length: int = 32,  # different from the paper
+        dropout_prob: float = 0.1,
+        dec_num_heads: int = 12,
+        dec_ff_dim: int = 384,  # we use it from the original implementation instead of 2048
+        dec_ffd_ratio: int = 4,
+        input_shape: tuple[int, int, int] = (3, 32, 128),
+        exportable: bool = False,
+        cfg: dict[str, Any] | None = None,
+    ) -> None:
+        super().__init__()
+        self.vocab = vocab
+        self.exportable = exportable
+        self.cfg = cfg
+        self.max_length = max_length
+        self.vocab_size = len(vocab)
+        self.rng = np.random.default_rng()
+
+        self.feat_extractor = feature_extractor
+        self.decoder = PARSeqDecoder(embedding_units, dec_num_heads, dec_ff_dim, dec_ffd_ratio, dropout_prob)
+        self.head = nn.Linear(embedding_units, self.vocab_size + 1)  # +1 for EOS
+        self.embed = CharEmbedding(self.vocab_size + 3, embedding_units)  # +3 for SOS, EOS, PAD
+
+        self.pos_queries = nn.Parameter(torch.Tensor(1, self.max_length + 1, embedding_units))  # +1 for EOS
+        self.dropout = nn.Dropout(p=dropout_prob)
+
+        self.postprocessor = PARSeqPostProcessor(vocab=self.vocab)
+
+        nn.init.trunc_normal_(self.pos_queries, std=0.02)
+        for n, m in self.named_modules():
+            # Don't override the initialization of the backbone
+            if n.startswith("feat_extractor."):
+                continue
+            if isinstance(m, nn.Linear):
+                nn.init.trunc_normal_(m.weight, std=0.02)
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+            elif isinstance(m, nn.Embedding):
+                nn.init.trunc_normal_(m.weight, std=0.02)
+                if m.padding_idx is not None:
+                    m.weight.data[m.padding_idx].zero_()
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        # NOTE: This is required to make the model backward compatible with already trained models docTR version <0.11.1
+        # ref.: https://github.com/mindee/doctr/issues/1911
+        if kwargs.get("ignore_keys") is None:
+            kwargs["ignore_keys"] = []
+
+        kwargs["ignore_keys"].extend([
+            "decoder.attention_norm.weight",
+            "decoder.attention_norm.bias",
+            "decoder.cross_attention_norm.weight",
+            "decoder.cross_attention_norm.bias",
+        ])
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    def generate_permutations(self, seqlen: torch.Tensor) -> torch.Tensor:
+        # Generates permutations of the target sequence.
+        # Borrowed from https://github.com/baudm/parseq/blob/main/strhub/models/parseq/system.py
+        # with small modifications
+
+        max_num_chars = int(seqlen.max().item())  # get longest sequence length in batch
+        perms = [torch.arange(max_num_chars, device=seqlen.device)]
+
+        max_perms = math.factorial(max_num_chars) // 2
+        num_gen_perms = min(3, max_perms)
+        if max_num_chars < 5:
+            # Pool of permutations to sample from. We only need the first half (if complementary option is selected)
+            # Special handling for max_num_chars == 4 which correctly divides the pool into the flipped halves
+            if max_num_chars == 4:
+                selector = [0, 3, 4, 6, 9, 10, 12, 16, 17, 18, 19, 21]
+            else:
+                selector = list(range(max_perms))
+            perm_pool = torch.as_tensor(list(permutations(range(max_num_chars), max_num_chars)), device=seqlen.device)[
+                selector
+            ]
+            # If the forward permutation is always selected, no need to add it to the pool for sampling
+            perm_pool = perm_pool[1:]
+            final_perms = torch.stack(perms)
+            if len(perm_pool):
+                i = self.rng.choice(len(perm_pool), size=num_gen_perms - len(final_perms), replace=False)
+                final_perms = torch.cat([final_perms, perm_pool[i]])
+        else:
+            perms.extend([
+                torch.randperm(max_num_chars, device=seqlen.device) for _ in range(num_gen_perms - len(perms))
+            ])
+            final_perms = torch.stack(perms)
+
+        comp = final_perms.flip(-1)
+        final_perms = torch.stack([final_perms, comp]).transpose(0, 1).reshape(-1, max_num_chars)
+
+        sos_idx = torch.zeros(len(final_perms), 1, device=seqlen.device)
+        eos_idx = torch.full((len(final_perms), 1), max_num_chars + 1, device=seqlen.device)
+        combined = torch.cat([sos_idx, final_perms + 1, eos_idx], dim=1).int()
+        if len(combined) > 1:
+            combined[1, 1:] = max_num_chars + 1 - torch.arange(max_num_chars + 1, device=seqlen.device)
+        return combined
+
+    def generate_permutations_attention_masks(self, permutation: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]:
+        # Generate source and target mask for the decoder attention.
+        sz = permutation.shape[0]
+        mask = torch.ones((sz, sz), device=permutation.device)
+
+        for i in range(sz):
+            query_idx = permutation[i]
+            masked_keys = permutation[i + 1 :]
+            mask[query_idx, masked_keys] = 0.0
+        source_mask = mask[:-1, :-1].clone()
+        mask[torch.eye(sz, dtype=torch.bool, device=permutation.device)] = 0.0
+        target_mask = mask[1:, :-1]
+
+        return source_mask.int(), target_mask.int()
+
+    def decode(
+        self,
+        target: torch.Tensor,
+        memory: torch.Tensor,
+        target_mask: torch.Tensor | None = None,
+        target_query: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Add positional information to the target sequence and pass it through the decoder."""
+        batch_size, sequence_length = target.shape
+        # apply positional information to the target sequence excluding the SOS token
+        null_ctx = self.embed(target[:, :1])
+        content = self.pos_queries[:, : sequence_length - 1] + self.embed(target[:, 1:])
+        content = self.dropout(torch.cat([null_ctx, content], dim=1))
+        if target_query is None:
+            target_query = self.pos_queries[:, :sequence_length].expand(batch_size, -1, -1)
+        target_query = self.dropout(target_query)
+        return self.decoder(target_query, content, memory, target_mask)
+
+    def decode_autoregressive(self, features: torch.Tensor, max_len: int | None = None) -> torch.Tensor:
+        """Generate predictions for the given features."""
+        max_length = max_len if max_len is not None else self.max_length
+        max_length = min(max_length, self.max_length) + 1
+        # Padding symbol + SOS at the beginning
+        ys = torch.full(
+            (features.size(0), max_length), self.vocab_size + 2, dtype=torch.long, device=features.device
+        )  # pad
+        ys[:, 0] = self.vocab_size + 1  # SOS token
+        pos_queries = self.pos_queries[:, :max_length].expand(features.size(0), -1, -1)
+        # Create query mask for the decoder attention
+        query_mask = (
+            torch.tril(torch.ones((max_length, max_length), device=features.device), diagonal=0).to(dtype=torch.bool)
+        ).int()
+
+        pos_logits = []
+        for i in range(max_length):
+            # Decode one token at a time without providing information about the future tokens
+            tgt_out = self.decode(
+                ys[:, : i + 1],
+                features,
+                query_mask[i : i + 1, : i + 1],
+                target_query=pos_queries[:, i : i + 1],
+            )
+            pos_prob = self.head(tgt_out)
+            pos_logits.append(pos_prob)
+
+            if i + 1 < max_length:
+                # Update with the next token
+                ys[:, i + 1] = pos_prob.squeeze().argmax(-1)
+
+                # Stop decoding if all sequences have reached the EOS token
+                # NOTE: `break` isn't correctly translated to ONNX, so we don't break here when exporting
+                if not self.exportable and max_len is None and (ys == self.vocab_size).any(dim=-1).all():
+                    break
+
+        logits = torch.cat(pos_logits, dim=1)  # (N, max_length, vocab_size + 1)
+
+        # One refine iteration
+        # Update query mask
+        query_mask[torch.triu(torch.ones(max_length, max_length, dtype=torch.bool, device=features.device), 2)] = 1
+
+        # Prepare target input for 1 refine iteration
+        sos = torch.full((features.size(0), 1), self.vocab_size + 1, dtype=torch.long, device=features.device)
+        ys = torch.cat([sos, logits[:, :-1].argmax(-1)], dim=1)
+
+        # Create the padding mask for the refined target input: the EOS token and everything after it are masked (False)
+        # (N, 1, 1, max_length)
+        target_pad_mask = ~((ys == self.vocab_size).int().cumsum(-1) > 0).unsqueeze(1).unsqueeze(1)
+        mask = (target_pad_mask.bool() & query_mask[:, : ys.shape[1]].bool()).int()
+        logits = self.head(self.decode(ys, features, mask, target_query=pos_queries))
+
+        return logits  # (N, max_length, vocab_size + 1)
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        target: list[str] | None = None,
+        return_model_output: bool = False,
+        return_preds: bool = False,
+    ) -> dict[str, Any]:
+        features = self.feat_extractor(x)["features"]  # (batch_size, patches_seqlen, d_model)
+        # remove cls token
+        features = features[:, 1:, :]
+
+        if self.training and target is None:
+            raise ValueError("Need to provide labels during training")
+
+        if target is not None:
+            # Build target tensor
+            _gt, _seq_len = self.build_target(target)
+            gt, seq_len = torch.from_numpy(_gt).to(dtype=torch.long).to(x.device), torch.tensor(_seq_len).to(x.device)
+            gt = gt[:, : int(seq_len.max().item()) + 2]  # slice up to the max length of the batch + 2 (SOS + EOS)
+
+            if self.training:
+                # Generate permutations for the target sequences
+                tgt_perms = self.generate_permutations(seq_len)
+
+                gt_in = gt[:, :-1]  # remove EOS token from longest target sequence
+                gt_out = gt[:, 1:]  # remove SOS token
+                # Create padding mask for target input
+                # [True, True, True, ..., False, False, False] -> False is masked
+                padding_mask = ~(
+                    ((gt_in == self.vocab_size + 2) | (gt_in == self.vocab_size)).int().cumsum(-1) > 0
+                ).unsqueeze(1).unsqueeze(1)  # (N, 1, 1, seq_len)
+
+                loss = torch.tensor(0.0, device=features.device)
+                loss_numel: int | float = 0
+                n = (gt_out != self.vocab_size + 2).sum().item()
+                for i, perm in enumerate(tgt_perms):
+                    _, target_mask = self.generate_permutations_attention_masks(perm)  # (seq_len, seq_len)
+                    # combine both masks
+                    mask = (target_mask.bool() & padding_mask.bool()).int()  # (N, 1, seq_len, seq_len)
+
+                    logits = self.head(self.decode(gt_in, features, mask)).flatten(end_dim=1)
+                    loss += n * F.cross_entropy(logits, gt_out.flatten(), ignore_index=self.vocab_size + 2)
+                    loss_numel += n
+                    # After the second iteration (i.e. done with canonical and reverse orderings),
+                    # remove the [EOS] tokens for the succeeding perms
+                    if i == 1:
+                        gt_out = torch.where(gt_out == self.vocab_size, self.vocab_size + 2, gt_out)
+                        n = (gt_out != self.vocab_size + 2).sum().item()
+
+                loss /= loss_numel
+
+            else:
+                gt = gt[:, 1:]  # remove SOS token
+                max_len = gt.shape[1] - 1  # exclude EOS token
+                logits = self.decode_autoregressive(features, max_len)
+                loss = F.cross_entropy(logits.flatten(end_dim=1), gt.flatten(), ignore_index=self.vocab_size + 2)
+        else:
+            logits = self.decode_autoregressive(features)
+
+        logits = _bf16_to_float32(logits)
+
+        out: dict[str, Any] = {}
+        if self.exportable:
+            out["logits"] = logits
+            return out
+
+        if return_model_output:
+            out["out_map"] = logits
+
+        if target is None or return_preds:
+            # Disable for torch.compile compatibility
+            @torch.compiler.disable
+            def _postprocess(logits: torch.Tensor) -> list[tuple[str, float]]:
+                return self.postprocessor(logits)
+
+            # Post-process boxes
+            out["preds"] = _postprocess(logits)
+
+        if target is not None:
+            out["loss"] = loss
+
+        return out
+
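+
+# Illustrative sketch (not part of the original module): the cumulative-sum trick used above to
+# build the EOS padding mask. Every position at or after the first EOS token is masked out
+# (False). The helper name and the toy indices below are made up.
+def _example_parseq_eos_padding_mask() -> torch.Tensor:
+    eos_idx = 4  # stands for `self.vocab_size` in the model
+    ys = torch.tensor([[1, 2, eos_idx, 0, 0]])  # (N=1, seq_len=5)
+    target_pad_mask = ~((ys == eos_idx).int().cumsum(-1) > 0)
+    assert target_pad_mask.tolist() == [[True, True, False, False, False]]
+    # In the model this mask is additionally unsqueezed to (N, 1, 1, seq_len)
+    return target_pad_mask.unsqueeze(1).unsqueeze(1)
+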
+
+class PARSeqPostProcessor(_PARSeqPostProcessor):
+    """Post processor for PARSeq architecture
+
+    Args:
+        vocab: string containing the ordered sequence of supported characters
+    """
+
+    def __call__(
+        self,
+        logits: torch.Tensor,
+    ) -> list[tuple[str, float]]:
+        # compute pred with argmax for attention models
+        out_idxs = logits.argmax(-1)
+        preds_prob = torch.softmax(logits, -1).max(dim=-1)[0]
+
+        # Manual decoding
+        word_values = [
+            "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0]
+            for encoded_seq in out_idxs.cpu().numpy()
+        ]
+        # compute probabilities for each word up to the EOS token
+        probs = [
+            preds_prob[i, : len(word)].clip(0, 1).mean().item() if word else 0.0 for i, word in enumerate(word_values)
+        ]
+
+        return list(zip(word_values, probs))
+
+
+def _parseq(
+    arch: str,
+    pretrained: bool,
+    backbone_fn: Callable[[bool], nn.Module],
+    layer: str,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> PARSeq:
+    # Patch the config
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["vocab"] = kwargs.get("vocab", _cfg["vocab"])
+    _cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"])
+    patch_size = kwargs.get("patch_size", (4, 8))
+
+    kwargs["vocab"] = _cfg["vocab"]
+    kwargs["input_shape"] = _cfg["input_shape"]
+
+    # Feature extractor
+    feat_extractor = IntermediateLayerGetter(
+        # NOTE: we don't use a pretrained backbone for non-rectangular patches to avoid the pos embed mismatch
+        backbone_fn(False, input_shape=_cfg["input_shape"], patch_size=patch_size),  # type: ignore[call-arg]
+        {layer: "features"},
+    )
+
+    kwargs.pop("patch_size", None)
+    kwargs.pop("pretrained_backbone", None)
+
+    # Build the model
+    model = PARSeq(feat_extractor, cfg=_cfg, **kwargs)
+    # Load pretrained parameters
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if _cfg["vocab"] != default_cfgs[arch]["vocab"] else None
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    return model
+
+
+
+[docs] +def parseq(pretrained: bool = False, **kwargs: Any) -> PARSeq: + """PARSeq architecture from + `"Scene Text Recognition with Permuted Autoregressive Sequence Models" <https://arxiv.org/pdf/2207.06966>`_. + + >>> import torch + >>> from doctr.models import parseq + >>> model = parseq(pretrained=False) + >>> input_tensor = torch.rand((1, 3, 32, 128)) + >>> out = model(input_tensor) + + Args: + pretrained (bool): If True, returns a model pre-trained on our text recognition dataset + **kwargs: keyword arguments of the PARSeq architecture + + Returns: + text recognition architecture + """ + return _parseq( + "parseq", + pretrained, + vit_s, + "1", + embedding_units=384, + patch_size=(4, 8), + ignore_keys=["embed.embedding.weight", "head.weight", "head.bias"], + **kwargs, + )
\ No newline at end of file diff --git a/_modules/doctr/models/recognition/sar/pytorch.html b/_modules/doctr/models/recognition/sar/pytorch.html new file mode 100644 index 0000000000..255ff52aff --- /dev/null +++ b/_modules/doctr/models/recognition/sar/pytorch.html @@ -0,0 +1,744 @@

Source code for doctr.models.recognition.sar.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from collections.abc import Callable
+from copy import deepcopy
+from typing import Any
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torchvision.models._utils import IntermediateLayerGetter
+
+from doctr.datasets import VOCABS
+
+from ...classification import resnet31
+from ...utils import _bf16_to_float32, load_pretrained_params
+from ..core import RecognitionModel, RecognitionPostProcessor
+
+__all__ = ["SAR", "sar_resnet31"]
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "sar_resnet31": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 128),
+        "vocab": VOCABS["french"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/sar_resnet31-9a1deedf.pt&src=0",
+    },
+}
+
+
+class SAREncoder(nn.Module):
+    def __init__(self, in_feats: int, rnn_units: int, dropout_prob: float = 0.0) -> None:
+        super().__init__()
+        self.rnn = nn.LSTM(in_feats, rnn_units, 2, batch_first=True, dropout=dropout_prob)
+        self.linear = nn.Linear(rnn_units, rnn_units)
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        # (N, W, in_feats) --> (N, W, rnn_units)
+        encoded = self.rnn(x)[0]
+        # (N, C)
+        return self.linear(encoded[:, -1, :])
+
+
+class AttentionModule(nn.Module):
+    def __init__(self, feat_chans: int, state_chans: int, attention_units: int) -> None:
+        super().__init__()
+        self.feat_conv = nn.Conv2d(feat_chans, attention_units, kernel_size=3, padding=1)
+        # No need to add another bias since both tensors are summed together
+        self.state_conv = nn.Conv2d(state_chans, attention_units, kernel_size=1, bias=False)
+        self.attention_projector = nn.Conv2d(attention_units, 1, kernel_size=1, bias=False)
+
+    def forward(
+        self,
+        features: torch.Tensor,  # (N, C, H, W)
+        hidden_state: torch.Tensor,  # (N, C)
+    ) -> torch.Tensor:
+        H_f, W_f = features.shape[2:]
+
+        # (N, feat_chans, H, W) --> (N, attention_units, H, W)
+        feat_projection = self.feat_conv(features)
+        # (N, state_chans, 1, 1) --> (N, attention_units, 1, 1)
+        hidden_state = hidden_state.view(hidden_state.size(0), hidden_state.size(1), 1, 1)
+        state_projection = self.state_conv(hidden_state)
+        state_projection = state_projection.expand(-1, -1, H_f, W_f)
+        # (N, attention_units, 1, 1) --> (N, attention_units, H_f, W_f)
+        attention_weights = torch.tanh(feat_projection + state_projection)
+        # (N, attention_units, H_f, W_f) --> (N, 1, H_f, W_f)
+        attention_weights = self.attention_projector(attention_weights)
+        B, C, H, W = attention_weights.size()
+
+        # (N, H, W) --> (N, 1, H, W)
+        attention_weights = torch.softmax(attention_weights.view(B, -1), dim=-1).view(B, C, H, W)
+        # fuse features and attention weights (N, C)
+        return (features * attention_weights).sum(dim=(2, 3))
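A small shape sanity check for the attention block above (sizes are illustrative): for each image the module returns a single glimpse vector with `feat_chans` channels.

    import torch
    from doctr.models.recognition.sar.pytorch import AttentionModule

    attn = AttentionModule(feat_chans=512, state_chans=512, attention_units=512)
    features = torch.rand(2, 512, 4, 32)   # (N, C, H, W) backbone feature map
    hidden = torch.rand(2, 512)            # (N, C) decoder hidden state
    print(attn(features, hidden).shape)    # torch.Size([2, 512])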
+
+
+class SARDecoder(nn.Module):
+    """Implements decoder module of the SAR model
+
+    Args:
+        rnn_units: number of hidden units in recurrent cells
+        max_length: maximum length of a sequence
+        vocab_size: number of classes in the model alphabet
+        embedding_units: number of hidden embedding units
+        attention_units: number of hidden attention units
+
+    """
+
+    def __init__(
+        self,
+        rnn_units: int,
+        max_length: int,
+        vocab_size: int,
+        embedding_units: int,
+        attention_units: int,
+        feat_chans: int = 512,
+        dropout_prob: float = 0.0,
+    ) -> None:
+        super().__init__()
+        self.vocab_size = vocab_size
+        self.max_length = max_length
+
+        self.embed = nn.Linear(self.vocab_size + 1, embedding_units)
+        self.embed_tgt = nn.Embedding(embedding_units, self.vocab_size + 1)
+        self.attention_module = AttentionModule(feat_chans, rnn_units, attention_units)
+        self.lstm_cell = nn.LSTMCell(rnn_units, rnn_units)
+        self.output_dense = nn.Linear(2 * rnn_units, self.vocab_size + 1)
+        self.dropout = nn.Dropout(dropout_prob)
+
+    def forward(
+        self,
+        features: torch.Tensor,  # (N, C, H, W)
+        holistic: torch.Tensor,  # (N, C)
+        gt: torch.Tensor | None = None,  # (N, L)
+    ) -> torch.Tensor:
+        if gt is not None:
+            gt_embedding = self.embed_tgt(gt)
+
+        logits_list: list[torch.Tensor] = []
+
+        for t in range(self.max_length + 1):  # 32
+            if t == 0:
+                # step to init the first states of the LSTMCell
+                hidden_state_init = cell_state_init = torch.zeros(
+                    features.size(0), features.size(1), device=features.device, dtype=features.dtype
+                )
+                hidden_state, cell_state = hidden_state_init, cell_state_init
+                prev_symbol = holistic
+            elif t == 1:
+                # step to init a 'blank' sequence of length vocab_size + 1 filled with zeros
+                # (N, vocab_size + 1) --> (N, embedding_units)
+                prev_symbol = torch.zeros(
+                    features.size(0), self.vocab_size + 1, device=features.device, dtype=features.dtype
+                )
+                prev_symbol = self.embed(prev_symbol)
+            else:
+                if gt is not None and self.training:
+                    # (N, embedding_units) -2 because of <bos> and <eos> (same)
+                    prev_symbol = self.embed(gt_embedding[:, t - 2])
+                else:
+                    # At inference time, feed back the most likely symbol from the previous timestep
+                    index = logits_list[t - 1].argmax(-1)
+                    # embed the predicted index to build the next input symbol
+                    prev_symbol = self.embed(self.embed_tgt(index))
+
+            # (N, C), (N, C)  take the last hidden state and cell state from current timestep
+            hidden_state_init, cell_state_init = self.lstm_cell(prev_symbol, (hidden_state_init, cell_state_init))
+            hidden_state, cell_state = self.lstm_cell(hidden_state_init, (hidden_state, cell_state))
+            # (N, C, H, W), (N, C) --> (N, C)
+            glimpse = self.attention_module(features, hidden_state)
+            # (N, C), (N, C) --> (N, 2 * C)
+            logits = torch.cat([hidden_state, glimpse], dim=1)
+            logits = self.dropout(logits)
+            # (N, vocab_size + 1)
+            logits_list.append(self.output_dense(logits))
+
+        # (max_length, N, vocab_size + 1) --> (N, max_length, vocab_size + 1)
+        return torch.stack(logits_list[1:]).permute(1, 0, 2)
+
+
+class SAR(nn.Module, RecognitionModel):
+    """Implements a SAR architecture as described in `"Show, Attend and Read:A Simple and Strong Baseline for
+    Irregular Text Recognition" <https://arxiv.org/pdf/1811.00751.pdf>`_.
+
+    Args:
+        feature_extractor: the backbone serving as feature extractor
+        vocab: vocabulary used for encoding
+        rnn_units: number of hidden units in both encoder and decoder LSTM
+        embedding_units: number of embedding units
+        attention_units: number of hidden units in attention module
+        max_length: maximum word length handled by the model
+        dropout_prob: dropout probability of the encoder LSTM
+        exportable: onnx exportable returns only logits
+        cfg: dictionary containing information about the model
+    """
+
+    def __init__(
+        self,
+        feature_extractor,
+        vocab: str,
+        rnn_units: int = 512,
+        embedding_units: int = 512,
+        attention_units: int = 512,
+        max_length: int = 30,
+        dropout_prob: float = 0.0,
+        input_shape: tuple[int, int, int] = (3, 32, 128),
+        exportable: bool = False,
+        cfg: dict[str, Any] | None = None,
+    ) -> None:
+        super().__init__()
+        self.vocab = vocab
+        self.exportable = exportable
+        self.cfg = cfg
+
+        self.max_length = max_length + 1  # Add 1 timestep for EOS after the longest word
+
+        self.feat_extractor = feature_extractor
+
+        # Size the LSTM
+        self.feat_extractor.eval()
+        with torch.no_grad():
+            out_shape = self.feat_extractor(torch.zeros((1, *input_shape)))["features"].shape
+        # Switch back to original mode
+        self.feat_extractor.train()
+
+        self.encoder = SAREncoder(out_shape[1], rnn_units, dropout_prob)
+        self.decoder = SARDecoder(
+            rnn_units,
+            self.max_length,
+            len(self.vocab),
+            embedding_units,
+            attention_units,
+            dropout_prob=dropout_prob,
+        )
+
+        self.postprocessor = SARPostProcessor(vocab=vocab)
+
+        for n, m in self.named_modules():
+            # Don't override the initialization of the backbone
+            if n.startswith("feat_extractor."):
+                continue
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        target: list[str] | None = None,
+        return_model_output: bool = False,
+        return_preds: bool = False,
+    ) -> dict[str, Any]:
+        features = self.feat_extractor(x)["features"]
+        # NOTE: use max instead of functional max_pool2d, whose kernel_size argument breaks ONNX export
+        # Vertical max pooling (N, C, H, W) --> (N, C, W)
+        pooled_features = features.max(dim=-2).values
+        # (N, W, C)
+        pooled_features = pooled_features.permute(0, 2, 1).contiguous()
+        # (N, C)
+        encoded = self.encoder(pooled_features)
+        if target is not None:
+            _gt, _seq_len = self.build_target(target)
+            gt, seq_len = torch.from_numpy(_gt).to(dtype=torch.long), torch.tensor(_seq_len)
+            gt, seq_len = gt.to(x.device), seq_len.to(x.device)
+
+        if self.training and target is None:
+            raise ValueError("Need to provide labels during training for teacher forcing")
+
+        decoded_features = _bf16_to_float32(self.decoder(features, encoded, gt=None if target is None else gt))
+
+        out: dict[str, Any] = {}
+        if self.exportable:
+            out["logits"] = decoded_features
+            return out
+
+        if return_model_output:
+            out["out_map"] = decoded_features
+
+        if target is None or return_preds:
+            # Disable for torch.compile compatibility
+            @torch.compiler.disable
+            def _postprocess(decoded_features: torch.Tensor) -> list[tuple[str, float]]:
+                return self.postprocessor(decoded_features)
+
+            # Post-process predictions
+            out["preds"] = _postprocess(decoded_features)
+
+        if target is not None:
+            out["loss"] = self.compute_loss(decoded_features, gt, seq_len)
+
+        return out
+
+    @staticmethod
+    def compute_loss(
+        model_output: torch.Tensor,
+        gt: torch.Tensor,
+        seq_len: torch.Tensor,
+    ) -> torch.Tensor:
+        """Compute categorical cross-entropy loss for the model.
+        Sequences are masked after the EOS character.
+
+        Args:
+            model_output: predicted logits of the model
+            gt: the encoded tensor with gt labels
+            seq_len: lengths of each gt word inside the batch
+
+        Returns:
+            The loss of the model on the batch
+        """
+        # Input length : number of timesteps
+        input_len = model_output.shape[1]
+        # Add one for additional <eos> token
+        seq_len = seq_len + 1
+        # Compute loss
+        # (N, L, vocab_size + 1)
+        cce = F.cross_entropy(model_output.permute(0, 2, 1), gt, reduction="none")
+        mask_2d = torch.arange(input_len, device=model_output.device)[None, :] >= seq_len[:, None]
+        cce[mask_2d] = 0
+
+        ce_loss = cce.sum(1) / seq_len.to(dtype=model_output.dtype)
+        return ce_loss.mean()
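A toy call of the masked loss above (a sketch with made-up sizes): timesteps at or beyond each word's `<eos>` position are zeroed out before averaging.

    import torch
    from doctr.models.recognition.sar.pytorch import SAR

    logits = torch.randn(2, 5, 4)          # (N, timesteps, vocab_size + 1)
    gt = torch.randint(0, 4, (2, 5))       # encoded labels
    seq_len = torch.tensor([2, 4])         # word lengths before <eos>
    print(float(SAR.compute_loss(logits, gt, seq_len)))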
+
+
+class SARPostProcessor(RecognitionPostProcessor):
+    """Post processor for SAR architectures
+
+    Args:
+        vocab: string containing the ordered sequence of supported characters
+    """
+
+    def __call__(
+        self,
+        logits: torch.Tensor,
+    ) -> list[tuple[str, float]]:
+        # compute pred with argmax for attention models
+        out_idxs = logits.argmax(-1)
+        # N x L
+        probs = torch.gather(torch.softmax(logits, -1), -1, out_idxs.unsqueeze(-1)).squeeze(-1)
+        # Take the minimum confidence of the sequence
+        probs = probs.min(dim=1).values.detach().cpu()
+
+        # Manual decoding
+        word_values = [
+            "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0]
+            for encoded_seq in out_idxs.detach().cpu().numpy()
+        ]
+
+        return list(zip(word_values, probs.numpy().clip(0, 1).tolist()))
+
+
+def _sar(
+    arch: str,
+    pretrained: bool,
+    backbone_fn: Callable[[bool], nn.Module],
+    layer: str,
+    pretrained_backbone: bool = True,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> SAR:
+    pretrained_backbone = pretrained_backbone and not pretrained
+
+    # Patch the config
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["vocab"] = kwargs.get("vocab", _cfg["vocab"])
+    _cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"])
+
+    # Feature extractor
+    feat_extractor = IntermediateLayerGetter(
+        backbone_fn(pretrained_backbone),
+        {layer: "features"},
+    )
+    kwargs["vocab"] = _cfg["vocab"]
+    kwargs["input_shape"] = _cfg["input_shape"]
+
+    # Build the model
+    model = SAR(feat_extractor, cfg=_cfg, **kwargs)
+    # Load pretrained parameters
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if _cfg["vocab"] != default_cfgs[arch]["vocab"] else None
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    return model
+
+
+
+
+[docs]
+def sar_resnet31(pretrained: bool = False, **kwargs: Any) -> SAR:
+    """SAR with a resnet-31 feature extractor as described in `"Show, Attend and Read: A Simple and Strong
+    Baseline for Irregular Text Recognition" <https://arxiv.org/pdf/1811.00751.pdf>`_.
+
+    >>> import torch
+    >>> from doctr.models import sar_resnet31
+    >>> model = sar_resnet31(pretrained=False)
+    >>> input_tensor = torch.rand((1, 3, 32, 128))
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
+        **kwargs: keyword arguments of the SAR architecture
+
+    Returns:
+        text recognition architecture
+    """
+    return _sar(
+        "sar_resnet31",
+        pretrained,
+        resnet31,
+        "10",
+        ignore_keys=[
+            "decoder.embed.weight",
+            "decoder.embed_tgt.weight",
+            "decoder.output_dense.weight",
+            "decoder.output_dense.bias",
+        ],
+        **kwargs,
+    )
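A hedged sketch of the forward contract implemented above: providing `target` enables teacher forcing and returns a loss, while `return_preds=True` adds decoded (word, confidence) pairs.

    import torch
    from doctr.models import sar_resnet31

    model = sar_resnet31(pretrained=False)
    x = torch.rand(2, 3, 32, 128)
    out = model(x, target=["hello", "world"], return_preds=True)
    print(sorted(out.keys()))  # ['loss', 'preds']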
\ No newline at end of file
diff --git a/_modules/doctr/models/recognition/viptr/pytorch.html b/_modules/doctr/models/recognition/viptr/pytorch.html
new file mode 100644
index 0000000000..e1a5f67dda
--- /dev/null
+++ b/_modules/doctr/models/recognition/viptr/pytorch.html
@@ -0,0 +1,611 @@

Source code for doctr.models.recognition.viptr.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from collections.abc import Callable
+from copy import deepcopy
+from itertools import groupby
+from typing import Any
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torchvision.models._utils import IntermediateLayerGetter
+
+from doctr.datasets import VOCABS, decode_sequence
+
+from ...classification import vip_tiny
+from ...utils import _bf16_to_float32, load_pretrained_params
+from ..core import RecognitionModel, RecognitionPostProcessor
+
+__all__ = ["VIPTR", "viptr_tiny"]
+
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "viptr_tiny": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 128),
+        "vocab": VOCABS["french"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.11.0/viptr_tiny-1cb2515e.pt&src=0",
+    },
+}
+
+
+class VIPTRPostProcessor(RecognitionPostProcessor):
+    """Postprocess raw prediction of the model (logits) to a list of words using CTC decoding
+
+    Args:
+        vocab: string containing the ordered sequence of supported characters
+    """
+
+    @staticmethod
+    def ctc_best_path(
+        logits: torch.Tensor,
+        vocab: str = VOCABS["french"],
+        blank: int = 0,
+    ) -> list[tuple[str, float]]:
+        """Implements best path decoding as shown by Graves (Dissertation, p63), highly inspired from
+        <https://github.com/githubharald/CTCDecoder>`_.
+
+        Args:
+            logits: model output, shape: N x T x C
+            vocab: vocabulary to use
+            blank: index of blank label
+
+        Returns:
+            A list of tuples: (word, confidence)
+        """
+        # Gather the most confident characters, and assign the smallest conf among those to the sequence prob
+        probs = F.softmax(logits, dim=-1).max(dim=-1).values.min(dim=1).values
+
+        # collapse best path (using itertools.groupby), map to chars, join char list to string
+        words = [
+            decode_sequence([k for k, _ in groupby(seq.tolist()) if k != blank], vocab)
+            for seq in torch.argmax(logits, dim=-1)
+        ]
+
+        return list(zip(words, probs.tolist()))
+
+    def __call__(self, logits: torch.Tensor) -> list[tuple[str, float]]:
+        """Performs decoding of raw output with CTC and decoding of CTC predictions
+        with label_to_idx mapping dictionary
+
+        Args:
+            logits: raw output of the model, shape (N, seq_len, vocab_size + 1)
+
+        Returns:
+            A list of tuples: (word, confidence)
+
+        """
+        # Decode CTC
+        return self.ctc_best_path(logits=logits, vocab=self.vocab, blank=len(self.vocab))
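A toy illustration of the best-path rule above, with a hypothetical three-character vocabulary: repeated symbols are collapsed and the blank index (here 3) is dropped.

    import torch
    from doctr.models.recognition.viptr.pytorch import VIPTRPostProcessor

    post = VIPTRPostProcessor(vocab="abc")        # blank = len(vocab) = 3
    logits = torch.full((1, 5, 4), -10.0)
    for t, idx in enumerate([0, 0, 3, 1, 1]):     # path a, a, blank, b, b
        logits[0, t, idx] = 10.0
    print(post(logits))                           # [('ab', ~1.0)]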
+
+
+class VIPTR(RecognitionModel, nn.Module):
+    """Implements a VIPTR architecture as described in `"A Vision Permutable Extractor for Fast and Efficient
+    Scene Text Recognition" <https://arxiv.org/abs/2401.10110>`_.
+
+    Args:
+        feature_extractor: the backbone serving as feature extractor
+        vocab: vocabulary used for encoding
+        input_shape: input shape of the image
+        exportable: onnx exportable returns only logits
+        cfg: configuration dictionary
+    """
+
+    def __init__(
+        self,
+        feature_extractor: nn.Module,
+        vocab: str,
+        input_shape: tuple[int, int, int] = (3, 32, 128),
+        exportable: bool = False,
+        cfg: dict[str, Any] | None = None,
+    ):
+        super().__init__()
+        self.vocab = vocab
+        self.exportable = exportable
+        self.cfg = cfg
+        self.max_length = 32
+        self.vocab_size = len(vocab)
+
+        self.feat_extractor = feature_extractor
+        with torch.inference_mode():
+            embedding_units = self.feat_extractor(torch.zeros((1, *input_shape)))["features"].shape[-1]
+
+        self.postprocessor = VIPTRPostProcessor(vocab=self.vocab)
+        self.head = nn.Linear(embedding_units, len(self.vocab) + 1)  # +1 for PAD
+
+        for n, m in self.named_modules():
+            # Don't override the initialization of the backbone
+            if n.startswith("feat_extractor."):
+                continue
+            if isinstance(m, nn.Linear):
+                nn.init.trunc_normal_(m.weight, std=0.02)
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        target: list[str] | None = None,
+        return_model_output: bool = False,
+        return_preds: bool = False,
+    ) -> dict[str, Any]:
+        if target is not None:
+            _gt, _seq_len = self.build_target(target)
+            gt, seq_len = torch.from_numpy(_gt).to(dtype=torch.long), torch.tensor(_seq_len)
+            gt, seq_len = gt.to(x.device), seq_len.to(x.device)
+
+        if self.training and target is None:
+            raise ValueError("Need to provide labels during training")
+
+        features = self.feat_extractor(x)["features"]  # (B, max_len, embed_dim)
+        B, N, E = features.size()
+        logits = self.head(features).view(B, N, len(self.vocab) + 1)
+
+        decoded_features = _bf16_to_float32(logits)
+
+        out: dict[str, Any] = {}
+        if self.exportable:
+            out["logits"] = decoded_features
+            return out
+
+        if return_model_output:
+            out["out_map"] = decoded_features
+
+        if target is None or return_preds:
+            # Disable for torch.compile compatibility
+            @torch.compiler.disable
+            def _postprocess(decoded_features: torch.Tensor) -> list[tuple[str, float]]:
+                return self.postprocessor(decoded_features)
+
+            # Post-process predictions
+            out["preds"] = _postprocess(decoded_features)
+
+        if target is not None:
+            out["loss"] = self.compute_loss(decoded_features, gt, seq_len, len(self.vocab))
+
+        return out
+
+    @staticmethod
+    def compute_loss(
+        model_output: torch.Tensor,
+        gt: torch.Tensor,
+        seq_len: torch.Tensor,
+        blank_idx: int = 0,
+    ) -> torch.Tensor:
+        """Compute CTC loss for the model.
+
+        Args:
+            model_output: predicted logits of the model
+            gt: ground truth tensor
+            seq_len: sequence lengths of the ground truth
+            blank_idx: index of the blank label
+
+        Returns:
+            The loss of the model on the batch
+        """
+        batch_len = model_output.shape[0]
+        input_length = model_output.shape[1] * torch.ones(size=(batch_len,), dtype=torch.int32)
+        # N x T x C -> T x N x C
+        logits = model_output.permute(1, 0, 2)
+        probs = F.log_softmax(logits, dim=-1)
+        ctc_loss = F.ctc_loss(
+            probs,
+            gt,
+            input_length,
+            seq_len,
+            blank_idx,
+            zero_infinity=True,
+        )
+
+        return ctc_loss
+
+
+def _viptr(
+    arch: str,
+    pretrained: bool,
+    backbone_fn: Callable[[bool], nn.Module],
+    layer: str,
+    pretrained_backbone: bool = True,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> VIPTR:
+    pretrained_backbone = pretrained_backbone and not pretrained
+
+    # Patch the config
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["vocab"] = kwargs.get("vocab", _cfg["vocab"])
+    _cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"])
+
+    # Feature extractor
+    feat_extractor = IntermediateLayerGetter(
+        backbone_fn(pretrained_backbone, input_shape=_cfg["input_shape"]),  # type: ignore[call-arg]
+        {layer: "features"},
+    )
+
+    kwargs["vocab"] = _cfg["vocab"]
+    kwargs["input_shape"] = _cfg["input_shape"]
+
+    model = VIPTR(feat_extractor, cfg=_cfg, **kwargs)
+
+    # Load pretrained parameters
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if _cfg["vocab"] != default_cfgs[arch]["vocab"] else None
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    return model
+
+
+
+
+[docs]
+def viptr_tiny(pretrained: bool = False, **kwargs: Any) -> VIPTR:
+    """VIPTR-Tiny as described in `"A Vision Permutable Extractor for Fast and Efficient Scene Text Recognition"
+    <https://arxiv.org/abs/2401.10110>`_.
+
+    >>> import torch
+    >>> from doctr.models import viptr_tiny
+    >>> model = viptr_tiny(pretrained=False)
+    >>> input_tensor = torch.rand((1, 3, 32, 128))
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
+        **kwargs: keyword arguments of the VIPTR architecture
+
+    Returns:
+        VIPTR: a VIPTR model instance
+    """
+    return _viptr(
+        "viptr_tiny",
+        pretrained,
+        vip_tiny,
+        "5",
+        ignore_keys=["head.weight", "head.bias"],
+        **kwargs,
+    )
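A brief sketch of the `exportable` flag documented above: when set, the forward pass returns only the raw logits, which is the form typically fed to an ONNX export.

    import torch
    from doctr.models import viptr_tiny

    model = viptr_tiny(pretrained=False, exportable=True).eval()
    with torch.inference_mode():
        out = model(torch.rand(1, 3, 32, 128))
    print(out.keys(), out["logits"].shape)  # dict_keys(['logits']), (1, seq_len, vocab_size + 1)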
\ No newline at end of file
diff --git a/_modules/doctr/models/recognition/vitstr/pytorch.html b/_modules/doctr/models/recognition/vitstr/pytorch.html
new file mode 100644
index 0000000000..2ec28f4645
--- /dev/null
+++ b/_modules/doctr/models/recognition/vitstr/pytorch.html
@@ -0,0 +1,623 @@

Source code for doctr.models.recognition.vitstr.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from collections.abc import Callable
+from copy import deepcopy
+from typing import Any
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torchvision.models._utils import IntermediateLayerGetter
+
+from doctr.datasets import VOCABS
+
+from ...classification import vit_b, vit_s
+from ...utils import _bf16_to_float32, load_pretrained_params
+from .base import _ViTSTR, _ViTSTRPostProcessor
+
+__all__ = ["ViTSTR", "vitstr_small", "vitstr_base"]
+
+default_cfgs: dict[str, dict[str, Any]] = {
+    "vitstr_small": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 128),
+        "vocab": VOCABS["french"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/vitstr_small-fcd12655.pt&src=0",
+    },
+    "vitstr_base": {
+        "mean": (0.694, 0.695, 0.693),
+        "std": (0.299, 0.296, 0.301),
+        "input_shape": (3, 32, 128),
+        "vocab": VOCABS["french"],
+        "url": "https://doctr-static.mindee.com/models?id=v0.7.0/vitstr_base-50b21df2.pt&src=0",
+    },
+}
+
+
+class ViTSTR(_ViTSTR, nn.Module):
+    """Implements a ViTSTR architecture as described in `"Vision Transformer for Fast and
+    Efficient Scene Text Recognition" <https://arxiv.org/pdf/2105.08582.pdf>`_.
+
+    Args:
+        feature_extractor: the backbone serving as feature extractor
+        vocab: vocabulary used for encoding
+        embedding_units: number of embedding units
+        max_length: maximum word length handled by the model
+        input_shape: input shape of the image
+        exportable: onnx exportable returns only logits
+        cfg: dictionary containing information about the model
+    """
+
+    def __init__(
+        self,
+        feature_extractor,
+        vocab: str,
+        embedding_units: int,
+        max_length: int = 32,  # different from paper
+        input_shape: tuple[int, int, int] = (3, 32, 128),  # different from paper
+        exportable: bool = False,
+        cfg: dict[str, Any] | None = None,
+    ) -> None:
+        super().__init__()
+        self.vocab = vocab
+        self.exportable = exportable
+        self.cfg = cfg
+        self.max_length = max_length + 2  # +2 for SOS and EOS
+
+        self.feat_extractor = feature_extractor
+        self.head = nn.Linear(embedding_units, len(self.vocab) + 1)  # +1 for EOS
+
+        self.postprocessor = ViTSTRPostProcessor(vocab=self.vocab)
+
+    def from_pretrained(self, path_or_url: str, **kwargs: Any) -> None:
+        """Load pretrained parameters onto the model
+
+        Args:
+            path_or_url: the path or URL to the model parameters (checkpoint)
+            **kwargs: additional arguments to be passed to `doctr.models.utils.load_pretrained_params`
+        """
+        load_pretrained_params(self, path_or_url, **kwargs)
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        target: list[str] | None = None,
+        return_model_output: bool = False,
+        return_preds: bool = False,
+    ) -> dict[str, Any]:
+        features = self.feat_extractor(x)["features"]  # (batch_size, patches_seqlen, d_model)
+
+        if target is not None:
+            _gt, _seq_len = self.build_target(target)
+            gt, seq_len = torch.from_numpy(_gt).to(dtype=torch.long), torch.tensor(_seq_len)
+            gt, seq_len = gt.to(x.device), seq_len.to(x.device)
+
+        if self.training and target is None:
+            raise ValueError("Need to provide labels during training")
+
+        # borrowed from : https://github.com/baudm/parseq/blob/main/strhub/models/vitstr/model.py
+        features = features[:, : self.max_length]  # (batch_size, max_length, d_model)
+        B, N, E = features.size()
+        features = features.reshape(B * N, E)
+        logits = self.head(features).view(B, N, len(self.vocab) + 1)  # (batch_size, max_length, vocab + 1)
+        decoded_features = _bf16_to_float32(logits[:, 1:])  # remove cls_token
+
+        out: dict[str, Any] = {}
+        if self.exportable:
+            out["logits"] = decoded_features
+            return out
+
+        if return_model_output:
+            out["out_map"] = decoded_features
+
+        if target is None or return_preds:
+            # Disable for torch.compile compatibility
+            @torch.compiler.disable
+            def _postprocess(decoded_features: torch.Tensor) -> list[tuple[str, float]]:
+                return self.postprocessor(decoded_features)
+
+            # Post-process predictions
+            out["preds"] = _postprocess(decoded_features)
+
+        if target is not None:
+            out["loss"] = self.compute_loss(decoded_features, gt, seq_len)
+
+        return out
+
+    @staticmethod
+    def compute_loss(
+        model_output: torch.Tensor,
+        gt: torch.Tensor,
+        seq_len: torch.Tensor,
+    ) -> torch.Tensor:
+        """Compute categorical cross-entropy loss for the model.
+        Sequences are masked after the EOS character.
+
+        Args:
+            model_output: predicted logits of the model
+            gt: the encoded tensor with gt labels
+            seq_len: lengths of each gt word inside the batch
+
+        Returns:
+            The loss of the model on the batch
+        """
+        # Input length : number of steps
+        input_len = model_output.shape[1]
+        # Add one for the additional <eos> token (<sos> disappears in the shift)
+        seq_len = seq_len + 1
+        # Compute loss: don't forget to shift gt! Otherwise the model learns to output the gt[t-1]!
+        # The "masked" first gt char is <sos>.
+        cce = F.cross_entropy(model_output.permute(0, 2, 1), gt[:, 1:], reduction="none")
+        # Compute mask
+        mask_2d = torch.arange(input_len, device=model_output.device)[None, :] >= seq_len[:, None]
+        cce[mask_2d] = 0
+
+        ce_loss = cce.sum(1) / seq_len.to(dtype=model_output.dtype)
+        return ce_loss.mean()
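The shift above can be checked on toy tensors (a sketch with made-up sizes): the first ground-truth column, which plays the role of `<sos>`, is dropped so that output step t is scored against character t of the word.

    import torch
    from doctr.models.recognition.vitstr.pytorch import ViTSTR

    logits = torch.randn(1, 6, 5)          # (N, steps, vocab_size + 1)
    gt = torch.randint(0, 5, (1, 7))       # one extra leading column acting as <sos>
    seq_len = torch.tensor([3])            # word length before <eos>
    print(float(ViTSTR.compute_loss(logits, gt, seq_len)))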
+
+
+class ViTSTRPostProcessor(_ViTSTRPostProcessor):
+    """Post processor for ViTSTR architecture
+
+    Args:
+        vocab: string containing the ordered sequence of supported characters
+    """
+
+    def __call__(
+        self,
+        logits: torch.Tensor,
+    ) -> list[tuple[str, float]]:
+        # compute pred with argmax for attention models
+        out_idxs = logits.argmax(-1)
+        preds_prob = torch.softmax(logits, -1).max(dim=-1)[0]
+
+        # Manual decoding
+        word_values = [
+            "".join(self._embedding[idx] for idx in encoded_seq).split("<eos>")[0]
+            for encoded_seq in out_idxs.cpu().numpy()
+        ]
+        # compute probabilities for each word up to the EOS token
+        probs = [
+            preds_prob[i, : len(word)].clip(0, 1).mean().item() if word else 0.0 for i, word in enumerate(word_values)
+        ]
+
+        return list(zip(word_values, probs))
+
+
+def _vitstr(
+    arch: str,
+    pretrained: bool,
+    backbone_fn: Callable[[bool], nn.Module],
+    layer: str,
+    ignore_keys: list[str] | None = None,
+    **kwargs: Any,
+) -> ViTSTR:
+    # Patch the config
+    _cfg = deepcopy(default_cfgs[arch])
+    _cfg["vocab"] = kwargs.get("vocab", _cfg["vocab"])
+    _cfg["input_shape"] = kwargs.get("input_shape", _cfg["input_shape"])
+    patch_size = kwargs.get("patch_size", (4, 8))
+
+    kwargs["vocab"] = _cfg["vocab"]
+    kwargs["input_shape"] = _cfg["input_shape"]
+
+    # Feature extractor
+    feat_extractor = IntermediateLayerGetter(
+        # NOTE: we don't use a pretrained backbone for non-rectangular patches to avoid the pos embed mismatch
+        backbone_fn(False, input_shape=_cfg["input_shape"], patch_size=patch_size),  # type: ignore[call-arg]
+        {layer: "features"},
+    )
+
+    kwargs.pop("patch_size", None)
+    kwargs.pop("pretrained_backbone", None)
+
+    # Build the model
+    model = ViTSTR(feat_extractor, cfg=_cfg, **kwargs)
+    # Load pretrained parameters
+    if pretrained:
+        # The number of classes is not the same as the number of classes in the pretrained model =>
+        # remove the last layer weights
+        _ignore_keys = ignore_keys if _cfg["vocab"] != default_cfgs[arch]["vocab"] else None
+        model.from_pretrained(default_cfgs[arch]["url"], ignore_keys=_ignore_keys)
+
+    return model
+
+
+
+
+[docs]
+def vitstr_small(pretrained: bool = False, **kwargs: Any) -> ViTSTR:
+    """ViTSTR-Small as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
+    <https://arxiv.org/pdf/2105.08582.pdf>`_.
+
+    >>> import torch
+    >>> from doctr.models import vitstr_small
+    >>> model = vitstr_small(pretrained=False)
+    >>> input_tensor = torch.rand((1, 3, 32, 128))
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
+        kwargs: keyword arguments of the ViTSTR architecture
+
+    Returns:
+        text recognition architecture
+    """
+    return _vitstr(
+        "vitstr_small",
+        pretrained,
+        vit_s,
+        "1",
+        embedding_units=384,
+        patch_size=(4, 8),
+        ignore_keys=["head.weight", "head.bias"],
+        **kwargs,
+    )
+
+
+[docs]
+def vitstr_base(pretrained: bool = False, **kwargs: Any) -> ViTSTR:
+    """ViTSTR-Base as described in `"Vision Transformer for Fast and Efficient Scene Text Recognition"
+    <https://arxiv.org/pdf/2105.08582.pdf>`_.
+
+    >>> import torch
+    >>> from doctr.models import vitstr_base
+    >>> model = vitstr_base(pretrained=False)
+    >>> input_tensor = torch.rand((1, 3, 32, 128))
+    >>> out = model(input_tensor)
+
+    Args:
+        pretrained (bool): If True, returns a model pre-trained on our text recognition dataset
+        kwargs: keyword arguments of the ViTSTR architecture
+
+    Returns:
+        text recognition architecture
+    """
+    return _vitstr(
+        "vitstr_base",
+        pretrained,
+        vit_b,
+        "1",
+        embedding_units=768,
+        patch_size=(4, 8),
+        ignore_keys=["head.weight", "head.bias"],
+        **kwargs,
+    )
\ No newline at end of file
diff --git a/_modules/doctr/models/recognition/zoo.html b/_modules/doctr/models/recognition/zoo.html
new file mode 100644
index 0000000000..6106d6730d
--- /dev/null
+++ b/_modules/doctr/models/recognition/zoo.html
@@ -0,0 +1,425 @@

Source code for doctr.models.recognition.zoo

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from typing import Any
+
+from doctr.models.preprocessor import PreProcessor
+from doctr.models.utils import _CompiledModule
+
+from .. import recognition
+from .predictor import RecognitionPredictor
+
+__all__ = ["recognition_predictor"]
+
+
+ARCHS: list[str] = [
+    "crnn_vgg16_bn",
+    "crnn_mobilenet_v3_small",
+    "crnn_mobilenet_v3_large",
+    "sar_resnet31",
+    "master",
+    "vitstr_small",
+    "vitstr_base",
+    "parseq",
+    "viptr_tiny",
+]
+
+
+def _predictor(arch: Any, pretrained: bool, **kwargs: Any) -> RecognitionPredictor:
+    if isinstance(arch, str):
+        if arch not in ARCHS:
+            raise ValueError(f"unknown architecture '{arch}'")
+
+        _model = recognition.__dict__[arch](
+            pretrained=pretrained, pretrained_backbone=kwargs.get("pretrained_backbone", True)
+        )
+    else:
+        # Adding the type for torch compiled models to the allowed architectures
+        allowed_archs = [
+            recognition.CRNN,
+            recognition.SAR,
+            recognition.MASTER,
+            recognition.ViTSTR,
+            recognition.PARSeq,
+            recognition.VIPTR,
+            _CompiledModule,
+        ]
+
+        if not isinstance(arch, tuple(allowed_archs)):
+            raise ValueError(f"unknown architecture: {type(arch)}")
+        _model = arch
+
+    kwargs.pop("pretrained_backbone", None)
+
+    kwargs["mean"] = kwargs.get("mean", _model.cfg["mean"])
+    kwargs["std"] = kwargs.get("std", _model.cfg["std"])
+    kwargs["batch_size"] = kwargs.get("batch_size", 128)
+    input_shape = _model.cfg["input_shape"][-2:]
+    predictor = RecognitionPredictor(PreProcessor(input_shape, preserve_aspect_ratio=True, **kwargs), _model)
+
+    return predictor
+
+
+
+
+[docs]
+def recognition_predictor(
+    arch: Any = "crnn_vgg16_bn",
+    pretrained: bool = False,
+    symmetric_pad: bool = False,
+    batch_size: int = 128,
+    **kwargs: Any,
+) -> RecognitionPredictor:
+    """Text recognition architecture.
+
+    Example::
+        >>> import numpy as np
+        >>> from doctr.models import recognition_predictor
+        >>> model = recognition_predictor(pretrained=True)
+        >>> input_page = (255 * np.random.rand(32, 128, 3)).astype(np.uint8)
+        >>> out = model([input_page])
+
+    Args:
+        arch: name of the architecture or model itself to use (e.g. 'crnn_vgg16_bn')
+        pretrained: If True, returns a model pre-trained on our text recognition dataset
+        symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right
+        batch_size: number of samples the model processes in parallel
+        **kwargs: optional parameters to be passed to the architecture
+
+    Returns:
+        Recognition predictor
+    """
+    return _predictor(arch=arch, pretrained=pretrained, symmetric_pad=symmetric_pad, batch_size=batch_size, **kwargs)
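As the type check above shows, `arch` may also be an already-instantiated (or torch-compiled) recognition model rather than a string; its `cfg` then drives the pre-processing. A minimal sketch:

    import numpy as np
    from doctr.models import recognition_predictor, vitstr_small

    reco = recognition_predictor(arch=vitstr_small(pretrained=False), batch_size=32)
    crop = (255 * np.random.rand(32, 128, 3)).astype(np.uint8)
    print(reco([crop]))  # one (word, confidence) tuple; random weights give gibberish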
\ No newline at end of file
diff --git a/_modules/doctr/models/zoo.html b/_modules/doctr/models/zoo.html
new file mode 100644
index 0000000000..1ba5f640dc
--- /dev/null
+++ b/_modules/doctr/models/zoo.html
@@ -0,0 +1,574 @@

Source code for doctr.models.zoo

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+from typing import Any
+
+from .detection.zoo import detection_predictor
+from .kie_predictor import KIEPredictor
+from .predictor import OCRPredictor
+from .recognition.zoo import recognition_predictor
+
+__all__ = ["ocr_predictor", "kie_predictor"]
+
+
+def _predictor(
+    det_arch: Any,
+    reco_arch: Any,
+    pretrained: bool,
+    pretrained_backbone: bool = True,
+    assume_straight_pages: bool = True,
+    preserve_aspect_ratio: bool = True,
+    symmetric_pad: bool = True,
+    det_bs: int = 2,
+    reco_bs: int = 128,
+    detect_orientation: bool = False,
+    straighten_pages: bool = False,
+    detect_language: bool = False,
+    **kwargs,
+) -> OCRPredictor:
+    # Detection
+    det_predictor = detection_predictor(
+        det_arch,
+        pretrained=pretrained,
+        pretrained_backbone=pretrained_backbone,
+        batch_size=det_bs,
+        assume_straight_pages=assume_straight_pages,
+        preserve_aspect_ratio=preserve_aspect_ratio,
+        symmetric_pad=symmetric_pad,
+    )
+
+    # Recognition
+    reco_predictor = recognition_predictor(
+        reco_arch,
+        pretrained=pretrained,
+        pretrained_backbone=pretrained_backbone,
+        batch_size=reco_bs,
+    )
+
+    return OCRPredictor(
+        det_predictor,
+        reco_predictor,
+        assume_straight_pages=assume_straight_pages,
+        preserve_aspect_ratio=preserve_aspect_ratio,
+        symmetric_pad=symmetric_pad,
+        detect_orientation=detect_orientation,
+        straighten_pages=straighten_pages,
+        detect_language=detect_language,
+        **kwargs,
+    )
+
+
+
+
+[docs]
+def ocr_predictor(
+    det_arch: Any = "fast_base",
+    reco_arch: Any = "crnn_vgg16_bn",
+    pretrained: bool = False,
+    pretrained_backbone: bool = True,
+    assume_straight_pages: bool = True,
+    preserve_aspect_ratio: bool = True,
+    symmetric_pad: bool = True,
+    export_as_straight_boxes: bool = False,
+    detect_orientation: bool = False,
+    straighten_pages: bool = False,
+    detect_language: bool = False,
+    **kwargs: Any,
+) -> OCRPredictor:
+    """End-to-end OCR architecture using one model for localization, and another for text recognition.
+
+    >>> import numpy as np
+    >>> from doctr.models import ocr_predictor
+    >>> model = ocr_predictor('db_resnet50', 'crnn_vgg16_bn', pretrained=True)
+    >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
+    >>> out = model([input_page])
+
+    Args:
+        det_arch: name of the detection architecture or the model itself to use
+            (e.g. 'db_resnet50', 'db_mobilenet_v3_large')
+        reco_arch: name of the recognition architecture or the model itself to use
+            (e.g. 'crnn_vgg16_bn', 'sar_resnet31')
+        pretrained: If True, returns a model pre-trained on our OCR dataset
+        pretrained_backbone: If True, returns a model with a pretrained backbone
+        assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
+            without rotated textual elements.
+        preserve_aspect_ratio: If True, pad the input document image to preserve the aspect ratio before
+            running the detection model on it.
+        symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right.
+        export_as_straight_boxes: when assume_straight_pages is set to False, export final predictions
+            (potentially rotated) as straight bounding boxes.
+        detect_orientation: if True, the estimated general page orientation will be added to the predictions for each
+            page. Doing so will slightly deteriorate the overall latency.
+        straighten_pages: if True, estimates the page general orientation
+            based on the segmentation map median line orientation.
+            Then, rotates page before passing it again to the deep learning detection module.
+            Doing so will improve performances for documents with page-uniform rotations.
+        detect_language: if True, the language prediction will be added to the predictions for each
+            page. Doing so will slightly deteriorate the overall latency.
+        kwargs: keyword args of `OCRPredictor`
+
+    Returns:
+        OCR predictor
+    """
+    return _predictor(
+        det_arch,
+        reco_arch,
+        pretrained,
+        pretrained_backbone=pretrained_backbone,
+        assume_straight_pages=assume_straight_pages,
+        preserve_aspect_ratio=preserve_aspect_ratio,
+        symmetric_pad=symmetric_pad,
+        export_as_straight_boxes=export_as_straight_boxes,
+        detect_orientation=detect_orientation,
+        straighten_pages=straighten_pages,
+        detect_language=detect_language,
+        **kwargs,
+    )
+
+
+def _kie_predictor(
+    det_arch: Any,
+    reco_arch: Any,
+    pretrained: bool,
+    pretrained_backbone: bool = True,
+    assume_straight_pages: bool = True,
+    preserve_aspect_ratio: bool = True,
+    symmetric_pad: bool = True,
+    det_bs: int = 2,
+    reco_bs: int = 128,
+    detect_orientation: bool = False,
+    straighten_pages: bool = False,
+    detect_language: bool = False,
+    **kwargs,
+) -> KIEPredictor:
+    # Detection
+    det_predictor = detection_predictor(
+        det_arch,
+        pretrained=pretrained,
+        pretrained_backbone=pretrained_backbone,
+        batch_size=det_bs,
+        assume_straight_pages=assume_straight_pages,
+        preserve_aspect_ratio=preserve_aspect_ratio,
+        symmetric_pad=symmetric_pad,
+    )
+
+    # Recognition
+    reco_predictor = recognition_predictor(
+        reco_arch,
+        pretrained=pretrained,
+        pretrained_backbone=pretrained_backbone,
+        batch_size=reco_bs,
+    )
+
+    return KIEPredictor(
+        det_predictor,
+        reco_predictor,
+        assume_straight_pages=assume_straight_pages,
+        preserve_aspect_ratio=preserve_aspect_ratio,
+        symmetric_pad=symmetric_pad,
+        detect_orientation=detect_orientation,
+        straighten_pages=straighten_pages,
+        detect_language=detect_language,
+        **kwargs,
+    )
+
+
+[docs]
+def kie_predictor(
+    det_arch: Any = "fast_base",
+    reco_arch: Any = "crnn_vgg16_bn",
+    pretrained: bool = False,
+    pretrained_backbone: bool = True,
+    assume_straight_pages: bool = True,
+    preserve_aspect_ratio: bool = True,
+    symmetric_pad: bool = True,
+    export_as_straight_boxes: bool = False,
+    detect_orientation: bool = False,
+    straighten_pages: bool = False,
+    detect_language: bool = False,
+    **kwargs: Any,
+) -> KIEPredictor:
+    """End-to-end KIE architecture using one model for localization, and another for text recognition.
+
+    >>> import numpy as np
+    >>> from doctr.models import kie_predictor
+    >>> model = kie_predictor('db_resnet50', 'crnn_vgg16_bn', pretrained=True)
+    >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8)
+    >>> out = model([input_page])
+
+    Args:
+        det_arch: name of the detection architecture or the model itself to use
+            (e.g. 'db_resnet50', 'db_mobilenet_v3_large')
+        reco_arch: name of the recognition architecture or the model itself to use
+            (e.g. 'crnn_vgg16_bn', 'sar_resnet31')
+        pretrained: If True, returns a model pre-trained on our OCR dataset
+        pretrained_backbone: If True, returns a model with a pretrained backbone
+        assume_straight_pages: if True, speeds up the inference by assuming you only pass straight pages
+            without rotated textual elements.
+        preserve_aspect_ratio: If True, pad the input document image to preserve the aspect ratio before
+            running the detection model on it.
+        symmetric_pad: if True, pad the image symmetrically instead of padding at the bottom-right.
+        export_as_straight_boxes: when assume_straight_pages is set to False, export final predictions
+            (potentially rotated) as straight bounding boxes.
+        detect_orientation: if True, the estimated general page orientation will be added to the predictions for each
+            page. Doing so will slightly deteriorate the overall latency.
+        straighten_pages: if True, estimates the page general orientation
+            based on the segmentation map median line orientation.
+            Then, rotates page before passing it again to the deep learning detection module.
+            Doing so will improve performances for documents with page-uniform rotations.
+        detect_language: if True, the language prediction will be added to the predictions for each
+            page. Doing so will slightly deteriorate the overall latency.
+        kwargs: keyword args of `KIEPredictor`
+
+    Returns:
+        KIE predictor
+    """
+    return _kie_predictor(
+        det_arch,
+        reco_arch,
+        pretrained,
+        pretrained_backbone=pretrained_backbone,
+        assume_straight_pages=assume_straight_pages,
+        preserve_aspect_ratio=preserve_aspect_ratio,
+        symmetric_pad=symmetric_pad,
+        export_as_straight_boxes=export_as_straight_boxes,
+        detect_orientation=detect_orientation,
+        straighten_pages=straighten_pages,
+        detect_language=detect_language,
+        **kwargs,
+    )
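A hedged end-to-end sketch of the predictor built above: pages are consumed as uint8 numpy arrays (for real documents they are typically loaded through `doctr.io.DocumentFile`), and the result is a nested page/block/line/word structure.

    import numpy as np
    from doctr.models import ocr_predictor

    predictor = ocr_predictor(pretrained=True)      # fast_base + crnn_vgg16_bn by default
    page = (255 * np.random.rand(1024, 768, 3)).astype(np.uint8)
    result = predictor([page])
    print(result.pages[0].blocks)                   # blocks -> lines -> words
    print(result.export().keys())                   # plain-dict export of the same structure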
\ No newline at end of file
diff --git a/_modules/doctr/transforms/modules/base.html b/_modules/doctr/transforms/modules/base.html
new file mode 100644
index 0000000000..cd6894b54a
--- /dev/null
+++ b/_modules/doctr/transforms/modules/base.html
@@ -0,0 +1,578 @@

Source code for doctr.transforms.modules.base

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import math
+import random
+from collections.abc import Callable
+from typing import Any
+
+import numpy as np
+
+from doctr.utils.repr import NestedObject
+
+from .. import functional as F
+
+__all__ = ["SampleCompose", "ImageTransform", "ColorInversion", "OneOf", "RandomApply", "RandomRotate", "RandomCrop"]
+
+
+
+[docs] +class SampleCompose(NestedObject): + """Implements a wrapper that will apply transformations sequentially on both image and target + + .. code:: python + + >>> import numpy as np + >>> import torch + >>> from doctr.transforms import SampleCompose, ImageTransform, ColorInversion, RandomRotate + >>> transfos = SampleCompose([ImageTransform(ColorInversion((32, 32))), RandomRotate(30)]) + >>> out, out_boxes = transfos(torch.rand(8, 64, 64, 3), np.zeros((2, 4))) + + Args: + transforms: list of transformation modules + """ + + _children_names: list[str] = ["sample_transforms"] + + def __init__(self, transforms: list[Callable[[Any, Any], tuple[Any, Any]]]) -> None: + self.sample_transforms = transforms + + def __call__(self, x: Any, target: Any) -> tuple[Any, Any]: + for t in self.sample_transforms: + x, target = t(x, target) + + return x, target
+ + + +
+[docs] +class ImageTransform(NestedObject): + """Implements a transform wrapper to turn an image-only transformation into an image+target transform + + .. code:: python + + >>> import torch + >>> from doctr.transforms import ImageTransform, ColorInversion + >>> transfo = ImageTransform(ColorInversion((32, 32))) + >>> out, _ = transfo(torch.rand(8, 64, 64, 3), None) + + Args: + transform: the image transformation module to wrap + """ + + _children_names: list[str] = ["img_transform"] + + def __init__(self, transform: Callable[[Any], Any]) -> None: + self.img_transform = transform + + def __call__(self, img: Any, target: Any) -> tuple[Any, Any]: + img = self.img_transform(img) + return img, target
+ + + +
+[docs] +class ColorInversion(NestedObject): + """Applies the following tranformation to a tensor (image or batch of images): + convert to grayscale, colorize (shift 0-values randomly), and then invert colors + + .. code:: python + + >>> import torch + >>> from doctr.transforms import ColorInversion + >>> transfo = ColorInversion(min_val=0.6) + >>> out = transfo(torch.rand(8, 64, 64, 3)) + + Args: + min_val: range [min_val, 1] to colorize RGB pixels + """ + + def __init__(self, min_val: float = 0.5) -> None: + self.min_val = min_val + + def extra_repr(self) -> str: + return f"min_val={self.min_val}" + + def __call__(self, img: Any) -> Any: + return F.invert_colors(img, self.min_val)
+ + + +
+[docs] +class OneOf(NestedObject): + """Randomly apply one of the input transformations + + .. code:: python + + >>> import torch + >>> from doctr.transforms import OneOf + >>> transfo = OneOf([JpegQuality(), Gamma()]) + >>> out = transfo(torch.rand(1, 64, 64, 3)) + + Args: + transforms: list of transformations, one only will be picked + """ + + _children_names: list[str] = ["transforms"] + + def __init__(self, transforms: list[Callable[[Any], Any]]) -> None: + self.transforms = transforms + + def __call__(self, img: Any, target: np.ndarray | None = None) -> Any | tuple[Any, np.ndarray]: + # Pick transformation + transfo = self.transforms[int(random.random() * len(self.transforms))] + # Apply + return transfo(img) if target is None else transfo(img, target) # type: ignore[call-arg]
+ + + +
+[docs] +class RandomApply(NestedObject): + """Apply with a probability p the input transformation + + .. code:: python + + >>> import torch + >>> from doctr.transforms import RandomApply + >>> transfo = RandomApply(Gamma(), p=.5) + >>> out = transfo(torch.rand(1, 64, 64, 3)) + + Args: + transform: transformation to apply + p: probability to apply + """ + + def __init__(self, transform: Callable[[Any], Any], p: float = 0.5) -> None: + self.transform = transform + self.p = p + + def extra_repr(self) -> str: + return f"transform={self.transform}, p={self.p}" + + def __call__(self, img: Any, target: np.ndarray | None = None) -> Any | tuple[Any, np.ndarray]: + if random.random() < self.p: + return self.transform(img) if target is None else self.transform(img, target) # type: ignore[call-arg] + return img if target is None else (img, target)
+ + + +
+[docs] +class RandomRotate(NestedObject): + """Randomly rotate a tensor image and its boxes + + .. image:: https://doctr-static.mindee.com/models?id=v0.4.0/rotation_illustration.png&src=0 + :align: center + + Args: + max_angle: maximum angle for rotation, in degrees. Angles will be uniformly picked in [-max_angle, max_angle] + expand: whether the image should be padded before the rotation + """ + + def __init__(self, max_angle: float = 5.0, expand: bool = False) -> None: + self.max_angle = max_angle + self.expand = expand + + def extra_repr(self) -> str: + return f"max_angle={self.max_angle}, expand={self.expand}" + + def __call__(self, img: Any, target: np.ndarray) -> tuple[Any, np.ndarray]: + angle = random.uniform(-self.max_angle, self.max_angle) + r_img, r_polys = F.rotate_sample(img, target, angle, self.expand) + # Removes deleted boxes + is_kept = (r_polys.max(1) > r_polys.min(1)).sum(1) == 2 + return r_img, r_polys[is_kept]
+ + + +
+[docs] +class RandomCrop(NestedObject): + """Randomly crop a tensor image and its boxes + + Args: + scale: tuple of floats, relative (min_area, max_area) of the crop + ratio: tuple of float, relative (min_ratio, max_ratio) where ratio = h/w + """ + + def __init__(self, scale: tuple[float, float] = (0.08, 1.0), ratio: tuple[float, float] = (0.75, 1.33)) -> None: + self.scale = scale + self.ratio = ratio + + def extra_repr(self) -> str: + return f"scale={self.scale}, ratio={self.ratio}" + + def __call__(self, img: Any, target: np.ndarray) -> tuple[Any, np.ndarray]: + scale = random.uniform(self.scale[0], self.scale[1]) + ratio = random.uniform(self.ratio[0], self.ratio[1]) + + height, width = img.shape[-2:] + + # Calculate crop size + crop_area = scale * width * height + aspect_ratio = ratio * (width / height) + crop_width = int(round(math.sqrt(crop_area * aspect_ratio))) + crop_height = int(round(math.sqrt(crop_area / aspect_ratio))) + + # Ensure crop size does not exceed image dimensions + crop_width = min(crop_width, width) + crop_height = min(crop_height, height) + + # Randomly select crop position + x = random.randint(0, width - crop_width) + y = random.randint(0, height - crop_height) + + # relative crop box + crop_box = (x / width, y / height, (x + crop_width) / width, (y + crop_height) / height) + if target.shape[1:] == (4, 2): + min_xy = np.min(target, axis=1) + max_xy = np.max(target, axis=1) + _target = np.concatenate((min_xy, max_xy), axis=1) + else: + _target = target + + # Crop image and targets + croped_img, crop_boxes = F.crop_detection(img, _target, crop_box) + # hard fallback if no box is kept + if crop_boxes.shape[0] == 0: + return img, target + # clip boxes + return croped_img, np.clip(crop_boxes, 0, 1)
+ +
+
+
+
+ + +
+
+ + Made with Sphinx and @pradyunsg's + + Furo + +
+
+ +
+
+ +
+
+ +
+
+ + + + + + + + \ No newline at end of file diff --git a/_modules/doctr/transforms/modules/pytorch.html b/_modules/doctr/transforms/modules/pytorch.html new file mode 100644 index 0000000000..df9ec8ae19 --- /dev/null +++ b/_modules/doctr/transforms/modules/pytorch.html @@ -0,0 +1,658 @@ + + + + + + + + + + + + + doctr.transforms.modules.pytorch - docTR documentation + + + + + + + + + + + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark, in light mode + + + + + + + + + + + + + + + Auto light/dark, in dark mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Skip to content + + + +
+
+
+ +
+ +
+
+ +
+ +
+
+ +
+
+
+ + + + + Back to top + +
+
+ +
+ +
+
+

Source code for doctr.transforms.modules.pytorch

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+import math
+
+import numpy as np
+import torch
+from PIL.Image import Image
+from scipy.ndimage import gaussian_filter
+from torch.nn.functional import pad
+from torchvision.transforms import functional as F
+from torchvision.transforms import transforms as T
+
+from ..functional import random_shadow
+
+__all__ = [
+    "Resize",
+    "GaussianNoise",
+    "ChannelShuffle",
+    "RandomHorizontalFlip",
+    "RandomShadow",
+    "RandomResize",
+    "GaussianBlur",
+]
+
+
+
+[docs] +class Resize(T.Resize): + """Resize the input image to the given size + + >>> import torch + >>> from doctr.transforms import Resize + >>> transfo = Resize((64, 64), preserve_aspect_ratio=True, symmetric_pad=True) + >>> out = transfo(torch.rand((3, 64, 64))) + + Args: + size: output size in pixels, either a tuple (height, width) or a single integer for square images + interpolation: interpolation mode to use for resizing, default is bilinear + preserve_aspect_ratio: whether to preserve the aspect ratio of the image, + if True, the image will be resized to fit within the target size while maintaining its aspect ratio + symmetric_pad: whether to symmetrically pad the image to the target size, + if True, the image will be padded equally on both sides to fit the target size + """ + + def __init__( + self, + size: int | tuple[int, int], + interpolation=F.InterpolationMode.BILINEAR, + preserve_aspect_ratio: bool = False, + symmetric_pad: bool = False, + ) -> None: + super().__init__(size if isinstance(size, (list, tuple)) else (size, size), interpolation, antialias=True) + self.preserve_aspect_ratio = preserve_aspect_ratio + self.symmetric_pad = symmetric_pad + + def forward( + self, + img: torch.Tensor, + target: np.ndarray | None = None, + ) -> torch.Tensor | tuple[torch.Tensor, np.ndarray]: + target_ratio = self.size[0] / self.size[1] + actual_ratio = img.shape[-2] / img.shape[-1] + + if not self.preserve_aspect_ratio or (target_ratio == actual_ratio): + # If we don't preserve the aspect ratio or the wanted aspect ratio is the same than the original one + # We can use with the regular resize + if target is not None: + return super().forward(img), target + return super().forward(img) + else: + # Resize + if actual_ratio > target_ratio: + tmp_size = (self.size[0], max(int(self.size[0] / actual_ratio), 1)) + else: + tmp_size = (max(int(self.size[1] * actual_ratio), 1), self.size[1]) + + # Scale image + img = F.resize(img, tmp_size, self.interpolation, antialias=True) + raw_shape = img.shape[-2:] + if isinstance(self.size, (tuple, list)): + # Pad (inverted in pytorch) + _pad = (0, self.size[1] - img.shape[-1], 0, self.size[0] - img.shape[-2]) + if self.symmetric_pad: + half_pad = (math.ceil(_pad[1] / 2), math.ceil(_pad[3] / 2)) + _pad = (half_pad[0], _pad[1] - half_pad[0], half_pad[1], _pad[3] - half_pad[1]) + # Pad image + img = pad(img, _pad) + + # In case boxes are provided, resize boxes if needed (for detection task if preserve aspect ratio) + if target is not None: + if self.symmetric_pad: + offset = half_pad[0] / img.shape[-1], half_pad[1] / img.shape[-2] + + if self.preserve_aspect_ratio: + # Get absolute coords + if target.shape[1:] == (4,): + if self.symmetric_pad: + target[:, [0, 2]] = offset[0] + target[:, [0, 2]] * raw_shape[-1] / img.shape[-1] + target[:, [1, 3]] = offset[1] + target[:, [1, 3]] * raw_shape[-2] / img.shape[-2] + else: + target[:, [0, 2]] *= raw_shape[-1] / img.shape[-1] + target[:, [1, 3]] *= raw_shape[-2] / img.shape[-2] + elif target.shape[1:] == (4, 2): + if self.symmetric_pad: + target[..., 0] = offset[0] + target[..., 0] * raw_shape[-1] / img.shape[-1] + target[..., 1] = offset[1] + target[..., 1] * raw_shape[-2] / img.shape[-2] + else: + target[..., 0] *= raw_shape[-1] / img.shape[-1] + target[..., 1] *= raw_shape[-2] / img.shape[-2] + else: + raise AssertionError("Boxes should be in the format (n_boxes, 4, 2) or (n_boxes, 4)") + + return img, np.clip(target, 0, 1) + + return img + + def __repr__(self) -> str: + interpolate_str = 
self.interpolation.value + _repr = f"output_size={self.size}, interpolation='{interpolate_str}'" + if self.preserve_aspect_ratio: + _repr += f", preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}" + return f"{self.__class__.__name__}({_repr})"
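The docstring example above only covers the image; a short sketch of the detection use case, where relative boxes are rescaled and offset to the padded output (shapes and behavior taken from the forward pass above):

>>> import numpy as np
>>> import torch
>>> from doctr.transforms import Resize
>>> transfo = Resize((128, 128), preserve_aspect_ratio=True, symmetric_pad=True)
>>> img = torch.rand(3, 50, 100)  # wider than tall, so vertical padding is added
>>> boxes = np.array([[0.1, 0.2, 0.6, 0.8]], dtype=np.float32)  # relative (xmin, ymin, xmax, ymax)
>>> out_img, out_boxes = transfo(img, boxes)  # out_img is 3 x 128 x 128, boxes follow the padding offset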
+ + + +
+[docs] +class GaussianNoise(torch.nn.Module): + """Adds Gaussian Noise to the input tensor + + >>> import torch + >>> from doctr.transforms import GaussianNoise + >>> transfo = GaussianNoise(0., 1.) + >>> out = transfo(torch.rand((3, 224, 224))) + + Args: + mean : mean of the gaussian distribution + std : std of the gaussian distribution + """ + + def __init__(self, mean: float = 0.0, std: float = 1.0) -> None: + super().__init__() + self.std = std + self.mean = mean + + def forward(self, x: torch.Tensor) -> torch.Tensor: + # Reshape the distribution + noise = self.mean + 2 * self.std * torch.rand(x.shape, device=x.device) - self.std + if x.dtype == torch.uint8: + return (x + 255 * noise).round().clamp(0, 255).to(dtype=torch.uint8) + else: + return (x + noise.to(dtype=x.dtype)).clamp(0, 1) + + def extra_repr(self) -> str: + return f"mean={self.mean}, std={self.std}"
+ + + +class GaussianBlur(torch.nn.Module): + """Apply Gaussian Blur to the input tensor + + >>> import torch + >>> from doctr.transforms import GaussianBlur + >>> transfo = GaussianBlur(sigma=(0.0, 1.0)) + + Args: + sigma : standard deviation range for the gaussian kernel + """ + + def __init__(self, sigma: tuple[float, float]) -> None: + super().__init__() + self.sigma_range = sigma + + def forward(self, x: torch.Tensor) -> torch.Tensor: + # Sample a random sigma value within the specified range + sigma = torch.empty(1).uniform_(*self.sigma_range).item() + + # Apply Gaussian blur along spatial dimensions only + blurred = torch.tensor( + gaussian_filter( + x.numpy(), + sigma=sigma, + mode="reflect", + truncate=4.0, + ), + dtype=x.dtype, + device=x.device, + ) + return blurred + + +
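GaussianBlur's docstring builds the transform but never applies it; a minimal sketch, assuming a CPU tensor since the forward pass converts to NumPy before filtering:

>>> import torch
>>> from doctr.transforms import GaussianBlur
>>> transfo = GaussianBlur(sigma=(0.5, 1.5))
>>> out = transfo(torch.rand(3, 64, 64))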
+[docs] +class ChannelShuffle(torch.nn.Module): + """Randomly shuffle channel order of a given image""" + + def __init__(self): + super().__init__() + + def forward(self, img: torch.Tensor) -> torch.Tensor: + # Get a random order + chan_order = torch.rand(img.shape[0]).argsort() + return img[chan_order]
+ + + +
+[docs] +class RandomHorizontalFlip(T.RandomHorizontalFlip): + """Randomly flip the input image horizontally""" + + def forward(self, img: torch.Tensor | Image, target: np.ndarray) -> tuple[torch.Tensor | Image, np.ndarray]: + if torch.rand(1) < self.p: + _img = F.hflip(img) + _target = target.copy() + # Changing the relative bbox coordinates + if target.shape[1:] == (4,): + _target[:, ::2] = 1 - target[:, [2, 0]] + else: + _target[..., 0] = 1 - target[..., 0] + return _img, _target + return img, target
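A short sketch of how RandomHorizontalFlip mirrors relative (xmin, ymin, xmax, ymax) boxes; p=1. is only used here to make the flip deterministic:

>>> import numpy as np
>>> import torch
>>> from doctr.transforms import RandomHorizontalFlip
>>> transfo = RandomHorizontalFlip(p=1.)
>>> img = torch.rand(3, 64, 64)
>>> boxes = np.array([[0.1, 0.2, 0.4, 0.5]], dtype=np.float32)
>>> f_img, f_boxes = transfo(img, boxes)  # f_boxes is approximately [[0.6, 0.2, 0.9, 0.5]]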
+ + + +
+[docs] +class RandomShadow(torch.nn.Module): + """Adds random shade to the input image + + >>> import torch + >>> from doctr.transforms import RandomShadow + >>> transfo = RandomShadow((0., 1.)) + >>> out = transfo(torch.rand((3, 64, 64))) + + Args: + opacity_range : minimum and maximum opacity of the shade + """ + + def __init__(self, opacity_range: tuple[float, float] | None = None) -> None: + super().__init__() + self.opacity_range = opacity_range if isinstance(opacity_range, tuple) else (0.2, 0.8) + + def __call__(self, x: torch.Tensor) -> torch.Tensor: + # Reshape the distribution + try: + if x.dtype == torch.uint8: + return ( + ( + 255 + * random_shadow( + x.to(dtype=torch.float32) / 255, + self.opacity_range, + ) + ) + .round() + .clip(0, 255) + .to(dtype=torch.uint8) + ) + else: + return random_shadow(x, self.opacity_range).clip(0, 1) + except ValueError: + return x + + def extra_repr(self) -> str: + return f"opacity_range={self.opacity_range}"
+ + + +
+[docs] +class RandomResize(torch.nn.Module): + """Randomly resize the input image and align corresponding targets + + >>> import numpy as np + >>> import torch + >>> from doctr.transforms import RandomResize + >>> transfo = RandomResize((0.3, 0.9), preserve_aspect_ratio=True, symmetric_pad=True, p=0.5) + >>> out_img, out_boxes = transfo(torch.rand((3, 64, 64)), np.array([[0.1, 0.1, 0.4, 0.5]])) + + Args: + scale_range: range of the resizing factor for width and height (independently) + preserve_aspect_ratio: whether to preserve the aspect ratio of the image, + given a float value, the aspect ratio will be preserved with this probability + symmetric_pad: whether to symmetrically pad the image, + given a float value, the symmetric padding will be applied with this probability + p: probability to apply the transformation + """ + + def __init__( + self, + scale_range: tuple[float, float] = (0.3, 0.9), + preserve_aspect_ratio: bool | float = False, + symmetric_pad: bool | float = False, + p: float = 0.5, + ) -> None: + super().__init__() + self.scale_range = scale_range + self.preserve_aspect_ratio = preserve_aspect_ratio + self.symmetric_pad = symmetric_pad + self.p = p + self._resize = Resize + + def forward(self, img: torch.Tensor, target: np.ndarray) -> tuple[torch.Tensor, np.ndarray]: + if torch.rand(1) < self.p: + scale_h = np.random.uniform(*self.scale_range) + scale_w = np.random.uniform(*self.scale_range) + new_size = (int(img.shape[-2] * scale_h), int(img.shape[-1] * scale_w)) + + _img, _target = self._resize( + new_size, + preserve_aspect_ratio=self.preserve_aspect_ratio + if isinstance(self.preserve_aspect_ratio, bool) + else bool(torch.rand(1) <= self.preserve_aspect_ratio), + symmetric_pad=self.symmetric_pad + if isinstance(self.symmetric_pad, bool) + else bool(torch.rand(1) <= self.symmetric_pad), + )(img, target) + + return _img, _target + return img, target + + def extra_repr(self) -> str: + return f"scale_range={self.scale_range}, preserve_aspect_ratio={self.preserve_aspect_ratio}, symmetric_pad={self.symmetric_pad}, p={self.p}" # noqa: E501
+ +
+
+
+
+ + +
+
+ + Made with Sphinx and @pradyunsg's + + Furo + +
+
+ +
+
+ +
+
+ +
+
+ + + + + + + + \ No newline at end of file diff --git a/_modules/doctr/utils/metrics.html b/_modules/doctr/utils/metrics.html new file mode 100644 index 0000000000..8d9ec5a457 --- /dev/null +++ b/_modules/doctr/utils/metrics.html @@ -0,0 +1,918 @@ + + + + + + + + + + + + + doctr.utils.metrics - docTR documentation + + + + + + + + + + + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark, in light mode + + + + + + + + + + + + + + + Auto light/dark, in dark mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Skip to content + + + +
+
+
+ +
+ +
+
+ +
+ +
+
+ +
+
+
+ + + + + Back to top + +
+
+ +
+ +
+
+

Source code for doctr.utils.metrics

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+
+
+import numpy as np
+from anyascii import anyascii
+from scipy.optimize import linear_sum_assignment
+from shapely.geometry import Polygon
+
+__all__ = [
+    "TextMatch",
+    "box_iou",
+    "polygon_iou",
+    "nms",
+    "LocalizationConfusion",
+    "OCRMetric",
+    "DetectionMetric",
+]
+
+
+def string_match(word1: str, word2: str) -> tuple[bool, bool, bool, bool]:
+    """Performs string comparison with multiple levels of tolerance
+
+    Args:
+        word1: a string
+        word2: another string
+
+    Returns:
+        a tuple with booleans specifying respectively whether the raw strings, their lower-case counterparts, their
+            anyascii counterparts and their lower-case anyascii counterparts match
+    """
+    raw_match = word1 == word2
+    caseless_match = word1.lower() == word2.lower()
+    anyascii_match = anyascii(word1) == anyascii(word2)
+
+    # Warning: the order is important here otherwise the pair ("EUR", "€") cannot be matched
+    unicase_match = anyascii(word1).lower() == anyascii(word2).lower()
+
+    return raw_match, caseless_match, anyascii_match, unicase_match
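A hedged illustration of the four tolerance levels returned by string_match; the second result assumes anyascii transliterates the euro sign to "EUR", as the comment above implies:

>>> from doctr.utils.metrics import string_match
>>> string_match("Hello", "hello")  # raw mismatch, caseless and unicase match
(False, True, False, True)
>>> string_match("EUR", "€")  # only the anyascii-based comparisons match
(False, False, True, True)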
+
+
+
+[docs] +class TextMatch: + r"""Implements text match metric (word-level accuracy) for recognition task. + + The raw aggregated metric is computed as follows: + + .. math:: + \forall X, Y \in \mathcal{W}^N, + TextMatch(X, Y) = \frac{1}{N} \sum\limits_{i=1}^N f_{Y_i}(X_i) + + with the indicator function :math:`f_{a}` defined as: + + .. math:: + \forall a, x \in \mathcal{W}, + f_a(x) = \left\{ + \begin{array}{ll} + 1 & \mbox{if } x = a \\ + 0 & \mbox{otherwise.} + \end{array} + \right. + + where :math:`\mathcal{W}` is the set of all possible character sequences, + :math:`N` is a strictly positive integer. + + >>> from doctr.utils import TextMatch + >>> metric = TextMatch() + >>> metric.update(['Hello', 'world'], ['hello', 'world']) + >>> metric.summary() + """ + + def __init__(self) -> None: + self.reset() + +
+[docs] + def update( + self, + gt: list[str], + pred: list[str], + ) -> None: + """Update the state of the metric with new predictions + + Args: + gt: list of ground-truth character sequences + pred: list of predicted character sequences + """ + if len(gt) != len(pred): + raise AssertionError("prediction size does not match with ground-truth labels size") + + for gt_word, pred_word in zip(gt, pred): + _raw, _caseless, _anyascii, _unicase = string_match(gt_word, pred_word) + self.raw += int(_raw) + self.caseless += int(_caseless) + self.anyascii += int(_anyascii) + self.unicase += int(_unicase) + + self.total += len(gt)
+ + +
+[docs] + def summary(self) -> dict[str, float]: + """Computes the aggregated metrics + + Returns: + a dictionary with the exact match score for the raw data, its lower-case counterpart, its anyascii + counterpart and its lower-case anyascii counterpart + """ + if self.total == 0: + raise AssertionError("you need to update the metric before getting the summary") + + return dict( + raw=self.raw / self.total, + caseless=self.caseless / self.total, + anyascii=self.anyascii / self.total, + unicase=self.unicase / self.total, + )
+ + + def reset(self) -> None: + self.raw = 0 + self.caseless = 0 + self.anyascii = 0 + self.unicase = 0 + self.total = 0
+ + + +def box_iou(boxes_1: np.ndarray, boxes_2: np.ndarray) -> np.ndarray: + """Computes the IoU between two sets of bounding boxes + + Args: + boxes_1: bounding boxes of shape (N, 4) in format (xmin, ymin, xmax, ymax) + boxes_2: bounding boxes of shape (M, 4) in format (xmin, ymin, xmax, ymax) + + Returns: + the IoU matrix of shape (N, M) + """ + iou_mat: np.ndarray = np.zeros((boxes_1.shape[0], boxes_2.shape[0]), dtype=np.float32) + + if boxes_1.shape[0] > 0 and boxes_2.shape[0] > 0: + l1, t1, r1, b1 = np.split(boxes_1, 4, axis=1) + l2, t2, r2, b2 = np.split(boxes_2, 4, axis=1) + + left = np.maximum(l1, l2.T) + top = np.maximum(t1, t2.T) + right = np.minimum(r1, r2.T) + bot = np.minimum(b1, b2.T) + + intersection = np.clip(right - left, 0, np.inf) * np.clip(bot - top, 0, np.inf) + union = (r1 - l1) * (b1 - t1) + ((r2 - l2) * (b2 - t2)).T - intersection + iou_mat = intersection / union + + return iou_mat + + +def polygon_iou(polys_1: np.ndarray, polys_2: np.ndarray) -> np.ndarray: + """Computes the IoU between two sets of rotated bounding boxes + + Args: + polys_1: rotated bounding boxes of shape (N, 4, 2) + polys_2: rotated bounding boxes of shape (M, 4, 2) + mask_shape: spatial shape of the intermediate masks + use_broadcasting: if set to True, leverage broadcasting speedup by consuming more memory + + Returns: + the IoU matrix of shape (N, M) + """ + if polys_1.ndim != 3 or polys_2.ndim != 3: + raise AssertionError("expects boxes to be in format (N, 4, 2)") + + iou_mat = np.zeros((polys_1.shape[0], polys_2.shape[0]), dtype=np.float32) + + shapely_polys_1 = [Polygon(poly) for poly in polys_1] + shapely_polys_2 = [Polygon(poly) for poly in polys_2] + + for i, poly1 in enumerate(shapely_polys_1): + for j, poly2 in enumerate(shapely_polys_2): + intersection_area = poly1.intersection(poly2).area + union_area = poly1.area + poly2.area - intersection_area + iou_mat[i, j] = intersection_area / union_area + + return iou_mat + + +def nms(boxes: np.ndarray, thresh: float = 0.5) -> list[int]: + """Perform non-max suppression, borrowed from <https://github.com/rbgirshick/fast-rcnn>`_. + + Args: + boxes: np array of straight boxes: (*, 5), (xmin, ymin, xmax, ymax, score) + thresh: iou threshold to perform box suppression. + + Returns: + A list of box indexes to keep + """ + x1 = boxes[:, 0] + y1 = boxes[:, 1] + x2 = boxes[:, 2] + y2 = boxes[:, 3] + scores = boxes[:, 4] + + areas = (x2 - x1) * (y2 - y1) + order = scores.argsort()[::-1] + + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1) + h = np.maximum(0.0, yy2 - yy1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + + inds = np.where(ovr <= thresh)[0] + order = order[inds + 1] + return keep + + +
+[docs] +class LocalizationConfusion: + r"""Implements common confusion metrics and mean IoU for localization evaluation. + + The aggregated metrics are computed as follows: + + .. math:: + \forall Y \in \mathcal{B}^N, \forall X \in \mathcal{B}^M, \\ + Recall(X, Y) = \frac{1}{N} \sum\limits_{i=1}^N g_{X}(Y_i) \\ + Precision(X, Y) = \frac{1}{M} \sum\limits_{i=1}^M g_{X}(Y_i) \\ + meanIoU(X, Y) = \frac{1}{M} \sum\limits_{i=1}^M \max\limits_{j \in [1, N]} IoU(X_i, Y_j) + + with the function :math:`IoU(x, y)` being the Intersection over Union between bounding boxes :math:`x` and + :math:`y`, and the function :math:`g_{X}` defined as: + + .. math:: + \forall y \in \mathcal{B}, + g_X(y) = \left\{ + \begin{array}{ll} + 1 & \mbox{if } y\mbox{ has been assigned to any }(X_i)_i\mbox{ with an }IoU \geq 0.5 \\ + 0 & \mbox{otherwise.} + \end{array} + \right. + + where :math:`\mathcal{B}` is the set of possible bounding boxes, + :math:`N` (number of ground truths) and :math:`M` (number of predictions) are strictly positive integers. + + >>> import numpy as np + >>> from doctr.utils import LocalizationConfusion + >>> metric = LocalizationConfusion(iou_thresh=0.5) + >>> metric.update(np.asarray([[0, 0, 100, 100]]), np.asarray([[0, 0, 70, 70], [110, 95, 200, 150]])) + >>> metric.summary() + + Args: + iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match + use_polygons: if set to True, predictions and targets will be expected to have rotated format + """ + + def __init__( + self, + iou_thresh: float = 0.5, + use_polygons: bool = False, + ) -> None: + self.iou_thresh = iou_thresh + self.use_polygons = use_polygons + self.reset() + +
+[docs] + def update(self, gts: np.ndarray, preds: np.ndarray) -> None: + """Updates the metric + + Args: + gts: a set of relative bounding boxes either of shape (N, 4) or (N, 5) if they are rotated ones + preds: a set of relative bounding boxes either of shape (M, 4) or (M, 5) if they are rotated ones + """ + if preds.shape[0] > 0: + # Compute IoU + if self.use_polygons: + iou_mat = polygon_iou(gts, preds) + else: + iou_mat = box_iou(gts, preds) + self.tot_iou += float(iou_mat.max(axis=0).sum()) + + # Assign pairs + gt_indices, pred_indices = linear_sum_assignment(-iou_mat) + self.matches += int((iou_mat[gt_indices, pred_indices] >= self.iou_thresh).sum()) + + # Update counts + self.num_gts += gts.shape[0] + self.num_preds += preds.shape[0]
+ + +
+[docs] + def summary(self) -> tuple[float | None, float | None, float | None]: + """Computes the aggregated metrics + + Returns: + a tuple with the recall, precision and meanIoU scores + """ + # Recall + recall = self.matches / self.num_gts if self.num_gts > 0 else None + + # Precision + precision = self.matches / self.num_preds if self.num_preds > 0 else None + + # mean IoU + mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None + + return recall, precision, mean_iou
+ + + def reset(self) -> None: + self.num_gts = 0 + self.num_preds = 0 + self.matches = 0 + self.tot_iou = 0.0
+ + + +
+[docs] +class OCRMetric: + r"""Implements an end-to-end OCR metric. + + The aggregated metrics are computed as follows: + + .. math:: + \forall (B, L) \in \mathcal{B}^N \times \mathcal{L}^N, + \forall (\hat{B}, \hat{L}) \in \mathcal{B}^M \times \mathcal{L}^M, \\ + Recall(B, \hat{B}, L, \hat{L}) = \frac{1}{N} \sum\limits_{i=1}^N h_{B,L}(\hat{B}_i, \hat{L}_i) \\ + Precision(B, \hat{B}, L, \hat{L}) = \frac{1}{M} \sum\limits_{i=1}^M h_{B,L}(\hat{B}_i, \hat{L}_i) \\ + meanIoU(B, \hat{B}) = \frac{1}{M} \sum\limits_{i=1}^M \max\limits_{j \in [1, N]} IoU(\hat{B}_i, B_j) + + with the function :math:`IoU(x, y)` being the Intersection over Union between bounding boxes :math:`x` and + :math:`y`, and the function :math:`h_{B, L}` defined as: + + .. math:: + \forall (b, l) \in \mathcal{B} \times \mathcal{L}, + h_{B,L}(b, l) = \left\{ + \begin{array}{ll} + 1 & \mbox{if } b\mbox{ has been assigned to a given }B_j\mbox{ with an } \\ + & IoU \geq 0.5 \mbox{ and that for this assignment, } l = L_j\\ + 0 & \mbox{otherwise.} + \end{array} + \right. + + where :math:`\mathcal{B}` is the set of possible bounding boxes, + :math:`\mathcal{L}` is the set of possible character sequences, + :math:`N` (number of ground truths) and :math:`M` (number of predictions) are strictly positive integers. + + >>> import numpy as np + >>> from doctr.utils import OCRMetric + >>> metric = OCRMetric(iou_thresh=0.5) + >>> metric.update(np.asarray([[0, 0, 100, 100]]), np.asarray([[0, 0, 70, 70], [110, 95, 200, 150]]), + >>> ['hello'], ['hello', 'world']) + >>> metric.summary() + + Args: + iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match + use_polygons: if set to True, predictions and targets will be expected to have rotated format + """ + + def __init__( + self, + iou_thresh: float = 0.5, + use_polygons: bool = False, + ) -> None: + self.iou_thresh = iou_thresh + self.use_polygons = use_polygons + self.reset() + +
+[docs] + def update( + self, + gt_boxes: np.ndarray, + pred_boxes: np.ndarray, + gt_labels: list[str], + pred_labels: list[str], + ) -> None: + """Updates the metric + + Args: + gt_boxes: a set of relative bounding boxes either of shape (N, 4) or (N, 5) if they are rotated ones + pred_boxes: a set of relative bounding boxes either of shape (M, 4) or (M, 5) if they are rotated ones + gt_labels: a list of N string labels + pred_labels: a list of M string labels + """ + if gt_boxes.shape[0] != len(gt_labels) or pred_boxes.shape[0] != len(pred_labels): + raise AssertionError( + "there should be the same number of boxes and string both for the ground truth and the predictions" + ) + + # Compute IoU + if pred_boxes.shape[0] > 0: + if self.use_polygons: + iou_mat = polygon_iou(gt_boxes, pred_boxes) + else: + iou_mat = box_iou(gt_boxes, pred_boxes) + + self.tot_iou += float(iou_mat.max(axis=0).sum()) + + # Assign pairs + gt_indices, pred_indices = linear_sum_assignment(-iou_mat) + is_kept = iou_mat[gt_indices, pred_indices] >= self.iou_thresh + # String comparison + for gt_idx, pred_idx in zip(gt_indices[is_kept], pred_indices[is_kept]): + _raw, _caseless, _anyascii, _unicase = string_match(gt_labels[gt_idx], pred_labels[pred_idx]) + self.raw_matches += int(_raw) + self.caseless_matches += int(_caseless) + self.anyascii_matches += int(_anyascii) + self.unicase_matches += int(_unicase) + + self.num_gts += gt_boxes.shape[0] + self.num_preds += pred_boxes.shape[0]
+ + +
+[docs] + def summary(self) -> tuple[dict[str, float | None], dict[str, float | None], float | None]: + """Computes the aggregated metrics + + Returns: + a tuple with the recall & precision for each string comparison and the mean IoU + """ + # Recall + recall = dict( + raw=self.raw_matches / self.num_gts if self.num_gts > 0 else None, + caseless=self.caseless_matches / self.num_gts if self.num_gts > 0 else None, + anyascii=self.anyascii_matches / self.num_gts if self.num_gts > 0 else None, + unicase=self.unicase_matches / self.num_gts if self.num_gts > 0 else None, + ) + + # Precision + precision = dict( + raw=self.raw_matches / self.num_preds if self.num_preds > 0 else None, + caseless=self.caseless_matches / self.num_preds if self.num_preds > 0 else None, + anyascii=self.anyascii_matches / self.num_preds if self.num_preds > 0 else None, + unicase=self.unicase_matches / self.num_preds if self.num_preds > 0 else None, + ) + + # mean IoU (overall detected boxes) + mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None + + return recall, precision, mean_iou
+ + + def reset(self) -> None: + self.num_gts = 0 + self.num_preds = 0 + self.tot_iou = 0.0 + self.raw_matches = 0 + self.caseless_matches = 0 + self.anyascii_matches = 0 + self.unicase_matches = 0
+ + + +
+[docs] +class DetectionMetric: + r"""Implements an object detection metric. + + The aggregated metrics are computed as follows: + + .. math:: + \forall (B, C) \in \mathcal{B}^N \times \mathcal{C}^N, + \forall (\hat{B}, \hat{C}) \in \mathcal{B}^M \times \mathcal{C}^M, \\ + Recall(B, \hat{B}, C, \hat{C}) = \frac{1}{N} \sum\limits_{i=1}^N h_{B,C}(\hat{B}_i, \hat{C}_i) \\ + Precision(B, \hat{B}, C, \hat{C}) = \frac{1}{M} \sum\limits_{i=1}^M h_{B,C}(\hat{B}_i, \hat{C}_i) \\ + meanIoU(B, \hat{B}) = \frac{1}{M} \sum\limits_{i=1}^M \max\limits_{j \in [1, N]} IoU(\hat{B}_i, B_j) + + with the function :math:`IoU(x, y)` being the Intersection over Union between bounding boxes :math:`x` and + :math:`y`, and the function :math:`h_{B, C}` defined as: + + .. math:: + \forall (b, c) \in \mathcal{B} \times \mathcal{C}, + h_{B,C}(b, c) = \left\{ + \begin{array}{ll} + 1 & \mbox{if } b\mbox{ has been assigned to a given }B_j\mbox{ with an } \\ + & IoU \geq 0.5 \mbox{ and that for this assignment, } c = C_j\\ + 0 & \mbox{otherwise.} + \end{array} + \right. + + where :math:`\mathcal{B}` is the set of possible bounding boxes, + :math:`\mathcal{C}` is the set of possible class indices, + :math:`N` (number of ground truths) and :math:`M` (number of predictions) are strictly positive integers. + + >>> import numpy as np + >>> from doctr.utils import DetectionMetric + >>> metric = DetectionMetric(iou_thresh=0.5) + >>> metric.update(np.asarray([[0, 0, 100, 100]]), np.asarray([[0, 0, 70, 70], [110, 95, 200, 150]]), + >>> np.zeros(1, dtype=np.int64), np.array([0, 1], dtype=np.int64)) + >>> metric.summary() + + Args: + iou_thresh: minimum IoU to consider a pair of prediction and ground truth as a match + use_polygons: if set to True, predictions and targets will be expected to have rotated format + """ + + def __init__( + self, + iou_thresh: float = 0.5, + use_polygons: bool = False, + ) -> None: + self.iou_thresh = iou_thresh + self.use_polygons = use_polygons + self.reset() + +
+[docs] + def update( + self, + gt_boxes: np.ndarray, + pred_boxes: np.ndarray, + gt_labels: np.ndarray, + pred_labels: np.ndarray, + ) -> None: + """Updates the metric + + Args: + gt_boxes: a set of relative bounding boxes either of shape (N, 4) or (N, 5) if they are rotated ones + pred_boxes: a set of relative bounding boxes either of shape (M, 4) or (M, 5) if they are rotated ones + gt_labels: an array of class indices of shape (N,) + pred_labels: an array of class indices of shape (M,) + """ + if gt_boxes.shape[0] != gt_labels.shape[0] or pred_boxes.shape[0] != pred_labels.shape[0]: + raise AssertionError( + "there should be the same number of boxes and string both for the ground truth and the predictions" + ) + + # Compute IoU + if pred_boxes.shape[0] > 0: + if self.use_polygons: + iou_mat = polygon_iou(gt_boxes, pred_boxes) + else: + iou_mat = box_iou(gt_boxes, pred_boxes) + + self.tot_iou += float(iou_mat.max(axis=0).sum()) + + # Assign pairs + gt_indices, pred_indices = linear_sum_assignment(-iou_mat) + is_kept = iou_mat[gt_indices, pred_indices] >= self.iou_thresh + # Category comparison + self.num_matches += int((gt_labels[gt_indices[is_kept]] == pred_labels[pred_indices[is_kept]]).sum()) + + self.num_gts += gt_boxes.shape[0] + self.num_preds += pred_boxes.shape[0]
+ + +
+[docs] + def summary(self) -> tuple[float | None, float | None, float | None]: + """Computes the aggregated metrics + + Returns: + a tuple with the recall & precision for each class prediction and the mean IoU + """ + # Recall + recall = self.num_matches / self.num_gts if self.num_gts > 0 else None + + # Precision + precision = self.num_matches / self.num_preds if self.num_preds > 0 else None + + # mean IoU (overall detected boxes) + mean_iou = round(self.tot_iou / self.num_preds, 2) if self.num_preds > 0 else None + + return recall, precision, mean_iou
+ + + def reset(self) -> None: + self.num_gts = 0 + self.num_preds = 0 + self.tot_iou = 0.0 + self.num_matches = 0
+ +
+
+
+
+ + +
+
+ + Made with Sphinx and @pradyunsg's + + Furo + +
+
+ +
+
+ +
+
+ +
+
+ + + + + + + + \ No newline at end of file diff --git a/_modules/doctr/utils/reconstitution.html b/_modules/doctr/utils/reconstitution.html new file mode 100644 index 0000000000..5f2e88d34b --- /dev/null +++ b/_modules/doctr/utils/reconstitution.html @@ -0,0 +1,542 @@ + + + + + + + + + + + + + doctr.utils.reconstitution - docTR documentation + + + + + + + + + + + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark, in light mode + + + + + + + + + + + + + + + Auto light/dark, in dark mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Skip to content + + + +
+
+
+ +
+ +
+
+ +
+ +
+
+ +
+
+
+ + + + + Back to top + +
+
+ +
+ +
+
+

Source code for doctr.utils.reconstitution

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+import logging
+from typing import Any
+
+import numpy as np
+from anyascii import anyascii
+from PIL import Image, ImageDraw
+
+from .fonts import get_font
+
+__all__ = ["synthesize_page", "synthesize_kie_page"]
+
+
+# Global variable to avoid multiple warnings
+ROTATION_WARNING = False
+
+
+def _warn_rotation(entry: dict[str, Any]) -> None:  # pragma: no cover
+    global ROTATION_WARNING
+    if not ROTATION_WARNING and len(entry["geometry"]) == 4:
+        logging.warning("Polygons with larger rotations will lead to inaccurate rendering")
+        ROTATION_WARNING = True
+
+
+def _synthesize(
+    response: Image.Image,
+    entry: dict[str, Any],
+    w: int,
+    h: int,
+    draw_proba: bool = False,
+    font_family: str | None = None,
+    smoothing_factor: float = 0.75,
+    min_font_size: int = 6,
+    max_font_size: int = 50,
+) -> Image.Image:
+    if len(entry["geometry"]) == 2:
+        (xmin, ymin), (xmax, ymax) = entry["geometry"]
+        polygon = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
+    else:
+        polygon = entry["geometry"]
+
+    # Calculate the bounding box of the word
+    x_coords, y_coords = zip(*polygon)
+    xmin, ymin, xmax, ymax = (
+        int(round(w * min(x_coords))),
+        int(round(h * min(y_coords))),
+        int(round(w * max(x_coords))),
+        int(round(h * max(y_coords))),
+    )
+    word_width = xmax - xmin
+    word_height = ymax - ymin
+
+    # If lines are provided instead of words, concatenate the word entries
+    if "words" in entry:
+        word_text = " ".join(word["value"] for word in entry["words"])
+    else:
+        word_text = entry["value"]
+    # Find the optimal font size
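+    # The loop below starts from the word-box height (capped at max_font_size) and shrinks the font
+    # geometrically by smoothing_factor until the rendered text fits the box or min_font_size is reached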
+    try:
+        font_size = min(word_height, max_font_size)
+        font = get_font(font_family, font_size)
+        text_width, text_height = font.getbbox(word_text)[2:4]
+
+        while (text_width > word_width or text_height > word_height) and font_size > min_font_size:
+            font_size = max(int(font_size * smoothing_factor), min_font_size)
+            font = get_font(font_family, font_size)
+            text_width, text_height = font.getbbox(word_text)[2:4]
+    except ValueError:
+        font = get_font(font_family, min_font_size)
+
+    # Create a mask for the word
+    mask = Image.new("L", (w, h), 0)
+    ImageDraw.Draw(mask).polygon([(int(round(w * x)), int(round(h * y))) for x, y in polygon], fill=255)
+
+    # Draw the word text
+    d = ImageDraw.Draw(response)
+    try:
+        try:
+            d.text((xmin, ymin), word_text, font=font, fill=(0, 0, 0), anchor="lt")
+        except UnicodeEncodeError:
+            d.text((xmin, ymin), anyascii(word_text), font=font, fill=(0, 0, 0), anchor="lt")
+    # Catch generic exceptions to avoid crashing the whole rendering
+    except Exception:  # pragma: no cover
+        logging.warning(f"Could not render word: {word_text}")
+
+    if draw_proba:
+        confidence = (
+            entry["confidence"]
+            if "confidence" in entry
+            else sum(w["confidence"] for w in entry["words"]) / len(entry["words"])
+        )
+        p = int(255 * confidence)
+        color = (255 - p, 0, p)  # Red to blue gradient based on probability
+        d.rectangle([(xmin, ymin), (xmax, ymax)], outline=color, width=2)
+
+        prob_font = get_font(font_family, 20)
+        prob_text = f"{confidence:.2f}"
+        prob_text_width, prob_text_height = prob_font.getbbox(prob_text)[2:4]
+
+        # Position the probability slightly above the bounding box
+        prob_x_offset = (word_width - prob_text_width) // 2
+        prob_y_offset = ymin - prob_text_height - 2
+        prob_y_offset = max(0, prob_y_offset)
+
+        d.text((xmin + prob_x_offset, prob_y_offset), prob_text, font=prob_font, fill=color, anchor="lt")
+
+    return response
+
+
+
+[docs] +def synthesize_page( + page: dict[str, Any], + draw_proba: bool = False, + font_family: str | None = None, + smoothing_factor: float = 0.95, + min_font_size: int = 8, + max_font_size: int = 50, +) -> np.ndarray: + """Draw the content of the element page (OCR response) on a blank page. + + Args: + page: exported Page object to represent + draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0 + font_family: family of the font + smoothing_factor: factor to smooth the font size + min_font_size: minimum font size + max_font_size: maximum font size + + Returns: + the synthesized page + """ + # Draw template + h, w = page["dimensions"] + response = Image.new("RGB", (w, h), color=(255, 255, 255)) + + for block in page["blocks"]: + # If lines are provided, use these to get better rendering results + if len(block["lines"]) > 1: + for line in block["lines"]: + _warn_rotation(block) # pragma: no cover + response = _synthesize( + response=response, + entry=line, + w=w, + h=h, + draw_proba=draw_proba, + font_family=font_family, + smoothing_factor=smoothing_factor, + min_font_size=min_font_size, + max_font_size=max_font_size, + ) + # Otherwise, draw each word + else: + for line in block["lines"]: + _warn_rotation(block) # pragma: no cover + for word in line["words"]: + response = _synthesize( + response=response, + entry=word, + w=w, + h=h, + draw_proba=draw_proba, + font_family=font_family, + smoothing_factor=smoothing_factor, + min_font_size=min_font_size, + max_font_size=max_font_size, + ) + + return np.array(response, dtype=np.uint8)
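A hedged end-to-end sketch of synthesize_page, assuming the usual docTR predictor API to produce the exported page dictionary (the image path is a placeholder):

>>> import matplotlib.pyplot as plt
>>> from doctr.io import DocumentFile
>>> from doctr.models import ocr_predictor
>>> from doctr.utils.reconstitution import synthesize_page
>>> model = ocr_predictor(pretrained=True)
>>> doc = DocumentFile.from_images("path/to/page.jpg")  # placeholder path
>>> page_export = model(doc).pages[0].export()
>>> plt.imshow(synthesize_page(page_export, draw_proba=True)); plt.show()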
+ + + +def synthesize_kie_page( + page: dict[str, Any], + draw_proba: bool = False, + font_family: str | None = None, +) -> np.ndarray: + """Draw the content of the element page (OCR response) on a blank page. + + Args: + page: exported Page object to represent + draw_proba: if True, draw words in colors to represent confidence. Blue: p=1, red: p=0 + font_family: family of the font + + Returns: + the synthesized page + """ + # Draw template + h, w = page["dimensions"] + response = Image.new("RGB", (w, h), color=(255, 255, 255)) + + # Draw each word + for predictions in page["predictions"].values(): + for prediction in predictions: + _warn_rotation(prediction) # pragma: no cover + response = _synthesize( + response=response, + entry=prediction, + w=w, + h=h, + draw_proba=draw_proba, + font_family=font_family, + ) + return np.array(response, dtype=np.uint8) +
+
+
+
+ + +
+
+ + Made with Sphinx and @pradyunsg's + + Furo + +
+
+ +
+
+ +
+
+ +
+
+ + + + + + + + \ No newline at end of file diff --git a/_modules/doctr/utils/visualization.html b/_modules/doctr/utils/visualization.html new file mode 100644 index 0000000000..f2c1fd37ad --- /dev/null +++ b/_modules/doctr/utils/visualization.html @@ -0,0 +1,709 @@ + + + + + + + + + + + + + doctr.utils.visualization - docTR documentation + + + + + + + + + + + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark, in light mode + + + + + + + + + + + + + + + Auto light/dark, in dark mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Skip to content + + + +
+
+
+ +
+ +
+
+ +
+ +
+
+ +
+
+
+ + + + + Back to top + +
+
+ +
+ +
+
+

Source code for doctr.utils.visualization

+# Copyright (C) 2021-2026, Mindee.
+
+# This program is licensed under the Apache License 2.0.
+# See LICENSE or go to <https://opensource.org/licenses/Apache-2.0> for full license details.
+import colorsys
+from copy import deepcopy
+from typing import Any
+
+import cv2
+import matplotlib.patches as patches
+import matplotlib.pyplot as plt
+import numpy as np
+from matplotlib.figure import Figure
+
+from .common_types import BoundingBox, Polygon4P
+
+__all__ = ["visualize_page", "visualize_kie_page", "draw_boxes"]
+
+
+def rect_patch(
+    geometry: BoundingBox,
+    page_dimensions: tuple[int, int],
+    label: str | None = None,
+    color: tuple[float, float, float] = (0, 0, 0),
+    alpha: float = 0.3,
+    linewidth: int = 2,
+    fill: bool = True,
+    preserve_aspect_ratio: bool = False,
+) -> patches.Rectangle:
+    """Create a matplotlib rectangular patch for the element
+
+    Args:
+        geometry: bounding box of the element
+        page_dimensions: dimensions of the Page in format (height, width)
+        label: label to display when hovered
+        color: color to draw box
+        alpha: opacity parameter to fill the boxes, 0 = transparent
+        linewidth: line width
+        fill: whether the patch should be filled
+        preserve_aspect_ratio: pass True if you passed True to the predictor
+
+    Returns:
+        a rectangular Patch
+    """
+    if len(geometry) != 2 or any(not isinstance(elt, tuple) or len(elt) != 2 for elt in geometry):
+        raise ValueError("invalid geometry format")
+
+    # Unpack
+    height, width = page_dimensions
+    (xmin, ymin), (xmax, ymax) = geometry
+    # Switch to absolute coords
+    if preserve_aspect_ratio:
+        width = height = max(height, width)
+    xmin, w = xmin * width, (xmax - xmin) * width
+    ymin, h = ymin * height, (ymax - ymin) * height
+
+    return patches.Rectangle(
+        (xmin, ymin),
+        w,
+        h,
+        fill=fill,
+        linewidth=linewidth,
+        edgecolor=(*color, alpha),
+        facecolor=(*color, alpha),
+        label=label,
+    )
+
+
+def polygon_patch(
+    geometry: np.ndarray,
+    page_dimensions: tuple[int, int],
+    label: str | None = None,
+    color: tuple[float, float, float] = (0, 0, 0),
+    alpha: float = 0.3,
+    linewidth: int = 2,
+    fill: bool = True,
+    preserve_aspect_ratio: bool = False,
+) -> patches.Polygon:
+    """Create a matplotlib polygon patch for the element
+
+    Args:
+        geometry: bounding box of the element
+        page_dimensions: dimensions of the Page in format (height, width)
+        label: label to display when hovered
+        color: color to draw box
+        alpha: opacity parameter to fill the boxes, 0 = transparent
+        linewidth: line width
+        fill: whether the patch should be filled
+        preserve_aspect_ratio: pass True if you passed True to the predictor
+
+    Returns:
+        a polygon Patch
+    """
+    if not geometry.shape == (4, 2):
+        raise ValueError("invalid geometry format")
+
+    # Unpack
+    height, width = page_dimensions
+    geometry[:, 0] = geometry[:, 0] * (max(width, height) if preserve_aspect_ratio else width)
+    geometry[:, 1] = geometry[:, 1] * (max(width, height) if preserve_aspect_ratio else height)
+
+    return patches.Polygon(
+        geometry,
+        fill=fill,
+        linewidth=linewidth,
+        edgecolor=(*color, alpha),
+        facecolor=(*color, alpha),
+        label=label,
+    )
+
+
+def create_obj_patch(
+    geometry: BoundingBox | Polygon4P | np.ndarray,
+    page_dimensions: tuple[int, int],
+    **kwargs: Any,
+) -> patches.Patch:
+    """Create a matplotlib patch for the element
+
+    Args:
+        geometry: bounding box (straight or rotated) of the element
+        page_dimensions: dimensions of the page in format (height, width)
+        **kwargs: keyword arguments for the patch
+
+    Returns:
+        a matplotlib Patch
+    """
+    if isinstance(geometry, tuple):
+        if len(geometry) == 2:  # straight word BB (2 pts)
+            return rect_patch(geometry, page_dimensions, **kwargs)
+        elif len(geometry) == 4:  # rotated word BB (4 pts)
+            return polygon_patch(np.asarray(geometry), page_dimensions, **kwargs)
+    elif isinstance(geometry, np.ndarray) and geometry.shape == (4, 2):  # rotated line
+        return polygon_patch(geometry, page_dimensions, **kwargs)
+    raise ValueError("invalid geometry format")
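A short sketch of create_obj_patch with a straight box, using relative coordinates and (height, width) page dimensions as documented above:

>>> import matplotlib.pyplot as plt
>>> from doctr.utils.visualization import create_obj_patch
>>> fig, ax = plt.subplots()
>>> patch = create_obj_patch(((0.1, 0.1), (0.4, 0.3)), (768, 1024), label="word", color=(0, 0, 1), linewidth=1)
>>> ax.add_patch(patch)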
+
+
+def get_colors(num_colors: int) -> list[tuple[float, float, float]]:
+    """Generate num_colors color for matplotlib
+
+    Args:
+        num_colors: number of colors to generate
+
+    Returns:
+        colors: list of generated colors
+    """
+    colors = []
+    for i in np.arange(0.0, 360.0, 360.0 / num_colors):
+        hue = i / 360.0
+        lightness = (50 + np.random.rand() * 10) / 100.0
+        saturation = (90 + np.random.rand() * 10) / 100.0
+        colors.append(colorsys.hls_to_rgb(hue, lightness, saturation))  # type: ignore[arg-type]
+    return colors
+
+
+
+[docs] +def visualize_page( + page: dict[str, Any], + image: np.ndarray, + words_only: bool = True, + display_artefacts: bool = True, + scale: float = 10, + interactive: bool = True, + add_labels: bool = True, + **kwargs: Any, +) -> Figure: + """Visualize a full page with predicted blocks, lines and words + + >>> import numpy as np + >>> import matplotlib.pyplot as plt + >>> from doctr.utils.visualization import visualize_page + >>> from doctr.models import ocr_db_crnn + >>> model = ocr_db_crnn(pretrained=True) + >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8) + >>> out = model([[input_page]]) + >>> visualize_page(out[0].pages[0].export(), input_page) + >>> plt.show() + + Args: + page: the exported Page of a Document + image: np array of the page, needs to have the same shape than page['dimensions'] + words_only: whether only words should be displayed + display_artefacts: whether artefacts should be displayed + scale: figsize of the largest windows side + interactive: whether the plot should be interactive + add_labels: for static plot, adds text labels on top of bounding box + **kwargs: keyword arguments for the polygon patch + + Returns: + the matplotlib figure + """ + # Get proper scale and aspect ratio + h, w = image.shape[:2] + size = (scale * w / h, scale) if h > w else (scale, h / w * scale) + fig, ax = plt.subplots(figsize=size) + # Display the image + ax.imshow(image) + # hide both axis + ax.axis("off") + + if interactive: + artists: list[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page) + + for block in page["blocks"]: + if not words_only: + rect = create_obj_patch( + block["geometry"], page["dimensions"], label="block", color=(0, 1, 0), linewidth=1, **kwargs + ) + # add patch on figure + ax.add_patch(rect) + if interactive: + # add patch to cursor's artists + artists.append(rect) + + for line in block["lines"]: + if not words_only: + rect = create_obj_patch( + line["geometry"], page["dimensions"], label="line", color=(1, 0, 0), linewidth=1, **kwargs + ) + ax.add_patch(rect) + if interactive: + artists.append(rect) + + for word in line["words"]: + rect = create_obj_patch( + word["geometry"], + page["dimensions"], + label=f"{word['value']} (confidence: {word['confidence']:.2%})", + color=(0, 0, 1), + **kwargs, + ) + ax.add_patch(rect) + if interactive: + artists.append(rect) + elif add_labels: + if len(word["geometry"]) == 5: + text_loc = ( + int(page["dimensions"][1] * (word["geometry"][0] - word["geometry"][2] / 2)), + int(page["dimensions"][0] * (word["geometry"][1] - word["geometry"][3] / 2)), + ) + else: + text_loc = ( + int(page["dimensions"][1] * word["geometry"][0][0]), + int(page["dimensions"][0] * word["geometry"][0][1]), + ) + + if len(word["geometry"]) == 2: + # We draw only if boxes are in straight format + ax.text( + *text_loc, + word["value"], + size=10, + alpha=0.5, + color=(0, 0, 1), + ) + + if display_artefacts: + for artefact in block["artefacts"]: + rect = create_obj_patch( + artefact["geometry"], + page["dimensions"], + label="artefact", + color=(0.5, 0.5, 0.5), + linewidth=1, + **kwargs, + ) + ax.add_patch(rect) + if interactive: + artists.append(rect) + + if interactive: + import mplcursors + + # Create mlp Cursor to hover patches in artists + mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label())) + fig.tight_layout(pad=0.0) + + return fig
+ + + +def visualize_kie_page( + page: dict[str, Any], + image: np.ndarray, + words_only: bool = False, + display_artefacts: bool = True, + scale: float = 10, + interactive: bool = True, + add_labels: bool = True, + **kwargs: Any, +) -> Figure: + """Visualize a full page with predicted blocks, lines and words + + >>> import numpy as np + >>> import matplotlib.pyplot as plt + >>> from doctr.utils.visualization import visualize_page + >>> from doctr.models import ocr_db_crnn + >>> model = ocr_db_crnn(pretrained=True) + >>> input_page = (255 * np.random.rand(600, 800, 3)).astype(np.uint8) + >>> out = model([[input_page]]) + >>> visualize_kie_page(out[0].pages[0].export(), input_page) + >>> plt.show() + + Args: + page: the exported Page of a Document + image: np array of the page, needs to have the same shape than page['dimensions'] + words_only: whether only words should be displayed + display_artefacts: whether artefacts should be displayed + scale: figsize of the largest windows side + interactive: whether the plot should be interactive + add_labels: for static plot, adds text labels on top of bounding box + **kwargs: keyword arguments for the polygon patch + + Returns: + the matplotlib figure + """ + # Get proper scale and aspect ratio + h, w = image.shape[:2] + size = (scale * w / h, scale) if h > w else (scale, h / w * scale) + fig, ax = plt.subplots(figsize=size) + # Display the image + ax.imshow(image) + # hide both axis + ax.axis("off") + + if interactive: + artists: list[patches.Patch] = [] # instantiate an empty list of patches (to be drawn on the page) + + colors = {k: color for color, k in zip(get_colors(len(page["predictions"])), page["predictions"])} + for key, value in page["predictions"].items(): + for prediction in value: + if not words_only: + rect = create_obj_patch( + prediction["geometry"], + page["dimensions"], + label=f"{key} \n {prediction['value']} (confidence: {prediction['confidence']:.2%}", + color=colors[key], + linewidth=1, + **kwargs, + ) + # add patch on figure + ax.add_patch(rect) + if interactive: + # add patch to cursor's artists + artists.append(rect) + + if interactive: + import mplcursors + + # Create mlp Cursor to hover patches in artists + mplcursors.Cursor(artists, hover=2).connect("add", lambda sel: sel.annotation.set_text(sel.artist.get_label())) + fig.tight_layout(pad=0.0) + + return fig + + +def draw_boxes(boxes: np.ndarray, image: np.ndarray, color: tuple[int, int, int] | None = None, **kwargs) -> None: + """Draw an array of relative straight boxes on an image + + Args: + boxes: array of relative boxes, of shape (*, 4) + image: np array, float32 or uint8 + color: color to use for bounding box edges + **kwargs: keyword arguments from `matplotlib.pyplot.plot` + """ + h, w = image.shape[:2] + # Convert boxes to absolute coords + _boxes = deepcopy(boxes) + _boxes[:, [0, 2]] *= w + _boxes[:, [1, 3]] *= h + _boxes = _boxes.astype(np.int32) + for box in _boxes.tolist(): + xmin, ymin, xmax, ymax = box + image = cv2.rectangle( + image, (xmin, ymin), (xmax, ymax), color=color if isinstance(color, tuple) else (0, 0, 255), thickness=2 + ) + plt.imshow(image) + plt.plot(**kwargs) +
+
+
+
+ + +
+
+ + Made with Sphinx and @pradyunsg's + + Furo + +
+
+ +
+
+ +
+
+ +
+
+ + + + + + + + \ No newline at end of file diff --git a/_modules/index.html b/_modules/index.html new file mode 100644 index 0000000000..b440c24212 --- /dev/null +++ b/_modules/index.html @@ -0,0 +1,384 @@ + + + + + + + + + + + + + Overview: module code - docTR documentation + + + + + + + + + + + + + + + + + + Contents + + + + + + Menu + + + + + + + + Expand + + + + + + Light mode + + + + + + + + + + + + + + Dark mode + + + + + + + Auto light/dark, in light mode + + + + + + + + + + + + + + + Auto light/dark, in dark mode + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +Skip to content + + + +
+
+
+ +
+ +
+
+ +
+ +
+
+ +
+
+
+ + + + + Back to top + +
+
+ +
+ +
+
+

All modules for which code is available

+ +
+
+
+ + +
+
+ + Made with Sphinx and @pradyunsg's + + Furo + +
+
+ +
+
+ +
+
+ +
+
+ + + + + + + + \ No newline at end of file diff --git a/_sources/changelog.rst.txt b/_sources/changelog.rst.txt new file mode 100644 index 0000000000..d2f66756be --- /dev/null +++ b/_sources/changelog.rst.txt @@ -0,0 +1,78 @@ +Changelog +========= + +v1.0.0 (2025-07-09) +------------------- +Release note: `v1.0.0 `_ + +v0.12.0 (2025-06-20) +-------------------- +Release note: `v0.12.0 `_ + +v0.11.0 (2025-01-30) +-------------------- +Release note: `v0.11.0 `_ + +v0.10.0 (2024-10-21) +-------------------- +Release note: `v0.10.0 `_ + +v0.9.0 (2024-08-08) +------------------- +Release note: `v0.9.0 `_ + +v0.8.1 (2024-03-04) +------------------- +Release note: `v0.8.1 `_ + +v0.8.0 (2024-02-28) +------------------- +Release note: `v0.8.0 `_ + +v0.7.0 (2023-09-09) +------------------- +Release note: `v0.7.0 `_ + +v0.6.0 (2022-09-29) +------------------- +Release note: `v0.6.0 `_ + +v0.5.1 (2022-03-22) +------------------- +Release note: `v0.5.1 `_ + +v0.5.0 (2021-12-31) +------------------- +Release note: `v0.5.0 `_ + +v0.4.1 (2021-11-22) +------------------- +Release note: `v0.4.1 `_ + +v0.4.0 (2021-10-01) +------------------- +Release note: `v0.4.0 `_ + +v0.3.1 (2021-08-27) +------------------- +Release note: `v0.3.1 `_ + +v0.3.0 (2021-07-02) +------------------- +Release note: `v0.3.0 `_ + +v0.2.1 (2021-05-28) +------------------- +Release note: `v0.2.1 `_ + +v0.2.0 (2021-05-11) +------------------- +Release note: `v0.2.0 `_ + +v0.1.1 (2021-03-18) +------------------- +Release note: `v0.1.1 `_ + +v0.1.0 (2021-03-05) +------------------- +Release note: `v0.1.0 `_ diff --git a/docs/source/community/resources.rst b/_sources/community/resources.rst.txt similarity index 100% rename from docs/source/community/resources.rst rename to _sources/community/resources.rst.txt diff --git a/docs/source/community/tools.rst b/_sources/community/tools.rst.txt similarity index 100% rename from docs/source/community/tools.rst rename to _sources/community/tools.rst.txt diff --git a/CODE_OF_CONDUCT.md b/_sources/contributing/code_of_conduct.md.txt similarity index 100% rename from CODE_OF_CONDUCT.md rename to _sources/contributing/code_of_conduct.md.txt diff --git a/CONTRIBUTING.md b/_sources/contributing/contributing.md.txt similarity index 100% rename from CONTRIBUTING.md rename to _sources/contributing/contributing.md.txt diff --git a/docs/source/getting_started/installing.rst b/_sources/getting_started/installing.rst.txt similarity index 100% rename from docs/source/getting_started/installing.rst rename to _sources/getting_started/installing.rst.txt diff --git a/docs/source/index.rst b/_sources/index.rst.txt similarity index 100% rename from docs/source/index.rst rename to _sources/index.rst.txt diff --git a/docs/source/modules/contrib.rst b/_sources/modules/contrib.rst.txt similarity index 100% rename from docs/source/modules/contrib.rst rename to _sources/modules/contrib.rst.txt diff --git a/docs/source/modules/datasets.rst b/_sources/modules/datasets.rst.txt similarity index 100% rename from docs/source/modules/datasets.rst rename to _sources/modules/datasets.rst.txt diff --git a/docs/source/modules/io.rst b/_sources/modules/io.rst.txt similarity index 100% rename from docs/source/modules/io.rst rename to _sources/modules/io.rst.txt diff --git a/docs/source/modules/models.rst b/_sources/modules/models.rst.txt similarity index 100% rename from docs/source/modules/models.rst rename to _sources/modules/models.rst.txt diff --git a/docs/source/modules/transforms.rst b/_sources/modules/transforms.rst.txt 
similarity index 100% rename from docs/source/modules/transforms.rst rename to _sources/modules/transforms.rst.txt diff --git a/docs/source/modules/utils.rst b/_sources/modules/utils.rst.txt similarity index 100% rename from docs/source/modules/utils.rst rename to _sources/modules/utils.rst.txt diff --git a/notebooks/README.rst b/_sources/notebooks.rst.txt similarity index 100% rename from notebooks/README.rst rename to _sources/notebooks.rst.txt diff --git a/docs/source/using_doctr/custom_models_training.rst b/_sources/using_doctr/custom_models_training.rst.txt similarity index 100% rename from docs/source/using_doctr/custom_models_training.rst rename to _sources/using_doctr/custom_models_training.rst.txt diff --git a/docs/source/using_doctr/running_on_aws.rst b/_sources/using_doctr/running_on_aws.rst.txt similarity index 100% rename from docs/source/using_doctr/running_on_aws.rst rename to _sources/using_doctr/running_on_aws.rst.txt diff --git a/docs/source/using_doctr/sharing_models.rst b/_sources/using_doctr/sharing_models.rst.txt similarity index 100% rename from docs/source/using_doctr/sharing_models.rst rename to _sources/using_doctr/sharing_models.rst.txt diff --git a/docs/source/using_doctr/using_contrib_modules.rst b/_sources/using_doctr/using_contrib_modules.rst.txt similarity index 100% rename from docs/source/using_doctr/using_contrib_modules.rst rename to _sources/using_doctr/using_contrib_modules.rst.txt diff --git a/docs/source/using_doctr/using_datasets.rst b/_sources/using_doctr/using_datasets.rst.txt similarity index 100% rename from docs/source/using_doctr/using_datasets.rst rename to _sources/using_doctr/using_datasets.rst.txt diff --git a/docs/source/using_doctr/using_model_export.rst b/_sources/using_doctr/using_model_export.rst.txt similarity index 100% rename from docs/source/using_doctr/using_model_export.rst rename to _sources/using_doctr/using_model_export.rst.txt diff --git a/docs/source/using_doctr/using_models.rst b/_sources/using_doctr/using_models.rst.txt similarity index 100% rename from docs/source/using_doctr/using_models.rst rename to _sources/using_doctr/using_models.rst.txt diff --git a/docs/source/_static/images/Logo-docTR-white.png b/_static/Logo-docTR-white.png similarity index 100% rename from docs/source/_static/images/Logo-docTR-white.png rename to _static/Logo-docTR-white.png diff --git a/_static/basic.css b/_static/basic.css new file mode 100644 index 0000000000..7ebbd6d07b --- /dev/null +++ b/_static/basic.css @@ -0,0 +1,914 @@ +/* + * Sphinx stylesheet -- basic theme. 
+ */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +div.section::after { + display: block; + content: ''; + clear: left; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin-top: 10px; +} + +ul.search li { + padding: 5px 0; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li p.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body { + min-width: 360px; + max-width: 800px; +} + +div.body p, div.body dd, div.body li, div.body blockquote { + 
-moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +a:visited { + color: #551A8B; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, figure.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, figure.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, figure.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +img.align-default, figure.align-default, .figure.align-default { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-default { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars -------------------------------------------------------------- */ + +div.sidebar, +aside.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 40%; + float: right; + clear: right; + overflow-x: auto; +} + +p.sidebar-title { + font-weight: bold; +} + +nav.contents, +aside.topic, +div.admonition, div.topic, blockquote { + clear: left; +} + +/* -- topics ---------------------------------------------------------------- */ + +nav.contents, +aside.topic, +div.topic { + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- content of sidebars/topics/admonitions -------------------------------- */ + +div.sidebar > :last-child, +aside.sidebar > :last-child, +nav.contents > :last-child, +aside.topic > :last-child, +div.topic > :last-child, +div.admonition > :last-child { + margin-bottom: 0; +} + +div.sidebar::after, +aside.sidebar::after, +nav.contents::after, +aside.topic::after, +div.topic::after, +div.admonition::after, +blockquote::after { + display: block; + content: ''; + clear: both; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table.align-default { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th { + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +th { + 
text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +th > :first-child, +td > :first-child { + margin-top: 0px; +} + +th > :last-child, +td > :last-child { + margin-bottom: 0px; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure, figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption, figcaption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number, +figcaption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text, +figcaption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles ---------------------------------------------------------- */ + +table.hlist { + margin: 1em 0; +} + +table.hlist td { + vertical-align: top; +} + +/* -- object description styles --------------------------------------------- */ + +.sig { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; +} + +.sig-name, code.descname { + background-color: transparent; + font-weight: bold; +} + +.sig-name { + font-size: 1.1em; +} + +code.descname { + font-size: 1.2em; +} + +.sig-prename, code.descclassname { + background-color: transparent; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.sig-param.n { + font-style: italic; +} + +/* C++ specific styling */ + +.sig-inline.c-texpr, +.sig-inline.cpp-texpr { + font-family: unset; +} + +.sig.c .k, .sig.c .kt, +.sig.cpp .k, .sig.cpp .kt { + color: #0033B3; +} + +.sig.c .m, +.sig.cpp .m { + color: #1750EB; +} + +.sig.c .s, .sig.c .sc, +.sig.cpp .s, .sig.cpp .sc { + color: #067D17; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +:not(li) > ol > li:first-child > :first-child, +:not(li) > ul > li:first-child > :first-child { + margin-top: 0px; +} + +:not(li) > ol > li:last-child > :last-child, +:not(li) > ul > li:last-child > :last-child { + margin-bottom: 0px; +} + +ol.simple ol p, +ol.simple ul p, +ul.simple ol p, +ul.simple ul p { + margin-top: 0; +} + +ol.simple > li:not(:first-child) > p, +ul.simple > li:not(:first-child) > p { + margin-top: 0; +} + +ol.simple p, +ul.simple p { + margin-bottom: 0; +} + +aside.footnote > span, +div.citation > span { + float: left; +} +aside.footnote > span:last-of-type, +div.citation > span:last-of-type { + padding-right: 0.5em; +} +aside.footnote > p { + margin-left: 2em; +} +div.citation > p { + margin-left: 4em; +} +aside.footnote > p:last-of-type, +div.citation > p:last-of-type { + margin-bottom: 0em; +} +aside.footnote > p:last-of-type:after, +div.citation > p:last-of-type:after { + content: ""; + clear: both; +} + +dl.field-list { + display: grid; + grid-template-columns: fit-content(30%) auto; +} + +dl.field-list > dt { + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; +} + 
+dl.field-list > dd { + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; +} + +dl { + margin-bottom: 15px; +} + +dd > :first-child { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +.sig dd { + margin-top: 0px; + margin-bottom: 0px; +} + +.sig dl { + margin-top: 0px; + margin-bottom: 0px; +} + +dl > dd:last-child, +dl > dd:last-child > :last-child { + margin-bottom: 0; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +.classifier:before { + font-style: normal; + margin: 0 0.5em; + content: ":"; + display: inline-block; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +.translated { + background-color: rgba(207, 255, 207, 0.2) +} + +.untranslated { + background-color: rgba(255, 207, 207, 0.2) +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +pre, div[class*="highlight-"] { + clear: both; +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; + white-space: nowrap; +} + +div[class*="highlight-"] { + margin: 1em 0; +} + +td.linenos pre { + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + display: block; +} + +table.highlighttable tbody { + display: block; +} + +table.highlighttable tr { + display: flex; +} + +table.highlighttable td { + margin: 0; + padding: 0; +} + +table.highlighttable td.linenos { + padding-right: 0.5em; +} + +table.highlighttable td.code { + flex: 1; + overflow: hidden; +} + +.highlight .hll { + display: block; +} + +div.highlight pre, +table.highlighttable pre { + margin: 0; +} + +div.code-block-caption + div { + margin-top: 0; +} + +div.code-block-caption { + margin-top: 1em; + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +table.highlighttable td.linenos, +span.linenos, +div.highlight span.gp { /* gp: Generic.Prompt */ + user-select: none; + -webkit-user-select: text; /* Safari fallback only */ + -webkit-user-select: none; /* Chrome/Safari */ + -moz-user-select: none; /* Firefox */ + -ms-user-select: none; /* IE10+ */ +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + margin: 1em 0; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display 
---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: absolute; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/_static/check-solid.svg b/_static/check-solid.svg new file mode 100644 index 0000000000..92fad4b5c0 --- /dev/null +++ b/_static/check-solid.svg @@ -0,0 +1,4 @@ + + + + diff --git a/_static/clipboard.min.js b/_static/clipboard.min.js new file mode 100644 index 0000000000..54b3c46381 --- /dev/null +++ b/_static/clipboard.min.js @@ -0,0 +1,7 @@ +/*! + * clipboard.js v2.0.8 + * https://clipboardjs.com/ + * + * Licensed MIT © Zeno Rocha + */ +!function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.ClipboardJS=e():t.ClipboardJS=e()}(this,function(){return n={686:function(t,e,n){"use strict";n.d(e,{default:function(){return o}});var e=n(279),i=n.n(e),e=n(370),u=n.n(e),e=n(817),c=n.n(e);function a(t){try{return document.execCommand(t)}catch(t){return}}var f=function(t){t=c()(t);return a("cut"),t};var l=function(t){var e,n,o,r=1 + + + + diff --git a/_static/copybutton.css b/_static/copybutton.css new file mode 100644 index 0000000000..f1916ec7d1 --- /dev/null +++ b/_static/copybutton.css @@ -0,0 +1,94 @@ +/* Copy buttons */ +button.copybtn { + position: absolute; + display: flex; + top: .3em; + right: .3em; + width: 1.7em; + height: 1.7em; + opacity: 0; + transition: opacity 0.3s, border .3s, background-color .3s; + user-select: none; + padding: 0; + border: none; + outline: none; + border-radius: 0.4em; + /* The colors that GitHub uses */ + border: #1b1f2426 1px solid; + background-color: #f6f8fa; + color: #57606a; +} + +button.copybtn.success { + border-color: #22863a; + color: #22863a; +} + +button.copybtn svg { + stroke: currentColor; + width: 1.5em; + height: 1.5em; + padding: 0.1em; +} + +div.highlight { + position: relative; +} + +/* Show the copybutton */ +.highlight:hover button.copybtn, button.copybtn.success { + opacity: 1; +} + +.highlight button.copybtn:hover { + background-color: rgb(235, 235, 235); +} + +.highlight button.copybtn:active { + background-color: rgb(187, 187, 187); +} + +/** + * A minimal CSS-only tooltip copied from: + * https://codepen.io/mildrenben/pen/rVBrpK + * + * To use, write HTML like the following: + * + *

+ * [HTML example stripped here: an element with class "o-tooltip--left" and a data-tooltip attribute, wrapping the text "Short"]
+ */ + .o-tooltip--left { + position: relative; + } + + .o-tooltip--left:after { + opacity: 0; + visibility: hidden; + position: absolute; + content: attr(data-tooltip); + padding: .2em; + font-size: .8em; + left: -.2em; + background: grey; + color: white; + white-space: nowrap; + z-index: 2; + border-radius: 2px; + transform: translateX(-102%) translateY(0); + transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); +} + +.o-tooltip--left:hover:after { + display: block; + opacity: 1; + visibility: visible; + transform: translateX(-100%) translateY(0); + transition: opacity 0.2s cubic-bezier(0.64, 0.09, 0.08, 1), transform 0.2s cubic-bezier(0.64, 0.09, 0.08, 1); + transition-delay: .5s; +} + +/* By default the copy button shouldn't show up when printing a page */ +@media print { + button.copybtn { + display: none; + } +} diff --git a/_static/copybutton.js b/_static/copybutton.js new file mode 100644 index 0000000000..2ea7ff3e21 --- /dev/null +++ b/_static/copybutton.js @@ -0,0 +1,248 @@ +// Localization support +const messages = { + 'en': { + 'copy': 'Copy', + 'copy_to_clipboard': 'Copy to clipboard', + 'copy_success': 'Copied!', + 'copy_failure': 'Failed to copy', + }, + 'es' : { + 'copy': 'Copiar', + 'copy_to_clipboard': 'Copiar al portapapeles', + 'copy_success': '¡Copiado!', + 'copy_failure': 'Error al copiar', + }, + 'de' : { + 'copy': 'Kopieren', + 'copy_to_clipboard': 'In die Zwischenablage kopieren', + 'copy_success': 'Kopiert!', + 'copy_failure': 'Fehler beim Kopieren', + }, + 'fr' : { + 'copy': 'Copier', + 'copy_to_clipboard': 'Copier dans le presse-papier', + 'copy_success': 'Copié !', + 'copy_failure': 'Échec de la copie', + }, + 'ru': { + 'copy': 'Скопировать', + 'copy_to_clipboard': 'Скопировать в буфер', + 'copy_success': 'Скопировано!', + 'copy_failure': 'Не удалось скопировать', + }, + 'zh-CN': { + 'copy': '复制', + 'copy_to_clipboard': '复制到剪贴板', + 'copy_success': '复制成功!', + 'copy_failure': '复制失败', + }, + 'it' : { + 'copy': 'Copiare', + 'copy_to_clipboard': 'Copiato negli appunti', + 'copy_success': 'Copiato!', + 'copy_failure': 'Errore durante la copia', + } +} + +let locale = 'en' +if( document.documentElement.lang !== undefined + && messages[document.documentElement.lang] !== undefined ) { + locale = document.documentElement.lang +} + +let doc_url_root = DOCUMENTATION_OPTIONS.URL_ROOT; +if (doc_url_root == '#') { + doc_url_root = ''; +} + +/** + * SVG files for our copy buttons + */ +let iconCheck = ` + ${messages[locale]['copy_success']} + + +` + +// If the user specified their own SVG use that, otherwise use the default +let iconCopy = ``; +if (!iconCopy) { + iconCopy = ` + ${messages[locale]['copy_to_clipboard']} + + + +` +} + +/** + * Set up copy/paste for code blocks + */ + +const runWhenDOMLoaded = cb => { + if (document.readyState != 'loading') { + cb() + } else if (document.addEventListener) { + document.addEventListener('DOMContentLoaded', cb) + } else { + document.attachEvent('onreadystatechange', function() { + if (document.readyState == 'complete') cb() + }) + } +} + +const codeCellId = index => `codecell${index}` + +// Clears selected text since ClipboardJS will select the text when copying +const clearSelection = () => { + if (window.getSelection) { + window.getSelection().removeAllRanges() + } else if (document.selection) { + document.selection.empty() + } +} + +// Changes tooltip text for a moment, then changes it back +// We want the timeout of our `success` class to be a bit shorter than the +// 
tooltip and icon change, so that we can hide the icon before changing back. +var timeoutIcon = 2000; +var timeoutSuccessClass = 1500; + +const temporarilyChangeTooltip = (el, oldText, newText) => { + el.setAttribute('data-tooltip', newText) + el.classList.add('success') + // Remove success a little bit sooner than we change the tooltip + // So that we can use CSS to hide the copybutton first + setTimeout(() => el.classList.remove('success'), timeoutSuccessClass) + setTimeout(() => el.setAttribute('data-tooltip', oldText), timeoutIcon) +} + +// Changes the copy button icon for two seconds, then changes it back +const temporarilyChangeIcon = (el) => { + el.innerHTML = iconCheck; + setTimeout(() => {el.innerHTML = iconCopy}, timeoutIcon) +} + +const addCopyButtonToCodeCells = () => { + // If ClipboardJS hasn't loaded, wait a bit and try again. This + // happens because we load ClipboardJS asynchronously. + if (window.ClipboardJS === undefined) { + setTimeout(addCopyButtonToCodeCells, 250) + return + } + + // Add copybuttons to all of our code cells + const COPYBUTTON_SELECTOR = 'div.highlight pre'; + const codeCells = document.querySelectorAll(COPYBUTTON_SELECTOR) + codeCells.forEach((codeCell, index) => { + const id = codeCellId(index) + codeCell.setAttribute('id', id) + + const clipboardButton = id => + `` + codeCell.insertAdjacentHTML('afterend', clipboardButton(id)) + }) + +function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +/** + * Removes excluded text from a Node. + * + * @param {Node} target Node to filter. + * @param {string} exclude CSS selector of nodes to exclude. + * @returns {DOMString} Text from `target` with text removed. + */ +function filterText(target, exclude) { + const clone = target.cloneNode(true); // clone as to not modify the live DOM + if (exclude) { + // remove excluded nodes + clone.querySelectorAll(exclude).forEach(node => node.remove()); + } + return clone.innerText; +} + +// Callback when a copy button is clicked. Will be passed the node that was clicked +// should then grab the text and replace pieces of text that shouldn't be used in output +function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true, copyEmptyLines = true, lineContinuationChar = "", hereDocDelim = "") { + var regexp; + var match; + + // Do we check for line continuation characters and "HERE-documents"? 
+ var useLineCont = !!lineContinuationChar + var useHereDoc = !!hereDocDelim + + // create regexp to capture prompt and remaining line + if (isRegexp) { + regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)') + } else { + regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)') + } + + const outputLines = []; + var promptFound = false; + var gotLineCont = false; + var gotHereDoc = false; + const lineGotPrompt = []; + for (const line of textContent.split('\n')) { + match = line.match(regexp) + if (match || gotLineCont || gotHereDoc) { + promptFound = regexp.test(line) + lineGotPrompt.push(promptFound) + if (removePrompts && promptFound) { + outputLines.push(match[2]) + } else { + outputLines.push(line) + } + gotLineCont = line.endsWith(lineContinuationChar) & useLineCont + if (line.includes(hereDocDelim) & useHereDoc) + gotHereDoc = !gotHereDoc + } else if (!onlyCopyPromptLines) { + outputLines.push(line) + } else if (copyEmptyLines && line.trim() === '') { + outputLines.push(line) + } + } + + // If no lines with the prompt were found then just use original lines + if (lineGotPrompt.some(v => v === true)) { + textContent = outputLines.join('\n'); + } + + // Remove a trailing newline to avoid auto-running when pasting + if (textContent.endsWith("\n")) { + textContent = textContent.slice(0, -1) + } + return textContent +} + + +var copyTargetText = (trigger) => { + var target = document.querySelector(trigger.attributes['data-clipboard-target'].value); + + // get filtered text + let exclude = '.linenos'; + + let text = filterText(target, exclude); + return formatCopyText(text, '', false, true, true, true, '', '') +} + + // Initialize with a callback so we can modify the text before copy + const clipboard = new ClipboardJS('.copybtn', {text: copyTargetText}) + + // Update UI with error/success messages + clipboard.on('success', event => { + clearSelection() + temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_success']) + temporarilyChangeIcon(event.trigger) + }) + + clipboard.on('error', event => { + temporarilyChangeTooltip(event.trigger, messages[locale]['copy'], messages[locale]['copy_failure']) + }) +} + +runWhenDOMLoaded(addCopyButtonToCodeCells) \ No newline at end of file diff --git a/_static/copybutton_funcs.js b/_static/copybutton_funcs.js new file mode 100644 index 0000000000..dbe1aaad79 --- /dev/null +++ b/_static/copybutton_funcs.js @@ -0,0 +1,73 @@ +function escapeRegExp(string) { + return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string +} + +/** + * Removes excluded text from a Node. + * + * @param {Node} target Node to filter. + * @param {string} exclude CSS selector of nodes to exclude. + * @returns {DOMString} Text from `target` with text removed. + */ +export function filterText(target, exclude) { + const clone = target.cloneNode(true); // clone as to not modify the live DOM + if (exclude) { + // remove excluded nodes + clone.querySelectorAll(exclude).forEach(node => node.remove()); + } + return clone.innerText; +} + +// Callback when a copy button is clicked. 
Will be passed the node that was clicked +// should then grab the text and replace pieces of text that shouldn't be used in output +export function formatCopyText(textContent, copybuttonPromptText, isRegexp = false, onlyCopyPromptLines = true, removePrompts = true, copyEmptyLines = true, lineContinuationChar = "", hereDocDelim = "") { + var regexp; + var match; + + // Do we check for line continuation characters and "HERE-documents"? + var useLineCont = !!lineContinuationChar + var useHereDoc = !!hereDocDelim + + // create regexp to capture prompt and remaining line + if (isRegexp) { + regexp = new RegExp('^(' + copybuttonPromptText + ')(.*)') + } else { + regexp = new RegExp('^(' + escapeRegExp(copybuttonPromptText) + ')(.*)') + } + + const outputLines = []; + var promptFound = false; + var gotLineCont = false; + var gotHereDoc = false; + const lineGotPrompt = []; + for (const line of textContent.split('\n')) { + match = line.match(regexp) + if (match || gotLineCont || gotHereDoc) { + promptFound = regexp.test(line) + lineGotPrompt.push(promptFound) + if (removePrompts && promptFound) { + outputLines.push(match[2]) + } else { + outputLines.push(line) + } + gotLineCont = line.endsWith(lineContinuationChar) & useLineCont + if (line.includes(hereDocDelim) & useHereDoc) + gotHereDoc = !gotHereDoc + } else if (!onlyCopyPromptLines) { + outputLines.push(line) + } else if (copyEmptyLines && line.trim() === '') { + outputLines.push(line) + } + } + + // If no lines with the prompt were found then just use original lines + if (lineGotPrompt.some(v => v === true)) { + textContent = outputLines.join('\n'); + } + + // Remove a trailing newline to avoid auto-running when pasting + if (textContent.endsWith("\n")) { + textContent = textContent.slice(0, -1) + } + return textContent +} diff --git a/docs/source/_static/css/mindee.css b/_static/css/mindee.css similarity index 100% rename from docs/source/_static/css/mindee.css rename to _static/css/mindee.css diff --git a/_static/debug.css b/_static/debug.css new file mode 100644 index 0000000000..74d4aec33e --- /dev/null +++ b/_static/debug.css @@ -0,0 +1,69 @@ +/* + This CSS file should be overridden by the theme authors. It's + meant for debugging and developing the skeleton that this theme provides. 
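As an aside on the sphinx-copybutton sources added above: `formatCopyText` (exported by `_static/copybutton_funcs.js` and inlined in `_static/copybutton.js`) is the piece that strips interpreter prompts before text reaches the clipboard. A minimal sketch of calling it directly, assuming Node with ES-module support; the import path and the sample snippet are illustrative only:

```js
// Sketch only: exercising formatCopyText from the copybutton_funcs.js module above.
import { formatCopyText } from "./_static/copybutton_funcs.js";

const snippet = ">>> print('hello')\nhello\n";

// Arguments: text, promptText, isRegexp, onlyCopyPromptLines, removePrompts
const copied = formatCopyText(snippet, ">>> ", false, true, true);

console.log(JSON.stringify(copied));
// Expected output: "print('hello')". The prompt is removed, and the plain
// output line "hello" is dropped because onlyCopyPromptLines is true.
```

In the built pages the same function is reached through `copyTargetText` in `copybutton.js`, which first filters out `.linenos` spans via `filterText` and then calls `formatCopyText` with an empty prompt, so code blocks are copied essentially verbatim.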
+*/ +body { + font-family: -apple-system, "Segoe UI", Roboto, Helvetica, Arial, sans-serif, + "Apple Color Emoji", "Segoe UI Emoji"; + background: lavender; +} +.sb-announcement { + background: rgb(131, 131, 131); +} +.sb-announcement__inner { + background: black; + color: white; +} +.sb-header { + background: lightskyblue; +} +.sb-header__inner { + background: royalblue; + color: white; +} +.sb-header-secondary { + background: lightcyan; +} +.sb-header-secondary__inner { + background: cornflowerblue; + color: white; +} +.sb-sidebar-primary { + background: lightgreen; +} +.sb-main { + background: blanchedalmond; +} +.sb-main__inner { + background: antiquewhite; +} +.sb-header-article { + background: lightsteelblue; +} +.sb-article-container { + background: snow; +} +.sb-article-main { + background: white; +} +.sb-footer-article { + background: lightpink; +} +.sb-sidebar-secondary { + background: lightgoldenrodyellow; +} +.sb-footer-content { + background: plum; +} +.sb-footer-content__inner { + background: palevioletred; +} +.sb-footer { + background: pink; +} +.sb-footer__inner { + background: salmon; +} +.sb-article { + background: white; +} diff --git a/_static/doctools.js b/_static/doctools.js new file mode 100644 index 0000000000..0398ebb9f0 --- /dev/null +++ b/_static/doctools.js @@ -0,0 +1,149 @@ +/* + * Base JavaScript utilities for all Sphinx HTML documentation. + */ +"use strict"; + +const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ + "TEXTAREA", + "INPUT", + "SELECT", + "BUTTON", +]); + +const _ready = (callback) => { + if (document.readyState !== "loading") { + callback(); + } else { + document.addEventListener("DOMContentLoaded", callback); + } +}; + +/** + * Small JavaScript module for the documentation. + */ +const Documentation = { + init: () => { + Documentation.initDomainIndexTable(); + Documentation.initOnKeyListeners(); + }, + + /** + * i18n support + */ + TRANSLATIONS: {}, + PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), + LOCALE: "unknown", + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext: (string) => { + const translated = Documentation.TRANSLATIONS[string]; + switch (typeof translated) { + case "undefined": + return string; // no translation + case "string": + return translated; // translation exists + default: + return translated[0]; // (singular, plural) translation tuple exists + } + }, + + ngettext: (singular, plural, n) => { + const translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated !== "undefined") + return translated[Documentation.PLURAL_EXPR(n)]; + return n === 1 ? 
singular : plural; + }, + + addTranslations: (catalog) => { + Object.assign(Documentation.TRANSLATIONS, catalog.messages); + Documentation.PLURAL_EXPR = new Function( + "n", + `return (${catalog.plural_expr})` + ); + Documentation.LOCALE = catalog.locale; + }, + + /** + * helper function to focus on search bar + */ + focusSearchBar: () => { + document.querySelectorAll("input[name=q]")[0]?.focus(); + }, + + /** + * Initialise the domain index toggle buttons + */ + initDomainIndexTable: () => { + const toggler = (el) => { + const idNumber = el.id.substr(7); + const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); + if (el.src.substr(-9) === "minus.png") { + el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; + toggledRows.forEach((el) => (el.style.display = "none")); + } else { + el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; + toggledRows.forEach((el) => (el.style.display = "")); + } + }; + + const togglerElements = document.querySelectorAll("img.toggler"); + togglerElements.forEach((el) => + el.addEventListener("click", (event) => toggler(event.currentTarget)) + ); + togglerElements.forEach((el) => (el.style.display = "")); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); + }, + + initOnKeyListeners: () => { + // only install a listener if it is really needed + if ( + !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && + !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS + ) + return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.altKey || event.ctrlKey || event.metaKey) return; + + if (!event.shiftKey) { + switch (event.key) { + case "ArrowLeft": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const prevLink = document.querySelector('link[rel="prev"]'); + if (prevLink && prevLink.href) { + window.location.href = prevLink.href; + event.preventDefault(); + } + break; + case "ArrowRight": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const nextLink = document.querySelector('link[rel="next"]'); + if (nextLink && nextLink.href) { + window.location.href = nextLink.href; + event.preventDefault(); + } + break; + } + } + + // some keyboard layouts may need Shift to get / + switch (event.key) { + case "/": + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; + Documentation.focusSearchBar(); + event.preventDefault(); + } + }); + }, +}; + +// quick alias for translations +const _ = Documentation.gettext; + +_ready(Documentation.init); diff --git a/_static/documentation_options.js b/_static/documentation_options.js new file mode 100644 index 0000000000..61f93b6596 --- /dev/null +++ b/_static/documentation_options.js @@ -0,0 +1,13 @@ +const DOCUMENTATION_OPTIONS = { + VERSION: '1.0.1a0-git', + LANGUAGE: 'en', + COLLAPSE_INDEX: false, + BUILDER: 'html', + FILE_SUFFIX: '.html', + LINK_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt', + NAVIGATION_WITH_KEYS: true, + SHOW_SEARCH_SUMMARY: true, + ENABLE_SEARCH_SHORTCUTS: true, +}; \ No newline at end of file diff --git a/docs/source/_static/images/favicon.ico b/_static/favicon.ico similarity index 100% rename from docs/source/_static/images/favicon.ico rename to _static/favicon.ico diff --git a/_static/file.png b/_static/file.png new file mode 100644 index 0000000000..a858a410e4 Binary files /dev/null and b/_static/file.png differ diff --git a/_static/images/Logo-docTR-white.png 
b/_static/images/Logo-docTR-white.png new file mode 100644 index 0000000000..988eb2cf7f Binary files /dev/null and b/_static/images/Logo-docTR-white.png differ diff --git a/_static/images/favicon.ico b/_static/images/favicon.ico new file mode 100644 index 0000000000..d9bf77d4a0 Binary files /dev/null and b/_static/images/favicon.ico differ diff --git a/docs/source/_static/js/custom.js b/_static/js/custom.js similarity index 100% rename from docs/source/_static/js/custom.js rename to _static/js/custom.js diff --git a/_static/language_data.js b/_static/language_data.js new file mode 100644 index 0000000000..c7fe6c6faf --- /dev/null +++ b/_static/language_data.js @@ -0,0 +1,192 @@ +/* + * This script contains the language-specific data used by searchtools.js, + * namely the list of stopwords, stemmer, scorer and splitter. + */ + +var stopwords = ["a", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "near", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with"]; + + +/* Non-minified version is copied as a separate JS file, if available */ + +/** + * Porter Stemmer + */ +var Stemmer = function() { + + var step2list = { + ational: 'ate', + tional: 'tion', + enci: 'ence', + anci: 'ance', + izer: 'ize', + bli: 'ble', + alli: 'al', + entli: 'ent', + eli: 'e', + ousli: 'ous', + ization: 'ize', + ation: 'ate', + ator: 'ate', + alism: 'al', + iveness: 'ive', + fulness: 'ful', + ousness: 'ous', + aliti: 'al', + iviti: 'ive', + biliti: 'ble', + logi: 'log' + }; + + var step3list = { + icate: 'ic', + ative: '', + alize: 'al', + iciti: 'ic', + ical: 'ic', + ful: '', + ness: '' + }; + + var c = "[^aeiou]"; // consonant + var v = "[aeiouy]"; // vowel + var C = c + "[^aeiouy]*"; // consonant sequence + var V = v + "[aeiou]*"; // vowel sequence + + var mgr0 = "^(" + C + ")?" + V + C; // [C]VC... is m>0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" + V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" 
+ v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if (re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + diff --git a/_static/minus.png b/_static/minus.png new file mode 100644 index 0000000000..d96755fdaf Binary files /dev/null and b/_static/minus.png differ diff --git a/_static/plus.png b/_static/plus.png new file mode 100644 index 0000000000..7107cec93a Binary files /dev/null and b/_static/plus.png differ diff --git a/_static/pygments.css b/_static/pygments.css new file mode 100644 index 0000000000..73f0553d82 --- /dev/null +++ b/_static/pygments.css @@ -0,0 +1,249 @@ +.highlight pre { line-height: 125%; } +.highlight td.linenos .normal { color: #666666; background-color: transparent; padding-left: 5px; padding-right: 5px; } +.highlight span.linenos { color: #666666; background-color: transparent; padding-left: 5px; padding-right: 5px; } +.highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight 
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f0f0f0; } +.highlight .c { color: #60A0B0; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #F00 } /* Error */ +.highlight .k { color: #007020; font-weight: bold } /* Keyword */ +.highlight .o { color: #666 } /* Operator */ +.highlight .ch { color: #60A0B0; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #60A0B0; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #007020 } /* Comment.Preproc */ +.highlight .cpf { color: #60A0B0; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #60A0B0; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #60A0B0; background-color: #FFF0F0 } /* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +.highlight .gr { color: #F00 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #888 } /* Generic.Output */ +.highlight .gp { color: #C65D09; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #04D } /* Generic.Traceback */ +.highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #007020 } /* Keyword.Pseudo */ +.highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #902000 } /* Keyword.Type */ +.highlight .m { color: #40A070 } /* Literal.Number */ +.highlight .s { color: #4070A0 } /* Literal.String */ +.highlight .na { color: #4070A0 } /* Name.Attribute */ +.highlight .nb { color: #007020 } /* Name.Builtin */ +.highlight .nc { color: #0E84B5; font-weight: bold } /* Name.Class */ +.highlight .no { color: #60ADD5 } /* Name.Constant */ +.highlight .nd { color: #555; font-weight: bold } /* Name.Decorator */ +.highlight .ni { color: #D55537; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #007020 } /* Name.Exception */ +.highlight .nf { color: #06287E } /* Name.Function */ +.highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ +.highlight .nn { color: #0E84B5; font-weight: bold } /* Name.Namespace */ +.highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #BB60D5 } /* Name.Variable */ +.highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #BBB } /* Text.Whitespace */ +.highlight .mb { color: #40A070 } /* Literal.Number.Bin */ +.highlight .mf { color: #40A070 } /* Literal.Number.Float */ +.highlight .mh { color: #40A070 } /* Literal.Number.Hex */ +.highlight .mi { color: #40A070 } /* Literal.Number.Integer */ +.highlight .mo { color: #40A070 } /* Literal.Number.Oct */ +.highlight .sa { color: #4070A0 } /* Literal.String.Affix */ +.highlight .sb { color: #4070A0 } /* Literal.String.Backtick */ +.highlight .sc { color: #4070A0 } /* Literal.String.Char */ +.highlight .dl { color: #4070A0 } /* 
Literal.String.Delimiter */ +.highlight .sd { color: #4070A0; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #4070A0 } /* Literal.String.Double */ +.highlight .se { color: #4070A0; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #4070A0 } /* Literal.String.Heredoc */ +.highlight .si { color: #70A0D0; font-style: italic } /* Literal.String.Interpol */ +.highlight .sx { color: #C65D09 } /* Literal.String.Other */ +.highlight .sr { color: #235388 } /* Literal.String.Regex */ +.highlight .s1 { color: #4070A0 } /* Literal.String.Single */ +.highlight .ss { color: #517918 } /* Literal.String.Symbol */ +.highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #06287E } /* Name.Function.Magic */ +.highlight .vc { color: #BB60D5 } /* Name.Variable.Class */ +.highlight .vg { color: #BB60D5 } /* Name.Variable.Global */ +.highlight .vi { color: #BB60D5 } /* Name.Variable.Instance */ +.highlight .vm { color: #BB60D5 } /* Name.Variable.Magic */ +.highlight .il { color: #40A070 } /* Literal.Number.Integer.Long */ +@media not print { +body[data-theme="dark"] .highlight pre { line-height: 125%; } +body[data-theme="dark"] .highlight td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +body[data-theme="dark"] .highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +body[data-theme="dark"] .highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +body[data-theme="dark"] .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +body[data-theme="dark"] .highlight .hll { background-color: #49483e } +body[data-theme="dark"] .highlight { background: #272822; color: #F8F8F2 } +body[data-theme="dark"] .highlight .c { color: #959077 } /* Comment */ +body[data-theme="dark"] .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +body[data-theme="dark"] .highlight .esc { color: #F8F8F2 } /* Escape */ +body[data-theme="dark"] .highlight .g { color: #F8F8F2 } /* Generic */ +body[data-theme="dark"] .highlight .k { color: #66D9EF } /* Keyword */ +body[data-theme="dark"] .highlight .l { color: #AE81FF } /* Literal */ +body[data-theme="dark"] .highlight .n { color: #F8F8F2 } /* Name */ +body[data-theme="dark"] .highlight .o { color: #FF4689 } /* Operator */ +body[data-theme="dark"] .highlight .x { color: #F8F8F2 } /* Other */ +body[data-theme="dark"] .highlight .p { color: #F8F8F2 } /* Punctuation */ +body[data-theme="dark"] .highlight .ch { color: #959077 } /* Comment.Hashbang */ +body[data-theme="dark"] .highlight .cm { color: #959077 } /* Comment.Multiline */ +body[data-theme="dark"] .highlight .cp { color: #959077 } /* Comment.Preproc */ +body[data-theme="dark"] .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +body[data-theme="dark"] .highlight .c1 { color: #959077 } /* Comment.Single */ +body[data-theme="dark"] .highlight .cs { color: #959077 } /* Comment.Special */ +body[data-theme="dark"] .highlight .gd { color: #FF4689 } /* Generic.Deleted */ +body[data-theme="dark"] .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +body[data-theme="dark"] .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +body[data-theme="dark"] .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +body[data-theme="dark"] .highlight .gh { 
color: #F8F8F2 } /* Generic.Heading */ +body[data-theme="dark"] .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +body[data-theme="dark"] .highlight .go { color: #66D9EF } /* Generic.Output */ +body[data-theme="dark"] .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +body[data-theme="dark"] .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +body[data-theme="dark"] .highlight .gu { color: #959077 } /* Generic.Subheading */ +body[data-theme="dark"] .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +body[data-theme="dark"] .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +body[data-theme="dark"] .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +body[data-theme="dark"] .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +body[data-theme="dark"] .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +body[data-theme="dark"] .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +body[data-theme="dark"] .highlight .kt { color: #66D9EF } /* Keyword.Type */ +body[data-theme="dark"] .highlight .ld { color: #E6DB74 } /* Literal.Date */ +body[data-theme="dark"] .highlight .m { color: #AE81FF } /* Literal.Number */ +body[data-theme="dark"] .highlight .s { color: #E6DB74 } /* Literal.String */ +body[data-theme="dark"] .highlight .na { color: #A6E22E } /* Name.Attribute */ +body[data-theme="dark"] .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +body[data-theme="dark"] .highlight .nc { color: #A6E22E } /* Name.Class */ +body[data-theme="dark"] .highlight .no { color: #66D9EF } /* Name.Constant */ +body[data-theme="dark"] .highlight .nd { color: #A6E22E } /* Name.Decorator */ +body[data-theme="dark"] .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +body[data-theme="dark"] .highlight .ne { color: #A6E22E } /* Name.Exception */ +body[data-theme="dark"] .highlight .nf { color: #A6E22E } /* Name.Function */ +body[data-theme="dark"] .highlight .nl { color: #F8F8F2 } /* Name.Label */ +body[data-theme="dark"] .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +body[data-theme="dark"] .highlight .nx { color: #A6E22E } /* Name.Other */ +body[data-theme="dark"] .highlight .py { color: #F8F8F2 } /* Name.Property */ +body[data-theme="dark"] .highlight .nt { color: #FF4689 } /* Name.Tag */ +body[data-theme="dark"] .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +body[data-theme="dark"] .highlight .ow { color: #FF4689 } /* Operator.Word */ +body[data-theme="dark"] .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +body[data-theme="dark"] .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +body[data-theme="dark"] .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +body[data-theme="dark"] .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +body[data-theme="dark"] .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +body[data-theme="dark"] .highlight .mi { color: #AE81FF } /* Literal.Number.Integer */ +body[data-theme="dark"] .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +body[data-theme="dark"] .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +body[data-theme="dark"] .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +body[data-theme="dark"] .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +body[data-theme="dark"] .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +body[data-theme="dark"] .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +body[data-theme="dark"] .highlight .s2 { color: #E6DB74 } /* 
Literal.String.Double */ +body[data-theme="dark"] .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +body[data-theme="dark"] .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +body[data-theme="dark"] .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +body[data-theme="dark"] .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +body[data-theme="dark"] .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +body[data-theme="dark"] .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +body[data-theme="dark"] .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +body[data-theme="dark"] .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +body[data-theme="dark"] .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +body[data-theme="dark"] .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +body[data-theme="dark"] .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +body[data-theme="dark"] .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +body[data-theme="dark"] .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +body[data-theme="dark"] .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ +@media (prefers-color-scheme: dark) { +body:not([data-theme="light"]) .highlight pre { line-height: 125%; } +body:not([data-theme="light"]) .highlight td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +body:not([data-theme="light"]) .highlight span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +body:not([data-theme="light"]) .highlight td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +body:not([data-theme="light"]) .highlight span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +body:not([data-theme="light"]) .highlight .hll { background-color: #49483e } +body:not([data-theme="light"]) .highlight { background: #272822; color: #F8F8F2 } +body:not([data-theme="light"]) .highlight .c { color: #959077 } /* Comment */ +body:not([data-theme="light"]) .highlight .err { color: #ED007E; background-color: #1E0010 } /* Error */ +body:not([data-theme="light"]) .highlight .esc { color: #F8F8F2 } /* Escape */ +body:not([data-theme="light"]) .highlight .g { color: #F8F8F2 } /* Generic */ +body:not([data-theme="light"]) .highlight .k { color: #66D9EF } /* Keyword */ +body:not([data-theme="light"]) .highlight .l { color: #AE81FF } /* Literal */ +body:not([data-theme="light"]) .highlight .n { color: #F8F8F2 } /* Name */ +body:not([data-theme="light"]) .highlight .o { color: #FF4689 } /* Operator */ +body:not([data-theme="light"]) .highlight .x { color: #F8F8F2 } /* Other */ +body:not([data-theme="light"]) .highlight .p { color: #F8F8F2 } /* Punctuation */ +body:not([data-theme="light"]) .highlight .ch { color: #959077 } /* Comment.Hashbang */ +body:not([data-theme="light"]) .highlight .cm { color: #959077 } /* Comment.Multiline */ +body:not([data-theme="light"]) .highlight .cp { color: #959077 } /* Comment.Preproc */ +body:not([data-theme="light"]) .highlight .cpf { color: #959077 } /* Comment.PreprocFile */ +body:not([data-theme="light"]) .highlight .c1 { color: #959077 } /* Comment.Single */ +body:not([data-theme="light"]) .highlight .cs { color: #959077 } /* Comment.Special */ +body:not([data-theme="light"]) .highlight .gd { color: #FF4689 } /* Generic.Deleted */ 
+body:not([data-theme="light"]) .highlight .ge { color: #F8F8F2; font-style: italic } /* Generic.Emph */ +body:not([data-theme="light"]) .highlight .ges { color: #F8F8F2; font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +body:not([data-theme="light"]) .highlight .gr { color: #F8F8F2 } /* Generic.Error */ +body:not([data-theme="light"]) .highlight .gh { color: #F8F8F2 } /* Generic.Heading */ +body:not([data-theme="light"]) .highlight .gi { color: #A6E22E } /* Generic.Inserted */ +body:not([data-theme="light"]) .highlight .go { color: #66D9EF } /* Generic.Output */ +body:not([data-theme="light"]) .highlight .gp { color: #FF4689; font-weight: bold } /* Generic.Prompt */ +body:not([data-theme="light"]) .highlight .gs { color: #F8F8F2; font-weight: bold } /* Generic.Strong */ +body:not([data-theme="light"]) .highlight .gu { color: #959077 } /* Generic.Subheading */ +body:not([data-theme="light"]) .highlight .gt { color: #F8F8F2 } /* Generic.Traceback */ +body:not([data-theme="light"]) .highlight .kc { color: #66D9EF } /* Keyword.Constant */ +body:not([data-theme="light"]) .highlight .kd { color: #66D9EF } /* Keyword.Declaration */ +body:not([data-theme="light"]) .highlight .kn { color: #FF4689 } /* Keyword.Namespace */ +body:not([data-theme="light"]) .highlight .kp { color: #66D9EF } /* Keyword.Pseudo */ +body:not([data-theme="light"]) .highlight .kr { color: #66D9EF } /* Keyword.Reserved */ +body:not([data-theme="light"]) .highlight .kt { color: #66D9EF } /* Keyword.Type */ +body:not([data-theme="light"]) .highlight .ld { color: #E6DB74 } /* Literal.Date */ +body:not([data-theme="light"]) .highlight .m { color: #AE81FF } /* Literal.Number */ +body:not([data-theme="light"]) .highlight .s { color: #E6DB74 } /* Literal.String */ +body:not([data-theme="light"]) .highlight .na { color: #A6E22E } /* Name.Attribute */ +body:not([data-theme="light"]) .highlight .nb { color: #F8F8F2 } /* Name.Builtin */ +body:not([data-theme="light"]) .highlight .nc { color: #A6E22E } /* Name.Class */ +body:not([data-theme="light"]) .highlight .no { color: #66D9EF } /* Name.Constant */ +body:not([data-theme="light"]) .highlight .nd { color: #A6E22E } /* Name.Decorator */ +body:not([data-theme="light"]) .highlight .ni { color: #F8F8F2 } /* Name.Entity */ +body:not([data-theme="light"]) .highlight .ne { color: #A6E22E } /* Name.Exception */ +body:not([data-theme="light"]) .highlight .nf { color: #A6E22E } /* Name.Function */ +body:not([data-theme="light"]) .highlight .nl { color: #F8F8F2 } /* Name.Label */ +body:not([data-theme="light"]) .highlight .nn { color: #F8F8F2 } /* Name.Namespace */ +body:not([data-theme="light"]) .highlight .nx { color: #A6E22E } /* Name.Other */ +body:not([data-theme="light"]) .highlight .py { color: #F8F8F2 } /* Name.Property */ +body:not([data-theme="light"]) .highlight .nt { color: #FF4689 } /* Name.Tag */ +body:not([data-theme="light"]) .highlight .nv { color: #F8F8F2 } /* Name.Variable */ +body:not([data-theme="light"]) .highlight .ow { color: #FF4689 } /* Operator.Word */ +body:not([data-theme="light"]) .highlight .pm { color: #F8F8F2 } /* Punctuation.Marker */ +body:not([data-theme="light"]) .highlight .w { color: #F8F8F2 } /* Text.Whitespace */ +body:not([data-theme="light"]) .highlight .mb { color: #AE81FF } /* Literal.Number.Bin */ +body:not([data-theme="light"]) .highlight .mf { color: #AE81FF } /* Literal.Number.Float */ +body:not([data-theme="light"]) .highlight .mh { color: #AE81FF } /* Literal.Number.Hex */ +body:not([data-theme="light"]) .highlight .mi { color: 
#AE81FF } /* Literal.Number.Integer */ +body:not([data-theme="light"]) .highlight .mo { color: #AE81FF } /* Literal.Number.Oct */ +body:not([data-theme="light"]) .highlight .sa { color: #E6DB74 } /* Literal.String.Affix */ +body:not([data-theme="light"]) .highlight .sb { color: #E6DB74 } /* Literal.String.Backtick */ +body:not([data-theme="light"]) .highlight .sc { color: #E6DB74 } /* Literal.String.Char */ +body:not([data-theme="light"]) .highlight .dl { color: #E6DB74 } /* Literal.String.Delimiter */ +body:not([data-theme="light"]) .highlight .sd { color: #E6DB74 } /* Literal.String.Doc */ +body:not([data-theme="light"]) .highlight .s2 { color: #E6DB74 } /* Literal.String.Double */ +body:not([data-theme="light"]) .highlight .se { color: #AE81FF } /* Literal.String.Escape */ +body:not([data-theme="light"]) .highlight .sh { color: #E6DB74 } /* Literal.String.Heredoc */ +body:not([data-theme="light"]) .highlight .si { color: #E6DB74 } /* Literal.String.Interpol */ +body:not([data-theme="light"]) .highlight .sx { color: #E6DB74 } /* Literal.String.Other */ +body:not([data-theme="light"]) .highlight .sr { color: #E6DB74 } /* Literal.String.Regex */ +body:not([data-theme="light"]) .highlight .s1 { color: #E6DB74 } /* Literal.String.Single */ +body:not([data-theme="light"]) .highlight .ss { color: #E6DB74 } /* Literal.String.Symbol */ +body:not([data-theme="light"]) .highlight .bp { color: #F8F8F2 } /* Name.Builtin.Pseudo */ +body:not([data-theme="light"]) .highlight .fm { color: #A6E22E } /* Name.Function.Magic */ +body:not([data-theme="light"]) .highlight .vc { color: #F8F8F2 } /* Name.Variable.Class */ +body:not([data-theme="light"]) .highlight .vg { color: #F8F8F2 } /* Name.Variable.Global */ +body:not([data-theme="light"]) .highlight .vi { color: #F8F8F2 } /* Name.Variable.Instance */ +body:not([data-theme="light"]) .highlight .vm { color: #F8F8F2 } /* Name.Variable.Magic */ +body:not([data-theme="light"]) .highlight .il { color: #AE81FF } /* Literal.Number.Integer.Long */ +} +} \ No newline at end of file diff --git a/doctr/py.typed b/_static/scripts/furo-extensions.js similarity index 100% rename from doctr/py.typed rename to _static/scripts/furo-extensions.js diff --git a/_static/scripts/furo.js b/_static/scripts/furo.js new file mode 100644 index 0000000000..87e1767fc6 --- /dev/null +++ b/_static/scripts/furo.js @@ -0,0 +1,3 @@ +/*! 
For license information please see furo.js.LICENSE.txt */ +(()=>{var t={856:function(t,e,n){var o,r;r=void 0!==n.g?n.g:"undefined"!=typeof window?window:this,o=function(){return function(t){"use strict";var e={navClass:"active",contentClass:"active",nested:!1,nestedClass:"active",offset:0,reflow:!1,events:!0},n=function(t,e,n){if(n.settings.events){var o=new CustomEvent(t,{bubbles:!0,cancelable:!0,detail:n});e.dispatchEvent(o)}},o=function(t){var e=0;if(t.offsetParent)for(;t;)e+=t.offsetTop,t=t.offsetParent;return e>=0?e:0},r=function(t){t&&t.sort(function(t,e){return o(t.content)<o(e.content)?-1:1})},c=function(t,e,n){var o=t.getBoundingClientRect(),r=function(t){return"function"==typeof t.offset?parseFloat(t.offset()):parseFloat(t.offset)}(e);return n?parseInt(o.bottom,10)<(window.innerHeight||document.documentElement.clientHeight):parseInt(o.top,10)<=r},s=function(){return Math.ceil(window.innerHeight+window.pageYOffset)>=Math.max(document.body.scrollHeight,document.documentElement.scrollHeight,document.body.offsetHeight,document.documentElement.offsetHeight,document.body.clientHeight,document.documentElement.clientHeight)},l=function(t,e){var n=t[t.length-1];if(function(t,e){return!(!s()||!c(t.content,e,!0))}(n,e))return n;for(var o=t.length-1;o>=0;o--)if(c(t[o].content,e))return t[o]},a=function(t,e){if(e.nested&&t.parentNode){var n=t.parentNode.closest("li");n&&(n.classList.remove(e.nestedClass),a(n,e))}},i=function(t,e){if(t){var o=t.nav.closest("li");o&&(o.classList.remove(e.navClass),t.content.classList.remove(e.contentClass),a(o,e),n("gumshoeDeactivate",o,{link:t.nav,content:t.content,settings:e}))}},u=function(t,e){if(e.nested){var n=t.parentNode.closest("li");n&&(n.classList.add(e.nestedClass),u(n,e))}};return function(o,c){var s,a,d,f,m,v={setup:function(){s=document.querySelectorAll(o),a=[],Array.prototype.forEach.call(s,function(t){var e=document.getElementById(decodeURIComponent(t.hash.substr(1)));e&&a.push({nav:t,content:e})}),r(a)},detect:function(){var t=l(a,m);t?d&&t.content===d.content||(i(d,m),function(t,e){if(t){var o=t.nav.closest("li");o&&(o.classList.add(e.navClass),t.content.classList.add(e.contentClass),u(o,e),n("gumshoeActivate",o,{link:t.nav,content:t.content,settings:e}))}}(t,m),d=t):d&&(i(d,m),d=null)}},h=function(e){f&&t.cancelAnimationFrame(f),f=t.requestAnimationFrame(v.detect)},g=function(e){f&&t.cancelAnimationFrame(f),f=t.requestAnimationFrame(function(){r(a),v.detect()})};return v.destroy=function(){d&&i(d,m),t.removeEventListener("scroll",h,!1),m.reflow&&t.removeEventListener("resize",g,!1),a=null,s=null,d=null,f=null,m=null},m=function(){var t={};return Array.prototype.forEach.call(arguments,function(e){for(var n in e){if(!e.hasOwnProperty(n))return;t[n]=e[n]}}),t}(e,c||{}),v.setup(),v.detect(),t.addEventListener("scroll",h,!1),m.reflow&&t.addEventListener("resize",g,!1),v}}(r)}.apply(e,[]),void 0===o||(t.exports=o)}},e={};function n(o){var r=e[o];if(void 0!==r)return r.exports;var c=e[o]={exports:{}};return t[o].call(c.exports,c,c.exports,n),c.exports}n.n=t=>{var e=t&&t.__esModule?()=>t.default:()=>t;return n.d(e,{a:e}),e},n.d=(t,e)=>{for(var o in e)n.o(e,o)&&!n.o(t,o)&&Object.defineProperty(t,o,{enumerable:!0,get:e[o]})},n.g=function(){if("object"==typeof globalThis)return globalThis;try{return this||new Function("return this")()}catch(t){if("object"==typeof window)return window}}(),n.o=(t,e)=>Object.prototype.hasOwnProperty.call(t,e),(()=>{"use strict";var t=n(856),e=n.n(t),o=null,r=null,c=document.documentElement.scrollTop;function s(){const t=localStorage.getItem("theme")||"auto";var e;"light"!==(e=window.matchMedia("(prefers-color-scheme: dark)").matches?"auto"===t?"light":"light"==t?"dark":"auto":"auto"===t?"dark":"dark"==t?"light":"auto")&&"dark"!==e&&"auto"!==e&&(console.error(`Got invalid theme mode: ${e}.
Resetting to auto.`),e="auto"),document.body.dataset.theme=e,localStorage.setItem("theme",e),console.log(`Changed to ${e} mode.`)}function l(){!function(){const t=document.getElementsByClassName("theme-toggle");Array.from(t).forEach(t=>{t.addEventListener("click",s)})}(),function(){let t=0,e=!1;window.addEventListener("scroll",function(n){t=window.scrollY,e||(window.requestAnimationFrame(function(){var n;(function(t){t>0?r.classList.add("scrolled"):r.classList.remove("scrolled")})(n=t),function(t){t<64?document.documentElement.classList.remove("show-back-to-top"):t<c?document.documentElement.classList.add("show-back-to-top"):t>c&&document.documentElement.classList.remove("show-back-to-top"),c=t}(n),function(t){null!==o&&(0==t?o.scrollTo(0,0):Math.ceil(t)>=Math.floor(document.documentElement.scrollHeight-window.innerHeight)?o.scrollTo(0,o.scrollHeight):document.querySelector(".scroll-current"))}(n),e=!1}),e=!0)}),window.scroll()}(),null!==o&&new(e())(".toc-tree a",{reflow:!0,recursive:!0,navClass:"scroll-current",offset:()=>{let t=parseFloat(getComputedStyle(document.documentElement).fontSize);const e=r.getBoundingClientRect();return e.top+e.height+2.5*t+1}})}document.addEventListener("DOMContentLoaded",function(){document.body.parentNode.classList.remove("no-js"),r=document.querySelector("header"),o=document.querySelector(".toc-scroll"),l()})})()})(); +//# sourceMappingURL=furo.js.map \ No newline at end of file diff --git a/_static/scripts/furo.js.LICENSE.txt b/_static/scripts/furo.js.LICENSE.txt new file mode 100644 index 0000000000..1632189c7e --- /dev/null +++ b/_static/scripts/furo.js.LICENSE.txt @@ -0,0 +1,7 @@ +/*! + * gumshoejs v5.1.2 (patched by @pradyunsg) + * A simple, framework-agnostic scrollspy script. + * (c) 2019 Chris Ferdinandi + * MIT License + * http://github.com/cferdinandi/gumshoe + */ diff --git a/_static/scripts/furo.js.map b/_static/scripts/furo.js.map new file mode 100644 index 0000000000..3b316f3a34 --- /dev/null +++ b/_static/scripts/furo.js.map @@ -0,0 +1 @@
+{"version":3,"file":"scripts/furo.js","mappings":";iCAAA,MAQWA,SAWS,IAAX,EAAAC,EACH,EAAAA,EACkB,oBAAXC,OACLA,OACAC,KAbO,EAAF,WACP,OAaJ,SAAUD,GACR,aAMA,IAAIE,EAAW,CAEbC,SAAU,SACVC,aAAc,SAGdC,QAAQ,EACRC,YAAa,SAGbC,OAAQ,EACRC,QAAQ,EAGRC,QAAQ,GA6BNC,EAAY,SAAUC,EAAMC,EAAMC,GAEpC,GAAKA,EAAOC,SAASL,OAArB,CAGA,IAAIM,EAAQ,IAAIC,YAAYL,EAAM,CAChCM,SAAS,EACTC,YAAY,EACZL,OAAQA,IAIVD,EAAKO,cAAcJ,EAVgB,CAWrC,EAOIK,EAAe,SAAUR,GAC3B,IAAIS,EAAW,EACf,GAAIT,EAAKU,aACP,KAAOV,GACLS,GAAYT,EAAKW,UACjBX,EAAOA,EAAKU,aAGhB,OAAOD,GAAY,EAAIA,EAAW,CACpC,EAMIG,EAAe,SAAUC,GACvBA,GACFA,EAASC,KAAK,SAAUC,EAAOC,GAG7B,OAFcR,EAAaO,EAAME,SACnBT,EAAaQ,EAAMC,UACF,EACxB,CACT,EAEJ,EAwCIC,EAAW,SAAUlB,EAAME,EAAUiB,GACvC,IAAIC,EAASpB,EAAKqB,wBACd1B,EAnCU,SAAUO,GAExB,MAA+B,mBAApBA,EAASP,OACX2B,WAAWpB,EAASP,UAItB2B,WAAWpB,EAASP,OAC7B,CA2Be4B,CAAUrB,GACvB,OAAIiB,EAEAK,SAASJ,EAAOD,OAAQ,KACvB/B,EAAOqC,aAAeC,SAASC,gBAAgBC,cAG7CJ,SAASJ,EAAOS,IAAK,KAAOlC,CACrC,EAMImC,EAAa,WACf,OACEC,KAAKC,KAAK5C,EAAOqC,YAAcrC,EAAO6C,cAnCjCF,KAAKG,IACVR,SAASS,KAAKC,aACdV,SAASC,gBAAgBS,aACzBV,SAASS,KAAKE,aACdX,SAASC,gBAAgBU,aACzBX,SAASS,KAAKP,aACdF,SAASC,gBAAgBC,aAkC7B,EAmBIU,EAAY,SAAUzB,EAAUX,GAClC,IAAIqC,EAAO1B,EAASA,EAAS2B,OAAS,GACtC,GAbgB,SAAUC,EAAMvC,GAChC,SAAI4B,MAAgBZ,EAASuB,EAAKxB,QAASf,GAAU,GAEvD,CAUMwC,CAAYH,EAAMrC,GAAW,OAAOqC,EACxC,IAAK,IAAII,EAAI9B,EAAS2B,OAAS,EAAGG,GAAK,EAAGA,IACxC,GAAIzB,EAASL,EAAS8B,GAAG1B,QAASf,GAAW,OAAOW,EAAS8B,EAEjE,EAOIC,EAAmB,SAAUC,EAAK3C,GAEpC,GAAKA,EAAST,QAAWoD,EAAIC,WAA7B,CAGA,IAAIC,EAAKF,EAAIC,WAAWE,QAAQ,MAC3BD,IAGLA,EAAGE,UAAUC,OAAOhD,EAASR,aAG7BkD,EAAiBG,EAAI7C,GAV0B,CAWjD,EAOIiD,EAAa,SAAUC,EAAOlD,GAEhC,GAAKkD,EAAL,CAGA,IAAIL,EAAKK,EAAMP,IAAIG,QAAQ,MACtBD,IAGLA,EAAGE,UAAUC,OAAOhD,EAASX,UAC7B6D,EAAMnC,QAAQgC,UAAUC,OAAOhD,EAASV,cAGxCoD,EAAiBG,EAAI7C,GAGrBJ,EAAU,oBAAqBiD,EAAI,CACjCM,KAAMD,EAAMP,IACZ5B,QAASmC,EAAMnC,QACff,SAAUA,IAjBM,CAmBpB,EAOIoD,EAAiB,SAAUT,EAAK3C,GAElC,GAAKA,EAAST,OAAd,CAGA,IAAIsD,EAAKF,EAAIC,WAAWE,QAAQ,MAC3BD,IAGLA,EAAGE,UAAUM,IAAIrD,EAASR,aAG1B4D,EAAeP,EAAI7C,GAVS,CAW9B,EA6LA,OA1JkB,SAAUsD,EAAUC,GAKpC,IACIC,EAAU7C,EAAU8C,EAASC,EAAS1D,EADtC2D,EAAa,CAUjBA,MAAmB,WAEjBH,EAAWhC,SAASoC,iBAAiBN,GAGrC3C,EAAW,GAGXkD,MAAMC,UAAUC,QAAQC,KAAKR,EAAU,SAAUjB,GAE/C,IAAIxB,EAAUS,SAASyC,eACrBC,mBAAmB3B,EAAK4B,KAAKC,OAAO,KAEjCrD,GAGLJ,EAAS0D,KAAK,CACZ1B,IAAKJ,EACLxB,QAASA,GAEb,GAGAL,EAAaC,EACf,EAKAgD,OAAoB,WAElB,IAAIW,EAASlC,EAAUzB,EAAUX,GAG5BsE,EASDb,GAAWa,EAAOvD,UAAY0C,EAAQ1C,UAG1CkC,EAAWQ,EAASzD,GAzFT,SAAUkD,EAAOlD,GAE9B,GAAKkD,EAAL,CAGA,IAAIL,EAAKK,EAAMP,IAAIG,QAAQ,MACtBD,IAGLA,EAAGE,UAAUM,IAAIrD,EAASX,UAC1B6D,EAAMnC,QAAQgC,UAAUM,IAAIrD,EAASV,cAGrC8D,EAAeP,EAAI7C,GAGnBJ,EAAU,kBAAmBiD,EAAI,CAC/BM,KAAMD,EAAMP,IACZ5B,QAASmC,EAAMnC,QACff,SAAUA,IAjBM,CAmBpB,CAqEIuE,CAASD,EAAQtE,GAGjByD,EAAUa,GAfJb,IACFR,EAAWQ,EAASzD,GACpByD,EAAU,KAchB,GAMIe,EAAgB,SAAUvE,GAExByD,GACFxE,EAAOuF,qBAAqBf,GAI9BA,EAAUxE,EAAOwF,sBAAsBf,EAAWgB,OACpD,EAMIC,EAAgB,SAAU3E,GAExByD,GACFxE,EAAOuF,qBAAqBf,GAI9BA,EAAUxE,EAAOwF,sBAAsB,WACrChE,EAAaC,GACbgD,EAAWgB,QACb,EACF,EAkDA,OA7CAhB,EAAWkB,QAAU,WAEfpB,GACFR,EAAWQ,EAASzD,GAItBd,EAAO4F,oBAAoB,SAAUN,GAAe,GAChDxE,EAASN,QACXR,EAAO4F,oBAAoB,SAAUF,GAAe,GAItDjE,EAAW,KACX6C,EAAW,KACXC,EAAU,KACVC,EAAU,KACV1D,EAAW,IACb,EAOEA,EA3XS,WACX,IAAI+E,EAAS,CAAC,EAOd,OANAlB,MAAMC,UAAUC,QAAQC,KAAKgB,UAAW,SAAUC,GAChD,IAAK,IAAIC,KAAOD,EAAK,CACnB,IAAKA,EAAIE,eAAeD,GAAM,OAC9BH,EAAOG,GAAOD,EAAIC,EACpB,CACF,GACOH,CACT,CAkXeK,CAAOhG,EAAUmE,GAAW,CAAC,GAGxCI,EAAW0B,QAGX1B,EAAWgB,SAGXzF,EAAOoG,iBAAiB,SAAUd,GAAe,GAC7CxE,EAASN,QACXR,EAAOoG,iBAAiB,SAAUV,GAAe,GAS9CjB,CACT,CAOF,CArcW4B,CAAQvG,EAChB,UAFM,SAEN,oB,GCXDwG,EAA2B,CAAC,EAGhC,SAASC
,EAAoBC,GAE5B,IAAIC,EAAeH,EAAyBE,GAC5C,QAAqBE,IAAjBD,EACH,OAAOA,EAAaE,QAGrB,IAAIC,EAASN,EAAyBE,GAAY,CAGjDG,QAAS,CAAC,GAOX,OAHAE,EAAoBL,GAAU1B,KAAK8B,EAAOD,QAASC,EAAQA,EAAOD,QAASJ,GAGpEK,EAAOD,OACf,CCrBAJ,EAAoBO,EAAKF,IACxB,IAAIG,EAASH,GAAUA,EAAOI,WAC7B,IAAOJ,EAAiB,QACxB,IAAM,EAEP,OADAL,EAAoBU,EAAEF,EAAQ,CAAEG,EAAGH,IAC5BA,GCLRR,EAAoBU,EAAI,CAACN,EAASQ,KACjC,IAAI,IAAInB,KAAOmB,EACXZ,EAAoBa,EAAED,EAAYnB,KAASO,EAAoBa,EAAET,EAASX,IAC5EqB,OAAOC,eAAeX,EAASX,EAAK,CAAEuB,YAAY,EAAMC,IAAKL,EAAWnB,MCJ3EO,EAAoBxG,EAAI,WACvB,GAA0B,iBAAf0H,WAAyB,OAAOA,WAC3C,IACC,OAAOxH,MAAQ,IAAIyH,SAAS,cAAb,EAChB,CAAE,MAAOC,GACR,GAAsB,iBAAX3H,OAAqB,OAAOA,MACxC,CACA,CAPuB,GCAxBuG,EAAoBa,EAAI,CAACrB,EAAK6B,IAAUP,OAAOzC,UAAUqB,eAAenB,KAAKiB,EAAK6B,G,yCCK9EC,EAAY,KACZC,EAAS,KACTC,EAAgBzF,SAASC,gBAAgByF,UA4E7C,SAASC,IACP,MAAMC,EAAeC,aAAaC,QAAQ,UAAY,OAZxD,IAAkBC,EACH,WADGA,EAaIrI,OAAOsI,WAAW,gCAAgCC,QAI/C,SAAjBL,EACO,QACgB,SAAhBA,EACA,OAEA,OAIU,SAAjBA,EACO,OACgB,QAAhBA,EACA,QAEA,SA9BoB,SAATG,GAA4B,SAATA,IACzCG,QAAQC,MAAM,2BAA2BJ,yBACzCA,EAAO,QAGT/F,SAASS,KAAK2F,QAAQC,MAAQN,EAC9BF,aAAaS,QAAQ,QAASP,GAC9BG,QAAQK,IAAI,cAAcR,UA0B5B,CAmDA,SAASlC,KART,WAEE,MAAM2C,EAAUxG,SAASyG,uBAAuB,gBAChDpE,MAAMqE,KAAKF,GAASjE,QAASoE,IAC3BA,EAAI7C,iBAAiB,QAAS6B,IAElC,CAGEiB,GA/CF,WAEE,IAAIC,EAA6B,EAC7BC,GAAU,EAEdpJ,OAAOoG,iBAAiB,SAAU,SAAUuB,GAC1CwB,EAA6BnJ,OAAOqJ,QAE/BD,IACHpJ,OAAOwF,sBAAsB,WAzDnC,IAAuB8D,GArDvB,SAAgCA,GAC1BA,EAAY,EACdxB,EAAOjE,UAAUM,IAAI,YAErB2D,EAAOjE,UAAUC,OAAO,WAE5B,EAgDEyF,CADqBD,EA0DDH,GAvGtB,SAAmCG,GAC7BA,EAXmB,GAYrBhH,SAASC,gBAAgBsB,UAAUC,OAAO,oBAEtCwF,EAAYvB,EACdzF,SAASC,gBAAgBsB,UAAUM,IAAI,oBAC9BmF,EAAYvB,GACrBzF,SAASC,gBAAgBsB,UAAUC,OAAO,oBAG9CiE,EAAgBuB,CAClB,CAoCEE,CAA0BF,GAlC5B,SAA6BA,GACT,OAAdzB,IAKa,GAAbyB,EACFzB,EAAU4B,SAAS,EAAG,GAGtB9G,KAAKC,KAAK0G,IACV3G,KAAK+G,MAAMpH,SAASC,gBAAgBS,aAAehD,OAAOqC,aAE1DwF,EAAU4B,SAAS,EAAG5B,EAAU7E,cAGhBV,SAASqH,cAAc,mBAc3C,CAKEC,CAAoBN,GAwDdF,GAAU,CACZ,GAEAA,GAAU,EAEd,GACApJ,OAAO6J,QACT,CA8BEC,GA3BkB,OAAdjC,GAKJ,IAAI,IAAJ,CAAY,cAAe,CACzBrH,QAAQ,EACRuJ,WAAW,EACX5J,SAAU,iBACVI,OAAQ,KACN,IAAIyJ,EAAM9H,WAAW+H,iBAAiB3H,SAASC,iBAAiB2H,UAChE,MAAMC,EAAarC,EAAO7F,wBAC1B,OAAOkI,EAAW1H,IAAM0H,EAAWC,OAAS,IAAMJ,EAAM,IAiB9D,CAcA1H,SAAS8D,iBAAiB,mBAT1B,WACE9D,SAASS,KAAKW,WAAWG,UAAUC,OAAO,SAE1CgE,EAASxF,SAASqH,cAAc,UAChC9B,EAAYvF,SAASqH,cAAc,eAEnCxD,GACF,E","sources":["webpack:///./src/furo/assets/scripts/gumshoe-patched.js","webpack:///webpack/bootstrap","webpack:///webpack/runtime/compat get default export","webpack:///webpack/runtime/define property getters","webpack:///webpack/runtime/global","webpack:///webpack/runtime/hasOwnProperty shorthand","webpack:///./src/furo/assets/scripts/furo.js"],"sourcesContent":["/*!\n * gumshoejs v5.1.2 (patched by @pradyunsg)\n * A simple, framework-agnostic scrollspy script.\n * (c) 2019 Chris Ferdinandi\n * MIT License\n * http://github.com/cferdinandi/gumshoe\n */\n\n(function (root, factory) {\n if (typeof define === \"function\" && define.amd) {\n define([], function () {\n return factory(root);\n });\n } else if (typeof exports === \"object\") {\n module.exports = factory(root);\n } else {\n root.Gumshoe = factory(root);\n }\n})(\n typeof global !== \"undefined\"\n ? global\n : typeof window !== \"undefined\"\n ? 
window\n : this,\n function (window) {\n \"use strict\";\n\n //\n // Defaults\n //\n\n var defaults = {\n // Active classes\n navClass: \"active\",\n contentClass: \"active\",\n\n // Nested navigation\n nested: false,\n nestedClass: \"active\",\n\n // Offset & reflow\n offset: 0,\n reflow: false,\n\n // Event support\n events: true,\n };\n\n //\n // Methods\n //\n\n /**\n * Merge two or more objects together.\n * @param {Object} objects The objects to merge together\n * @returns {Object} Merged values of defaults and options\n */\n var extend = function () {\n var merged = {};\n Array.prototype.forEach.call(arguments, function (obj) {\n for (var key in obj) {\n if (!obj.hasOwnProperty(key)) return;\n merged[key] = obj[key];\n }\n });\n return merged;\n };\n\n /**\n * Emit a custom event\n * @param {String} type The event type\n * @param {Node} elem The element to attach the event to\n * @param {Object} detail Any details to pass along with the event\n */\n var emitEvent = function (type, elem, detail) {\n // Make sure events are enabled\n if (!detail.settings.events) return;\n\n // Create a new event\n var event = new CustomEvent(type, {\n bubbles: true,\n cancelable: true,\n detail: detail,\n });\n\n // Dispatch the event\n elem.dispatchEvent(event);\n };\n\n /**\n * Get an element's distance from the top of the Document.\n * @param {Node} elem The element\n * @return {Number} Distance from the top in pixels\n */\n var getOffsetTop = function (elem) {\n var location = 0;\n if (elem.offsetParent) {\n while (elem) {\n location += elem.offsetTop;\n elem = elem.offsetParent;\n }\n }\n return location >= 0 ? location : 0;\n };\n\n /**\n * Sort content from first to last in the DOM\n * @param {Array} contents The content areas\n */\n var sortContents = function (contents) {\n if (contents) {\n contents.sort(function (item1, item2) {\n var offset1 = getOffsetTop(item1.content);\n var offset2 = getOffsetTop(item2.content);\n if (offset1 < offset2) return -1;\n return 1;\n });\n }\n };\n\n /**\n * Get the offset to use for calculating position\n * @param {Object} settings The settings for this instantiation\n * @return {Float} The number of pixels to offset the calculations\n */\n var getOffset = function (settings) {\n // if the offset is a function run it\n if (typeof settings.offset === \"function\") {\n return parseFloat(settings.offset());\n }\n\n // Otherwise, return it as-is\n return parseFloat(settings.offset);\n };\n\n /**\n * Get the document element's height\n * @private\n * @returns {Number}\n */\n var getDocumentHeight = function () {\n return Math.max(\n document.body.scrollHeight,\n document.documentElement.scrollHeight,\n document.body.offsetHeight,\n document.documentElement.offsetHeight,\n document.body.clientHeight,\n document.documentElement.clientHeight,\n );\n };\n\n /**\n * Determine if an element is in view\n * @param {Node} elem The element\n * @param {Object} settings The settings for this instantiation\n * @param {Boolean} bottom If true, check if element is above bottom of viewport instead\n * @return {Boolean} Returns true if element is in the viewport\n */\n var isInView = function (elem, settings, bottom) {\n var bounds = elem.getBoundingClientRect();\n var offset = getOffset(settings);\n if (bottom) {\n return (\n parseInt(bounds.bottom, 10) <\n (window.innerHeight || document.documentElement.clientHeight)\n );\n }\n return parseInt(bounds.top, 10) <= offset;\n };\n\n /**\n * Check if at the bottom of the viewport\n * @return {Boolean} If true, page is at the bottom 
of the viewport\n */\n var isAtBottom = function () {\n if (\n Math.ceil(window.innerHeight + window.pageYOffset) >=\n getDocumentHeight()\n )\n return true;\n return false;\n };\n\n /**\n * Check if the last item should be used (even if not at the top of the page)\n * @param {Object} item The last item\n * @param {Object} settings The settings for this instantiation\n * @return {Boolean} If true, use the last item\n */\n var useLastItem = function (item, settings) {\n if (isAtBottom() && isInView(item.content, settings, true)) return true;\n return false;\n };\n\n /**\n * Get the active content\n * @param {Array} contents The content areas\n * @param {Object} settings The settings for this instantiation\n * @return {Object} The content area and matching navigation link\n */\n var getActive = function (contents, settings) {\n var last = contents[contents.length - 1];\n if (useLastItem(last, settings)) return last;\n for (var i = contents.length - 1; i >= 0; i--) {\n if (isInView(contents[i].content, settings)) return contents[i];\n }\n };\n\n /**\n * Deactivate parent navs in a nested navigation\n * @param {Node} nav The starting navigation element\n * @param {Object} settings The settings for this instantiation\n */\n var deactivateNested = function (nav, settings) {\n // If nesting isn't activated, bail\n if (!settings.nested || !nav.parentNode) return;\n\n // Get the parent navigation\n var li = nav.parentNode.closest(\"li\");\n if (!li) return;\n\n // Remove the active class\n li.classList.remove(settings.nestedClass);\n\n // Apply recursively to any parent navigation elements\n deactivateNested(li, settings);\n };\n\n /**\n * Deactivate a nav and content area\n * @param {Object} items The nav item and content to deactivate\n * @param {Object} settings The settings for this instantiation\n */\n var deactivate = function (items, settings) {\n // Make sure there are items to deactivate\n if (!items) return;\n\n // Get the parent list item\n var li = items.nav.closest(\"li\");\n if (!li) return;\n\n // Remove the active class from the nav and content\n li.classList.remove(settings.navClass);\n items.content.classList.remove(settings.contentClass);\n\n // Deactivate any parent navs in a nested navigation\n deactivateNested(li, settings);\n\n // Emit a custom event\n emitEvent(\"gumshoeDeactivate\", li, {\n link: items.nav,\n content: items.content,\n settings: settings,\n });\n };\n\n /**\n * Activate parent navs in a nested navigation\n * @param {Node} nav The starting navigation element\n * @param {Object} settings The settings for this instantiation\n */\n var activateNested = function (nav, settings) {\n // If nesting isn't activated, bail\n if (!settings.nested) return;\n\n // Get the parent navigation\n var li = nav.parentNode.closest(\"li\");\n if (!li) return;\n\n // Add the active class\n li.classList.add(settings.nestedClass);\n\n // Apply recursively to any parent navigation elements\n activateNested(li, settings);\n };\n\n /**\n * Activate a nav and content area\n * @param {Object} items The nav item and content to activate\n * @param {Object} settings The settings for this instantiation\n */\n var activate = function (items, settings) {\n // Make sure there are items to activate\n if (!items) return;\n\n // Get the parent list item\n var li = items.nav.closest(\"li\");\n if (!li) return;\n\n // Add the active class to the nav and content\n li.classList.add(settings.navClass);\n items.content.classList.add(settings.contentClass);\n\n // Activate any parent navs in a nested 
navigation\n activateNested(li, settings);\n\n // Emit a custom event\n emitEvent(\"gumshoeActivate\", li, {\n link: items.nav,\n content: items.content,\n settings: settings,\n });\n };\n\n /**\n * Create the Constructor object\n * @param {String} selector The selector to use for navigation items\n * @param {Object} options User options and settings\n */\n var Constructor = function (selector, options) {\n //\n // Variables\n //\n\n var publicAPIs = {};\n var navItems, contents, current, timeout, settings;\n\n //\n // Methods\n //\n\n /**\n * Set variables from DOM elements\n */\n publicAPIs.setup = function () {\n // Get all nav items\n navItems = document.querySelectorAll(selector);\n\n // Create contents array\n contents = [];\n\n // Loop through each item, get it's matching content, and push to the array\n Array.prototype.forEach.call(navItems, function (item) {\n // Get the content for the nav item\n var content = document.getElementById(\n decodeURIComponent(item.hash.substr(1)),\n );\n if (!content) return;\n\n // Push to the contents array\n contents.push({\n nav: item,\n content: content,\n });\n });\n\n // Sort contents by the order they appear in the DOM\n sortContents(contents);\n };\n\n /**\n * Detect which content is currently active\n */\n publicAPIs.detect = function () {\n // Get the active content\n var active = getActive(contents, settings);\n\n // if there's no active content, deactivate and bail\n if (!active) {\n if (current) {\n deactivate(current, settings);\n current = null;\n }\n return;\n }\n\n // If the active content is the one currently active, do nothing\n if (current && active.content === current.content) return;\n\n // Deactivate the current content and activate the new content\n deactivate(current, settings);\n activate(active, settings);\n\n // Update the currently active content\n current = active;\n };\n\n /**\n * Detect the active content on scroll\n * Debounced for performance\n */\n var scrollHandler = function (event) {\n // If there's a timer, cancel it\n if (timeout) {\n window.cancelAnimationFrame(timeout);\n }\n\n // Setup debounce callback\n timeout = window.requestAnimationFrame(publicAPIs.detect);\n };\n\n /**\n * Update content sorting on resize\n * Debounced for performance\n */\n var resizeHandler = function (event) {\n // If there's a timer, cancel it\n if (timeout) {\n window.cancelAnimationFrame(timeout);\n }\n\n // Setup debounce callback\n timeout = window.requestAnimationFrame(function () {\n sortContents(contents);\n publicAPIs.detect();\n });\n };\n\n /**\n * Destroy the current instantiation\n */\n publicAPIs.destroy = function () {\n // Undo DOM changes\n if (current) {\n deactivate(current, settings);\n }\n\n // Remove event listeners\n window.removeEventListener(\"scroll\", scrollHandler, false);\n if (settings.reflow) {\n window.removeEventListener(\"resize\", resizeHandler, false);\n }\n\n // Reset variables\n contents = null;\n navItems = null;\n current = null;\n timeout = null;\n settings = null;\n };\n\n /**\n * Initialize the current instantiation\n */\n var init = function () {\n // Merge user options into defaults\n settings = extend(defaults, options || {});\n\n // Setup variables based on the current DOM\n publicAPIs.setup();\n\n // Find the currently active content\n publicAPIs.detect();\n\n // Setup event listeners\n window.addEventListener(\"scroll\", scrollHandler, false);\n if (settings.reflow) {\n window.addEventListener(\"resize\", resizeHandler, false);\n }\n };\n\n //\n // Initialize and return the public 
APIs\n //\n\n init();\n return publicAPIs;\n };\n\n //\n // Return the Constructor\n //\n\n return Constructor;\n },\n);\n","// The module cache\nvar __webpack_module_cache__ = {};\n\n// The require function\nfunction __webpack_require__(moduleId) {\n\t// Check if module is in cache\n\tvar cachedModule = __webpack_module_cache__[moduleId];\n\tif (cachedModule !== undefined) {\n\t\treturn cachedModule.exports;\n\t}\n\t// Create a new module (and put it into the cache)\n\tvar module = __webpack_module_cache__[moduleId] = {\n\t\t// no module.id needed\n\t\t// no module.loaded needed\n\t\texports: {}\n\t};\n\n\t// Execute the module function\n\t__webpack_modules__[moduleId].call(module.exports, module, module.exports, __webpack_require__);\n\n\t// Return the exports of the module\n\treturn module.exports;\n}\n\n","// getDefaultExport function for compatibility with non-harmony modules\n__webpack_require__.n = (module) => {\n\tvar getter = module && module.__esModule ?\n\t\t() => (module['default']) :\n\t\t() => (module);\n\t__webpack_require__.d(getter, { a: getter });\n\treturn getter;\n};","// define getter functions for harmony exports\n__webpack_require__.d = (exports, definition) => {\n\tfor(var key in definition) {\n\t\tif(__webpack_require__.o(definition, key) && !__webpack_require__.o(exports, key)) {\n\t\t\tObject.defineProperty(exports, key, { enumerable: true, get: definition[key] });\n\t\t}\n\t}\n};","__webpack_require__.g = (function() {\n\tif (typeof globalThis === 'object') return globalThis;\n\ttry {\n\t\treturn this || new Function('return this')();\n\t} catch (e) {\n\t\tif (typeof window === 'object') return window;\n\t}\n})();","__webpack_require__.o = (obj, prop) => (Object.prototype.hasOwnProperty.call(obj, prop))","import Gumshoe from \"./gumshoe-patched.js\";\n\n////////////////////////////////////////////////////////////////////////////////\n// Scroll Handling\n////////////////////////////////////////////////////////////////////////////////\nvar tocScroll = null;\nvar header = null;\nvar lastScrollTop = document.documentElement.scrollTop;\nconst GO_TO_TOP_OFFSET = 64;\n\nfunction scrollHandlerForHeader(positionY) {\n if (positionY > 0) {\n header.classList.add(\"scrolled\");\n } else {\n header.classList.remove(\"scrolled\");\n }\n}\n\nfunction scrollHandlerForBackToTop(positionY) {\n if (positionY < GO_TO_TOP_OFFSET) {\n document.documentElement.classList.remove(\"show-back-to-top\");\n } else {\n if (positionY < lastScrollTop) {\n document.documentElement.classList.add(\"show-back-to-top\");\n } else if (positionY > lastScrollTop) {\n document.documentElement.classList.remove(\"show-back-to-top\");\n }\n }\n lastScrollTop = positionY;\n}\n\nfunction scrollHandlerForTOC(positionY) {\n if (tocScroll === null) {\n return;\n }\n\n // top of page.\n if (positionY == 0) {\n tocScroll.scrollTo(0, 0);\n } else if (\n // bottom of page.\n Math.ceil(positionY) >=\n Math.floor(document.documentElement.scrollHeight - window.innerHeight)\n ) {\n tocScroll.scrollTo(0, tocScroll.scrollHeight);\n } else {\n // somewhere in the middle.\n const current = document.querySelector(\".scroll-current\");\n if (current == null) {\n return;\n }\n\n // https://github.com/pypa/pip/issues/9159 This breaks scroll behaviours.\n // // scroll the currently \"active\" heading in toc, into view.\n // const rect = current.getBoundingClientRect();\n // if (0 > rect.top) {\n // current.scrollIntoView(true); // the argument is \"alignTop\"\n // } else if (rect.bottom > window.innerHeight) {\n // 
current.scrollIntoView(false);\n // }\n }\n}\n\nfunction scrollHandler(positionY) {\n scrollHandlerForHeader(positionY);\n scrollHandlerForBackToTop(positionY);\n scrollHandlerForTOC(positionY);\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// Theme Toggle\n////////////////////////////////////////////////////////////////////////////////\nfunction setTheme(mode) {\n if (mode !== \"light\" && mode !== \"dark\" && mode !== \"auto\") {\n console.error(`Got invalid theme mode: ${mode}. Resetting to auto.`);\n mode = \"auto\";\n }\n\n document.body.dataset.theme = mode;\n localStorage.setItem(\"theme\", mode);\n console.log(`Changed to ${mode} mode.`);\n}\n\nfunction cycleThemeOnce() {\n const currentTheme = localStorage.getItem(\"theme\") || \"auto\";\n const prefersDark = window.matchMedia(\"(prefers-color-scheme: dark)\").matches;\n\n if (prefersDark) {\n // Auto (dark) -> Light -> Dark\n if (currentTheme === \"auto\") {\n setTheme(\"light\");\n } else if (currentTheme == \"light\") {\n setTheme(\"dark\");\n } else {\n setTheme(\"auto\");\n }\n } else {\n // Auto (light) -> Dark -> Light\n if (currentTheme === \"auto\") {\n setTheme(\"dark\");\n } else if (currentTheme == \"dark\") {\n setTheme(\"light\");\n } else {\n setTheme(\"auto\");\n }\n }\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// Setup\n////////////////////////////////////////////////////////////////////////////////\nfunction setupScrollHandler() {\n // Taken from https://developer.mozilla.org/en-US/docs/Web/API/Document/scroll_event\n let last_known_scroll_position = 0;\n let ticking = false;\n\n window.addEventListener(\"scroll\", function (e) {\n last_known_scroll_position = window.scrollY;\n\n if (!ticking) {\n window.requestAnimationFrame(function () {\n scrollHandler(last_known_scroll_position);\n ticking = false;\n });\n\n ticking = true;\n }\n });\n window.scroll();\n}\n\nfunction setupScrollSpy() {\n if (tocScroll === null) {\n return;\n }\n\n // Scrollspy -- highlight table on contents, based on scroll\n new Gumshoe(\".toc-tree a\", {\n reflow: true,\n recursive: true,\n navClass: \"scroll-current\",\n offset: () => {\n let rem = parseFloat(getComputedStyle(document.documentElement).fontSize);\n const headerRect = header.getBoundingClientRect();\n return headerRect.top + headerRect.height + 2.5 * rem + 1;\n },\n });\n}\n\nfunction setupTheme() {\n // Attach event handlers for toggling themes\n const buttons = document.getElementsByClassName(\"theme-toggle\");\n Array.from(buttons).forEach((btn) => {\n btn.addEventListener(\"click\", cycleThemeOnce);\n });\n}\n\nfunction setup() {\n setupTheme();\n setupScrollHandler();\n setupScrollSpy();\n}\n\n////////////////////////////////////////////////////////////////////////////////\n// Main entrypoint\n////////////////////////////////////////////////////////////////////////////////\nfunction main() {\n document.body.parentNode.classList.remove(\"no-js\");\n\n header = document.querySelector(\"header\");\n tocScroll = document.querySelector(\".toc-scroll\");\n\n setup();\n}\n\ndocument.addEventListener(\"DOMContentLoaded\", 
main);\n"],"names":["root","g","window","this","defaults","navClass","contentClass","nested","nestedClass","offset","reflow","events","emitEvent","type","elem","detail","settings","event","CustomEvent","bubbles","cancelable","dispatchEvent","getOffsetTop","location","offsetParent","offsetTop","sortContents","contents","sort","item1","item2","content","isInView","bottom","bounds","getBoundingClientRect","parseFloat","getOffset","parseInt","innerHeight","document","documentElement","clientHeight","top","isAtBottom","Math","ceil","pageYOffset","max","body","scrollHeight","offsetHeight","getActive","last","length","item","useLastItem","i","deactivateNested","nav","parentNode","li","closest","classList","remove","deactivate","items","link","activateNested","add","selector","options","navItems","current","timeout","publicAPIs","querySelectorAll","Array","prototype","forEach","call","getElementById","decodeURIComponent","hash","substr","push","active","activate","scrollHandler","cancelAnimationFrame","requestAnimationFrame","detect","resizeHandler","destroy","removeEventListener","merged","arguments","obj","key","hasOwnProperty","extend","setup","addEventListener","factory","__webpack_module_cache__","__webpack_require__","moduleId","cachedModule","undefined","exports","module","__webpack_modules__","n","getter","__esModule","d","a","definition","o","Object","defineProperty","enumerable","get","globalThis","Function","e","prop","tocScroll","header","lastScrollTop","scrollTop","cycleThemeOnce","currentTheme","localStorage","getItem","mode","matchMedia","matches","console","error","dataset","theme","setItem","log","buttons","getElementsByClassName","from","btn","setupTheme","last_known_scroll_position","ticking","scrollY","positionY","scrollHandlerForHeader","scrollHandlerForBackToTop","scrollTo","floor","querySelector","scrollHandlerForTOC","scroll","setupScrollHandler","recursive","rem","getComputedStyle","fontSize","headerRect","height"],"sourceRoot":""} \ No newline at end of file diff --git a/_static/searchtools.js b/_static/searchtools.js new file mode 100644 index 0000000000..2c774d17af --- /dev/null +++ b/_static/searchtools.js @@ -0,0 +1,632 @@ +/* + * Sphinx JavaScript utilities for the full-text search. + */ +"use strict"; + +/** + * Simple result scoring code. + */ +if (typeof Scorer === "undefined") { + var Scorer = { + // Implement the following function to further tweak the score for each result + // The function takes a result array [docname, title, anchor, descr, score, filename] + // and returns the new score. + /* + score: result => { + const [docname, title, anchor, descr, score, filename, kind] = result + return score + }, + */ + + // query matches the full name of an object + objNameMatch: 11, + // or matches in the last dotted part of the object name + objPartialMatch: 6, + // Additive scores depending on the priority of the object + objPrio: { + 0: 15, // used to be importantResults + 1: 5, // used to be objectResults + 2: -5, // used to be unimportantResults + }, + // Used when the priority is not in the mapping. + objPrioDefault: 0, + + // query found in title + title: 15, + partialTitle: 7, + // query found in terms + term: 5, + partialTerm: 2, + }; +} + +// Global search result kind enum, used by themes to style search results. 
+class SearchResultKind { + static get index() { return "index"; } + static get object() { return "object"; } + static get text() { return "text"; } + static get title() { return "title"; } +} + +const _removeChildren = (element) => { + while (element && element.lastChild) element.removeChild(element.lastChild); +}; + +/** + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping + */ +const _escapeRegExp = (string) => + string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string + +const _displayItem = (item, searchTerms, highlightTerms) => { + const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; + const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; + const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX; + const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; + const contentRoot = document.documentElement.dataset.content_root; + + const [docName, title, anchor, descr, score, _filename, kind] = item; + + let listItem = document.createElement("li"); + // Add a class representing the item's type: + // can be used by a theme's CSS selector for styling + // See SearchResultKind for the class names. + listItem.classList.add(`kind-${kind}`); + let requestUrl; + let linkUrl; + if (docBuilder === "dirhtml") { + // dirhtml builder + let dirname = docName + "/"; + if (dirname.match(/\/index\/$/)) + dirname = dirname.substring(0, dirname.length - 6); + else if (dirname === "index/") dirname = ""; + requestUrl = contentRoot + dirname; + linkUrl = requestUrl; + } else { + // normal html builders + requestUrl = contentRoot + docName + docFileSuffix; + linkUrl = docName + docLinkSuffix; + } + let linkEl = listItem.appendChild(document.createElement("a")); + linkEl.href = linkUrl + anchor; + linkEl.dataset.score = score; + linkEl.innerHTML = title; + if (descr) { + listItem.appendChild(document.createElement("span")).innerHTML = + " (" + descr + ")"; + // highlight search terms in the description + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + } + else if (showSearchSummary) + fetch(requestUrl) + .then((responseData) => responseData.text()) + .then((data) => { + if (data) + listItem.appendChild( + Search.makeSearchSummary(data, searchTerms, anchor) + ); + // highlight search terms in the summary + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + }); + Search.output.appendChild(listItem); +}; +const _finishSearch = (resultCount) => { + Search.stopPulse(); + Search.title.innerText = _("Search Results"); + if (!resultCount) + Search.status.innerText = Documentation.gettext( + "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories." 
+ ); + else + Search.status.innerText = Documentation.ngettext( + "Search finished, found one page matching the search query.", + "Search finished, found ${resultCount} pages matching the search query.", + resultCount, + ).replace('${resultCount}', resultCount); +}; +const _displayNextItem = ( + results, + resultCount, + searchTerms, + highlightTerms, +) => { + // results left, load the summary and display it + // this is intended to be dynamic (don't sub resultsCount) + if (results.length) { + _displayItem(results.pop(), searchTerms, highlightTerms); + setTimeout( + () => _displayNextItem(results, resultCount, searchTerms, highlightTerms), + 5 + ); + } + // search finished, update title and status message + else _finishSearch(resultCount); +}; +// Helper function used by query() to order search results. +// Each input is an array of [docname, title, anchor, descr, score, filename, kind]. +// Order the results by score (in opposite order of appearance, since the +// `_displayNextItem` function uses pop() to retrieve items) and then alphabetically. +const _orderResultsByScoreThenName = (a, b) => { + const leftScore = a[4]; + const rightScore = b[4]; + if (leftScore === rightScore) { + // same score: sort alphabetically + const leftTitle = a[1].toLowerCase(); + const rightTitle = b[1].toLowerCase(); + if (leftTitle === rightTitle) return 0; + return leftTitle > rightTitle ? -1 : 1; // inverted is intentional + } + return leftScore > rightScore ? 1 : -1; +}; + +/** + * Default splitQuery function. Can be overridden in ``sphinx.search`` with a + * custom function per language. + * + * The regular expression works by splitting the string on consecutive characters + * that are not Unicode letters, numbers, underscores, or emoji characters. + * This is the same as ``\W+`` in Python, preserving the surrogate pair area. + */ +if (typeof splitQuery === "undefined") { + var splitQuery = (query) => query + .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) + .filter(term => term) // remove remaining empty strings +} + +/** + * Search Module + */ +const Search = { + _index: null, + _queued_query: null, + _pulse_status: -1, + + htmlToText: (htmlString, anchor) => { + const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); + for (const removalQuery of [".headerlink", "script", "style"]) { + htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() }); + } + if (anchor) { + const anchorContent = htmlElement.querySelector(`[role="main"] ${anchor}`); + if (anchorContent) return anchorContent.textContent; + + console.warn( + `Anchored content block not found. Sphinx search tries to obtain it via DOM query '[role=main] ${anchor}'. Check your theme or template.` + ); + } + + // if anchor not specified or not found, fall back to main content + const docContent = htmlElement.querySelector('[role="main"]'); + if (docContent) return docContent.textContent; + + console.warn( + "Content block not found. Sphinx search tries to obtain it via DOM query '[role=main]'. Check your theme or template." 
+ ); + return ""; + }, + + init: () => { + const query = new URLSearchParams(window.location.search).get("q"); + document + .querySelectorAll('input[name="q"]') + .forEach((el) => (el.value = query)); + if (query) Search.performSearch(query); + }, + + loadIndex: (url) => + (document.body.appendChild(document.createElement("script")).src = url), + + setIndex: (index) => { + Search._index = index; + if (Search._queued_query !== null) { + const query = Search._queued_query; + Search._queued_query = null; + Search.query(query); + } + }, + + hasIndex: () => Search._index !== null, + + deferQuery: (query) => (Search._queued_query = query), + + stopPulse: () => (Search._pulse_status = -1), + + startPulse: () => { + if (Search._pulse_status >= 0) return; + + const pulse = () => { + Search._pulse_status = (Search._pulse_status + 1) % 4; + Search.dots.innerText = ".".repeat(Search._pulse_status); + if (Search._pulse_status >= 0) window.setTimeout(pulse, 500); + }; + pulse(); + }, + + /** + * perform a search for something (or wait until index is loaded) + */ + performSearch: (query) => { + // create the required interface elements + const searchText = document.createElement("h2"); + searchText.textContent = _("Searching"); + const searchSummary = document.createElement("p"); + searchSummary.classList.add("search-summary"); + searchSummary.innerText = ""; + const searchList = document.createElement("ul"); + searchList.setAttribute("role", "list"); + searchList.classList.add("search"); + + const out = document.getElementById("search-results"); + Search.title = out.appendChild(searchText); + Search.dots = Search.title.appendChild(document.createElement("span")); + Search.status = out.appendChild(searchSummary); + Search.output = out.appendChild(searchList); + + const searchProgress = document.getElementById("search-progress"); + // Some themes don't use the search progress node + if (searchProgress) { + searchProgress.innerText = _("Preparing search..."); + } + Search.startPulse(); + + // index already loaded, the browser was quick! 
+ if (Search.hasIndex()) Search.query(query); + else Search.deferQuery(query); + }, + + _parseQuery: (query) => { + // stem the search terms and add them to the correct list + const stemmer = new Stemmer(); + const searchTerms = new Set(); + const excludedTerms = new Set(); + const highlightTerms = new Set(); + const objectTerms = new Set(splitQuery(query.toLowerCase().trim())); + splitQuery(query.trim()).forEach((queryTerm) => { + const queryTermLower = queryTerm.toLowerCase(); + + // maybe skip this "word" + // stopwords array is from language_data.js + if ( + stopwords.indexOf(queryTermLower) !== -1 || + queryTerm.match(/^\d+$/) + ) + return; + + // stem the word + let word = stemmer.stemWord(queryTermLower); + // select the correct list + if (word[0] === "-") excludedTerms.add(word.substr(1)); + else { + searchTerms.add(word); + highlightTerms.add(queryTermLower); + } + }); + + if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js + localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" ")) + } + + // console.debug("SEARCH: searching for:"); + // console.info("required: ", [...searchTerms]); + // console.info("excluded: ", [...excludedTerms]); + + return [query, searchTerms, excludedTerms, highlightTerms, objectTerms]; + }, + + /** + * execute search (requires search index to be loaded) + */ + _performSearch: (query, searchTerms, excludedTerms, highlightTerms, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + + // Collect multiple result groups to be sorted separately and then ordered. + // Each is an array of [docname, title, anchor, descr, score, filename, kind]. + const normalResults = []; + const nonMainIndexResults = []; + + _removeChildren(document.getElementById("search-progress")); + + const queryLower = query.toLowerCase().trim(); + for (const [title, foundTitles] of Object.entries(allTitles)) { + if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { + for (const [file, id] of foundTitles) { + const score = Math.round(Scorer.title * queryLower.length / title.length); + const boost = titles[file] === title ? 1 : 0; // add a boost for document titles + normalResults.push([ + docNames[file], + titles[file] !== title ? `${titles[file]} > ${title}` : title, + id !== null ? "#" + id : "", + null, + score + boost, + filenames[file], + SearchResultKind.title, + ]); + } + } + } + + // search for explicit entries in index directives + for (const [entry, foundEntries] of Object.entries(indexEntries)) { + if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { + for (const [file, id, isMain] of foundEntries) { + const score = Math.round(100 * queryLower.length / entry.length); + const result = [ + docNames[file], + titles[file], + id ? 
"#" + id : "", + null, + score, + filenames[file], + SearchResultKind.index, + ]; + if (isMain) { + normalResults.push(result); + } else { + nonMainIndexResults.push(result); + } + } + } + } + + // lookup as object + objectTerms.forEach((term) => + normalResults.push(...Search.performObjectSearch(term, objectTerms)) + ); + + // lookup as search terms in fulltext + normalResults.push(...Search.performTermsSearch(searchTerms, excludedTerms)); + + // let the scorer override scores with a custom scoring function + if (Scorer.score) { + normalResults.forEach((item) => (item[4] = Scorer.score(item))); + nonMainIndexResults.forEach((item) => (item[4] = Scorer.score(item))); + } + + // Sort each group of results by score and then alphabetically by name. + normalResults.sort(_orderResultsByScoreThenName); + nonMainIndexResults.sort(_orderResultsByScoreThenName); + + // Combine the result groups in (reverse) order. + // Non-main index entries are typically arbitrary cross-references, + // so display them after other results. + let results = [...nonMainIndexResults, ...normalResults]; + + // remove duplicate search results + // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept + let seen = new Set(); + results = results.reverse().reduce((acc, result) => { + let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(','); + if (!seen.has(resultStr)) { + acc.push(result); + seen.add(resultStr); + } + return acc; + }, []); + + return results.reverse(); + }, + + query: (query) => { + const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query); + const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms); + + // for debugging + //Search.lastresults = results.slice(); // a copy + // console.info("search results:", Search.lastresults); + + // print the results + _displayNextItem(results, results.length, searchTerms, highlightTerms); + }, + + /** + * search for object names + */ + performObjectSearch: (object, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const objects = Search._index.objects; + const objNames = Search._index.objnames; + const titles = Search._index.titles; + + const results = []; + + const objectSearchCallback = (prefix, match) => { + const name = match[4] + const fullname = (prefix ? prefix + "." : "") + name; + const fullnameLower = fullname.toLowerCase(); + if (fullnameLower.indexOf(object) < 0) return; + + let score = 0; + const parts = fullnameLower.split("."); + + // check for different match types: exact matches of full name or + // "last name" (i.e. 
last dotted part) + if (fullnameLower === object || parts.slice(-1)[0] === object) + score += Scorer.objNameMatch; + else if (parts.slice(-1)[0].indexOf(object) > -1) + score += Scorer.objPartialMatch; // matches in last name + + const objName = objNames[match[1]][2]; + const title = titles[match[0]]; + + // If more than one term searched for, we require other words to be + // found in the name/title/description + const otherTerms = new Set(objectTerms); + otherTerms.delete(object); + if (otherTerms.size > 0) { + const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase(); + if ( + [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0) + ) + return; + } + + let anchor = match[3]; + if (anchor === "") anchor = fullname; + else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname; + + const descr = objName + _(", in ") + title; + + // add custom score for some objects according to scorer + if (Scorer.objPrio.hasOwnProperty(match[2])) + score += Scorer.objPrio[match[2]]; + else score += Scorer.objPrioDefault; + + results.push([ + docNames[match[0]], + fullname, + "#" + anchor, + descr, + score, + filenames[match[0]], + SearchResultKind.object, + ]); + }; + Object.keys(objects).forEach((prefix) => + objects[prefix].forEach((array) => + objectSearchCallback(prefix, array) + ) + ); + return results; + }, + + /** + * search for full-text terms in the index + */ + performTermsSearch: (searchTerms, excludedTerms) => { + // prepare search + const terms = Search._index.terms; + const titleTerms = Search._index.titleterms; + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + + const scoreMap = new Map(); + const fileMap = new Map(); + + // perform the search on the required terms + searchTerms.forEach((word) => { + const files = []; + const arr = [ + { files: terms[word], score: Scorer.term }, + { files: titleTerms[word], score: Scorer.title }, + ]; + // add support for partial matches + if (word.length > 2) { + const escapedWord = _escapeRegExp(word); + if (!terms.hasOwnProperty(word)) { + Object.keys(terms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: terms[term], score: Scorer.partialTerm }); + }); + } + if (!titleTerms.hasOwnProperty(word)) { + Object.keys(titleTerms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: titleTerms[term], score: Scorer.partialTitle }); + }); + } + } + + // no match but word was a required one + if (arr.every((record) => record.files === undefined)) return; + + // found search word in contents + arr.forEach((record) => { + if (record.files === undefined) return; + + let recordFiles = record.files; + if (recordFiles.length === undefined) recordFiles = [recordFiles]; + files.push(...recordFiles); + + // set score for the word in each file + recordFiles.forEach((file) => { + if (!scoreMap.has(file)) scoreMap.set(file, {}); + scoreMap.get(file)[word] = record.score; + }); + }); + + // create the mapping + files.forEach((file) => { + if (!fileMap.has(file)) fileMap.set(file, [word]); + else if (fileMap.get(file).indexOf(word) === -1) fileMap.get(file).push(word); + }); + }); + + // now check if the files don't contain excluded terms + const results = []; + for (const [file, wordList] of fileMap) { + // check if all requirements are matched + + // as search terms with length < 3 are discarded + const filteredTermCount = [...searchTerms].filter( + (term) => term.length > 2 + ).length; + if ( + wordList.length !== 
searchTerms.size && + wordList.length !== filteredTermCount + ) + continue; + + // ensure that none of the excluded terms is in the search result + if ( + [...excludedTerms].some( + (term) => + terms[term] === file || + titleTerms[term] === file || + (terms[term] || []).includes(file) || + (titleTerms[term] || []).includes(file) + ) + ) + break; + + // select one (max) score for the file. + const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w])); + // add result to the result list + results.push([ + docNames[file], + titles[file], + "", + null, + score, + filenames[file], + SearchResultKind.text, + ]); + } + return results; + }, + + /** + * helper function to return a node containing the + * search summary for a given text. keywords is a list + * of stemmed words. + */ + makeSearchSummary: (htmlText, keywords, anchor) => { + const text = Search.htmlToText(htmlText, anchor); + if (text === "") return null; + + const textLower = text.toLowerCase(); + const actualStartPosition = [...keywords] + .map((k) => textLower.indexOf(k.toLowerCase())) + .filter((i) => i > -1) + .slice(-1)[0]; + const startWithContext = Math.max(actualStartPosition - 120, 0); + + const top = startWithContext === 0 ? "" : "..."; + const tail = startWithContext + 240 < text.length ? "..." : ""; + + let summary = document.createElement("p"); + summary.classList.add("context"); + summary.textContent = top + text.substr(startWithContext, 240).trim() + tail; + + return summary; + }, +}; + +_ready(Search.init); diff --git a/_static/skeleton.css b/_static/skeleton.css new file mode 100644 index 0000000000..467c878c62 --- /dev/null +++ b/_static/skeleton.css @@ -0,0 +1,296 @@ +/* Some sane resets. */ +html { + height: 100%; +} + +body { + margin: 0; + min-height: 100%; +} + +/* All the flexbox magic! */ +body, +.sb-announcement, +.sb-content, +.sb-main, +.sb-container, +.sb-container__inner, +.sb-article-container, +.sb-footer-content, +.sb-header, +.sb-header-secondary, +.sb-footer { + display: flex; +} + +/* These order things vertically */ +body, +.sb-main, +.sb-article-container { + flex-direction: column; +} + +/* Put elements in the center */ +.sb-header, +.sb-header-secondary, +.sb-container, +.sb-content, +.sb-footer, +.sb-footer-content { + justify-content: center; +} +/* Put elements at the ends */ +.sb-article-container { + justify-content: space-between; +} + +/* These elements grow. */ +.sb-main, +.sb-content, +.sb-container, +article { + flex-grow: 1; +} + +/* Because padding making this wider is not fun */ +article { + box-sizing: border-box; +} + +/* The announcements element should never be wider than the page. 
*/ +.sb-announcement { + max-width: 100%; +} + +.sb-sidebar-primary, +.sb-sidebar-secondary { + flex-shrink: 0; + width: 17rem; +} + +.sb-announcement__inner { + justify-content: center; + + box-sizing: border-box; + height: 3rem; + + overflow-x: auto; + white-space: nowrap; +} + +/* Sidebars, with checkbox-based toggle */ +.sb-sidebar-primary, +.sb-sidebar-secondary { + position: fixed; + height: 100%; + top: 0; +} + +.sb-sidebar-primary { + left: -17rem; + transition: left 250ms ease-in-out; +} +.sb-sidebar-secondary { + right: -17rem; + transition: right 250ms ease-in-out; +} + +.sb-sidebar-toggle { + display: none; +} +.sb-sidebar-overlay { + position: fixed; + top: 0; + width: 0; + height: 0; + + transition: width 0ms ease 250ms, height 0ms ease 250ms, opacity 250ms ease; + + opacity: 0; + background-color: rgba(0, 0, 0, 0.54); +} + +#sb-sidebar-toggle--primary:checked + ~ .sb-sidebar-overlay[for="sb-sidebar-toggle--primary"], +#sb-sidebar-toggle--secondary:checked + ~ .sb-sidebar-overlay[for="sb-sidebar-toggle--secondary"] { + width: 100%; + height: 100%; + opacity: 1; + transition: width 0ms ease, height 0ms ease, opacity 250ms ease; +} + +#sb-sidebar-toggle--primary:checked ~ .sb-container .sb-sidebar-primary { + left: 0; +} +#sb-sidebar-toggle--secondary:checked ~ .sb-container .sb-sidebar-secondary { + right: 0; +} + +/* Full-width mode */ +.drop-secondary-sidebar-for-full-width-content + .hide-when-secondary-sidebar-shown { + display: none !important; +} +.drop-secondary-sidebar-for-full-width-content .sb-sidebar-secondary { + display: none !important; +} + +/* Mobile views */ +.sb-page-width { + width: 100%; +} + +.sb-article-container, +.sb-footer-content__inner, +.drop-secondary-sidebar-for-full-width-content .sb-article, +.drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 100vw; +} + +.sb-article, +.match-content-width { + padding: 0 1rem; + box-sizing: border-box; +} + +@media (min-width: 32rem) { + .sb-article, + .match-content-width { + padding: 0 2rem; + } +} + +/* Tablet views */ +@media (min-width: 42rem) { + .sb-article-container { + width: auto; + } + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 42rem; + } + .sb-article, + .match-content-width { + width: 42rem; + } +} +@media (min-width: 46rem) { + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 46rem; + } + .sb-article, + .match-content-width { + width: 46rem; + } +} +@media (min-width: 50rem) { + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 50rem; + } + .sb-article, + .match-content-width { + width: 50rem; + } +} + +/* Tablet views */ +@media (min-width: 59rem) { + .sb-sidebar-secondary { + position: static; + } + .hide-when-secondary-sidebar-shown { + display: none !important; + } + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 59rem; + } + .sb-article, + .match-content-width { + width: 42rem; + } +} +@media (min-width: 63rem) { + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 
63rem; + } + .sb-article, + .match-content-width { + width: 46rem; + } +} +@media (min-width: 67rem) { + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 67rem; + } + .sb-article, + .match-content-width { + width: 50rem; + } +} + +/* Desktop views */ +@media (min-width: 76rem) { + .sb-sidebar-primary { + position: static; + } + .hide-when-primary-sidebar-shown { + display: none !important; + } + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 59rem; + } + .sb-article, + .match-content-width { + width: 42rem; + } +} + +/* Full desktop views */ +@media (min-width: 80rem) { + .sb-article, + .match-content-width { + width: 46rem; + } + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 63rem; + } +} + +@media (min-width: 84rem) { + .sb-article, + .match-content-width { + width: 50rem; + } + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 67rem; + } +} + +@media (min-width: 88rem) { + .sb-footer-content__inner, + .drop-secondary-sidebar-for-full-width-content .sb-article, + .drop-secondary-sidebar-for-full-width-content .match-content-width { + width: 67rem; + } + .sb-page-width { + width: 88rem; + } +} diff --git a/_static/sphinx_highlight.js b/_static/sphinx_highlight.js new file mode 100644 index 0000000000..8a96c69a19 --- /dev/null +++ b/_static/sphinx_highlight.js @@ -0,0 +1,154 @@ +/* Highlighting utilities for Sphinx HTML documentation. */ +"use strict"; + +const SPHINX_HIGHLIGHT_ENABLED = true + +/** + * highlight a given string on a node by wrapping it in + * span elements with the given class name. + */ +const _highlight = (node, addItems, text, className) => { + if (node.nodeType === Node.TEXT_NODE) { + const val = node.nodeValue; + const parent = node.parentNode; + const pos = val.toLowerCase().indexOf(text); + if ( + pos >= 0 && + !parent.classList.contains(className) && + !parent.classList.contains("nohighlight") + ) { + let span; + + const closestNode = parent.closest("body, svg, foreignObject"); + const isInSVG = closestNode && closestNode.matches("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.classList.add(className); + } + + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + const rest = document.createTextNode(val.substr(pos + text.length)); + parent.insertBefore( + span, + parent.insertBefore( + rest, + node.nextSibling + ) + ); + node.nodeValue = val.substr(0, pos); + /* There may be more occurrences of search term in this node. So call this + * function recursively on the remaining fragment. 
+ */ + _highlight(rest, addItems, text, className); + + if (isInSVG) { + const rect = document.createElementNS( + "http://www.w3.org/2000/svg", + "rect" + ); + const bbox = parent.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute("class", className); + addItems.push({ parent: parent, target: rect }); + } + } + } else if (node.matches && !node.matches("button, select, textarea")) { + node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); + } +}; +const _highlightText = (thisNode, text, className) => { + let addItems = []; + _highlight(thisNode, addItems, text, className); + addItems.forEach((obj) => + obj.parent.insertAdjacentElement("beforebegin", obj.target) + ); +}; + +/** + * Small JavaScript module for the documentation. + */ +const SphinxHighlight = { + + /** + * highlight the search words provided in localstorage in the text + */ + highlightSearchWords: () => { + if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight + + // get and clear terms from localstorage + const url = new URL(window.location); + const highlight = + localStorage.getItem("sphinx_highlight_terms") + || url.searchParams.get("highlight") + || ""; + localStorage.removeItem("sphinx_highlight_terms") + url.searchParams.delete("highlight"); + window.history.replaceState({}, "", url); + + // get individual terms from highlight string + const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); + if (terms.length === 0) return; // nothing to do + + // There should never be more than one element matching "div.body" + const divBody = document.querySelectorAll("div.body"); + const body = divBody.length ? divBody[0] : document.querySelector("body"); + window.setTimeout(() => { + terms.forEach((term) => _highlightText(body, term, "highlighted")); + }, 10); + + const searchBox = document.getElementById("searchbox"); + if (searchBox === null) return; + searchBox.appendChild( + document + .createRange() + .createContextualFragment( + '<p class="highlight-link">' + + '<a href="javascript:SphinxHighlight.hideSearchWords()">' + + _("Hide Search Matches") + + "</a>" + + "</p>" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query.
+ */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/_static/styles/furo-extensions.css b/_static/styles/furo-extensions.css new file mode 100644 index 0000000000..2d74267fe1 --- /dev/null +++ b/_static/styles/furo-extensions.css @@ -0,0 +1,2 @@ +#furo-sidebar-ad-placement{padding:var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal)}#furo-sidebar-ad-placement .ethical-sidebar{background:var(--color-background-secondary);border:none;box-shadow:none}#furo-sidebar-ad-placement .ethical-sidebar:hover{background:var(--color-background-hover)}#furo-sidebar-ad-placement .ethical-sidebar a{color:var(--color-foreground-primary)}#furo-sidebar-ad-placement .ethical-callout a{color:var(--color-foreground-secondary)!important}#furo-readthedocs-versions{background:transparent;display:block;position:static;width:100%}#furo-readthedocs-versions .rst-versions{background:#1a1c1e}#furo-readthedocs-versions .rst-current-version{background:var(--color-sidebar-item-background);cursor:unset}#furo-readthedocs-versions .rst-current-version:hover{background:var(--color-sidebar-item-background)}#furo-readthedocs-versions .rst-current-version .fa-book{color:var(--color-foreground-primary)}#furo-readthedocs-versions>.rst-other-versions{padding:0}#furo-readthedocs-versions>.rst-other-versions small{opacity:1}#furo-readthedocs-versions .injected .rst-versions{position:unset}#furo-readthedocs-versions:focus-within,#furo-readthedocs-versions:hover{box-shadow:0 0 0 1px var(--color-sidebar-background-border)}#furo-readthedocs-versions:focus-within .rst-current-version,#furo-readthedocs-versions:hover .rst-current-version{background:#1a1c1e;font-size:inherit;height:auto;line-height:inherit;padding:12px;text-align:right}#furo-readthedocs-versions:focus-within .rst-current-version .fa-book,#furo-readthedocs-versions:hover .rst-current-version .fa-book{color:#fff;float:left}#furo-readthedocs-versions:focus-within .fa-caret-down,#furo-readthedocs-versions:hover .fa-caret-down{display:none}#furo-readthedocs-versions:focus-within .injected,#furo-readthedocs-versions:focus-within .rst-current-version,#furo-readthedocs-versions:focus-within .rst-other-versions,#furo-readthedocs-versions:hover .injected,#furo-readthedocs-versions:hover .rst-current-version,#furo-readthedocs-versions:hover .rst-other-versions{display:block}#furo-readthedocs-versions:focus-within>.rst-current-version,#furo-readthedocs-versions:hover>.rst-current-version{display:none}.highlight:hover button.copybtn{color:var(--color-code-foreground)}.highlight button.copybtn{align-items:center;background-color:var(--color-code-background);border:none;color:var(--color-background-item);cursor:pointer;height:1.25em;right:.5rem;top:.625rem;transition:color .3s,opacity .3s;width:1.25em}.highlight button.copybtn:hover{background-color:var(--color-code-background);color:var(--color-brand-content)}.highlight button.copybtn:after{background-color:transparent;color:var(--color-code-foreground);display:none}.highlight button.copybtn.success{color:#22863a;transition:color 0s}.highlight button.copybtn.success:after{display:block}.highlight button.copybtn 
svg{padding:0}body{--sd-color-primary:var(--color-brand-primary);--sd-color-primary-highlight:var(--color-brand-content);--sd-color-primary-text:var(--color-background-primary);--sd-color-shadow:rgba(0,0,0,.05);--sd-color-card-border:var(--color-card-border);--sd-color-card-border-hover:var(--color-brand-content);--sd-color-card-background:var(--color-card-background);--sd-color-card-text:var(--color-foreground-primary);--sd-color-card-header:var(--color-card-marginals-background);--sd-color-card-footer:var(--color-card-marginals-background);--sd-color-tabs-label-active:var(--color-brand-content);--sd-color-tabs-label-hover:var(--color-foreground-muted);--sd-color-tabs-label-inactive:var(--color-foreground-muted);--sd-color-tabs-underline-active:var(--color-brand-content);--sd-color-tabs-underline-hover:var(--color-foreground-border);--sd-color-tabs-underline-inactive:var(--color-background-border);--sd-color-tabs-overline:var(--color-background-border);--sd-color-tabs-underline:var(--color-background-border)}.sd-tab-content{box-shadow:0 -2px var(--sd-color-tabs-overline),0 1px var(--sd-color-tabs-underline)}.sd-card{box-shadow:0 .1rem .25rem var(--sd-color-shadow),0 0 .0625rem rgba(0,0,0,.1)}.sd-shadow-sm{box-shadow:0 .1rem .25rem var(--sd-color-shadow),0 0 .0625rem rgba(0,0,0,.1)!important}.sd-shadow-md{box-shadow:0 .3rem .75rem var(--sd-color-shadow),0 0 .0625rem rgba(0,0,0,.1)!important}.sd-shadow-lg{box-shadow:0 .6rem 1.5rem var(--sd-color-shadow),0 0 .0625rem rgba(0,0,0,.1)!important}.sd-card-hover:hover{transform:none}.sd-cards-carousel{gap:.25rem;padding:.25rem}body{--tabs--label-text:var(--color-foreground-muted);--tabs--label-text--hover:var(--color-foreground-muted);--tabs--label-text--active:var(--color-brand-content);--tabs--label-text--active--hover:var(--color-brand-content);--tabs--label-background:transparent;--tabs--label-background--hover:transparent;--tabs--label-background--active:transparent;--tabs--label-background--active--hover:transparent;--tabs--padding-x:0.25em;--tabs--margin-x:1em;--tabs--border:var(--color-background-border);--tabs--label-border:transparent;--tabs--label-border--hover:var(--color-foreground-muted);--tabs--label-border--active:var(--color-brand-content);--tabs--label-border--active--hover:var(--color-brand-content)}[role=main] .container{max-width:none;padding-left:0;padding-right:0}.shadow.docutils{border:none;box-shadow:0 .2rem .5rem rgba(0,0,0,.05),0 0 .0625rem rgba(0,0,0,.1)!important}.sphinx-bs .card{background-color:var(--color-background-secondary);color:var(--color-foreground)} +/*# sourceMappingURL=furo-extensions.css.map*/ \ No newline at end of file diff --git a/_static/styles/furo-extensions.css.map b/_static/styles/furo-extensions.css.map new file mode 100644 index 0000000000..68fb7fd0d7 --- /dev/null +++ b/_static/styles/furo-extensions.css.map @@ -0,0 +1 @@ 
+{"version":3,"file":"styles/furo-extensions.css","mappings":"AAGA,2BACE,oFACA,4CAKE,6CAHA,YACA,eAEA,CACA,kDACE,yCAEF,8CACE,sCAEJ,8CACE,kDAEJ,2BAGE,uBACA,cAHA,gBACA,UAEA,CAGA,yCACE,mBAEF,gDAEE,gDADA,YACA,CACA,sDACE,gDACF,yDACE,sCAEJ,+CACE,UACA,qDACE,UAGF,mDACE,eAEJ,yEAEE,4DAEA,mHASE,mBAPA,kBAEA,YADA,oBAGA,aADA,gBAIA,CAEA,qIAEE,WADA,UACA,CAEJ,uGACE,aAEF,iUAGE,cAEF,mHACE,aC1EJ,gCACE,mCAEF,0BAEE,mBAUA,8CACA,YAFA,mCAKA,eAZA,cAIA,YADA,YAYA,iCAdA,YAcA,CAEA,gCAEE,8CADA,gCACA,CAEF,gCAGE,6BADA,mCADA,YAEA,CAEF,kCAEE,cADA,mBACA,CACA,wCACE,cAEJ,8BACE,UCzCN,KAEE,6CAA8C,CAC9C,uDAAwD,CACxD,uDAAwD,CAGxD,iCAAsC,CAGtC,+CAAgD,CAChD,uDAAwD,CACxD,uDAAwD,CACxD,oDAAqD,CACrD,6DAA8D,CAC9D,6DAA8D,CAG9D,uDAAwD,CACxD,yDAA0D,CAC1D,4DAA6D,CAC7D,2DAA4D,CAC5D,8DAA+D,CAC/D,iEAAkE,CAClE,uDAAwD,CACxD,wDAAyD,CAG3D,gBACE,qFAGF,SACE,6EAEF,cACE,uFAEF,cACE,uFAEF,cACE,uFAGF,qBACE,eAEF,mBACE,WACA,eChDF,KACE,gDAAiD,CACjD,uDAAwD,CACxD,qDAAsD,CACtD,4DAA6D,CAC7D,oCAAqC,CACrC,2CAA4C,CAC5C,4CAA6C,CAC7C,mDAAoD,CACpD,wBAAyB,CACzB,oBAAqB,CACrB,6CAA8C,CAC9C,gCAAiC,CACjC,yDAA0D,CAC1D,uDAAwD,CACxD,8DAA+D,CCbjE,uBACE,eACA,eACA,gBAGF,iBACE,YACA,+EAGF,iBACE,mDACA","sources":["webpack:///./src/furo/assets/styles/extensions/_readthedocs.sass","webpack:///./src/furo/assets/styles/extensions/_copybutton.sass","webpack:///./src/furo/assets/styles/extensions/_sphinx-design.sass","webpack:///./src/furo/assets/styles/extensions/_sphinx-inline-tabs.sass","webpack:///./src/furo/assets/styles/extensions/_sphinx-panels.sass"],"sourcesContent":["// This file contains the styles used for tweaking how ReadTheDoc's embedded\n// contents would show up inside the theme.\n\n#furo-sidebar-ad-placement\n padding: var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal)\n .ethical-sidebar\n // Remove the border and box-shadow.\n border: none\n box-shadow: none\n // Manage the background colors.\n background: var(--color-background-secondary)\n &:hover\n background: var(--color-background-hover)\n // Ensure the text is legible.\n a\n color: var(--color-foreground-primary)\n\n .ethical-callout a\n color: var(--color-foreground-secondary) !important\n\n#furo-readthedocs-versions\n position: static\n width: 100%\n background: transparent\n display: block\n\n // Make the background color fit with the theme's aesthetic.\n .rst-versions\n background: rgb(26, 28, 30)\n\n .rst-current-version\n cursor: unset\n background: var(--color-sidebar-item-background)\n &:hover\n background: var(--color-sidebar-item-background)\n .fa-book\n color: var(--color-foreground-primary)\n\n > .rst-other-versions\n padding: 0\n small\n opacity: 1\n\n .injected\n .rst-versions\n position: unset\n\n &:hover,\n &:focus-within\n box-shadow: 0 0 0 1px var(--color-sidebar-background-border)\n\n .rst-current-version\n // Undo the tweaks done in RTD's CSS\n font-size: inherit\n line-height: inherit\n height: auto\n text-align: right\n padding: 12px\n\n // Match the rest of the body\n background: #1a1c1e\n\n .fa-book\n float: left\n color: white\n\n .fa-caret-down\n display: none\n\n .rst-current-version,\n .rst-other-versions,\n .injected\n display: block\n\n > .rst-current-version\n display: none\n",".highlight\n &:hover button.copybtn\n color: var(--color-code-foreground)\n\n button.copybtn\n // Align things correctly\n align-items: center\n\n height: 1.25em\n width: 1.25em\n\n top: 0.625rem // $code-spacing-vertical\n right: 0.5rem\n\n // Make it look better\n color: var(--color-background-item)\n background-color: var(--color-code-background)\n border: none\n\n // Change to 
cursor to make it obvious that you can click on it\n cursor: pointer\n\n // Transition smoothly, for aesthetics\n transition: color 300ms, opacity 300ms\n\n &:hover\n color: var(--color-brand-content)\n background-color: var(--color-code-background)\n\n &::after\n display: none\n color: var(--color-code-foreground)\n background-color: transparent\n\n &.success\n transition: color 0ms\n color: #22863a\n &::after\n display: block\n\n svg\n padding: 0\n","body\n // Colors\n --sd-color-primary: var(--color-brand-primary)\n --sd-color-primary-highlight: var(--color-brand-content)\n --sd-color-primary-text: var(--color-background-primary)\n\n // Shadows\n --sd-color-shadow: rgba(0, 0, 0, 0.05)\n\n // Cards\n --sd-color-card-border: var(--color-card-border)\n --sd-color-card-border-hover: var(--color-brand-content)\n --sd-color-card-background: var(--color-card-background)\n --sd-color-card-text: var(--color-foreground-primary)\n --sd-color-card-header: var(--color-card-marginals-background)\n --sd-color-card-footer: var(--color-card-marginals-background)\n\n // Tabs\n --sd-color-tabs-label-active: var(--color-brand-content)\n --sd-color-tabs-label-hover: var(--color-foreground-muted)\n --sd-color-tabs-label-inactive: var(--color-foreground-muted)\n --sd-color-tabs-underline-active: var(--color-brand-content)\n --sd-color-tabs-underline-hover: var(--color-foreground-border)\n --sd-color-tabs-underline-inactive: var(--color-background-border)\n --sd-color-tabs-overline: var(--color-background-border)\n --sd-color-tabs-underline: var(--color-background-border)\n\n// Tabs\n.sd-tab-content\n box-shadow: 0 -2px var(--sd-color-tabs-overline), 0 1px var(--sd-color-tabs-underline)\n\n// Shadows\n.sd-card // Have a shadow by default\n box-shadow: 0 0.1rem 0.25rem var(--sd-color-shadow), 0 0 0.0625rem rgba(0, 0, 0, 0.1)\n\n.sd-shadow-sm\n box-shadow: 0 0.1rem 0.25rem var(--sd-color-shadow), 0 0 0.0625rem rgba(0, 0, 0, 0.1) !important\n\n.sd-shadow-md\n box-shadow: 0 0.3rem 0.75rem var(--sd-color-shadow), 0 0 0.0625rem rgba(0, 0, 0, 0.1) !important\n\n.sd-shadow-lg\n box-shadow: 0 0.6rem 1.5rem var(--sd-color-shadow), 0 0 0.0625rem rgba(0, 0, 0, 0.1) !important\n\n// Cards\n.sd-card-hover:hover // Don't change scale on hover\n transform: none\n\n.sd-cards-carousel // Have a bit of gap in the carousel by default\n gap: 0.25rem\n padding: 0.25rem\n","// This file contains styles to tweak sphinx-inline-tabs to work well with Furo.\n\nbody\n --tabs--label-text: var(--color-foreground-muted)\n --tabs--label-text--hover: var(--color-foreground-muted)\n --tabs--label-text--active: var(--color-brand-content)\n --tabs--label-text--active--hover: var(--color-brand-content)\n --tabs--label-background: transparent\n --tabs--label-background--hover: transparent\n --tabs--label-background--active: transparent\n --tabs--label-background--active--hover: transparent\n --tabs--padding-x: 0.25em\n --tabs--margin-x: 1em\n --tabs--border: var(--color-background-border)\n --tabs--label-border: transparent\n --tabs--label-border--hover: var(--color-foreground-muted)\n --tabs--label-border--active: var(--color-brand-content)\n --tabs--label-border--active--hover: var(--color-brand-content)\n","// This file contains styles to tweak sphinx-panels to work well with Furo.\n\n// sphinx-panels includes Bootstrap 4, which uses .container which can conflict\n// with docutils' `.. 
container::` directive.\n[role=\"main\"] .container\n max-width: initial\n padding-left: initial\n padding-right: initial\n\n// Make the panels look nicer!\n.shadow.docutils\n border: none\n box-shadow: 0 0.2rem 0.5rem rgba(0, 0, 0, 0.05), 0 0 0.0625rem rgba(0, 0, 0, 0.1) !important\n\n// Make panel colors respond to dark mode\n.sphinx-bs .card\n background-color: var(--color-background-secondary)\n color: var(--color-foreground)\n"],"names":[],"sourceRoot":""} \ No newline at end of file diff --git a/_static/styles/furo.css b/_static/styles/furo.css new file mode 100644 index 0000000000..a5b614d2cd --- /dev/null +++ b/_static/styles/furo.css @@ -0,0 +1,2 @@ +/*! normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */html{line-height:1.15;-webkit-text-size-adjust:100%}body{margin:0}main{display:block}h1{font-size:2em;margin:.67em 0}hr{box-sizing:content-box;height:0;overflow:visible}pre{font-family:monospace,monospace;font-size:1em}a{background-color:transparent}abbr[title]{border-bottom:none;text-decoration:underline;text-decoration:underline dotted}b,strong{font-weight:bolder}code,kbd,samp{font-family:monospace,monospace;font-size:1em}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sub{bottom:-.25em}sup{top:-.5em}img{border-style:none}button,input,optgroup,select,textarea{font-family:inherit;font-size:100%;line-height:1.15;margin:0}button,input{overflow:visible}button,select{text-transform:none}[type=button],[type=reset],[type=submit],button{-webkit-appearance:button}[type=button]::-moz-focus-inner,[type=reset]::-moz-focus-inner,[type=submit]::-moz-focus-inner,button::-moz-focus-inner{border-style:none;padding:0}[type=button]:-moz-focusring,[type=reset]:-moz-focusring,[type=submit]:-moz-focusring,button:-moz-focusring{outline:1px dotted ButtonText}fieldset{padding:.35em .75em .625em}legend{box-sizing:border-box;color:inherit;display:table;max-width:100%;padding:0;white-space:normal}progress{vertical-align:baseline}textarea{overflow:auto}[type=checkbox],[type=radio]{box-sizing:border-box;padding:0}[type=number]::-webkit-inner-spin-button,[type=number]::-webkit-outer-spin-button{height:auto}[type=search]{-webkit-appearance:textfield;outline-offset:-2px}[type=search]::-webkit-search-decoration{-webkit-appearance:none}::-webkit-file-upload-button{-webkit-appearance:button;font:inherit}details{display:block}summary{display:list-item}[hidden],template{display:none}@media print{.content-icon-container,.headerlink,.mobile-header,.related-pages{display:none!important}.highlight{border:.1pt solid var(--color-foreground-border)}a,blockquote,dl,ol,p,pre,table,ul{page-break-inside:avoid}caption,figure,h1,h2,h3,h4,h5,h6,img{page-break-after:avoid;page-break-inside:avoid}dl,ol,ul{page-break-before:avoid}}.visually-hidden{height:1px!important;margin:-1px!important;overflow:hidden!important;padding:0!important;position:absolute!important;width:1px!important;clip:rect(0,0,0,0)!important;background:var(--color-background-primary);border:0!important;color:var(--color-foreground-primary);white-space:nowrap!important}:-moz-focusring{outline:auto}body{--font-stack:-apple-system,BlinkMacSystemFont,Segoe UI,Helvetica,Arial,sans-serif,Apple Color Emoji,Segoe UI Emoji;--font-stack--monospace:"SFMono-Regular",Menlo,Consolas,Monaco,Liberation Mono,Lucida 
Console,monospace;--font-stack--headings:var(--font-stack);--font-size--normal:100%;--font-size--small:87.5%;--font-size--small--2:81.25%;--font-size--small--3:75%;--font-size--small--4:62.5%;--sidebar-caption-font-size:var(--font-size--small--2);--sidebar-item-font-size:var(--font-size--small);--sidebar-search-input-font-size:var(--font-size--small);--toc-font-size:var(--font-size--small--3);--toc-font-size--mobile:var(--font-size--normal);--toc-title-font-size:var(--font-size--small--4);--admonition-font-size:0.8125rem;--admonition-title-font-size:0.8125rem;--code-font-size:var(--font-size--small--2);--api-font-size:var(--font-size--small);--header-height:calc(var(--sidebar-item-line-height) + var(--sidebar-item-spacing-vertical)*4);--header-padding:0.5rem;--sidebar-tree-space-above:1.5rem;--sidebar-caption-space-above:1rem;--sidebar-item-line-height:1rem;--sidebar-item-spacing-vertical:0.5rem;--sidebar-item-spacing-horizontal:1rem;--sidebar-item-height:calc(var(--sidebar-item-line-height) + var(--sidebar-item-spacing-vertical)*2);--sidebar-expander-width:var(--sidebar-item-height);--sidebar-search-space-above:0.5rem;--sidebar-search-input-spacing-vertical:0.5rem;--sidebar-search-input-spacing-horizontal:0.5rem;--sidebar-search-input-height:1rem;--sidebar-search-icon-size:var(--sidebar-search-input-height);--toc-title-padding:0.25rem 0;--toc-spacing-vertical:1.5rem;--toc-spacing-horizontal:1.5rem;--toc-item-spacing-vertical:0.4rem;--toc-item-spacing-horizontal:1rem;--icon-search:url('data:image/svg+xml;charset=utf-8,');--icon-pencil:url('data:image/svg+xml;charset=utf-8,');--icon-abstract:url('data:image/svg+xml;charset=utf-8,');--icon-info:url('data:image/svg+xml;charset=utf-8,');--icon-flame:url('data:image/svg+xml;charset=utf-8,');--icon-question:url('data:image/svg+xml;charset=utf-8,');--icon-warning:url('data:image/svg+xml;charset=utf-8,');--icon-failure:url('data:image/svg+xml;charset=utf-8,');--icon-spark:url('data:image/svg+xml;charset=utf-8,');--color-admonition-title--caution:#ff9100;--color-admonition-title-background--caution:rgba(255,145,0,.2);--color-admonition-title--warning:#ff9100;--color-admonition-title-background--warning:rgba(255,145,0,.2);--color-admonition-title--danger:#ff5252;--color-admonition-title-background--danger:rgba(255,82,82,.2);--color-admonition-title--attention:#ff5252;--color-admonition-title-background--attention:rgba(255,82,82,.2);--color-admonition-title--error:#ff5252;--color-admonition-title-background--error:rgba(255,82,82,.2);--color-admonition-title--hint:#00c852;--color-admonition-title-background--hint:rgba(0,200,82,.2);--color-admonition-title--tip:#00c852;--color-admonition-title-background--tip:rgba(0,200,82,.2);--color-admonition-title--important:#00bfa5;--color-admonition-title-background--important:rgba(0,191,165,.2);--color-admonition-title--note:#00b0ff;--color-admonition-title-background--note:rgba(0,176,255,.2);--color-admonition-title--seealso:#448aff;--color-admonition-title-background--seealso:rgba(68,138,255,.2);--color-admonition-title--admonition-todo:grey;--color-admonition-title-background--admonition-todo:hsla(0,0%,50%,.2);--color-admonition-title:#651fff;--color-admonition-title-background:rgba(101,31,255,.2);--icon-admonition-default:var(--icon-abstract);--color-topic-title:#14b8a6;--color-topic-title-background:rgba(20,184,166,.2);--icon-topic-default:var(--icon-pencil);--color-problematic:#b30000;--color-foreground-primary:#000;--color-foreground-secondary:#5a5c63;--color-foreground-muted:#6b6f76;--color-foreground-bor
der:#878787;--color-background-primary:#fff;--color-background-secondary:#f8f9fb;--color-background-hover:#efeff4;--color-background-hover--transparent:#efeff400;--color-background-border:#eeebee;--color-background-item:#ccc;--color-announcement-background:#000000dd;--color-announcement-text:#eeebee;--color-brand-primary:#0a4bff;--color-brand-content:#2757dd;--color-brand-visited:#872ee0;--color-api-background:var(--color-background-hover--transparent);--color-api-background-hover:var(--color-background-hover);--color-api-overall:var(--color-foreground-secondary);--color-api-name:var(--color-problematic);--color-api-pre-name:var(--color-problematic);--color-api-paren:var(--color-foreground-secondary);--color-api-keyword:var(--color-foreground-primary);--color-api-added:#21632c;--color-api-added-border:#38a84d;--color-api-changed:#046172;--color-api-changed-border:#06a1bc;--color-api-deprecated:#605706;--color-api-deprecated-border:#f0d90f;--color-api-removed:#b30000;--color-api-removed-border:#ff5c5c;--color-highlight-on-target:#ffc;--color-inline-code-background:var(--color-background-secondary);--color-highlighted-background:#def;--color-highlighted-text:var(--color-foreground-primary);--color-guilabel-background:#ddeeff80;--color-guilabel-border:#bedaf580;--color-guilabel-text:var(--color-foreground-primary);--color-admonition-background:transparent;--color-table-header-background:var(--color-background-secondary);--color-table-border:var(--color-background-border);--color-card-border:var(--color-background-secondary);--color-card-background:transparent;--color-card-marginals-background:var(--color-background-secondary);--color-header-background:var(--color-background-primary);--color-header-border:var(--color-background-border);--color-header-text:var(--color-foreground-primary);--color-sidebar-background:var(--color-background-secondary);--color-sidebar-background-border:var(--color-background-border);--color-sidebar-brand-text:var(--color-foreground-primary);--color-sidebar-caption-text:var(--color-foreground-muted);--color-sidebar-link-text:var(--color-foreground-secondary);--color-sidebar-link-text--top-level:var(--color-brand-primary);--color-sidebar-item-background:var(--color-sidebar-background);--color-sidebar-item-background--current:var( --color-sidebar-item-background );--color-sidebar-item-background--hover:linear-gradient(90deg,var(--color-background-hover--transparent) 0%,var(--color-background-hover) var(--sidebar-item-spacing-horizontal),var(--color-background-hover) 100%);--color-sidebar-item-expander-background:transparent;--color-sidebar-item-expander-background--hover:var( --color-background-hover 
);--color-sidebar-search-text:var(--color-foreground-primary);--color-sidebar-search-background:var(--color-background-secondary);--color-sidebar-search-background--focus:var(--color-background-primary);--color-sidebar-search-border:var(--color-background-border);--color-sidebar-search-icon:var(--color-foreground-muted);--color-toc-background:var(--color-background-primary);--color-toc-title-text:var(--color-foreground-muted);--color-toc-item-text:var(--color-foreground-secondary);--color-toc-item-text--hover:var(--color-foreground-primary);--color-toc-item-text--active:var(--color-brand-primary);--color-content-foreground:var(--color-foreground-primary);--color-content-background:transparent;--color-link:var(--color-brand-content);--color-link-underline:var(--color-background-border);--color-link--hover:var(--color-brand-content);--color-link-underline--hover:var(--color-foreground-border);--color-link--visited:var(--color-brand-visited);--color-link-underline--visited:var(--color-background-border);--color-link--visited--hover:var(--color-brand-visited);--color-link-underline--visited--hover:var(--color-foreground-border)}.only-light{display:block!important}html body .only-dark{display:none!important}@media not print{body[data-theme=dark]{--color-problematic:#ee5151;--color-foreground-primary:#cfd0d0;--color-foreground-secondary:#9ca0a5;--color-foreground-muted:#81868d;--color-foreground-border:#666;--color-background-primary:#131416;--color-background-secondary:#1a1c1e;--color-background-hover:#1e2124;--color-background-hover--transparent:#1e212400;--color-background-border:#303335;--color-background-item:#444;--color-announcement-background:#000000dd;--color-announcement-text:#eeebee;--color-brand-primary:#3d94ff;--color-brand-content:#5ca5ff;--color-brand-visited:#b27aeb;--color-highlighted-background:#083563;--color-guilabel-background:#08356380;--color-guilabel-border:#13395f80;--color-api-keyword:var(--color-foreground-secondary);--color-highlight-on-target:#330;--color-api-added:#3db854;--color-api-added-border:#267334;--color-api-changed:#09b0ce;--color-api-changed-border:#056d80;--color-api-deprecated:#b1a10b;--color-api-deprecated-border:#6e6407;--color-api-removed:#ff7575;--color-api-removed-border:#b03b3b;--color-admonition-background:#18181a;--color-card-border:var(--color-background-secondary);--color-card-background:#18181a;--color-card-marginals-background:var(--color-background-hover)}html body[data-theme=dark] .only-light{display:none!important}body[data-theme=dark] 
.only-dark{display:block!important}@media(prefers-color-scheme:dark){body:not([data-theme=light]){--color-problematic:#ee5151;--color-foreground-primary:#cfd0d0;--color-foreground-secondary:#9ca0a5;--color-foreground-muted:#81868d;--color-foreground-border:#666;--color-background-primary:#131416;--color-background-secondary:#1a1c1e;--color-background-hover:#1e2124;--color-background-hover--transparent:#1e212400;--color-background-border:#303335;--color-background-item:#444;--color-announcement-background:#000000dd;--color-announcement-text:#eeebee;--color-brand-primary:#3d94ff;--color-brand-content:#5ca5ff;--color-brand-visited:#b27aeb;--color-highlighted-background:#083563;--color-guilabel-background:#08356380;--color-guilabel-border:#13395f80;--color-api-keyword:var(--color-foreground-secondary);--color-highlight-on-target:#330;--color-api-added:#3db854;--color-api-added-border:#267334;--color-api-changed:#09b0ce;--color-api-changed-border:#056d80;--color-api-deprecated:#b1a10b;--color-api-deprecated-border:#6e6407;--color-api-removed:#ff7575;--color-api-removed-border:#b03b3b;--color-admonition-background:#18181a;--color-card-border:var(--color-background-secondary);--color-card-background:#18181a;--color-card-marginals-background:var(--color-background-hover)}html body:not([data-theme=light]) .only-light{display:none!important}body:not([data-theme=light]) .only-dark{display:block!important}}}body[data-theme=auto] .theme-toggle svg.theme-icon-when-auto-light{display:block}@media(prefers-color-scheme:dark){body[data-theme=auto] .theme-toggle svg.theme-icon-when-auto-dark{display:block}body[data-theme=auto] .theme-toggle svg.theme-icon-when-auto-light{display:none}}body[data-theme=dark] .theme-toggle svg.theme-icon-when-dark,body[data-theme=light] .theme-toggle svg.theme-icon-when-light{display:block}body{font-family:var(--font-stack)}code,kbd,pre,samp{font-family:var(--font-stack--monospace)}body{-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}article{line-height:1.5}h1,h2,h3,h4,h5,h6{border-radius:.5rem;font-family:var(--font-stack--headings);font-weight:700;line-height:1.25;margin:.5rem -.5rem;padding-left:.5rem;padding-right:.5rem}h1+p,h2+p,h3+p,h4+p,h5+p,h6+p{margin-top:0}h1{font-size:2.5em;margin-bottom:1rem}h1,h2{margin-top:1.75rem}h2{font-size:2em}h3{font-size:1.5em}h4{font-size:1.25em}h5{font-size:1.125em}h6{font-size:1em}small{font-size:80%;opacity:75%}p{margin-bottom:.75rem;margin-top:.5rem}hr.docutils{background-color:var(--color-background-border);border:0;height:1px;margin:2rem 0;padding:0}.centered{text-align:center}a{color:var(--color-link);text-decoration:underline;text-decoration-color:var(--color-link-underline)}a:visited{color:var(--color-link--visited);text-decoration-color:var(--color-link-underline--visited)}a:visited:hover{color:var(--color-link--visited--hover);text-decoration-color:var(--color-link-underline--visited--hover)}a:hover{color:var(--color-link--hover);text-decoration-color:var(--color-link-underline--hover)}a.muted-link{color:inherit}a.muted-link:hover{color:var(--color-link--hover);text-decoration-color:var(--color-link-underline--hover)}a.muted-link:hover:visited{color:var(--color-link--visited--hover);text-decoration-color:var(--color-link-underline--visited--hover)}html{overflow-x:hidden;overflow-y:scroll;scroll-behavior:smooth}.sidebar-scroll,.toc-scroll,article[role=main] *{scrollbar-color:var(--color-foreground-border) 
transparent;scrollbar-width:thin}body,html{height:100%}.skip-to-content,body,html{background:var(--color-background-primary);color:var(--color-foreground-primary)}.skip-to-content{border-radius:1rem;left:.25rem;padding:1rem;position:fixed;top:.25rem;transform:translateY(-200%);transition:transform .3s ease-in-out;z-index:40}.skip-to-content:focus-within{transform:translateY(0)}article{background:var(--color-content-background);color:var(--color-content-foreground);overflow-wrap:break-word}.page{display:flex;min-height:100%}.mobile-header{background-color:var(--color-header-background);border-bottom:1px solid var(--color-header-border);color:var(--color-header-text);display:none;height:var(--header-height);width:100%;z-index:10}.mobile-header.scrolled{border-bottom:none;box-shadow:0 0 .2rem rgba(0,0,0,.1),0 .2rem .4rem rgba(0,0,0,.2)}.mobile-header .header-center a{color:var(--color-header-text);text-decoration:none}.main{display:flex;flex:1}.sidebar-drawer{background:var(--color-sidebar-background);border-right:1px solid var(--color-sidebar-background-border);box-sizing:border-box;display:flex;justify-content:flex-end;min-width:15em;width:calc(50% - 26em)}.sidebar-container,.toc-drawer{box-sizing:border-box;width:15em}.toc-drawer{background:var(--color-toc-background);padding-right:1rem}.sidebar-sticky,.toc-sticky{display:flex;flex-direction:column;height:min(100%,100vh);height:100vh;position:sticky;top:0}.sidebar-scroll,.toc-scroll{flex-grow:1;flex-shrink:1;overflow:auto;scroll-behavior:smooth}.content{display:flex;flex-direction:column;justify-content:space-between;padding:0 3em;width:46em}.icon{display:inline-block;height:1rem;width:1rem}.icon svg{height:100%;width:100%}.announcement{align-items:center;background-color:var(--color-announcement-background);color:var(--color-announcement-text);display:flex;height:var(--header-height);overflow-x:auto}.announcement+.page{min-height:calc(100% - var(--header-height))}.announcement-content{box-sizing:border-box;min-width:100%;padding:.5rem;text-align:center;white-space:nowrap}.announcement-content a{color:var(--color-announcement-text);text-decoration-color:var(--color-announcement-text)}.announcement-content a:hover{color:var(--color-announcement-text);text-decoration-color:var(--color-link--hover)}.no-js .theme-toggle-container{display:none}.theme-toggle-container{display:flex}.theme-toggle{background:transparent;border:none;cursor:pointer;display:flex;padding:0}.theme-toggle svg{color:var(--color-foreground-primary);display:none;height:1.25rem;width:1.25rem}.theme-toggle-header{align-items:center;display:flex;justify-content:center}.nav-overlay-icon,.toc-overlay-icon{cursor:pointer;display:none}.nav-overlay-icon .icon,.toc-overlay-icon .icon{color:var(--color-foreground-secondary);height:1.5rem;width:1.5rem}.nav-overlay-icon,.toc-header-icon{align-items:center;justify-content:center}.toc-content-icon{height:1.5rem;width:1.5rem}.content-icon-container{display:flex;float:right;gap:.5rem;margin-bottom:1rem;margin-left:1rem;margin-top:1.5rem}.content-icon-container .edit-this-page svg,.content-icon-container .view-this-page svg{color:inherit;height:1.25rem;width:1.25rem}.sidebar-toggle{display:none;position:absolute}.sidebar-toggle[name=__toc]{left:20px}.sidebar-toggle:checked{left:40px}.overlay{background-color:rgba(0,0,0,.54);height:0;opacity:0;position:fixed;top:0;transition:width 0s,height 0s,opacity .25s ease-out;width:0}.sidebar-overlay{z-index:20}.toc-overlay{z-index:40}.sidebar-drawer{transition:left .25s 
ease-in-out;z-index:30}.toc-drawer{transition:right .25s ease-in-out;z-index:50}#__navigation:checked~.sidebar-overlay{height:100%;opacity:1;width:100%}#__navigation:checked~.page .sidebar-drawer{left:0;top:0}#__toc:checked~.toc-overlay{height:100%;opacity:1;width:100%}#__toc:checked~.page .toc-drawer{right:0;top:0}.back-to-top{background:var(--color-background-primary);border-radius:1rem;box-shadow:0 .2rem .5rem rgba(0,0,0,.05),0 0 1px 0 hsla(220,9%,46%,.502);display:none;font-size:.8125rem;left:0;margin-left:50%;padding:.5rem .75rem .5rem .5rem;position:fixed;text-decoration:none;top:1rem;transform:translateX(-50%);z-index:10}.back-to-top svg{height:1rem;width:1rem;fill:currentColor;display:inline-block}.back-to-top span{margin-left:.25rem}.show-back-to-top .back-to-top{align-items:center;display:flex}@media(min-width:97em){html{font-size:110%}}@media(max-width:82em){.toc-content-icon{display:flex}.toc-drawer{border-left:1px solid var(--color-background-muted);height:100vh;position:fixed;right:-15em;top:0}.toc-tree{border-left:none;font-size:var(--toc-font-size--mobile)}.sidebar-drawer{width:calc(50% - 18.5em)}}@media(max-width:67em){.content{margin-left:auto;margin-right:auto;padding:0 1em}}@media(max-width:63em){.nav-overlay-icon{display:flex}.sidebar-drawer{height:100vh;left:-15em;position:fixed;top:0;width:15em}.theme-toggle-header,.toc-header-icon{display:flex}.theme-toggle-content,.toc-content-icon{display:none}.mobile-header{align-items:center;display:flex;justify-content:space-between;position:sticky;top:0}.mobile-header .header-left,.mobile-header .header-right{display:flex;height:var(--header-height);padding:0 var(--header-padding)}.mobile-header .header-left label,.mobile-header .header-right label{height:100%;-webkit-user-select:none;-moz-user-select:none;user-select:none;width:100%}.nav-overlay-icon .icon,.theme-toggle svg{height:1.5rem;width:1.5rem}:target{scroll-margin-top:calc(var(--header-height) + 2.5rem)}.back-to-top{top:calc(var(--header-height) + .5rem)}.page{flex-direction:column;justify-content:center}}@media(max-width:48em){.content{overflow-x:auto;width:100%}}@media(max-width:46em){article[role=main] aside.sidebar{float:none;margin:1rem 0;width:100%}}.admonition,.topic{background:var(--color-admonition-background);border-radius:.2rem;box-shadow:0 .2rem .5rem rgba(0,0,0,.05),0 0 .0625rem rgba(0,0,0,.1);font-size:var(--admonition-font-size);margin:1rem auto;overflow:hidden;padding:0 .5rem .5rem;page-break-inside:avoid}.admonition>:nth-child(2),.topic>:nth-child(2){margin-top:0}.admonition>:last-child,.topic>:last-child{margin-bottom:0}.admonition p.admonition-title,p.topic-title{font-size:var(--admonition-title-font-size);font-weight:500;line-height:1.3;margin:0 -.5rem .5rem;padding:.4rem .5rem .4rem 2rem;position:relative}.admonition p.admonition-title:before,p.topic-title:before{content:"";height:1rem;left:.5rem;position:absolute;width:1rem}p.admonition-title{background-color:var(--color-admonition-title-background)}p.admonition-title:before{background-color:var(--color-admonition-title);-webkit-mask-image:var(--icon-admonition-default);mask-image:var(--icon-admonition-default);-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat}p.topic-title{background-color:var(--color-topic-title-background)}p.topic-title:before{background-color:var(--color-topic-title);-webkit-mask-image:var(--icon-topic-default);mask-image:var(--icon-topic-default);-webkit-mask-repeat:no-repeat;mask-repeat:no-repeat}.admonition{border-left:.2rem solid 
var(--color-admonition-title)}.admonition.caution{border-left-color:var(--color-admonition-title--caution)}.admonition.caution>.admonition-title{background-color:var(--color-admonition-title-background--caution)}.admonition.caution>.admonition-title:before{background-color:var(--color-admonition-title--caution);-webkit-mask-image:var(--icon-spark);mask-image:var(--icon-spark)}.admonition.warning{border-left-color:var(--color-admonition-title--warning)}.admonition.warning>.admonition-title{background-color:var(--color-admonition-title-background--warning)}.admonition.warning>.admonition-title:before{background-color:var(--color-admonition-title--warning);-webkit-mask-image:var(--icon-warning);mask-image:var(--icon-warning)}.admonition.danger{border-left-color:var(--color-admonition-title--danger)}.admonition.danger>.admonition-title{background-color:var(--color-admonition-title-background--danger)}.admonition.danger>.admonition-title:before{background-color:var(--color-admonition-title--danger);-webkit-mask-image:var(--icon-spark);mask-image:var(--icon-spark)}.admonition.attention{border-left-color:var(--color-admonition-title--attention)}.admonition.attention>.admonition-title{background-color:var(--color-admonition-title-background--attention)}.admonition.attention>.admonition-title:before{background-color:var(--color-admonition-title--attention);-webkit-mask-image:var(--icon-warning);mask-image:var(--icon-warning)}.admonition.error{border-left-color:var(--color-admonition-title--error)}.admonition.error>.admonition-title{background-color:var(--color-admonition-title-background--error)}.admonition.error>.admonition-title:before{background-color:var(--color-admonition-title--error);-webkit-mask-image:var(--icon-failure);mask-image:var(--icon-failure)}.admonition.hint{border-left-color:var(--color-admonition-title--hint)}.admonition.hint>.admonition-title{background-color:var(--color-admonition-title-background--hint)}.admonition.hint>.admonition-title:before{background-color:var(--color-admonition-title--hint);-webkit-mask-image:var(--icon-question);mask-image:var(--icon-question)}.admonition.tip{border-left-color:var(--color-admonition-title--tip)}.admonition.tip>.admonition-title{background-color:var(--color-admonition-title-background--tip)}.admonition.tip>.admonition-title:before{background-color:var(--color-admonition-title--tip);-webkit-mask-image:var(--icon-info);mask-image:var(--icon-info)}.admonition.important{border-left-color:var(--color-admonition-title--important)}.admonition.important>.admonition-title{background-color:var(--color-admonition-title-background--important)}.admonition.important>.admonition-title:before{background-color:var(--color-admonition-title--important);-webkit-mask-image:var(--icon-flame);mask-image:var(--icon-flame)}.admonition.note{border-left-color:var(--color-admonition-title--note)}.admonition.note>.admonition-title{background-color:var(--color-admonition-title-background--note)}.admonition.note>.admonition-title:before{background-color:var(--color-admonition-title--note);-webkit-mask-image:var(--icon-pencil);mask-image:var(--icon-pencil)}.admonition.seealso{border-left-color:var(--color-admonition-title--seealso)}.admonition.seealso>.admonition-title{background-color:var(--color-admonition-title-background--seealso)}.admonition.seealso>.admonition-title:before{background-color:var(--color-admonition-title--seealso);-webkit-mask-image:var(--icon-info);mask-image:var(--icon-info)}.admonition.admonition-todo{border-left-color:var(--color-admonition-titl
e--admonition-todo)}.admonition.admonition-todo>.admonition-title{background-color:var(--color-admonition-title-background--admonition-todo)}.admonition.admonition-todo>.admonition-title:before{background-color:var(--color-admonition-title--admonition-todo);-webkit-mask-image:var(--icon-pencil);mask-image:var(--icon-pencil)}.admonition-todo>.admonition-title{text-transform:uppercase}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) dd{margin-left:2rem}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) dd>:first-child{margin-top:.125rem}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) .field-list,dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) dd>:last-child{margin-bottom:.75rem}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) .field-list>dt{font-size:var(--font-size--small);text-transform:uppercase}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) .field-list dd:empty{margin-bottom:.5rem}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) .field-list dd>ul{margin-left:-1.2rem}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) .field-list dd>ul>li>p:nth-child(2){margin-top:0}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple) .field-list dd>ul>li>p+p:last-child:empty{margin-bottom:0;margin-top:0}dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple)>dt{color:var(--color-api-overall)}.sig:not(.sig-inline){background:var(--color-api-background);border-radius:.25rem;font-family:var(--font-stack--monospace);font-size:var(--api-font-size);font-weight:700;margin-left:-.25rem;margin-right:-.25rem;padding:.25rem .5rem .25rem 3em;text-indent:-2.5em;transition:background .1s ease-out}.sig:not(.sig-inline):hover{background:var(--color-api-background-hover)}.sig:not(.sig-inline) a.reference .viewcode-link{font-weight:400;width:4.25rem}em.property,span.property{font-style:normal}em.property:first-child,span.property:first-child{color:var(--color-api-keyword)}.sig-name{color:var(--color-api-name)}.sig-prename{color:var(--color-api-pre-name);font-weight:400}.sig-paren{color:var(--color-api-paren)}.sig-param{font-style:normal}div.deprecated,div.versionadded,div.versionchanged,div.versionremoved{border-left:.1875rem solid;border-radius:.125rem;padding-left:.75rem}div.deprecated p,div.versionadded p,div.versionchanged p,div.versionremoved p{margin-bottom:.125rem;margin-top:.125rem}div.versionadded{border-color:var(--color-api-added-border)}div.versionadded .versionmodified{color:var(--color-api-added)}div.versionchanged{border-color:var(--color-api-changed-border)}div.versionchanged .versionmodified{color:var(--color-api-changed)}div.deprecated{border-color:var(--color-api-deprecated-border)}div.deprecated .versionmodified{color:var(--color-api-deprecated)}div.versionremoved{border-color:var(--color-api-removed-border)}div.versionremoved .versionmodified{color:var(--color-api-removed)}.viewcode-back,.viewcode-link{float:right;text-align:right}.line-block{margin-bottom:.75rem;margin-top:.5rem}.line-block .line-block{margin-bottom:0;margin-top:0;padding-left:1rem}.code-block-caption,article p.caption,table>caption{font-size:var(--font-size--small);text-align:center}.toctree-wrapper.compound .caption,.toctree-wrapper.compound 
:not(.caption)>.caption-text{font-size:var(--font-size--small);margin-bottom:0;text-align:initial;text-transform:uppercase}.toctree-wrapper.compound>ul{margin-bottom:0;margin-top:0}.sig-inline,code.literal{background:var(--color-inline-code-background);border-radius:.2em;font-size:var(--font-size--small--2);padding:.1em .2em}pre.literal-block .sig-inline,pre.literal-block code.literal{font-size:inherit;padding:0}p .sig-inline,p code.literal{border:1px solid var(--color-background-border)}.sig-inline{font-family:var(--font-stack--monospace)}div[class*=" highlight-"],div[class^=highlight-]{display:flex;margin:1em 0}div[class*=" highlight-"] .table-wrapper,div[class^=highlight-] .table-wrapper,pre{margin:0;padding:0}pre{overflow:auto}article[role=main] .highlight pre{line-height:1.5}.highlight pre,pre.literal-block{font-size:var(--code-font-size);padding:.625rem .875rem}pre.literal-block{background-color:var(--color-code-background);border-radius:.2rem;color:var(--color-code-foreground);margin-bottom:1rem;margin-top:1rem}.highlight{border-radius:.2rem;width:100%}.highlight .gp,.highlight span.linenos{pointer-events:none;-webkit-user-select:none;-moz-user-select:none;user-select:none}.highlight .hll{display:block;margin-left:-.875rem;margin-right:-.875rem;padding-left:.875rem;padding-right:.875rem}.code-block-caption{background-color:var(--color-code-background);border-bottom:1px solid;border-radius:.25rem;border-bottom-left-radius:0;border-bottom-right-radius:0;border-color:var(--color-background-border);color:var(--color-code-foreground);display:flex;font-weight:300;padding:.625rem .875rem}.code-block-caption+div[class]{margin-top:0}.code-block-caption+div[class]>.highlight{border-top-left-radius:0;border-top-right-radius:0}.highlighttable{display:block;width:100%}.highlighttable tbody{display:block}.highlighttable tr{display:flex}.highlighttable td.linenos{background-color:var(--color-code-background);border-bottom-left-radius:.2rem;border-top-left-radius:.2rem;color:var(--color-code-foreground);padding:.625rem 0 .625rem .875rem}.highlighttable .linenodiv{box-shadow:-.0625rem 0 var(--color-foreground-border) inset;font-size:var(--code-font-size);padding-right:.875rem}.highlighttable td.code{display:block;flex:1;overflow:hidden;padding:0}.highlighttable td.code .highlight{border-bottom-left-radius:0;border-top-left-radius:0}.highlight span.linenos{box-shadow:-.0625rem 0 var(--color-foreground-border) inset;display:inline-block;margin-right:.875rem;padding-left:0;padding-right:.875rem}.footnote-reference{font-size:var(--font-size--small--4);vertical-align:super}dl.footnote.brackets{color:var(--color-foreground-secondary);display:grid;font-size:var(--font-size--small);grid-template-columns:max-content auto}dl.footnote.brackets dt{margin:0}dl.footnote.brackets dt>.fn-backref{margin-left:.25rem}dl.footnote.brackets dt:after{content:":"}dl.footnote.brackets dt .brackets:before{content:"["}dl.footnote.brackets dt .brackets:after{content:"]"}dl.footnote.brackets dd{margin:0;padding:0 1rem}aside.footnote{color:var(--color-foreground-secondary);font-size:var(--font-size--small)}aside.footnote>span,div.citation>span{float:left;font-weight:500;padding-right:.25rem}aside.footnote>:not(span),div.citation>p{margin-left:2rem}img{box-sizing:border-box;height:auto;max-width:100%}article .figure,article figure{border-radius:.2rem;margin:0}article .figure :last-child,article figure :last-child{margin-bottom:0}article .align-left{clear:left;float:left;margin:0 1rem 1rem}article 
.align-right{clear:right;float:right;margin:0 1rem 1rem}article .align-center,article .align-default{display:block;margin-left:auto;margin-right:auto;text-align:center}article table.align-default{display:table;text-align:initial}.domainindex-jumpbox,.genindex-jumpbox{border-bottom:1px solid var(--color-background-border);border-top:1px solid var(--color-background-border);padding:.25rem}.domainindex-section h2,.genindex-section h2{margin-bottom:.5rem;margin-top:.75rem}.domainindex-section ul,.genindex-section ul{margin-bottom:0;margin-top:0}ol,ul{margin-bottom:1rem;margin-top:1rem;padding-left:1.2rem}ol li>p:first-child,ul li>p:first-child{margin-bottom:.25rem;margin-top:.25rem}ol li>p:last-child,ul li>p:last-child{margin-top:.25rem}ol li>ol,ol li>ul,ul li>ol,ul li>ul{margin-bottom:.5rem;margin-top:.5rem}ol.arabic{list-style:decimal}ol.loweralpha{list-style:lower-alpha}ol.upperalpha{list-style:upper-alpha}ol.lowerroman{list-style:lower-roman}ol.upperroman{list-style:upper-roman}.simple li>ol,.simple li>ul,.toctree-wrapper li>ol,.toctree-wrapper li>ul{margin-bottom:0;margin-top:0}.field-list dt,.option-list dt,dl.footnote dt,dl.glossary dt,dl.simple dt,dl:not([class]) dt{font-weight:500;margin-top:.25rem}.field-list dt+dt,.option-list dt+dt,dl.footnote dt+dt,dl.glossary dt+dt,dl.simple dt+dt,dl:not([class]) dt+dt{margin-top:0}.field-list dt .classifier:before,.option-list dt .classifier:before,dl.footnote dt .classifier:before,dl.glossary dt .classifier:before,dl.simple dt .classifier:before,dl:not([class]) dt .classifier:before{content:":";margin-left:.2rem;margin-right:.2rem}.field-list dd ul,.field-list dd>p:first-child,.option-list dd ul,.option-list dd>p:first-child,dl.footnote dd ul,dl.footnote dd>p:first-child,dl.glossary dd ul,dl.glossary dd>p:first-child,dl.simple dd ul,dl.simple dd>p:first-child,dl:not([class]) dd ul,dl:not([class]) dd>p:first-child{margin-top:.125rem}.field-list dd ul,.option-list dd ul,dl.footnote dd ul,dl.glossary dd ul,dl.simple dd ul,dl:not([class]) dd ul{margin-bottom:.125rem}.math-wrapper{overflow-x:auto;width:100%}div.math{position:relative;text-align:center}div.math .headerlink,div.math:focus .headerlink{display:none}div.math:hover .headerlink{display:inline-block}div.math span.eqno{position:absolute;right:.5rem;top:50%;transform:translateY(-50%);z-index:1}abbr[title]{cursor:help}.problematic{color:var(--color-problematic)}kbd:not(.compound){background-color:var(--color-background-secondary);border:1px solid var(--color-foreground-border);border-radius:.2rem;box-shadow:0 .0625rem 0 rgba(0,0,0,.2),inset 0 0 0 .125rem var(--color-background-primary);color:var(--color-foreground-primary);display:inline-block;font-size:var(--font-size--small--3);margin:0 .2rem;padding:0 .2rem;vertical-align:text-bottom}blockquote{background:var(--color-background-secondary);border-left:4px solid var(--color-background-border);margin-left:0;margin-right:0;padding:.5rem 1rem}blockquote .attribution{font-weight:600;text-align:right}blockquote.highlights,blockquote.pull-quote{font-size:1.25em}blockquote.epigraph,blockquote.pull-quote{border-left-width:0;border-radius:.5rem}blockquote.highlights{background:transparent;border-left-width:0}p .reference img{vertical-align:middle}p.rubric{font-size:1.125em;font-weight:700;line-height:1.25}dd p.rubric{font-size:var(--font-size--small);font-weight:inherit;line-height:inherit;text-transform:uppercase}article .sidebar{background-color:var(--color-background-secondary);border:1px solid 
var(--color-background-border);border-radius:.2rem;clear:right;float:right;margin-left:1rem;margin-right:0;width:30%}article .sidebar>*{padding-left:1rem;padding-right:1rem}article .sidebar>ol,article .sidebar>ul{padding-left:2.2rem}article .sidebar .sidebar-title{border-bottom:1px solid var(--color-background-border);font-weight:500;margin:0;padding:.5rem 1rem}[role=main] .table-wrapper.container{margin-bottom:.5rem;margin-top:1rem;overflow-x:auto;padding:.2rem .2rem .75rem;width:100%}table.docutils{border-collapse:collapse;border-radius:.2rem;border-spacing:0;box-shadow:0 .2rem .5rem rgba(0,0,0,.05),0 0 .0625rem rgba(0,0,0,.1)}table.docutils th{background:var(--color-table-header-background)}table.docutils td,table.docutils th{border-bottom:1px solid var(--color-table-border);border-left:1px solid var(--color-table-border);border-right:1px solid var(--color-table-border);padding:0 .25rem}table.docutils td p,table.docutils th p{margin:.25rem}table.docutils td:first-child,table.docutils th:first-child{border-left:none}table.docutils td:last-child,table.docutils th:last-child{border-right:none}table.docutils td.text-left,table.docutils th.text-left{text-align:left}table.docutils td.text-right,table.docutils th.text-right{text-align:right}table.docutils td.text-center,table.docutils th.text-center{text-align:center}:target{scroll-margin-top:2.5rem}@media(max-width:67em){:target{scroll-margin-top:calc(2.5rem + var(--header-height))}section>span:target{scroll-margin-top:calc(2.8rem + var(--header-height))}}.headerlink{font-weight:100;-webkit-user-select:none;-moz-user-select:none;user-select:none}.code-block-caption>.headerlink,dl dt>.headerlink,figcaption p>.headerlink,h1>.headerlink,h2>.headerlink,h3>.headerlink,h4>.headerlink,h5>.headerlink,h6>.headerlink,p.caption>.headerlink,table>caption>.headerlink{margin-left:.5rem;visibility:hidden}.code-block-caption:hover>.headerlink,dl dt:hover>.headerlink,figcaption p:hover>.headerlink,h1:hover>.headerlink,h2:hover>.headerlink,h3:hover>.headerlink,h4:hover>.headerlink,h5:hover>.headerlink,h6:hover>.headerlink,p.caption:hover>.headerlink,table>caption:hover>.headerlink{visibility:visible}.code-block-caption>.toc-backref,dl dt>.toc-backref,figcaption p>.toc-backref,h1>.toc-backref,h2>.toc-backref,h3>.toc-backref,h4>.toc-backref,h5>.toc-backref,h6>.toc-backref,p.caption>.toc-backref,table>caption>.toc-backref{color:inherit;text-decoration-line:none}figure:hover>figcaption>p>.headerlink,table:hover>caption>.headerlink{visibility:visible}:target>h1:first-of-type,:target>h2:first-of-type,:target>h3:first-of-type,:target>h4:first-of-type,:target>h5:first-of-type,:target>h6:first-of-type,span:target~h1:first-of-type,span:target~h2:first-of-type,span:target~h3:first-of-type,span:target~h4:first-of-type,span:target~h5:first-of-type,span:target~h6:first-of-type{background-color:var(--color-highlight-on-target)}:target>h1:first-of-type code.literal,:target>h2:first-of-type code.literal,:target>h3:first-of-type code.literal,:target>h4:first-of-type code.literal,:target>h5:first-of-type code.literal,:target>h6:first-of-type code.literal,span:target~h1:first-of-type code.literal,span:target~h2:first-of-type code.literal,span:target~h3:first-of-type code.literal,span:target~h4:first-of-type code.literal,span:target~h5:first-of-type code.literal,span:target~h6:first-of-type code.literal{background-color:transparent}.literal-block-wrapper:target .code-block-caption,.this-will-duplicate-information-and-it-is-still-useful-here li 
:target,figure:target,table:target>caption{background-color:var(--color-highlight-on-target)}dt:target{background-color:var(--color-highlight-on-target)!important}.footnote-reference:target,.footnote>dt:target+dd{background-color:var(--color-highlight-on-target)}.guilabel{background-color:var(--color-guilabel-background);border:1px solid var(--color-guilabel-border);border-radius:.5em;color:var(--color-guilabel-text);font-size:.9em;padding:0 .3em}footer{display:flex;flex-direction:column;font-size:var(--font-size--small);margin-top:2rem}.bottom-of-page{align-items:center;border-top:1px solid var(--color-background-border);color:var(--color-foreground-secondary);display:flex;justify-content:space-between;line-height:1.5;margin-top:1rem;padding-bottom:1rem;padding-top:1rem}@media(max-width:46em){.bottom-of-page{flex-direction:column-reverse;gap:.25rem;text-align:center}}.bottom-of-page .left-details{font-size:var(--font-size--small)}.bottom-of-page .right-details{display:flex;flex-direction:column;gap:.25rem;text-align:right}.bottom-of-page .icons{display:flex;font-size:1rem;gap:.25rem;justify-content:flex-end}.bottom-of-page .icons a{text-decoration:none}.bottom-of-page .icons img,.bottom-of-page .icons svg{font-size:1.125rem;height:1em;width:1em}.related-pages a{align-items:center;display:flex;text-decoration:none}.related-pages a:hover .page-info .title{color:var(--color-link);text-decoration:underline;text-decoration-color:var(--color-link-underline)}.related-pages a svg.furo-related-icon,.related-pages a svg.furo-related-icon>use{color:var(--color-foreground-border);flex-shrink:0;height:.75rem;margin:0 .5rem;width:.75rem}.related-pages a.next-page{clear:right;float:right;max-width:50%;text-align:right}.related-pages a.prev-page{clear:left;float:left;max-width:50%}.related-pages a.prev-page svg{transform:rotate(180deg)}.page-info{display:flex;flex-direction:column;overflow-wrap:anywhere}.next-page .page-info{align-items:flex-end}.page-info .context{align-items:center;color:var(--color-foreground-muted);display:flex;font-size:var(--font-size--small);padding-bottom:.1rem;text-decoration:none}ul.search{list-style:none;padding-left:0}ul.search li{border-bottom:1px solid var(--color-background-border);padding:1rem 0}[role=main] .highlighted{background-color:var(--color-highlighted-background);color:var(--color-highlighted-text)}.sidebar-brand{display:flex;flex-direction:column;flex-shrink:0;padding:var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal);text-decoration:none}.sidebar-brand-text{color:var(--color-sidebar-brand-text);font-size:1.5rem;overflow-wrap:break-word}.sidebar-brand-text,.sidebar-logo-container{margin:var(--sidebar-item-spacing-vertical) 0}.sidebar-logo{display:block;margin:0 auto;max-width:100%}.sidebar-search-container{align-items:center;background:var(--color-sidebar-search-background);display:flex;margin-top:var(--sidebar-search-space-above);position:relative}.sidebar-search-container:focus-within,.sidebar-search-container:hover{background:var(--color-sidebar-search-background--focus)}.sidebar-search-container:before{background-color:var(--color-sidebar-search-icon);content:"";height:var(--sidebar-search-icon-size);left:var(--sidebar-item-spacing-horizontal);-webkit-mask-image:var(--icon-search);mask-image:var(--icon-search);position:absolute;width:var(--sidebar-search-icon-size)}.sidebar-search{background:transparent;border:none;border-bottom:1px solid var(--color-sidebar-search-border);border-top:1px solid 
var(--color-sidebar-search-border);box-sizing:border-box;color:var(--color-sidebar-search-foreground);padding:var(--sidebar-search-input-spacing-vertical) var(--sidebar-search-input-spacing-horizontal) var(--sidebar-search-input-spacing-vertical) calc(var(--sidebar-item-spacing-horizontal) + var(--sidebar-search-input-spacing-horizontal) + var(--sidebar-search-icon-size));width:100%;z-index:10}.sidebar-search:focus{outline:none}.sidebar-search::-moz-placeholder{font-size:var(--sidebar-search-input-font-size)}.sidebar-search::placeholder{font-size:var(--sidebar-search-input-font-size)}#searchbox .highlight-link{margin:0;padding:var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal) 0;text-align:center}#searchbox .highlight-link a{color:var(--color-sidebar-search-icon);font-size:var(--font-size--small--2)}.sidebar-tree{font-size:var(--sidebar-item-font-size);margin-bottom:var(--sidebar-item-spacing-vertical);margin-top:var(--sidebar-tree-space-above)}.sidebar-tree ul{display:flex;flex-direction:column;list-style:none;margin-bottom:0;margin-top:0;padding:0}.sidebar-tree li{margin:0;position:relative}.sidebar-tree li>ul{margin-left:var(--sidebar-item-spacing-horizontal)}.sidebar-tree .icon,.sidebar-tree .reference{color:var(--color-sidebar-link-text)}.sidebar-tree .reference{box-sizing:border-box;display:inline-block;height:100%;line-height:var(--sidebar-item-line-height);overflow-wrap:anywhere;padding:var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal);text-decoration:none;width:100%}.sidebar-tree .reference:hover{background:var(--color-sidebar-item-background--hover);color:var(--color-sidebar-link-text)}.sidebar-tree .reference.external:after{color:var(--color-sidebar-link-text);content:url("data:image/svg+xml;charset=utf-8,%3Csvg xmlns='http://www.w3.org/2000/svg' width='12' height='12' fill='none' stroke='%23607d8b' stroke-linecap='round' stroke-linejoin='round' stroke-width='1.5' viewBox='0 0 24 24'%3E%3Cpath stroke='none' d='M0 0h24v24H0z'/%3E%3Cpath d='M11 7H6a2 2 0 0 0-2 2v9a2 2 0 0 0 2 2h9a2 2 0 0 0 2-2v-5M10 14 20 4M15 4h5v5'/%3E%3C/svg%3E");margin:0 .25rem;vertical-align:middle}.sidebar-tree .current-page>.reference{font-weight:700}.sidebar-tree label{align-items:center;cursor:pointer;display:flex;height:var(--sidebar-item-height);justify-content:center;position:absolute;right:0;top:0;-webkit-user-select:none;-moz-user-select:none;user-select:none;width:var(--sidebar-expander-width)}.sidebar-tree .caption,.sidebar-tree :not(.caption)>.caption-text{color:var(--color-sidebar-caption-text);font-size:var(--sidebar-caption-font-size);font-weight:700;margin:var(--sidebar-caption-space-above) 0 0 0;padding:var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal);text-transform:uppercase}.sidebar-tree li.has-children>.reference{padding-right:var(--sidebar-expander-width)}.sidebar-tree .toctree-l1>.reference,.sidebar-tree .toctree-l1>label .icon{color:var(--color-sidebar-link-text--top-level)}.sidebar-tree label{background:var(--color-sidebar-item-expander-background)}.sidebar-tree label:hover{background:var(--color-sidebar-item-expander-background--hover)}.sidebar-tree .current>.reference{background:var(--color-sidebar-item-background--current)}.sidebar-tree .current>.reference:hover{background:var(--color-sidebar-item-background--hover)}.toctree-checkbox{display:none;position:absolute}.toctree-checkbox~ul{display:none}.toctree-checkbox~label .icon 
svg{transform:rotate(90deg)}.toctree-checkbox:checked~ul{display:block}.toctree-checkbox:checked~label .icon svg{transform:rotate(-90deg)}.toc-title-container{padding:var(--toc-title-padding);padding-top:var(--toc-spacing-vertical)}.toc-title{color:var(--color-toc-title-text);font-size:var(--toc-title-font-size);padding-left:var(--toc-spacing-horizontal);text-transform:uppercase}.no-toc{display:none}.toc-tree-container{padding-bottom:var(--toc-spacing-vertical)}.toc-tree{border-left:1px solid var(--color-background-border);font-size:var(--toc-font-size);line-height:1.3;padding-left:calc(var(--toc-spacing-horizontal) - var(--toc-item-spacing-horizontal))}.toc-tree>ul>li:first-child{padding-top:0}.toc-tree>ul>li:first-child>ul{padding-left:0}.toc-tree>ul>li:first-child>a{display:none}.toc-tree ul{list-style-type:none;margin-bottom:0;margin-top:0;padding-left:var(--toc-item-spacing-horizontal)}.toc-tree li{padding-top:var(--toc-item-spacing-vertical)}.toc-tree li.scroll-current>.reference{color:var(--color-toc-item-text--active);font-weight:700}.toc-tree a.reference{color:var(--color-toc-item-text);overflow-wrap:anywhere;text-decoration:none}.toc-scroll{max-height:100vh;overflow-y:scroll}.contents:not(.this-will-duplicate-information-and-it-is-still-useful-here){background:rgba(255,0,0,.25);color:var(--color-problematic)}.contents:not(.this-will-duplicate-information-and-it-is-still-useful-here):before{content:"ERROR: Adding a table of contents in Furo-based documentation is unnecessary, and does not work well with existing styling. Add a 'this-will-duplicate-information-and-it-is-still-useful-here' class, if you want an escape hatch."}.text-align\:left>p{text-align:left}.text-align\:center>p{text-align:center}.text-align\:right>p{text-align:right} +/*# sourceMappingURL=furo.css.map*/ \ No newline at end of file diff --git a/_static/styles/furo.css.map b/_static/styles/furo.css.map new file mode 100644 index 0000000000..db1dec1655 --- /dev/null +++ b/_static/styles/furo.css.map @@ -0,0 +1 @@ 
+{"version":3,"file":"styles/furo.css","mappings":"AAAA,2EAA2E,CAU3E,KACE,gBAAiB,CACjB,6BACF,CASA,KACE,QACF,CAMA,KACE,aACF,CAOA,GACE,aAAc,CACd,cACF,CAUA,GACE,sBAAuB,CACvB,QAAS,CACT,gBACF,CAOA,IACE,+BAAiC,CACjC,aACF,CASA,EACE,4BACF,CAOA,YACE,kBAAmB,CACnB,yBAA0B,CAC1B,gCACF,CAMA,SAEE,kBACF,CAOA,cAGE,+BAAiC,CACjC,aACF,CAeA,QAEE,aAAc,CACd,aAAc,CACd,iBAAkB,CAClB,uBACF,CAEA,IACE,aACF,CAEA,IACE,SACF,CASA,IACE,iBACF,CAUA,sCAKE,mBAAoB,CACpB,cAAe,CACf,gBAAiB,CACjB,QACF,CAOA,aAEE,gBACF,CAOA,cAEE,mBACF,CAMA,gDAIE,yBACF,CAMA,wHAIE,iBAAkB,CAClB,SACF,CAMA,4GAIE,6BACF,CAMA,SACE,0BACF,CASA,OACE,qBAAsB,CACtB,aAAc,CACd,aAAc,CACd,cAAe,CACf,SAAU,CACV,kBACF,CAMA,SACE,uBACF,CAMA,SACE,aACF,CAOA,6BAEE,qBAAsB,CACtB,SACF,CAMA,kFAEE,WACF,CAOA,cACE,4BAA6B,CAC7B,mBACF,CAMA,yCACE,uBACF,CAOA,6BACE,yBAA0B,CAC1B,YACF,CASA,QACE,aACF,CAMA,QACE,iBACF,CAiBA,kBACE,YACF,CCvVA,aAcE,kEACE,uBAOF,WACE,iDAMF,kCACE,wBAEF,qCAEE,uBADA,uBACA,CAEF,SACE,wBAtBA,CCpBJ,iBAGE,qBAEA,sBACA,0BAFA,oBAHA,4BACA,oBAKA,6BAIA,2CAFA,mBACA,sCAFA,4BAGA,CAEF,gBACE,aCPF,KCCE,mHAGA,wGAGA,wCAAyC,CAEzC,wBAAyB,CACzB,wBAAyB,CACzB,4BAA6B,CAC7B,yBAA0B,CAC1B,2BAA4B,CAG5B,sDAAuD,CACvD,gDAAiD,CACjD,wDAAyD,CAGzD,0CAA2C,CAC3C,gDAAiD,CACjD,gDAAiD,CAKjD,gCAAiC,CACjC,sCAAuC,CAGvC,2CAA4C,CAG5C,uCAAwC,CCnCxC,+FAIA,uBAAwB,CAGxB,iCAAkC,CAClC,kCAAmC,CAEnC,+BAAgC,CAChC,sCAAuC,CACvC,sCAAuC,CACvC,qGAIA,mDAAoD,CAEpD,mCAAoC,CACpC,8CAA+C,CAC/C,gDAAiD,CACjD,kCAAmC,CACnC,6DAA8D,CAG9D,6BAA8B,CAC9B,6BAA8B,CAC9B,+BAAgC,CAChC,kCAAmC,CACnC,kCAAmC,CCRjC,+jBCaA,iqCAZF,iaCXA,8KAOA,4SAWA,4SAUA,0CACA,gEAGA,0CAGA,gEAGA,yCACA,+DAIA,4CACA,kEAGA,wCAUA,8DACA,uCAGA,4DACA,sCACA,2DAGA,4CACA,kEACA,uCAGA,6DACA,2GAGA,sHAEA,yFAEA,+CACA,+EAGA,4MAOA,gCACA,sHAIA,kCACA,uEACA,gEACA,4DACA,kEAGA,2DACA,sDACA,0CACA,8CACA,wGAGA,0BACA,iCAGA,+DACA,+BACA,sCACA,+DAEA,kGACA,oCACA,yDACA,sCL3HF,kCAEA,sDAIA,0CKyHE,kEAIA,oDACA,sDAGA,oCACA,oEAEA,0DACA,qDAIA,oDACA,6DAIA,iEAIA,2DAIA,2DAGA,4DACA,gEAIA,gEAEA,gFAEA,oNASA,qDLtKE,gFAGE,4DAIF,oEKgHF,yEAEA,6DAGA,0DAEA,uDACA,qDACA,wDAIA,6DAIA,yDACA,2DAIA,uCAGA,wCACA,sDAGA,+CAGA,6DAEA,iDACA,+DAEA,wDAEA,sEAMA,0DACA,sBACA,mEL5JI,wEAEA,iCACE,+BAMN,wEAGA,iCACE,kFAEA,uEAIF,gEACE,8BAGF,qEMzDA,sCAKA,wFAKA,iCAIA,0BAWA,iCACA,4BACA,mCAGA,+BAEA,sCACA,4BAEA,mCAEA,sCAKA,sDAIA,gCAEA,gEAQF,wCAME,sBACA,kCAKA,uBAEA,gEAIA,2BAIA,mCAEA,qCACA,iCAGE,+BACA,wEAEE,iCACA,kFAGF,6BACA,0CACF,kCAEE,8BACE,8BACA,qEAEE,sCACA,wFClFN,iCAGF,2DACE,4BACA,oCAKF,8BAGE,sCACA,+DAIA,sCAEA,sDAGA,gCACA,gEAGA,+CAEA,sBACE,yCAGF,uBACA,sEAIA,aAEA,mCAIA,kEACA,aACA,oEACA,YAIA,EAQE,4HAGA,gDACE,mBACA,wCAON,wCAGE,0DACA,mBAKA,mBACA,CANA,uCAKA,iBALA,iBAWA,mBAGF,mBACE,mDAIF,+BAEE,CAEA,yBAFA,kBAMA,CAJA,GACA,aAGA,mBAEF,wBAEE,iBACA,iBAEA,OACA,aAGF,CAHE,WAGF,GAEE,oBAEA,CAJF,gBAIE,aAEA,+CAKA,UANA,WACA,cADA,SAMA,WACA,iBAEE,GAMF,wBANE,yBAMF,kDACA,WAEA,gCACA,2DAGA,iBACE,uCAEJ,kEAIE,uCAGA,yDACE,cACA,+DAEA,yDAEE,mEAMJ,kEAMA,uBACA,kBAEA,uBACA,kDAKA,0DAIA,CALA,oBAKA,WACA,WAQA,4BAFF,0CAEE,CARA,qCAsBA,CAdA,iBAEA,kBACE,aADF,4BACE,WAMF,2BAGF,qCAEE,CAXE,UAWF,+BAGA,uBAEA,SAEA,0CAIE,CANF,qCAEA,CAIE,2DACE,gBAIN,+CAIA,CAEA,kDAKE,CAPF,8BAEA,CAOE,YACA,CAjBI,2BAGN,CAHM,WAcJ,UAGA,CAEA,2GAIF,iCAGE,8BAIA,qBACA,oBACF,uBAOI,0CAIA,CATF,6DAKE,CALF,sBASE,qCAKF,CACE,cACA,CAFF,sBAEE,CACA,+BAEA,qBAEE,WAKN,aACE,sCAGA,mBAEA,6BAMA,kCACA,CAJA,sBACA,aAEA,CAJA,eACA,MAIA,2FAEA,UAGA,YACA,sBACE,8BAEA,CALF,aACA,WAIE,OACA,oBAEF,uBACE,WAEF,YAFE,UAEF,eAgBA,kBACE,CAhBA,qDAQF,qCAGF,CAGI,YACF,CAJF,2BAGI,CAEA,eACA,qBAGA,mEAEA,qBACA,8BAIA,kBADF,kBACE,yBAEJ,oCAGI,qDAIJ,+BAGI,oCAEA,+CAQF,4CACE,yBACF,2BAOE,sBACA,CAHA,WACA,CAFF,cACE,CAJA,YAGF,CAEE,SAEA,mBAGA,kDAEE,CAJF,cAEA,cAEE,sBAEA,mBADA,YACA,uBA
CA,mDACE,CADF,YACE,iDAEA,uCAEN,+DAOE,mBADF,sBACE,mBAGF,aACE,sCAIA,aADF,WACE,CAKF,SACE,CAHJ,kBAEE,CAJE,gBAEJ,CAHI,iBAMA,yFAKA,aACA,eACA,cCxaJ,iBAEE,aADA,iBACA,6BAEA,kCAEA,SACA,UAIA,gCACA,CALA,SAEA,SAEA,CAJA,wEAEA,CAFA,OAKA,CAGA,mDACE,iBAGF,gCACE,CADF,UACE,aAEJ,iCAEE,CAFF,UAEE,wCAEA,WACA,WADA,UACA,CACA,4CAGA,MACA,CADA,KACA,wCACA,UAGA,CAJA,UAIA,6DAUA,0CACE,CAFF,mBAEE,wEACA,CAVA,YACA,CAMF,mBAJE,OAOA,gBAJJ,gCACE,CANE,cACA,CAHA,oBACA,CAGA,QAGJ,CAII,0BACA,CADA,UACA,wCAEJ,kBACE,0DACA,gCACE,kBACA,CADA,YACA,oEACA,2CAMF,mDAII,CALN,YACE,CANE,cAKJ,CACE,iBAII,kEACA,yCACE,kDACA,yDACE,+CACA,uBANN,CAMM,+BANN,uCACE,qDACA,4BAEE,mBADA,0CACA,CADA,qBACA,0DACE,wCACA,sGALJ,oCACA,sBACE,kBAFF,UAEE,2CACA,wFACE,cACA,kEANN,uBACE,iDACA,CADA,UACA,0DACE,wDAEE,iEACA,qEANN,sCACE,CAGE,iBAHF,gBAGE,qBACE,CAJJ,uBACA,gDACE,wDACA,6DAHF,2CACA,CADA,gBACA,eACE,CAGE,sBANN,8BACE,CAII,iBAFF,4DACA,WACE,YADF,uCACE,6EACA,2BANN,8CACE,kDACA,0CACE,8BACA,yFACE,sBACA,sFALJ,mEACA,sBACE,kEACA,6EACE,uCACA,kEALJ,qGAEE,kEACA,6EACE,uCACA,kEALJ,8CACA,uDACE,sEACA,2EACE,sCACA,iEALJ,mGACA,qCACE,oDACA,0DACE,6GACA,gDAGR,yDCvEA,sEACE,CACA,6GACE,gEACF,iGAIF,wFACE,qDAGA,mGAEE,2CAEF,4FACE,gCACF,wGACE,8DAEE,6FAIA,iJAKN,6GACE,gDAKF,yDACA,qCAGA,6BACA,kBACA,qDAKA,oCAEA,+DAGA,2CAGE,oDAIA,oEAEE,qBAEN,wDAEE,uCACE,kEAGJ,CACE,6CACA,uDAGF,CACE,mCAEF,yDAIE,gEAGA,CAEA,wHAIF,sDACE,+DAEE,sCAGF,8BACA,oCACE,oHAIF,gBACE,yGAIF,mBChHA,2MCDF,4HAQE,wKAOA,8HCbA,mBAEA,6HAIE,YACA,mIAaJ,gBAPE,YAOF,4FAKE,qDAuBE,sCACA,CAHA,oBAEA,CAbF,wCACE,CALF,8BAIA,CARE,eAIF,CAKE,mBAEF,qBAEE,CAIF,+BACE,mBACA,CAGA,kCACA,6BAIF,4CAIA,kDACE,6BACA,2BAGF,iBACE,mDAGA,8BACA,WAGJ,2BACE,cAGA,+BACA,CAHA,eAGA,wCACA,YACA,iBACA,uEAGA,0BACA,2CAEA,8EAGI,qBACA,CAFF,kBAEE,4DAMJ,mCACE,4BAGA,oBAGF,4CACE,qCACA,8BACA,gBACA,+CAEA,iCAEF,iCACE,oBACA,4CACA,qCAGF,8BAEE,+BAEA,WAEA,8BACE,oBACA,CADA,gBACA,yBAKF,gBADF,YACE,CACA,iBACA,qDAEA,mDCvIJ,2FAMA,iCACE,CACA,eAEA,CAFA,mBADA,wBAIA,8BACA,gBADA,YACA,0BAEE,8CAGA,wDAIE,gFAGE,iBAEN,wCAKF,+CACE,CACA,oDAEF,kDAIE,YAEF,CAHE,YAGF,CCpCE,mFAFA,QACA,UAIA,CAHA,IAGA,gDAGE,eACA,iEAGF,wBAEE,mBAMA,6CAEF,CAJE,mBACA,CAGF,kCAGE,CARF,kBACE,CAHA,eAUA,YACA,mBACA,CAFA,UAEA,wCC/BJ,mBACE,CDkCE,wBACA,sBCpCJ,iBACE,mDACA,2CACA,sBAGA,qBCDA,6CAIE,CATJ,uBAKE,CDGE,oBACF,yDAEE,CCDE,2CAGF,CAJA,kCACE,CDJJ,aAKE,eCXJ,CDME,uBCOE,gCACE,YAEF,2CAEE,wBACA,0BAIF,iBAEA,cADF,UACE,uBAEA,iCAEA,wCAEA,6CAMA,CAYF,gCATI,4BASJ,CAZE,mCAEE,iCAUJ,4BAGE,4DADA,+BACA,CAHF,qBAGE,sCACE,OAEF,iBAHA,SAGA,iHACE,2DAKF,CANA,8EAMA,uSAEE,kBAEF,+FACE,yCCjEJ,WACA,yBAGA,uBACA,gBAEA,uCAIA,CAJA,iCAIA,uCAGA,UACE,gBACA,qBAEA,0CClBJ,gBACE,KAGF,qBACE,YAGF,CAHE,cAGF,gCAEE,mBACA,iEAEA,oCACA,wCAEA,sBACA,WAEA,CAFA,YAEA,8EAEA,mCAFA,iBAEA,6BAIA,wEAKA,sDAIE,CARF,mDAIA,CAIE,cAEF,8CAIA,oBAFE,iBAEF,8CAGE,eAEF,CAFE,YAEF,OAEE,kBAGJ,CAJI,eACA,CAFF,mBAKF,yCCjDE,oBACA,CAFA,iBAEA,uCAKE,iBACA,qCAGA,mBCZJ,CDWI,gBCXJ,6BAEE,eACA,sBAGA,eAEA,sBACA,oDACA,iGAMA,gBAFE,YAEF,8FAME,iJCnBF,YACA,gNAWE,gDAEF,iSAaE,kBACE,gHAKF,oCACE,eACF,CADE,UACF,8CACE,gDACF,wCACE,oBCtCJ,oBAEF,6BACE,QACE,kDAGF,yBACE,kDAmBA,kDAEF,CAhBA,+CAaA,CAbA,oBAaA,0FACE,CADF,gGAfF,cACE,gBACA,CAaA,0BAGA,mQACE,gBAGF,oMACE,iBACA,CAFF,eACE,CADF,gBAEE,aAGJ,iCAEE,CAFF,wCAEE,wBAUE,+VAIE,uEAHA,2BAGA,wXAKJ,iDAGF,CARM,+CACE,iDAIN,CALI,gBAQN,mHACE,gBAGF,2DACE,0EAOA,0EAGF,gBAEE,6DCjFA,kDACA,gCACA,qDAGA,qBACA,qDCDA,cACA,eAEA,yBAGF,sBAEE,iBACA,sNAWA,iBACE,kBACA,wRAgBA,kBAEA,iOAgBA,uCACE,uEAEA,kBAEF,qUAuBE,iDAIJ,CACA,geCzFF,4BAEE,CAQA,6JACA,iDAIA,sEAGA,mDAOF,iDAGE,4DAIA,8CACA,qDAEE,eAFF,cAEE,oBAEF,uBAFE,kCAGA,eACA,iBACA,mBAIA,mDACA,CAHA,uCAEA,CAJA,0CACA,CAIA,gBAJA,gBACA,oBADA,gBAIA,wBAEJ,gBAGE,6BACA,YAHA,iBAGA,gCACA,iEAEA,6CACA,
sDACA,0BADA,wBACA,0BACA,oIAIA,mBAFA,YAEA,qBACA,0CAIE,uBAEF,CAHA,yBACE,CAEF,iDACE,mFAKJ,oCACE,CANE,aAKJ,CACE,qEAIA,YAFA,WAEA,CAHA,aACA,CAEA,gBACE,4BACA,sBADA,aACA,gCAMF,oCACA,yDACA,2CAEA,qBAGE,kBAEA,CACA,mCAIF,CARE,YACA,CAOF,iCAEE,CAPA,oBACA,CAQA,oBACE,uDAEJ,sDAGA,CAHA,cAGA,0BACE,oDAIA,oCACA,4BACA,sBAGA,cAEA,oFAGA,sBAEA,yDACE,CAIF,iBAJE,wBAIF,6CAHE,6CAKA,eACA,aACA,CADA,cACA,yCAGJ,kBACE,CAKA,iDAEA,CARF,aACE,4CAGA,kBAIA,wEAGA,wDAGA,kCAOA,iDAGA,CAPF,WAEE,sCAEA,CAJF,2CACE,CAMA,qCACA,+BARF,kBACE,qCAOA,iBAsBA,sBACE,CAvBF,WAKA,CACE,0DAIF,CALA,uDACE,CANF,sBAqBA,4CACA,CALA,gRAIA,YAEE,6CAEN,mCAEE,+CASA,6EAIA,4BChNA,SDmNA,qFCnNA,gDACA,sCAGA,qCACA,sDACA,CAKA,kDAGA,CARA,0CAQA,kBAGA,YACA,sBACA,iBAFA,gBADF,YACE,CAHA,SAKA,kBAEA,SAFA,iBAEA,uEAGA,CAEE,6CAFF,oCAgBI,CAdF,yBACE,qBACF,CAGF,oBACE,CAIF,WACE,CALA,2CAGA,uBACF,CACE,mFAGE,CALF,qBAEA,UAGE,gCAIF,sDAEA,CALE,oCAKF,yCC7CJ,oCACE,CD+CA,yXAQE,sCCrDJ,wCAGA,oCACE","sources":["webpack:///./node_modules/normalize.css/normalize.css","webpack:///./src/furo/assets/styles/base/_print.sass","webpack:///./src/furo/assets/styles/base/_screen-readers.sass","webpack:///./src/furo/assets/styles/base/_theme.sass","webpack:///./src/furo/assets/styles/variables/_fonts.scss","webpack:///./src/furo/assets/styles/variables/_spacing.scss","webpack:///./src/furo/assets/styles/variables/_icons.scss","webpack:///./src/furo/assets/styles/variables/_admonitions.scss","webpack:///./src/furo/assets/styles/variables/_colors.scss","webpack:///./src/furo/assets/styles/base/_typography.sass","webpack:///./src/furo/assets/styles/_scaffold.sass","webpack:///./src/furo/assets/styles/content/_admonitions.sass","webpack:///./src/furo/assets/styles/content/_api.sass","webpack:///./src/furo/assets/styles/content/_blocks.sass","webpack:///./src/furo/assets/styles/content/_captions.sass","webpack:///./src/furo/assets/styles/content/_code.sass","webpack:///./src/furo/assets/styles/content/_footnotes.sass","webpack:///./src/furo/assets/styles/content/_images.sass","webpack:///./src/furo/assets/styles/content/_indexes.sass","webpack:///./src/furo/assets/styles/content/_lists.sass","webpack:///./src/furo/assets/styles/content/_math.sass","webpack:///./src/furo/assets/styles/content/_misc.sass","webpack:///./src/furo/assets/styles/content/_rubrics.sass","webpack:///./src/furo/assets/styles/content/_sidebar.sass","webpack:///./src/furo/assets/styles/content/_tables.sass","webpack:///./src/furo/assets/styles/content/_target.sass","webpack:///./src/furo/assets/styles/content/_gui-labels.sass","webpack:///./src/furo/assets/styles/components/_footer.sass","webpack:///./src/furo/assets/styles/components/_sidebar.sass","webpack:///./src/furo/assets/styles/components/_table_of_contents.sass","webpack:///./src/furo/assets/styles/_shame.sass"],"sourcesContent":["/*! normalize.css v8.0.1 | MIT License | github.com/necolas/normalize.css */\n\n/* Document\n ========================================================================== */\n\n/**\n * 1. Correct the line height in all browsers.\n * 2. 
Prevent adjustments of font size after orientation changes in iOS.\n */\n\nhtml {\n line-height: 1.15; /* 1 */\n -webkit-text-size-adjust: 100%; /* 2 */\n}\n\n/* Sections\n ========================================================================== */\n\n/**\n * Remove the margin in all browsers.\n */\n\nbody {\n margin: 0;\n}\n\n/**\n * Render the `main` element consistently in IE.\n */\n\nmain {\n display: block;\n}\n\n/**\n * Correct the font size and margin on `h1` elements within `section` and\n * `article` contexts in Chrome, Firefox, and Safari.\n */\n\nh1 {\n font-size: 2em;\n margin: 0.67em 0;\n}\n\n/* Grouping content\n ========================================================================== */\n\n/**\n * 1. Add the correct box sizing in Firefox.\n * 2. Show the overflow in Edge and IE.\n */\n\nhr {\n box-sizing: content-box; /* 1 */\n height: 0; /* 1 */\n overflow: visible; /* 2 */\n}\n\n/**\n * 1. Correct the inheritance and scaling of font size in all browsers.\n * 2. Correct the odd `em` font sizing in all browsers.\n */\n\npre {\n font-family: monospace, monospace; /* 1 */\n font-size: 1em; /* 2 */\n}\n\n/* Text-level semantics\n ========================================================================== */\n\n/**\n * Remove the gray background on active links in IE 10.\n */\n\na {\n background-color: transparent;\n}\n\n/**\n * 1. Remove the bottom border in Chrome 57-\n * 2. Add the correct text decoration in Chrome, Edge, IE, Opera, and Safari.\n */\n\nabbr[title] {\n border-bottom: none; /* 1 */\n text-decoration: underline; /* 2 */\n text-decoration: underline dotted; /* 2 */\n}\n\n/**\n * Add the correct font weight in Chrome, Edge, and Safari.\n */\n\nb,\nstrong {\n font-weight: bolder;\n}\n\n/**\n * 1. Correct the inheritance and scaling of font size in all browsers.\n * 2. Correct the odd `em` font sizing in all browsers.\n */\n\ncode,\nkbd,\nsamp {\n font-family: monospace, monospace; /* 1 */\n font-size: 1em; /* 2 */\n}\n\n/**\n * Add the correct font size in all browsers.\n */\n\nsmall {\n font-size: 80%;\n}\n\n/**\n * Prevent `sub` and `sup` elements from affecting the line height in\n * all browsers.\n */\n\nsub,\nsup {\n font-size: 75%;\n line-height: 0;\n position: relative;\n vertical-align: baseline;\n}\n\nsub {\n bottom: -0.25em;\n}\n\nsup {\n top: -0.5em;\n}\n\n/* Embedded content\n ========================================================================== */\n\n/**\n * Remove the border on images inside links in IE 10.\n */\n\nimg {\n border-style: none;\n}\n\n/* Forms\n ========================================================================== */\n\n/**\n * 1. Change the font styles in all browsers.\n * 2. Remove the margin in Firefox and Safari.\n */\n\nbutton,\ninput,\noptgroup,\nselect,\ntextarea {\n font-family: inherit; /* 1 */\n font-size: 100%; /* 1 */\n line-height: 1.15; /* 1 */\n margin: 0; /* 2 */\n}\n\n/**\n * Show the overflow in IE.\n * 1. Show the overflow in Edge.\n */\n\nbutton,\ninput { /* 1 */\n overflow: visible;\n}\n\n/**\n * Remove the inheritance of text transform in Edge, Firefox, and IE.\n * 1. 
Remove the inheritance of text transform in Firefox.\n */\n\nbutton,\nselect { /* 1 */\n text-transform: none;\n}\n\n/**\n * Correct the inability to style clickable types in iOS and Safari.\n */\n\nbutton,\n[type=\"button\"],\n[type=\"reset\"],\n[type=\"submit\"] {\n -webkit-appearance: button;\n}\n\n/**\n * Remove the inner border and padding in Firefox.\n */\n\nbutton::-moz-focus-inner,\n[type=\"button\"]::-moz-focus-inner,\n[type=\"reset\"]::-moz-focus-inner,\n[type=\"submit\"]::-moz-focus-inner {\n border-style: none;\n padding: 0;\n}\n\n/**\n * Restore the focus styles unset by the previous rule.\n */\n\nbutton:-moz-focusring,\n[type=\"button\"]:-moz-focusring,\n[type=\"reset\"]:-moz-focusring,\n[type=\"submit\"]:-moz-focusring {\n outline: 1px dotted ButtonText;\n}\n\n/**\n * Correct the padding in Firefox.\n */\n\nfieldset {\n padding: 0.35em 0.75em 0.625em;\n}\n\n/**\n * 1. Correct the text wrapping in Edge and IE.\n * 2. Correct the color inheritance from `fieldset` elements in IE.\n * 3. Remove the padding so developers are not caught out when they zero out\n * `fieldset` elements in all browsers.\n */\n\nlegend {\n box-sizing: border-box; /* 1 */\n color: inherit; /* 2 */\n display: table; /* 1 */\n max-width: 100%; /* 1 */\n padding: 0; /* 3 */\n white-space: normal; /* 1 */\n}\n\n/**\n * Add the correct vertical alignment in Chrome, Firefox, and Opera.\n */\n\nprogress {\n vertical-align: baseline;\n}\n\n/**\n * Remove the default vertical scrollbar in IE 10+.\n */\n\ntextarea {\n overflow: auto;\n}\n\n/**\n * 1. Add the correct box sizing in IE 10.\n * 2. Remove the padding in IE 10.\n */\n\n[type=\"checkbox\"],\n[type=\"radio\"] {\n box-sizing: border-box; /* 1 */\n padding: 0; /* 2 */\n}\n\n/**\n * Correct the cursor style of increment and decrement buttons in Chrome.\n */\n\n[type=\"number\"]::-webkit-inner-spin-button,\n[type=\"number\"]::-webkit-outer-spin-button {\n height: auto;\n}\n\n/**\n * 1. Correct the odd appearance in Chrome and Safari.\n * 2. Correct the outline style in Safari.\n */\n\n[type=\"search\"] {\n -webkit-appearance: textfield; /* 1 */\n outline-offset: -2px; /* 2 */\n}\n\n/**\n * Remove the inner padding in Chrome and Safari on macOS.\n */\n\n[type=\"search\"]::-webkit-search-decoration {\n -webkit-appearance: none;\n}\n\n/**\n * 1. Correct the inability to style clickable types in iOS and Safari.\n * 2. 
Change font properties to `inherit` in Safari.\n */\n\n::-webkit-file-upload-button {\n -webkit-appearance: button; /* 1 */\n font: inherit; /* 2 */\n}\n\n/* Interactive\n ========================================================================== */\n\n/*\n * Add the correct display in Edge, IE 10+, and Firefox.\n */\n\ndetails {\n display: block;\n}\n\n/*\n * Add the correct display in all browsers.\n */\n\nsummary {\n display: list-item;\n}\n\n/* Misc\n ========================================================================== */\n\n/**\n * Add the correct display in IE 10+.\n */\n\ntemplate {\n display: none;\n}\n\n/**\n * Add the correct display in IE 10.\n */\n\n[hidden] {\n display: none;\n}\n","// This file contains styles for managing print media.\n\n////////////////////////////////////////////////////////////////////////////////\n// Hide elements not relevant to print media.\n////////////////////////////////////////////////////////////////////////////////\n@media print\n // Hide icon container.\n .content-icon-container\n display: none !important\n\n // Hide showing header links if hovering over when printing.\n .headerlink\n display: none !important\n\n // Hide mobile header.\n .mobile-header\n display: none !important\n\n // Hide navigation links.\n .related-pages\n display: none !important\n\n////////////////////////////////////////////////////////////////////////////////\n// Tweaks related to decolorization.\n////////////////////////////////////////////////////////////////////////////////\n@media print\n // Apply a border around code which no longer have a color background.\n .highlight\n border: 0.1pt solid var(--color-foreground-border)\n\n////////////////////////////////////////////////////////////////////////////////\n// Avoid page break in some relevant cases.\n////////////////////////////////////////////////////////////////////////////////\n@media print\n ul, ol, dl, a, table, pre, blockquote, p\n page-break-inside: avoid\n\n h1, h2, h3, h4, h5, h6, img, figure, caption\n page-break-inside: avoid\n page-break-after: avoid\n\n ul, ol, dl\n page-break-before: avoid\n",".visually-hidden\n position: absolute !important\n width: 1px !important\n height: 1px !important\n padding: 0 !important\n margin: -1px !important\n overflow: hidden !important\n clip: rect(0,0,0,0) !important\n white-space: nowrap !important\n border: 0 !important\n color: var(--color-foreground-primary)\n background: var(--color-background-primary)\n\n:-moz-focusring\n outline: auto\n","// This file serves as the \"skeleton\" of the theming logic.\n//\n// This contains the bulk of the logic for handling dark mode, color scheme\n// toggling and the handling of color-scheme-specific hiding of elements.\n\n@use \"../variables\" as *\n\nbody\n @include fonts\n @include spacing\n @include icons\n @include admonitions\n @include default-admonition(#651fff, \"abstract\")\n @include default-topic(#14B8A6, \"pencil\")\n\n @include colors\n\n.only-light\n display: block !important\nhtml body .only-dark\n display: none !important\n\n// Ignore dark-mode hints if print media.\n@media not print\n // Enable dark-mode, if requested.\n body[data-theme=\"dark\"]\n @include colors-dark\n\n html & .only-light\n display: none !important\n .only-dark\n display: block !important\n\n // Enable dark mode, unless explicitly told to avoid.\n @media (prefers-color-scheme: dark)\n body:not([data-theme=\"light\"])\n @include colors-dark\n\n html & .only-light\n display: none !important\n .only-dark\n display: block 
!important\n\n//\n// Theme toggle presentation\n//\nbody[data-theme=\"auto\"]\n .theme-toggle svg.theme-icon-when-auto-light\n display: block\n\n @media (prefers-color-scheme: dark)\n .theme-toggle svg.theme-icon-when-auto-dark\n display: block\n .theme-toggle svg.theme-icon-when-auto-light\n display: none\n\nbody[data-theme=\"dark\"]\n .theme-toggle svg.theme-icon-when-dark\n display: block\n\nbody[data-theme=\"light\"]\n .theme-toggle svg.theme-icon-when-light\n display: block\n","// Fonts used by this theme.\n//\n// There are basically two things here -- using the system font stack and\n// defining sizes for various elements in %ages. We could have also used `em`\n// but %age is easier to reason about for me.\n\n@mixin fonts {\n // These are adapted from https://systemfontstack.com/\n --font-stack:\n -apple-system, BlinkMacSystemFont, Segoe UI, Helvetica, Arial, sans-serif,\n Apple Color Emoji, Segoe UI Emoji;\n --font-stack--monospace:\n \"SFMono-Regular\", Menlo, Consolas, Monaco, Liberation Mono, Lucida Console,\n monospace;\n --font-stack--headings: var(--font-stack);\n\n --font-size--normal: 100%;\n --font-size--small: 87.5%;\n --font-size--small--2: 81.25%;\n --font-size--small--3: 75%;\n --font-size--small--4: 62.5%;\n\n // Sidebar\n --sidebar-caption-font-size: var(--font-size--small--2);\n --sidebar-item-font-size: var(--font-size--small);\n --sidebar-search-input-font-size: var(--font-size--small);\n\n // Table of Contents\n --toc-font-size: var(--font-size--small--3);\n --toc-font-size--mobile: var(--font-size--normal);\n --toc-title-font-size: var(--font-size--small--4);\n\n // Admonitions\n //\n // These aren't defined in terms of %ages, since nesting these is permitted.\n --admonition-font-size: 0.8125rem;\n --admonition-title-font-size: 0.8125rem;\n\n // Code\n --code-font-size: var(--font-size--small--2);\n\n // API\n --api-font-size: var(--font-size--small);\n}\n","// Spacing for various elements on the page\n//\n// If the user wants to tweak things in a certain way, they are permitted to.\n// They also have to deal with the consequences though!\n\n@mixin spacing {\n // Header!\n --header-height: calc(\n var(--sidebar-item-line-height) + 4 *\n #{var(--sidebar-item-spacing-vertical)}\n );\n --header-padding: 0.5rem;\n\n // Sidebar\n --sidebar-tree-space-above: 1.5rem;\n --sidebar-caption-space-above: 1rem;\n\n --sidebar-item-line-height: 1rem;\n --sidebar-item-spacing-vertical: 0.5rem;\n --sidebar-item-spacing-horizontal: 1rem;\n --sidebar-item-height: calc(\n var(--sidebar-item-line-height) + 2 *#{var(--sidebar-item-spacing-vertical)}\n );\n\n --sidebar-expander-width: var(--sidebar-item-height); // be square\n\n --sidebar-search-space-above: 0.5rem;\n --sidebar-search-input-spacing-vertical: 0.5rem;\n --sidebar-search-input-spacing-horizontal: 0.5rem;\n --sidebar-search-input-height: 1rem;\n --sidebar-search-icon-size: var(--sidebar-search-input-height);\n\n // Table of Contents\n --toc-title-padding: 0.25rem 0;\n --toc-spacing-vertical: 1.5rem;\n --toc-spacing-horizontal: 1.5rem;\n --toc-item-spacing-vertical: 0.4rem;\n --toc-item-spacing-horizontal: 1rem;\n}\n","// Expose theme icons as CSS variables.\n\n$icons: (\n // Adapted from tabler-icons\n // url: https://tablericons.com/\n \"search\":\n url('data:image/svg+xml;charset=utf-8,'),\n // Factored out from mkdocs-material on 24-Aug-2020.\n // url: https://squidfunk.github.io/mkdocs-material/reference/admonitions/\n \"pencil\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"abstract\":\n 
url('data:image/svg+xml;charset=utf-8,'),\n \"info\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"flame\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"question\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"warning\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"failure\":\n url('data:image/svg+xml;charset=utf-8,'),\n \"spark\":\n url('data:image/svg+xml;charset=utf-8,')\n);\n\n@mixin icons {\n @each $name, $glyph in $icons {\n --icon-#{$name}: #{$glyph};\n }\n}\n","@use \"sass:list\";\n// Admonitions\n\n// Structure of these is:\n// admonition-class: color \"icon-name\";\n//\n// The colors are translated into CSS variables below. The icons are\n// used directly in the main declarations to set the `mask-image` in\n// the title.\n\n// prettier-ignore\n$admonitions: (\n // Each of these has an reST directives for it.\n \"caution\": #ff9100 \"spark\",\n \"warning\": #ff9100 \"warning\",\n \"danger\": #ff5252 \"spark\",\n \"attention\": #ff5252 \"warning\",\n \"error\": #ff5252 \"failure\",\n \"hint\": #00c852 \"question\",\n \"tip\": #00c852 \"info\",\n \"important\": #00bfa5 \"flame\",\n \"note\": #00b0ff \"pencil\",\n \"seealso\": #448aff \"info\",\n \"admonition-todo\": #808080 \"pencil\"\n);\n\n@mixin default-admonition($color, $icon-name) {\n --color-admonition-title: #{$color};\n --color-admonition-title-background: #{rgba($color, 0.2)};\n\n --icon-admonition-default: var(--icon-#{$icon-name});\n}\n\n@mixin default-topic($color, $icon-name) {\n --color-topic-title: #{$color};\n --color-topic-title-background: #{rgba($color, 0.2)};\n\n --icon-topic-default: var(--icon-#{$icon-name});\n}\n\n@mixin admonitions {\n @each $name, $values in $admonitions {\n --color-admonition-title--#{$name}: #{list.nth($values, 1)};\n --color-admonition-title-background--#{$name}: #{rgba(\n list.nth($values, 1),\n 0.2\n )};\n }\n}\n","// Colors used throughout this theme.\n//\n// The aim is to give the user more control. 
Thus, instead of hard-coding colors\n// in various parts of the stylesheet, the approach taken is to define all\n// colors as CSS variables and reusing them in all the places.\n//\n// `colors-dark` depends on `colors` being included at a lower specificity.\n\n@mixin colors {\n --color-problematic: #b30000;\n\n // Base Colors\n --color-foreground-primary: black; // for main text and headings\n --color-foreground-secondary: #5a5c63; // for secondary text\n --color-foreground-muted: #6b6f76; // for muted text\n --color-foreground-border: #878787; // for content borders\n\n --color-background-primary: white; // for content\n --color-background-secondary: #f8f9fb; // for navigation + ToC\n --color-background-hover: #efeff4ff; // for navigation-item hover\n --color-background-hover--transparent: #efeff400;\n --color-background-border: #eeebee; // for UI borders\n --color-background-item: #ccc; // for \"background\" items (eg: copybutton)\n\n // Announcements\n --color-announcement-background: #000000dd;\n --color-announcement-text: #eeebee;\n\n // Brand colors\n --color-brand-primary: #0a4bff;\n --color-brand-content: #2757dd;\n --color-brand-visited: #872ee0;\n\n // API documentation\n --color-api-background: var(--color-background-hover--transparent);\n --color-api-background-hover: var(--color-background-hover);\n --color-api-overall: var(--color-foreground-secondary);\n --color-api-name: var(--color-problematic);\n --color-api-pre-name: var(--color-problematic);\n --color-api-paren: var(--color-foreground-secondary);\n --color-api-keyword: var(--color-foreground-primary);\n\n --color-api-added: #21632c;\n --color-api-added-border: #38a84d;\n --color-api-changed: #046172;\n --color-api-changed-border: #06a1bc;\n --color-api-deprecated: #605706;\n --color-api-deprecated-border: #f0d90f;\n --color-api-removed: #b30000;\n --color-api-removed-border: #ff5c5c;\n\n --color-highlight-on-target: #ffffcc;\n\n // Inline code background\n --color-inline-code-background: var(--color-background-secondary);\n\n // Highlighted text (search)\n --color-highlighted-background: #ddeeff;\n --color-highlighted-text: var(--color-foreground-primary);\n\n // GUI Labels\n --color-guilabel-background: #ddeeff80;\n --color-guilabel-border: #bedaf580;\n --color-guilabel-text: var(--color-foreground-primary);\n\n // Admonitions!\n --color-admonition-background: transparent;\n\n //////////////////////////////////////////////////////////////////////////////\n // Everything below this should be one of:\n // - var(...)\n // - *-gradient(...)\n // - special literal values (eg: transparent, none)\n //////////////////////////////////////////////////////////////////////////////\n\n // Tables\n --color-table-header-background: var(--color-background-secondary);\n --color-table-border: var(--color-background-border);\n\n // Cards\n --color-card-border: var(--color-background-secondary);\n --color-card-background: transparent;\n --color-card-marginals-background: var(--color-background-secondary);\n\n // Header\n --color-header-background: var(--color-background-primary);\n --color-header-border: var(--color-background-border);\n --color-header-text: var(--color-foreground-primary);\n\n // Sidebar (left)\n --color-sidebar-background: var(--color-background-secondary);\n --color-sidebar-background-border: var(--color-background-border);\n\n --color-sidebar-brand-text: var(--color-foreground-primary);\n --color-sidebar-caption-text: var(--color-foreground-muted);\n --color-sidebar-link-text: var(--color-foreground-secondary);\n 
--color-sidebar-link-text--top-level: var(--color-brand-primary);\n\n --color-sidebar-item-background: var(--color-sidebar-background);\n --color-sidebar-item-background--current: var(\n --color-sidebar-item-background\n );\n --color-sidebar-item-background--hover: linear-gradient(\n 90deg,\n var(--color-background-hover--transparent) 0%,\n var(--color-background-hover) var(--sidebar-item-spacing-horizontal),\n var(--color-background-hover) 100%\n );\n\n --color-sidebar-item-expander-background: transparent;\n --color-sidebar-item-expander-background--hover: var(\n --color-background-hover\n );\n\n --color-sidebar-search-text: var(--color-foreground-primary);\n --color-sidebar-search-background: var(--color-background-secondary);\n --color-sidebar-search-background--focus: var(--color-background-primary);\n --color-sidebar-search-border: var(--color-background-border);\n --color-sidebar-search-icon: var(--color-foreground-muted);\n\n // Table of Contents (right)\n --color-toc-background: var(--color-background-primary);\n --color-toc-title-text: var(--color-foreground-muted);\n --color-toc-item-text: var(--color-foreground-secondary);\n --color-toc-item-text--hover: var(--color-foreground-primary);\n --color-toc-item-text--active: var(--color-brand-primary);\n\n // Actual page contents\n --color-content-foreground: var(--color-foreground-primary);\n --color-content-background: transparent;\n\n // Links\n --color-link: var(--color-brand-content);\n --color-link-underline: var(--color-background-border);\n --color-link--hover: var(--color-brand-content);\n --color-link-underline--hover: var(--color-foreground-border);\n\n --color-link--visited: var(--color-brand-visited);\n --color-link-underline--visited: var(--color-background-border);\n --color-link--visited--hover: var(--color-brand-visited);\n --color-link-underline--visited--hover: var(--color-foreground-border);\n}\n\n@mixin colors-dark {\n --color-problematic: #ee5151;\n\n // Base Colors\n --color-foreground-primary: #cfd0d0; // for main text and headings\n --color-foreground-secondary: #9ca0a5; // for secondary text\n --color-foreground-muted: #81868d; // for muted text\n --color-foreground-border: #666666; // for content borders\n\n --color-background-primary: #131416; // for content\n --color-background-secondary: #1a1c1e; // for navigation + ToC\n --color-background-hover: #1e2124ff; // for navigation-item hover\n --color-background-hover--transparent: #1e212400;\n --color-background-border: #303335; // for UI borders\n --color-background-item: #444; // for \"background\" items (eg: copybutton)\n\n // Announcements\n --color-announcement-background: #000000dd;\n --color-announcement-text: #eeebee;\n\n // Brand colors\n --color-brand-primary: #3d94ff;\n --color-brand-content: #5ca5ff;\n --color-brand-visited: #b27aeb;\n\n // Highlighted text (search)\n --color-highlighted-background: #083563;\n\n // GUI Labels\n --color-guilabel-background: #08356380;\n --color-guilabel-border: #13395f80;\n\n // API documentation\n --color-api-keyword: var(--color-foreground-secondary);\n --color-highlight-on-target: #333300;\n\n --color-api-added: #3db854;\n --color-api-added-border: #267334;\n --color-api-changed: #09b0ce;\n --color-api-changed-border: #056d80;\n --color-api-deprecated: #b1a10b;\n --color-api-deprecated-border: #6e6407;\n --color-api-removed: #ff7575;\n --color-api-removed-border: #b03b3b;\n\n // Admonitions\n --color-admonition-background: #18181a;\n\n // Cards\n --color-card-border: var(--color-background-secondary);\n 
--color-card-background: #18181a;\n --color-card-marginals-background: var(--color-background-hover);\n}\n","// This file contains the styling for making the content throughout the page,\n// including fonts, paragraphs, headings and spacing among these elements.\n\nbody\n font-family: var(--font-stack)\npre,\ncode,\nkbd,\nsamp\n font-family: var(--font-stack--monospace)\n\n// Make fonts look slightly nicer.\nbody\n -webkit-font-smoothing: antialiased\n -moz-osx-font-smoothing: grayscale\n\n// Line height from Bootstrap 4.1\narticle\n line-height: 1.5\n\n//\n// Headings\n//\nh1,\nh2,\nh3,\nh4,\nh5,\nh6\n line-height: 1.25\n font-family: var(--font-stack--headings)\n font-weight: bold\n\n border-radius: 0.5rem\n margin-top: 0.5rem\n margin-bottom: 0.5rem\n margin-left: -0.5rem\n margin-right: -0.5rem\n padding-left: 0.5rem\n padding-right: 0.5rem\n\n + p\n margin-top: 0\n\nh1\n font-size: 2.5em\n margin-top: 1.75rem\n margin-bottom: 1rem\nh2\n font-size: 2em\n margin-top: 1.75rem\nh3\n font-size: 1.5em\nh4\n font-size: 1.25em\nh5\n font-size: 1.125em\nh6\n font-size: 1em\n\nsmall\n opacity: 75%\n font-size: 80%\n\n// Paragraph\np\n margin-top: 0.5rem\n margin-bottom: 0.75rem\n\n// Horizontal rules\nhr.docutils\n height: 1px\n padding: 0\n margin: 2rem 0\n background-color: var(--color-background-border)\n border: 0\n\n.centered\n text-align: center\n\n// Links\na\n text-decoration: underline\n\n color: var(--color-link)\n text-decoration-color: var(--color-link-underline)\n\n &:visited\n color: var(--color-link--visited)\n text-decoration-color: var(--color-link-underline--visited)\n &:hover\n color: var(--color-link--visited--hover)\n text-decoration-color: var(--color-link-underline--visited--hover)\n\n &:hover\n color: var(--color-link--hover)\n text-decoration-color: var(--color-link-underline--hover)\n &.muted-link\n color: inherit\n &:hover\n color: var(--color-link--hover)\n text-decoration-color: var(--color-link-underline--hover)\n &:visited\n color: var(--color-link--visited--hover)\n text-decoration-color: var(--color-link-underline--visited--hover)\n","// This file contains the styles for the overall layouting of the documentation\n// skeleton, including the responsive changes as well as sidebar toggles.\n//\n// This is implemented as a mobile-last design, which isn't ideal, but it is\n// reasonably good-enough and I got pretty tired by the time I'd finished this\n// to move the rules around to fix this. Shouldn't take more than 3-4 hours,\n// if you know what you're doing tho.\n\n// HACK: Not all browsers account for the scrollbar width in media queries.\n// This results in horizontal scrollbars in the breakpoint where we go\n// from displaying everything to hiding the ToC. 
We accomodate for this by\n// adding a bit of padding to the TOC drawer, disabling the horizontal\n// scrollbar and allowing the scrollbars to cover the padding.\n// https://www.456bereastreet.com/archive/201301/media_query_width_and_vertical_scrollbars/\n\n// HACK: Always having the scrollbar visible, prevents certain browsers from\n// causing the content to stutter horizontally between taller-than-viewport and\n// not-taller-than-viewport pages.\n@use \"variables\" as *\n\nhtml\n overflow-x: hidden\n overflow-y: scroll\n scroll-behavior: smooth\n\n.sidebar-scroll, .toc-scroll, article[role=main] *\n scrollbar-width: thin\n scrollbar-color: var(--color-foreground-border) transparent\n\n//\n// Overalls\n//\nhtml,\nbody\n height: 100%\n color: var(--color-foreground-primary)\n background: var(--color-background-primary)\n\n.skip-to-content\n position: fixed\n padding: 1rem\n border-radius: 1rem\n left: 0.25rem\n top: 0.25rem\n z-index: 40\n background: var(--color-background-primary)\n color: var(--color-foreground-primary)\n\n transform: translateY(-200%)\n transition: transform 300ms ease-in-out\n\n &:focus-within\n transform: translateY(0%)\n\narticle\n color: var(--color-content-foreground)\n background: var(--color-content-background)\n overflow-wrap: break-word\n\n.page\n display: flex\n // fill the viewport for pages with little content.\n min-height: 100%\n\n.mobile-header\n width: 100%\n height: var(--header-height)\n background-color: var(--color-header-background)\n color: var(--color-header-text)\n border-bottom: 1px solid var(--color-header-border)\n\n // Looks like sub-script/super-script have this, and we need this to\n // be \"on top\" of those.\n z-index: 10\n\n // We don't show the header on large screens.\n display: none\n\n // Add shadow when scrolled\n &.scrolled\n border-bottom: none\n box-shadow: 0 0 0.2rem rgba(0, 0, 0, 0.1), 0 0.2rem 0.4rem rgba(0, 0, 0, 0.2)\n\n .header-center\n a\n color: var(--color-header-text)\n text-decoration: none\n\n.main\n display: flex\n flex: 1\n\n// Sidebar (left) also covers the entire left portion of screen.\n.sidebar-drawer\n box-sizing: border-box\n\n border-right: 1px solid var(--color-sidebar-background-border)\n background: var(--color-sidebar-background)\n\n display: flex\n justify-content: flex-end\n // These next two lines took me two days to figure out.\n width: calc((100% - #{$full-width}) / 2 + #{$sidebar-width})\n min-width: $sidebar-width\n\n// Scroll-along sidebars\n.sidebar-container,\n.toc-drawer\n box-sizing: border-box\n width: $sidebar-width\n\n.toc-drawer\n background: var(--color-toc-background)\n // See HACK described on top of this document\n padding-right: 1rem\n\n.sidebar-sticky,\n.toc-sticky\n position: sticky\n top: 0\n height: min(100%, 100vh)\n height: 100vh\n\n display: flex\n flex-direction: column\n\n.sidebar-scroll,\n.toc-scroll\n flex-grow: 1\n flex-shrink: 1\n\n overflow: auto\n scroll-behavior: smooth\n\n// Central items.\n.content\n padding: 0 $content-padding\n width: $content-width\n\n display: flex\n flex-direction: column\n justify-content: space-between\n\n.icon\n display: inline-block\n height: 1rem\n width: 1rem\n svg\n width: 100%\n height: 100%\n\n//\n// Accommodate announcement banner\n//\n.announcement\n background-color: var(--color-announcement-background)\n color: var(--color-announcement-text)\n\n height: var(--header-height)\n display: flex\n align-items: center\n overflow-x: auto\n & + .page\n min-height: calc(100% - var(--header-height))\n\n.announcement-content\n box-sizing: 
border-box\n padding: 0.5rem\n min-width: 100%\n white-space: nowrap\n text-align: center\n\n a\n color: var(--color-announcement-text)\n text-decoration-color: var(--color-announcement-text)\n\n &:hover\n color: var(--color-announcement-text)\n text-decoration-color: var(--color-link--hover)\n\n////////////////////////////////////////////////////////////////////////////////\n// Toggles for theme\n////////////////////////////////////////////////////////////////////////////////\n.no-js .theme-toggle-container // don't show theme toggle if there's no JS\n display: none\n\n.theme-toggle-container\n display: flex\n\n.theme-toggle\n display: flex\n cursor: pointer\n border: none\n padding: 0\n background: transparent\n\n.theme-toggle svg\n height: 1.25rem\n width: 1.25rem\n color: var(--color-foreground-primary)\n display: none\n\n.theme-toggle-header\n display: flex\n align-items: center\n justify-content: center\n\n////////////////////////////////////////////////////////////////////////////////\n// Toggles for elements\n////////////////////////////////////////////////////////////////////////////////\n.toc-overlay-icon, .nav-overlay-icon\n display: none\n cursor: pointer\n\n .icon\n color: var(--color-foreground-secondary)\n height: 1.5rem\n width: 1.5rem\n\n.toc-header-icon, .nav-overlay-icon\n // for when we set display: flex\n justify-content: center\n align-items: center\n\n.toc-content-icon\n height: 1.5rem\n width: 1.5rem\n\n.content-icon-container\n float: right\n display: flex\n margin-top: 1.5rem\n margin-left: 1rem\n margin-bottom: 1rem\n gap: 0.5rem\n\n .edit-this-page, .view-this-page\n svg\n color: inherit\n height: 1.25rem\n width: 1.25rem\n\n.sidebar-toggle\n position: absolute\n display: none\n// \n.sidebar-toggle[name=\"__toc\"]\n left: 20px\n.sidebar-toggle:checked\n left: 40px\n// \n\n.overlay\n position: fixed\n top: 0\n width: 0\n height: 0\n\n transition: width 0ms, height 0ms, opacity 250ms ease-out\n\n opacity: 0\n background-color: rgba(0, 0, 0, 0.54)\n.sidebar-overlay\n z-index: 20\n.toc-overlay\n z-index: 40\n\n// Keep things on top and smooth.\n.sidebar-drawer\n z-index: 30\n transition: left 250ms ease-in-out\n.toc-drawer\n z-index: 50\n transition: right 250ms ease-in-out\n\n// Show the Sidebar\n#__navigation:checked\n & ~ .sidebar-overlay\n width: 100%\n height: 100%\n opacity: 1\n & ~ .page\n .sidebar-drawer\n top: 0\n left: 0\n // Show the toc sidebar\n#__toc:checked\n & ~ .toc-overlay\n width: 100%\n height: 100%\n opacity: 1\n & ~ .page\n .toc-drawer\n top: 0\n right: 0\n\n////////////////////////////////////////////////////////////////////////////////\n// Back to top\n////////////////////////////////////////////////////////////////////////////////\n.back-to-top\n text-decoration: none\n\n display: none\n position: fixed\n left: 0\n top: 1rem\n padding: 0.5rem\n padding-right: 0.75rem\n border-radius: 1rem\n font-size: 0.8125rem\n\n background: var(--color-background-primary)\n box-shadow: 0 0.2rem 0.5rem rgba(0, 0, 0, 0.05), #6b728080 0px 0px 1px 0px\n\n z-index: 10\n\n margin-left: 50%\n transform: translateX(-50%)\n svg\n height: 1rem\n width: 1rem\n fill: currentColor\n display: inline-block\n\n span\n margin-left: 0.25rem\n\n .show-back-to-top &\n display: flex\n align-items: center\n\n////////////////////////////////////////////////////////////////////////////////\n// Responsive layouting\n////////////////////////////////////////////////////////////////////////////////\n// Make things a bit bigger on bigger screens.\n@media (min-width: $full-width + 
$sidebar-width)\n html\n font-size: 110%\n\n@media (max-width: $full-width)\n // Collapse \"toc\" into the icon.\n .toc-content-icon\n display: flex\n .toc-drawer\n position: fixed\n height: 100vh\n top: 0\n right: -$sidebar-width\n border-left: 1px solid var(--color-background-muted)\n .toc-tree\n border-left: none\n font-size: var(--toc-font-size--mobile)\n\n // Accomodate for a changed content width.\n .sidebar-drawer\n width: calc((100% - #{$full-width - $sidebar-width}) / 2 + #{$sidebar-width})\n\n@media (max-width: $content-padded-width + $sidebar-width)\n // Center the page\n .content\n margin-left: auto\n margin-right: auto\n padding: 0 $content-padding--small\n\n@media (max-width: $content-padded-width--small + $sidebar-width)\n // Collapse \"navigation\".\n .nav-overlay-icon\n display: flex\n .sidebar-drawer\n position: fixed\n height: 100vh\n width: $sidebar-width\n\n top: 0\n left: -$sidebar-width\n\n // Swap which icon is visible.\n .toc-header-icon, .theme-toggle-header\n display: flex\n .toc-content-icon, .theme-toggle-content\n display: none\n\n // Show the header.\n .mobile-header\n position: sticky\n top: 0\n display: flex\n justify-content: space-between\n align-items: center\n\n .header-left,\n .header-right\n display: flex\n height: var(--header-height)\n padding: 0 var(--header-padding)\n label\n height: 100%\n width: 100%\n user-select: none\n\n .nav-overlay-icon .icon,\n .theme-toggle svg\n height: 1.5rem\n width: 1.5rem\n\n // Add a scroll margin for the content\n :target\n scroll-margin-top: calc(var(--header-height) + 2.5rem)\n\n // Show back-to-top below the header\n .back-to-top\n top: calc(var(--header-height) + 0.5rem)\n\n // Accommodate for the header.\n .page\n flex-direction: column\n justify-content: center\n\n@media (max-width: $content-width + 2* $content-padding--small)\n // Content should respect window limits.\n .content\n width: 100%\n overflow-x: auto\n\n@media (max-width: $content-width)\n article[role=main] aside.sidebar\n float: none\n width: 100%\n margin: 1rem 0\n","@use \"sass:list\"\n@use \"../variables\" as *\n\n// The design here is strongly inspired by mkdocs-material.\n.admonition, .topic\n margin: 1rem auto\n padding: 0 0.5rem 0.5rem 0.5rem\n\n background: var(--color-admonition-background)\n\n border-radius: 0.2rem\n box-shadow: 0 0.2rem 0.5rem rgba(0, 0, 0, 0.05), 0 0 0.0625rem rgba(0, 0, 0, 0.1)\n\n font-size: var(--admonition-font-size)\n\n overflow: hidden\n page-break-inside: avoid\n\n // First element should have no margin, since the title has it.\n > :nth-child(2)\n margin-top: 0\n\n // Last item should have no margin, since we'll control that w/ padding\n > :last-child\n margin-bottom: 0\n\n.admonition p.admonition-title,\np.topic-title\n position: relative\n margin: 0 -0.5rem 0.5rem\n padding-left: 2rem\n padding-right: .5rem\n padding-top: .4rem\n padding-bottom: .4rem\n\n font-weight: 500\n font-size: var(--admonition-title-font-size)\n line-height: 1.3\n\n // Our fancy icon\n &::before\n content: \"\"\n position: absolute\n left: 0.5rem\n width: 1rem\n height: 1rem\n\n// Default styles\np.admonition-title\n background-color: var(--color-admonition-title-background)\n &::before\n background-color: var(--color-admonition-title)\n mask-image: var(--icon-admonition-default)\n mask-repeat: no-repeat\n\np.topic-title\n background-color: var(--color-topic-title-background)\n &::before\n background-color: var(--color-topic-title)\n mask-image: var(--icon-topic-default)\n mask-repeat: no-repeat\n\n//\n// Variants\n//\n.admonition\n 
border-left: 0.2rem solid var(--color-admonition-title)\n\n @each $type, $value in $admonitions\n &.#{$type}\n border-left-color: var(--color-admonition-title--#{$type})\n > .admonition-title\n background-color: var(--color-admonition-title-background--#{$type})\n &::before\n background-color: var(--color-admonition-title--#{$type})\n mask-image: var(--icon-#{list.nth($value, 2)})\n\n.admonition-todo > .admonition-title\n text-transform: uppercase\n","// This file stylizes the API documentation (stuff generated by autodoc). It's\n// deeply nested due to how autodoc structures the HTML without enough classes\n// to select the relevant items.\n\n// API docs!\ndl[class]:not(.option-list):not(.field-list):not(.footnote):not(.glossary):not(.simple)\n // Tweak the spacing of all the things!\n dd\n margin-left: 2rem\n > :first-child\n margin-top: 0.125rem\n > :last-child\n margin-bottom: 0.75rem\n\n // This is used for the arguments\n .field-list\n margin-bottom: 0.75rem\n\n // \"Headings\" (like \"Parameters\" and \"Return\")\n > dt\n text-transform: uppercase\n font-size: var(--font-size--small)\n\n dd:empty\n margin-bottom: 0.5rem\n dd > ul\n margin-left: -1.2rem\n > li\n > p:nth-child(2)\n margin-top: 0\n // When the last-empty-paragraph follows a paragraph, it doesn't need\n // to augument the existing spacing.\n > p + p:last-child:empty\n margin-top: 0\n margin-bottom: 0\n\n // Colorize the elements\n > dt\n color: var(--color-api-overall)\n\n.sig:not(.sig-inline)\n font-weight: bold\n\n font-size: var(--api-font-size)\n font-family: var(--font-stack--monospace)\n\n margin-left: -0.25rem\n margin-right: -0.25rem\n padding-top: 0.25rem\n padding-bottom: 0.25rem\n padding-right: 0.5rem\n\n // These are intentionally em, to properly match the font size.\n padding-left: 3em\n text-indent: -2.5em\n\n border-radius: 0.25rem\n\n background: var(--color-api-background)\n transition: background 100ms ease-out\n\n &:hover\n background: var(--color-api-background-hover)\n\n // adjust the size of the [source] link on the right.\n a.reference\n .viewcode-link\n font-weight: normal\n width: 4.25rem\n\nem.property, span.property\n font-style: normal\n &:first-child\n color: var(--color-api-keyword)\n.sig-name\n color: var(--color-api-name)\n.sig-prename\n font-weight: normal\n color: var(--color-api-pre-name)\n.sig-paren\n color: var(--color-api-paren)\n.sig-param\n font-style: normal\n\ndiv.versionadded,\ndiv.versionchanged,\ndiv.deprecated,\ndiv.versionremoved\n border-left: 0.1875rem solid\n border-radius: 0.125rem\n\n padding-left: 0.75rem\n\n p\n margin-top: 0.125rem\n margin-bottom: 0.125rem\n\ndiv.versionadded\n border-color: var(--color-api-added-border)\n .versionmodified\n color: var(--color-api-added)\n\ndiv.versionchanged\n border-color: var(--color-api-changed-border)\n .versionmodified\n color: var(--color-api-changed)\n\ndiv.deprecated\n border-color: var(--color-api-deprecated-border)\n .versionmodified\n color: var(--color-api-deprecated)\n\ndiv.versionremoved\n border-color: var(--color-api-removed-border)\n .versionmodified\n color: var(--color-api-removed)\n\n// Align the [docs] and [source] to the right.\n.viewcode-link, .viewcode-back\n float: right\n text-align: right\n",".line-block\n margin-top: 0.5rem\n margin-bottom: 0.75rem\n .line-block\n margin-top: 0rem\n margin-bottom: 0rem\n padding-left: 1rem\n","// Captions\narticle p.caption,\ntable > caption,\n.code-block-caption\n font-size: var(--font-size--small)\n text-align: center\n\n// Caption above a 
TOCTree\n.toctree-wrapper.compound\n .caption, :not(.caption) > .caption-text\n font-size: var(--font-size--small)\n text-transform: uppercase\n\n text-align: initial\n margin-bottom: 0\n\n > ul\n margin-top: 0\n margin-bottom: 0\n","// Inline code\ncode.literal, .sig-inline\n background: var(--color-inline-code-background)\n border-radius: 0.2em\n // Make the font smaller, and use padding to recover.\n font-size: var(--font-size--small--2)\n padding: 0.1em 0.2em\n\n pre.literal-block &\n font-size: inherit\n padding: 0\n\n p &\n border: 1px solid var(--color-background-border)\n\n.sig-inline\n font-family: var(--font-stack--monospace)\n\n// Code and Literal Blocks\n$code-spacing-vertical: 0.625rem\n$code-spacing-horizontal: 0.875rem\n\n// Wraps every literal block + line numbers.\ndiv[class*=\" highlight-\"],\ndiv[class^=\"highlight-\"]\n margin: 1em 0\n display: flex\n\n .table-wrapper\n margin: 0\n padding: 0\n\npre\n margin: 0\n padding: 0\n overflow: auto\n\n // Needed to have more specificity than pygments' \"pre\" selector. :(\n article[role=\"main\"] .highlight &\n line-height: 1.5\n\n &.literal-block,\n .highlight &\n font-size: var(--code-font-size)\n padding: $code-spacing-vertical $code-spacing-horizontal\n\n // Make it look like all the other blocks.\n &.literal-block\n margin-top: 1rem\n margin-bottom: 1rem\n\n border-radius: 0.2rem\n background-color: var(--color-code-background)\n color: var(--color-code-foreground)\n\n// All code is always contained in this.\n.highlight\n width: 100%\n border-radius: 0.2rem\n\n // Make line numbers and prompts un-selectable.\n .gp, span.linenos\n user-select: none\n pointer-events: none\n\n // Expand the line-highlighting.\n .hll\n display: block\n margin-left: -$code-spacing-horizontal\n margin-right: -$code-spacing-horizontal\n padding-left: $code-spacing-horizontal\n padding-right: $code-spacing-horizontal\n\n/* Make code block captions be nicely integrated */\n.code-block-caption\n display: flex\n padding: $code-spacing-vertical $code-spacing-horizontal\n\n border-radius: 0.25rem\n border-bottom-left-radius: 0\n border-bottom-right-radius: 0\n font-weight: 300\n border-bottom: 1px solid\n\n background-color: var(--color-code-background)\n color: var(--color-code-foreground)\n border-color: var(--color-background-border)\n\n + div[class]\n margin-top: 0\n > .highlight\n border-top-left-radius: 0\n border-top-right-radius: 0\n\n// When `html_codeblock_linenos_style` is table.\n.highlighttable\n width: 100%\n display: block\n tbody\n display: block\n\n tr\n display: flex\n\n // Line numbers\n td.linenos\n background-color: var(--color-code-background)\n color: var(--color-code-foreground)\n padding: $code-spacing-vertical $code-spacing-horizontal\n padding-right: 0\n border-top-left-radius: 0.2rem\n border-bottom-left-radius: 0.2rem\n\n .linenodiv\n padding-right: $code-spacing-horizontal\n font-size: var(--code-font-size)\n box-shadow: -0.0625rem 0 var(--color-foreground-border) inset\n\n // Actual code\n td.code\n padding: 0\n display: block\n flex: 1\n overflow: hidden\n\n .highlight\n border-top-left-radius: 0\n border-bottom-left-radius: 0\n\n// When `html_codeblock_linenos_style` is inline.\n.highlight\n span.linenos\n display: inline-block\n padding-left: 0\n padding-right: $code-spacing-horizontal\n margin-right: $code-spacing-horizontal\n box-shadow: -0.0625rem 0 var(--color-foreground-border) inset\n","// Inline Footnote Reference\n.footnote-reference\n font-size: var(--font-size--small--4)\n vertical-align: super\n\n// 
Definition list, listing the content of each note.\n// docutils <= 0.17\ndl.footnote.brackets\n font-size: var(--font-size--small)\n color: var(--color-foreground-secondary)\n\n display: grid\n grid-template-columns: max-content auto\n dt\n margin: 0\n > .fn-backref\n margin-left: 0.25rem\n\n &:after\n content: \":\"\n\n .brackets\n &:before\n content: \"[\"\n &:after\n content: \"]\"\n\n dd\n margin: 0\n padding: 0 1rem\n\n// docutils >= 0.18\naside.footnote\n font-size: var(--font-size--small)\n color: var(--color-foreground-secondary)\n\naside.footnote > span,\ndiv.citation > span\n float: left\n font-weight: 500\n padding-right: 0.25rem\n\naside.footnote > *:not(span),\ndiv.citation > p\n margin-left: 2rem\n","//\n// Figures\n//\nimg\n box-sizing: border-box\n max-width: 100%\n height: auto\n\narticle\n figure, .figure\n border-radius: 0.2rem\n\n margin: 0\n :last-child\n margin-bottom: 0\n\n .align-left\n float: left\n clear: left\n margin: 0 1rem 1rem\n\n .align-right\n float: right\n clear: right\n margin: 0 1rem 1rem\n\n .align-default,\n .align-center\n display: block\n text-align: center\n margin-left: auto\n margin-right: auto\n\n // WELL, table needs to be stylised like a table.\n table.align-default\n display: table\n text-align: initial\n",".genindex-jumpbox, .domainindex-jumpbox\n border-top: 1px solid var(--color-background-border)\n border-bottom: 1px solid var(--color-background-border)\n padding: 0.25rem\n\n.genindex-section, .domainindex-section\n h2\n margin-top: 0.75rem\n margin-bottom: 0.5rem\n ul\n margin-top: 0\n margin-bottom: 0\n","ul,\nol\n padding-left: 1.2rem\n\n // Space lists out like paragraphs\n margin-top: 1rem\n margin-bottom: 1rem\n // reduce margins within li.\n li\n > p:first-child\n margin-top: 0.25rem\n margin-bottom: 0.25rem\n\n > p:last-child\n margin-top: 0.25rem\n\n > ul,\n > ol\n margin-top: 0.5rem\n margin-bottom: 0.5rem\n\nol\n &.arabic\n list-style: decimal\n &.loweralpha\n list-style: lower-alpha\n &.upperalpha\n list-style: upper-alpha\n &.lowerroman\n list-style: lower-roman\n &.upperroman\n list-style: upper-roman\n\n// Don't space lists out when they're \"simple\" or in a `.. 
toctree::`\n.simple,\n.toctree-wrapper\n li\n > ul,\n > ol\n margin-top: 0\n margin-bottom: 0\n\n// Definition Lists\n.field-list,\n.option-list,\ndl:not([class]),\ndl.simple,\ndl.footnote,\ndl.glossary\n dt\n font-weight: 500\n margin-top: 0.25rem\n + dt\n margin-top: 0\n\n .classifier::before\n content: \":\"\n margin-left: 0.2rem\n margin-right: 0.2rem\n\n dd\n > p:first-child,\n ul\n margin-top: 0.125rem\n\n ul\n margin-bottom: 0.125rem\n",".math-wrapper\n width: 100%\n overflow-x: auto\n\ndiv.math\n position: relative\n text-align: center\n\n .headerlink,\n &:focus .headerlink\n display: none\n\n &:hover .headerlink\n display: inline-block\n\n span.eqno\n position: absolute\n right: 0.5rem\n top: 50%\n transform: translate(0, -50%)\n z-index: 1\n","// Abbreviations\nabbr[title]\n cursor: help\n\n// \"Problematic\" content, as identified by Sphinx\n.problematic\n color: var(--color-problematic)\n\n// Keyboard / Mouse \"instructions\"\nkbd:not(.compound)\n margin: 0 0.2rem\n padding: 0 0.2rem\n border-radius: 0.2rem\n border: 1px solid var(--color-foreground-border)\n color: var(--color-foreground-primary)\n vertical-align: text-bottom\n\n font-size: var(--font-size--small--3)\n display: inline-block\n\n box-shadow: 0 0.0625rem 0 rgba(0, 0, 0, 0.2), inset 0 0 0 0.125rem var(--color-background-primary)\n\n background-color: var(--color-background-secondary)\n\n// Blockquote\nblockquote\n border-left: 4px solid var(--color-background-border)\n background: var(--color-background-secondary)\n\n margin-left: 0\n margin-right: 0\n padding: 0.5rem 1rem\n\n .attribution\n font-weight: 600\n text-align: right\n\n &.pull-quote,\n &.highlights\n font-size: 1.25em\n\n &.epigraph,\n &.pull-quote\n border-left-width: 0\n border-radius: 0.5rem\n\n &.highlights\n border-left-width: 0\n background: transparent\n\n// Center align embedded-in-text images\np .reference img\n vertical-align: middle\n","p.rubric\n line-height: 1.25\n font-weight: bold\n font-size: 1.125em\n\n // For Numpy-style documentation that's got rubrics within it.\n // https://github.com/pradyunsg/furo/discussions/505\n dd &\n line-height: inherit\n font-weight: inherit\n\n font-size: var(--font-size--small)\n text-transform: uppercase\n","article .sidebar\n float: right\n clear: right\n width: 30%\n\n margin-left: 1rem\n margin-right: 0\n\n border-radius: 0.2rem\n background-color: var(--color-background-secondary)\n border: var(--color-background-border) 1px solid\n\n > *\n padding-left: 1rem\n padding-right: 1rem\n\n > ul, > ol // lists need additional padding, because bullets.\n padding-left: 2.2rem\n\n .sidebar-title\n margin: 0\n padding: 0.5rem 1rem\n border-bottom: var(--color-background-border) 1px solid\n\n font-weight: 500\n\n// TODO: subtitle\n// TODO: dedicated variables?\n","[role=main] .table-wrapper.container\n width: 100%\n overflow-x: auto\n margin-top: 1rem\n margin-bottom: 0.5rem\n padding: 0.2rem 0.2rem 0.75rem\n\ntable.docutils\n border-radius: 0.2rem\n border-spacing: 0\n border-collapse: collapse\n\n box-shadow: 0 0.2rem 0.5rem rgba(0, 0, 0, 0.05), 0 0 0.0625rem rgba(0, 0, 0, 0.1)\n\n th\n background: var(--color-table-header-background)\n\n td,\n th\n // Space things out properly\n padding: 0 0.25rem\n\n // Get the borders looking just-right.\n border-left: 1px solid var(--color-table-border)\n border-right: 1px solid var(--color-table-border)\n border-bottom: 1px solid var(--color-table-border)\n\n p\n margin: 0.25rem\n\n &:first-child\n border-left: none\n &:last-child\n border-right: none\n\n // MyST-parser 
tables set these classes for control of column alignment\n &.text-left\n text-align: left\n &.text-right\n text-align: right\n &.text-center\n text-align: center\n","@use \"../variables\" as *\n\n:target\n scroll-margin-top: 2.5rem\n\n@media (max-width: $full-width - $sidebar-width)\n :target\n scroll-margin-top: calc(2.5rem + var(--header-height))\n\n // When a heading is selected\n section > span:target\n scroll-margin-top: calc(2.8rem + var(--header-height))\n\n// Permalinks\n.headerlink\n font-weight: 100\n user-select: none\n\nh1,\nh2,\nh3,\nh4,\nh5,\nh6,\ndl dt,\np.caption,\nfigcaption p,\ntable > caption,\n.code-block-caption\n > .headerlink\n margin-left: 0.5rem\n visibility: hidden\n &:hover > .headerlink\n visibility: visible\n\n // Don't change to link-like, if someone adds the contents directive.\n > .toc-backref\n color: inherit\n text-decoration-line: none\n\n// Figure and table captions are special.\nfigure:hover > figcaption > p > .headerlink,\ntable:hover > caption > .headerlink\n visibility: visible\n\n:target >, // Regular section[id] style anchors\nspan:target ~ // Non-regular span[id] style \"extra\" anchors\n h1,\n h2,\n h3,\n h4,\n h5,\n h6\n &:nth-of-type(1)\n background-color: var(--color-highlight-on-target)\n // .headerlink\n // visibility: visible\n code.literal\n background-color: transparent\n\ntable:target > caption,\nfigure:target\n background-color: var(--color-highlight-on-target)\n\n// Inline page contents\n.this-will-duplicate-information-and-it-is-still-useful-here li :target\n background-color: var(--color-highlight-on-target)\n\n// Code block permalinks\n.literal-block-wrapper:target .code-block-caption\n background-color: var(--color-highlight-on-target)\n\n// When a definition list item is selected\n//\n// There isn't really an alternative to !important here, due to the\n// high-specificity of API documentation's selector.\ndt:target\n background-color: var(--color-highlight-on-target) !important\n\n// When a footnote reference is selected\n.footnote > dt:target + dd,\n.footnote-reference:target\n background-color: var(--color-highlight-on-target)\n",".guilabel\n background-color: var(--color-guilabel-background)\n border: 1px solid var(--color-guilabel-border)\n color: var(--color-guilabel-text)\n\n padding: 0 0.3em\n border-radius: 0.5em\n font-size: 0.9em\n","// This file contains the styles used for stylizing the footer that's shown\n// below the content.\n@use \"../variables\" as *\n\nfooter\n font-size: var(--font-size--small)\n display: flex\n flex-direction: column\n\n margin-top: 2rem\n\n// Bottom of page information\n.bottom-of-page\n display: flex\n align-items: center\n justify-content: space-between\n\n margin-top: 1rem\n padding-top: 1rem\n padding-bottom: 1rem\n\n color: var(--color-foreground-secondary)\n border-top: 1px solid var(--color-background-border)\n\n line-height: 1.5\n\n @media (max-width: $content-width)\n text-align: center\n flex-direction: column-reverse\n gap: 0.25rem\n\n .left-details\n font-size: var(--font-size--small)\n\n .right-details\n display: flex\n flex-direction: column\n gap: 0.25rem\n text-align: right\n\n .icons\n display: flex\n justify-content: flex-end\n gap: 0.25rem\n font-size: 1rem\n\n a\n text-decoration: none\n\n svg,\n img\n font-size: 1.125rem\n height: 1em\n width: 1em\n\n// Next/Prev page information\n.related-pages\n a\n display: flex\n align-items: center\n\n text-decoration: none\n &:hover .page-info .title\n text-decoration: underline\n color: var(--color-link)\n text-decoration-color: 
var(--color-link-underline)\n\n svg.furo-related-icon,\n svg.furo-related-icon > use\n flex-shrink: 0\n\n color: var(--color-foreground-border)\n\n width: 0.75rem\n height: 0.75rem\n margin: 0 0.5rem\n\n &.next-page\n max-width: 50%\n\n float: right\n clear: right\n text-align: right\n\n &.prev-page\n max-width: 50%\n\n float: left\n clear: left\n\n svg\n transform: rotate(180deg)\n\n.page-info\n display: flex\n flex-direction: column\n overflow-wrap: anywhere\n\n .next-page &\n align-items: flex-end\n\n .context\n display: flex\n align-items: center\n\n padding-bottom: 0.1rem\n\n color: var(--color-foreground-muted)\n font-size: var(--font-size--small)\n text-decoration: none\n","// This file contains the styles for the contents of the left sidebar, which\n// contains the navigation tree, logo, search etc.\n\n////////////////////////////////////////////////////////////////////////////////\n// Brand on top of the scrollable tree.\n////////////////////////////////////////////////////////////////////////////////\n.sidebar-brand\n display: flex\n flex-direction: column\n flex-shrink: 0\n\n padding: var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal)\n text-decoration: none\n\n.sidebar-brand-text\n color: var(--color-sidebar-brand-text)\n overflow-wrap: break-word\n margin: var(--sidebar-item-spacing-vertical) 0\n font-size: 1.5rem\n\n.sidebar-logo-container\n margin: var(--sidebar-item-spacing-vertical) 0\n\n.sidebar-logo\n margin: 0 auto\n display: block\n max-width: 100%\n\n////////////////////////////////////////////////////////////////////////////////\n// Search\n////////////////////////////////////////////////////////////////////////////////\n.sidebar-search-container\n display: flex\n align-items: center\n margin-top: var(--sidebar-search-space-above)\n\n position: relative\n\n background: var(--color-sidebar-search-background)\n &:hover,\n &:focus-within\n background: var(--color-sidebar-search-background--focus)\n\n &::before\n content: \"\"\n position: absolute\n left: var(--sidebar-item-spacing-horizontal)\n width: var(--sidebar-search-icon-size)\n height: var(--sidebar-search-icon-size)\n\n background-color: var(--color-sidebar-search-icon)\n mask-image: var(--icon-search)\n\n.sidebar-search\n box-sizing: border-box\n\n border: none\n border-top: 1px solid var(--color-sidebar-search-border)\n border-bottom: 1px solid var(--color-sidebar-search-border)\n\n padding-top: var(--sidebar-search-input-spacing-vertical)\n padding-bottom: var(--sidebar-search-input-spacing-vertical)\n padding-right: var(--sidebar-search-input-spacing-horizontal)\n padding-left: calc(var(--sidebar-item-spacing-horizontal) + var(--sidebar-search-input-spacing-horizontal) + var(--sidebar-search-icon-size))\n\n width: 100%\n\n color: var(--color-sidebar-search-foreground)\n background: transparent\n z-index: 10\n\n &:focus\n outline: none\n\n &::placeholder\n font-size: var(--sidebar-search-input-font-size)\n\n//\n// Hide Search Matches link\n//\n#searchbox .highlight-link\n padding: var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal) 0\n margin: 0\n text-align: center\n\n a\n color: var(--color-sidebar-search-icon)\n font-size: var(--font-size--small--2)\n\n////////////////////////////////////////////////////////////////////////////////\n// Structure/Skeleton of the navigation tree (left)\n////////////////////////////////////////////////////////////////////////////////\n.sidebar-tree\n font-size: var(--sidebar-item-font-size)\n margin-top: 
var(--sidebar-tree-space-above)\n margin-bottom: var(--sidebar-item-spacing-vertical)\n\n ul\n padding: 0\n margin-top: 0\n margin-bottom: 0\n\n display: flex\n flex-direction: column\n\n list-style: none\n\n li\n position: relative\n margin: 0\n\n > ul\n margin-left: var(--sidebar-item-spacing-horizontal)\n\n .icon\n color: var(--color-sidebar-link-text)\n\n .reference\n box-sizing: border-box\n color: var(--color-sidebar-link-text)\n\n // Fill the parent.\n display: inline-block\n line-height: var(--sidebar-item-line-height)\n text-decoration: none\n\n // Don't allow long words to cause wrapping.\n overflow-wrap: anywhere\n\n height: 100%\n width: 100%\n\n padding: var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal)\n\n &:hover\n color: var(--color-sidebar-link-text)\n background: var(--color-sidebar-item-background--hover)\n\n // Add a nice little \"external-link\" arrow here.\n &.external::after\n content: url('data:image/svg+xml,')\n margin: 0 0.25rem\n vertical-align: middle\n color: var(--color-sidebar-link-text)\n\n // Make the current page reference bold.\n .current-page > .reference\n font-weight: bold\n\n label\n position: absolute\n top: 0\n right: 0\n height: var(--sidebar-item-height)\n width: var(--sidebar-expander-width)\n\n cursor: pointer\n user-select: none\n\n display: flex\n justify-content: center\n align-items: center\n\n .caption, :not(.caption) > .caption-text\n font-size: var(--sidebar-caption-font-size)\n color: var(--color-sidebar-caption-text)\n\n font-weight: bold\n text-transform: uppercase\n\n margin: var(--sidebar-caption-space-above) 0 0 0\n padding: var(--sidebar-item-spacing-vertical) var(--sidebar-item-spacing-horizontal)\n\n // If it has children, add a bit more padding to wrap the content to avoid\n // overlapping with the