Skip to content

Commit

Permalink
Merge pull request useblocks#19 from useblocks/duplicate-table-ids
Browse files Browse the repository at this point in the history
fixed table id for issue#18
  • Loading branch information
ubmarco authored Jun 16, 2022
2 parents 9983b6f + ddff175 commit 2532222
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 92 deletions.
43 changes: 21 additions & 22 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,28 +9,27 @@ on:

jobs:
build-n-publish:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2

- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8

- name: Install poetry
run: |
python -m pip install poetry
- name: Build the package
run: |
poetry build
- name: Publish package
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@master
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
- uses: actions/checkout@v2

- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8

- name: Install poetry
run: |
python -m pip install poetry
- name: Build the package
run: |
poetry build
- name: Publish package
if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
uses: pypa/gh-action-pypi-publish@master
with:
user: __token__
password: ${{ secrets.PYPI_API_TOKEN }}
122 changes: 60 additions & 62 deletions .github/workflows/tox.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,86 +2,84 @@ name: Run tox tests

on:
push:
branches: [ master ]
branches: [master]
pull_request:
branches: [ master ]
branches: [master]

jobs:
pytest:

runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: [[3,7], [3,8], [3,9]]
python-version: [[3, 7], [3, 8], [3, 9]]
os: [ubuntu-latest, macos-latest, windows-latest]

steps:
- uses: actions/checkout@v2

- name: Set up Python ${{ join(matrix.python-version, '.') }}
uses: actions/setup-python@v2
with:
python-version: ${{ join(matrix.python-version, '.') }}

- name: Copy policy file for ubuntu-latest (needed to use ImageMagick in visual debugging tox tests)
run: |
sudo cp .github/workflows/policy.xml /etc/ImageMagick-6/policy.xml
if: matrix.os == 'ubuntu-latest'

- name: Install ghostscript
run: |
sudo apt install ghostscript
if: matrix.os == 'ubuntu-latest'

- name: Install ImageMagick on macos-latest
run: |
brew install freetype imagemagick
if: matrix.os == 'macos-latest'

- name: Install poetry and tox
run: |
python -m pip install poetry tox
- name: Run tox env pytest on Linux, macOS
run: |
tox -e py${{ join(matrix.python-version, '') }}
if: runner.os == 'Linux' || runner.os == 'macOS'

# set the shell for Windows so env var expansion works in tox and subprocesses
- name: Run tox env pytest on Windows
run: |
tox -e py${{ join(matrix.python-version, '') }}
shell: cmd
if: runner.os == 'Windows'
- uses: actions/checkout@v2

- name: Set up Python ${{ join(matrix.python-version, '.') }}
uses: actions/setup-python@v2
with:
python-version: ${{ join(matrix.python-version, '.') }}

- name: Copy policy file for ubuntu-latest (needed to use ImageMagick in visual debugging tox tests)
run: |
sudo cp .github/workflows/policy.xml /etc/ImageMagick-6/policy.xml
if: matrix.os == 'ubuntu-latest'

- name: Install ghostscript
run: |
sudo apt install ghostscript
if: matrix.os == 'ubuntu-latest'

- name: Install ImageMagick on macos-latest
run: |
brew install freetype imagemagick
if: matrix.os == 'macos-latest'

- name: Install poetry and tox
run: |
python -m pip install poetry tox
- name: Run tox env pytest on Linux, macOS
run: |
tox -e py${{ join(matrix.python-version, '') }}
if: runner.os == 'Linux' || runner.os == 'macOS'

# set the shell for Windows so env var expansion works in tox and subprocesses
- name: Run tox env pytest on Windows
run: |
tox -e py${{ join(matrix.python-version, '') }}
shell: cmd
if: runner.os == 'Windows'

flake8_pylint_docs_black:

runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v2

- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8
- name: Set up Python 3.8
uses: actions/setup-python@v2
with:
python-version: 3.8

- name: Install poetry and tox
run: |
python -m pip install poetry tox
- name: Install poetry and tox
run: |
python -m pip install poetry tox
- name: Run tox env flake8
run: |
tox -e flake8
- name: Run tox env flake8
run: |
tox -e flake8
- name: Run tox env pylint
run: |
tox -e pylint
- name: Run tox env pylint
run: |
tox -e pylint
- name: Run tox env docs
run: |
tox -e docs
- name: Run tox env docs
run: |
tox -e docs
- name: Run tox env black
run: |
tox -e black
- name: Run tox env black
run: |
tox -e black
1 change: 0 additions & 1 deletion README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,3 @@ Introduction
============

``libpdf`` allows the extraction of structured data from machine readable PDFs.

13 changes: 6 additions & 7 deletions libpdf/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def extract_pdf_table(pdf, pages_list: List[Page], figure_list: List[Figure]):

table_dict = {'page': {}}
table_list = []

table_id = 1
for idx_page, page in enumerate(
tqdm(pdf.pages, desc='###### Extracting tables', unit='pages', bar_format=bar_format_lvl2()),
):
Expand All @@ -88,7 +88,6 @@ def extract_pdf_table(pdf, pages_list: List[Page], figure_list: List[Figure]):
if len((page.find_tables(table_settings))) != 0:
table_dict['page'].update({idx_page + 1: []})
tables = page.find_tables(table_settings)
counter = 1
lt_page = page._layout # pylint: disable=protected-access # easiest way to obtain LTPage
for table in tables:
# bbox in tables use pdfplumber bbox coordination (x0, top, y0, bottom), hence, need to
Expand All @@ -112,7 +111,7 @@ def extract_pdf_table(pdf, pages_list: List[Page], figure_list: List[Figure]):
if _table_figure_check(table_pos, figure_list) is True:
table_dict['page'][idx_page + 1].append(
{
'id': 'table.' + str(counter),
'id': 'table.' + str(table_id),
'type': 'table',
'positions': table_pos,
# 'text': table_temp.extract(2, 2),
Expand All @@ -123,16 +122,16 @@ def extract_pdf_table(pdf, pages_list: List[Page], figure_list: List[Figure]):
cells = extract_cells(
lt_page,
table.rows,
table_dict['page'][idx_page + 1][counter - 1]['cell'],
table_dict['page'][idx_page + 1][len(table_dict['page'][idx_page + 1]) - 1]['cell'],
pages_list[idx_page],
)

table = Table(idx=counter, cells=cells, position=table_pos)
table = Table(idx=table_id, cells=cells, position=table_pos)
table_list.append(table)

counter += 1
table_id += 1

if counter == 1: # no table is added
if len(table_dict['page'][idx_page + 1]) == 0: # no table is added
del table_dict['page'][idx_page + 1]

return table_list
Expand Down

0 comments on commit 2532222

Please sign in to comment.