Skip to content

fix: ValueError when converting cells to html #1369

fix: ValueError when converting cells to html

fix: ValueError when converting cells to html #1369

Workflow file for this run

name: CI
on:
push:
branches: [ main, robinson/initial-repo-setup ]
pull_request:
branches: [ main ]
env:
PYTHON_VERSION: 3.9
jobs:
setup:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/cache@v3
id: virtualenv-cache
with:
path: |
.venv
key: ${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements/*.txt') }}
lookup-only: true
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION }}
- name: Install Poppler
run: |
sudo apt-get update
sudo apt-get -y install poppler-utils
- name: Setup virtual environment (no cache hit)
if: steps.virtualenv-cache.outputs.cache-hit != 'true'
run: |
python${{ env.PYTHON_VERSION }} -m venv .venv
source .venv/bin/activate
make install-ci
lint:
runs-on: ubuntu-latest
needs: setup
steps:
- uses: actions/checkout@v4
- uses: actions/cache/restore@v3
id: virtualenv-cache
with:
path: .venv
key: ${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements/*.txt') }}
# NOTE(robinson) - This is a fallback in case the lint job does not find the cache.
# We can take this out when we implement the fix in CORE-99
- name: Setup virtual environment (no cache hit)
if: steps.virtualenv-cache.outputs.cache-hit != 'true'
run: |
python${{ env.PYTHON_VERSION }} -m venv .venv
source .venv/bin/activate
make install-ci
- name: Lint
run: |
source .venv/bin/activate
make check
shellcheck:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: ShellCheck
uses: ludeeus/action-shellcheck@master
test:
runs-on: ubuntu-latest
needs: [setup, lint]
steps:
- uses: actions/checkout@v4
- uses: actions/cache/restore@v3
id: virtualenv-cache
with:
path: |
.venv
key: ${{ runner.os }}-${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements/*.txt') }}
# NOTE(robinson) - This is a fallback in case the lint job does not find the cache.
# We can take this out when we implement the fix in CORE-99
- name: Setup virtual environment (no cache hit)
if: steps.virtualenv-cache.outputs.cache-hit != 'true'
run: |
python${{ env.PYTHON_VERSION }} -m venv .venv
source .venv/bin/activate
make install-ci
- name: Install Poppler
run: |
sudo apt-get update
sudo apt-get -y install poppler-utils tesseract-ocr
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: ${{ secrets.AWS_DEFAULT_REGION }}
- name: Test
env:
UNSTRUCTURED_HF_TOKEN: ${{ secrets.HF_TOKEN }}
run: |
source .venv/bin/activate
aws s3 cp s3://utic-dev-models/ci_test_model/test_ci_model.onnx test_unstructured_inference/models/
CI=true make test
make check-coverage
# NOTE(robinson) - disabling ingest tests for now, as of 5/22/2024 they seem to have been
# broken for the past six months
# test_ingest:
# strategy:
# matrix:
# python-version: ["3.9","3.10"]
# runs-on: ubuntu-latest
# env:
# NLTK_DATA: ${{ github.workspace }}/nltk_data
# needs: lint
# steps:
# - name: Checkout unstructured repo for integration testing
# uses: actions/checkout@v4
# with:
# repository: 'Unstructured-IO/unstructured'
# - name: Checkout this repo
# uses: actions/checkout@v4
# with:
# path: inference
# - name: Set up Python ${{ matrix.python-version }}
# uses: actions/setup-python@v4
# with:
# python-version: ${{ matrix.python-version }}
# - name: Test
# env:
# GH_READ_ONLY_ACCESS_TOKEN: ${{ secrets.GH_READ_ONLY_ACCESS_TOKEN }}
# SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
# DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
# run: |
# python${{ matrix.python-version }} -m venv .venv
# source .venv/bin/activate
# [ ! -d "$NLTK_DATA" ] && mkdir "$NLTK_DATA"
# make install-ci
# pip install -e inference/
# sudo apt-get update
# sudo apt-get install -y libmagic-dev poppler-utils libreoffice pandoc
# sudo add-apt-repository -y ppa:alex-p/tesseract-ocr5
# sudo apt-get install -y tesseract-ocr
# sudo apt-get install -y tesseract-ocr-kor
# sudo apt-get install -y diffstat
# tesseract --version
# make install-all-ingest
# # only run ingest tests that check expected output diffs.
# bash inference/scripts/test-unstructured-ingest-helper.sh
changelog:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- if: github.ref != 'refs/heads/main'
uses: dorny/paths-filter@v2
id: changes
with:
filters: |
src:
- 'unstructured_inference/**'
- if: steps.changes.outputs.src == 'true' && github.ref != 'refs/heads/main'
uses: dangoslen/changelog-enforcer@v3