Skip to content
87 changes: 87 additions & 0 deletions .github/workflows/test-database-processing.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# CI workflow: extracts latest.7z, runs the primary-key and view-creation
# scripts against the resulting latest.db, then verifies the database and
# uploads it as an artifact for debugging.
name: Test Database Processing

on:
  push:
  pull_request:
  workflow_dispatch:

# The job only reads the repository; do not grant the token anything more.
permissions:
  contents: read

jobs:
  test-database-processing:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          lfs: true

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      - name: Install dependencies (7z)
        run: |
          sudo apt-get update
          sudo apt-get install -y p7zip-full unzip

      - name: Install SQLite from official release
        run: |
          # Ubuntu's SQLite 3.45.1 has a bug causing segfaults with complex views
          # Download official precompiled SQLite 3.50.4 instead
          SQLITE_VERSION=3500400
          SQLITE_YEAR=2025
          wget https://www.sqlite.org/${SQLITE_YEAR}/sqlite-tools-linux-x64-${SQLITE_VERSION}.zip
          unzip sqlite-tools-linux-x64-${SQLITE_VERSION}.zip
          ls -la
          sudo cp sqlite3 /usr/local/bin/
          sudo chmod +x /usr/local/bin/sqlite3
          # Make sure /usr/local/bin is in PATH first
          echo "/usr/local/bin" >> "$GITHUB_PATH"

      - name: Verify installations and SQLite versions
        run: |
          echo "=== 7z version ==="
          7z --help | head -5
          echo ""
          echo "=== System sqlite3 version ==="
          /usr/local/bin/sqlite3 --version
          echo ""
          echo "=== Python sqlite3 module version ==="
          python -c "import sqlite3; print('Python sqlite3 module uses SQLite version:', sqlite3.sqlite_version)"

      - name: Extract latest.7z
        run: |
          echo "Extracting latest.7z..."
          7z x latest.7z
          echo "Extraction complete."
          ls -lh latest.db

      - name: Run add_primary_keys.py
        run: |
          echo "Running add_primary_keys.py..."
          python scripts/add_primary_keys.py --db latest.db
          echo "Primary keys added successfully."

      - name: Run create_views.sh
        run: |
          echo "Running create_views.sh..."
          chmod +x scripts/create_views.sh
          scripts/create_views.sh latest.db
          echo "Views created successfully."

      - name: Verify database integrity
        run: |
          echo "Checking database integrity..."
          # PRAGMA integrity_check reports problems as result rows and the
          # sqlite3 CLI still exits 0, so the output must be compared against
          # the single "ok" row that signals a healthy database.
          integrity_result="$(sqlite3 latest.db "PRAGMA integrity_check;")"
          echo "$integrity_result"
          if [ "$integrity_result" != "ok" ]; then
            echo "::error::PRAGMA integrity_check reported problems in latest.db"
            exit 1
          fi
          echo "Verifying views exist..."
          # An empty listing means no views were created; treat that as a failure
          # instead of silently printing nothing.
          view_names="$(sqlite3 latest.db "SELECT name FROM sqlite_master WHERE type='view' ORDER BY name;")"
          echo "$view_names"
          if [ -z "$view_names" ]; then
            echo "::error::No views found in latest.db"
            exit 1
          fi
          echo "All checks passed!"

      # Runs even when an earlier step failed so the database can be inspected.
      - name: Upload database for debugging
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: debug-db
          path: latest.db
          retention-days: 7
58 changes: 38 additions & 20 deletions scripts/create_views.sh
Original file line number Diff line number Diff line change
Expand Up @@ -995,23 +995,41 @@ echo "Finished view View_StatusData."
# Announce that the views have been created in the target database.
echo "Created CBDB views in '$DB_PATH'."

echo "Running sanity counts on views..."
# Run one COUNT(*) per view in a single sqlite3 invocation, with the statements
# supplied on stdin. The heredoc delimiter is quoted ('SQL'), so the shell
# performs no expansion on the statements; they reach sqlite3 verbatim.
# Each statement emits the view name alongside its row count.
sqlite3 "$DB_PATH" <<'SQL'
SELECT 'View_AltnameData' AS view_name, COUNT(*) AS row_count FROM View_AltnameData;
SELECT 'View_Association' AS view_name, COUNT(*) AS row_count FROM View_Association;
SELECT 'View_BiogAddrData' AS view_name, COUNT(*) AS row_count FROM View_BiogAddrData;
SELECT 'View_BiogInstAddrData' AS view_name, COUNT(*) AS row_count FROM View_BiogInstAddrData;
SELECT 'View_BiogInstData' AS view_name, COUNT(*) AS row_count FROM View_BiogInstData;
SELECT 'View_BiogSourceData' AS view_name, COUNT(*) AS row_count FROM View_BiogSourceData;
SELECT 'View_BiogTextData' AS view_name, COUNT(*) AS row_count FROM View_BiogTextData;
SELECT 'View_Entry' AS view_name, COUNT(*) AS row_count FROM View_Entry;
SELECT 'View_EventAddr' AS view_name, COUNT(*) AS row_count FROM View_EventAddr;
SELECT 'View_EventData' AS view_name, COUNT(*) AS row_count FROM View_EventData;
SELECT 'View_KinAddr' AS view_name, COUNT(*) AS row_count FROM View_KinAddr;
SELECT 'View_People' AS view_name, COUNT(*) AS row_count FROM View_People;
SELECT 'View_PeopleAddr' AS view_name, COUNT(*) AS row_count FROM View_PeopleAddr;
SELECT 'View_Possessions' AS view_name, COUNT(*) AS row_count FROM View_Possessions;
SELECT 'View_PossessionsAddr' AS view_name, COUNT(*) AS row_count FROM View_PossessionsAddr;
SELECT 'View_PostingAddr' AS view_name, COUNT(*) AS row_count FROM View_PostingAddr;
SELECT 'View_PostingOffice' AS view_name, COUNT(*) AS row_count FROM View_PostingOffice;
SELECT 'View_StatusData' AS view_name, COUNT(*) AS row_count FROM View_StatusData;
SQL

# Views whose row counts are verified below, in the order they are queried.
VIEWS=(
  View_AltnameData View_Association View_BiogAddrData
  View_BiogInstAddrData View_BiogInstData View_BiogSourceData
  View_BiogTextData View_Entry View_EventAddr
  View_EventData View_KinAddr View_People
  View_PeopleAddr View_Possessions View_PossessionsAddr
  View_PostingAddr View_PostingOffice View_StatusData
)

# Query the views one at a time so a failure can be attributed to a specific
# view. Memory-mapped I/O is disabled for each query (mmap_size=0). The first
# failing query prints memory diagnostics and aborts the script with sqlite3's
# own exit status.
for view in "${VIEWS[@]}"; do
  echo "Checking view: $view..."
  sqlite3 "$DB_PATH" "PRAGMA mmap_size=0; SELECT '$view' AS view_name, COUNT(*) AS row_count FROM $view;" 2>&1 || {
    # $? here is still the exit status of the sqlite3 command above.
    EXIT_CODE=$?
    echo " ✗ ERROR: $view failed with exit code $EXIT_CODE"
    echo " Memory info:"
    # Best effort only: 'free' may be unavailable on some platforms.
    free -h 2>/dev/null || true
    exit "$EXIT_CODE"
  }
  echo " ✓ $view completed successfully"
done

echo "All sanity checks passed!"