Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Demo/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ static void TestTable()

try
{
string testFilePath = Path.GetFullPath("../../../TestDocuments/national-capitals.pdf");
string testFilePath = Path.GetFullPath("../../../TestDocuments/err_table.pdf");

if (!File.Exists(testFilePath))
{
Expand Down
Binary file added Demo/TestDocuments/err_table.pdf
Binary file not shown.
91 changes: 88 additions & 3 deletions MuPDF.NET.Test/TableTest.cs
Original file line number Diff line number Diff line change
@@ -1,13 +1,98 @@
using System;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using MuPDF.NET;
using NUnit.Framework;

namespace MuPDF.NET.Test
{
public class TableTest
{
/// <summary>
/// Table test based on Demo Program.TestTable():
/// loads resources/err_table.pdf, detects tables on the first page with the
/// "lines_strict" strategy (falling back to "lines" when nothing is found) and
/// with the "text" strategy, then checks that Extract() and ToMarkdown() return
/// non-null results for every table found. Finally it counts "lines_strict"
/// tables across all pages.
/// </summary>
/// <remarks>
/// The test is a smoke test: it asserts that table detection and extraction
/// complete and return non-null values, not that a particular number of tables
/// is found. Page handles are released in finally blocks so that a failing
/// assertion does not leave them open.
/// </remarks>
[Test]
public void TestTable()
{
    string testFilePath = Path.GetFullPath(Path.Combine(TestContext.CurrentContext.TestDirectory, "../../../resources/err_table.pdf"));
    Assert.That(File.Exists(testFilePath), Is.True, $"Test file not found: {testFilePath}");

    Document doc = new Document(testFilePath);
    try
    {
        Assert.That(doc.PageCount, Is.GreaterThanOrEqualTo(1));

        Page page = doc[0];
        try
        {
            // Test 1: Get tables with 'lines_strict' strategy (as in Demo)
            List<Table> tables = Utils.GetTables(
                page,
                clip: page.Rect,
                vertical_strategy: "lines_strict",
                horizontal_strategy: "lines_strict");

            Assert.That(tables, Is.Not.Null);

            if (tables.Count == 0)
            {
                // Test 2: Fallback with 'lines' strategy (as in Demo)
                tables = Utils.GetTables(
                    page,
                    clip: page.Rect,
                    vertical_strategy: "lines",
                    horizontal_strategy: "lines");

                // The fallback result is iterated below, so it must be checked as well.
                Assert.That(tables, Is.Not.Null);
            }

            // Test 3: Get tables with 'text' strategy (as in Demo)
            List<Table> textTables = Utils.GetTables(
                page,
                clip: page.Rect,
                vertical_strategy: "text",
                horizontal_strategy: "text");

            Assert.That(textTables, Is.Not.Null);

            // For each table found with lines_strict/lines: validate structure and Extract/ToMarkdown
            foreach (Table table in tables)
            {
                Assert.That(table.row_count, Is.GreaterThanOrEqualTo(0));
                Assert.That(table.col_count, Is.GreaterThanOrEqualTo(0));

                List<List<string>> tableData = table.Extract();
                Assert.That(tableData, Is.Not.Null);

                string markdown = table.ToMarkdown(clean: false, fillEmpty: true);
                Assert.That(markdown, Is.Not.Null);
            }

            // Test 4: Get tables from all pages (as in Demo)
            int totalTables = 0;
            for (int pageNum = 0; pageNum < doc.PageCount; pageNum++)
            {
                Page currentPage = doc[pageNum];
                try
                {
                    List<Table> pageTables = Utils.GetTables(
                        currentPage,
                        clip: currentPage.Rect,
                        vertical_strategy: "lines_strict",
                        horizontal_strategy: "lines_strict");

                    Assert.That(pageTables, Is.Not.Null);
                    totalTables += pageTables.Count;
                }
                finally
                {
                    // Release the page even when detection throws or an assertion fails.
                    currentPage.Dispose();
                }
            }

            Assert.That(totalTables, Is.GreaterThanOrEqualTo(0));
        }
        finally
        {
            // Release the first page on every exit path, before the document is closed.
            page.Dispose();
        }
    }
    finally
    {
        doc.Close();
    }
}

/*
[Test]
public void BorderedTable()
Expand Down
Binary file added MuPDF.NET.Test/resources/err_table.pdf
Binary file not shown.
2 changes: 1 addition & 1 deletion MuPDF.NET/MuPDF.NET.nuspec
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<package xmlns="http://schemas.microsoft.com/packaging/2012/06/nuspec.xsd">
<metadata>
<id>MuPDF.NET</id>
<version>3.2.13-rc.11</version>
<version>3.2.13-rc.12</version>
<authors>Artifex Software Inc.</authors>
<requireLicenseAcceptance>true</requireLicenseAcceptance>
<license type="file">LICENSE.md</license>
Expand Down
32 changes: 24 additions & 8 deletions MuPDF.NET/Table.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2556,7 +2556,7 @@ bool RowHasBold(Rect rowBbox)

// Get text blocks above table
dynamic pageInfo = page.GetText("dict", clip: clip, flags: (int)TextFlagsExtension.TEXTFLAGS_TEXT);
List<Block> blocks = pageInfo?.BLOCKS ?? new List<Block>();
List<Block> blocks = pageInfo?.Blocks ?? new List<Block>();

// Non-empty, non-superscript spans above table, sorted descending by y1
var spans = new List<Dictionary<string, object>>();
Expand Down Expand Up @@ -2601,7 +2601,7 @@ bool RowHasBold(Rect rowBbox)

float y1 = Convert.ToSingle(sbbox[3]);
float h = y1 - Convert.ToSingle(sbbox[1]);
bool bold = ((int)s["flags"] & (int)FontStyle.TEXT_FONT_BOLD) != 0;
bool bold = (Convert.ToInt32(s["flags"]) & (int)FontStyle.TEXT_FONT_BOLD) != 0;

if (i == 0)
{
Expand Down Expand Up @@ -3145,19 +3145,27 @@ private List<Edge> GetEdges()
}
}

List<Edge> vBase = new List<Edge>();
List<Edge> vBase;
if (vStrat == "lines")
{
vBase = TableGlobals.EDGES.Where(e => e.orientation == "v").ToList();
vBase = EdgeProcessing.FilterEdges(TableGlobals.EDGES, "v");
}
else if (vStrat == "lines_strict")
{
vBase = TableGlobals.EDGES.Where(e => e.orientation == "v" && e.object_type == "line").ToList();
vBase = EdgeProcessing.FilterEdges(TableGlobals.EDGES, "v", "line");
}
else if (vStrat == "text")
{
vBase = EdgeProcessing.WordsToEdgesV(words, (int)settings.min_words_vertical);
}
else if (vStrat == "explicit")
{
vBase = new List<Edge>();
}
else
{
vBase = new List<Edge>();
}

var v = vBase.Concat(vExplicit).ToList();

Expand Down Expand Up @@ -3197,19 +3205,27 @@ private List<Edge> GetEdges()
}
}

List<Edge> hBase = new List<Edge>();
List<Edge> hBase;
if (hStrat == "lines")
{
hBase = TableGlobals.EDGES.Where(e => e.orientation == "h").ToList();
hBase = EdgeProcessing.FilterEdges(TableGlobals.EDGES, "h");
}
else if (hStrat == "lines_strict")
{
hBase = TableGlobals.EDGES.Where(e => e.orientation == "h" && e.object_type == "line").ToList();
hBase = EdgeProcessing.FilterEdges(TableGlobals.EDGES, "h", "line");
}
else if (hStrat == "text")
{
hBase = EdgeProcessing.WordsToEdgesH(words, (int)settings.min_words_horizontal);
}
else if (hStrat == "explicit")
{
hBase = new List<Edge>();
}
else
{
hBase = new List<Edge>();
}

var h = hBase.Concat(hExplicit).ToList();

Expand Down