Skip to content

Commit a2d3434

Browse files
authored
LaTeX reader: fix improper empty cell filtering (#6689)
1 parent 6ef38e9 commit a2d3434

File tree

2 files changed

+108
-14
lines changed

2 files changed

+108
-14
lines changed

src/Text/Pandoc/Readers/LaTeX.hs

Lines changed: 82 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
{-# LANGUAGE BangPatterns #-}
12
{-# LANGUAGE CPP #-}
23
{-# LANGUAGE FlexibleInstances #-}
34
{-# LANGUAGE MultiParamTypeClasses #-}
@@ -2144,6 +2145,8 @@ parseAligns = try $ do
21442145
toColWidth _ = ColWidthDefault
21452146
toSpec (x, y, z) = (x, toColWidth y, z)
21462147

2148+
-- N.B. this parser returns a Row that may have erroneous empty cells
2149+
-- in it. See the note above fixTableHead for details.
21472150
parseTableRow :: PandocMonad m
21482151
=> Text -- ^ table environment name
21492152
-> [([Tok], [Tok])] -- ^ pref/suffixes
@@ -2168,9 +2171,7 @@ parseTableRow envname prefsufs = do
21682171
cells <- mapM (\ts -> setInput ts >> parseTableCell) rawcells
21692172
setInput oldInput
21702173
spaces
2171-
-- Because of table normalization performed by Text.Pandoc.Builder.table,
2172-
-- we need to remove empty cells
2173-
return $ Row nullAttr $ filter (\c -> c /= emptyCell) cells
2174+
return $ Row nullAttr cells
21742175

21752176
parseTableCell :: PandocMonad m => LP m Cell
21762177
parseTableCell = do
@@ -2246,6 +2247,80 @@ multicolumnCell = controlSeq "multicolumn" >> do
22462247
-- Parse a cell's contents with the 'blocks' parser, pass the result
-- through 'plainify', and wrap it with 'simpleCell'.
parseSimpleCell :: PandocMonad m => LP m Cell
22472248
parseSimpleCell = simpleCell <$> (plainify <$> blocks)
22482249

2250+
-- LaTeX tables are stored with empty cells underneath multirow cells
2251+
-- denoting the grid spaces taken up by them. More specifically, if a
2252+
-- cell spans m rows, then it will overwrite all the cells in the
2253+
-- columns it spans for (m-1) rows underneath it, requiring padding
2254+
-- cells in these places. These padding cells need to be removed for
2255+
-- proper table reading. See #6603.
2256+
--
2257+
-- These fixTable functions do not otherwise fix up malformed
2258+
-- input tables: that is left to the table builder.
2259+
-- Run 'fixTableRows' over the rows of a table head. The head's
-- attributes are passed through unchanged.
fixTableHead :: TableHead -> TableHead
2260+
fixTableHead (TableHead attr rows) = TableHead attr rows'
2261+
where
2262+
rows' = fixTableRows rows
2263+
2264+
-- Run 'fixTableRows' over both row lists of a table body (th and tb).
-- The attributes and the rhc field are passed through unchanged.
-- Each list is fixed by its own call to 'fixTableRows', so each starts
-- from an empty overhang: a multirow cell in th is not tracked into tb.
fixTableBody :: TableBody -> TableBody
2265+
fixTableBody (TableBody attr rhc th tb)
2266+
= TableBody attr rhc th' tb'
2267+
where
2268+
th' = fixTableRows th
2269+
tb' = fixTableRows tb
2270+
2271+
-- Fix a list of rows from top to bottom with 'fixTableRow', threading
-- the overhang produced by each row into the row below it. Row
-- attributes are kept as they are.
fixTableRows :: [Row] -> [Row]
2272+
-- The initial overhang is an infinite list of Nothing: no grid space is
-- occupied before the first row.
fixTableRows = fixTableRows' $ repeat Nothing
2273+
where
2274+
fixTableRows' oldHang (Row attr cells : rs)
2275+
= let (newHang, cells') = fixTableRow oldHang cells
2276+
-- newHang, the overhang left by this row, is the input for the rest
rs' = fixTableRows' newHang rs
2277+
in Row attr cells' : rs'
2278+
fixTableRows' _ [] = []
2279+
2280+
-- The overhang is represented as Just (relative cell dimensions) or
2281+
-- Nothing for an empty grid space.
2282+
-- Fix a single row. Takes the overhang left by the rows above (oldHang)
-- and this row's cells; returns the overhang to use for the next row
-- and the cells that remain after dropping those lying under overhang.
fixTableRow :: [Maybe (ColSpan, RowSpan)] -> [Cell] -> ([Maybe (ColSpan, RowSpan)], [Cell])
2283+
fixTableRow oldHang cells
2284+
-- If there's overhang, drop cells until their total width meets the
2285+
-- width of the occupied grid spaces (or we run out)
2286+
| (n, prefHang, restHang) <- splitHang oldHang
2287+
, n > 0
2288+
= let cells' = dropToWidth getCellW n cells
2289+
(restHang', cells'') = fixTableRow restHang cells'
2290+
-- prefHang is a function: it prepends the next-row overhang of the
-- occupied prefix to the overhang computed for the rest of the row
in (prefHang restHang', cells'')
2291+
-- Otherwise record the overhang of a pending cell and fix the rest
2292+
-- of the row
2293+
| c@(Cell _ _ h w _):cells' <- cells
2294+
-- Clamp both spans to at least 1 before using them
= let h' = max 1 h
2295+
w' = max 1 w
2296+
-- Skip the old overhang entries covering the columns this cell takes
oldHang' = dropToWidth getHangW w' oldHang
2297+
(newHang, cells'') = fixTableRow oldHang' cells'
2298+
in (toHang w' h' <> newHang, c : cells'')
2299+
-- No cells left: return the remaining overhang unchanged
| otherwise
2300+
= (oldHang, [])
2301+
where
2302+
-- Width of a cell: its column span, taken as-is (not clamped)
getCellW (Cell _ _ _ w _) = w
2303+
-- Width of an overhang entry: its column span, or 1 for Nothing
getHangW = maybe 1 fst
2304+
getCS (ColSpan n) = n
2305+
2306+
-- Overhang entries for a cell of column span c and row span r: a
-- single Just entry if r > 1, otherwise one Nothing per column
-- spanned.
toHang c r
2307+
| r > 1 = [Just (c, r)]
2308+
| otherwise = replicate (getCS c) Nothing
2309+
2310+
-- Take the prefix of the overhang list representing filled grid
2311+
-- spaces. Also return the remainder and the total column width of
-- this prefix (the sum of its column spans, not its length). The
-- prefix is returned as a function that prepends its entries with
-- the row span reduced by one, i.e. the overhang for the next row.
2312+
splitHang = splitHang' 0 id
2313+
2314+
splitHang' !n l (Just (c, r):xs)
2315+
= splitHang' (n + c) (l . (toHang c (r-1) ++)) xs
2316+
splitHang' n l xs = (n, l, xs)
2317+
2318+
-- Drop list items until the total width of the dropped items
2319+
-- meets or exceeds the passed width (or the list runs out).
2320+
dropToWidth _ n l | n < 1 = l
2321+
dropToWidth wproj n (c:cs) = dropToWidth wproj (n - wproj c) cs
2322+
dropToWidth _ _ [] = []
2323+
22492324
simpTable :: PandocMonad m => Text -> Bool -> LP m Blocks
22502325
simpTable envname hasWidthParameter = try $ do
22512326
when hasWidthParameter $ () <$ (spaces >> tok)
@@ -2273,11 +2348,10 @@ simpTable envname hasWidthParameter = try $ do
22732348
optional lbreak
22742349
spaces
22752350
lookAhead $ controlSeq "end" -- make sure we're at end
2276-
return $ table emptyCaption
2277-
(zip aligns widths)
2278-
(TableHead nullAttr header')
2279-
[TableBody nullAttr 0 [] rows]
2280-
(TableFoot nullAttr [])
2351+
let th = fixTableHead $ TableHead nullAttr header'
2352+
let tbs = [fixTableBody $ TableBody nullAttr 0 [] rows]
2353+
let tf = TableFoot nullAttr []
2354+
return $ table emptyCaption (zip aligns widths) th tbs tf
22812355

22822356
addTableCaption :: PandocMonad m => Blocks -> LP m Blocks
22832357
addTableCaption = walkM go

test/Tests/Readers/LaTeX.hs

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -174,20 +174,21 @@ tests = [ testGroup "tokenization"
174174
, Row nullAttr [ simpleCell (plain "Two") ]
175175
]
176176
, "Table with nested multirow/multicolumn item" =:
177-
T.unlines [ "\\begin{tabular}{c c c}"
178-
, "\\multicolumn{2}{c}{\\multirow{2}{5em}{One}}&Two\\\\"
179-
, "& & Three\\\\"
180-
, "Four&Five&Six\\\\"
177+
T.unlines [ "\\begin{tabular}{c c c c}"
178+
, "\\multicolumn{3}{c}{\\multirow{2}{5em}{One}}&Two\\\\"
179+
, "\\multicolumn{2}{c}{} & & Three\\\\"
180+
, "Four&Five&Six&Seven\\\\"
181181
, "\\end{tabular}"
182182
] =?>
183-
table' [AlignCenter, AlignCenter, AlignCenter]
184-
[ Row nullAttr [ cell AlignCenter (RowSpan 2) (ColSpan 2) (plain "One")
183+
table' [AlignCenter, AlignCenter, AlignCenter, AlignCenter]
184+
[ Row nullAttr [ cell AlignCenter (RowSpan 2) (ColSpan 3) (plain "One")
185185
, simpleCell (plain "Two")
186186
]
187187
, Row nullAttr [ simpleCell (plain "Three") ]
188188
, Row nullAttr [ simpleCell (plain "Four")
189189
, simpleCell (plain "Five")
190190
, simpleCell (plain "Six")
191+
, simpleCell (plain "Seven")
191192
]
192193
]
193194
, "Table with multicolumn header" =:
@@ -205,6 +206,25 @@ tests = [ testGroup "tokenization"
205206
]
206207
]
207208
(TableFoot nullAttr [])
209+
, "Table with normal empty cells" =:
210+
T.unlines [ "\\begin{tabular}{|r|r|r|}"
211+
, "A & & B \\\\"
212+
, " & C &"
213+
, "\\end{tabular}"
214+
] =?>
215+
table emptyCaption
216+
(replicate 3 (AlignRight, ColWidthDefault))
217+
(TableHead nullAttr [])
218+
[TableBody nullAttr 0 []
219+
[Row nullAttr [ simpleCell (plain "A")
220+
, emptyCell
221+
, simpleCell (plain "B")
222+
]
223+
,Row nullAttr [ emptyCell
224+
, simpleCell (plain "C")
225+
, emptyCell
226+
]]]
227+
(TableFoot nullAttr [])
208228
]
209229

210230
, testGroup "citations"

0 commit comments

Comments
 (0)