@@ -40,139 +40,219 @@ public function __construct(private readonly LoggerInterface $logger)
4040 {
4141 }
4242
43+ /** @throws Exception */
4344 protected function compile (ParserContext $ context ): TableNode
4445 {
45- $ columnRanges = $ context ->getColumnRanges ();
46- $ finalHeadersRow = $ context ->getHeaderRows ();
46+ $ rows = $ this ->extractTableRows ($ context );
47+ $ rows = $ this ->concatenateTableRows ($ rows , $ context );
48+ $ headers = $ this ->extractHeaderRows ($ rows , $ context );
49+
50+ return new TableNode ($ rows , $ headers );
51+ }
4752
48- /** @var TableRow[] $rows */
53+ /** @return array<int, TableRow> */
54+ private function extractTableRows (ParserContext $ context ): array
55+ {
56+ /** @var array<int, TableRow> $rows */
4957 $ rows = [];
50- $ partialSeparatorRows = $ this -> findRowSpans ( $ context );
58+ $ columnRanges = $ context -> getColumnRanges ( );
5159 $ currentSpan = 1 ;
52-
5360 foreach ($ context ->getDataLines () as $ rowIndex => $ line ) {
54- $ row = new TableRow ();
55- $ currentColumnStart = null ;
56- $ previousColumnEnd = null ;
57- foreach ($ columnRanges as $ start => $ end ) {
58- $ this ->assertColumnEnded ($ currentColumnStart , $ previousColumnEnd );
59-
60- if ($ currentColumnStart !== null ) {
61- $ cellText = mb_substr ($ line , $ previousColumnEnd , $ start - $ previousColumnEnd );
62- if (!str_contains ($ cellText , '| ' ) && !str_contains ($ cellText , '+ ' )) {
63- // text continued through the "gap". This is a colspan
64- // "+" is an odd character - it's usually "|", but "+" can
65- // happen in row-span situations
66- $ currentSpan ++;
67- $ previousColumnEnd = $ end ;
68- continue ;
69- }
70-
71- // we just hit a proper "gap" record the line up until now
72- $ row ->addColumn (
73- $ this ->createColumn ($ line , $ currentColumnStart , $ previousColumnEnd , $ currentSpan ),
74- );
75- $ currentSpan = 1 ;
76- $ currentColumnStart = null ;
77- }
61+ $ rows [$ rowIndex ] = $ this ->extractRow ($ columnRanges , $ line , $ currentSpan );
62+ }
7863
79- // if the current column start is null, then set it
80- // otherwise, leave it - this is a colspan, and eventually
81- // we want to get all the text starting here
82- $ currentColumnStart = $ start ;
64+ return $ rows ;
65+ }
8366
84- $ previousColumnEnd = $ end ;
85- }
67+ /** @param array<int, int> $columnRanges */
68+ private function extractRow (array $ columnRanges , string $ line , int &$ currentSpan ): TableRow
69+ {
70+ $ row = new TableRow ();
71+ $ currentColumnStart = null ;
72+ $ previousColumnEnd = null ;
73+ $ this ->extractTableCell ($ columnRanges , $ currentColumnStart , $ previousColumnEnd , $ line , $ currentSpan , $ row );
74+
75+ // record the last column
76+ $ this ->assertColumnEnded ($ currentColumnStart , $ previousColumnEnd );
77+
78+ if ($ currentColumnStart !== null ) {
79+ $ row ->addColumn (
80+ $ this ->createColumn ($ line , $ currentColumnStart , $ previousColumnEnd , $ currentSpan ),
81+ );
82+ }
8683
87- // record the last column
84+ return $ row ;
85+ }
86+
87+ /** @param list<int> $columnRanges */
88+ private function extractTableCell (array $ columnRanges , int |null &$ currentColumnStart , int |null &$ previousColumnEnd , string $ line , int &$ currentSpan , TableRow $ row ): void
89+ {
90+ foreach ($ columnRanges as $ start => $ end ) {
8891 $ this ->assertColumnEnded ($ currentColumnStart , $ previousColumnEnd );
8992
9093 if ($ currentColumnStart !== null ) {
94+ $ cellText = mb_substr ($ line , $ previousColumnEnd , $ start - $ previousColumnEnd );
95+ if (!str_contains ($ cellText , '| ' ) && !str_contains ($ cellText , '+ ' )) {
96+ // text continued through the "gap". This is a colspan
97+ // "+" is an odd character - it's usually "|", but "+" can
98+ // happen in row-span situations
99+ $ currentSpan ++;
100+ $ previousColumnEnd = $ end ;
101+ continue ;
102+ }
103+
104+ // we just hit a proper "gap" record the line up until now
91105 $ row ->addColumn (
92106 $ this ->createColumn ($ line , $ currentColumnStart , $ previousColumnEnd , $ currentSpan ),
93107 );
108+ $ currentSpan = 1 ;
109+ $ currentColumnStart = null ;
94110 }
95111
96- $ rows [$ rowIndex ] = $ row ;
112+ // if the current column start is null, then set it
113+ // otherwise, leave it - this is a colspan, and eventually
114+ // we want to get all the text starting here
115+ $ currentColumnStart = $ start ;
116+
117+ $ previousColumnEnd = $ end ;
97118 }
119+ }
98120
121+ /**
122+ * @param array<int, TableRow> $rows
123+ *
124+ * @return array<int, TableRow>
125+ *
126+ * @throws Exception
127+ */
128+ private function concatenateTableRows (array $ rows , ParserContext $ context ): array
129+ {
130+ $ partialSeparatorRows = $ this ->findRowSpans ($ context );
99131 $ columnIndexesCurrentlyInRowspan = [];
100132 foreach ($ rows as $ rowIndex => $ row ) {
101133 if (isset ($ partialSeparatorRows [$ rowIndex ])) {
102- // this row is part content, part separator due to a rowspan
103- // for each column that contains content, we need to
104- // push it onto the last real row's content and record
105- // that this column in the next row should also be
106- // included in that previous row's content
107- foreach ($ row ->getColumns () as $ columnIndex => $ column ) {
108- if (
109- !$ column ->isCompletelyEmpty ()
110- && str_repeat (
111- '- ' ,
112- mb_strlen ($ column ->getContent ()),
113- ) === $ column ->getContent ()
114- ) {
115- // only a line separator in this column - not content!
116- continue ;
117- }
118-
119- $ prevTargetColumn = $ this ->findColumnInPreviousRows ((int ) $ columnIndex , $ rows , (int ) $ rowIndex );
120- $ prevTargetColumn ->addContent ("\n" . $ column ->getContent ());
121- $ prevTargetColumn ->incrementRowSpan ();
122- // mark that this column on the next row should also be added
123- // to the previous row
124- $ columnIndexesCurrentlyInRowspan [] = $ columnIndex ;
125- }
126-
127- // remove the row - it's not real
128- unset($ rows [$ rowIndex ]);
129-
134+ $ rows = $ this ->handlePartialSeparator ($ row , $ rows , $ rowIndex , $ columnIndexesCurrentlyInRowspan );
130135 continue ;
131136 }
132137
133- // check if the previous row was a partial separator row, and
134- // we need to take some columns and add them to a previous row's content
135- foreach ($ columnIndexesCurrentlyInRowspan as $ columnIndex ) {
136- $ prevTargetColumn = $ this ->findColumnInPreviousRows ($ columnIndex , $ rows , (int ) $ rowIndex );
137- $ columnInRowspan = $ row ->getColumn ($ columnIndex );
138- if ($ columnInRowspan === null ) {
139- $ context ->addError (sprintf ('Cannot find column for index "%s" ' , $ columnIndex ));
140- continue ;
141- }
138+ $ this ->handlePreviousRowWasAPartialSeparator ($ columnIndexesCurrentlyInRowspan , $ rows , $ rowIndex , $ row , $ context );
142139
143- $ prevTargetColumn ->addContent ("\n" . $ columnInRowspan ->getContent ());
140+ $ columnIndexesCurrentlyInRowspan = [];
141+ $ rows = $ this ->concatenateTableRow ($ rows , $ rowIndex , $ partialSeparatorRows , $ row );
142+ }
143+
144+ return $ rows ;
145+ }
144146
145- // now this column actually needs to be removed from this row,
146- // as it's not a real column that needs to be printed
147- $ row ->removeColumn ($ columnIndex );
147+ /**
148+ * @param array<int, TableRow> $rows
149+ * @param array<int, bool> $partialSeparatorRows
150+ *
151+ * @return array<int, TableRow>
152+ */
153+ private function concatenateTableRow (array $ rows , int $ rowIndex , array $ partialSeparatorRows , TableRow $ row ): array
154+ {
155+ // if the next row is just $i+1, it means there
156+ // was no "separator" and this is really just a
157+ // continuation of this row.
158+ $ nextRowCounter = 1 ;
159+ while (isset ($ rows [$ rowIndex + $ nextRowCounter ])) {
160+ // but if the next line is actually a partial separator, then
161+ // it is not a continuation of the content - quit now
162+ if (isset ($ partialSeparatorRows [$ rowIndex + $ nextRowCounter ])) {
163+ break ;
148164 }
149165
150- $ columnIndexesCurrentlyInRowspan = [];
166+ $ targetRow = $ rows [$ rowIndex + $ nextRowCounter ];
167+ unset($ rows [$ rowIndex + $ nextRowCounter ]);
151168
152- // if the next row is just $i+1, it means there
153- // was no "separator" and this is really just a
154- // continuation of this row.
155- $ nextRowCounter = 1 ;
156- while (isset ($ rows [(int ) $ rowIndex + $ nextRowCounter ])) {
157- // but if the next line is actually a partial separator, then
158- // it is not a continuation of the content - quit now
159- if (isset ($ partialSeparatorRows [(int ) $ rowIndex + $ nextRowCounter ])) {
160- break ;
161- }
169+ try {
170+ $ row ->absorbRowContent ($ targetRow );
171+ } catch (InvalidTableStructure $ e ) {
172+ $ this ->logger ->error ($ e ->getMessage ());
173+ }
162174
163- $ targetRow = $ rows [( int ) $ rowIndex + $ nextRowCounter] ;
164- unset( $ rows [( int ) $ rowIndex + $ nextRowCounter ]);
175+ $ nextRowCounter++ ;
176+ }
165177
166- try {
167- $ row ->absorbRowContent ($ targetRow );
168- } catch (InvalidTableStructure $ e ) {
169- $ this ->logger ->error ($ e ->getMessage ());
170- }
178+ return $ rows ;
179+ }
171180
172- $ nextRowCounter ++;
181+ /**
182+ * @param array<int, int> $columnIndexesCurrentlyInRowspan
183+ * @param array<int, TableRow> $rows
184+ *
185+ * @throws Exception
186+ */
187+ private function handlePreviousRowWasAPartialSeparator (array $ columnIndexesCurrentlyInRowspan , array $ rows , int $ rowIndex , TableRow $ row , ParserContext $ context ): void
188+ {
189+ // check if the previous row was a partial separator row, and
190+ // we need to take some columns and add them to a previous row's content
191+ foreach ($ columnIndexesCurrentlyInRowspan as $ columnIndex ) {
192+ $ prevTargetColumn = $ this ->findColumnInPreviousRows ($ columnIndex , $ rows , $ rowIndex );
193+ $ columnInRowspan = $ row ->getColumn ($ columnIndex );
194+ if ($ columnInRowspan === null ) {
195+ $ context ->addError (sprintf ('Cannot find column for index "%s" ' , $ columnIndex ));
196+ continue ;
173197 }
198+
199+ $ prevTargetColumn ->addContent ("\n" . $ columnInRowspan ->getContent ());
200+
201+ // now this column actually needs to be removed from this row,
202+ // as it's not a real column that needs to be printed
203+ $ row ->removeColumn ($ columnIndex );
204+ }
205+ }
206+
207+ /**
208+ * @param array<int, TableRow> $rows
209+ * @param array<int, int> $columnIndexesCurrentlyInRowspan
210+ *
211+ * @return array<int, TableRow>
212+ *
213+ * @throws Exception
214+ */
215+ private function handlePartialSeparator (TableRow $ row , array $ rows , int $ rowIndex , array &$ columnIndexesCurrentlyInRowspan ): array
216+ {
217+ // this row is part content, part separator due to a rowspan
218+ // for each column that contains content, we need to
219+ // push it onto the last real row's content and record
220+ // that this column in the next row should also be
221+ // included in that previous row's content
222+ foreach ($ row ->getColumns () as $ columnIndex => $ column ) {
223+ if (
224+ !$ column ->isCompletelyEmpty ()
225+ && str_repeat (
226+ '- ' ,
227+ mb_strlen ($ column ->getContent ()),
228+ ) === $ column ->getContent ()
229+ ) {
230+ // only a line separator in this column - not content!
231+ continue ;
232+ }
233+
234+ $ prevTargetColumn = $ this ->findColumnInPreviousRows ((int ) $ columnIndex , $ rows , $ rowIndex );
235+ $ prevTargetColumn ->addContent ("\n" . $ column ->getContent ());
236+ $ prevTargetColumn ->incrementRowSpan ();
237+ // mark that this column on the next row should also be added
238+ // to the previous row
239+ $ columnIndexesCurrentlyInRowspan [] = $ columnIndex ;
174240 }
175241
242+ // remove the row - it's not real
243+ unset($ rows [$ rowIndex ]);
244+
245+ return $ rows ;
246+ }
247+
248+ /**
249+ * @param array<int, TableRow> $rows
250+ *
251+ * @return array<int, TableRow>
252+ */
253+ private function extractHeaderRows (array &$ rows , ParserContext $ context ): array
254+ {
255+ $ finalHeadersRow = $ context ->getHeaderRows ();
176256 $ headers = [];
177257 // one more loop to set headers
178258 foreach ($ rows as $ rowIndex => $ row ) {
@@ -184,13 +264,13 @@ protected function compile(ParserContext $context): TableNode
184264 unset($ rows [$ rowIndex ]);
185265 }
186266
187- return new TableNode ( $ rows , $ headers) ;
267+ return $ headers ;
188268 }
189269
190- /** @param TableRow[] $rows */
270+ /** @param array<int, TableRow> $rows */
191271 private function findColumnInPreviousRows (int $ columnIndex , array $ rows , int $ currentRowIndex ): TableColumn
192272 {
193- /** @var TableRow[] $reversedRows */
273+ /** @var array<int, TableRow> $reversedRows */
194274 $ reversedRows = array_reverse ($ rows , true );
195275
196276 // go through the rows backwards to find the last/previous
0 commit comments