@@ -40,139 +40,219 @@ public function __construct(private readonly LoggerInterface $logger)
40
40
{
41
41
}
42
42
43
+ /** @throws Exception */
43
44
protected function compile (ParserContext $ context ): TableNode
44
45
{
45
- $ columnRanges = $ context ->getColumnRanges ();
46
- $ finalHeadersRow = $ context ->getHeaderRows ();
46
+ $ rows = $ this ->extractTableRows ($ context );
47
+ $ rows = $ this ->concatenateTableRows ($ rows , $ context );
48
+ $ headers = $ this ->extractHeaderRows ($ rows , $ context );
49
+
50
+ return new TableNode ($ rows , $ headers );
51
+ }
47
52
48
- /** @var TableRow[] $rows */
53
+ /** @return array<int, TableRow> */
54
+ private function extractTableRows (ParserContext $ context ): array
55
+ {
56
+ /** @var array<int, TableRow> $rows */
49
57
$ rows = [];
50
- $ partialSeparatorRows = $ this -> findRowSpans ( $ context );
58
+ $ columnRanges = $ context -> getColumnRanges ( );
51
59
$ currentSpan = 1 ;
52
-
53
60
foreach ($ context ->getDataLines () as $ rowIndex => $ line ) {
54
- $ row = new TableRow ();
55
- $ currentColumnStart = null ;
56
- $ previousColumnEnd = null ;
57
- foreach ($ columnRanges as $ start => $ end ) {
58
- $ this ->assertColumnEnded ($ currentColumnStart , $ previousColumnEnd );
59
-
60
- if ($ currentColumnStart !== null ) {
61
- $ cellText = mb_substr ($ line , $ previousColumnEnd , $ start - $ previousColumnEnd );
62
- if (!str_contains ($ cellText , '| ' ) && !str_contains ($ cellText , '+ ' )) {
63
- // text continued through the "gap". This is a colspan
64
- // "+" is an odd character - it's usually "|", but "+" can
65
- // happen in row-span situations
66
- $ currentSpan ++;
67
- $ previousColumnEnd = $ end ;
68
- continue ;
69
- }
70
-
71
- // we just hit a proper "gap" record the line up until now
72
- $ row ->addColumn (
73
- $ this ->createColumn ($ line , $ currentColumnStart , $ previousColumnEnd , $ currentSpan ),
74
- );
75
- $ currentSpan = 1 ;
76
- $ currentColumnStart = null ;
77
- }
61
+ $ rows [$ rowIndex ] = $ this ->extractRow ($ columnRanges , $ line , $ currentSpan );
62
+ }
78
63
79
- // if the current column start is null, then set it
80
- // otherwise, leave it - this is a colspan, and eventually
81
- // we want to get all the text starting here
82
- $ currentColumnStart = $ start ;
64
+ return $ rows ;
65
+ }
83
66
84
- $ previousColumnEnd = $ end ;
85
- }
67
+ /** @param array<int, int> $columnRanges */
68
+ private function extractRow (array $ columnRanges , string $ line , int &$ currentSpan ): TableRow
69
+ {
70
+ $ row = new TableRow ();
71
+ $ currentColumnStart = null ;
72
+ $ previousColumnEnd = null ;
73
+ $ this ->extractTableCell ($ columnRanges , $ currentColumnStart , $ previousColumnEnd , $ line , $ currentSpan , $ row );
74
+
75
+ // record the last column
76
+ $ this ->assertColumnEnded ($ currentColumnStart , $ previousColumnEnd );
77
+
78
+ if ($ currentColumnStart !== null ) {
79
+ $ row ->addColumn (
80
+ $ this ->createColumn ($ line , $ currentColumnStart , $ previousColumnEnd , $ currentSpan ),
81
+ );
82
+ }
86
83
87
- // record the last column
84
+ return $ row ;
85
+ }
86
+
87
+ /** @param list<int> $columnRanges */
88
+ private function extractTableCell (array $ columnRanges , int |null &$ currentColumnStart , int |null &$ previousColumnEnd , string $ line , int &$ currentSpan , TableRow $ row ): void
89
+ {
90
+ foreach ($ columnRanges as $ start => $ end ) {
88
91
$ this ->assertColumnEnded ($ currentColumnStart , $ previousColumnEnd );
89
92
90
93
if ($ currentColumnStart !== null ) {
94
+ $ cellText = mb_substr ($ line , $ previousColumnEnd , $ start - $ previousColumnEnd );
95
+ if (!str_contains ($ cellText , '| ' ) && !str_contains ($ cellText , '+ ' )) {
96
+ // text continued through the "gap". This is a colspan
97
+ // "+" is an odd character - it's usually "|", but "+" can
98
+ // happen in row-span situations
99
+ $ currentSpan ++;
100
+ $ previousColumnEnd = $ end ;
101
+ continue ;
102
+ }
103
+
104
+ // we just hit a proper "gap" record the line up until now
91
105
$ row ->addColumn (
92
106
$ this ->createColumn ($ line , $ currentColumnStart , $ previousColumnEnd , $ currentSpan ),
93
107
);
108
+ $ currentSpan = 1 ;
109
+ $ currentColumnStart = null ;
94
110
}
95
111
96
- $ rows [$ rowIndex ] = $ row ;
112
+ // if the current column start is null, then set it
113
+ // otherwise, leave it - this is a colspan, and eventually
114
+ // we want to get all the text starting here
115
+ $ currentColumnStart = $ start ;
116
+
117
+ $ previousColumnEnd = $ end ;
97
118
}
119
+ }
98
120
121
+ /**
122
+ * @param array<int, TableRow> $rows
123
+ *
124
+ * @return array<int, TableRow>
125
+ *
126
+ * @throws Exception
127
+ */
128
+ private function concatenateTableRows (array $ rows , ParserContext $ context ): array
129
+ {
130
+ $ partialSeparatorRows = $ this ->findRowSpans ($ context );
99
131
$ columnIndexesCurrentlyInRowspan = [];
100
132
foreach ($ rows as $ rowIndex => $ row ) {
101
133
if (isset ($ partialSeparatorRows [$ rowIndex ])) {
102
- // this row is part content, part separator due to a rowspan
103
- // for each column that contains content, we need to
104
- // push it onto the last real row's content and record
105
- // that this column in the next row should also be
106
- // included in that previous row's content
107
- foreach ($ row ->getColumns () as $ columnIndex => $ column ) {
108
- if (
109
- !$ column ->isCompletelyEmpty ()
110
- && str_repeat (
111
- '- ' ,
112
- mb_strlen ($ column ->getContent ()),
113
- ) === $ column ->getContent ()
114
- ) {
115
- // only a line separator in this column - not content!
116
- continue ;
117
- }
118
-
119
- $ prevTargetColumn = $ this ->findColumnInPreviousRows ((int ) $ columnIndex , $ rows , (int ) $ rowIndex );
120
- $ prevTargetColumn ->addContent ("\n" . $ column ->getContent ());
121
- $ prevTargetColumn ->incrementRowSpan ();
122
- // mark that this column on the next row should also be added
123
- // to the previous row
124
- $ columnIndexesCurrentlyInRowspan [] = $ columnIndex ;
125
- }
126
-
127
- // remove the row - it's not real
128
- unset($ rows [$ rowIndex ]);
129
-
134
+ $ rows = $ this ->handlePartialSeparator ($ row , $ rows , $ rowIndex , $ columnIndexesCurrentlyInRowspan );
130
135
continue ;
131
136
}
132
137
133
- // check if the previous row was a partial separator row, and
134
- // we need to take some columns and add them to a previous row's content
135
- foreach ($ columnIndexesCurrentlyInRowspan as $ columnIndex ) {
136
- $ prevTargetColumn = $ this ->findColumnInPreviousRows ($ columnIndex , $ rows , (int ) $ rowIndex );
137
- $ columnInRowspan = $ row ->getColumn ($ columnIndex );
138
- if ($ columnInRowspan === null ) {
139
- $ context ->addError (sprintf ('Cannot find column for index "%s" ' , $ columnIndex ));
140
- continue ;
141
- }
138
+ $ this ->handlePreviousRowWasAPartialSeparator ($ columnIndexesCurrentlyInRowspan , $ rows , $ rowIndex , $ row , $ context );
142
139
143
- $ prevTargetColumn ->addContent ("\n" . $ columnInRowspan ->getContent ());
140
+ $ columnIndexesCurrentlyInRowspan = [];
141
+ $ rows = $ this ->concatenateTableRow ($ rows , $ rowIndex , $ partialSeparatorRows , $ row );
142
+ }
143
+
144
+ return $ rows ;
145
+ }
144
146
145
- // now this column actually needs to be removed from this row,
146
- // as it's not a real column that needs to be printed
147
- $ row ->removeColumn ($ columnIndex );
147
+ /**
148
+ * @param array<int, TableRow> $rows
149
+ * @param array<int, bool> $partialSeparatorRows
150
+ *
151
+ * @return array<int, TableRow>
152
+ */
153
+ private function concatenateTableRow (array $ rows , int $ rowIndex , array $ partialSeparatorRows , TableRow $ row ): array
154
+ {
155
+ // if the next row is just $i+1, it means there
156
+ // was no "separator" and this is really just a
157
+ // continuation of this row.
158
+ $ nextRowCounter = 1 ;
159
+ while (isset ($ rows [$ rowIndex + $ nextRowCounter ])) {
160
+ // but if the next line is actually a partial separator, then
161
+ // it is not a continuation of the content - quit now
162
+ if (isset ($ partialSeparatorRows [$ rowIndex + $ nextRowCounter ])) {
163
+ break ;
148
164
}
149
165
150
- $ columnIndexesCurrentlyInRowspan = [];
166
+ $ targetRow = $ rows [$ rowIndex + $ nextRowCounter ];
167
+ unset($ rows [$ rowIndex + $ nextRowCounter ]);
151
168
152
- // if the next row is just $i+1, it means there
153
- // was no "separator" and this is really just a
154
- // continuation of this row.
155
- $ nextRowCounter = 1 ;
156
- while (isset ($ rows [(int ) $ rowIndex + $ nextRowCounter ])) {
157
- // but if the next line is actually a partial separator, then
158
- // it is not a continuation of the content - quit now
159
- if (isset ($ partialSeparatorRows [(int ) $ rowIndex + $ nextRowCounter ])) {
160
- break ;
161
- }
169
+ try {
170
+ $ row ->absorbRowContent ($ targetRow );
171
+ } catch (InvalidTableStructure $ e ) {
172
+ $ this ->logger ->error ($ e ->getMessage ());
173
+ }
162
174
163
- $ targetRow = $ rows [( int ) $ rowIndex + $ nextRowCounter] ;
164
- unset( $ rows [( int ) $ rowIndex + $ nextRowCounter ]);
175
+ $ nextRowCounter++ ;
176
+ }
165
177
166
- try {
167
- $ row ->absorbRowContent ($ targetRow );
168
- } catch (InvalidTableStructure $ e ) {
169
- $ this ->logger ->error ($ e ->getMessage ());
170
- }
178
+ return $ rows ;
179
+ }
171
180
172
- $ nextRowCounter ++;
181
+ /**
182
+ * @param array<int, int> $columnIndexesCurrentlyInRowspan
183
+ * @param array<int, TableRow> $rows
184
+ *
185
+ * @throws Exception
186
+ */
187
+ private function handlePreviousRowWasAPartialSeparator (array $ columnIndexesCurrentlyInRowspan , array $ rows , int $ rowIndex , TableRow $ row , ParserContext $ context ): void
188
+ {
189
+ // check if the previous row was a partial separator row, and
190
+ // we need to take some columns and add them to a previous row's content
191
+ foreach ($ columnIndexesCurrentlyInRowspan as $ columnIndex ) {
192
+ $ prevTargetColumn = $ this ->findColumnInPreviousRows ($ columnIndex , $ rows , $ rowIndex );
193
+ $ columnInRowspan = $ row ->getColumn ($ columnIndex );
194
+ if ($ columnInRowspan === null ) {
195
+ $ context ->addError (sprintf ('Cannot find column for index "%s" ' , $ columnIndex ));
196
+ continue ;
173
197
}
198
+
199
+ $ prevTargetColumn ->addContent ("\n" . $ columnInRowspan ->getContent ());
200
+
201
+ // now this column actually needs to be removed from this row,
202
+ // as it's not a real column that needs to be printed
203
+ $ row ->removeColumn ($ columnIndex );
204
+ }
205
+ }
206
+
207
+ /**
208
+ * @param array<int, TableRow> $rows
209
+ * @param array<int, int> $columnIndexesCurrentlyInRowspan
210
+ *
211
+ * @return array<int, TableRow>
212
+ *
213
+ * @throws Exception
214
+ */
215
+ private function handlePartialSeparator (TableRow $ row , array $ rows , int $ rowIndex , array &$ columnIndexesCurrentlyInRowspan ): array
216
+ {
217
+ // this row is part content, part separator due to a rowspan
218
+ // for each column that contains content, we need to
219
+ // push it onto the last real row's content and record
220
+ // that this column in the next row should also be
221
+ // included in that previous row's content
222
+ foreach ($ row ->getColumns () as $ columnIndex => $ column ) {
223
+ if (
224
+ !$ column ->isCompletelyEmpty ()
225
+ && str_repeat (
226
+ '- ' ,
227
+ mb_strlen ($ column ->getContent ()),
228
+ ) === $ column ->getContent ()
229
+ ) {
230
+ // only a line separator in this column - not content!
231
+ continue ;
232
+ }
233
+
234
+ $ prevTargetColumn = $ this ->findColumnInPreviousRows ((int ) $ columnIndex , $ rows , $ rowIndex );
235
+ $ prevTargetColumn ->addContent ("\n" . $ column ->getContent ());
236
+ $ prevTargetColumn ->incrementRowSpan ();
237
+ // mark that this column on the next row should also be added
238
+ // to the previous row
239
+ $ columnIndexesCurrentlyInRowspan [] = $ columnIndex ;
174
240
}
175
241
242
+ // remove the row - it's not real
243
+ unset($ rows [$ rowIndex ]);
244
+
245
+ return $ rows ;
246
+ }
247
+
248
+ /**
249
+ * @param array<int, TableRow> $rows
250
+ *
251
+ * @return array<int, TableRow>
252
+ */
253
+ private function extractHeaderRows (array &$ rows , ParserContext $ context ): array
254
+ {
255
+ $ finalHeadersRow = $ context ->getHeaderRows ();
176
256
$ headers = [];
177
257
// one more loop to set headers
178
258
foreach ($ rows as $ rowIndex => $ row ) {
@@ -184,13 +264,13 @@ protected function compile(ParserContext $context): TableNode
184
264
unset($ rows [$ rowIndex ]);
185
265
}
186
266
187
- return new TableNode ( $ rows , $ headers) ;
267
+ return $ headers ;
188
268
}
189
269
190
- /** @param TableRow[] $rows */
270
+ /** @param array<int, TableRow> $rows */
191
271
private function findColumnInPreviousRows (int $ columnIndex , array $ rows , int $ currentRowIndex ): TableColumn
192
272
{
193
- /** @var TableRow[] $reversedRows */
273
+ /** @var array<int, TableRow> $reversedRows */
194
274
$ reversedRows = array_reverse ($ rows , true );
195
275
196
276
// go through the rows backwards to find the last/previous
0 commit comments