Skip to content

Commit 0911397

Browse files
committed
Refactor loadCsv and loadXml to use generators for memory efficiency
- Add generator methods loadFromCsvGenerator() and loadFromXmlGenerator() in FileLoader
- Implement batched processing to reduce memory consumption
- Add generator support in DialectInterface and concrete dialects
- MySQL and PostgreSQL keep native commands but wrapped as generators
- Update return types in TableManagementTrait from static to self
- All 478 tests passing with no skipped tests
1 parent d17c83c commit 0911397

File tree

7 files changed

+197
-87
lines changed

7 files changed

+197
-87
lines changed

src/dialects/DialectAbstract.php

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,20 @@ public function buildLoadXML(string $table, string $filePath, array $options = [
208208
return $this->getFileLoader()->loadFromXml($table, $filePath, $options);
209209
}
210210

211+
/**
212+
* Build SQL generator for loading data from XML file.
213+
*
214+
* @param string $table
215+
* @param string $filePath
216+
* @param array<string, mixed> $options
217+
*
218+
* @return \Generator<string>
219+
*/
220+
public function buildLoadXMLGenerator(string $table, string $filePath, array $options = []): \Generator
221+
{
222+
return $this->getFileLoader()->loadFromXmlGenerator($table, $filePath, $options);
223+
}
224+
211225
/**
212226
* Build SQL for loading data from CSV file.
213227
*
@@ -222,6 +236,20 @@ public function buildLoadCsvSql(string $table, string $filePath, array $options
222236
return $this->getFileLoader()->loadFromCsv($table, $filePath, $options);
223237
}
224238

239+
/**
240+
* Build SQL generator for loading data from CSV file.
241+
*
242+
* @param string $table
243+
* @param string $filePath
244+
* @param array<string, mixed> $options
245+
*
246+
* @return \Generator<string>
247+
*/
248+
public function buildLoadCsvSqlGenerator(string $table, string $filePath, array $options = []): \Generator
249+
{
250+
return $this->getFileLoader()->loadFromCsvGenerator($table, $filePath, $options);
251+
}
252+
225253
/**
226254
* Normalize JSON path input.
227255
*

src/dialects/DialectInterface.php

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -448,6 +448,17 @@ public function buildTruncateSql(string $table): string;
448448
*/
449449
public function buildLoadXML(string $table, string $filePath, array $options = []): string;
450450

451+
/**
452+
* Build SQL generator for loading data from XML file.
453+
*
454+
* @param string $table
455+
* @param string $filePath
456+
* @param array<string, mixed> $options
457+
*
458+
* @return \Generator<string>
459+
*/
460+
public function buildLoadXMLGenerator(string $table, string $filePath, array $options = []): \Generator;
461+
451462
/**
452463
* Build SQL for loading data from CSV file.
453464
*
@@ -458,4 +469,15 @@ public function buildLoadXML(string $table, string $filePath, array $options = [
458469
* @return string
459470
*/
460471
public function buildLoadCsvSql(string $table, string $filePath, array $options = []): string;
472+
473+
/**
474+
* Build SQL generator for loading data from CSV file.
475+
*
476+
* @param string $table
477+
* @param string $filePath
478+
* @param array<string, mixed> $options
479+
*
480+
* @return \Generator<string>
481+
*/
482+
public function buildLoadCsvSqlGenerator(string $table, string $filePath, array $options = []): \Generator;
461483
}

src/dialects/MySQLDialect.php

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,6 +384,24 @@ public function buildLoadCsvSql(string $table, string $filePath, array $options
384384
return $sql;
385385
}
386386

387+
/**
388+
* {@inheritDoc}
389+
*/
390+
public function buildLoadCsvSqlGenerator(string $table, string $filePath, array $options = []): \Generator
391+
{
392+
// MySQL uses native LOAD DATA INFILE which loads entire file at once
393+
yield $this->buildLoadCsvSql($table, $filePath, $options);
394+
}
395+
396+
/**
397+
* {@inheritDoc}
398+
*/
399+
public function buildLoadXMLGenerator(string $table, string $filePath, array $options = []): \Generator
400+
{
401+
// MySQL uses native LOAD XML LOCAL INFILE which loads entire file at once
402+
yield $this->buildLoadXML($table, $filePath, $options);
403+
}
404+
387405
/**
388406
* {@inheritDoc}
389407
*/

src/dialects/PostgreSQLDialect.php

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -420,6 +420,15 @@ public function buildLoadCsvSql(string $table, string $filePath, array $options
420420
);
421421
}
422422

423+
/**
424+
* {@inheritDoc}
425+
*/
426+
public function buildLoadCsvSqlGenerator(string $table, string $filePath, array $options = []): \Generator
427+
{
428+
// PostgreSQL uses native COPY which loads entire file at once
429+
yield $this->buildLoadCsvSql($table, $filePath, $options);
430+
}
431+
423432
/**
424433
* {@inheritDoc}
425434
*/

src/dialects/loaders/FileLoader.php

Lines changed: 93 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,21 @@ public function __construct(
2525
* @param array<string, mixed> $options
2626
*/
2727
public function loadFromCsv(string $table, string $filePath, array $options = []): string
28+
{
29+
$sqlParts = [];
30+
foreach ($this->loadFromCsvGenerator($table, $filePath, $options) as $sql) {
31+
$sqlParts[] = $sql;
32+
}
33+
return $sqlParts === [] ? '' : implode("\n", $sqlParts);
34+
}
35+
36+
/**
37+
* Load data from CSV file using generator for memory efficiency.
38+
*
39+
* @param array<string, mixed> $options
40+
* @return \Generator<string>
41+
*/
42+
public function loadFromCsvGenerator(string $table, string $filePath, array $options = []): \Generator
2843
{
2944
$defaults = [
3045
'fieldChar' => ',',
@@ -90,8 +105,6 @@ public function loadFromCsv(string $table, string $filePath, array $options = []
90105

91106
$batchSize = self::DEFAULT_BATCH_SIZE;
92107
$batch = [];
93-
$sqlParts = [];
94-
$rows = 0;
95108

96109
// Read and build batches
97110
while (!$file->eof()) {
@@ -133,25 +146,18 @@ public function loadFromCsv(string $table, string $filePath, array $options = []
133146
}
134147

135148
$batch[] = '(' . implode(', ', $vals) . ')';
136-
$rows++;
137149

138150
if (count($batch) >= $batchSize) {
139-
$sqlParts[] = 'INSERT INTO ' . $tableQ . ' (' . implode(', ', $colsQ) . ')'
151+
yield 'INSERT INTO ' . $tableQ . ' (' . implode(', ', $colsQ) . ')'
140152
. ' VALUES ' . implode(', ', $batch) . ';';
141153
$batch = [];
142154
}
143155
}
144156

145157
if (!empty($batch)) {
146-
$sqlParts[] = 'INSERT INTO ' . $tableQ . ' (' . implode(', ', $colsQ) . ')'
158+
yield 'INSERT INTO ' . $tableQ . ' (' . implode(', ', $colsQ) . ')'
147159
. ' VALUES ' . implode(', ', $batch) . ';';
148160
}
149-
150-
if ($rows === 0) {
151-
return '';
152-
}
153-
154-
return implode("\n", $sqlParts);
155161
}
156162

157163
/**
@@ -160,6 +166,21 @@ public function loadFromCsv(string $table, string $filePath, array $options = []
160166
* @param array<string, mixed> $options
161167
*/
162168
public function loadFromXml(string $table, string $filePath, array $options = []): string
169+
{
170+
$sqlParts = [];
171+
foreach ($this->loadFromXmlGenerator($table, $filePath, $options) as $sql) {
172+
$sqlParts[] = $sql;
173+
}
174+
return $sqlParts === [] ? '' : implode("\n", $sqlParts);
175+
}
176+
177+
/**
178+
* Load data from XML file using generator for memory efficiency.
179+
*
180+
* @param array<string, mixed> $options
181+
* @return \Generator<string>
182+
*/
183+
public function loadFromXmlGenerator(string $table, string $filePath, array $options = []): \Generator
163184
{
164185
$defaults = [
165186
'rowTag' => '<row>',
@@ -181,99 +202,92 @@ public function loadFromXml(string $table, string $filePath, array $options = []
181202
$tableQ = $this->quoteIdentifier($table);
182203
$columns = [];
183204
$batch = [];
184-
$batchesSql = [];
185205
$batchSize = self::DEFAULT_BATCH_SIZE;
186-
$rowsProcessed = 0;
187206
$skipped = 0;
188207

189-
while ($reader->read()) {
190-
if ($reader->nodeType !== XMLReader::ELEMENT) {
191-
continue;
192-
}
193-
194-
if ($reader->localName !== $rowTag) {
195-
continue;
196-
}
208+
try {
209+
while ($reader->read()) {
210+
if ($reader->nodeType !== XMLReader::ELEMENT) {
211+
continue;
212+
}
197213

198-
// Skip first N logical row elements if requested
199-
if ($skipped < $skipRows) {
200-
$skipped++;
201-
$reader->next();
202-
continue;
203-
}
214+
if ($reader->localName !== $rowTag) {
215+
continue;
216+
}
204217

205-
$xml = $reader->readOuterXml();
206-
if ($xml === '') {
207-
$reader->next();
208-
continue;
209-
}
218+
// Skip first N logical row elements if requested
219+
if ($skipped < $skipRows) {
220+
$skipped++;
221+
$reader->next();
222+
continue;
223+
}
210224

211-
$elem = simplexml_load_string($xml);
212-
if ($elem === false) {
213-
$reader->next();
214-
continue;
215-
}
225+
$xml = $reader->readOuterXml();
226+
if ($xml === '') {
227+
$reader->next();
228+
continue;
229+
}
216230

217-
// Determine columns from the first encountered row
218-
if ($columns === []) {
219-
foreach ($elem->children() as $child) {
220-
$columns[] = (string)$child->getName();
231+
$elem = simplexml_load_string($xml);
232+
if ($elem === false) {
233+
$reader->next();
234+
continue;
221235
}
222236

223-
// fallback to attributes if no child elements
237+
// Determine columns from the first encountered row
224238
if ($columns === []) {
225-
foreach ($elem->attributes() as $name => $val) {
226-
$columns[] = (string)$name;
239+
foreach ($elem->children() as $child) {
240+
$columns[] = (string)$child->getName();
241+
}
242+
243+
// fallback to attributes if no child elements
244+
if ($columns === []) {
245+
foreach ($elem->attributes() as $name => $val) {
246+
$columns[] = (string)$name;
247+
}
248+
}
249+
250+
if ($columns === []) {
251+
$reader->close();
252+
return;
227253
}
228254
}
229255

230-
if ($columns === []) {
231-
$reader->close();
232-
return '';
256+
$values = [];
257+
foreach ($columns as $col) {
258+
$val = null;
259+
260+
if (isset($elem->{$col}) && (string)$elem->{$col} !== '') {
261+
$val = (string)$elem->{$col};
262+
} elseif ($elem->attributes()->{$col} !== null) {
263+
$val = (string)$elem->attributes()->{$col};
264+
}
265+
266+
$values[] = $this->quoteValue($val);
233267
}
234-
}
235268

236-
$values = [];
237-
foreach ($columns as $col) {
238-
$val = null;
269+
$batch[] = '(' . implode(', ', $values) . ')';
239270

240-
if (isset($elem->{$col}) && (string)$elem->{$col} !== '') {
241-
$val = (string)$elem->{$col};
242-
} elseif ($elem->attributes()->{$col} !== null) {
243-
$val = (string)$elem->attributes()->{$col};
271+
if (count($batch) >= $batchSize) {
272+
$colsEscaped = array_map(fn ($c) => $this->quoteColumnName($c), $columns);
273+
274+
yield 'INSERT INTO ' . $tableQ . ' (' . implode(', ', $colsEscaped) . ')'
275+
. ' VALUES ' . implode(', ', $batch) . ';';
276+
$batch = [];
244277
}
245278

246-
$values[] = $this->quoteValue($val);
279+
$reader->next();
247280
}
248281

249-
$batch[] = '(' . implode(', ', $values) . ')';
250-
$rowsProcessed++;
251-
252-
if (count($batch) >= $batchSize) {
282+
if ($batch !== []) {
253283
$colsEscaped = array_map(fn ($c) => $this->quoteColumnName($c), $columns);
254284

255-
$batchesSql[] = 'INSERT INTO ' . $tableQ . ' (' . implode(', ', $colsEscaped) . ')'
285+
yield 'INSERT INTO ' . $tableQ . ' (' . implode(', ', $colsEscaped) . ')'
256286
. ' VALUES ' . implode(', ', $batch) . ';';
257-
$batch = [];
258287
}
259-
260-
$reader->next();
288+
} finally {
289+
$reader->close();
261290
}
262-
263-
$reader->close();
264-
265-
if ($batch !== []) {
266-
$colsEscaped = array_map(fn ($c) => $this->quoteColumnName($c), $columns);
267-
268-
$batchesSql[] = 'INSERT INTO ' . $tableQ . ' (' . implode(', ', $colsEscaped) . ')'
269-
. ' VALUES ' . implode(', ', $batch) . ';';
270-
}
271-
272-
if ($rowsProcessed === 0) {
273-
return '';
274-
}
275-
276-
return implode("\n", $batchesSql);
277291
}
278292

279293
/**

src/query/FileLoader.php

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -43,8 +43,17 @@ public function loadCsv(string $filePath, array $options = []): bool
4343
}
4444

4545
try {
46-
$sql = $this->connection->getDialect()->buildLoadCsvSql($this->prefix . $this->table, $filePath, $options);
47-
$this->connection->prepare($sql)->execute();
46+
$generator = $this->connection->getDialect()->buildLoadCsvSqlGenerator(
47+
$this->prefix . $this->table,
48+
$filePath,
49+
$options
50+
);
51+
52+
foreach ($generator as $batchSql) {
53+
$sql = $batchSql;
54+
$this->connection->prepare($sql)->execute();
55+
}
56+
4857
if ($this->connection->inTransaction()) {
4958
$this->connection->commit();
5059
}
@@ -85,8 +94,18 @@ public function loadXml(string $filePath, string $rowTag = '<row>', ?int $linesT
8594
'rowTag' => $rowTag,
8695
'linesToIgnore' => $linesToIgnore,
8796
];
88-
$sql = $this->connection->getDialect()->buildLoadXML($this->prefix . $this->table, $filePath, $options);
89-
$this->connection->prepare($sql)->execute();
97+
98+
$generator = $this->connection->getDialect()->buildLoadXMLGenerator(
99+
$this->prefix . $this->table,
100+
$filePath,
101+
$options
102+
);
103+
104+
foreach ($generator as $batchSql) {
105+
$sql = $batchSql;
106+
$this->connection->prepare($sql)->execute();
107+
}
108+
90109
if ($this->connection->inTransaction()) {
91110
$this->connection->commit();
92111
}

0 commit comments

Comments
 (0)