Skip to content

Commit 2a1ece2

Browse files
committed
Backport PR #4189 Csv Method
1 parent 4e58d8a commit 2a1ece2

File tree

8 files changed

+128
-16
lines changed

8 files changed

+128
-16
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
55
The format is based on [Keep a Changelog](https://keepachangelog.com)
66
and this project adheres to [Semantic Versioning](https://semver.org).
77

8+
## TBD - 2.1.2
9+
10+
### Added
11+
12+
- Method to Test Whether Csv Will Be Affected by Php9 (backport of PR #4189 intended for 3.4.0)
13+
814
## 2024-09-29 2.1.1
915

1016
### Fixed

src/PhpSpreadsheet/Reader/Csv.php

Lines changed: 69 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
use PhpOffice\PhpSpreadsheet\Shared\StringHelper;
1111
use PhpOffice\PhpSpreadsheet\Spreadsheet;
1212
use PhpOffice\PhpSpreadsheet\Worksheet\Worksheet;
13+
use Throwable;
1314

1415
class Csv extends BaseReader
1516
{
@@ -74,7 +75,7 @@ class Csv extends BaseReader
7475
* It is conditionally set
7576
* to null-string for Php9 and above.
7677
*/
77-
private static string $defaultEscapeCharacter = '\\';
78+
private static string $defaultEscapeCharacter = PHP_VERSION_ID < 90000 ? '\\' : '';
7879

7980
/**
8081
* Callback for setting defaults in construction.
@@ -286,6 +287,12 @@ private function openFileOrMemory(string $filename): void
286287
if (!$fhandle) {
287288
throw new ReaderException($filename . ' is an Invalid Spreadsheet file.');
288289
}
290+
if ($this->inputEncoding === 'UTF-8') {
291+
$encoding = self::guessEncodingBom($filename);
292+
if ($encoding !== '') {
293+
$this->inputEncoding = $encoding;
294+
}
295+
}
289296
if ($this->inputEncoding === self::GUESS_ENCODING) {
290297
$this->inputEncoding = self::guessEncoding($filename, $this->fallbackEncoding);
291298
}
@@ -313,7 +320,7 @@ public function setTestAutoDetect(bool $value): self
313320
private function setAutoDetect(?string $value): ?string
314321
{
315322
$retVal = null;
316-
if ($value !== null && $this->testAutodetect) {
323+
if ($value !== null && $this->testAutodetect && PHP_VERSION_ID < 90000) {
317324
$retVal2 = @ini_set('auto_detect_line_endings', $value);
318325
if (is_string($retVal2)) {
319326
$retVal = $retVal2;
@@ -362,6 +369,20 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo
362369
// Deprecated in Php8.1
363370
$iniset = $this->setAutoDetect('1');
364371

372+
try {
373+
$this->loadStringOrFile2($filename, $spreadsheet, $dataUri);
374+
$this->setAutoDetect($iniset);
375+
} catch (Throwable $e) {
376+
$this->setAutoDetect($iniset);
377+
378+
throw $e;
379+
}
380+
381+
return $spreadsheet;
382+
}
383+
384+
private function loadStringOrFile2(string $filename, Spreadsheet $spreadsheet, bool $dataUri): void
385+
{
365386
// Open file
366387
if ($dataUri) {
367388
$this->openDataUri($filename);
@@ -433,11 +454,6 @@ private function loadStringOrFile(string $filename, Spreadsheet $spreadsheet, bo
433454

434455
// Close file
435456
fclose($fileHandle);
436-
437-
$this->setAutoDetect($iniset);
438-
439-
// Return
440-
return $spreadsheet;
441457
}
442458

443459
/**
@@ -545,6 +561,10 @@ public function getContiguous(): bool
545561
*/
546562
public function setEscapeCharacter(string $escapeCharacter): self
547563
{
564+
if (PHP_VERSION_ID >= 90000 && $escapeCharacter !== '') {
565+
throw new ReaderException('Escape character must be null string for Php9+');
566+
}
567+
548568
$this->escapeCharacter = $escapeCharacter;
549569

550570
return $this;
@@ -621,17 +641,15 @@ private static function guessEncodingTestBom(string &$encoding, string $first4,
621641
}
622642
}
623643

624-
private static function guessEncodingBom(string $filename): string
644+
public static function guessEncodingBom(string $filename, ?string $convertString = null): string
625645
{
626646
$encoding = '';
627-
$first4 = file_get_contents($filename, false, null, 0, 4);
628-
if ($first4 !== false) {
629-
self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
630-
self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
631-
self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
632-
self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
633-
self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
634-
}
647+
$first4 = $convertString ?? (string) file_get_contents($filename, false, null, 0, 4);
648+
self::guessEncodingTestBom($encoding, $first4, self::UTF8_BOM, 'UTF-8');
649+
self::guessEncodingTestBom($encoding, $first4, self::UTF16BE_BOM, 'UTF-16BE');
650+
self::guessEncodingTestBom($encoding, $first4, self::UTF32BE_BOM, 'UTF-32BE');
651+
self::guessEncodingTestBom($encoding, $first4, self::UTF32LE_BOM, 'UTF-32LE');
652+
self::guessEncodingTestBom($encoding, $first4, self::UTF16LE_BOM, 'UTF-16LE');
635653

636654
return $encoding;
637655
}
@@ -688,4 +706,39 @@ private static function getCsv(
688706

689707
return fgetcsv($stream, $length, $separator, $enclosure, $escape);
690708
}
709+
710+
/**
 * Report whether a CSV file loads differently with Php9-style settings.
 *
 * The file is loaded twice and the resulting cell arrays are compared:
 *  - once with the caller-supplied escape character and with the
 *    auto-detect test flag enabled;
 *  - once with a null-string escape character and the auto-detect
 *    test flag disabled, which are the settings this reader enforces
 *    for Php9 and above.
 *
 * @param string $filename file to examine
 * @param string $inputEncoding encoding passed to setInputEncoding()
 * @param null|string $delimiter delimiter passed to setDelimiter()
 * @param string $enclosure enclosure passed to setEnclosure()
 * @param string $escapeCharacter escape character used for the first load
 *
 * @return bool true when the two loads produce different cell arrays
 *
 * @throws ReaderException when run on anything other than Php7.4 or Php8
 */
public static function affectedByPhp9(
    string $filename,
    string $inputEncoding = 'UTF-8',
    ?string $delimiter = null,
    string $enclosure = '"',
    string $escapeCharacter = '\\'
): bool {
    if (PHP_VERSION_ID < 70400 || PHP_VERSION_ID >= 90000) {
        throw new ReaderException('Function valid only for Php7.4 or Php8'); // @codeCoverageIgnore
    }

    $rowsAsConfigured = self::loadRawRowsForPhp9Check(
        $filename,
        $inputEncoding,
        $delimiter,
        $enclosure,
        $escapeCharacter,
        true
    );
    $rowsAsPhp9 = self::loadRawRowsForPhp9Check(
        $filename,
        $inputEncoding,
        $delimiter,
        $enclosure,
        '',
        false
    );

    return $rowsAsConfigured !== $rowsAsPhp9;
}

/**
 * Load a CSV file with the given settings and return its active sheet as a raw array.
 *
 * @return mixed[][]
 */
private static function loadRawRowsForPhp9Check(
    string $filename,
    string $inputEncoding,
    ?string $delimiter,
    string $enclosure,
    string $escapeCharacter,
    bool $testAutoDetect
): array {
    $reader = new self();
    $reader->setInputEncoding($inputEncoding)
        ->setTestAutoDetect($testAutoDetect)
        ->setEscapeCharacter($escapeCharacter)
        ->setDelimiter($delimiter)
        ->setEnclosure($enclosure);
    $spreadsheet = $reader->load($filename);

    try {
        return $spreadsheet->getActiveSheet()->toArray(null, false, false);
    } finally {
        // Release the worksheets even if the array conversion throws.
        $spreadsheet->disconnectWorksheets();
    }
}
691744
}

tests/PhpSpreadsheetTests/Reader/Csv/CsvLineEndingTest.php

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,9 @@ protected function tearDown(): void
2525
*/
2626
public function testEndings(string $ending): void
2727
{
28+
if ($ending === "\r" && PHP_VERSION_ID >= 90000) {
29+
self::markTestSkipped('Mac line endings not supported for Php9+');
30+
}
2831
$this->tempFile = $filename = File::temporaryFilename();
2932
$data = ['123', '456', '789'];
3033
file_put_contents($filename, implode($ending, $data));

tests/PhpSpreadsheetTests/Reader/Csv/CsvTest.php

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,10 @@ public static function providerCanLoad(): array
114114

115115
public function testEscapeCharacters(): void
116116
{
117+
if (PHP_VERSION_ID >= 90000) {
118+
$this->expectException(ReaderException::class);
119+
$this->expectExceptionMessage('Escape character must be null string');
120+
}
117121
$reader = (new Csv())->setEscapeCharacter('"');
118122
$worksheet = $reader->load('tests/data/Reader/CSV/backslash.csv')
119123
->getActiveSheet();
@@ -230,6 +234,10 @@ public function testReadNonexistentFileName(): void
230234
*/
231235
public function testInferSeparator(string $escape, string $delimiter): void
232236
{
237+
if (PHP_VERSION_ID >= 90000 && $escape !== '') {
238+
$this->expectException(ReaderException::class);
239+
$this->expectExceptionMessage('Escape character must be null string');
240+
}
233241
$reader = new Csv();
234242
$reader->setEscapeCharacter($escape);
235243
$filename = 'tests/data/Reader/CSV/escape.csv';
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace PhpOffice\PhpSpreadsheetTests\Reader\Csv;
6+
7+
use PhpOffice\PhpSpreadsheet\Reader\Csv;
8+
use PhpOffice\PhpSpreadsheet\Reader\Exception as ReaderException;
9+
use PHPUnit\Framework\TestCase;
10+
11+
class Php9Test extends TestCase
{
    /**
     * Run Csv::affectedByPhp9() over every entry in the CSV fixture
     * directory and check that exactly the expected files are reported.
     *
     * On Php9 and above the method itself is expected to throw instead.
     */
    public function testAffectedByPhp9(): void
    {
        if (PHP_VERSION_ID >= 90000) {
            $this->expectException(ReaderException::class);
            $this->expectExceptionMessage('Php7.4 or Php8');
        }

        $fixtureDir = 'tests/data/Reader/CSV';
        $paths = glob("$fixtureDir/*");
        self::assertNotFalse($paths);

        $flagged = [];
        foreach ($paths as $path) {
            $name = basename($path);
            // Fixtures named "utf" without "bom" are read with the "guess" encoding.
            $shouldGuess = str_contains($name, 'utf') && !str_contains($name, 'bom');
            $inputEncoding = $shouldGuess ? 'guess' : 'UTF-8';
            if (Csv::affectedByPhp9($path, $inputEncoding)) {
                $flagged[] = $name;
            }
        }

        self::assertSame(['backslash.csv', 'escape.csv', 'linend.mac.csv'], $flagged);
    }
}

tests/data/Reader/CSV/linend.mac.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
A,12,3
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
A,1
2+
2,3
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
A,1
2+
2,3

0 commit comments

Comments
 (0)