From ce37e2651acb031b4e914c9cec7acf522524d806 Mon Sep 17 00:00:00 2001 From: Ignace Nyamagana Butera Date: Wed, 5 Mar 2014 10:33:24 +0100 Subject: [PATCH] # This is a combination of 2 commits. # The first commit's message is: # This is a combination of 4 commits. # The first commit's message is: implementing detectDelimiter #16 # The 2nd commit message will be skipped: # implementing detectDelimiter with Exceptions #16 # The 3rd commit message will be skipped: # implementing detectDelimiter #16 # The 4th commit message will be skipped: # implementing detectDelimiter with Exceptions #16 # The 2nd commit message will be skipped: # implementing detectDelimiter with Exceptions #16 --- src/AbstractCsv.php | 45 +++++++++++++++++++++++++++++++++++++++++++++ test/CsvTest.php | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+) diff --git a/src/AbstractCsv.php b/src/AbstractCsv.php index 4d92418b..a5b6399d 100644 --- a/src/AbstractCsv.php +++ b/src/AbstractCsv.php @@ -39,6 +39,8 @@ use SplFileObject; use SplTempFileObject; use InvalidArgumentException; +use LimitIterator; +use CallbackFilterIterator; /** * A abstract class to enable basic CSV manipulation @@ -209,6 +211,49 @@ public function getDelimiter() return $this->delimiter; } + /** + * Detect the CSV file delimiter by parsing the first rows with each candidate delimiter + * + * @param integer $nbRows number of rows to inspect, must be an integer >= 1 + * @param array $delimiters additional single-byte delimiters tried along with `,`, `;` and tab + * + * @return string|null the only candidate that splits an inspected row into several fields, null if none does + * + * @throws \InvalidArgumentException If $nbRows value is invalid + * @throws \RuntimeException If too many delimiters are found + */ + public function detectDelimiter($nbRows = 1, array $delimiters = []) + { + $nbRows = filter_var($nbRows, FILTER_VALIDATE_INT, ['options' => ['min_range' => 1]]); + if (! 
$nbRows) { + throw new InvalidArgumentException('`$nbRows` must be a valid positive integer'); + } + $delimiters = array_filter($delimiters, function ($str) { + return 1 === strlen($str); + }); + $delimiters = array_merge([',', ';', "\t"], $delimiters); + $delimiters = array_unique($delimiters); + $iterator = new CallbackFilterIterator( + new LimitIterator($this->csv, 0, $nbRows), + function ($row) { + return is_array($row) && count($row) > 1; + } + ); + $res = []; + foreach ($delimiters as $delim) { + $this->csv->setCsvControl($delim, $this->enclosure, $this->escape); + $res[$delim] = count(iterator_to_array($iterator, false)); + } + arsort($res, SORT_NUMERIC); + $res = array_keys(array_filter($res)); + if (! $res) { + return null; + } elseif (1 === count($res)) { + return $res[0]; + } + throw new \RuntimeException('too many delimiters were found: `'.implode('`,`', $res).'`'); + } + /** * set the field enclosure * diff --git a/test/CsvTest.php b/test/CsvTest.php index e10ff76e..e6943e03 100644 --- a/test/CsvTest.php +++ b/test/CsvTest.php @@ -84,6 +84,45 @@ public function testDelimeter() $this->csv->setDelimiter('foo'); } + public function testDetectDelimiter() + { + $this->assertSame(',', $this->csv->detectDelimiter()); + } + + /** + * @expectedException InvalidArgumentException + */ + public function testDetectDelimiterWithInvalidRowLimit() + { + $this->csv->detectDelimiter(-4); + } + + public function testDetectDelimiterWithNoCSV() + { + $file = new SplTempFileObject; + $file->fwrite("How are you today ?\nI'm doing fine thanks!"); + $csv = new Writer($file); + $this->assertNull($csv->detectDelimiter(5, ['toto', '|'])); + } + + /** + * @expectedException RuntimeException + */ + public function testDetectDelimiterWithInconsistentCSV() + { + $csv = new Writer(new SplTempFileObject); + $csv->setDelimiter(';'); + $csv->insertOne(['toto', 'tata', 'tutu']); + $csv->setDelimiter('|'); + $csv->insertAll([ + ['toto', 'tata', 'tutu'], + ['toto', 'tata', 'tutu'], + 
['toto', 'tata', 'tutu'] + ]); + + $csv->detectDelimiter(5, ['toto', '|']); + } + /** * @expectedException InvalidArgumentException */