From b681d7c649c74cea87a6b5b6f5772dace4038745 Mon Sep 17 00:00:00 2001 From: Mike Date: Fri, 23 Jun 2017 17:16:46 -0400 Subject: [PATCH] Fix issue with multi-line captions. Match caption blocks using a more specific regex (an index line followed by a timestamp line) instead of splitting on blank CRLF lines. --- src/SrtParser/Parser.php | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/SrtParser/Parser.php b/src/SrtParser/Parser.php index cc44ffe..dc9a49d 100644 --- a/src/SrtParser/Parser.php +++ b/src/SrtParser/Parser.php @@ -21,7 +21,7 @@ public function loadFile($file) return $this; } - + public function loadString($string) { $this->data = $string; @@ -43,12 +43,15 @@ public function parse() */ private static function splitData($data) { - $sections = explode("\r\n\r\n", $data); + //find digits followed by a single line break and timestamps + $sections = preg_split('/\d+(?:\r\n|\r|\n)(?=(?:\d\d:\d\d:\d\d,\d\d\d)\s-->\s(?:\d\d:\d\d:\d\d,\d\d\d))/m', $data,-1,PREG_SPLIT_NO_EMPTY); $matches = []; foreach ($sections as $section) { - $matches[] = explode("\r\n", $section, 3); + //cleans out control characters, borrowed from https://stackoverflow.com/a/23066553 + $section = preg_replace('/[^\PC\s]/u', '', $section); + if(trim($section) == '') continue; + $matches[] = preg_split('/(\r\n|\r|\n)/', $section, 2,PREG_SPLIT_NO_EMPTY); } - return $matches; } @@ -56,8 +59,8 @@ private static function buildCaptions($matches) { $captions = []; foreach ($matches as $match) { - $times = self::timeMatch($match[1]); - $text = self::textMatch($match[2]); + $times = self::timeMatch($match[0]); + $text = self::textMatch($match[1]); $captions[] = new Caption($times['start_time'], $times['end_time'], $text); }