Skip to content

Commit 1770824

Browse files
authored
Merge pull request #66 from veewee/reader-improvements
Add reader MatchingNode results and a signal to stop reading
2 parents 143c565 + 54bb7ec commit 1770824

File tree

10 files changed

+252
-23
lines changed

10 files changed

+252
-23
lines changed

docs/reader.md

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,21 +7,31 @@ As a result, the reader provides a generator of XML strings that match your matc
77
## Example
88

99
```php
10-
use VeeWee\Xml\Dom\Document;
10+
use VeeWee\Xml\Dom\Configurator;
1111
use VeeWee\Xml\Reader\Reader;
12+
use VeeWee\Xml\Reader\Signal;
1213
use VeeWee\Xml\Reader\Matcher;
1314

1415
$reader = Reader::fromXmlFile('large-data.xml');
1516
$provider = $reader->provide(
16-
Matcher\all(
17+
$matcher = Matcher\all(
1718
Matcher\node_name('item'),
1819
Matcher\node_attribute('locale', 'nl-BE')
19-
)
20+
),
21+
// Optionally, you can provide a signal to stop reading at a given point:
22+
$signal = new Signal()
2023
);
2124

2225
foreach ($provider as $nlItem) {
23-
$dom = Document::fromXmlString($nlItem);
2426
// Do something with it
27+
$xml = $nlItem->xml();
28+
$dom = $nlItem->intoDocument(Configurator\canonicalize());
29+
$decoded = $nlItem->decode(Configurator\canonicalize());
30+
$matched = $nlItem->matches($matcher);
31+
$sequence = $nlItem->nodeSequence();
32+
33+
// If you have loaded sufficient items, you can stop reading the XML file:
34+
$signal->stop();
2535
}
2636
```
2737

@@ -54,7 +64,8 @@ The reader will keep only small parts of the XML in memory by reading the XML st
5464
When the reader detects the first `breakfast_menu` element, it will ask the provided matchers if you are interested in this tag.
5565
A matcher is a function that returns `true` when interested or `false` when it is not interested in this element.
5666
When the matcher returns `true`, the reader will read the complete outer XML of the current tag and `yield` this matching XML to your logic.
57-
This means that the memory-safety of YOUR reader is based on the part inside the XML you are interested in:
67+
This XML is wrapped in a `MatchingNode`, which also contains the `NodeSequence` and offers some handy shortcut functions, e.g. to convert the XML into a DOM Document.
68+
Do note that the memory-safety of YOUR reader depends on the part of the XML you are interested in:
5869
If you only match on the root node, it will yield the complete XML and therefore won't be memory-safe.
5970

6071
After deciding if you are interested in the previous tag, it jumps over to the next tag: `breakfast_menu > food[position() = 1 AND @soldOut=false AND @bestSeller = true]` and asks the matcher if you are interested in this.

src/Xml/Reader/MatchingNode.php

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
<?php
2+
declare(strict_types=1);
3+
4+
namespace VeeWee\Xml\Reader;
5+
6+
use DOMDocument;
7+
use VeeWee\Xml\Dom\Document;
8+
use VeeWee\Xml\Encoding\Exception\EncodingException;
9+
use VeeWee\Xml\Exception\RuntimeException;
10+
use VeeWee\Xml\Reader\Node\NodeSequence;
11+
use function VeeWee\Xml\Encoding\xml_decode;
12+
13+
final class MatchingNode
14+
{
15+
/**
16+
* @param non-empty-string $xml
17+
*/
18+
public function __construct(
19+
private readonly string $xml,
20+
private readonly NodeSequence $nodeSequence
21+
) {
22+
}
23+
24+
/**
25+
* @return non-empty-string
26+
*/
27+
public function xml(): string
28+
{
29+
return $this->xml;
30+
}
31+
32+
public function nodeSequence(): NodeSequence
33+
{
34+
return $this->nodeSequence;
35+
}
36+
37+
/**
38+
* @param list<callable(DOMDocument): DOMDocument> $configurators
39+
*
40+
* @throws RuntimeException
41+
*/
42+
public function intoDocument(callable ... $configurators): Document
43+
{
44+
return Document::fromXmlString($this->xml, ...$configurators);
45+
}
46+
47+
/**
48+
* @param list<callable(DOMDocument): DOMDocument> $configurators
49+
*
50+
* @throws RuntimeException
51+
* @throws EncodingException
52+
*/
53+
public function decode(callable ... $configurators): array
54+
{
55+
return xml_decode($this->xml, ...$configurators);
56+
}
57+
58+
/**
59+
* @param callable(NodeSequence): bool $matcher
60+
*/
61+
public function matches(callable $matcher): bool
62+
{
63+
return $matcher($this->nodeSequence);
64+
}
65+
}

src/Xml/Reader/Reader.php

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,25 @@ public static function fromXmlString(string $xml, callable ... $configurators):
6060
/**
6161
* @param callable(NodeSequence): bool $matcher
6262
*
63-
* @return Generator<string>
63+
* @return Generator<MatchingNode>
6464
*
6565
* @throws RuntimeException
6666
*/
67-
public function provide(callable $matcher): Generator
67+
public function provide(callable $matcher, ?Signal $signal = null): Generator
6868
{
69+
$signal ??= new Signal();
6970
$reader = ($this->factory)();
7071
$pointer = Pointer::create();
7172

7273
yield from stop_on_first_issue(
73-
static fn (): bool => $reader->read(),
74-
static function () use ($reader, $pointer, $matcher) : ?string {
74+
static function () use ($reader, $signal): bool {
75+
if($signal->stopRequested()) {
76+
return !$reader->close();
77+
}
78+
79+
return $reader->read();
80+
},
81+
static function () use ($reader, $pointer, $matcher) : ?MatchingNode {
7582
if ($reader->nodeType === XMLReader::END_ELEMENT) {
7683
$pointer->leaveElement();
7784

@@ -93,13 +100,14 @@ static function () use ($reader): array {
93100
);
94101

95102
$pointer->enterElement($element);
96-
$result = $matcher($pointer->getNodeSequence()) ? $reader->readOuterXml() : null;
103+
$outerXml = $matcher($pointer->getNodeSequence()) ? $reader->readOuterXml() : null;
104+
$match = $outerXml ? new MatchingNode($outerXml, $pointer->getNodeSequence()) : null;
97105

98106
if ($isEmptyElement) {
99107
$pointer->leaveElement();
100108
}
101109

102-
return $result;
110+
return $match;
103111
}
104112

105113
return null;

src/Xml/Reader/Signal.php

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
<?php
2+
declare(strict_types=1);
3+
4+
namespace VeeWee\Xml\Reader;
5+
6+
final class Signal
7+
{
8+
private bool $stopRequested = false;
9+
10+
public function stop(): void
11+
{
12+
$this->stopRequested = true;
13+
}
14+
15+
public function stopRequested(): bool
16+
{
17+
return $this->stopRequested;
18+
}
19+
}

tests/Xml/Reader/Configurator/SubstituteEntitiesTest.php

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,9 @@
55
namespace VeeWee\Tests\Xml\Reader\Configurator;
66

77
use PHPUnit\Framework\TestCase;
8+
use VeeWee\Xml\Reader\MatchingNode;
89
use VeeWee\Xml\Reader\Reader;
10+
use function Psl\Vec\map;
911
use function VeeWee\Xml\Reader\Configurator\substitute_entities;
1012
use function VeeWee\Xml\Reader\Matcher\node_name;
1113

@@ -21,11 +23,11 @@ public function test_it_can_substitute_entities(): void
2123
[
2224
'<user>my entity value</user>',
2325
],
24-
[...$iterator]
26+
map($iterator, static fn (MatchingNode $match): string => $match->xml())
2527
);
2628
}
2729

28-
30+
2931
public function test_it_can_skip_substituting_entities(): void
3032
{
3133
$xml = $this->buildXml();
@@ -36,7 +38,7 @@ public function test_it_can_skip_substituting_entities(): void
3638
[
3739
'<user>&entity;</user>',
3840
],
39-
[...$iterator]
41+
map($iterator, static fn (MatchingNode $match): string => $match->xml())
4042
);
4143
}
4244

tests/Xml/Reader/Configurator/XsdSchemaTest.php

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,16 +7,18 @@
77
use PHPUnit\Framework\TestCase;
88
use VeeWee\Tests\Xml\Helper\FillFileTrait;
99
use VeeWee\Xml\Exception\RuntimeException;
10+
use VeeWee\Xml\Reader\MatchingNode;
1011
use VeeWee\Xml\Reader\Reader;
1112
use XMLReader;
13+
use function Psl\Vec\map;
1214
use function VeeWee\Xml\Reader\Configurator\xsd_schema;
1315
use function VeeWee\Xml\Reader\Matcher\node_name;
1416

1517
final class XsdSchemaTest extends TestCase
1618
{
1719
use FillFileTrait;
1820

19-
21+
2022
public function test_it_can_iterate_if_the_schema_matches(): void
2123
{
2224
[$xsdFile, $xsdHandle] = $this->createXsdFile();
@@ -37,13 +39,13 @@ public function test_it_can_iterate_if_the_schema_matches(): void
3739
'<user>Bos</user>',
3840
'<user>Mos</user>'
3941
],
40-
[...$iterator]
42+
map($iterator, static fn (MatchingNode $match): string => $match->xml())
4143
);
4244

4345
fclose($xsdHandle);
4446
}
4547

46-
48+
4749
public function test_it_triggers_an_error_on_invalid_schema(): void
4850
{
4951
[$xsdFile, $xsdHandle] = $this->createXsdFile();
@@ -65,7 +67,7 @@ public function test_it_triggers_an_error_on_invalid_schema(): void
6567
fclose($xsdHandle);
6668
}
6769

68-
70+
6971
public function test_it_triggers_an_error_if_schema_file_does_not_exist(): void
7072
{
7173
$xml = '<root />';
@@ -80,7 +82,7 @@ public function test_it_triggers_an_error_if_schema_file_does_not_exist(): void
8082
fclose($xsdHandle);
8183
}
8284

83-
85+
8486
public function test_it_can_not_set_a_schema_if_the_reader_started_reading(): void
8587
{
8688
[$xsdFile, $xsdHandle] = $this->createXsdFile();
@@ -93,7 +95,7 @@ public function test_it_can_not_set_a_schema_if_the_reader_started_reading(): vo
9395
fclose($xsdHandle);
9496
}
9597

96-
98+
9799
public function test_it_can_not_set_a_schema_if_the_schema_is_invalid(): void
98100
{
99101
[$xsdFile, $xsdHandle] = $this->fillFile('invalid schema');

tests/Xml/Reader/Matcher/AbstractMatcherTest.php

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
use Closure;
77
use Generator;
88
use PHPUnit\Framework\TestCase;
9+
use VeeWee\Xml\Reader\MatchingNode;
910
use VeeWee\Xml\Reader\Node\NodeSequence;
1011
use VeeWee\Xml\Reader\Reader;
12+
use function Psl\Vec\map;
1113

1214
abstract class AbstractMatcherTest extends TestCase
1315
{
@@ -23,7 +25,7 @@ abstract public static function provideMatcherCases(): Generator;
2325
public function test_real_xml_cases(Closure $matcher, string $xml, array $expected)
2426
{
2527
$reader = Reader::fromXmlString($xml);
26-
$actual = [...$reader->provide($matcher)];
28+
$actual = map($reader->provide($matcher), static fn (MatchingNode $match): string => $match->xml());
2729

2830
static::assertSame($actual, $expected);
2931
}

tests/Xml/Reader/MatchingNodeTest.php

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
<?php
2+
declare(strict_types=1);
3+
4+
namespace VeeWee\Tests\Xml\Reader;
5+
6+
use PHPUnit\Framework\TestCase;
7+
use VeeWee\Xml\Reader\MatchingNode;
8+
use VeeWee\Xml\Reader\Node\ElementNode;
9+
use VeeWee\Xml\Reader\Node\NodeSequence;
10+
use function Psl\Fun\identity;
11+
use function VeeWee\Xml\Dom\Locator\document_element;
12+
use function VeeWee\Xml\Dom\Mapper\xml_string;
13+
use function VeeWee\Xml\Reader\Matcher\element_name;
14+
15+
final class MatchingNodeTest extends TestCase
16+
{
17+
18+
public function test_it_is_a_matching_node(): void
19+
{
20+
$match = new MatchingNode(
21+
$xml = '<hello/>',
22+
$sequence = new NodeSequence(
23+
new ElementNode(1, 'hello', 'hello', '', '', [])
24+
)
25+
);
26+
27+
static::assertSame($xml, $match->xml());
28+
static::assertSame($sequence, $match->nodeSequence());
29+
}
30+
31+
32+
public function test_it_can_match(): void
33+
{
34+
$match = new MatchingNode(
35+
'<hello/>',
36+
new NodeSequence(
37+
new ElementNode(1, 'hello', 'hello', '', '', [])
38+
)
39+
);
40+
41+
static::assertTrue($match->matches(element_name('hello')));
42+
static::assertFalse($match->matches(element_name('world')));
43+
}
44+
45+
46+
public function test_it_can_transform_into_a_dom_document(): void
47+
{
48+
$match = new MatchingNode(
49+
$xml = '<hello/>',
50+
new NodeSequence(
51+
new ElementNode(1, 'hello', 'hello', '', '', [])
52+
)
53+
);
54+
55+
$document = $match->intoDocument(identity());
56+
57+
static::assertSame($xml, xml_string()($document->map(document_element())));
58+
}
59+
60+
public function test_it_can_decode_the_xml(): void
61+
{
62+
$match = new MatchingNode(
63+
$xml = '<hello/>',
64+
new NodeSequence(
65+
new ElementNode(1, 'hello', 'hello', '', '', [])
66+
)
67+
);
68+
69+
$decoded = $match->decode(identity());
70+
71+
static::assertSame(['hello' => ''], $decoded);
72+
}
73+
}

0 commit comments

Comments
 (0)