Skip to content

Commit

Permalink
Fix phpGH-8996: DOMNode serialization on PHP ^8.1
Browse files Browse the repository at this point in the history
PHP 8.1 introduced a seemingly unintentional BC break in ca94d55 by
blocking the (un)serialization of DOM objects.
This was done because the serialization never really worked and just
resulted in an empty object, which upon unserialization just resulted in
an object that you can't use.

Users can however implement their own serialization methods, but the
commit made that impossible as the ACC flag gets passed down to the
child class. An approach was tried in php#10307 with a new ACC flag to
selectively allow serialization with subclasses if they implement the
right methods. However, that was found to be too ad hoc.

Instead, let's abuse how the __sleep and __wakeup methods work to throw
the exception instead. If the child class implements the __serialize /
__unserialize method, then the throwing methods won't be called.
Similarly, if the child class implements __sleep and __wakeup, then
they're overridden and it doesn't matter that they throw.

For the user, this PR has the exact same behaviour for (sub)classes that
don't implement the serialization methods: an exception will be thrown.
For code that previously implemented subclasses with these methods, this
approach will make that code work again. This approach should be both BC
preserving and unbreak user's code.

For the test:
Co-authored-by: wazelin <contact@sergeimikhailov.com>
  • Loading branch information
nielsdos committed Oct 8, 2023
1 parent 6bb536e commit 13eb54c
Show file tree
Hide file tree
Showing 5 changed files with 160 additions and 5 deletions.
21 changes: 21 additions & 0 deletions ext/dom/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -1786,4 +1786,25 @@ PHP_METHOD(DOMNode, getLineNo)
}
/* }}} */

/**
* We want to block the serialization and unserialization of DOM classes.
* However, using @not-serializable makes the child classes also not serializable, even if the user implements the methods.
* So instead, we implement the methods wherein we throw exceptions.
* The reason we choose these methods is because:
* - If the user implements __serialize / __unserialize, the respective throwing methods are not called.
* - If the user implements __sleep / __wakeup, then it's also not a problem because they will not enter the throwing methods.
*/

PHP_METHOD(DOMNode, __sleep)
{
zend_throw_exception_ex(NULL, 0, "Serialization of '%s' is not allowed", ZSTR_VAL(Z_OBJCE_P(ZEND_THIS)->name));
RETURN_THROWS();
}

PHP_METHOD(DOMNode, __wakeup)
{
zend_throw_exception_ex(NULL, 0, "Unserialization of '%s' is not allowed", ZSTR_VAL(Z_OBJCE_P(ZEND_THIS)->name));
RETURN_THROWS();
}

#endif
12 changes: 10 additions & 2 deletions ext/dom/php_dom.stub.php
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ public function after(...$nodes): void;
public function replaceWith(...$nodes): void;
}

/** @not-serializable */
class DOMNode
{
/** @readonly */
Expand Down Expand Up @@ -104,6 +103,10 @@ class DOMNode

public string $textContent;

public function __sleep(): array {}

public function __wakeup(): void {}

/** @return DOMNode|false */
public function appendChild(DOMNode $node) {}

Expand Down Expand Up @@ -156,7 +159,6 @@ public function removeChild(DOMNode $child) {}
public function replaceChild(DOMNode $node, DOMNode $child) {}
}

/** @not-serializable */
class DOMNameSpaceNode
{
/** @readonly */
Expand All @@ -182,6 +184,12 @@ class DOMNameSpaceNode

/** @readonly */
public ?DOMNode $parentNode;

/** @implementation-alias DOMNode::__sleep */
public function __sleep(): array {}

/** @implementation-alias DOMNode::__wakeup */
public function __wakeup(): void {}
}

class DOMImplementation
Expand Down
19 changes: 16 additions & 3 deletions ext/dom/php_dom_arginfo.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* This is a generated file, edit the .stub.php file instead.
* Stub hash: 20a0ff883af3bbf073d9c8bc8246646ffafe7818 */
* Stub hash: 203760d1cf0e063ffd9abe743a0e24a97985767e */

ZEND_BEGIN_ARG_WITH_RETURN_OBJ_INFO_EX(arginfo_dom_import_simplexml, 0, 1, DOMElement, 0)
ZEND_ARG_TYPE_INFO(0, node, IS_OBJECT, 0)
Expand Down Expand Up @@ -28,6 +28,11 @@ ZEND_END_ARG_INFO()

#define arginfo_class_DOMChildNode_replaceWith arginfo_class_DOMParentNode_append

ZEND_BEGIN_ARG_WITH_RETURN_TYPE_INFO_EX(arginfo_class_DOMNode___sleep, 0, 0, IS_ARRAY, 0)
ZEND_END_ARG_INFO()

#define arginfo_class_DOMNode___wakeup arginfo_class_DOMChildNode_remove

ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMNode_appendChild, 0, 0, 1)
ZEND_ARG_OBJ_INFO(0, node, DOMNode, 0)
ZEND_END_ARG_INFO()
Expand Down Expand Up @@ -100,6 +105,10 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_class_DOMNode_replaceChild, 0, 0, 2)
ZEND_ARG_OBJ_INFO(0, child, DOMNode, 0)
ZEND_END_ARG_INFO()

#define arginfo_class_DOMNameSpaceNode___sleep arginfo_class_DOMNode___sleep

#define arginfo_class_DOMNameSpaceNode___wakeup arginfo_class_DOMChildNode_remove

ZEND_BEGIN_ARG_WITH_TENTATIVE_RETURN_TYPE_INFO_EX(arginfo_class_DOMImplementation_getFeature, 0, 2, IS_NEVER, 0)
ZEND_ARG_TYPE_INFO(0, feature, IS_STRING, 0)
ZEND_ARG_TYPE_INFO(0, version, IS_STRING, 0)
Expand Down Expand Up @@ -491,6 +500,8 @@ ZEND_END_ARG_INFO()
ZEND_FUNCTION(dom_import_simplexml);
ZEND_METHOD(DOMCdataSection, __construct);
ZEND_METHOD(DOMComment, __construct);
ZEND_METHOD(DOMNode, __sleep);
ZEND_METHOD(DOMNode, __wakeup);
ZEND_METHOD(DOMNode, appendChild);
ZEND_METHOD(DOMNode, C14N);
ZEND_METHOD(DOMNode, C14NFile);
Expand Down Expand Up @@ -672,6 +683,8 @@ static const zend_function_entry class_DOMChildNode_methods[] = {


static const zend_function_entry class_DOMNode_methods[] = {
ZEND_ME(DOMNode, __sleep, arginfo_class_DOMNode___sleep, ZEND_ACC_PUBLIC)
ZEND_ME(DOMNode, __wakeup, arginfo_class_DOMNode___wakeup, ZEND_ACC_PUBLIC)
ZEND_ME(DOMNode, appendChild, arginfo_class_DOMNode_appendChild, ZEND_ACC_PUBLIC)
ZEND_ME(DOMNode, C14N, arginfo_class_DOMNode_C14N, ZEND_ACC_PUBLIC)
ZEND_ME(DOMNode, C14NFile, arginfo_class_DOMNode_C14NFile, ZEND_ACC_PUBLIC)
Expand All @@ -694,6 +707,8 @@ static const zend_function_entry class_DOMNode_methods[] = {


static const zend_function_entry class_DOMNameSpaceNode_methods[] = {
ZEND_MALIAS(DOMNode, __sleep, __sleep, arginfo_class_DOMNameSpaceNode___sleep, ZEND_ACC_PUBLIC)
ZEND_MALIAS(DOMNode, __wakeup, __wakeup, arginfo_class_DOMNameSpaceNode___wakeup, ZEND_ACC_PUBLIC)
ZEND_FE_END
};

Expand Down Expand Up @@ -989,7 +1004,6 @@ static zend_class_entry *register_class_DOMNode(void)

INIT_CLASS_ENTRY(ce, "DOMNode", class_DOMNode_methods);
class_entry = zend_register_internal_class_ex(&ce, NULL);
class_entry->ce_flags |= ZEND_ACC_NOT_SERIALIZABLE;

zval property_nodeName_default_value;
ZVAL_UNDEF(&property_nodeName_default_value);
Expand Down Expand Up @@ -1104,7 +1118,6 @@ static zend_class_entry *register_class_DOMNameSpaceNode(void)

INIT_CLASS_ENTRY(ce, "DOMNameSpaceNode", class_DOMNameSpaceNode_methods);
class_entry = zend_register_internal_class_ex(&ce, NULL);
class_entry->ce_flags |= ZEND_ACC_NOT_SERIALIZABLE;

zval property_nodeName_default_value;
ZVAL_UNDEF(&property_nodeName_default_value);
Expand Down
84 changes: 84 additions & 0 deletions ext/dom/tests/gh8996.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
--TEST--
GH-8996: DOMNode serialization on PHP ^8.1
--EXTENSIONS--
dom
--FILE--
<?php

echo "=== __sleep and __wakeup ===\n";

class SerializableDomDocumentSleepWakeup extends DOMDocument
{
private $xmlData;

public function __sleep(): array
{
$this->xmlData = $this->saveXML();
return ['xmlData'];
}

public function __wakeup(): void
{
$this->loadXML($this->xmlData);
}
}

$dom = new SerializableDomDocumentSleepWakeup('1.0', 'UTF-8');
$dom->loadXML('<tag>value</tag>');

$serialized = serialize($dom);
var_dump($serialized);
$unserialized = unserialize($serialized);

echo "Serialized:\n-----------\n$serialized\n-----------\nRestored:\n-----------\n{$unserialized->saveXml()}";

echo "=== __serialize and __unserialize ===\n";

class SerializableDomDocumentSerializeUnserialize extends DOMDocument
{
public function __serialize(): array
{
return ['xmlData' => $this->saveXML()];
}

public function __unserialize(array $data): void
{
$this->loadXML($data['xmlData']);
}
}

$dom = new SerializableDomDocumentSerializeUnserialize('1.0', 'UTF-8');
$dom->loadXML('<tag>value</tag>');

$serialized = serialize($dom);
$unserialized = unserialize($serialized);

echo "Serialized:\n-----------\n$serialized\n-----------\nRestored:\n-----------\n{$unserialized->saveXml()}";

?>
--EXPECTF--
=== __sleep and __wakeup ===
string(144) "O:34:"SerializableDomDocumentSleepWakeup":1:{s:43:"%0SerializableDomDocumentSleepWakeup%0xmlData";s:39:"<?xml version="1.0"?>
<tag>value</tag>
";}"
Serialized:
-----------
O:34:"SerializableDomDocumentSleepWakeup":1:{s:43:"%0SerializableDomDocumentSleepWakeup%0xmlData";s:39:"<?xml version="1.0"?>
<tag>value</tag>
";}
-----------
Restored:
-----------
<?xml version="1.0"?>
<tag>value</tag>
=== __serialize and __unserialize ===
Serialized:
-----------
O:43:"SerializableDomDocumentSerializeUnserialize":1:{s:7:"xmlData";s:39:"<?xml version="1.0"?>
<tag>value</tag>
";}
-----------
Restored:
-----------
<?xml version="1.0"?>
<tag>value</tag>
29 changes: 29 additions & 0 deletions ext/dom/tests/not_unserializable.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
--TEST--
DOM classes are not unserializable
--EXTENSIONS--
dom
--FILE--
<?php

$classes = [
"DOMXPath",
"DOMDocument",
"DOMNode",
"DOMNameSpaceNode",
];

foreach ($classes as $class)
{
try {
unserialize('O:' . strlen($class) . ':"' . $class . '":0:{}');
} catch (Exception $e) {
echo $e->getMessage(), "\n";
}
}

?>
--EXPECT--
Unserialization of 'DOMXPath' is not allowed
Unserialization of 'DOMDocument' is not allowed
Unserialization of 'DOMNode' is not allowed
Unserialization of 'DOMNameSpaceNode' is not allowed

0 comments on commit 13eb54c

Please sign in to comment.