Skip to content

Implement iteration cache, item cache and length cache for node list iteration #11330

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Jun 2, 2023
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions UPGRADING.INTERNALS
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,22 @@ PHP 8.3 INTERNALS UPGRADE NOTES
- A new function dom_get_doc_props_read_only() is added to gather the document
properties in a read-only way. This function avoids allocation when there are
no document properties changed yet.
- The node list returned by DOMNode::getElementsByTagName() and
DOMNode::getElementsByTagNameNS() now caches the length and the last requested item.
This means that the length and the last requested item are not recalculated
when the node list is iterated over multiple times.
If you modify the document without going through the internal PHP DOM APIs, you need to
manually invalidate the cache using php_libxml_invalidate_node_list_cache_from_doc().
Furthermore, the following internal APIs were added to handle the cache:
. php_dom_is_cache_tag_stale_from_doc_ptr()
. php_dom_is_cache_tag_stale_from_node()
. php_dom_mark_cache_tag_up_to_date_from_node()
- The function dom_get_elements_by_tag_name_ns_raw() has an additional parameter to indicate
the base node of the node list.

g. ext/libxml
- Two new functions, php_libxml_invalidate_node_list_cache_from_doc() and
php_libxml_invalidate_node_list_cache(), were added to invalidate the cache of a node list.

========================
4. OpCode changes
Expand Down
26 changes: 26 additions & 0 deletions ext/dom/document.c
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,8 @@ PHP_METHOD(DOMDocument, importNode)
}
}

php_libxml_invalidate_node_list_cache_from_doc(docp);

DOM_RET_OBJ((xmlNodePtr) retnodep, &ret, intern);
}
/* }}} end dom_document_import_node */
Expand Down Expand Up @@ -1070,6 +1072,8 @@ PHP_METHOD(DOMDocument, normalizeDocument)

DOM_GET_OBJ(docp, id, xmlDocPtr, intern);

php_libxml_invalidate_node_list_cache_from_doc(docp);

dom_normalize((xmlNodePtr) docp);
}
/* }}} end dom_document_normalize_document */
Expand Down Expand Up @@ -1328,10 +1332,14 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {

if (id != NULL) {
intern = Z_DOMOBJ_P(id);
size_t old_modification_nr = 0;
if (intern != NULL) {
docp = (xmlDocPtr) dom_object_get_node(intern);
doc_prop = NULL;
if (docp != NULL) {
const php_libxml_doc_ptr *doc_ptr = docp->_private;
ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */
old_modification_nr = doc_ptr->cache_tag.modification_nr;
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);
doc_prop = intern->document->doc_props;
intern->document->doc_props = NULL;
Expand All @@ -1348,6 +1356,12 @@ static void dom_parse_document(INTERNAL_FUNCTION_PARAMETERS, int mode) {
}

php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
/* Since iterators should invalidate, we need to start the modification number from the old counter */
if (old_modification_nr != 0) {
php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */
doc_ptr->cache_tag.modification_nr = old_modification_nr;
php_libxml_invalidate_node_list_cache(doc_ptr);
}

RETURN_TRUE;
} else {
Expand Down Expand Up @@ -1563,6 +1577,8 @@ PHP_METHOD(DOMDocument, xinclude)
php_dom_remove_xinclude_nodes(root);
}

php_libxml_invalidate_node_list_cache_from_doc(docp);

if (err) {
RETVAL_LONG(err);
} else {
Expand Down Expand Up @@ -1871,10 +1887,14 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */

if (id != NULL && instanceof_function(Z_OBJCE_P(id), dom_document_class_entry)) {
intern = Z_DOMOBJ_P(id);
size_t old_modification_nr = 0;
if (intern != NULL) {
docp = (xmlDocPtr) dom_object_get_node(intern);
doc_prop = NULL;
if (docp != NULL) {
const php_libxml_doc_ptr *doc_ptr = docp->_private;
ZEND_ASSERT(doc_ptr != NULL); /* Must exist, we have a document */
old_modification_nr = doc_ptr->cache_tag.modification_nr;
php_libxml_decrement_node_ptr((php_libxml_node_object *) intern);
doc_prop = intern->document->doc_props;
intern->document->doc_props = NULL;
Expand All @@ -1891,6 +1911,12 @@ static void dom_load_html(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{ */
}

php_libxml_increment_node_ptr((php_libxml_node_object *)intern, (xmlNodePtr)newdoc, (void *)intern);
/* Since iterators should invalidate, we need to start the modification number from the old counter */
if (old_modification_nr != 0) {
php_libxml_doc_ptr* doc_ptr = (php_libxml_doc_ptr*) ((php_libxml_node_object*) intern)->node; /* downcast */
doc_ptr->cache_tag.modification_nr = old_modification_nr;
php_libxml_invalidate_node_list_cache(doc_ptr);
}

RETURN_TRUE;
} else {
Expand Down
43 changes: 27 additions & 16 deletions ext/dom/dom_iterators.c
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
dom_object *intern;
dom_object *nnmap;
dom_nnodemap_object *objmap;
int previndex=0;
int previndex;
HashTable *nodeht;
zval *entry;
bool do_curobj_undef = 1;
Expand All @@ -205,23 +205,32 @@ static void php_dom_iterator_move_forward(zend_object_iterator *iter) /* {{{ */
do_curobj_undef = 0;
}
} else {
curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
if (objmap->nodetype == XML_ATTRIBUTE_NODE ||
objmap->nodetype == XML_ELEMENT_NODE) {
curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
curnode = curnode->next;
} else {
/* Nav the tree evey time as this is LIVE */
/* The collection is live, we nav the tree from the base object if we cannot
* use the cache to restart from the last point. */
basenode = dom_object_get_node(objmap->baseobj);
if (basenode && (basenode->type == XML_DOCUMENT_NODE ||
basenode->type == XML_HTML_DOCUMENT_NODE)) {
basenode = xmlDocGetRootElement((xmlDoc *) basenode);
} else if (basenode) {
basenode = basenode->children;
} else {
if (UNEXPECTED(!basenode)) {
goto err;
}
if (php_dom_is_cache_tag_stale_from_node(&iterator->cache_tag, basenode)) {
php_dom_mark_cache_tag_up_to_date_from_node(&iterator->cache_tag, basenode);
previndex = 0;
if (basenode && (basenode->type == XML_DOCUMENT_NODE ||
basenode->type == XML_HTML_DOCUMENT_NODE)) {
curnode = xmlDocGetRootElement((xmlDoc *) basenode);
} else {
curnode = basenode->children;
}
} else {
previndex = iter->index - 1;
curnode = (xmlNodePtr)((php_libxml_node_ptr *)intern->ptr)->node;
}
curnode = dom_get_elements_by_tag_name_ns_raw(
basenode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index);
basenode, curnode, (char *) objmap->ns, (char *) objmap->local, &previndex, iter->index);
}
}
} else {
Expand Down Expand Up @@ -258,7 +267,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
{
dom_object *intern;
dom_nnodemap_object *objmap;
xmlNodePtr nodep, curnode=NULL;
xmlNodePtr curnode=NULL;
int curindex = 0;
HashTable *nodeht;
zval *entry;
Expand All @@ -270,6 +279,7 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
}
iterator = emalloc(sizeof(php_dom_iterator));
zend_iterator_init(&iterator->intern);
iterator->cache_tag.modification_nr = 0;

ZVAL_OBJ_COPY(&iterator->intern.data, Z_OBJ_P(object));
iterator->intern.funcs = &php_dom_iterator_funcs;
Expand All @@ -288,24 +298,25 @@ zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, i
ZVAL_COPY(&iterator->curobj, entry);
}
} else {
nodep = (xmlNode *)dom_object_get_node(objmap->baseobj);
if (!nodep) {
xmlNodePtr basep = (xmlNode *)dom_object_get_node(objmap->baseobj);
if (!basep) {
goto err;
}
if (objmap->nodetype == XML_ATTRIBUTE_NODE || objmap->nodetype == XML_ELEMENT_NODE) {
if (objmap->nodetype == XML_ATTRIBUTE_NODE) {
curnode = (xmlNodePtr) nodep->properties;
curnode = (xmlNodePtr) basep->properties;
} else {
curnode = (xmlNodePtr) nodep->children;
curnode = (xmlNodePtr) basep->children;
}
} else {
xmlNodePtr nodep = basep;
if (nodep->type == XML_DOCUMENT_NODE || nodep->type == XML_HTML_DOCUMENT_NODE) {
nodep = xmlDocGetRootElement((xmlDoc *) nodep);
} else {
nodep = nodep->children;
}
curnode = dom_get_elements_by_tag_name_ns_raw(
nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0);
basep, nodep, (char *) objmap->ns, (char *) objmap->local, &curindex, 0);
}
}
} else {
Expand Down
14 changes: 14 additions & 0 deletions ext/dom/node.c
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ int dom_node_node_value_write(dom_object *obj, zval *newval)
break;
}

php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);

zend_string_release_ex(str, 0);
return SUCCESS;
}
Expand Down Expand Up @@ -769,6 +771,8 @@ int dom_node_text_content_write(dom_object *obj, zval *newval)
return FAILURE;
}

php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);

const xmlChar *xmlChars = (const xmlChar *) ZSTR_VAL(str);
int type = nodep->type;

Expand Down Expand Up @@ -897,6 +901,8 @@ PHP_METHOD(DOMNode, insertBefore)
php_libxml_increment_doc_ref((php_libxml_node_object *)childobj, NULL);
}

php_libxml_invalidate_node_list_cache_from_doc(parentp->doc);

if (ref != NULL) {
DOM_GET_OBJ(refp, ref, xmlNodePtr, refpobj);
if (refp->parent != parentp) {
Expand Down Expand Up @@ -1086,6 +1092,7 @@ PHP_METHOD(DOMNode, replaceChild)
nodep->doc->intSubset = (xmlDtd *) newchild;
}
}
php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
DOM_RET_OBJ(oldchild, &ret, intern);
}
/* }}} end dom_node_replace_child */
Expand Down Expand Up @@ -1127,6 +1134,7 @@ PHP_METHOD(DOMNode, removeChild)
}

xmlUnlinkNode(child);
php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);
DOM_RET_OBJ(child, &ret, intern);
}
/* }}} end dom_node_remove_child */
Expand Down Expand Up @@ -1230,6 +1238,8 @@ PHP_METHOD(DOMNode, appendChild)

dom_reconcile_ns(nodep->doc, new_child);

php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);

DOM_RET_OBJ(new_child, &ret, intern);
}
/* }}} end dom_node_append_child */
Expand Down Expand Up @@ -1339,6 +1349,8 @@ PHP_METHOD(DOMNode, normalize)

DOM_GET_OBJ(nodep, id, xmlNodePtr, intern);

php_libxml_invalidate_node_list_cache_from_doc(nodep->doc);

dom_normalize(nodep);

}
Expand Down Expand Up @@ -1571,6 +1583,8 @@ static void dom_canonicalization(INTERNAL_FUNCTION_PARAMETERS, int mode) /* {{{
RETURN_THROWS();
}

php_libxml_invalidate_node_list_cache_from_doc(docp);

if (xpath_array == NULL) {
if (nodep->type != XML_DOCUMENT_NODE) {
ctxp = xmlXPathNewContext(docp);
Expand Down
Loading