Skip to content

Commit 4b45646

Browse files
committed
8285081: Improve XPath operators count accuracy
Backport-of: 8e0783917975075aae5d586f0076d5093afb0b62
1 parent 6496396 commit 4b45646

File tree

3 files changed

+60
-23
lines changed

3 files changed

+60
-23
lines changed

src/java.xml/share/classes/com/sun/java_cup/internal/runtime/lr_parser.java

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@
137137
* @see com.sun.java_cup.internal.runtime.virtual_parse_stack
138138
* @author Frank Flannery
139139
*
140-
* @LastModified: Jan 2022
140+
* @LastModified: June 2022
141141
*/
142142

143143
public abstract class lr_parser {
@@ -149,6 +149,7 @@ public abstract class lr_parser {
149149
private int grpCount = 0;
150150
private int opCount = 0;
151151
private int totalOpCount = 0;
152+
private int lastSym;
152153

153154
/*-----------------------------------------------------------*/
154155
/*--- Constructor(s) ----------------------------------------*/
@@ -377,13 +378,17 @@ public Symbol scan() throws Exception {
377378
opCount++; // function
378379
isLiteral = false;
379380
} else if (contains(sym.OPERATORS, s.sym)) {
380-
opCount++;
381+
// an axis and its node test count as a single step, so an operator symbol
// that immediately follows DCOLON ("::") is not counted again
382+
if (lastSym != sym.DCOLON) {
383+
opCount++;
384+
}
381385
isLiteral = false;
382386
}
383387

384388
if (s.sym == sym.Literal || s.sym == sym.QNAME) {
385389
isLiteral = true;
386390
}
391+
lastSym = s.sym;
387392

388393
return s;
389394
}
@@ -588,6 +593,7 @@ public Symbol parse() throws java.lang.Exception
588593
isLiteral = false;
589594
grpCount = 0;
590595
opCount = 0;
596+
lastSym = -1;
591597

592598
/* get the first token */
593599
cur_token = scan();

src/java.xml/share/classes/com/sun/org/apache/xalan/internal/xsltc/compiler/sym.java

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,11 @@
2525

2626
package com.sun.org.apache.xalan.internal.xsltc.compiler;
2727

28-
import java.util.Arrays;
29-
3028
/**
3129
* CUP generated class containing symbol constants.
3230
* This class was generated by CUP v0.10j on Fri Feb 27 13:01:50 PST 2004.
3331
*
34-
* @LastModified: Jan 2022
32+
* @LastModified: June 2022
3533
*/
3634
public class sym {
3735
/* terminals */
@@ -92,9 +90,11 @@ public class sym {
9290
/*
9391
AXES: count once at DCOLON,
9492
these axes names are therefore not counted:
95-
NAMESPACE, FOLLOWINGSIBLING, CHILD, DESCENDANTORSELF, DESCENDANT
96-
, PRECEDINGSIBLING, SELF, ANCESTORORSELF, PRECEDING, ANCESTOROR, PARENT, FOLLOWING, ATTRIBUTE
93+
NAMESPACE, FOLLOWINGSIBLING, CHILD, DESCENDANTORSELF, DESCENDANT,
94+
PRECEDINGSIBLING, SELF, ANCESTORORSELF, PRECEDING, ANCESTOR, PARENT,
95+
FOLLOWING, ATTRIBUTE
9796
*/
98-
public static final int[] OPERATORS = {GE, SLASH, ATSIGN, LPAREN, DCOLON,
99-
MINUS, STAR, LT, OR, DIV, PLUS, LE, VBAR, MOD, EQ, LBRACK, DOLLAR, NE, GT};
97+
public static final int[] OPERATORS = {GT, GE, EQ, NE, LT, LE, SLASH, DSLASH,
98+
DOT, DDOT, ATSIGN, DCOLON, PLUS, MINUS, STAR, DIV, MOD, AND, OR, LPAREN,
99+
LBRACK, VBAR, DOLLAR, NODE, TEXT, PI, PIPARAM};
100100
}

src/java.xml/share/classes/com/sun/org/apache/xpath/internal/compiler/Lexer.java

Lines changed: 45 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
* This class is in charge of lexical processing of the XPath
3333
* expression into tokens.
3434
*
35-
* @LastModified: Apr 2022
35+
* @LastModified: June 2022
3636
*/
3737
class Lexer
3838
{
@@ -155,6 +155,7 @@ void tokenize(String pat, List<String> targetStrings)
155155
boolean isStartOfPat = true;
156156
boolean isAttrName = false;
157157
boolean isNum = false;
158+
boolean isAxis = false;
158159

159160
// Nesting of '[' so we can know if the given element should be
160161
// counted inside the m_patternMap.
@@ -254,8 +255,7 @@ void tokenize(String pat, List<String> targetStrings)
254255
// check operator symbol
255256
String s = pat.substring(startSubstring, i);
256257
if (Token.contains(s)) {
257-
m_opCount++;
258-
isLiteral = false;
258+
incrementCount();
259259
}
260260
addToTokenQueue(s);
261261
}
@@ -339,23 +339,45 @@ else if (Token.STAR == c)
339339
{
340340
nesting--;
341341
}
342-
else if ((Token.LPAREN == c) || (Token.LBRACK == c))
342+
else if (Token.LBRACK == c)
343343
{
344344
nesting++;
345-
if (!isLiteral && (Token.LPAREN == c)) {
346-
m_grpCount++;
347-
m_opCount++;
348-
isLiteral = false;
345+
incrementCount();
346+
isAxis = false;
347+
}
348+
else if ((Token.LPAREN == c))
349+
{
350+
nesting++;
351+
if (isLiteral) {
352+
if (!isAxis) {
353+
incrementCount();
354+
}
355+
} else {
356+
m_grpCount++;
357+
incrementCount();
349358
}
359+
isAxis = false;
350360
}
351361

352-
if ((Token.GT == c || Token.LT == c || Token.EQ == c) && Token.EQ != peekNext(pat, i)) {
353-
m_opCount++;
354-
isLiteral = false;
362+
if ((Token.GT == c || Token.LT == c || Token.EQ == c || Token.EM == c)) {
363+
if (Token.EQ != peekNext(pat, i)) {
364+
incrementCount();
365+
}
355366
}
356-
else if ((Token.LPAREN != c) && (Token.RPAREN != c) && (Token.RBRACK != c)) {
357-
m_opCount++;
358-
isLiteral = false;
367+
else if (Token.SLASH == c) {
368+
isAxis = false;
369+
if (Token.SLASH != peekNext(pat, i)) {
370+
incrementCount();
371+
}
372+
}
373+
// '(' and '[' are already counted above; ':' is handled in the case below
374+
// ',' only separates function arguments, so it is not counted as an operator
375+
else if ((Token.LPAREN != c) && (Token.LBRACK != c) && (Token.RPAREN != c)
376+
&& (Token.RBRACK != c) && (Token.COLON != c) && (Token.COMMA != c)) {
377+
if (Token.STAR != c || !isAxis) {
378+
incrementCount();
379+
}
380+
isAxis = false;
359381
}
360382

361383
addToTokenQueue(pat.substring(i, i + 1));
@@ -376,6 +398,7 @@ else if ((Token.LPAREN != c) && (Token.RPAREN != c) && (Token.RBRACK != c)) {
376398
startSubstring = -1;
377399
posOfNSSep = -1;
378400
m_opCount++;
401+
isAxis = true;
379402
addToTokenQueue(pat.substring(i - 1, i + 1));
380403

381404
break;
@@ -389,6 +412,9 @@ else if ((Token.LPAREN != c) && (Token.RPAREN != c) && (Token.RBRACK != c)) {
389412
// fall through on purpose
390413
default :
391414
isLiteral = true;
415+
if (!isNum && Token.DOT == c && Token.DOT != peekNext(pat, i)) {
416+
incrementCount();
417+
}
392418
if (-1 == startSubstring)
393419
{
394420
startSubstring = i;
@@ -443,6 +469,11 @@ else if (null != targetStrings)
443469
m_processor.m_queueMark = 0;
444470
}
445471

472+
private void incrementCount() {
473+
m_opCount++;
474+
isLiteral = false;
475+
}
476+
446477
/**
447478
* Peeks at the next character without advancing the index.
448479
* @param s the input string

0 commit comments

Comments
 (0)