Skip to content

Commit db94481

Browse files
committed
LUCENE-15196: Support multiple delimiters
1 parent 8e8e37d commit db94481

File tree

1 file changed

+18
-4
lines changed

1 file changed

+18
-4
lines changed

lucene/analysis/common/src/java/org/apache/lucene/analysis/path/PathHierarchyTokenizer.java

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@
1717
package org.apache.lucene.analysis.path;
1818

1919
import java.io.IOException;
20+
import java.util.HashSet;
21+
import java.util.Objects;
22+
import java.util.Set;
2023
import org.apache.lucene.analysis.Tokenizer;
2124
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
2225
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
@@ -84,17 +87,28 @@ public PathHierarchyTokenizer(
8487
}
8588
termAtt.resizeBuffer(bufferSize);
8689

87-
this.delimiter = delimiter;
90+
this.delimiters = Set.of(delimiter);
8891
this.replacement = replacement;
8992
this.skip = skip;
9093
resultToken = new StringBuilder(bufferSize);
9194
}
9295

96+
/**
 * Creates a tokenizer that treats every character in {@code delimiters} as a path delimiter.
 *
 * <p>Uses {@code DEFAULT_TOKEN_ATTRIBUTE_FACTORY} and {@code DEFAULT_BUFFER_SIZE}. The given set
 * is copied, so later modifications by the caller do not affect this tokenizer.
 *
 * @param delimiters characters that separate path parts; when {@code null} or empty, the single
 *     character {@code DEFAULT_DELIMITER} is used instead
 * @param replacement character written to the term in place of any matched delimiter
 * @param skip threshold compared against the running count of delimiters seen by {@code
 *     incrementToken()}; NOTE(review): not validated here -- confirm whether negative values
 *     should be rejected as in the other constructors
 */
public PathHierarchyTokenizer(Set<Character> delimiters, char replacement, int skip) {
  super(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
  // Pre-size the term buffer the same way the char-delimiter constructor does.
  termAtt.resizeBuffer(DEFAULT_BUFFER_SIZE);
  // Do NOT write new HashSet<>(DEFAULT_DELIMITER): the char widens to int and selects
  // HashSet(int initialCapacity), which yields an EMPTY set rather than a set holding '/'.
  this.delimiters =
      Objects.isNull(delimiters) || delimiters.isEmpty()
          ? Set.of(DEFAULT_DELIMITER)
          : new HashSet<>(delimiters); // defensive copy of the caller's set
  this.replacement = replacement;
  this.skip = skip;
  resultToken = new StringBuilder(DEFAULT_BUFFER_SIZE);
}
106+
93107
private static final int DEFAULT_BUFFER_SIZE = 1024;
94108
public static final char DEFAULT_DELIMITER = '/';
95109
public static final int DEFAULT_SKIP = 0;
96110

97-
private final char delimiter;
111+
// Characters treated as path delimiters; assigned once in the constructor and never reassigned.
private final Set<Character> delimiters;
98112
private final char replacement;
99113
private final int skip;
100114

@@ -145,13 +159,13 @@ public final boolean incrementToken() throws IOException {
145159
added = true;
146160
skipped++;
147161
if (skipped > skip) {
148-
termAtt.append(c == delimiter ? replacement : (char) c);
162+
termAtt.append(delimiters.contains((char) c) ? replacement : (char) c);
149163
length++;
150164
} else {
151165
startPosition++;
152166
}
153167
} else {
154-
if (c == delimiter) {
168+
if (delimiters.contains((char) c)) {
155169
if (skipped > skip) {
156170
endDelimiter = true;
157171
break;

0 commit comments

Comments
 (0)