|
17 | 17 | package org.apache.lucene.analysis.path; |
18 | 18 |
|
19 | 19 | import java.io.IOException; |
| 20 | +import java.util.HashSet; |
| 21 | +import java.util.Objects; |
| 22 | +import java.util.Set; |
20 | 23 | import org.apache.lucene.analysis.Tokenizer; |
21 | 24 | import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
22 | 25 | import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; |
@@ -84,17 +87,28 @@ public PathHierarchyTokenizer( |
84 | 87 | } |
85 | 88 | termAtt.resizeBuffer(bufferSize); |
86 | 89 |
|
87 | | - this.delimiter = delimiter; |
| 90 | + this.delimiters = Set.of(delimiter); |
88 | 91 | this.replacement = replacement; |
89 | 92 | this.skip = skip; |
90 | 93 | resultToken = new StringBuilder(bufferSize); |
91 | 94 | } |
92 | 95 |
|
/**
 * Creates a tokenizer that treats every character contained in {@code delimiters} as a path
 * delimiter.
 *
 * <p>When {@code delimiters} is {@code null} or empty, {@link #DEFAULT_DELIMITER} is used as the
 * single delimiter. The supplied set is copied, so changes the caller makes to it afterwards do
 * not affect this tokenizer.
 *
 * @param delimiters characters to recognise as delimiters; may be {@code null} or empty
 * @param replacement character appended to the term in place of any recognised delimiter
 * @param skip threshold compared against the running skip counter in {@code incrementToken()}
 */
public PathHierarchyTokenizer(Set<Character> delimiters, char replacement, int skip) {
  super(DEFAULT_TOKEN_ATTRIBUTE_FACTORY);
  // Pre-size the term buffer the same way the buffer-size-taking constructor does.
  termAtt.resizeBuffer(DEFAULT_BUFFER_SIZE);
  // Do NOT write new HashSet<>(DEFAULT_DELIMITER): the char is widened to int and selects the
  // HashSet(int initialCapacity) constructor, which yields an EMPTY set instead of {'/'}.
  this.delimiters =
      delimiters == null || delimiters.isEmpty()
          ? Set.of(DEFAULT_DELIMITER)
          : new HashSet<>(delimiters); // defensive copy; tolerates whatever the caller's set holds
  this.replacement = replacement;
  this.skip = skip;
  resultToken = new StringBuilder(DEFAULT_BUFFER_SIZE);
}
| 106 | + |
93 | 107 | private static final int DEFAULT_BUFFER_SIZE = 1024; |
94 | 108 | public static final char DEFAULT_DELIMITER = '/'; |
95 | 109 | public static final int DEFAULT_SKIP = 0; |
96 | 110 |
|
97 | | - private final char delimiter; |
| 111 | + private Set<Character> delimiters; |
98 | 112 | private final char replacement; |
99 | 113 | private final int skip; |
100 | 114 |
|
@@ -145,13 +159,13 @@ public final boolean incrementToken() throws IOException { |
145 | 159 | added = true; |
146 | 160 | skipped++; |
147 | 161 | if (skipped > skip) { |
148 | | - termAtt.append(c == delimiter ? replacement : (char) c); |
| 162 | + termAtt.append(delimiters.contains((char) c) ? replacement : (char) c); |
149 | 163 | length++; |
150 | 164 | } else { |
151 | 165 | startPosition++; |
152 | 166 | } |
153 | 167 | } else { |
154 | | - if (c == delimiter) { |
| 168 | + if (delimiters.contains((char) c)) { |
155 | 169 | if (skipped > skip) { |
156 | 170 | endDelimiter = true; |
157 | 171 | break; |
|
0 commit comments