Skip to content

Commit d7a226c

Browse files
committed
Merge pull request #23 from lanwen/count_capture
add - capture and count methods to builder
2 parents ab001c2 + f9149bc commit d7a226c

File tree

4 files changed

+187
-35
lines changed

4 files changed

+187
-35
lines changed

README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ String url = "https://www.google.com";
4848
testRegex.testExact(url); //True
4949

5050
testRegex.toString(); // Outputs the regex used:
51-
// ^(http)(s)?(\:\/\/)(www\.)?([^\ ]*)$
51+
// ^(?:http)(?:s)?(?:\:\/\/)(?:www\.)?(?:[^\ ]*)$
5252

5353
VerbalExpression testRegex = VerbalExpression.regex()
5454
.startOfLine()
@@ -59,8 +59,9 @@ VerbalExpression testRegex = VerbalExpression.regex()
5959
String testString = "defzzz";
6060

6161
//Use VerbalExpression's test() method to test if parts of the string match the regex
62-
testRegex.test(testString); //true
63-
testRegex.testExact(testString); //false
62+
testRegex.test(testString); // true
63+
testRegex.testExact(testString); // false
64+
testRegex.getText(testString); // returns: def
6465
```
6566

6667
Builder can be cloned:

src/main/java/ru/lanwen/verbalregex/VerbalExpression.java

Lines changed: 83 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
public class VerbalExpression {
77

88
private final Pattern pattern;
9-
9+
1010
public static class Builder {
1111

1212
private StringBuilder prefixes = new StringBuilder();
@@ -15,7 +15,11 @@ public static class Builder {
1515
private int modifiers = Pattern.MULTILINE;
1616

1717
private String sanitize(final String pValue) {
18-
return pValue.replaceAll("[\\W]", "\\\\$0");
18+
return pValue.replaceAll("[\\W]", "\\\\$0");
19+
}
20+
21+
private int countOccurrencesOf(String where, String what) {
22+
return (where.length() - where.replace(what, "").length()) / what.length();
1923
}
2024

2125
public VerbalExpression build() {
@@ -48,7 +52,7 @@ public Builder endOfLine() {
4852
}
4953

5054
public Builder then(String pValue) {
51-
this.add("(" + sanitize(pValue) + ")");
55+
this.add("(?:" + sanitize(pValue) + ")");
5256
return this;
5357
}
5458

@@ -58,32 +62,31 @@ public Builder find(String value) {
5862
}
5963

6064
public Builder maybe(final String pValue) {
61-
this.add("(" + sanitize(pValue) + ")?");
62-
return this;
65+
return this.then(pValue).add("?");
6366
}
6467

6568
public Builder anything() {
66-
this.add("(.*)");
69+
this.add("(?:.*)");
6770
return this;
6871
}
6972

7073
public Builder anythingButNot(final String pValue) {
71-
this.add("([^" + sanitize(pValue) + "]*)");
74+
this.add("(?:[^" + sanitize(pValue) + "]*)");
7275
return this;
7376
}
7477

7578
public Builder something() {
76-
this.add("(.+)");
79+
this.add("(?:.+)");
7780
return this;
7881
}
7982

8083
public Builder somethingButNot(final String pValue) {
81-
this.add("([^" + sanitize(pValue) + "]+)");
84+
this.add("(?:[^" + sanitize(pValue) + "]+)");
8285
return this;
8386
}
8487

8588
public Builder lineBreak() {
86-
this.add("(\\n|(\\r\\n))");
89+
this.add("(?:\\n|(\\r\\n))");
8790
return this;
8891
}
8992

@@ -115,8 +118,8 @@ public Builder any(final String value) {
115118
public Builder range(String... pArgs) {
116119
String value = "[";
117120
for (int _to = 1; _to < pArgs.length; _to += 2) {
118-
String from = sanitize((String)pArgs[_to - 1]);
119-
String to = sanitize((String)pArgs[_to]);
121+
String from = sanitize((String) pArgs[_to - 1]);
122+
String to = sanitize((String) pArgs[_to]);
120123

121124
value += from + "-" + to;
122125
}
@@ -209,32 +212,82 @@ public Builder searchOneLine(boolean pEnable) {
209212
}
210213

211214
public Builder multiple(final String pValue) {
212-
String value = this.sanitize(pValue);
213-
switch (value.charAt(0)) {
215+
switch (pValue.charAt(0)) {
214216
case '*':
215217
case '+':
216-
break;
218+
return this.add(pValue);
217219
default:
218-
value += '+';
220+
return this.add(this.sanitize(pValue) + '+');
219221
}
220-
this.add(value);
222+
}
223+
224+
/**
225+
* Adds an exact repetition count for the previous group
226+
* for example:
227+
* .find("w").count(3) // produce - (?:w){3}
228+
*
229+
* @param count - number of occurrences of previous group in expression
230+
* @return this Builder
231+
*/
232+
public Builder count(int count) {
233+
this.source.append("{").append(count).append("}");
234+
return this;
235+
}
236+
237+
/**
238+
* Adds a bounded repetition count (from..to occurrences) for the previous group
239+
* for example:
240+
* .find("w").count(1, 3) // produce (?:w){1,3}
241+
*
242+
* @param from - minimal number of occurrences
243+
* @param to - max number of occurrences
244+
* @return this Builder
245+
* @see #count(int)
246+
*/
247+
public Builder count(int from, int to) {
248+
this.source.append("{").append(from).append(",").append(to).append("}");
221249
return this;
222250
}
223251

224252
public Builder or(final String pValue) {
225-
if (this.prefixes.indexOf("(") == -1) {
226-
this.prefixes.append("(");
227-
}
228-
if (this.suffixes.indexOf(")") == -1) {
229-
this.suffixes.append(")" + this.suffixes.toString());
253+
this.prefixes.append("(");
254+
255+
int opened = countOccurrencesOf(this.prefixes.toString(), "(");
256+
int closed = countOccurrencesOf(this.suffixes.toString(), ")");
257+
258+
if (opened >= closed) {
259+
this.suffixes = new StringBuilder(")" + this.suffixes.toString());
230260
}
231261

232-
this.add(")|(");
262+
this.add(")|(?:");
233263
if (pValue != null) {
234264
this.then(pValue);
235265
}
236266
return this;
237267
}
268+
269+
/**
270+
* Starts a capture group - adds an opening parenthesis at the current position and a closing one to the suffixes
271+
* @return this builder
272+
*/
273+
public Builder capture() {
274+
this.suffixes.append(")");
275+
return this.add("(");
276+
}
277+
278+
/**
279+
* Closes the previous capture group: removes the last closing parenthesis from the suffixes and adds it at the current position
280+
* Can be used to continue building the regex after a capture or to add multiple captures
281+
* @return this builder
282+
*/
283+
public Builder endCapture() {
284+
if(this.suffixes.length() > 0 && this.suffixes.indexOf(")") + 1 == this.suffixes.length()) {
285+
this.suffixes.setLength(suffixes.length() - 1);
286+
return this.add(")");
287+
} else {
288+
throw new IllegalStateException("Can't end capture when it not started");
289+
}
290+
}
238291
}
239292

240293
public boolean testExact(final String pToTest) {
@@ -256,12 +309,16 @@ public boolean test(final String pToTest) {
256309
private VerbalExpression(final Pattern pattern) {
257310
this.pattern = pattern;
258311
}
259-
312+
260313
public String getText(String toTest) {
314+
return getText(toTest, 0);
315+
}
316+
317+
public String getText(String toTest, int group) {
261318
Matcher m = pattern.matcher(toTest);
262319
StringBuilder result = new StringBuilder();
263-
while (m.find()){
264-
result.append(m.group());
320+
while (m.find()) {
321+
result.append(m.group(group));
265322
}
266323
return result.toString();
267324
}

src/test/java/ru/lanwen/verbalregex/BasicFunctionalityUnitTest.java

Lines changed: 99 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
import org.junit.Test;
44

5+
import static org.hamcrest.CoreMatchers.equalTo;
6+
import static org.hamcrest.CoreMatchers.is;
57
import static org.junit.Assert.*;
68

79
public class BasicFunctionalityUnitTest {
@@ -86,7 +88,7 @@ public void testMaybe() {
8688
.maybe("b")
8789
.build();
8890

89-
assertEquals("Regex isn't correct", testRegex.toString(), "^(a)(b)?");
91+
assertThat("Regex isn't correct", testRegex.toString(), equalTo("^(?:a)(?:b)?"));
9092

9193
assertTrue("Maybe has a 'b' after an 'a'", testRegex.test("acb"));
9294
assertTrue("Maybe has a 'b' after an 'a'", testRegex.test("abc"));
@@ -133,16 +135,21 @@ public void testLineBreak() {
133135

134136
@Test
135137
public void testBr() {
136-
VerbalExpression testRegex = new VerbalExpression.Builder()
138+
VerbalExpression testRegexBr = new VerbalExpression.Builder()
139+
.startOfLine()
140+
.then("abc")
141+
.br()
142+
.then("def")
143+
.build();
144+
145+
VerbalExpression testRegexLineBr = new VerbalExpression.Builder()
137146
.startOfLine()
138147
.then("abc")
139148
.lineBreak()
140149
.then("def")
141150
.build();
142151

143-
assertTrue("abc then line break then def", testRegex.test("abc\r\ndef"));
144-
assertTrue("abc then line break then def", testRegex.test("abc\ndef"));
145-
assertFalse("abc then line break then space then def", testRegex.test("abc\r\n def"));
152+
assertThat(".br() differs from .lineBreak()", testRegexBr.toString(), equalTo(testRegexLineBr.toString()));
146153
}
147154

148155
@Test
@@ -212,4 +219,91 @@ public void testGetText() {
212219

213220
}
214221

222+
@Test
223+
public void testStartCapture() {
224+
String text = "aaabcd";
225+
VerbalExpression regex = VerbalExpression.regex()
226+
.find("a").count(3)
227+
.capture().find("b").anything().build();
228+
229+
assertThat("regex don't match string", regex.getText(text), equalTo(text));
230+
assertThat("can't get first captured group", regex.getText(text, 1), equalTo("bcd"));
231+
}
232+
233+
@Test
234+
public void shouldReturnEmptyStringWhenNoGroupFound() {
235+
String text = "abc";
236+
VerbalExpression regex = VerbalExpression.regex().find("d").capture().find("e").build();
237+
238+
assertThat("regex don't match string", regex.getText(text), equalTo(""));
239+
assertThat("first captured group not empty string", regex.getText(text, 1), equalTo(""));
240+
assertThat("second captured group not empty string", regex.getText(text, 2), equalTo(""));
241+
}
242+
243+
@Test
244+
public void testCountWithRange() {
245+
String text4c = "abcccce";
246+
String text2c = "abcce";
247+
String text1c = "abce";
248+
249+
VerbalExpression regex = VerbalExpression.regex().find("c").count(2, 3).build();
250+
251+
assertThat("regex don't match string", regex.getText(text4c), equalTo("ccc"));
252+
assertThat("regex don't match string", regex.getText(text2c), equalTo("cc"));
253+
assertThat("regex don't match string", regex.test(text1c), is(false));
254+
}
255+
256+
257+
@Test(expected = IndexOutOfBoundsException.class)
258+
public void shouldExceptionWhenTryGetMoreThanCapturedGroup() {
259+
String text = "abc";
260+
VerbalExpression regex = VerbalExpression.regex().find("b").capture().find("c").build();
261+
262+
regex.getText(text, 2);
263+
}
264+
265+
@Test
266+
public void testEndCapture() {
267+
String text = "aaabcd";
268+
VerbalExpression regex = VerbalExpression.regex()
269+
.find("a")
270+
.capture().find("b").anything().endCapture().then("cd").build();
271+
272+
assertThat(regex.getText(text), equalTo("abcd"));
273+
assertThat("can't get first captured group", regex.getText(text, 1), equalTo("b"));
274+
}
275+
276+
277+
@Test
278+
public void testMultiplyCapture() {
279+
String text = "aaabcd";
280+
VerbalExpression regex = VerbalExpression.regex()
281+
.find("a").count(1)
282+
.capture().find("b").endCapture().anything().capture().find("d").build();
283+
284+
assertThat("can't get first captured group", regex.getText(text, 1), equalTo("b"));
285+
assertThat("can't get second captured group", regex.getText(text, 2), equalTo("d"));
286+
}
287+
288+
@Test(expected = IllegalStateException.class)
289+
public void testEndCaptureOnEmptyRegex() {
290+
VerbalExpression.regex().endCapture().build();
291+
}
292+
293+
@Test
294+
public void testOrWithCapture() {
295+
VerbalExpression testRegex = VerbalExpression.regex()
296+
.capture()
297+
.find("abc")
298+
.or("def")
299+
.build();
300+
assertTrue("Starts with abc or def", testRegex.test("defzzz"));
301+
assertTrue("Starts with abc or def", testRegex.test("abczzz"));
302+
assertFalse("Doesn't start with abc or def", testRegex.testExact("xyzabcefg"));
303+
304+
assertThat(testRegex.getText("xxxabcdefzzz", 1), equalTo("abcdef"));
305+
assertThat(testRegex.getText("xxxdefzzz", 2), equalTo("null"));
306+
assertThat(testRegex.getText("xxxabcdefzzz", 2), equalTo("abcnull"));
307+
}
308+
215309
}

src/test/java/ru/lanwen/verbalregex/RealWorldUnitTest.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ public void testUrl() {
2929

3030
assertThat("Regex doesn't match same regex as in example",
3131
testRegex.toString(),
32-
equalTo("^(http)(s)?(\\:\\/\\/)(www\\.)?([^\\ ]*)$"));
32+
equalTo("^(?:http)(?:s)?(?:\\:\\/\\/)(?:www\\.)?(?:[^\\ ]*)$"));
3333
}
3434

3535
@Test

0 commit comments

Comments
 (0)