Skip to content

add - capture and count methods to builder #23

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 10, 2014
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ String url = "https://www.google.com";
testRegex.testExact(url); //True

testRegex.toString(); // Outputs the regex used:
// ^(http)(s)?(\:\/\/)(www\.)?([^\ ]*)$
// ^(?:http)(?:s)?(?:\:\/\/)(?:www\.)?(?:[^\ ]*)$

VerbalExpression testRegex = VerbalExpression.regex()
.startOfLine()
Expand All @@ -59,8 +59,9 @@ VerbalExpression testRegex = VerbalExpression.regex()
String testString = "defzzz";

// Use VerbalExpression's test() method to check whether any part of the string matches the regex
testRegex.test(testString); //true
testRegex.testExact(testString); //false
testRegex.test(testString); // true
testRegex.testExact(testString); // false
testRegex.getText(testString); // returns: def
```

Builder can be cloned:
Expand Down
109 changes: 83 additions & 26 deletions src/main/java/ru/lanwen/verbalregex/VerbalExpression.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
public class VerbalExpression {

private final Pattern pattern;

public static class Builder {

private StringBuilder prefixes = new StringBuilder();
Expand All @@ -15,7 +15,11 @@ public static class Builder {
private int modifiers = Pattern.MULTILINE;

private String sanitize(final String pValue) {
return pValue.replaceAll("[\\W]", "\\\\$0");
return pValue.replaceAll("[\\W]", "\\\\$0");
}

/**
 * Counts how many times {@code what} occurs in {@code where}.
 * The count is derived from how much shorter {@code where} becomes once every
 * occurrence of {@code what} has been removed.
 *
 * @param where text to search in
 * @param what  text to look for
 * @return number of occurrences; {@code 0} when {@code what} is empty
 */
private int countOccurrencesOf(String where, String what) {
    // An empty needle would make the divisor below zero and raise ArithmeticException.
    if (what.isEmpty()) {
        return 0;
    }
    final int removedChars = where.length() - where.replace(what, "").length();
    return removedChars / what.length();
}

public VerbalExpression build() {
Expand Down Expand Up @@ -48,7 +52,7 @@ public Builder endOfLine() {
}

public Builder then(String pValue) {
this.add("(" + sanitize(pValue) + ")");
this.add("(?:" + sanitize(pValue) + ")");
return this;
}

Expand All @@ -58,32 +62,31 @@ public Builder find(String value) {
}

public Builder maybe(final String pValue) {
this.add("(" + sanitize(pValue) + ")?");
return this;
return this.then(pValue).add("?");
}

public Builder anything() {
this.add("(.*)");
this.add("(?:.*)");
return this;
}

public Builder anythingButNot(final String pValue) {
this.add("([^" + sanitize(pValue) + "]*)");
this.add("(?:[^" + sanitize(pValue) + "]*)");
return this;
}

public Builder something() {
this.add("(.+)");
this.add("(?:.+)");
return this;
}

public Builder somethingButNot(final String pValue) {
this.add("([^" + sanitize(pValue) + "]+)");
this.add("(?:[^" + sanitize(pValue) + "]+)");
return this;
}

public Builder lineBreak() {
this.add("(\\n|(\\r\\n))");
this.add("(?:\\n|(\\r\\n))");
return this;
}

Expand Down Expand Up @@ -115,8 +118,8 @@ public Builder any(final String value) {
public Builder range(String... pArgs) {
String value = "[";
for (int _to = 1; _to < pArgs.length; _to += 2) {
String from = sanitize((String)pArgs[_to - 1]);
String to = sanitize((String)pArgs[_to]);
String from = sanitize((String) pArgs[_to - 1]);
String to = sanitize((String) pArgs[_to]);

value += from + "-" + to;
}
Expand Down Expand Up @@ -209,32 +212,82 @@ public Builder searchOneLine(boolean pEnable) {
}

public Builder multiple(final String pValue) {
String value = this.sanitize(pValue);
switch (value.charAt(0)) {
switch (pValue.charAt(0)) {
case '*':
case '+':
break;
return this.add(pValue);
default:
value += '+';
return this.add(this.sanitize(pValue) + '+');
}
this.add(value);
}

/**
 * Appends an exact repetition quantifier for the previous group.
 * Example: {@code .find("w").count(3)} produces {@code (?:w){3}}.
 *
 * @param count number of occurrences of the previous group in the expression
 * @return this Builder
 */
public Builder count(int count) {
    final String quantifier = "{" + count + "}";
    this.source.append(quantifier);
    return this;
}

/**
 * Appends a ranged repetition quantifier for the previous group.
 * Example: {@code .find("w").count(1, 3)} produces {@code (?:w){1,3}}.
 *
 * @param from minimal number of occurrences
 * @param to   maximal number of occurrences
 * @return this Builder
 * @see #count(int)
 */
public Builder count(int from, int to) {
    final String quantifier = "{" + from + "," + to + "}";
    this.source.append(quantifier);
    return this;
}

public Builder or(final String pValue) {
if (this.prefixes.indexOf("(") == -1) {
this.prefixes.append("(");
}
if (this.suffixes.indexOf(")") == -1) {
this.suffixes.append(")" + this.suffixes.toString());
this.prefixes.append("(");

int opened = countOccurrencesOf(this.prefixes.toString(), "(");
int closed = countOccurrencesOf(this.suffixes.toString(), ")");

if (opened >= closed) {
this.suffixes = new StringBuilder(")" + this.suffixes.toString());
}

this.add(")|(");
this.add(")|(?:");
if (pValue != null) {
this.then(pValue);
}
return this;
}

/**
 * Starts a capturing group: the opening parenthesis is added at the current
 * position and the matching closing parenthesis is appended to the suffixes.
 *
 * @return this builder
 */
public Builder capture() {
    // Park the closing parenthesis in the suffixes first, then open the group.
    this.suffixes.append(')');
    final Builder self = this.add("(");
    return self;
}

/**
 * Closes the most recently opened capture: the pending closing parenthesis is
 * removed from the end of the suffixes and added at the current position instead.
 * Can be used to continue building the regex after a capture or to add multiple
 * captures, including nested ones.
 *
 * @return this builder
 * @throws IllegalStateException if the suffixes do not end with a pending ")"
 */
public Builder endCapture() {
    final int length = this.suffixes.length();
    // Inspect the LAST character rather than the position of the first ")":
    // with nested captures the suffixes hold several ")" and a first-index
    // comparison would wrongly report that no capture was started.
    if (length == 0 || this.suffixes.charAt(length - 1) != ')') {
        throw new IllegalStateException("Can't end capture when it not started");
    }
    this.suffixes.setLength(length - 1);
    return this.add(")");
}
}

public boolean testExact(final String pToTest) {
Expand All @@ -256,12 +309,16 @@ public boolean test(final String pToTest) {
private VerbalExpression(final Pattern pattern) {
this.pattern = pattern;
}

/**
 * Convenience overload of {@link #getText(String, int)} that asks for group 0.
 *
 * @param toTest text to run the expression against
 * @return whatever {@code getText(toTest, 0)} returns
 */
public String getText(String toTest) {
    final int wholeMatchGroup = 0;
    return this.getText(toTest, wholeMatchGroup);
}

public String getText(String toTest, int group) {
Matcher m = pattern.matcher(toTest);
StringBuilder result = new StringBuilder();
while (m.find()){
result.append(m.group());
while (m.find()) {
result.append(m.group(group));
}
return result.toString();
}
Expand Down
104 changes: 99 additions & 5 deletions src/test/java/ru/lanwen/verbalregex/BasicFunctionalityUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import org.junit.Test;

import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.is;
import static org.junit.Assert.*;

public class BasicFunctionalityUnitTest {
Expand Down Expand Up @@ -86,7 +88,7 @@ public void testMaybe() {
.maybe("b")
.build();

assertEquals("Regex isn't correct", testRegex.toString(), "^(a)(b)?");
assertThat("Regex isn't correct", testRegex.toString(), equalTo("^(?:a)(?:b)?"));

assertTrue("Maybe has a 'b' after an 'a'", testRegex.test("acb"));
assertTrue("Maybe has a 'b' after an 'a'", testRegex.test("abc"));
Expand Down Expand Up @@ -133,16 +135,21 @@ public void testLineBreak() {

@Test
public void testBr() {
VerbalExpression testRegex = new VerbalExpression.Builder()
VerbalExpression testRegexBr = new VerbalExpression.Builder()
.startOfLine()
.then("abc")
.br()
.then("def")
.build();

VerbalExpression testRegexLineBr = new VerbalExpression.Builder()
.startOfLine()
.then("abc")
.lineBreak()
.then("def")
.build();

assertTrue("abc then line break then def", testRegex.test("abc\r\ndef"));
assertTrue("abc then line break then def", testRegex.test("abc\ndef"));
assertFalse("abc then line break then space then def", testRegex.test("abc\r\n def"));
assertThat(".br() differs from .lineBreak()", testRegexBr.toString(), equalTo(testRegexLineBr.toString()));
}

@Test
Expand Down Expand Up @@ -212,4 +219,91 @@ public void testGetText() {

}

/** A capture left open until build() should still be retrievable as group 1. */
@Test
public void testStartCapture() {
    final String input = "aaabcd";
    final VerbalExpression expression = VerbalExpression.regex()
            .find("a")
            .count(3)
            .capture()
            .find("b")
            .anything()
            .build();

    assertThat("regex don't match string", expression.getText(input), equalTo(input));
    assertThat("can't get first captured group", expression.getText(input, 1), equalTo("bcd"));
}

/** When the expression never matches, every requested group yields an empty string. */
@Test
public void shouldReturnEmptyStringWhenNoGroupFound() {
    final String input = "abc";
    final VerbalExpression expression = VerbalExpression.regex()
            .find("d")
            .capture()
            .find("e")
            .build();

    assertThat("regex don't match string", expression.getText(input), equalTo(""));
    assertThat("first captured group not empty string", expression.getText(input, 1), equalTo(""));
    assertThat("second captured group not empty string", expression.getText(input, 2), equalTo(""));
}

/** count(2, 3) should take at most three "c", accept two, and reject a single one. */
@Test
public void testCountWithRange() {
    final VerbalExpression twoToThreeC = VerbalExpression.regex()
            .find("c")
            .count(2, 3)
            .build();

    assertThat("regex don't match string", twoToThreeC.getText("abcccce"), equalTo("ccc"));
    assertThat("regex don't match string", twoToThreeC.getText("abcce"), equalTo("cc"));
    assertThat("regex don't match string", twoToThreeC.test("abce"), is(false));
}


/** Requesting a group index beyond the captures that exist must raise IndexOutOfBoundsException. */
@Test(expected = IndexOutOfBoundsException.class)
public void shouldExceptionWhenTryGetMoreThanCapturedGroup() {
    final String input = "abc";
    final VerbalExpression singleCapture = VerbalExpression.regex()
            .find("b")
            .capture()
            .find("c")
            .build();

    // Only one capture() was opened above, yet group 2 is requested.
    singleCapture.getText(input, 2);
}

/** endCapture() should close the group so that later parts stay outside of it. */
@Test
public void testEndCapture() {
    final String input = "aaabcd";
    final VerbalExpression expression = VerbalExpression.regex()
            .find("a")
            .capture()
            .find("b")
            .anything()
            .endCapture()
            .then("cd")
            .build();

    assertThat(expression.getText(input), equalTo("abcd"));
    assertThat("can't get first captured group", expression.getText(input, 1), equalTo("b"));
}


/** Two consecutive captures should be addressable as groups 1 and 2. */
@Test
public void testMultiplyCapture() {
    final String input = "aaabcd";
    final VerbalExpression expression = VerbalExpression.regex()
            .find("a")
            .count(1)
            .capture()
            .find("b")
            .endCapture()
            .anything()
            .capture()
            .find("d")
            .build();

    assertThat("can't get first captured group", expression.getText(input, 1), equalTo("b"));
    assertThat("can't get second captured group", expression.getText(input, 2), equalTo("d"));
}

/** Ending a capture that was never started must raise IllegalStateException. */
@Test(expected = IllegalStateException.class)
public void testEndCaptureOnEmptyRegex() {
    VerbalExpression.regex()
            .endCapture()
            .build();
}

/** Combines capture() with or() and checks both matching and group extraction. */
@Test
public void testOrWithCapture() {
    final VerbalExpression abcOrDef = VerbalExpression.regex()
            .capture()
            .find("abc")
            .or("def")
            .build();

    assertTrue("Starts with abc or def", abcOrDef.test("defzzz"));
    assertTrue("Starts with abc or def", abcOrDef.test("abczzz"));
    assertFalse("Doesn't start with abc or def", abcOrDef.testExact("xyzabcefg"));

    final String bothAlternatives = "xxxabcdefzzz";
    assertThat(abcOrDef.getText(bothAlternatives, 1), equalTo("abcdef"));
    // NOTE(review): the expected values below contain the literal text "null" -
    // presumably a group that did not take part in a match; confirm against getText.
    assertThat(abcOrDef.getText("xxxdefzzz", 2), equalTo("null"));
    assertThat(abcOrDef.getText(bothAlternatives, 2), equalTo("abcnull"));
}

}
2 changes: 1 addition & 1 deletion src/test/java/ru/lanwen/verbalregex/RealWorldUnitTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public void testUrl() {

assertThat("Regex doesn't match same regex as in example",
testRegex.toString(),
equalTo("^(http)(s)?(\\:\\/\\/)(www\\.)?([^\\ ]*)$"));
equalTo("^(?:http)(?:s)?(?:\\:\\/\\/)(?:www\\.)?(?:[^\\ ]*)$"));
}

@Test
Expand Down