-
Notifications
You must be signed in to change notification settings - Fork 349
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Browse the repository at this point in the history
* #49: Allow to fix Payload with Keyword
- Loading branch information
Showing
25 changed files
with
1,572 additions
and
292 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
language: java | ||
install: mvn install -DskipTests=true -Dgpg.skip=true | ||
jdk: | ||
- oraclejdk8 | ||
- openjdk8 | ||
after_success: | ||
- bash <(curl -s https://codecov.io/bash) | ||
- bash <(curl -s https://codecov.io/bash) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
package org.ahocorasick.trie; | ||
|
||
public class DefaultToken extends Token { | ||
|
||
private PayloadToken<String> payloadToken; | ||
|
||
public DefaultToken(PayloadToken<String> payloadToken) { | ||
super(payloadToken.getFragment()); | ||
this.payloadToken = payloadToken; | ||
} | ||
|
||
public boolean isMatch() { | ||
return payloadToken.isMatch(); | ||
} | ||
|
||
public Emit getEmit() { | ||
PayloadEmit<String> emit = payloadToken.getEmit(); | ||
return new Emit(emit.getStart(), emit.getEnd(), emit.getKeyword()); | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,4 +15,5 @@ public boolean isMatch() { | |
public Emit getEmit() { | ||
return null; | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
package org.ahocorasick.trie; | ||
|
||
/** | ||
* Payload holds the matched keyword and some payload-data. | ||
* | ||
* @author Daniel Beck | ||
* | ||
* @param <T> The type of the wrapped payload data. | ||
*/ | ||
public class Payload<T> implements Comparable<Payload<T>> { | ||
|
||
private final String keyword; | ||
private final T data; | ||
|
||
public Payload(final String keyword, final T data) { | ||
super(); | ||
this.keyword = keyword; | ||
this.data = data; | ||
} | ||
|
||
public String getKeyword() { | ||
return keyword; | ||
} | ||
|
||
public T getData() { | ||
return data; | ||
} | ||
|
||
@Override | ||
public int compareTo(Payload<T> other) { | ||
return keyword.compareTo(other.getKeyword()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
package org.ahocorasick.trie; | ||
|
||
import org.ahocorasick.interval.Interval; | ||
import org.ahocorasick.interval.Intervalable; | ||
|
||
/** | ||
* PayloadEmit contains a matched term and its associated payload data. | ||
* | ||
* @param <T> Type of the wrapped payload-data. | ||
* @author Daniel Beck | ||
* | ||
*/ | ||
public class PayloadEmit<T> extends Interval implements Intervalable { | ||
|
||
private final String keyword; | ||
|
||
private final T payload; | ||
|
||
/** | ||
* Created a PayloadEmit | ||
* | ||
* @param start Start of the matched search term. | ||
* @param end End of the matched search term. | ||
* @param keyword Keyword that matched. | ||
* @param payload Emitted payload data. | ||
*/ | ||
public PayloadEmit(final int start, final int end, String keyword, T payload) { | ||
super(start, end); | ||
this.keyword = keyword; | ||
this.payload = payload; | ||
} | ||
|
||
public String getKeyword() { | ||
return this.keyword; | ||
} | ||
|
||
/** | ||
* Returns the payload associated to this emit. | ||
* | ||
* @return the associated payload | ||
*/ | ||
public T getPayload() { | ||
return this.payload; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return super.toString() + "=" + this.keyword + (this.payload != null ? "->" + this.payload : ""); | ||
} | ||
} |
31 changes: 31 additions & 0 deletions
31
src/main/java/org/ahocorasick/trie/PayloadFragmentToken.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package org.ahocorasick.trie; | ||
|
||
/*** | ||
* PayloadFragmentToken holds a text ("the fragment"). | ||
* <p> | ||
* It does not matches a search term - so its <code>isMatch</code>-method | ||
* returns always false. <code>getEmits</code> returns not Emits. | ||
* | ||
* @author Daniel Beck | ||
* | ||
* @param <T> The Type of the emitted payloads. | ||
*/ | ||
public class PayloadFragmentToken<T> extends PayloadToken<T> { | ||
|
||
public PayloadFragmentToken(String fragment) { | ||
super(fragment); | ||
} | ||
|
||
@Override | ||
public boolean isMatch() { | ||
return false; | ||
} | ||
|
||
/** | ||
* Returns null. | ||
*/ | ||
@Override | ||
public PayloadEmit<T> getEmit() { | ||
return null; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
package org.ahocorasick.trie; | ||
|
||
/** | ||
* PayloadMatchToken holds a text ("the fragment") an emits some output. | ||
* <p> | ||
* It matches a search term - so its <code>isMatch</code>-method returns always | ||
* true.. | ||
* | ||
* @author Daniel Beck | ||
* | ||
* @param <T> The Type of the emitted payloads. | ||
*/ | ||
public class PayloadMatchToken<T> extends PayloadToken<T> { | ||
|
||
private final PayloadEmit<T> emit; | ||
|
||
public PayloadMatchToken(final String fragment, final PayloadEmit<T> emit) { | ||
super(fragment); | ||
this.emit = emit; | ||
} | ||
|
||
@Override | ||
public boolean isMatch() { | ||
return true; | ||
} | ||
|
||
@Override | ||
public PayloadEmit<T> getEmit() { | ||
return this.emit; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,156 @@ | ||
package org.ahocorasick.trie; | ||
|
||
import java.util.*; | ||
|
||
/** | ||
* <p> | ||
* A state has various important tasks it must attend to: | ||
* </p> | ||
* <p> | ||
* <ul> | ||
* <li>success; when a character points to another state, it must return that | ||
* state</li> | ||
* <li>failure; when a character has no matching state, the algorithm must be | ||
* able to fall back on a state with less depth</li> | ||
* <li>emits; when this state is passed and keywords have been matched, the | ||
* matches and their payloads must be 'emitted' so that they can be used later | ||
* on.</li> | ||
* </ul> | ||
* <p> | ||
* <p> | ||
* The root state is special in the sense that it has no failure state; it | ||
* cannot fail. If it 'fails' it will still parse the next character and start | ||
* from the root node. This ensures that the algorithm always runs. All other | ||
* states always have a fail state. | ||
* </p> | ||
* | ||
* @author Daniel Beck | ||
*/ | ||
public class PayloadState<T> { | ||
|
||
/** | ||
* effective the size of the keyword | ||
*/ | ||
private final int depth; | ||
|
||
/** | ||
* only used for the root state to refer to itself in case no matches have been | ||
* found | ||
*/ | ||
private final PayloadState<T> rootState; | ||
|
||
/** | ||
* referred to in the white paper as the 'goto' structure. From a state it is | ||
* possible to go to other states, depending on the character passed. | ||
*/ | ||
private final Map<Character, PayloadState<T>> success = new HashMap<>(); | ||
|
||
/** | ||
* if no matching states are found, the failure state will be returned | ||
*/ | ||
private PayloadState<T> failure; | ||
|
||
/** | ||
* whenever this state is reached, it will emit the matches keywords for future | ||
* reference | ||
*/ | ||
private Set<Payload<T>> emits; | ||
|
||
public PayloadState() { | ||
this(0); | ||
} | ||
|
||
public PayloadState(final int depth) { | ||
this.depth = depth; | ||
this.rootState = depth == 0 ? this : null; | ||
} | ||
|
||
private PayloadState<T> nextState(final Character character, final boolean ignoreRootState) { | ||
PayloadState<T> nextState = this.success.get(character); | ||
|
||
if (!ignoreRootState && nextState == null && this.rootState != null) { | ||
nextState = this.rootState; | ||
} | ||
|
||
return nextState; | ||
} | ||
|
||
public PayloadState<T> nextState(final Character character) { | ||
return nextState(character, false); | ||
} | ||
|
||
public PayloadState<T> nextStateIgnoreRootState(Character character) { | ||
return nextState(character, true); | ||
} | ||
|
||
public PayloadState<T> addState(String keyword) { | ||
PayloadState<T> state = this; | ||
|
||
for (final Character character : keyword.toCharArray()) { | ||
state = state.addState(character); | ||
} | ||
|
||
return state; | ||
} | ||
|
||
public PayloadState<T> addState(Character character) { | ||
PayloadState<T> nextState = nextStateIgnoreRootState(character); | ||
if (nextState == null) { | ||
nextState = new PayloadState<T>(this.depth + 1); | ||
this.success.put(character, nextState); | ||
} | ||
return nextState; | ||
} | ||
|
||
public int getDepth() { | ||
return this.depth; | ||
} | ||
|
||
/** | ||
* Adds a payload to be emitted for this state. | ||
* | ||
* @param emit Payload to be emitted. | ||
*/ | ||
public void addEmit(Payload<T> payload) { | ||
if (this.emits == null) { | ||
this.emits = new TreeSet<>(); | ||
} | ||
this.emits.add(payload); | ||
} | ||
|
||
/** | ||
* Adds a collection of payloads to be emitted for this state. | ||
* | ||
* @param emits Collection of payloads to be emitted. | ||
*/ | ||
public void addEmit(Collection<Payload<T>> emits) { | ||
for (Payload<T> emit : emits) { | ||
addEmit(emit); | ||
} | ||
} | ||
|
||
/** | ||
* Returns a collection of emitted payloads for this state. | ||
* | ||
* @return Collection of emitted payloads. | ||
*/ | ||
public Collection<Payload<T>> emit() { | ||
return this.emits == null ? Collections.<Payload<T>>emptyList() : this.emits; | ||
} | ||
|
||
public PayloadState<T> failure() { | ||
return this.failure; | ||
} | ||
|
||
public void setFailure(PayloadState<T> failState) { | ||
this.failure = failState; | ||
} | ||
|
||
public Collection<PayloadState<T>> getStates() { | ||
return this.success.values(); | ||
} | ||
|
||
public Collection<Character> getTransitions() { | ||
return this.success.keySet(); | ||
} | ||
} |
Oops, something went wrong.