Skip to content

Commit

Permalink
Lazy update time parser
Browse files Browse the repository at this point in the history
Author: lrusso96

Fixes: High number of memory allocations when parsing timeago dates
  • Loading branch information
ShareASmile authored Jul 5, 2023
1 parent 7935eaf commit a8699ba
Showing 1 changed file with 64 additions and 20 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -51,7 +54,7 @@ public DateWrapper parse(final String textualDate) throws ParsingException {
final String caseText = caseMapToAmountEntry.getKey();
final Integer caseAmount = caseMapToAmountEntry.getValue();

if (textualDateMatches(textualDate, caseText)) {
if (textualDate.contains(caseText)) {
return getResultFor(caseAmount, chronoUnit);
}
}
Expand All @@ -71,38 +74,79 @@ private int parseTimeAgoAmount(final String textualDate) {
}

private ChronoUnit parseChronoUnit(final String textualDate) throws ParsingException {

if (patternsHolder.wordSeparator().isEmpty()) {
return patternsHolder.asMap().entrySet().stream().filter(e -> e.getValue().stream()
.anyMatch(agoPhrase -> textualDate.contains(agoPhrase)))
.map(Map.Entry::getKey)
.findFirst()
.orElseThrow(() ->
new ParsingException("Unable to parse the date: " + textualDate));
}

String date = textualDate.toLowerCase();
List<String> words = new ArrayList<>();
String word = getNextWord(date);
while (!word.isEmpty()) {
words.add(word);
date = date.substring(word.length());
word = getNextWord(date);
}

return patternsHolder.asMap().entrySet().stream()
.filter(e -> e.getValue().stream()
.anyMatch(agoPhrase -> textualDateMatches(textualDate, agoPhrase)))
.anyMatch(agoPhrase -> textualDateMatches(words, agoPhrase) || textualDate.equals(agoPhrase)))
.map(Map.Entry::getKey)
.findFirst()
.orElseThrow(() ->
new ParsingException("Unable to parse the date: " + textualDate));
}

private boolean textualDateMatches(final String textualDate, final String agoPhrase) {
if (textualDate.equals(agoPhrase)) {
return true;
/**
 * Returns the index of the first character of {@code s} that is not a separator.
 *
 * <p>Separators are the tab, the space, the ASCII digits {@code 0-9} (so that strings
 * like "2wk" are split at the number) and the horizontal space characters U+00A0, U+1680,
 * U+180E, U+2000 to U+200A, U+202F, U+205F and U+3000.</p>
 *
 * @param s the string to scan from its beginning
 * @return the index of the first non-separator character, or {@code s.length()} if the
 *         string is empty or consists only of separators
 */
public static int getNextNonBlankIndex(final String s) {
    final int length = s.length();
    int index = 0;
    while (index < length) {
        final char c = s.charAt(index);
        // Plain range checks: no lookup table has to be allocated on each call.
        final boolean separator = c == '\t' || c == ' '
                || (c >= '0' && c <= '9')
                || c == 0x00A0 || c == 0x1680 || c == 0x180E
                || (c >= 0x2000 && c <= 0x200A)
                || c == 0x202F || c == 0x205F || c == 0x3000;
        if (!separator) {
            break;
        }
        index++;
    }
    return index;
}

if (patternsHolder.wordSeparator().isEmpty()) {
return textualDate.toLowerCase().contains(agoPhrase.toLowerCase());
/**
 * Returns the index of the first separator character of {@code s}.
 *
 * <p>Separators are the tab, the space, the ASCII digits {@code 0-9} (so that strings
 * like "2wk" are split at the number) and the horizontal space characters U+00A0, U+1680,
 * U+180E, U+2000 to U+200A, U+202F, U+205F and U+3000.</p>
 *
 * @param s the string to scan from its beginning
 * @return the index of the first separator character, or {@code s.length()} if the string
 *         contains none
 */
public static int getNextBlankIndex(final String s) {
    final int length = s.length();
    int index = 0;
    while (index < length) {
        final char c = s.charAt(index);
        // Plain range checks: no lookup table has to be allocated on each call.
        final boolean separator = c == '\t' || c == ' '
                || (c >= '0' && c <= '9')
                || c == 0x00A0 || c == 0x1680 || c == 0x180E
                || (c >= 0x2000 && c <= 0x200A)
                || c == 0x202F || c == 0x205F || c == 0x3000;
        if (separator) {
            break;
        }
        index++;
    }
    return index;
}

final String escapedPhrase = Pattern.quote(agoPhrase.toLowerCase());
final String escapedSeparator = patternsHolder.wordSeparator().equals(" ")
// From JDK8 → \h - Treat horizontal spaces as a normal one
// (non-breaking space, thin space, etc.)
// Also split the string on numbers to be able to parse strings like "2wk"
? "[ \\t\\xA0\\u1680\\u180e\\u2000-\\u200a\\u202f\\u205f\\u3000\\d]"
: Pattern.quote(patternsHolder.wordSeparator());

// (^|separator)pattern($|separator)
// Check if the pattern is surrounded by separators or start/end of the string.
final String pattern =
"(^|" + escapedSeparator + ")" + escapedPhrase + "($|" + escapedSeparator + ")";
/**
 * Extracts the first word of {@code s}: the run of characters that starts at the first
 * non-separator character and ends right before the next separator (or at the end of the
 * string).
 *
 * @param s the string to read the word from
 * @return the first word, without the separators preceding it, or an empty string if
 *         {@code s} holds no non-separator character
 */
public static String getNextWord(final String s) {
    final int start = getNextNonBlankIndex(s);
    if (start == s.length()) {
        return "";
    }
    // The character at 'start' is known to belong to the word, so the search for the
    // closing separator begins one position later. An empty tail yields offset 0, which
    // makes a word that is the very last character come out as a one-character string.
    final String tail = s.substring(start + 1);
    final int end = start + 1 + getNextBlankIndex(tail);
    return s.substring(start, end);
}

return Parser.isMatch(pattern, textualDate.toLowerCase());
/**
 * Checks whether the given phrase is one of the words of a textual date.
 *
 * <p>The comparison ignores the case of the phrase: the words are expected to be already
 * lower-cased by the caller, so the phrase is lower-cased the same way before comparing.
 * Without this, a phrase containing an upper-case letter could never match.</p>
 *
 * @param words     the lower-cased words of the textual date
 * @param agoPhrase the phrase to look for
 * @return {@code true} if a word equals the lower-cased phrase, {@code false} otherwise
 */
private boolean textualDateMatches(final List<String> words, final String agoPhrase) {
    // Lower-case once, outside the loop.
    final String lowerCasePhrase = agoPhrase.toLowerCase();
    for (final String word : words) {
        if (lowerCasePhrase.equals(word)) {
            return true;
        }
    }
    return false;
}

private DateWrapper getResultFor(final int timeAgoAmount, final ChronoUnit chronoUnit) {
Expand Down

0 comments on commit a8699ba

Please sign in to comment.