Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reduce space used by OBO Parser #429

Merged
merged 5 commits into from
Aug 9, 2015
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Add Guava cache for Strings to OBOFormatParser. The cache is restricted to a
total weight of 8192*1024 (about 8M) chars, weighing each entry by its key length.

Cached values are looked up in getParseUntil(String,boolean)

Call freeze on header frame, term frames, and typedef frames.
  • Loading branch information
sesuncedu committed Aug 8, 2015
commit c990b220c8eea699ebba02275edfe8c36a99d5c8
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
package org.obolibrary.oboformat.parser;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.Weigher;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
Expand All @@ -13,12 +16,15 @@
import java.util.Date;
import java.util.LinkedList;
import java.util.List;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import org.obolibrary.oboformat.model.*;
import org.obolibrary.oboformat.model.Clause;
import org.obolibrary.oboformat.model.Frame;
import org.obolibrary.oboformat.model.Frame.FrameType;
import org.obolibrary.oboformat.model.FrameMergeException;
import org.obolibrary.oboformat.model.OBODoc;
import org.obolibrary.oboformat.model.QualifierValue;
import org.obolibrary.oboformat.model.Xref;
import org.obolibrary.oboformat.parser.OBOFormatConstants.OboFormatTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -37,6 +43,40 @@ protected static SimpleDateFormat getISODateFormat() {
private boolean followImport;
private Object location;
protected final MyStream stream;
public final com.google.common.cache.LoadingCache<String,String> stringCache;

/**
* Creates a parser by delegating to {@link #OBOFormatParser(MyStream)} with
* a newly constructed {@code MyStream}.
*/
public OBOFormatParser() {
this(new MyStream());
}

/**
 * Creates a parser that reads from the given stream and builds the string
 * cache held in {@code stringCache}.
 * <p>
 * The cache maps every string to itself, charges each entry the length of
 * its key, and is capped at a total weight of {@code 8192 * 1024}. Cache
 * statistics are recorded only when debug logging is enabled for this class.
 *
 * @param s
 *        the stream this parser reads from
 */
protected OBOFormatParser(MyStream s) {
    stream = s;
    // Identity loader: a string that is not cached yet is stored as itself.
    CacheLoader<String, String> identityLoader = new CacheLoader<String, String>() {
        @Override
        public String load(String key) throws Exception {
            return key;
        }
    };
    // Key and value are the same string, so only the key length is charged.
    Weigher<String, String> lengthWeigher = new Weigher<String, String>() {
        @Override
        public int weigh(String key, String value) {
            return key.length();
        }
    };
    CacheBuilder<Object, Object> builder = CacheBuilder.newBuilder();
    if (LOG.isDebugEnabled()) {
        // Statistics are only collected when debug logging is on.
        builder.recordStats();
    }
    stringCache = builder.maximumWeight(8192 * 1024).weigher(lengthWeigher).build(identityLoader);
}


protected static class MyStream {

Expand Down Expand Up @@ -158,17 +198,6 @@ public int getLineNo() {
}
}

/**
*
*/
public OBOFormatParser() {
this(new MyStream());
}

protected OBOFormatParser(MyStream s) {
stream = s;
}

/**
* @param r
* r
Expand Down Expand Up @@ -347,6 +376,7 @@ public void parseOBODoc(@Nonnull OBODoc obodoc) {
Frame h = new Frame(FrameType.HEADER);
obodoc.setHeaderFrame(h);
parseHeaderFrame(h);
h.freeze();
parseZeroOrMoreWsOptCmtNl();
while (!stream.eof()) {
parseEntityFrame(obodoc);
Expand Down Expand Up @@ -573,6 +603,7 @@ public void parseTermFrame(@Nonnull OBODoc obodoc) {
parseZeroOrMoreWsOptCmtNl();
}
try {
f.freeze();
obodoc.addFrame(f);
} catch (FrameMergeException e) {
throw new OBOFormatParserException("Could not add frame " + f
Expand Down Expand Up @@ -710,6 +741,7 @@ public void parseTypedefFrame(@Nonnull OBODoc obodoc) {
parseZeroOrMoreWsOptCmtNl();
}
try {
f.freeze();
obodoc.addFrame(f);
} catch (FrameMergeException e) {
throw new OBOFormatParserException("Could not add frame " + f
Expand Down Expand Up @@ -1435,7 +1467,13 @@ private String getParseUntil(@Nonnull String compl, boolean commaWhitespace) {
ret = sb.toString();
}
stream.advance(i);
return ret;
String cachedValue = stringCache.getUnchecked(ret);
if(LOG.isTraceEnabled()) {
if (ret != cachedValue) {
LOG.trace("Cache hit for {}", cachedValue);
}
}
return cachedValue;
}

@Nonnull
Expand Down