Skip to content

Feature/#61: Improve logfile parsing speed by factor 2 for AbstractDataReaderSun #177

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 32 additions & 41 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@
<maven.antrun.plugin.version>1.8</maven.antrun.plugin.version>
<maven.compiler.plugin.version>3.3</maven.compiler.plugin.version>
<maven.buildnumber.plugin.version>1.3</maven.buildnumber.plugin.version>
<maven.jar.plugin.version>2.6</maven.jar.plugin.version>
<maven.shade.plugin.version>2.4.3</maven.shade.plugin.version>
<maven.surefire.plugin.version>2.18.1</maven.surefire.plugin.version>
<maven.release.plugin.version>2.5.2</maven.release.plugin.version>
<maven.scm.plugin.version>1.9.4</maven.scm.plugin.version>
Expand All @@ -187,6 +187,12 @@
</scm>

<dependencies>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.4</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Expand Down Expand Up @@ -271,46 +277,6 @@
<getRevisionOnlyOnce>true</getRevisionOnlyOnce>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>${maven.jar.plugin.version}</version>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
<mainClass>com.tagtraum.perf.gcviewer.GCViewer</mainClass>
<addClasspath>true</addClasspath>
</manifest>
<manifestEntries>
<Implementation-Date>${timestamp}</Implementation-Date>
<Implementation-Revision>${revision}</Implementation-Revision>
</manifestEntries>
</archive>
<excludes>
<exclude>**/*.psd</exclude>
</excludes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>${maven.javadoc.plugin.version}</version>
<configuration>
<archive>
<manifest>
<addDefaultImplementationEntries>true</addDefaultImplementationEntries>
</manifest>
<manifestEntries>
<Implementation-Date>${timestamp}</Implementation-Date>
<Implementation-Revision>${revision}</Implementation-Revision>
</manifestEntries>
</archive>
<links>
<link>http://docs.oracle.com/javase/8/docs/api/</link>
</links>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
Expand Down Expand Up @@ -536,6 +502,31 @@
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>${maven.shade.plugin.version}</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<minimizeJar>true</minimizeJar>
<createDependencyReducedPom>true</createDependencyReducedPom>
<dependencyReducedPomLocation>
${java.io.tmpdir}/dependency-reduced-pom.xml
</dependencyReducedPomLocation>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.tagtraum.perf.gcviewer.GCViewer</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>

</build>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@
import com.tagtraum.perf.gcviewer.model.GCResource;
import com.tagtraum.perf.gcviewer.util.NumberParser;
import com.tagtraum.perf.gcviewer.util.ParseInformation;
import org.apache.commons.lang3.time.FastDateFormat;

import java.io.IOException;
import java.io.InputStream;
import java.io.LineNumberReader;
import java.io.UnsupportedEncodingException;
import java.time.ZoneId;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.time.temporal.ChronoUnit;
import java.util.LinkedList;
import java.util.List;
Expand All @@ -33,7 +34,8 @@
*/
public abstract class AbstractDataReaderSun extends AbstractDataReader {

public static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss.SSSZ");
public static final String TIMESTAMP_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.SSSZ";
public static final FastDateFormat DATE_TIME_FORMATTER = FastDateFormat.getInstance(TIMESTAMP_PATTERN);
private static final int LENGTH_OF_DATESTAMP = 29;

private static final String CMS_PRINT_PROMOTION_FAILURE = "promotion failure size";
Expand All @@ -58,6 +60,8 @@ public abstract class AbstractDataReaderSun extends AbstractDataReader {

/** the log type allowing for small differences between different versions of the gc logs */
protected GcLogType gcLogType;
/** the ZoneId of the GcResource that is being parsed. Lazily initialized. Assumption: constant for a GcResource */
private ZoneId zoneId;

/**
* Create an instance of this class passing an inputStream an the type of the logfile.
Expand Down Expand Up @@ -231,8 +235,8 @@ protected void setMemory(GCEvent event, String line, ParseInformation pos) throw
}

protected double parsePause(String line, ParseInformation pos) throws ParseException {
// usual pattern expected: "..., 0.002032 secs]"
// but may be as well (G1): "..., 0.003032]"
// usual pattern expected: "..., 0.002032 secs]"
// but may be as well (G1): "..., 0.003032]"

// if the next token is "icms_dc" skip until after the comma
// ...] icms_dc=0 , 8.0600619 secs]
Expand Down Expand Up @@ -476,20 +480,33 @@ protected ZonedDateTime parseDatestamp(String line, ParseInformation pos) throws
ZonedDateTime zonedDateTime = null;
if (nextIsDatestamp(line, pos)) {
try {
zonedDateTime = ZonedDateTime.parse(line.substring(pos.getIndex(), pos.getIndex() + LENGTH_OF_DATESTAMP - 1),
DATE_TIME_FORMATTER);
String timestamp = line.substring(pos.getIndex(), pos.getIndex() + LENGTH_OF_DATESTAMP - 1);
zonedDateTime = parseDatestamp(timestamp);
pos.setIndex(pos.getIndex() + LENGTH_OF_DATESTAMP);
if (pos.getFirstDateStamp() == null) {
pos.setFirstDateStamp(zonedDateTime);
}
} catch (DateTimeParseException e){
} catch (java.text.ParseException e){
throw new ParseException(e.toString(), line);
}
}

return zonedDateTime;
}

private ZonedDateTime parseDatestamp(String timestamp) throws java.text.ParseException {
ZoneId zone = getOrParseZoneId(timestamp);
// Faster than using ZonedDateTime#parse. Zone must be explicitly set in order to keep original offset value.
return ZonedDateTime.from(DATE_TIME_FORMATTER.parse(timestamp).toInstant().atZone(zone));
}

private ZoneId getOrParseZoneId(String timestamp) {
if (zoneId == null) {
zoneId = ZonedDateTime.parse(timestamp, DateTimeFormatter.ofPattern(TIMESTAMP_PATTERN)).getZone();
}
return zoneId;
}

/**
* Returns <code>true</code> if text at parsePosition is a datestamp.
*
Expand Down
26 changes: 23 additions & 3 deletions src/main/java/com/tagtraum/perf/gcviewer/imp/DataReaderFacade.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import com.tagtraum.perf.gcviewer.util.BuildInfoReader;
import com.tagtraum.perf.gcviewer.util.HttpUrlConnectionHelper;
import com.tagtraum.perf.gcviewer.util.LocalisationHelper;
import org.apache.commons.lang3.time.DurationFormatUtils;
import org.apache.commons.lang3.time.StopWatch;

import java.beans.PropertyChangeListener;
import java.io.File;
Expand All @@ -19,6 +21,7 @@
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
Expand Down Expand Up @@ -83,13 +86,20 @@ public GCModel loadModel(GCResource gcResource) throws DataReaderException {
/**
* Loads the {@link GCResource}s as a rotated series of logfiles. Takes care of ordering them
*
* @param gcResource the {@link GcResourceSeries} to load
* @param series the {@link GcResourceSeries} to load
* @return a {@link GCModel} containing all events found in the given {@link GCResource}s that were readable
* @throws DataReaderException
*/
protected GCModel loadModelFromSeries(GcResourceSeries gcResource) throws DataReaderException {
protected GCModel loadModelFromSeries(GcResourceSeries series) throws DataReaderException {
StopWatch stopWatch = new StopWatch();
stopWatch.start();

GcSeriesLoader seriesLoader = new GcSeriesLoader(this);
return seriesLoader.load(gcResource);
GCModel model = seriesLoader.load(series);

stopWatch.stop();
series.getLogger().log(Level.INFO, "Parsing logfile series containing of " + series.getResourcesInOrder().size() + " files took " + getDurationFormatted(stopWatch));
return model;
}

/**
Expand Down Expand Up @@ -126,11 +136,21 @@ private GCModel readModel(GcResourceFile gcResource) throws IOException {
}
}

StopWatch stopWatch = new StopWatch();
stopWatch.start();

DataReader reader = factory.getDataReader(gcResource, in);
GCModel model = reader.read();
model.setURL(url);

stopWatch.stop();
gcResource.getLogger().log(Level.INFO, "Parsing logfile " + gcResource.getResourceName() + " took " + getDurationFormatted(stopWatch));

return model;
}

private String getDurationFormatted(StopWatch stopWatch) {
return DurationFormatUtils.formatDuration(stopWatch.getTime(), "s,SS") + "s";
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,12 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;

import static org.hamcrest.core.Is.is;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;

public class TestAbstractDataReaderSun {

Expand Down Expand Up @@ -90,7 +94,16 @@ public void setExtendedMemoryFloatingPointPreEden_postEden() throws ParseExcepti
assertEquals("heap before", 121344, event.getPreUsed());
assertEquals("heap after", 128, event.getPostUsed());
}


@Test
public void testParseDatestamp() throws Exception {
String line =
"2016-04-14T22:37:55.315+0200: 467.260: [GC (Allocation Failure) 467.260: [ParNew: 226563K->6586K(245760K), 0.0044323 secs] 385679K->165875K(791936K), 0.0045438 secs] [Times: user=0.00 sys=0.00, real=0.00 secs] ";
ParseInformation pos = new ParseInformation(0);
ZonedDateTime time = dataReader.parseDatestamp(line, pos);
assertThat(time, is(ZonedDateTime.of(2016, 4, 14, 22, 37, 55, 315000000, ZoneOffset.ofHours(2))));
}

/**
* Subclass of {@link AbstractDataReaderSun} which makes those methods public, I want to test here.
*
Expand Down
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
package com.tagtraum.perf.gcviewer.imp;

import com.tagtraum.perf.gcviewer.UnittestHelper;
import com.tagtraum.perf.gcviewer.model.GcResourceFile;
import com.tagtraum.perf.gcviewer.model.GCEvent;
import com.tagtraum.perf.gcviewer.model.GCModel;
import com.tagtraum.perf.gcviewer.model.GCResource;
import com.tagtraum.perf.gcviewer.model.GcResourceFile;
import org.junit.Test;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.time.format.DateTimeFormatter;
import java.util.logging.Level;

import static org.hamcrest.Matchers.equalTo;
Expand All @@ -20,27 +20,25 @@

public class TestDataReaderSun1_6_0 {

private final DateTimeFormatter dateTimeFormatter = AbstractDataReaderSun.DATE_TIME_FORMATTER;

private InputStream getInputStream(String fileName) throws IOException {
return UnittestHelper.getResourceAsStream(UnittestHelper.FOLDER_OPENJDK, fileName);
}

@Test
public void testPrintGCDateStamps() throws Exception {
ByteArrayInputStream in = new ByteArrayInputStream(
("2011-10-05T04:23:39.427+0200: 19.845: [GC 19.845: [ParNew: 93184K->5483K(104832K), 0.0384413 secs] 93184K->5483K(1036928K), 0.0388082 secs] [Times: user=0.41 sys=0.06, real=0.04 secs]")
.getBytes());

ByteArrayInputStream in =
new ByteArrayInputStream(("2011-10-05T04:23:39.427+0200: 19.845: [GC 19.845: [ParNew: 93184K->5483K(104832K), 0.0384413 secs] 93184K->5483K(1036928K), 0.0388082 secs] [Times: user=0.41 sys=0.06, real=0.04 secs]").getBytes());

DataReader reader = new DataReaderSun1_6_0(new GcResourceFile("byteArray"), in, GcLogType.SUN1_6);
GCModel model = reader.read();
GCModel model = reader.read();

assertTrue("hasDateStamp", model.hasDateStamp());
assertEquals("DateStamp",
ZonedDateTime.parse("2011-10-05T04:23:39.427+0200", dateTimeFormatter),
model.getFirstDateStamp());
assertTrue("hasDateStamp", model.hasDateStamp());
ZonedDateTime expected =
ZonedDateTime.from(AbstractDataReaderSun.DATE_TIME_FORMATTER.parse("2011-10-05T04:23:39.427+0200").toInstant().atZone(ZoneOffset.ofHours(2)));
assertEquals("DateStamp", expected, model.getFirstDateStamp());
assertEquals("gc pause", 0.0388082, model.getGCPause().getMax(), 0.000001);
}
}

@Test
public void testCMSPromotionFailed() throws Exception {
Expand Down
Loading