Skip to content

Commit 17fd83d

Browse files
committed
Switch from logging to ProfilingTimer
1 parent c2eb04a commit 17fd83d

File tree

1 file changed

+31
-17
lines changed

1 file changed

+31
-17
lines changed

src/main/java/com/medallia/word2vec/Word2VecModel.java

Lines changed: 31 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,9 @@
1717
import com.google.common.primitives.Doubles;
1818
import com.medallia.word2vec.thrift.Word2VecModelThrift;
1919
import com.medallia.word2vec.util.Common;
20-
import org.apache.log4j.Logger;
20+
import com.medallia.word2vec.util.ProfilingTimer;
21+
import com.medallia.word2vec.util.AC;
22+
2123

2224
/**
2325
* Represents the Word2Vec model, containing vectors for each word
@@ -31,8 +33,6 @@
3133
* @see {@link #forSearch()}
3234
*/
3335
public class Word2VecModel {
34-
static Logger logger = Logger.getLogger(Word2VecModel.class);
35-
3636
final List<String> vocab;
3737
final int layerSize;
3838
final double[] vectors;
@@ -80,23 +80,35 @@ public static Word2VecModel fromTextFile(File file) throws IOException {
8080
}
8181

8282
/**
83-
* Forwards to {@link #fromBinFile(File, ByteOrder)} with the default
84-
* ByteOrder.LITTLE_ENDIAN
83+
* Forwards to {@link #fromBinFile(File, ByteOrder, ProfilingTimer)} with the default
84+
* ByteOrder.LITTLE_ENDIAN and no ProfilingTimer
8585
*/
8686
public static Word2VecModel fromBinFile(File file)
8787
throws IOException {
88-
return fromBinFile(file, ByteOrder.LITTLE_ENDIAN);
88+
return fromBinFile(file, ByteOrder.LITTLE_ENDIAN, ProfilingTimer.NONE);
89+
}
90+
91+
/**
92+
* Forwards to {@link #fromBinFile(File, ByteOrder, ProfilingTimer)} with no ProfilingTimer
93+
*/
94+
public static Word2VecModel fromBinFile(File file, ByteOrder byteOrder)
95+
throws IOException {
96+
return fromBinFile(file, byteOrder, ProfilingTimer.NONE);
8997
}
9098

9199
/**
92100
* @return {@link Word2VecModel} created from the binary representation output
93101
* by the open source C version of word2vec using the given byte order.
94102
*/
95-
public static Word2VecModel fromBinFile(File file, ByteOrder byteOrder)
103+
public static Word2VecModel fromBinFile(File file, ByteOrder byteOrder, ProfilingTimer timer)
96104
throws IOException {
97105

98-
try (FileInputStream fis = new FileInputStream(file)) {
106+
try (
107+
final FileInputStream fis = new FileInputStream(file);
108+
final AC ac = timer.start("Loading vectors from bin file")
109+
) {
99110
final FileChannel channel = fis.getChannel();
111+
timer.start("Reading gigabyte #1");
100112
MappedByteBuffer buffer =
101113
channel.map(
102114
FileChannel.MapMode.READ_ONLY,
@@ -122,8 +134,10 @@ public static Word2VecModel fromBinFile(File file, ByteOrder byteOrder)
122134

123135
final int vocabSize = Integer.parseInt(firstLine.substring(0, index));
124136
final int layerSize = Integer.parseInt(firstLine.substring(index + 1));
125-
logger.info(
126-
String.format("Loading %d vectors with dimensionality %d", vocabSize, layerSize));
137+
timer.appendToLog(String.format(
138+
"Loading %d vectors with dimensionality %d",
139+
vocabSize,
140+
layerSize));
127141

128142
List<String> vocabs = new ArrayList<String>(vocabSize);
129143
double vectors[] = new double[vocabSize * layerSize];
@@ -156,7 +170,7 @@ public static Word2VecModel fromBinFile(File file, ByteOrder byteOrder)
156170
final long now = System.currentTimeMillis();
157171
if (now - lastLogMessage > 1000) {
158172
final double percentage = ((double) (lineno + 1) / (double) vocabSize) * 100.0;
159-
logger.info(
173+
timer.appendToLog(
160174
String.format("Loaded %d/%d vectors (%f%%)", lineno + 1, vocabSize, percentage));
161175
lastLogMessage = now;
162176
}
@@ -165,12 +179,11 @@ public static Word2VecModel fromBinFile(File file, ByteOrder byteOrder)
165179
if (buffer.position() > ONE_GB) {
166180
final int newPosition = (int) (buffer.position() - ONE_GB);
167181
final long size = Math.min(channel.size() - ONE_GB * bufferCount, Integer.MAX_VALUE);
168-
logger.debug(
169-
String.format(
170-
"Remapping for GB number %d. Start: %d, size: %d",
171-
bufferCount,
172-
ONE_GB * bufferCount,
173-
size));
182+
timer.endAndStart(
183+
"Reading gigabyte #%d. Start: %d, size: %d",
184+
bufferCount,
185+
ONE_GB * bufferCount,
186+
size);
174187
buffer = channel.map(
175188
FileChannel.MapMode.READ_ONLY,
176189
ONE_GB * bufferCount,
@@ -180,6 +193,7 @@ public static Word2VecModel fromBinFile(File file, ByteOrder byteOrder)
180193
bufferCount += 1;
181194
}
182195
}
196+
timer.end();
183197

184198
return new Word2VecModel(vocabs, layerSize, vectors);
185199
}

0 commit comments

Comments
 (0)