Skip to content

Added pruneUselessLabels to SparseNetworkLearner #110

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions lbjava-examples/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<artifactId>lbjava-project</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>1.3.1</version>
<version>1.3.3</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand All @@ -27,12 +27,12 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>LBJava</artifactId>
<version>1.3.1</version>
<version>1.3.2</version>
</dependency>
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>lbjava-maven-plugin</artifactId>
<version>1.3.1</version>
<version>1.3.2</version>
</dependency>
</dependencies>

Expand Down Expand Up @@ -63,7 +63,7 @@
<plugin>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>lbjava-maven-plugin</artifactId>
<version>1.3.1</version>
<version>1.3.2</version>
<configuration>
<gspFlag>${project.basedir}/src/main/java</gspFlag>
<dFlag>${project.basedir}/target/classes</dFlag>
Expand Down
4 changes: 2 additions & 2 deletions lbjava-mvn-plugin/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<parent>
<artifactId>lbjava-project</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>1.3.1</version>
<version>1.3.3</version>
</parent>

<artifactId>lbjava-maven-plugin</artifactId>
Expand Down Expand Up @@ -76,7 +76,7 @@
<dependency>
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>LBJava</artifactId>
<version>1.3.1</version>
<version>1.3.2</version>
<type>jar</type>
<scope>compile</scope>
</dependency>
Expand Down
2 changes: 1 addition & 1 deletion lbjava/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
<parent>
<artifactId>lbjava-project</artifactId>
<groupId>edu.illinois.cs.cogcomp</groupId>
<version>1.3.1</version>
<version>1.3.3</version>
</parent>

<modelVersion>4.0.0</modelVersion>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ public Feature encode(String e) {
* @return The hash code of this feature.
**/
public int hashCode() {
return 31 * super.hashCode() + 17 * identifier.hashCode() + value.hashCode();
return super.hashCode() + 17 * identifier.hashCode() + value.hashCode();
}


Expand All @@ -237,8 +237,7 @@ public boolean equals(Object o) {
return false;
if (o instanceof DiscretePrimitiveStringFeature) {
DiscretePrimitiveStringFeature f = (DiscretePrimitiveStringFeature) o;
return identifier.equals(f.identifier) && valueIndex > -1 ? valueIndex == f.valueIndex
: value.equals(f.value);
return identifier.equals(f.identifier) && value.equals(f.value);
}

DiscretePrimitiveFeature f = (DiscretePrimitiveFeature) o;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import edu.illinois.cs.cogcomp.lbjava.util.ClassUtils;
import edu.illinois.cs.cogcomp.lbjava.util.FVector;
import edu.illinois.cs.cogcomp.lbjava.util.TableFormat;
import gnu.trove.map.hash.THashMap;


/**
Expand Down Expand Up @@ -132,7 +133,7 @@ public static Lexicon readLexicon(ExceptionlessInputStream in, boolean readCount

// Member variables.
/** The map of features to integer keys. */
protected Map lexicon;
protected Map<Feature, Integer> lexicon;
/** The inverted map of integer keys to their features. */
protected FVector lexiconInv;
/** The encoding to use for new features added to this lexicon. */
Expand Down Expand Up @@ -182,7 +183,7 @@ public Lexicon(String e) {

/** Clears the data structures associated with this instance. */
public void clear() {
lexicon = new HashMap();
lexicon = new THashMap();
lexiconInv = new FVector();
lexiconChildren = null;
pruneCutoff = -1;
Expand Down Expand Up @@ -709,7 +710,7 @@ public Object clone() {
}

if (lexicon != null) {
clone.lexicon = new HashMap();
clone.lexicon = new THashMap();
clone.lexicon.putAll(lexicon);
}
clone.lexiconInv = (FVector) lexiconInv.clone();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
package edu.illinois.cs.cogcomp.lbjava.learn;

import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map.Entry;

import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessInputStream;
import edu.illinois.cs.cogcomp.core.datastructures.vectors.ExceptionlessOutputStream;
Expand Down Expand Up @@ -688,6 +688,44 @@ public void read(ExceptionlessInputStream in) {
for (int i = 0; i < N; ++i)
network.add(Learner.readLearner(in));
}

/**
 * Discards the learners whose labels are not listed in {@code keepers}. For labels that are
 * not needed at runtime, this can improve performance as well as memory footprint. For example,
 * imagine you have a 4 class model, PER, ORG, LOC and OTHER, but you could not care less about
 * OTHER. In this case, you could eliminate that label and improve the performance of the model
 * proportionally.
 * <p>
 * Labels are compared against {@code keepers} after removing a two-character positional prefix
 * (B-, I-, L- or U-) when one is present; labels without such a prefix are compared as they
 * are. Labels of at most two characters (such as "O") are never pruned.
 * </p>
 * <p>
 * <b>Use of this feature may cause terms previously classified by a discarded classifier to be
 * labeled as one of the remaining classes.</b>
 * </p>
 *
 * @param keepers The only labels to keep, given without any positional prefix (e.g. "PER",
 *        not "B-PER"). Must not be <code>null</code>.
 */
public void pruneUnusedLabels(ArrayList<String> keepers) {
    int N = network.size();
    for (int i = 0; i < N; ++i) {
        // An empty slot has no learner to discard.
        if (network.get(i) == null)
            continue;

        String label = labelLexicon.lookupKey(i).getStringValue();

        // Keep other ("O") and any similarly short label; this is like a non-label to begin with.
        if (label.length() <= 2)
            continue;

        // Take off the B-, I-, L- or U- only when the label really carries such a prefix;
        // stripping unconditionally would turn a plain label like "PER" into "R" and prune it.
        if (label.charAt(1) == '-')
            label = label.substring(2);

        if (!keepers.contains(label))
            network.set(i, null);
    }
}

/** Returns a deep clone of this learning algorithm. */
public Object clone() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
public class ArrayFileParser implements Parser {
/** Reader for file currently being parsed. */
protected DataInputStream in;
/** The zip file, which must also be closed if this is a compressed file. */
protected ZipFile zipFile=null;
/** The name of the file to parse. */
protected String exampleFileName;
/** A single array from which all examples can be parsed. */
Expand Down Expand Up @@ -190,13 +192,11 @@ public void reset() {
try {
if (exampleFileName != null) {
if (zipped) {
ZipFile zip = new ZipFile(exampleFileName);
in =
new DataInputStream(new BufferedInputStream(zip.getInputStream(zip
zipFile = new ZipFile(exampleFileName);
in = new DataInputStream(new BufferedInputStream(zipFile.getInputStream(zipFile
.getEntry(ExceptionlessInputStream.zipEntryName))));
} else
in =
new DataInputStream(new BufferedInputStream(new FileInputStream(
in = new DataInputStream(new BufferedInputStream(new FileInputStream(
exampleFileName)));
} else if (zipped) {
ZipInputStream zip = new ZipInputStream(new ByteArrayInputStream(exampleData));
Expand All @@ -218,6 +218,9 @@ public void close() {
return;
try {
in.close();
if (zipFile != null) {
zipFile.close();
}
} catch (Exception e) {
System.err.println("Can't close '" + exampleFileName + "':");
e.printStackTrace();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,4 @@ public class SparseNetworkLearningPruneTest {
/**
 * Placeholder test case: the body is empty, so it passes without exercising any code.
 * TODO(review): presumably meant to cover label pruning, judging by the enclosing class name;
 * confirm and add assertions.
 */
@Test
public void test() {
}

}
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
<groupId>edu.illinois.cs.cogcomp</groupId>
<artifactId>lbjava-project</artifactId>
<packaging>pom</packaging>
<version>1.3.1</version>
<version>1.3.3</version>

<modules>
<module>lbjava</module>
Expand Down