Hopefully fix an issue in OpenIE where graphs can be disconnected aft… #1230

Merged 1 commit on Dec 15, 2021
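The gist of the change: Util.cleanTree (and stripPrepCases) now take the original dependency graph alongside the copy they mutate, so that a NotTreeException can report both the broken graph and the graph it started from, and extra incoming edges are only pruned when the node verifiably stays connected to the root. A minimal sketch of the caller side, mirroring the OpenIE.java hunk further down (variable names follow that hunk):

// Work on a copy of the sentence's dependency graph, and hand the untouched
// original to cleanTree so it can be reported if the cleanup breaks the tree.
SemanticGraph originalParse = sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
if (originalParse == null) {
  originalParse = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
}
SemanticGraph parse = new SemanticGraph(originalParse);
Util.cleanTree(parse, originalParse);
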
@@ -110,6 +110,7 @@ public void testParseArray() {

@Test
public void testParseTrees() {
// NOTE: this is not a legal basic dependency tree, as "curling" is the dependent of two relations
List<List<String>> entries = Collections.singletonList(
Arrays.asList(
"3424",
@@ -279,7 +279,8 @@ protected ClauseSplitterSearchProblem(SemanticGraph tree, boolean assumedTruth,
extraEdgesByGovernor.put(vertex, new ArrayList<>());
extraEdgesByDependent.put(vertex, new ArrayList<>());
}
List<SemanticGraphEdge> extraEdges = Util.cleanTree(this.tree);
SemanticGraph originalTree = new SemanticGraph(this.tree);
List<SemanticGraphEdge> extraEdges = Util.cleanTree(this.tree, originalTree);
assert Util.isTree(this.tree);
for (SemanticGraphEdge edge : extraEdges) {
extraEdgesByGovernor.get(edge.getGovernor()).add(edge);
@@ -166,7 +166,7 @@ private List<SearchResult> searchImplementation() {
}
}
// Clean the tree
Util.cleanTree(parseTree);
Util.cleanTree(parseTree, this.parseTree);
assert Util.isTree(parseTree);

// Find the subject / object split
9 changes: 7 additions & 2 deletions src/edu/stanford/nlp/naturalli/NotTreeException.java
@@ -4,7 +4,12 @@
import edu.stanford.nlp.semgraph.SemanticGraphEdge;

public class NotTreeException extends RuntimeException {
public NotTreeException(SemanticGraph graph, SemanticGraphEdge edge) {
super("The graph \n" + graph + "\nis not a tree after removing\n" + edge);
SemanticGraph brokenGraph;
SemanticGraph originalGraph;

public NotTreeException(SemanticGraph graph, SemanticGraph originalGraph) {
super("The graph \n" + graph + "\nis not a tree after its surgery. Original graph:\n" + originalGraph);
this.brokenGraph = graph;
this.originalGraph = originalGraph;
}
}
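
Because the exception now carries both the post-surgery graph and the graph it came from, a caller can log the pair instead of crashing. A hedged sketch of such a handler (hypothetical, not part of this patch):

try {
  Util.cleanTree(parse, originalParse);
} catch (NotTreeException e) {
  // The exception message already embeds both the broken graph and the original graph.
  System.err.println("cleanTree produced a non-tree graph: " + e.getMessage());
  // e.g. skip OpenIE extraction for this sentence rather than abort the pipeline
}
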
12 changes: 6 additions & 6 deletions src/edu/stanford/nlp/naturalli/OpenIE.java
@@ -462,16 +462,16 @@ public void annotateSentence(CoreMap sentence, Map<CoreLabel, List<CoreLabel>> c
} else {

// Get the dependency tree
SemanticGraph parse = sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
if (parse == null) {
parse = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
SemanticGraph originalParse = sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
if (originalParse == null) {
originalParse = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
}
if (parse == null) {
if (originalParse == null) {
throw new IllegalStateException("Cannot run OpenIE without a parse tree!");
}
// Clean the tree
parse = new SemanticGraph(parse);
Util.cleanTree(parse);
SemanticGraph parse = new SemanticGraph(originalParse);
Util.cleanTree(parse, originalParse);

// Resolve Coreference
SemanticGraph canonicalizedParse = parse;
109 changes: 83 additions & 26 deletions src/edu/stanford/nlp/naturalli/Util.java
@@ -142,6 +142,29 @@ public static void annotate(CoreMap sentence, AnnotationPipeline pipeline) {
pipeline.annotate(ann);
}

/**
* Verify that removing the given edges from the graph does not leave it disconnected,
* i.e. that the dependent is still reachable from the root afterwards.
*<br>
* The edge specified in toKeep is not removed.
*/
public static boolean verifyRemoval(SemanticGraph tree, Collection<SemanticGraphEdge> removeEdges, SemanticGraphEdge toKeep, IndexedWord dependent) {
SemanticGraph copy = tree.makeSoftCopy();
IndexedWord root = copy.getFirstRoot();

for (SemanticGraphEdge remove : removeEdges) {
if (remove == toKeep) {
continue;
}
boolean removed = copy.removeEdge(remove);
assert removed;
}
if (copy.getShortestDirectedPathNodes(root, dependent) == null) {
return false;
}
return true;
}
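
A quick illustration of the check, as a sketch only (the SemanticGraph.valueOf format and the relation names here are assumptions, not part of this patch):

// Tiny graph: root "swim" governs "fish" (nsubj) and "splash" (conj).
SemanticGraph tree = SemanticGraph.valueOf("[swim nsubj>fish conj>splash]");
IndexedWord fish = null;
for (IndexedWord vertex : tree.vertexSet()) {
  if ("fish".equals(vertex.word())) {
    fish = vertex;
  }
}
List<SemanticGraphEdge> incoming = tree.incomingEdgeList(fish);
SemanticGraphEdge toKeep = incoming.get(0);
// true: keeping the only incoming edge leaves "fish" reachable from the root
boolean safe = Util.verifyRemoval(tree, incoming, toKeep, fish);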


/**
* Fix some bizarre peculiarities with certain trees.
* So far, these include:
@@ -152,8 +175,8 @@ public static void annotate(CoreMap sentence, AnnotationPipeline pipeline) {
* @param tree The tree to clean (in place!).
* @return A list of extra edges, which are valid but were removed.
*/
public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) {
// assert !isCyclic(tree);
public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree, SemanticGraph originalTree) {
// assert !isCyclic(tree);

// Clean nodes
List<IndexedWord> toDelete = new ArrayList<>();
@@ -202,37 +225,69 @@ public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) {
toDelete.forEach(tree::removeVertex);
for (Triple<IndexedWord, IndexedWord, SemanticGraphEdge> edge : toAdd) {
tree.addEdge(edge.first, edge.second,
edge.third.getRelation(), edge.third.getWeight(), edge.third.isExtra());
edge.third.getRelation(), edge.third.getWeight(), edge.third.isExtra());
}

// Handle extra edges.
// Two cases:
// (1) the extra edge is a subj/obj edge and the main edge is a conj:.*
// in this case, keep the extra
// (2) otherwise, delete the extra
// exception: when removing the conj edge would also remove the
// path from the root to this node
// (2) otherwise, delete the extra(s)
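// e.g. in the enhanced graph for "John and Mary eat", "Mary" has an
// incoming conj edge from "John" plus an extra nsubj edge from "eat";
// case (1) keeps the nsubj edge and prunes the conj edge, since "Mary"
// stays reachable from the root through the kept nsubj edge.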
List<SemanticGraphEdge> extraEdges = new ArrayList<>();
for (SemanticGraphEdge edge : tree.edgeIterable()) {
if (edge.isExtra()) {
List<SemanticGraphEdge> incomingEdges = tree.incomingEdgeList(edge.getDependent());
SemanticGraphEdge toKeep = null;
for (SemanticGraphEdge candidate : incomingEdges) {
if (toKeep == null) {
toKeep = candidate;
} else if (toKeep.getRelation().toString().startsWith("conj") && candidate.getRelation().toString().matches(".subj.*|.obj.*")) {
toKeep = candidate;
} else if (!candidate.isExtra() &&
!(candidate.getRelation().toString().startsWith("conj") && toKeep.getRelation().toString().matches(".subj.*|.obj.*"))) {
toKeep = candidate;
for (IndexedWord vertex : tree.vertexSet()) {
// can't prune any edges if there is only one incoming edge.
// that would ruin the graph
// conversely, if there is more than one edge, at least one of
// them should be extra
if (tree.inDegree(vertex) <= 1) {
continue;
}

List<SemanticGraphEdge> incomingEdges = tree.incomingEdgeList(vertex);
SemanticGraphEdge toKeep = null;
SemanticGraphEdge original = null;
for (SemanticGraphEdge candidate : incomingEdges) {
if (!candidate.isExtra()) {
if (original != null) {
// TODO(horatio): This would be an ideal place to throw an
// error, but unfortunately even the basic graph can have
// errors with multiple incoming edges as currently constructed.
//throw new IllegalArgumentException("Somehow, the original edge returned\n Original graph:\n" + originalTree +
// "\n Current graph:\n" + tree +
// "\n First edge found: " + original +
// "\n Next edge found: " + candidate);
} else {
// either or both could be wrong, so we don't try to
// figure out which to keep
original = candidate;
}
}
for (SemanticGraphEdge candidate : incomingEdges) {
if (candidate != toKeep) {
extraEdges.add(candidate);
}
if (toKeep == null) {
toKeep = candidate;
} else if (toKeep.getRelation().toString().startsWith("conj") && candidate.getRelation().toString().matches(".subj.*|.obj.*")) {
toKeep = candidate;
} else if (!candidate.isExtra() &&
!(candidate.getRelation().toString().startsWith("conj") && toKeep.getRelation().toString().matches(".subj.*|.obj.*"))) {
toKeep = candidate;
}
}
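// If pruning every incoming edge except toKeep would disconnect this vertex
// from the root, fall back to the edge that came from the basic (non-extra) graph.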
if (!verifyRemoval(tree, incomingEdges, toKeep, toKeep.getDependent())) {
if (original == null) {
throw new RuntimeException("Could not find a valid edge to remove");
}
toKeep = original;
}
List<SemanticGraphEdge> removeEdges = new ArrayList<>();
for (SemanticGraphEdge candidate : incomingEdges) {
if (candidate != toKeep) {
removeEdges.add(candidate);
}
}
removeEdges.forEach(tree::removeEdge);
extraEdges.addAll(removeEdges);
}
extraEdges.forEach(tree::removeEdge);

// Add apposition edges (simple coref)
for (SemanticGraphEdge extraEdge : new ArrayList<>(extraEdges)) { // note[gabor] prevent concurrent modification exception
@@ -321,7 +376,9 @@ public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) {
}

// Return
assert isTree(tree);
if (!isTree(tree)) {
throw new NotTreeException(tree, originalTree);
}
return extraEdges;
}

@@ -331,7 +388,7 @@ public static List<SemanticGraphEdge> cleanTree(SemanticGraph tree) {
* This replicates the behavior of the old Stanford dependencies on universal dependencies.
* @param tree The tree to modify in place.
*/
public static void stripPrepCases(SemanticGraph tree) {
public static void stripPrepCases(SemanticGraph tree, SemanticGraph originalTree) {
// Find incoming case edges that have an 'nmod' incoming edge
List<SemanticGraphEdge> toClean = new ArrayList<>();
for (SemanticGraphEdge edge : tree.edgeIterable()) {
@@ -353,9 +410,9 @@ public static void stripPrepCases(SemanticGraph tree) {
for (SemanticGraphEdge edge : toClean) {
tree.removeEdge(edge);
tree.removeVertex(edge.getDependent());
if (!isTree(tree)) {
throw new NotTreeException(tree, edge);
}
}
if (!isTree(tree)) {
throw new NotTreeException(tree, originalTree);
}
}
