Skip to content

Commit

Permalink
add some documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
iorixxx committed Mar 22, 2021
1 parent c936589 commit 6ad5ae8
Showing 1 changed file with 14 additions and 8 deletions.
22 changes: 14 additions & 8 deletions src/main/java/edu/anadolu/cmdline/FeatureTool.java
Original file line number Diff line number Diff line change
Expand Up @@ -111,22 +111,28 @@ void resultListFeatures(DataSet dataset) throws IOException {
}
}

/**
 * Computes the Jaccard coefficient (Intersection over Union) between the document IDs
 * of two result lists.
 *
 * <p>Each list is first reduced to the set of distinct {@code docID} values it contains, so
 * duplicates and ordering within a list do not affect the score. The coefficient is the size
 * of the intersection of the two sets divided by the size of their union.
 *
 * @param reference the first result list; its document IDs form set A
 * @param alternate the second result list; its document IDs form set B
 * @return J(A,B), where {@code 0 <= J(A,B) <= 1}. If A and B are both empty, J(A,B) is defined as 1.
 */
private static double systemSimilarity(List<SubmissionFile.Tuple> reference, List<SubmissionFile.Tuple> alternate) {

    Set<String> set1 = reference.stream().map(SubmissionFile.Tuple::docID).collect(Collectors.toSet());
    Set<String> set2 = alternate.stream().map(SubmissionFile.Tuple::docID).collect(Collectors.toSet());

    // Two empty sets are treated as identical; returning early also avoids a 0/0 division below.
    if (set1.isEmpty() && set2.isEmpty()) {
        return 1.0;
    }

    // Copy set1 before mutating so the source sets stay intact for the intersection step.
    Set<String> union = new HashSet<>(set1);
    union.addAll(set2); // Union

    Set<String> intersection = new HashSet<>(set1);
    intersection.retainAll(set2); // Intersection

    // Cast before dividing to avoid integer division truncating the ratio to 0 or 1.
    return (double) intersection.size() / union.size();
}

@Override
Expand Down

0 comments on commit 6ad5ae8

Please sign in to comment.