-
Notifications
You must be signed in to change notification settings - Fork 41
Optimize AffinityPropagation class #76
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 4 commits
435c0fb
fbdfa1d
8c5ade0
d6f476d
c895362
c3ec00a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,318 @@ | ||
/* | ||
* File: OptimizedAffinityPropagation.java | ||
* Authors: Marco Pezzulla | ||
* Company: PTV SISTeMA | ||
* Project: Cognitive Foundry | ||
* | ||
* Copyright August 7, 2007, Sandia Corporation. Under the terms of Contract | ||
* DE-AC04-94AL85000, there is a non-exclusive license for use of this work by | ||
* or on behalf of the U.S. Government. Export of this program may require a | ||
* license from the United States Government. See CopyrightHistory.txt for | ||
* complete details. | ||
* | ||
*/ | ||
|
||
|
||
package gov.sandia.cognition.learning.algorithm.clustering; | ||
|
||
import gov.sandia.cognition.learning.algorithm.clustering.AffinityPropagation; | ||
import gov.sandia.cognition.math.DivergenceFunction; | ||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.HashMap; | ||
import java.util.stream.DoubleStream; | ||
|
||
/** | ||
* The <code>OptimizedAffinityPropagation</code> extends <code>AffinityPropagation</code>. | ||
* The <code>OptimizedAffinityPropagation</code> algorithm requires three parameters: | ||
* a divergence function, a value to use for self-divergence, and a damping | ||
* factor (called lambda in the paper; 0.5 is the default). It clusters by | ||
* passing messages between each point to determine the best exemplar for the | ||
* point. | ||
* <BR><BR> | ||
* This implementation takes a divergence function instead of a similarity | ||
* function and sets the similarity value to the negative of the divergence | ||
* value, as described in the paper for Euclidean distance. | ||
* <BR><BR> | ||
* The self-divergence value is what controls how many clusters are generated. | ||
* Typically this value is set to the mean or median of all the divergence | ||
* values or the maximum divergence. In general, a smaller value will mean more | ||
* clusters and a larger value will mean fewer clusters. In the paper this is | ||
* called self-similarity (s(k,k)) but since this implementation uses a | ||
* divergence metric, we use self-divergence instead. | ||
* <BR><BR> | ||
* <code>OptimizedAffinityPropagation</code> improves on the computational | ||
* efficiency of <code>AffinityPropagation</code>: the two core update functions | ||
* are reimplemented, reducing their computational complexity from O(n^3) to O(n^2). | ||
* In addition, when the client does not specify the "selfDivergence" parameter | ||
* (it is NaN), it is set to the median of the distinct computed divergence values. | ||
* @author Marco Pezzulla | ||
* @since 2.0 | ||
*/ | ||
public class OptimizedAffinityPropagation<T> | ||
extends AffinityPropagation<T> | ||
{ | ||
|
||
private static final long serialVersionUID = 7170610951827891277L; | ||
|
||
/**
 * Creates a new instance of OptimizedAffinityPropagation with no divergence
 * function (null) and the default self-divergence.
 */
public OptimizedAffinityPropagation()
{
    this(null, DEFAULT_SELF_DIVERGENCE);
}
jbasilico marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
||
/**
 * Creates a new instance of OptimizedAffinityPropagation without an explicit
 * self-divergence. The self-divergence is passed on as NaN, which
 * initializeAlgorithm() replaces with the median of the computed divergences.
 *
 * @param  divergence The divergence function to use to determine the
 *         divergence between two examples.
 */
public OptimizedAffinityPropagation(
    DivergenceFunction<? super T, ? super T> divergence)
{
    this(divergence, Double.NaN);
}
|
||
/**
 * Creates a new instance of OptimizedAffinityPropagation using the default
 * damping factor.
 *
 * @param  divergence The divergence function to use to determine the
 *         divergence between two examples.
 * @param  selfDivergence The value for self-divergence to use, which
 *         controls the number of clusters created.
 */
public OptimizedAffinityPropagation(
    DivergenceFunction<? super T, ? super T> divergence,
    double selfDivergence)
{
    this(divergence, selfDivergence, DEFAULT_DAMPING_FACTOR);
}
|
||
/**
 * Creates a new instance of OptimizedAffinityPropagation using the default
 * maximum number of iterations.
 *
 * @param  divergence The divergence function to use to determine the
 *         divergence between two examples.
 * @param  selfDivergence The value for self-divergence to use, which
 *         controls the number of clusters created.
 * @param  dampingFactor The damping factor (lambda). Must be between 0.0
 *         and 1.0.
 */
public OptimizedAffinityPropagation(
    DivergenceFunction<? super T, ? super T> divergence,
    double selfDivergence,
    double dampingFactor)
{
    this(divergence, selfDivergence, dampingFactor, DEFAULT_MAX_ITERATIONS);
}
|
||
/**
 * Creates a new instance of OptimizedAffinityPropagation. All arguments are
 * forwarded unchanged to the AffinityPropagation constructor.
 *
 * @param  divergence The divergence function to use to determine the
 *         divergence between two examples.
 * @param  selfDivergence The value for self-divergence to use, which
 *         controls the number of clusters created.
 * @param  dampingFactor The damping factor (lambda). Must be between 0.0
 *         and 1.0.
 * @param  maxIterations The maximum number of iterations.
 */
public OptimizedAffinityPropagation(
    DivergenceFunction<? super T, ? super T> divergence,
    double selfDivergence,
    double dampingFactor,
    int maxIterations)
{
    super(divergence, selfDivergence, dampingFactor, maxIterations);
}
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think the divergenciesArray (should be divergencesArray?) is a complex enough (but cool!) trick that it's worth a function Javadoc explaining its internal referencing specifically. It appears to be a 1-d vector of an upper-triangular 2-d array, right? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I didn't get this point. I simply extended the self divergence handling, set to the median of all the divergencies, when it's not specified by the client. What do I have to explain in a more specific way? Thank you. |
||
@Override | ||
protected boolean initializeAlgorithm() | ||
{ | ||
if (this.getData() == null || this.getData().isEmpty()) | ||
{ | ||
// Make sure that the data is valid. | ||
return false; | ||
} | ||
|
||
// Initialize the main data for the algorithm. | ||
setExamples(new ArrayList<>(getData())); | ||
setSimilarities(new double[exampleCount][exampleCount]); | ||
setResponsibilities(new double[exampleCount][exampleCount]); | ||
setAvailabilities(new double[exampleCount][exampleCount]); | ||
|
||
double[] divergenciesArray = new double[exampleCount * (exampleCount+1)/2]; | ||
boolean computeSelfDivergence = Double.isNaN(getSelfDivergence()); | ||
|
||
// Compute the similarity matrix. | ||
fillSimilarityStructures(divergenciesArray); | ||
|
||
if(computeSelfDivergence) { | ||
double median = computeMedian(divergenciesArray); | ||
setSelfDivergence(median); | ||
} | ||
|
||
fillSimilarityDiagonal(); | ||
|
||
// Initialize the assignments to -1, the changed count, and the | ||
// clusters. | ||
this.setAssignments(new int[exampleCount]); | ||
this.setChangedCount(exampleCount); | ||
this.setClusters(new HashMap<>()); | ||
for (int i = 0; i < exampleCount; i++) | ||
{ | ||
this.assignments[i] = -1; | ||
} | ||
|
||
// Ready to learn. | ||
return true; | ||
} | ||
|
||
/** | ||
* Set the self similarity based on the self divergence. | ||
*/ | ||
private void fillSimilarityDiagonal() | ||
{ | ||
for (int i = 0; i < exampleCount; i++) | ||
{ | ||
similarities[i][i] = -getSelfDivergence(); | ||
} | ||
} | ||
|
||
private void fillSimilarityStructures(double[] divergenciesArray) | ||
{ | ||
for (int i = 0; i < exampleCount; i++) | ||
{ | ||
final T exampleI = examples.get(i); | ||
|
||
for (int j = 0; j <= i; j++) | ||
{ | ||
// We compute similarity, which is the negative of divergence, | ||
// since a lower divergence means a higher similarity. | ||
final T exampleJ = examples.get(j); | ||
final double similarity = -divergence.evaluate( | ||
exampleI, exampleJ); | ||
similarities[i][j] = similarity; | ||
similarities[j][i] = similarity; | ||
divergenciesArray[i * (i+1) / 2 + j] = -similarity; | ||
} | ||
} | ||
} | ||
|
||
private double computeMedian(double[] divergenciesArray) | ||
jbasilico marked this conversation as resolved.
Show resolved
Hide resolved
|
||
{ | ||
|
||
double[] doubles = Arrays.stream(divergenciesArray) | ||
.flatMap(DoubleStream::of) | ||
.distinct() | ||
.sorted() | ||
.toArray(); | ||
|
||
double median; | ||
int size = doubles.length; | ||
if (size == 0) { | ||
median = 0; | ||
} else { | ||
if (size % 2 == 0) | ||
median = (doubles[size / 2] + doubles[size / 2 - 1]) / 2; | ||
else | ||
median = doubles[size / 2]; | ||
} | ||
return median; | ||
|
||
} | ||
|
||
/** | ||
* Updates the responsibilities matrix using the similarity values and the | ||
* current availability values. | ||
*/ | ||
@Override | ||
protected void updateResponsibilities() | ||
{ | ||
|
||
for(int i = 0 ; i < exampleCount; i++) { | ||
double[] rowArray = createSumArray(availabilities[i], similarities[i]); | ||
MaxResult maxResult = computeMax(rowArray); | ||
for (int j = 0; j < exampleCount; j++) { | ||
double maxToSubtract = j != maxResult.maxIndex ? maxResult.maxValue : maxResult.secondMaxValue; | ||
responsibilities[i][j] = oneMinusDampingFactor * (similarities[i][j] - maxToSubtract) + dampingFactor * responsibilities[i][j]; | ||
} | ||
} | ||
} | ||
|
||
private MaxResult computeMax(double[] rowArray) | ||
{ | ||
MaxResult maxResult = new MaxResult(); | ||
for (int i = 0; i < rowArray.length; i++) { | ||
if(rowArray[i] > maxResult.maxValue){ | ||
maxResult.secondMaxValue = maxResult.maxValue; | ||
maxResult.maxValue = rowArray[i]; | ||
maxResult.maxIndex = i; | ||
}else if(rowArray[i] > maxResult.secondMaxValue){ | ||
maxResult.secondMaxValue = rowArray[i]; | ||
} | ||
} | ||
return maxResult; | ||
} | ||
|
||
|
||
private double[] createSumArray(double[] availability, double[] similarity) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You may be able to avoid needing a utility function like this if you use DenseVectors instead of doing it all with raw arrays. There may be a slight performance penalty from the vector overhead, though that is generally what we use in the rest of the foundry to get the easier utility methods. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't get if you would like to change the raw arrays structure wherever or only in this point. I'm following what is already implemented in AffinityPropagation class. If i have to change it, could you provide me an example, please? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Its true those classes use the raw arrays, but they don't need to do these kinds of vector-style operations in their implementation. The way to do it would be to use the DenseVector via VectorFactory.getDenseDefault to make the two arrays, then fill them via set(), then this operation is just availability.plus(similarity). |
||
{ | ||
double[] resultArray = new double[availability.length]; | ||
for (int i = 0; i < availability.length; i++) { | ||
resultArray[i] = availability[i] + similarity[i]; | ||
} | ||
return resultArray; | ||
} | ||
|
||
/** | ||
* Updates the availabilities matrix based on the current responsibility | ||
* values. | ||
*/ | ||
@Override | ||
protected void updateAvailabilities() | ||
{ | ||
ColumnArray rp = new ColumnArray(exampleCount); | ||
for (int j = 0; j < exampleCount; j++) { | ||
removeNegativeValues(rp, responsibilities, j); | ||
rp.array[j] = responsibilities[j][j]; | ||
rp.sum += responsibilities[j][j]; | ||
updateAvailabilitiesMatrix(j, rp, availabilities); | ||
} | ||
} | ||
|
||
private void updateAvailabilitiesMatrix(int j, ColumnArray rp, double[][] availabilities) | ||
{ | ||
double old; | ||
for (int i = 0; i < availabilities[0].length; i++) { | ||
old = availabilities[i][j]; | ||
availabilities[i][j] = rp.sum - rp.array[i]; | ||
jbasilico marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
if(i != j){ | ||
availabilities[i][j] = Math.min(availabilities[i][j], 0); | ||
} | ||
availabilities[i][j] = oneMinusDampingFactor * availabilities[i][j] + dampingFactor * old; | ||
} | ||
} | ||
|
||
private void removeNegativeValues(ColumnArray columnArray, double[][] matrix, int j) | ||
{ | ||
columnArray.sum = 0; | ||
for (int i = 0; i < matrix.length; i++) { | ||
columnArray.array[i] = Math.max(matrix[i][j], 0); | ||
columnArray.sum += columnArray.array[i]; | ||
jbasilico marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
} | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Please make these classes "private static", unless you think a deriving class would want access to them, then "protected static". I'm happy for this next part to be a discussion instead of a "change this". These two classes are essentially just data stores (C-like structs) instead of full classes. As they're completely contained in this class, they aren't part of the API for the Foundry. However, would it make sense to move some of the logic around adding new values from the various helper functions above into these classes themselves? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, they are just data stores, but really currently i don't know the code structure of the project and i wouldn't know where put them. In order to make "private static" the class, indeed, previously i should move the ColumnArray and MaxResult classes. If you could provide me a suggestion where to put these new structures, i would be happy to do that. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can have them stay as internal static classes here. I think Jeremy's comment is you can put something like the computeMax in the MaxResult class. |
||
class MaxResult { | ||
int maxIndex; | ||
double maxValue; | ||
double secondMaxValue; | ||
|
||
MaxResult(){ | ||
maxValue = Double.NEGATIVE_INFINITY; | ||
secondMaxValue = Double.NEGATIVE_INFINITY; | ||
maxIndex = -1; | ||
} | ||
} | ||
|
||
class ColumnArray | ||
jbasilico marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
{ | ||
double[] array; | ||
double sum; | ||
|
||
ColumnArray(int size){ | ||
array = new double[size]; | ||
sum = 0.; | ||
} | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.