-
Notifications
You must be signed in to change notification settings - Fork 45
Removed external library and added clustering algorithm #532
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 6 commits
Commits
Show all changes
16 commits
Select commit
Hold shift + click to select a range
d7c570e
Added basic interfaces
amandelpie 966b377
Added an initial draft
amandelpie 7e31bac
Added an initial draft
amandelpie b0bc272
Merge branch 'main' into feature-345
amandelpie e983dac
Fixed a few bugs
amandelpie a9e4568
Merge branch 'main' into feature-345
amandelpie 9d4816c
Added KDocs
amandelpie 57151cd
Added KDocs
amandelpie 00157a9
Fixed a few bugs
amandelpie fdeaf01
Merge branch 'main' into feature-345
amandelpie 84c3d98
Fixed a few docs
amandelpie 9108847
Formatted code
amandelpie e00f476
Formatted code
amandelpie c3d0dd9
Merge branch 'main' into feature-345
amandelpie c112184
Changed some comments and renamed variables
amandelpie d0c44e7
Merge remote-tracking branch 'origin/feature-345' into feature-345
amandelpie File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
28 changes: 28 additions & 0 deletions
28
utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANModel.kt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| package org.utbot.summary.clustering.dbscan | ||
|
|
||
| import org.utbot.summary.clustering.dbscan.neighbor.RangeQuery | ||
|
|
||
/**
 * Immutable holder for the outcome of a DBSCAN run together with the settings
 * that would be needed to classify further observations.
 *
 * @param K type of the clustered observations.
 * @property k Number of clusters; defaults to 0 when not supplied.
 * @property clusterLabels Labels assigned to the observations — presumably one entry per
 * observation in input order, which is how `DBSCANTrainer.fit` fills the array.
 * @property clusterSizes The number of observations in each cluster.
 * @property rangeQuery Neighbor-search strategy the model was built with.
 * @property eps Neighborhood radius the model was built with.
 * @property minSamples Minimal neighbor count the model was built with.
 */
class DBSCANModel<K>(
    val k: Int = 0,
    val clusterLabels: IntArray,
    val clusterSizes: IntArray,
    val rangeQuery: RangeQuery<K>,
    val eps: Float,
    val minSamples: Int
) {
    // TODO: prediction for new data is not implemented yet. The unfinished draft was:
    //
    //   /** Find a cluster for new data. */
    //   fun predictCluster(data: K): Int {
    //       val neighbors = rangeQuery.findNeighbors(data, eps)
    //
    //       if (neighbors.size < minSamples) {
    //           return NOISE
    //       }
    //       // ... the branch for non-noise points was never written
    //   }
}
79 changes: 79 additions & 0 deletions
79
utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/DBSCANTrainer.kt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| package org.utbot.summary.clustering.dbscan | ||
|
|
||
| import org.utbot.summary.clustering.dbscan.neighbor.LinearRangeQuery | ||
| import org.utbot.summary.clustering.dbscan.neighbor.RangeQuery | ||
|
|
||
/** Label of a point whose neighborhood is too sparse to start a cluster; it may still be claimed later as a border point. */
const val NOISE: Int = -3

/** Transient label of a point already reached from a cluster but not yet processed as a seed. */
const val CLUSTER_PART: Int = -2

/** Initial label of every point before it has been visited. */
const val UNDEFINED: Int = -1
|
|
||
/**
 * Density-based clustering (DBSCAN) trainer.
 *
 * @param T type of the clustered observations.
 * @property eps Radius of the neighborhood used for the density check; must be positive.
 * @property minSamples Minimal number of neighbors (as returned by [rangeQuery]) a point needs
 * in order to be treated as a core point; must be positive.
 * @property metric Distance function handed over to the range query.
 * @property rangeQuery Strategy used to look up the neighbors of a point.
 * @throws IllegalArgumentException if [minSamples] or [eps] is not positive.
 */
class DBSCANTrainer<T>(val eps: Float, val minSamples: Int, val metric: Metric<T>, val rangeQuery: RangeQuery<T>) {
    init {
        require(minSamples > 0) { "MinSamples parameter should be more than 0: $minSamples" }
        require(eps > 0.0f) { "Eps parameter should be more than 0: $eps" }
    }

    /**
     * Clusters [data] and returns the resulting model.
     *
     * In the returned model every observation is labelled either with a cluster id in `1..k`
     * or with [NOISE]; the transient [UNDEFINED] and [CLUSTER_PART] labels never survive.
     * `clusterSizes[c - 1]` holds the number of observations labelled `c`.
     *
     * @param data observations to cluster; an empty array yields a model with zero clusters.
     * @return the model describing the clusters that were found.
     */
    fun fit(data: Array<T>): DBSCANModel<T> {
        if (rangeQuery is LinearRangeQuery) {
            rangeQuery.data = data
            rangeQuery.metric = metric
        } // TODO: could be refactored if we add some new variants of RangeQuery

        val labels = IntArray(data.size) { UNDEFINED }

        var k = 0 // id of the most recently opened cluster; equals the cluster count at the end
        for (i in data.indices) {
            if (labels[i] != UNDEFINED) continue // already claimed by a cluster or marked as noise

            val seeds = rangeQuery.findNeighbors(data[i], eps).toMutableList()
            if (seeds.size < minSamples) {
                // Not dense enough to start a cluster; may still become a border point later.
                labels[i] = NOISE
                continue
            }

            k++
            labels[i] = k

            // All unvisited neighbors of a core point belong to the cluster being built.
            for (seed in seeds) {
                if (labels[seed.index] == UNDEFINED) {
                    labels[seed.index] = CLUSTER_PART
                }
            }

            // The seed list grows while it is being processed, so it is walked with an explicit
            // index: `for (j in seeds.indices)` would fix the range up front and skip every
            // seed appended during the expansion.
            var j = 0
            while (j < seeds.size) {
                val q = seeds[j++]
                val idx = q.index

                when (labels[idx]) {
                    NOISE -> {
                        // Noise reachable from a core point becomes a border point.
                        labels[idx] = k
                    }
                    UNDEFINED, CLUSTER_PART -> {
                        labels[idx] = k

                        val qNeighbors = rangeQuery.findNeighbors(q.key, eps)
                        if (qNeighbors.size >= minSamples) { // Q is a core point: expand through it
                            for (qNeighbor in qNeighbors) {
                                val label = labels[qNeighbor.index]
                                if (label == UNDEFINED) {
                                    // Marking prevents the same point from being queued again.
                                    labels[qNeighbor.index] = CLUSTER_PART
                                }
                                if (label == UNDEFINED || label == NOISE) {
                                    seeds.add(qNeighbor)
                                }
                            }
                        }
                    }
                }
            }
        }

        // Cluster ids are 1-based, hence the shift when counting members.
        val clusterSizes = IntArray(k)
        for (label in labels) {
            if (label > 0) {
                clusterSizes[label - 1]++
            }
        }

        return DBSCANModel(
            k = k,
            clusterLabels = labels,
            clusterSizes = clusterSizes,
            rangeQuery = rangeQuery,
            eps = eps,
            minSamples = minSamples
        )
    }
}
6 changes: 6 additions & 0 deletions
6
utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/Metric.kt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,6 @@ | ||
| package org.utbot.summary.clustering.dbscan | ||
|
|
||
|
|
||
/**
 * Distance function between two objects of type [T].
 */
interface Metric<T> {
    /**
     * Computes the distance between [object1] and [object2].
     *
     * @return the distance as a [Double].
     */
    fun compute(object1: T, object2: T): Double
}
20 changes: 20 additions & 0 deletions
20
...-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/LinearRangeQuery.kt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,20 @@ | ||
| package org.utbot.summary.clustering.dbscan.neighbor | ||
|
|
||
| import org.utbot.summary.clustering.dbscan.Metric | ||
|
|
||
/**
 * [RangeQuery] that scans every element of [data] and measures its distance to the query
 * with [metric].
 *
 * Both properties are `lateinit` and must be assigned before [findNeighbors] is called.
 */
class LinearRangeQuery<K> : RangeQuery<K> {
    /** Observations to search through. */
    lateinit var data: Array<K>

    /** Distance function applied between the query and each observation. */
    lateinit var metric: Metric<K>

    /**
     * Collects the elements of [data] lying within [radius] (inclusive) of [queryKey].
     *
     * Elements equal to [queryKey] are left out, so the query point itself — and any duplicate
     * of it — is never reported as a neighbor.
     *
     * @return the matching neighbors in the order in which they appear in [data].
     */
    override fun findNeighbors(queryKey: K, radius: Float): List<Neighbor<K>> =
        data.mapIndexedNotNull { position, candidate ->
            val distance = metric.compute(queryKey, candidate)
            if (distance <= radius && queryKey != candidate) {
                Neighbor(candidate, position, distance)
            } else {
                null
            }
        }
}
8 changes: 8 additions & 0 deletions
8
utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/Neighbor.kt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,8 @@ | ||
| package org.utbot.summary.clustering.dbscan.neighbor | ||
|
|
||
/**
 * A point found by a range query.
 *
 * @property key The point itself.
 * @property index Index associated with the point by whoever created the neighbor.
 * @property distance Distance from the query point to [key].
 */
class Neighbor<K>(val key: K, val index: Int, val distance: Double) : Comparable<Neighbor<K>> {
    /**
     * Orders neighbors by ascending [distance]; equal distances are ordered by ascending [index].
     */
    override fun compareTo(other: Neighbor<K>): Int {
        // Named differently from the `distance` property on purpose: this is a comparison
        // result (an Int), not a distance.
        val byDistance = distance.compareTo(other.distance)
        return if (byDistance != 0) byDistance else index.compareTo(other.index)
    }
}
5 changes: 5 additions & 0 deletions
5
utbot-summary/src/main/kotlin/org/utbot/summary/clustering/dbscan/neighbor/RangeQuery.kt
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,5 @@ | ||
| package org.utbot.summary.clustering.dbscan.neighbor | ||
|
|
||
/**
 * Strategy for looking up the neighbors of a point.
 */
interface RangeQuery<K> {
    /**
     * Finds the neighbors of [queryKey] for the given [radius].
     *
     * @return the neighbors that were found.
     */
    fun findNeighbors(queryKey: K, radius: Float): List<Neighbor<K>>
}
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.