Skip to content

Commit fe7cd38

Browse files
committed
Separate the dendrogram printing from clustering.
1 parent bd02f67 commit fe7cd38

12 files changed

+92
-36
lines changed

src/averagecluster.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ bool AverageCluster::getCandidate(float &minInexact, float &minExact, AverageVer
257257
return (minExact < 1.0f) && (minExact < minInexact || fabsf(minExact - minInexact) < EPSILON);
258258
}
259259

260-
void AverageCluster::clusterMatrix(InMatrix * mat) {
260+
Dendrogram AverageCluster::clusterMatrix(InMatrix * mat) {
261261

262262
uint maxEdges = mat->numPoints; // maxEdges = num of outEdges can be stored in RAM
263263

@@ -315,5 +315,6 @@ void AverageCluster::clusterMatrix(InMatrix * mat) {
315315

316316
fprintf(stderr, "Finished! number of edges = %u. linear ratio: %.2f. quadratic ratio: %zu\n", maxEdges,
317317
(double) maxEdges / mat->numPoints, mat->numElements / maxEdges);
318+
return dendrogram;
318319
}
319320

src/averagecluster.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,16 +68,16 @@ struct AverageCluster: public Cluster {
6868
float tempMax;
6969
uint totalEdges;
7070

71-
AverageCluster(uint n, char * treeName) :
72-
Cluster(n, treeName) {
71+
AverageCluster(uint n) :
72+
Cluster(n) {
7373
totalEdges = 0;
7474
tempMax = 0;
7575
}
7676

7777
// override super methods
7878
Vertex* createVertex(uint id);
7979

80-
void clusterMatrix(InMatrix * mat);
80+
Dendrogram clusterMatrix(InMatrix * mat);
8181

8282
// current class methods
8383
void merge(AverageVertex * v1, AverageVertex * v2, AverageVertex* v, float max);

src/cluster.cpp

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -19,13 +19,9 @@ void Vertex::updateAncestor(Vertex* ancestor) {
1919

2020
//----------------------------------------------------------------------------------------------------
2121

22-
Cluster::Cluster(uint n, char * treeName) :
23-
numLeaves(n) {
22+
Cluster::Cluster(uint n) :
23+
numLeaves(n),dendrogram(n){
2424
// add the leaves into the final graph
25-
phytree.open(treeName, ofstream::binary);
26-
if (!phytree)
27-
EXIT_MSG(treeName);
28-
phytree.precision(5);
2925
}
3026

3127
void Cluster::createLeaves() {
@@ -38,15 +34,10 @@ Cluster::~Cluster() {
3834

3935
for (uint i = 0; i < vertices.size(); ++i)
4036
delete vertices[i];
41-
phytree.close();
4237

4338
}
4439

4540
void Cluster::updateGraph(uint id1, uint id2, float dist) {
46-
47-
if (id1 < id2)
48-
phytree << id1 + 1 << " " << id2 + 1 << " " << dist << endl;
49-
else
50-
phytree << id2 + 1 << " " << id1 + 1 << " " << dist << endl;
41+
dendrogram.add(id1 ,id2 ,dist);
5142
}
5243

src/cluster.h

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <list>
1313
#include <fstream>
1414
#include "inmatrix.h"
15+
#include "dendrogram.h"
1516
using namespace std;
1617

1718
struct Vertex {
@@ -37,11 +38,12 @@ struct Cluster {
3738
protected:
3839
vector<Vertex *> vertices;
3940
uint numLeaves;
40-
41+
Dendrogram dendrogram;
4142
private:
42-
ofstream phytree;
43+
//ofstream phytree;
44+
4345
public:
44-
Cluster(uint n, char * treeName);
46+
Cluster(uint n);
4547

4648
virtual ~Cluster();
4749

@@ -52,7 +54,7 @@ struct Cluster {
5254
// overriden methods
5355
virtual Vertex* createVertex(uint id) = 0;
5456

55-
virtual void clusterMatrix(InMatrix* mat) = 0;
57+
virtual Dendrogram clusterMatrix(InMatrix* mat) = 0;
5658

5759
// non-overridden methods
5860
void createLeaves();

src/completecluster.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ void CompleteCluster::printAllEdges() {
112112
}
113113
}
114114

115-
void CompleteCluster::clusterMatrix(InMatrix * mat) {
115+
Dendrogram CompleteCluster::clusterMatrix(InMatrix * mat) {
116116

117117
uint maxEdges = mat->numPoints; // maxNumEdges = num of links can be stored in RAM
118118
uint newId = numLeaves;
@@ -127,7 +127,7 @@ void CompleteCluster::clusterMatrix(InMatrix * mat) {
127127
v2 = reinterpret_cast<CompleteVertex*>(vertices[col]->ancestor);
128128

129129
if (v1 == v2)
130-
return;
130+
return dendrogram;
131131

132132
if (v1->id < v2->id) {
133133
tempV = v1;
@@ -158,5 +158,6 @@ void CompleteCluster::clusterMatrix(InMatrix * mat) {
158158

159159
fprintf(stderr, "Finished! number of edges = %u. linear ratio: %.2f. quadratic ratio: %zu\n", maxEdges,
160160
(double) maxEdges / mat->numPoints, mat->numElements / maxEdges);
161+
return dendrogram;
161162
}
162163

src/completecluster.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,15 @@ struct CompleteCluster: public Cluster {
4242

4343
uint totalEdges;
4444

45-
CompleteCluster(uint matDim, char * treeName) :
46-
Cluster(matDim, treeName) {
45+
CompleteCluster(uint matDim) :
46+
Cluster(matDim) {
4747
totalEdges = 0;
4848
}
4949

5050
// override super methods
5151
Vertex* createVertex(uint id);
5252

53-
void clusterMatrix(InMatrix * mat);
53+
Dendrogram clusterMatrix(InMatrix * mat);
5454

5555
// current class methods
5656
void merge(CompleteVertex * v1, CompleteVertex * v2, CompleteVertex *v);

src/dendrogram.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
#include "dendrogram.h"
2+
3+
void Dendrogram::add(unsigned p1, unsigned p2, float distance)
4+
{
5+
Link l;
6+
l.p1=p1;
7+
l.p2=p2;
8+
l.distance=distance;
9+
l.nump=0;
10+
l.nump+=p1>=numPoints?pointsCount.at(p1-numPoints):1;
11+
l.nump+=p2>=numPoints?pointsCount.at(p2-numPoints):1;
12+
pointsCount.push_back(l.nump);
13+
links.push_back(l);
14+
}
15+

src/dendrogram.h

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#ifndef DENDROGRAM_H_
2+
#define DENDROGRAM_H_
3+
/*
4+
* An (n−1) by 4 matrix Z is returned.
5+
* At the i-th iteration, clusters with indices Z[i, 0] and Z[i, 1] are combined
6+
* to form cluster n+i. A cluster with an index less than n corresponds to one
7+
* of the n original observations. The distance between clusters Z[i, 0] and Z[i, 1]
8+
* is given by Z[i, 2]. The fourth value Z[i, 3] represents the number of original
9+
* observations in the newly formed cluster.
10+
*
11+
*/
12+
#include <vector>
13+
#include <unordered_map>
14+
#include <fstream>
15+
/**
 * Ordered list of cluster merges produced by a clustering run.
 *
 * Each call to add() appends one Link; print() writes the links to a text
 * file, one per line, as "p1 p2 distance nump".
 */
class Dendrogram {
private:
    /// One merge step.
    struct Link {
        unsigned p1, p2;   ///< ids of the two merged clusters, as passed to add()
        float distance;    ///< distance stored for this merge
        unsigned nump;     ///< number of points in the merged cluster
    };
    const unsigned numPoints;          ///< ids below this value count as single points
    std::vector<Link> links;           ///< merges in the order they were added
    std::vector<unsigned> pointsCount; ///< number of points in the cluster created by each merge
public:
    /**
     * @param numPoints number of original points; used to pre-size storage
     *                  for the numPoints-1 merges.
     */
    Dendrogram(unsigned numPoints) : numPoints(numPoints)
    {
        // numPoints is unsigned: for 0, numPoints-1 would wrap around to a
        // huge value and make reserve() throw, so only reserve when there is
        // at least one merge to store.
        if (numPoints > 1) {
            links.reserve(numPoints - 1);
            pointsCount.reserve(numPoints - 1);
        }
    }

    /// Append one merge step (defined in dendrogram.cpp).
    void add(unsigned p1, unsigned p2, float distance);

    /**
     * Write every link to the file fname, one line per link, fields
     * separated by single spaces. The stream state is not checked.
     */
    void print(const std::string& fname) const
    {
        std::ofstream myfile;
        myfile.open(fname);
        for (const Link& l : links) {
            myfile << l.p1 << ' ' << l.p2 << ' ' << l.distance << ' ' << l.nump << '\n';
        }
    }
};
41+
42+
#endif //DENDROGRAM_H_

src/inmatrix.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ struct InMatrix: public Matrix {
4646
void push(uint row, uint col, float value) {
4747
Element e(row,col,value);
4848
sorter.push(e);
49-
threshold=threshold<value?value:threshold;
49+
threshold=threshold<value?nextafterf(value,value+100.0f):threshold;
5050
}
5151
void sort() {
5252
sorter.sort();

src/main.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@ struct Point {
1717
for(int i=0; i<3; i++) {
1818
r[i]=unif(re);
1919
}
20-
for(int i=3; i<n; i++) {
20+
for(int i=3; i<n-3; i++) {
2121
r[i]=r[i-3]+0.1;
2222
}
23+
for(int i=n-3; i<n; i++) {
24+
r[i]=unif(re);
25+
}
2326
}
2427
static const unsigned n=3*150;
2528
const float invn=3.0f/n;
@@ -102,16 +105,16 @@ int main(int argc, char **argv) {
102105
Cluster * cluster = NULL;
103106

104107
if (linkage == 0)
105-
cluster = new AverageCluster(inMat.getNumPints(), treeFileName);
108+
cluster = new AverageCluster(inMat.getNumPints());
106109
else if (linkage == 1)
107-
cluster = new CompleteCluster(inMat.getNumPints(), treeFileName);
110+
cluster = new CompleteCluster(inMat.getNumPints());
108111
else if (linkage == 2)
109-
cluster = new SingleCluster(inMat.getNumPints(), treeFileName);
112+
cluster = new SingleCluster(inMat.getNumPints());
110113
else if (linkage == 3)
111-
cluster = new AverageCluster(inMat.getNumPints(), treeFileName);
114+
cluster = new AverageCluster(inMat.getNumPints());
112115

113116
cluster->createLeaves();
114-
PROFILE("cluster", cluster->clusterMatrix(&inMat));
117+
PROFILE("cluster", cluster->clusterMatrix(&inMat).print(treeFileName));
115118

116119
delete cluster;
117120

src/singlecluster.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ Vertex* SingleCluster::createVertex(uint id) {
1414

1515
//----------------------------------------------------------------------------------------------------
1616

17-
void SingleCluster::clusterMatrix(InMatrix * mat) {
17+
Dendrogram SingleCluster::clusterMatrix(InMatrix * mat) {
1818

1919
uint newId = numLeaves, maxVertices = numLeaves * 2 - 1;
2020
uint row, col;
@@ -47,4 +47,5 @@ void SingleCluster::clusterMatrix(InMatrix * mat) {
4747
}
4848

4949
fprintf(stderr, "Finished! index = %u\n", mat->pos);
50+
return dendrogram;
5051
}

src/singlecluster.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,14 @@ using namespace std;
1313

1414
struct SingleCluster: public Cluster {
1515

16-
SingleCluster(uint matDim, char * treeName) :
17-
Cluster(matDim, treeName) {
16+
SingleCluster(uint matDim) :
17+
Cluster(matDim) {
1818
}
1919

2020
Vertex* createVertex(uint id);
2121

2222
// override super methods
23-
void clusterMatrix(InMatrix * mat);
23+
Dendrogram clusterMatrix(InMatrix * mat);
2424
};
2525

2626
#endif /* MINCLUSTER_H_ */

0 commit comments

Comments
 (0)