-
Notifications
You must be signed in to change notification settings - Fork 0
/
lof.c
361 lines (273 loc) · 10.6 KB
/
lof.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
#include <stdio.h>
#include <stdlib.h>
#include "FileHandling.h"
#include <math.h>
#include <string.h>
#include <time.h>
int main(int argc, char *argv[])
{
int dim = 0; // Dimensions of Elements
int n = 0; // n Elements
int i,j,d; // i counter for n, j counter for Neighborhood Elements, d counter for dimensions
int kPoints; // Minimum points for a cluster to be created
int h,k; // h counter for every other Element, k counter for files
char filename[40]; // The given file
clock_t start, end; // Variables for counting execution time
double reachDist = 0;
double max;
int choise;// Choise holder for switch
char c;
/* -------------------------------------------------------------------------- */
// Reading Dataset
FILE* Dataset;
printf("\n Give the DataSet file:");
scanf("%s", filename);
Dataset = fopen(filename, "r");
if (!Dataset)
{
printf("\n There is something wrong with the Dataset file! \n\n");
return -1;
}
/* -------------------------------------------------------------------------- */
dim = getColumns(Dataset); //Getting the dimensions of each Element of the Dataset
rewind(Dataset);
n = getRows(Dataset); // Getting the Elements of the Dataset
rewind(Dataset);
printf("\n Elements:%d \n", n-1);
printf("\n Dimensions:%d \n", dim);
n--;
printf("Give the amount of K Points: "); // kPoints
scanf("%d",&kPoints );
/* -------------------------------------------------------------------------- */
// All the necessary memory allocation
float *X; // Array of Elements
X = (float*)calloc(n*dim, sizeof(float));
float *kDistance; // Array for holding k-Distances for the First Core point of a cluster
kDistance = (float*)calloc(n, sizeof(float));
float *distance; // Array for holding Distances for each ELement with each other Element
distance = (float*)calloc(n*n, sizeof(float));
unsigned int *Neighborhood; //Array for holding which element belong to which Neighborhood
Neighborhood =(int*) calloc(n*n, sizeof(int));
float *lrd; //Array for holding Local Reachability Density for each element
lrd =(float *)calloc(n, sizeof(float));
unsigned int *NeighborhoodSize; //Array for holding the size of each Neighborhood
NeighborhoodSize = (int*)calloc(n,sizeof(int));
float *reachDistSum; //Array for holding the sum of reachability Distances for each element
reachDistSum = (float *)calloc(n,sizeof(float));
float *LOF;//Array for holding LOF value for each Element
LOF = (float *)calloc(n,sizeof(float));
float *NeighborhoodLrdSum; //Array for holding the sum of each Neighborhood lrd
NeighborhoodLrdSum =(float *) calloc(n,sizeof(float));
float *OrderedList;
OrderedList =(float*) calloc(n*dim,sizeof(float));
float *tmp2;
tmp2 =(float*) calloc(n*dim,sizeof(float*));
float *tempDistance; // Array for holding Distances for each ELement with each other Element
tempDistance =(float*) calloc(n, sizeof(float));
for(i = n; i--;)
{
kDistance[i] = 0;
NeighborhoodLrdSum[i] = 0;
reachDistSum[i] = 0;
NeighborhoodSize[i] = 0;
lrd[i] = 0;
}
/* -------------------------------------------------------------------------- */
// Passing elements to Array X[n][dim]
X = getData(Dataset,n,dim,X);
for(i = n; i--;)
{
for(d = dim; d--;)
{
OrderedList[i*dim + d] = X[i*dim + d];
}
}
fclose(Dataset);
start = clock();
/* -------------------------------------------------------------------------- */
/* ---------------------------------LOF-------------------------------------- */
/* -------------------------------------------------------------------------- */
//STEP 1
// Finding the k-Distance of each element in the dataset
for(i = n; i--;)
{
distance[i*n + i] = 9999;
tempDistance[i] = 99999;
for(h = n; h--;)
{
if(h != i)
{
distance[i*n + h] = 0;
/* Calculating the distance of each element with i element
and then we store it to an array so that we can order it later,
we set the distance of i to max value so that we can avoid it at
the ordering */
for(d = dim; d--;)
distance[i*n + h] += (X[h*dim + d] - X[i*dim + d])*(X[h*dim + d] - X[i*dim + d]);
distance[i*n + h] = sqrt(distance[i*n +h]);
tempDistance[h] = distance[i*n + h];
}
}
/* Ordering the array holding the distances from min to max,
(distance[i] will be located at the distance[max]) */
qSort(tempDistance,n);
/* Using the ordered array to extract the k-distance of i element,
DIST_k(i) */
kDistance[i] = tempDistance[kPoints - 1];
// printf("%d : K-Distance: %lf\n",i,kDistance[i] );
}
/* -------------------------------------------------------------------------- */
//STEP 2
//Finding the Neighborhood of each element
for(i = n; i--;)
{
Neighborhood[i*n + i] = 0;
for(h = n; h--;)
{
if(h != i)
{
/* Calculating the distance of each element with i element
and then we store it to an array so that we can determine if that
element belongs to the Neighborhood of i element */
Neighborhood[i*n + h] = 0;
/* Comparing the distance of each potential Neighborhood element
with the k-distance of i element, if found less, set
Neighborhood[master][currentElement] to 1, that means that it belongs
to i element's Neighborhood */
if(distance[i*n + h] <= kDistance[i])
{
Neighborhood[i*n + h] = 1;
}
}
}
}
/* -------------------------------------------------------------------------- */
//STEP 3
//Getting the Neighborhood size of each element
for(i = n; i--;)
{
unsigned int counter = 0;
for(h = n; h--;)
{
if(Neighborhood[i*n + h] != 0)
{
counter++;
}
}
NeighborhoodSize[i] = counter;
}
/* -------------------------------------------------------------------------- */
//STEP 4
//Finding the Sum of reachDist_k(h <- i)
for(i = n; i--;)
{
for(h = n; h--;)
{
if(Neighborhood[i*n + h] != 0)
{
/*Calculating the reach dist of i in respect to h so that we will be
able to compare it with the k-distance of h */
reachDist = 0;
for(d = dim; d--;)
reachDist += (X[i*dim + d] - X[h*dim + d])*(X[i*dim + d] - X[h*dim + d]);
reachDist = sqrt(reachDist);
/*Comparing kDistance of h with the reach dist of i in respect to h,
we choose the Max between them, max(kDistance(h),reachDist_k(h <- i))*/
if(kDistance[h] > reachDist)
{
max = kDistance[h];
}else
{
max = reachDist;
}
//Adding the max to the total Sum
reachDistSum[i] += max;
}
}
}
/* -------------------------------------------------------------------------- */
//STEP 4
/* Calculating the Local Reachability Density(lrd) of each element,
lrd_k(i) = NeighborhoodSize(i)/reachDistSum(i) */
for(i = n; i--;)
{
lrd[i] = NeighborhoodSize[i]/(reachDistSum[i]);
}
/* -------------------------------------------------------------------------- */
//STEP 5
/* Calculating the sum of a Neighborhood element lrd divided by the
master's element lrd for each i.
Sum(lrd[h]/lrd[i]) for each Neighborhood element */
for(i = n; i--;)
{
for(h = n; h--;)
{
if(Neighborhood[i*n + h] != 0)
{
NeighborhoodLrdSum[i] += lrd[h]/lrd[i];
}
}
}
/* -------------------------------------------------------------------------- */
//STEP 6
//Calculating the LOF for each element
for(i = n; i--;)
{
LOF[i] = (NeighborhoodLrdSum[i]/NeighborhoodSize[i]);
}
/* -------------------------------------------------------------------------- */
//STEP 7
//Ordering the data according to LOF from max to min
for(i = n; i--; )
{
for(h = n; h--;)
{
if(LOF[i] >= LOF[h])
{
double tmp = LOF[i];
LOF[i] = LOF[h];
LOF[h] = tmp;
for(d = dim; d--;)
{
tmp2[i*dim + d] = OrderedList[i*dim + d];
OrderedList[i*dim + d] = OrderedList[h*dim + d];
OrderedList[h*dim + d] = tmp2[i*dim + d];
}
}
}
}
/* -------------------------------------------------------------------------- */
/* ---------------------------------END-------------------------------------- */
/* -------------------------------------------------------------------------- */
end = clock();
/*----------------------------------------------------------------------------*/
double total_time = ((double) (end - start)) / CLOCKS_PER_SEC;
printf("\n Time of Algorithm Execution: %lf \n\n",total_time);
FILE* OrderFile;
OrderFile = fopen("OrderFile.txt","w");
for(i = n; i--;)
{
fprintf(OrderFile, "LOF : %lf --> :",LOF[i]);
for(d = dim; d--;)
{
fprintf(OrderFile,"%lf ",OrderedList[i*dim + d] );
}
fprintf(OrderFile, "\n");
}
fclose(OrderFile);
/*----------------------------------------------------------------------------*/
// FREE EVERYTHING
free(X);
free(kDistance);
free(distance);
free(Neighborhood);
free(lrd);
free(NeighborhoodSize);
free(reachDistSum);
free(NeighborhoodLrdSum);
free(LOF);
free(tmp2);
free(OrderedList);
free(tempDistance);
return 0;
}