Skip to content

Commit 93af2a8

Browse files
committed
feature extraction for french up and running
Former-commit-id: 703dbd2cac43116e10ee4b5c6447119f8f0cad38
1 parent fefa7f3 commit 93af2a8

File tree

1 file changed

+239
-0
lines changed

1 file changed

+239
-0
lines changed

FeatureExtraction/testChar.js

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
const fs = require('fs');
2+
var unique = require('array-unique');
3+
var csvdata = require('csvdata');
4+
var writeFile = require('write');
5+
var createFile = require('create-file');
6+
var unique = require('array-unique');
7+
let writeStream = fs.createWriteStream('secret.txt');
8+
var HashMap = require('hashmap');
9+
// One import of the line reader is enough: the original required the same
// module six times under different aliases and then constructed every reader
// after the first through the second alias anyway.
const LineByLineReader = require('line-by-line');

// Reader over the corpus that the character vocabulary is collected from.
const lr = new LineByLineReader('testfinal.txt');

// Per-language corpora.
// NOTE(review): no listener is attached to any of these five readers in this
// file (the French pass builds its own reader inside the 'end' handler of
// `lr`). They are kept so start-up behaviour is unchanged; confirm whether
// they are still needed or should be created lazily with error handlers.
const lr2 = new LineByLineReader('French_v1.txt');
const lr3 = new LineByLineReader('German_v1.txt');
const lr4 = new LineByLineReader('Polish_v1.txt');
const lr5 = new LineByLineReader('Slovak_v1.txt');
const lr6 = new LineByLineReader('Spanish_v1.txt');

// Feature matrices, one row per input line. Only `centroid` (French) is
// filled by live code; the numbered ones match the lr3..lr6 readers above
// (German, Polish, Slovak, Spanish) and are not populated yet.
const centroid = [];
const centroid3 = [];
const centroid4 = [];
const centroid5 = [];
const centroid6 = [];

// Scratch value: the most recently read line, lower-cased. Reassigned by the
// 'line' handlers, hence `let`.
let total = "";
28+
// var logStream = fs.createWriteStream('log.txt', {'flags': 'a'});
29+
// Report read failures instead of discarding them. The original handler body
// was empty, so a missing or unreadable input file produced no diagnostics.
lr.on('error', function (err) {
	// 'err' contains error object
	console.error('line reader error:', err);
	// Let the process finish its pending work but signal failure to the caller.
	process.exitCode = 1;
});
33+
// Not used by live code in this file.
const arr1 = [];
// Distinct characters seen so far, in order of first appearance.
let charArr = [];
// Final vocabulary; assigned once the whole input has been read.
let finalChars = [];

// Collects the distinct lower-cased characters of every input line.
lr.on('line', function (line) {
	// 'line' contains the current line without the trailing newline character.
	total = line.toLowerCase();
	// Array.from walks the string by code point. Unlike split(/(?=.)/u) it
	// yields nothing for an empty line (split returns [""], which put an
	// empty-string "character" into the vocabulary) and it never glues a
	// character to a following \r, \u2028 or \u2029, which `.` cannot match.
	charArr = unique(charArr.concat(Array.from(total)));
});
44+
45+
/**
 * Counts how many times each vocabulary entry occurs in `text`.
 *
 * The text is walked once by code point, so characters made of two UTF-16
 * code units are counted as whole characters (indexing the string by code
 * unit can never match them). Vocabulary entries that do not occur get 0.
 *
 * @param {string} text - Line of input, already lower-cased.
 * @param {string[]} vocabulary - Characters to count, in feature-column order.
 * @returns {number[]} Occurrence counts, index-aligned with `vocabulary`.
 */
function countVocabularyChars(text, vocabulary) {
	const occurrences = new Map();
	for (const ch of text) {
		occurrences.set(ch, (occurrences.get(ch) || 0) + 1);
	}
	return vocabulary.map(function (ch) {
		return occurrences.get(ch) || 0;
	});
}

/**
 * Runs once the vocabulary input has been read completely.
 *
 * 1. Stores the de-duplicated vocabulary in `finalChars` and appends it to
 *    totalChars.txt, one character per line.
 * 2. Reads French_v1.txt line by line and stores, per line, a vector of
 *    per-character counts in `centroid`.
 * 3. When that file ends, replaces every count with
 *    log((count + 0.0001) / columnTotal * 10) and appends the rows to
 *    frenchCentroid.csv, one comma-separated row per input line.
 */
lr.on('end', function () {
	// All lines are read, file is closed now.
	finalChars = unique(charArr);

	// A single append keeps the characters in vocabulary order. Issuing one
	// fs.appendFile per character gives no guarantee about the order in which
	// the writes land, and that order defines the feature columns.
	if (finalChars.length > 0) {
		const vocabularyText = finalChars.map(function (ch) {
			return ch + '\n';
		}).join('');
		fs.appendFile('totalChars.txt', vocabularyText, (err) => {
			// throws an error, you could also catch it here
			if (err) throw err;
		});
	}

	const FrenchLineReader = require('line-by-line');
	const frenchReader = new FrenchLineReader('French_v1.txt');
	let lineNo = 0;

	frenchReader.on('line', function (line) {
		// 'line' contains the current line without the trailing newline character.
		total = line.toLowerCase();
		console.log(total);
		console.log(lineNo);
		centroid[lineNo] = countVocabularyChars(total, finalChars);
		lineNo++;
	});

	frenchReader.on('end', function () {
		// All lines are read, file is closed now.
		if (centroid.length === 0) {
			// Nothing was read; indexing centroid[0] would throw a TypeError.
			console.error('French_v1.txt produced no lines; frenchCentroid.csv was not written');
			return;
		}

		const width = centroid[0].length;

		// Column totals: occurrences of each vocabulary character over all lines.
		const totalChar = new Array(width).fill(0);
		for (const row of centroid) {
			for (let col = 0; col < width; col++) {
				totalChar[col] += row[col];
			}
		}

		// Log-scaled relative frequency; 0.0001 keeps zero counts finite.
		// NOTE(review): a vocabulary character that never occurs in this file
		// has a column total of 0, so the division yields Infinity. Decide how
		// such columns should be smoothed before relying on the output.
		for (const row of centroid) {
			for (let col = 0; col < width; col++) {
				row[col] = Math.log(((row[col] + 0.0001) / totalChar[col]) * 10);
			}
		}

		console.log("tyoyoyoyoyoyoyo", totalChar);
		console.log(lineNo);

		// Every row is written, in input order, with one append. The original
		// loop stopped at lineNo - 1 and so dropped the last row even though
		// that row had been included in the column totals.
		const csvText = centroid.map(function (row) {
			return row.join(',') + '\n';
		}).join('');
		fs.appendFile('frenchCentroid.csv', csvText, (err) => {
			// throws an error, you could also catch it here
			if (err) throw err;
		});
	});

	// TODO: the same pass was drafted (commented out) for the other corpora and
	// has not been enabled. Intended mapping from those drafts:
	//   lr3 / German_v1.txt  -> centroid3 -> germanCentroid.txt
	//   lr4 / Polish_v1.txt  -> centroid4 -> polishCentroid.txt
	//   lr5 / Slovak_v1.txt  -> centroid5 -> slovakCentroid.txt
	//   lr6 / Spanish_v1.txt -> centroid6 -> spanishCentroid.txt
	// The drafts wrote raw counts without the log normalisation used for
	// French, and a csvdata.write('./stoppedString.csv', arr1, {header: 'Text'})
	// export was also left disabled.
});

0 commit comments

Comments
 (0)