Skip to content

Commit c65ac4f

Browse files
authored
Add files via upload
1 parent 875c97c commit c65ac4f

File tree

1 file changed

+335
-0
lines changed

1 file changed

+335
-0
lines changed

FeaturesExtractor.ts

Lines changed: 335 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,335 @@
1+
import { CommandLineValues } from './Common/CommandLineValues';
2+
import { ProgramFeatures } from './FeaturesEntities/ProgramFeatures';
3+
import { Common } from './Common/Common';
4+
import { Node, SyntaxKind, getDefaultCompilerOptions, createProgram, TypeFlags, Program, TypeChecker, Type } from 'typescript';
5+
import { ts, SourceFile, Project } from 'ts-morph'
6+
7+
/**
 * Extracts AST path features for the identifiers found in the functions and
 * methods of a single source file.
 *
 * The file is loaded twice: once through `createProgram` (whose type checker
 * is exposed as `checker`) and once through a ts-morph `Project` (whose
 * compiler node is the AST that is actually traversed).
 *
 * NOTE(review): symbols and nodes obtained from the ts-morph project are
 * passed to the checker created by `createProgram`. Presumably this relies on
 * both using compatible compiler instances - confirm.
 */
export class FeatureExtractor {
    private m_CommandLineValues: CommandLineValues;
    private filePath: string;
    private project: Project;
    private sourceFile: SourceFile;
    private root: Node;
    private program: Program;
    public checker: TypeChecker;
    private functionAndMethodEntries: Array<ts.Symbol> = [];
    private functions: Array<Node> = [];
    // Populated by init(); not read anywhere else in this class.
    private static s_ParentTypeToAddChildId: Set<string> = new Set<string>();
    private static lparen: string = "(";
    private static rparen: string = ")";
    private static upSymbol: string = "^";
    private static downSymbol: string = "_";
    // Node kinds a path is not allowed to climb through (see generatePath).
    private static readonly pathUpperBoundKinds: Set<SyntaxKind> = new Set<SyntaxKind>([
        SyntaxKind.SourceFile,
        SyntaxKind.FunctionDeclaration,
        SyntaxKind.MethodDeclaration,
        SyntaxKind.ClassDeclaration,
        SyntaxKind.InterfaceDeclaration,
    ]);

    /**
     * Registers the parent type names "AssignExpr", "ArrayAccessExpr",
     * "FieldAccessExpr" and "MethodCallExpr" in the static set.
     */
    public static init() {
        FeatureExtractor.s_ParentTypeToAddChildId.add("AssignExpr");
        FeatureExtractor.s_ParentTypeToAddChildId.add("ArrayAccessExpr");
        FeatureExtractor.s_ParentTypeToAddChildId.add("FieldAccessExpr");
        FeatureExtractor.s_ParentTypeToAddChildId.add("MethodCallExpr");
    }

    /**
     * Loads the file, collects the symbols of its functions/methods and the
     * AST nodes of its function and method declarations.
     * @param commandLineValues - extraction settings (MaxPathLength is read by generatePath).
     * @param filePath - path of the source file to analyse.
     */
    constructor(commandLineValues: CommandLineValues, filePath: string) {
        this.m_CommandLineValues = commandLineValues;
        this.filePath = filePath;
        this.program = createProgram([this.filePath], getDefaultCompilerOptions());
        this.checker = this.program.getTypeChecker();

        this.project = new Project();
        this.sourceFile = this.project.addExistingSourceFile(filePath);
        // Result is intentionally discarded.
        // NOTE(review): presumably this forces binding so that `locals` is populated - confirm.
        this.sourceFile.getSymbol();
        this.findFunctionAndMethodEntries(this.sourceFile);
        this.root = this.sourceFile.compilerNode;
        this.findFunctions(this.root);
    }

    /**
     * Extracts program features. Each feature contains all the contexts of a
     * specific identifier in the AST.
     * @returns the features of every function/method, in traversal order.
     *          Functions without resolvable symbols or without identifier
     *          leaves contribute nothing.
     */
    public extractFeatures(): Array<ProgramFeatures> {
        const programFeatures: Array<ProgramFeatures> = [];
        for (const func of this.functions) {
            const identifiersSymbols = this.getFuncIdsSymbols(func);
            if (identifiersSymbols.length === 0) continue;

            const idsLeavesLists = this.createLeavesListForEachIdentifier(func);
            if (idsLeavesLists.length === 0) continue;

            const funcLeaves = this.getFunctionLeaves(func);
            const funcProgramFeatures = this.generatePathFeaturesForFunction(idsLeavesLists, funcLeaves, identifiersSymbols);
            for (const features of funcProgramFeatures) {
                programFeatures.push(features);
            }
        }
        return programFeatures;
    }

    /**
     * Gets the symbols of the identifiers declared locally in function `func`.
     * The function is matched by name against the entries collected by
     * findFunctionAndMethodEntries; the first entry with that name wins.
     * @param func - the function whose identifiers' symbols should be returned.
     * @returns the local symbols, or an empty array when the function has no
     *          name, no matching entry, or no locals table.
     */
    private getFuncIdsSymbols(func: Node): Array<ts.Symbol> {
        const identifiersSymbols: Array<ts.Symbol> = [];

        // The name is taken from the last direct child that is an Identifier.
        const directIdentifiers: Array<Node> = [];
        func.forEachChild(child => {
            if (child.kind === SyntaxKind.Identifier) directIdentifiers.push(child);
        });
        if (directIdentifiers.length === 0) return identifiersSymbols;
        const funcName = directIdentifiers[directIdentifiers.length - 1].getText();

        const funcEntry = this.functionAndMethodEntries.find(entry => entry.getName() === funcName);
        if (funcEntry == null || funcEntry.valueDeclaration == null) return identifiersSymbols;

        // `locals` is not part of the public declaration typings, hence the cast.
        const variableEntries = (funcEntry.valueDeclaration as any)['locals'] as ts.SymbolTable | undefined;
        if (variableEntries == null) return identifiersSymbols;

        variableEntries.forEach((id: ts.Symbol) => {
            identifiersSymbols.push(id);
        });
        return identifiersSymbols;
    }

    /**
     * Groups the identifier leaves of `func` by their text.
     * @param func - the function.
     * @returns one array per distinct identifier text, ordered by text; an
     *          empty array when the function has no identifier leaves.
     */
    private createLeavesListForEachIdentifier(func: Node): Array<Array<Node>> {
        const lists: Array<Array<Node>> = [];
        const idLeaves = this.getFunctionIdLeaves(func);
        // Return an empty array (not null): callers read `.length` on the result.
        if (idLeaves.length === 0) return lists;

        // Read each leaf's text once instead of on every comparison.
        const keyed = idLeaves.map(leaf => ({ text: leaf.getText(), leaf: leaf }));
        keyed.sort((a, b) => {
            if (a.text > b.text) return 1;
            if (a.text < b.text) return -1;
            return 0;
        });

        // After sorting, equal texts are adjacent: start a new group on every change.
        let previousText: string | null = null;
        for (const item of keyed) {
            if (previousText === null || item.text !== previousText) {
                lists.push([]);
                previousText = item.text;
            }
            lists[lists.length - 1].push(item.leaf);
        }
        return lists;
    }

    /**
     * Gets the Identifier nodes in the subtree rooted at `func`.
     * @param func - the function.
     * @returns the identifiers in depth-first traversal order.
     */
    private getFunctionIdLeaves(func: Node): Array<Node> {
        const functionIdLeaves: Array<Node> = [];
        const visit = (node: Node): void => {
            if (node.kind === SyntaxKind.Identifier) {
                functionIdLeaves.push(node);
            }
            node.forEachChild(visit);
        };
        visit(func);
        return functionIdLeaves;
    }

    /**
     * Gets the leaves of the subtree rooted at `func`, i.e. the visited nodes
     * whose getChildCount() is 0.
     * @param func - the function.
     * @returns the leaves in depth-first traversal order.
     */
    private getFunctionLeaves(func: Node): Array<Node> {
        const functionLeaves: Array<Node> = [];
        const visit = (node: Node): void => {
            if (node.getChildCount() === 0) {
                functionLeaves.push(node);
            }
            node.forEachChild(visit);
        };
        visit(func);
        return functionLeaves;
    }

    /**
     * Generates features (contexts; paths) for each identifier of a function.
     * @param idLeavesLists - an array of arrays. Each inner array contains all
     *        the leaves of a specific identifier.
     * @param functionLeaves - all the leaves of the function.
     * @param identifiersSymbols - the symbols of the function's local identifiers.
     * @returns the non-empty features, one entry per identifier.
     */
    private generatePathFeaturesForFunction(idLeavesLists: Array<Array<Node>>, functionLeaves: Array<Node>, identifiersSymbols: Array<ts.Symbol>): Array<ProgramFeatures> {
        const identifiersFeatures: Array<ProgramFeatures> = [];
        for (const idLeaves of idLeavesLists) {
            const singleIdFeatures = this.generatePathFeaturesForIdentifier(idLeaves, functionLeaves, identifiersSymbols);
            if (!singleIdFeatures.isEmpty()) {
                identifiersFeatures.push(singleIdFeatures);
            }
        }
        return identifiersFeatures;
    }

    /**
     * Generates features (contexts; paths) for one identifier.
     * @param idLeaves - the identifier's leaves in the AST (all share the same text).
     * @param functionLeaves - all the leaves of the enclosing function.
     * @param identifiersSymbols - the symbols of the function's local identifiers.
     * @returns the identifier's features. When the identifier has no matching
     *          symbol or its type cannot be named, the features are returned
     *          with only the variable name set and no paths added.
     */
    private generatePathFeaturesForIdentifier(idLeaves: Array<Node>, functionLeaves: Array<Node>, identifiersSymbols: Array<ts.Symbol>): ProgramFeatures {
        const programFeatures: ProgramFeatures = new ProgramFeatures(this.m_CommandLineValues, idLeaves.length);
        const identifier: Node = idLeaves[0];
        const identifierName = identifier.getText();
        programFeatures.setVariableName(identifierName);

        // Find and set the identifier's type:
        const idSymbol = identifiersSymbols.find(id => id.getName() === identifierName);
        if (idSymbol == null) return programFeatures;
        const currType: Type = this.checker.getTypeOfSymbolAtLocation(idSymbol, identifier);
        if (currType.flags === TypeFlags.Object) {
            // An object type without a symbol cannot be named; treat it like an unresolved type.
            if (currType.symbol == null) return programFeatures;
            programFeatures.setVariableType(currType.symbol.name);
        } else if (TypeFlags[currType.flags]) {
            // The reverse enum lookup only succeeds when `flags` is exactly one named flag.
            programFeatures.setVariableType(TypeFlags[currType.flags].toLowerCase());
        } else {
            return programFeatures;
        }

        // Paths between the identifier's own leaves (each unordered pair once):
        for (let i = 0; i < idLeaves.length; i++) {
            const source: Node = idLeaves[i];
            for (let j = i + 1; j < idLeaves.length; j++) {
                const target: Node = idLeaves[j];
                const path = this.generatePath(source, target);
                if (path !== Common.EmptyString) {
                    programFeatures.addFeature(source, path, target);
                }
            }
        }

        // Paths between the identifier's leaves and all other leaves:
        for (const source of idLeaves) {
            const sourceText = source.getText();
            for (const target of functionLeaves) {
                // Leaves with the same text were already paired by the loop above.
                if (sourceText === target.getText()) continue;
                const path = this.generatePath(source, target);
                if (path !== Common.EmptyString) {
                    programFeatures.addFeature(source, path, target);
                }
            }
        }

        return programFeatures;
    }

    /**
     * Finds the function and method declaration nodes in the AST whose root
     * is `root` and stores them in `this.functions`.
     * @param root - the root of the AST to search.
     */
    private findFunctions(root: Node): void {
        const functions: Array<Node> = [];
        const visit = (node: Node): void => {
            if (node.kind === SyntaxKind.FunctionDeclaration || node.kind === SyntaxKind.MethodDeclaration) {
                functions.push(node);
            }
            // Keep descending: declarations nested inside other declarations are collected too.
            node.forEachChild(visit);
        };
        visit(root);
        this.functions = functions;
    }

    /**
     * Collects the symbols of the file-level functions and of the methods of
     * file-level classes, and stores them in `this.functionAndMethodEntries`.
     * @param sourceFile - the source file.
     */
    private findFunctionAndMethodEntries(sourceFile: SourceFile): void {
        // Get the identifiers table (not part of the public typings, hence the cast).
        const localEntries = (sourceFile.compilerNode as any)['locals'] as ts.SymbolTable | undefined;
        const functionAndMethodEntries: Array<ts.Symbol> = [];

        // The table may be absent; in that case there is nothing to collect.
        if (localEntries != null) {
            localEntries.forEach((entry) => {
                if (entry.valueDeclaration && entry.valueDeclaration.kind === SyntaxKind.FunctionDeclaration) {
                    // entry is a function
                    functionAndMethodEntries.push(entry);
                    return;
                }

                const firstDeclaration = entry.declarations ? entry.declarations[0] : undefined;
                if (firstDeclaration == null || firstDeclaration.kind !== SyntaxKind.ClassDeclaration) return;

                // entry is a class; collect the entries of its methods.
                const members = this.checker.getExportSymbolOfSymbol(entry).members;
                if (members == null) return;
                members.forEach((member: ts.Symbol) => {
                    if (member.valueDeclaration && member.valueDeclaration.kind === SyntaxKind.MethodDeclaration) {
                        // member is a method
                        functionAndMethodEntries.push(member);
                    }
                });
            });
        }

        this.functionAndMethodEntries = functionAndMethodEntries;
    }

    /**
     * Generates a string which represents a path between two leaves in the AST.
     * The path lists the node kinds from `source` up to the lowest common
     * ancestor (each followed by the up symbol), the common ancestor itself,
     * and then the kinds down to `target` (each preceded by the down symbol).
     * @param source - a source node (leaf).
     * @param target - a target node (leaf).
     * @returns the path, or Common.EmptyString when the nodes share no
     *          ancestor, the path is longer than MaxPathLength, or the
     *          source-side climb (including the common ancestor) touches a
     *          source file, function, method, class or interface declaration.
     */
    private generatePath(source: Node, target: Node): string {
        const down: string = FeatureExtractor.downSymbol;
        const up: string = FeatureExtractor.upSymbol;
        const startSymbol: string = FeatureExtractor.lparen;
        const endSymbol: string = FeatureExtractor.rparen;

        // Both stacks run from the node itself (index 0) to the root (last index).
        const sourceStack: Array<Node> = FeatureExtractor.getTreeStack(source);
        const targetStack: Array<Node> = FeatureExtractor.getTreeStack(target);

        // Count the ancestors shared by both nodes, walking down from the roots.
        let commonPrefix = 0;
        let sourceIndex = sourceStack.length - 1;
        let targetIndex = targetStack.length - 1;
        while (sourceIndex >= 0 && targetIndex >= 0 && sourceStack[sourceIndex] === targetStack[targetIndex]) {
            commonPrefix++;
            sourceIndex--;
            targetIndex--;
        }

        // Without a shared ancestor there is no common node to pivot on.
        if (commonPrefix === 0) {
            return Common.EmptyString;
        }

        const pathLength: number = sourceStack.length + targetStack.length - 2 * commonPrefix;
        if (pathLength > this.m_CommandLineValues.MaxPathLength) {
            return Common.EmptyString;
        }

        // Index (in sourceStack) of the lowest common ancestor.
        const commonAncestorIndex = sourceStack.length - commonPrefix;

        /* Don't create a path that climbs through a function, method, class, interface or the
           source file: this keeps paths inside a single statement or block of one function.
           NOTE(review): only the source side and the common ancestor are checked here; the
           target side is not - confirm whether that is intended. */
        for (let i = 0; i <= commonAncestorIndex; i++) {
            if (FeatureExtractor.pathUpperBoundKinds.has(sourceStack[i].kind)) {
                return Common.EmptyString;
            }
        }

        let stringBuilder: string = Common.EmptyString;

        // Build the string up the path
        for (let i = 0; i < commonAncestorIndex; i++) {
            stringBuilder = stringBuilder + startSymbol + SyntaxKind[sourceStack[i].kind] + endSymbol + up;
        }

        // Add the common ancestor
        stringBuilder = stringBuilder + startSymbol + SyntaxKind[sourceStack[commonAncestorIndex].kind] + endSymbol;

        // Continue building the string down the path
        for (let i = targetStack.length - commonPrefix - 1; i >= 0; i--) {
            stringBuilder = stringBuilder + down + startSymbol + SyntaxKind[targetStack[i].kind] + endSymbol;
        }
        return stringBuilder;
    }

    /**
     * Gets the chain of nodes from the given node up to the root.
     * @param node - the node to start from.
     * @returns the node itself first, then each successive parent, ending
     *          with the node that has no parent.
     */
    private static getTreeStack(node: Node): Array<Node> {
        const upStack: Array<Node> = [];
        let currentNode: Node = node;
        while (currentNode != null) {
            upStack.push(currentNode);
            currentNode = currentNode.parent;
        }
        return upStack;
    }
}

0 commit comments

Comments
 (0)