Skip to content

Commit fc82699

Browse files
authored
Merge pull request #49 from bakhtos/maven-xml-parsing
Parse `pom.xml` using XML parsers
2 parents 670b4d2 + 7046c66 commit fc82699

File tree

2 files changed

+85
-157
lines changed

2 files changed

+85
-157
lines changed

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ plantuml==0.3.0
33
PyYAML==6.0
44
ruamel.base==1.0.0
55
PyDriller==2.6.0
6+
lxml==5.3.0

technology_specific_extractors/maven/mvn_entry.py

Lines changed: 84 additions & 157 deletions
Original file line numberDiff line numberDiff line change
@@ -9,55 +9,53 @@
99
import technology_specific_extractors.docker.dcr_entry as dcr
1010
import tmp.tmp as tmp
1111
import output_generators.traceability as traceability
12-
from core.service import CService
12+
13+
try:
14+
from lxml import etree
15+
XML_BACKEND = "LXML"
16+
except ImportError:
17+
import xml.etree.ElementTree as etree
18+
XML_BACKEND = "PYTHON"
19+
NAMESPACE = {"mvn": "http://maven.apache.org/POM/4.0.0"}
1320

1421

1522
def set_microservices(dfd) -> dict:
1623
"""Extracts the list of services from pom.xml files and sets the variable in the tmp-file.
1724
"""
1825

19-
if not used_in_application():
20-
return False
21-
2226
if tmp.tmp_config.has_option("DFD", "microservices"):
2327
microservices = ast.literal_eval(tmp.tmp_config["DFD"]["microservices"])
2428
else:
2529
microservices = dict()
2630
microservices_set = set()
2731

2832
pom_files = fi.get_file_as_lines("pom.xml")
29-
module_tuples = list()
33+
module_dict = dict()
3034

3135
for pf in pom_files.keys():
3236
pom_file = pom_files[pf]
3337
image = "image_placeholder"
3438
modules = extract_modules(pom_file)
3539
if modules:
36-
module_tuples.append((pom_file["name"], modules))
40+
module_dict[(pom_file["name"])] = modules
3741
else:
3842
microservice, properties = parse_configurations(pom_file)
39-
properties = extract_dependencies(properties, pom_file["content"])
43+
properties = extract_dependencies(properties, pom_file)
4044
if microservice[0]:
4145
port = dcr.detect_port(pom_file["path"])
4246
# create microservice in dict
43-
try:
44-
id = max(microservices.keys()) + 1
45-
except:
46-
id = 0
47-
microservices[id] = dict()
48-
49-
microservices[id]["name"] = microservice[0]
50-
microservices[id]["image"] = image
51-
microservices[id]["type"] = "internal"
52-
microservices[id]["pom_path"] = pom_file["path"]
53-
microservices[id]["properties"] = properties
54-
microservices[id]["stereotype_instances"] = list()
47+
id_ = max(microservices.keys(), default=-1) + 1
48+
microservices[id_] = dict()
49+
microservices[id_]["name"] = microservice[0]
50+
microservices[id_]["image"] = image
51+
microservices[id_]["type"] = "internal"
52+
microservices[id_]["pom_path"] = pom_file["path"]
53+
microservices[id_]["properties"] = properties
54+
microservices[id_]["stereotype_instances"] = list()
5555
if port:
56-
microservices[id]["tagged_values"] = [("Port", port)]
56+
microservices[id_]["tagged_values"] = [("Port", port)]
5757
else:
58-
microservices[id]["tagged_values"] = list()
59-
60-
new_service = CService(microservice[0], )
58+
microservices[id_]["tagged_values"] = list()
6159
try:
6260
trace = dict()
6361
name = microservice[0]
@@ -70,54 +68,58 @@ def set_microservices(dfd) -> dict:
7068
except:
7169
pass
7270

73-
nested_microservices = check_nested_modules(module_tuples)
74-
for m in nested_microservices:
75-
microservices_set.add(m)
71+
nested_microservices = check_nested_modules(module_dict)
72+
microservices_set.update(nested_microservices)
7673

7774
tmp.tmp_config.set("DFD", "microservices", str(microservices).replace("%", "%%")) # Need to escape single percentage signs for ConfigParser
7875

7976
return microservices
8077

8178

def extract_dependencies(properties: set, pom_file) -> set:
    """Checks the direct dependencies of a pom.xml for known technologies.

    The file at ``pom_file["path"]`` (resolved against the repository's
    ``local_path`` from the tmp config) is parsed as XML. If its top-level
    ``<dependencies>`` block lists ``spring-cloud-starter-netflix-hystrix``,
    a Hystrix circuit-breaker property is recorded.

    :param properties: set of property tuples; it is updated in place.
    :param pom_file: dict describing the pom.xml; only the ``"path"`` key is read here.
    :return: the same ``properties`` set that was passed in.
    """

    file_name = pom_file["path"]
    pom_path = os.path.join(tmp.tmp_config.get("Repository", "local_path"), file_name)
    root = etree.parse(pom_path).getroot()

    dependencies = root.find('mvn:dependencies', NAMESPACE)
    if dependencies is None:
        return properties

    for dependency in dependencies.findall('mvn:dependency', NAMESPACE):
        artifact_id = dependency.find('mvn:artifactId', NAMESPACE)
        # ``text`` is None for an empty element such as <artifactId/>;
        # calling .strip() on it would raise AttributeError.
        if artifact_id is None or artifact_id.text is None:
            continue
        if artifact_id.text.strip() == "spring-cloud-starter-netflix-hystrix":
            properties.add(("circuit_breaker", "Hystrix", ("file", "line", "span")))

    return properties
9896

9997

def extract_modules(pom_file: dict) -> set:
    """Extracts modules of a Maven project based on the <module> </module>-tag.

    Only ``<module>`` elements inside the project's top-level ``<modules>``
    block are considered.

    :param pom_file: dict describing the pom.xml; only the ``"path"`` key is read here.
    :return: set of module names; empty if the pom.xml declares no modules.
    """

    file_name = pom_file["path"]
    pom_path = os.path.join(tmp.tmp_config.get("Repository", "local_path"), file_name)
    root = etree.parse(pom_path).getroot()

    modules = root.find('mvn:modules', NAMESPACE)
    if modules is None:
        return set()

    # Skip empty <module/> elements: their ``text`` is None (which would crash
    # on .strip()), and a blank name would make the result truthy without
    # naming any real module.
    return {
        module.text.strip()
        for module in modules.findall('mvn:module', NAMESPACE)
        if module.text is not None and module.text.strip()
    }
110113

111114

def check_nested_modules(module_tuples: dict) -> set:
    """Finds components that are themselves listed as a module of some pom.xml.

    :param module_tuples: mapping of the form ``{component: modules}``, where
        ``modules`` is an iterable of the module names declared by that component.
    :return: set of component names that also occur among the declared
        modules, i.e. services that need to be added to the list.
    """

    # ``set(*iterables)`` only works for zero or one iterable and raises
    # TypeError for more; ``set().union(*iterables)`` merges any number.
    modules = set().union(*module_tuples.values())

    # Iterating the dict yields its keys, i.e. the component names.
    return modules.intersection(module_tuples)
123125

@@ -126,17 +128,13 @@ def parse_configurations(pom_file) -> str:
126128
"""Extracts servicename and properties for a given file. Tries properties file first, then pom file.
127129
"""
128130

129-
properties = set()
130131
microservice, properties = parse_properties_file(pom_file["path"])
131132
if not microservice[0]:
132-
microservice = extract_servicename_pom_file(pom_file["content"], pom_file["path"])
133-
134-
if microservice[0]:
135-
microservice[0] = "pom_" + microservice[0]
136-
if microservice[0]:
137-
return microservice, properties
133+
microservice = extract_servicename_pom_file(pom_file)
134+
if not microservice[0]:
135+
return (False, False), set()
138136

139-
return (False, False), properties
137+
return microservice, properties
140138

141139

142140
def parse_properties_file(pom_path: str):
@@ -181,119 +179,48 @@ def parse_properties_file(pom_path: str):
181179
return microservice, properties
182180

183181

def extract_servicename_pom_file(pom_file) -> list:
    """Extracts the name of a Maven-module based on the <finalName> tag if existing, else the <artifactId>.

    Only ``<build><finalName>`` and the project's own top-level
    ``<artifactId>`` are looked at. The returned name is prefixed with ``pom_``.

    :param pom_file: dict describing the pom.xml; the ``"path"`` and
        ``"content"`` (list of the file's lines) keys are read.
    :return: two-item list ``[name, trace]`` where ``trace`` is
        ``(file_name, line_number, span)``; ``[False, False]`` if no name was found.
        Without lxml the position is unknown, so ``line_number`` is ``None``
        and ``span`` is ``"[?:?]"``.
    """

    microservice = [False, False]
    file_name = pom_file["path"]
    pom_path = os.path.join(tmp.tmp_config.get("Repository", "local_path"), file_name)
    root = etree.parse(pom_path).getroot()

    # Prefer <finalName>; fall back to <artifactId>. An element without text
    # (e.g. <finalName/>) is treated like a missing one instead of crashing.
    name = None
    name_element = None
    for tag_path in ('mvn:build/mvn:finalName', 'mvn:artifactId'):
        candidate = root.find(tag_path, NAMESPACE)
        if candidate is not None and candidate.text and candidate.text.strip():
            name_element = candidate
            name = candidate.text.strip()
            break
    if name_element is None:
        return microservice

    # tracing
    line_number = None
    span = "[?:?]"
    if XML_BACKEND == "LXML":
        # lxml's sourceline is one-based; keep it one-based in the trace, as
        # the line-based implementation this replaced did (line_nr + 1).
        line_number = name_element.sourceline
        lines = pom_file["content"]
        if line_number is not None and 0 < line_number <= len(lines):
            # The name is literal text, not a pattern, so escape it. It may
            # also sit on a different line than the opening tag, in which
            # case there is no match and the span stays unknown.
            match = re.search(re.escape(name), lines[line_number - 1])
            if match is not None:
                span = "[" + str(match.start()) + ":" + str(match.end()) + "]"
    trace = (file_name, line_number, span)

    microservice[0] = "pom_" + name
    microservice[1] = trace

    return microservice
283215

284-
count = 0
285216

286217
def detect_microservice(file_path, dfd):
287218
"""Detects which microservice a file belongs to by looking for next pom.xml.
288219
"""
289220

290-
if not used_in_application():
291-
return False
292-
293221
microservice = [False, False]
294222
microservices = tech_sw.get_microservices(dfd)
295223

296-
297224
path = file_path
298225
found_pom = False
299226

@@ -303,7 +230,7 @@ def detect_microservice(file_path, dfd):
303230
path = os.path.dirname(path)
304231
while not found_pom and path != "":
305232
dirs.append(os.scandir(os.path.join(local_repo_path, path)))
306-
while dirs:
233+
while dirs and not found_pom:
307234
dir = dirs.pop()
308235
for entry in dir:
309236
if entry.is_file():

0 commit comments

Comments
 (0)