Skip to content

Commit 958792d

Browse files
committed
Add files via upload
1 parent f48ac86 commit 958792d

File tree

2 files changed

+561
-0
lines changed

2 files changed

+561
-0
lines changed
Lines changed: 284 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,284 @@
1+
"""
2+
Parse XML file containing MSDN documentation.
3+
4+
Authors: Moritz Raabe, William Ballenthin
5+
Copyright 2014 Mandiant, A FireEye Company
6+
7+
TODO: License
8+
9+
Based on zynamics' code at
10+
https://code.google.com/p/zynamics/source/browse/?repo=msdn-ida-plugin
11+
"""
12+
13+
import os.path
14+
import sys
15+
import xml.sax.handler
16+
import itertools
17+
import logging
18+
19+
20+
class ParsingException(Exception):
    """
    Error raised by the SAX handler when it meets an element it does not
    expect.

    Argument:
    message -- short text describing where parsing failed; kept both in
               ``args`` and on the ``message`` attribute
    """

    def __init__(self, message):
        # Store the text explicitly: ``Exception.message`` is not provided by
        # the base class on every Python version, but callers read it.
        self.message = message
        Exception.__init__(self, message)
25+
26+
27+
class Argument:
    """
    One argument of a documented function.

    Attributes:
    name -- argument name
    description -- descriptive text of the argument
    constants -- list of constant objects attached to this argument (each
                 needs a ``name`` attribute and a ``merge`` method)
    enums -- list of enum names attached to this argument
    """

    def __init__(self):
        self.name = ""
        self.description = ""
        self.constants = []
        self.enums = []
        self._logger = logging.getLogger(__name__ + '.' + self.__class__.__name__)

    def __str__(self):
        text = "(%s, %s): %s" % (self.name, self.enums, self.description)
        # __str__ has to return the native ``str`` type. On Python 3 the
        # formatted text already is one; returning encoded bytes there makes
        # str()/repr() raise TypeError. Only a Python 2 ``unicode`` result
        # still needs the ISO-8859-1 encoding.
        if isinstance(text, str):
            return text
        return text.encode("ISO-8859-1")

    def __repr__(self):
        return self.__str__()

    def get_constant(self, name):
        """
        Return the first constant called ``name`` or None if there is none.

        Argument:
        name -- name of the constant to look up
        """
        for const in self.constants:
            if const.name == name:
                return const
        return None

    def merge(self, new_argument):
        """
        Merge another argument object into this one. Information found in
        the second instance overwrites previously obtained data; constants
        and enums are added to the existing lists.

        Nothing happens if the two arguments have different names.

        Argument:
        new_argument -- argument object that will overwrite previous data
        """
        if self.name != new_argument.name:
            return

        if new_argument.description:
            self._logger.debug('   Overwriting argument description')
            self.description = new_argument.description
        if new_argument.constants:
            for constant in new_argument.constants:
                current_const = self.get_constant(constant.name)
                if current_const is None:
                    # Constant not in list yet
                    self._logger.debug('   Adding new constant ' + constant.name)
                    self.constants.append(constant)
                    continue
                # Constant possibly needs to be updated
                current_const.merge(constant)
        if new_argument.enums:
            self.enums += new_argument.enums
            # Log after extending so the message really shows the result.
            self._logger.debug('   Merging argument enums, resulting in [' +
                               ', '.join(self.enums) + ']')
69+
70+
71+
class Constant:
    """
    One named constant that can be passed for an argument.

    Attributes:
    name -- constant name
    value -- constant value, kept as text
    description -- descriptive text of the constant
    """

    def __init__(self):
        self.name = ""
        self.value = ""
        self.description = ""
        self._logger = logging.getLogger(__name__ + '.' + self.__class__.__name__)

    def __str__(self):
        text = "(%s, %s)" % (self.name, self.value)
        # __str__ has to return the native ``str`` type. On Python 3 the
        # formatted text already is one; returning encoded bytes there makes
        # str()/repr() raise TypeError. Only a Python 2 ``unicode`` result
        # still needs the ISO-8859-1 encoding.
        if isinstance(text, str):
            return text
        return text.encode("ISO-8859-1")

    def __repr__(self):
        return self.__str__()

    def merge(self, new_constant):
        """
        Merge another constant object into this one. A non-empty value or
        description of the second instance overwrites the stored one.

        Nothing happens if the two constants have different names.

        Argument:
        new_constant -- constant object that will overwrite previous data
        """
        if self.name != new_constant.name:
            return

        self._logger.debug('   Working on constant ' + self.name)
        if new_constant.value:
            self._logger.debug('    Overwriting constant value')
            self.value = new_constant.value
        if new_constant.description:
            self._logger.debug('    Overwriting constant description')
            self.description = new_constant.description
96+
97+
class Function:
    """
    Documentation record of one function.

    Attributes:
    name -- function name
    dll -- text of the function's dll entry
    description -- descriptive text of the function
    arguments -- list of argument objects (each needs a ``name`` attribute
                 and a ``merge`` method)
    returns -- text describing the return value
    """

    def __init__(self):
        self.name = ""
        self.dll = ""
        self.description = ""
        self.arguments = []
        self.returns = ""
        self._logger = logging.getLogger(__name__ + '.' + self.__class__.__name__)

    def __str__(self):
        text = "%s -- %s" % (self.name, self.arguments)
        # __str__ has to return the native ``str`` type. On Python 3 the
        # formatted text already is one; returning encoded bytes there makes
        # str()/repr() raise TypeError. Only a Python 2 ``unicode`` result
        # still needs the ISO-8859-1 encoding.
        if isinstance(text, str):
            return text
        return text.encode("ISO-8859-1")

    def __repr__(self):
        return self.__str__()

    def get_argument(self, name):
        """
        Return the first argument called ``name`` or None if there is none.

        Argument:
        name -- name of the argument to look up
        """
        for arg in self.arguments:
            if arg.name == name:
                return arg
        return None

    def merge(self, new_function):
        """
        Merge two function objects. Information found in the second function
        instance will overwrite previously obtained data.

        Nothing happens if the two functions have different names. Arguments
        unknown so far are appended; known ones are merged by name.

        Argument:
        new_function -- function object that will overwrite previous data
        """
        if self.name != new_function.name:
            return

        self._logger.debug('Merging function ' + self.name)
        if new_function.dll:
            self._logger.debug(' Overwriting DLL info')
            self.dll = new_function.dll
        if new_function.description:
            self._logger.debug(' Overwriting function description')
            self.description = new_function.description
        if new_function.arguments:
            for arg in new_function.arguments:
                self._logger.debug('  Working on argument ' + arg.name)
                current_arg = self.get_argument(arg.name)
                if current_arg is None:
                    # Argument not in list yet
                    self._logger.debug('  Adding argument ' + arg.name + ' to arguments')
                    self.arguments.append(arg)
                    continue
                # Argument possibly needs to be updated
                current_arg.merge(arg)
        if new_function.returns:
            self._logger.debug(' Overwriting function return value')
            self.returns = new_function.returns
151+
152+
153+
class FunctionHandler(xml.sax.handler.ContentHandler):
    """
    SAX content handler that builds Function objects from the MSDN XML.

    The handler is a small state machine: ``current_step`` holds one of the
    ``IN_*`` constants and decides where character data is stored and how
    the ambiguous ``name`` / ``description`` / ``value`` elements are
    interpreted. Every completed ``<function>`` is appended to
    ``functions``.

    Raises ParsingException from startElement/endElement when an element is
    met that is not expected in the current state.
    """
    # Parser states, numbered consecutively starting at 0.
    c = itertools.count()
    IN_FUNCTION = next(c)
    IN_FUNCTION_NAME = next(c)
    IN_DLL = next(c)
    IN_FUNCTION_DESCRIPTION = next(c)
    IN_ARGUMENTS = next(c)
    IN_ARGUMENT = next(c)
    IN_ARGUMENT_NAME = next(c)
    IN_ARGUMENT_DESCRIPTION = next(c)
    IN_RETURNS = next(c)
    IN_CONSTANTS = next(c)
    IN_CONSTANT = next(c)
    IN_CONSTANT_NAME = next(c)
    IN_CONSTANT_VALUE = next(c)
    IN_CONSTANT_DESCRIPTION = next(c)

    def __init__(self):
        # Explicit base call (not super()) so it also works where the SAX
        # base class is an old-style class.
        xml.sax.handler.ContentHandler.__init__(self)
        # inTitle and mapping are not used by this class; kept in case
        # external code reads them.
        self.inTitle = 0
        self.mapping = {}
        # NOTE: 0 is also the value of IN_FUNCTION.
        self.current_step = 0
        self.functions = []
        # Objects currently being filled; set when their element starts.
        self.function = None
        self.current_argument = None
        self.current_constant = None
        self._logger = logging.getLogger(__name__ + '.' + self.__class__.__name__)

    def startElement(self, name, attributes):
        """
        Enter the state that belongs to the opening element.

        Arguments:
        name -- element name
        attributes -- SAX attributes object of the element
        """
        if name == "msdn":
            pass
        elif name == "functions":
            pass
        elif name == "function":
            self.current_step = FunctionHandler.IN_FUNCTION
            self.function = Function()
        elif self.current_step == FunctionHandler.IN_FUNCTION and name == "name":
            self.current_step = FunctionHandler.IN_FUNCTION_NAME
        elif self.current_step == FunctionHandler.IN_ARGUMENT and name == "name":
            self.current_step = FunctionHandler.IN_ARGUMENT_NAME
        elif name == "dll":
            self.current_step = FunctionHandler.IN_DLL
        elif self.current_step == FunctionHandler.IN_FUNCTION and name == "description":
            self.current_step = FunctionHandler.IN_FUNCTION_DESCRIPTION
        elif self.current_step == FunctionHandler.IN_ARGUMENT and name == "description":
            self.current_step = FunctionHandler.IN_ARGUMENT_DESCRIPTION
        elif self.current_step == FunctionHandler.IN_CONSTANT and name == "name":
            self.current_step = FunctionHandler.IN_CONSTANT_NAME
        elif self.current_step == FunctionHandler.IN_CONSTANT and name == "value":
            self.current_step = FunctionHandler.IN_CONSTANT_VALUE
        elif self.current_step == FunctionHandler.IN_CONSTANT and name == "description":
            self.current_step = FunctionHandler.IN_CONSTANT_DESCRIPTION
        elif name == "arguments":
            self.current_step = FunctionHandler.IN_ARGUMENTS
        elif name == "argument":
            self.current_step = FunctionHandler.IN_ARGUMENT
            self.current_argument = Argument()
        elif self.current_step == FunctionHandler.IN_CONSTANTS and name == "constant":
            self.current_step = FunctionHandler.IN_CONSTANT
            self.current_constant = Constant()
        elif name == "constants":
            self.current_step = FunctionHandler.IN_CONSTANTS
            self.current_argument.enums = []
            if "enums" in attributes.getNames():
                enums = attributes.getValue('enums')
                # Python 2 delivers ``unicode`` here, which is stored as a
                # UTF-8 byte string. A Python 3 ``str`` must stay text:
                # encoding it would make the split(',') below raise
                # TypeError.
                if not isinstance(enums, str):
                    enums = enums.encode('utf-8')
                if enums:
                    self.current_argument.enums = enums.split(',')
        elif name == "returns":
            self.current_step = FunctionHandler.IN_RETURNS
        else:
            self._logger.warning('Error START: ' + name)
            raise ParsingException('start')

    def characters(self, data):
        """
        Append character data to the field selected by the current state.
        Data received in any other state is dropped.

        Argument:
        data -- chunk of text; one element's text may arrive in several
                chunks, hence the concatenation
        """
        if self.current_step == FunctionHandler.IN_FUNCTION_NAME:
            self.function.name += data
        elif self.current_step == FunctionHandler.IN_DLL:
            self.function.dll += data
        elif self.current_step == FunctionHandler.IN_FUNCTION_DESCRIPTION:
            self.function.description += data
        elif self.current_step == FunctionHandler.IN_ARGUMENT_NAME:
            self.current_argument.name += data
        elif self.current_step == FunctionHandler.IN_ARGUMENT_DESCRIPTION:
            self.current_argument.description += data
        elif self.current_step == FunctionHandler.IN_RETURNS:
            self.function.returns += data
        elif self.current_step == FunctionHandler.IN_CONSTANT_NAME:
            self.current_constant.name += data
        elif self.current_step == FunctionHandler.IN_CONSTANT_VALUE:
            self.current_constant.value += data
        elif self.current_step == FunctionHandler.IN_CONSTANT_DESCRIPTION:
            self.current_constant.description += data

    def endElement(self, name):
        """
        Leave the state of the closing element and store finished objects.

        Argument:
        name -- element name
        """
        if name in ["functions", "msdn"]:
            pass
        elif name == "function":
            self.functions.append(self.function)
        elif self.current_step in [FunctionHandler.IN_ARGUMENT_NAME,
                                   FunctionHandler.IN_ARGUMENT_DESCRIPTION]:
            self.current_step = FunctionHandler.IN_ARGUMENT
        elif self.current_step in [FunctionHandler.IN_CONSTANT_NAME,
                                   FunctionHandler.IN_CONSTANT_VALUE,
                                   FunctionHandler.IN_CONSTANT_DESCRIPTION]:
            self.current_step = FunctionHandler.IN_CONSTANT
        elif name == "constants":
            # <constants> is opened inside an <argument>, so closing it
            # returns to the argument, not to the function. Otherwise a
            # <name>/<description> following it would be attributed to the
            # function.
            self.current_step = FunctionHandler.IN_ARGUMENT
        elif name in ["name", "dll", "description", "arguments", "returns"]:
            self.current_step = FunctionHandler.IN_FUNCTION
        elif name == "argument":
            self.current_step = FunctionHandler.IN_ARGUMENT
            self.function.arguments.append(self.current_argument)
        elif name == "constant":
            self.current_step = FunctionHandler.IN_CONSTANTS
            self.current_argument.constants.append(self.current_constant)
        else:
            self._logger.warning('Error END: ' + name)
            raise ParsingException('end')
264+
265+
266+
g_logger = logging.getLogger(__name__)


def parse(xmlfile):
    """
    Return parsed MSDN information.

    Argument:
    xmlfile -- xml data file storing the MSDN information; handed to the
               SAX parser's parse() unchanged

    Returns the list of functions collected by the handler, or None if the
    handler rejected the document with a ParsingException. Errors raised by
    the SAX parser itself are not caught here.
    """
    # %-style arguments keep the log call working for non-string sources.
    g_logger.info('Starting parsing %s', xmlfile)
    parser = xml.sax.make_parser()
    handler = FunctionHandler()
    parser.setContentHandler(handler)
    try:
        # The handler callbacks raise ParsingException while the document
        # is read, so this is the call that has to be guarded.
        parser.parse(xmlfile)
    except ParsingException as e:
        g_logger.warning(e.message)
        return None  # TODO critical?
    return handler.functions

0 commit comments

Comments
 (0)