Skip to content

Commit

Permalink
Analyze/describe/find plugins greatly enhanced.
Browse files Browse the repository at this point in the history
BUG=
R=scudette@gmail.com

Review URL: https://codereview.appspot.com/167640043
  • Loading branch information
the80srobot committed Nov 13, 2014
1 parent cc1777f commit c7fa181
Show file tree
Hide file tree
Showing 17 changed files with 586 additions and 120 deletions.
2 changes: 1 addition & 1 deletion rekall/entities/collector.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def __init__(self, entity_manager=None):
self.manager = entity_manager
self.session = entity_manager.session
self._indices_seen = set()
self.collect_queries = {}
self._ensure_compile_queries()

@property
Expand Down Expand Up @@ -159,7 +160,6 @@ def _ensure_compile_queries(self):
if self.collect_queries or self.collect_args is None:
return

self.collect_queries = {}
for arg, source in self.collect_args.iteritems():
self.collect_queries[arg] = entity_query.Query(source)

Expand Down
23 changes: 22 additions & 1 deletion rekall/entities/component.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,13 @@ class TypeDescriptor(object):
def __init__(self):
pass

def chill_coerce(self, value):
    """Best-effort variant of coerce.

    Delegates to self.coerce(value). If coerce raises TypeError the
    original value is handed back untouched; any other exception is
    allowed to propagate.
    """
    try:
        coerced = self.coerce(value)
    except TypeError:
        # Not convertible - fall back to the value as given.
        return value
    else:
        return coerced

def coerce(self, value):
"""Return value as this type or raise TypeError if not convertible."""
return value
Expand Down Expand Up @@ -251,10 +258,20 @@ class Field(object):
exclude_analysis: This field should not be considered by the query analyzer.
"""

def __init__(self, name, typedesc, docstring):
component = None # Needs to be set when the Field is attached to a
# component class.

def __init__(self, name, typedesc, docstring, width=20,
hidden=False):
self.name = name
self.typedesc = TypeFactory(typedesc)
self.docstring = docstring
self.width = width
self.hidden = hidden

@property
def path(self):
    """Return "<component name>/<field name>" for this field.

    Reads self.component.component_name, so a component must already be
    attached to the field when this is accessed.
    """
    owner_name = self.component.component_name
    return "%s/%s" % (owner_name, self.name)

def __unicode__(self):
return repr(self)
Expand Down Expand Up @@ -381,6 +398,10 @@ def DeclareComponent(name, docstring, *fields):

component_cls = type(name, (Component,), props)

# Attach a reference back to the component to each field.
for field in fields:
field.component = component_cls

# Redefine ComponentContainer to add a field for the new component class.
global ComponentContainer
component_names = list(ComponentContainer._fields)
Expand Down
29 changes: 17 additions & 12 deletions rekall/entities/definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,18 +88,22 @@

Process = component.DeclareComponent(
"Process", "A process.",
component.Field("pid", int, "PID, on systems that have one."),
component.Field("pid", int, "PID, on systems that have one.", width=6),
component.Field("parent", "Identity",
"Process that spawned this process."),
"Process that spawned this process.", width=20),
component.Field("user", "Identity",
"The user with whose credentials this is running."),
"The user with whose credentials this is running.",
width=20),
component.Field("command", str,
"The path to the binary or the command that executed."),
"The path to the binary or the command that executed.",
width=30),
component.Field("arguments", [str],
"List of arguments."),
component.Field("is_64bit", bool, "Is the process running in 64bit."),
"List of arguments.", width=40, hidden=True),
component.Field("is_64bit", bool, "Is the process running in 64bit.",
width=10),
component.Field("session",
"Identity", "The session this process belongs to."))
"Identity", "The session this process belongs to.",
width=20))


Terminal = component.DeclareComponent(
Expand Down Expand Up @@ -175,11 +179,12 @@
Timestamps = component.DeclareComponent(
"Timestamps",
"Standard times, such as ctime, atime, mtime, ...",
component.Field("created_at", int, "Creation/start time."),
component.Field("destroyed_at", int, "Deletion/destruction/stop time."),
component.Field("accessed_at", int, "Access time."),
component.Field("modified_at", int, "Modification time."),
component.Field("backup_at", int, "Backup time."))
component.Field("created_at", int, "Creation/start time.", width=25),
component.Field("destroyed_at", int, "Deletion/destruction/stop time.",
width=25),
component.Field("accessed_at", int, "Access time.", width=25),
component.Field("modified_at", int, "Modification time.", width=25),
component.Field("backup_at", int, "Backup time.", hidden=True, width=25))


Permissions = component.DeclareComponent(
Expand Down
17 changes: 14 additions & 3 deletions rekall/entities/entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,17 +154,28 @@ def strict_superset(self, other):
def name(self):
name = self.get_raw("Named/name")
if name == None:
name = unicode(self.identity)
key = unicode(self.identity.first_index[1])
val = self.identity.first_index[2]

if isinstance(val, obj.Struct):
# Rekall uses the opposite meaning of repr and str from
# the entity layer. This is a temporary workaround until
# everything just uses renderers all the time.
val = repr(val)
else:
val = unicode(val)

return "%s: %s" % (key, val)

return name
return unicode(name)

@property
def kind(self):
kind = self.get_raw("Named/kind")
if kind == None:
kind = "Entity"

return kind
return unicode(kind)

def __repr__(self):
parts = []
Expand Down
46 changes: 39 additions & 7 deletions rekall/entities/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,15 +394,24 @@ def analyze(self, wanted):
if not isinstance(wanted, entity_query.Query):
wanted = entity_query.Query(wanted)

analysis = self._cached_query_analyses.get(wanted, None)
# We cache by the source and not the query because we want to reanalyze
# queries that are logically equivalent, but expressed differently, in
# order to have the right cursor positions stored for highlighting in
# the GUI.
cache_key = wanted.source

analysis = self._cached_query_analyses.get(cache_key, None)
if analysis:
# We want to make a copy exactly one level deep.
analysis_copy = {}
for key, value in analysis.iteritems():
analysis_copy[key] = copy.copy(value)
return analysis_copy

include, exclude, suggested_indices = wanted.execute("QueryAnalyzer")
analyzer = wanted.execute("QueryAnalyzer")
include = analyzer.include
exclude = analyzer.exclude
suggested_indices = analyzer.latest_indices

# A collector is a match if any of its promises match any of the
# dependencies of the query.
Expand All @@ -427,15 +436,36 @@ def analyze(self, wanted):
# No exclusions.
collectors.add(collector)

# A component is guaranteed if any dependency lists it. It is likely
# if collectors we depend on output it (though not guaranteed).
guaranteed_components = set(analyzer.expected_components)
possible_components = set()

for dependency in include:
component = dependency.component
if component in guaranteed_components:
continue
possible_components.add(dependency.component)

for collector in collectors:
for promise in collector.promises:
component = promise.component
if component in guaranteed_components:
continue

possible_components.add(component)

analysis = dict(collectors=list(collectors),
lookups=suggested_indices,
dependencies=include,
exclusions=exclude)
self._cached_query_analyses[wanted] = analysis
exclusions=exclude,
guaranteed_components=guaranteed_components,
possible_components=possible_components)
self._cached_query_analyses[cache_key] = analysis

return analysis

def find(self, query, complete=True, validate=True):
def find(self, query, complete=True, validate=True, query_params=None):
"""Runs the query and yields entities that match.
Arguments:
Expand All @@ -453,12 +483,14 @@ def find(self, query, complete=True, validate=True):
if isinstance(query, dict):
results = {}
for query_name, expr in query.iteritems():
results[query_name] = self.find(expr, complete=complete)
results[query_name] = self.find(expr, complete=complete,
validate=validate,
query_params=query_params)

return results

if not isinstance(query, entity_query.Query):
query = entity_query.Query(query)
query = entity_query.Query(query, params=query_params)

if validate:
query.execute("QueryValidator")
Expand Down
61 changes: 43 additions & 18 deletions rekall/entities/query/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,13 +146,14 @@ class SimpleDependency(Dependency):
SHORTHAND = re.compile(r"([A-Z][a-zA-Z]+)(?:\/([a-z_]+)=(.+))?")

def __init__(self, component, attribute=None, value=None, flag=True,
weak=False):
weak=False, expression=None):
self.component = component
self.attribute = attribute
self.value = value
self.flag = flag
self.weak = weak
self.dependencies = set([self])
self.expression = expression

@classmethod
def parse(cls, promise):
Expand All @@ -169,12 +170,14 @@ def parse(cls, promise):
return cls(*match.groups())

def simplified(self):
return SimpleDependency(self.component, None, None, True)
return SimpleDependency(component=self.component,
expression=self.expression)

def inverted(self):
"""Returns a SimpleDependency with the flag flipped."""
return SimpleDependency(self.component, self.attribute, self.value,
not self.flag)
return SimpleDependency(component=self.component, value=self.value,
attribute=self.attribute, flag=(not self.flag),
expression=self.expression)

def astuple(self):
return (self.component, self.attribute, self.value, self.flag)
Expand Down Expand Up @@ -220,8 +223,8 @@ class QueryAnalyzer(visitor.QueryVisitor):
other than 'socket' will be excluded.
"""

def __init__(self, *args, **kwargs):
super(QueryAnalyzer, self).__init__(*args, **kwargs)
include = ()
exclude = ()

def run(self):
"""Analyzes query for dependencies on collectors and indices.
Expand All @@ -231,19 +234,23 @@ def run(self):
- Set of SimpleDependency instances to be excluded.
- Set of names of attributes whose indices can speed up the query.
"""
self.query.execute("QueryValidator")
self.latest_indices = set()
self._let_stack = [] # How deep are we in Let expressions?
self.expected_components = set()
result = self.visit(self.expression)
if isinstance(result, Dependency):
include, exclude = result.normalize()
return include, exclude, self.latest_indices
self.include, self.exclude = result.normalize()

return (), (), self.latest_indices
return self

def visit_Literal(self, expr):
    """Return the literal's value as-is; no Dependency is produced."""
    literal_value = expr.value
    return literal_value

def visit_Binding(self, expr):
component, attribute = expr.value.split("/", 1)
if not self._let_stack:
self.expected_components.add(component)

# Certain types of fields should be considered weak dependencies, which
# means we definitely want to depend on the component, but if we can
Expand All @@ -252,27 +259,42 @@ def visit_Binding(self, expr):
component_cls = getattr(definitions, component)
field = component_cls.reflect_field(attribute)
if field.typedesc.type_name == "Identity":
return SimpleDependency(component, attribute, weak=True)
return SimpleDependency(component=component, attribute=attribute,
weak=True, expression=expr)

return SimpleDependency(component, attribute)
return SimpleDependency(component=component, attribute=attribute,
expression=expr)

def visit_ComponentLiteral(self, expr):
    """Return a SimpleDependency on the component named by expr.value.

    When the let stack is empty, the component is also added to
    self.expected_components.
    """
    inside_let = bool(self._let_stack)
    if not inside_let:
        self.expected_components.add(expr.value)

    dependency = SimpleDependency(expr.value)
    return dependency

def visit_Let(self, expr):
context = expr.context.value
ctx_component, ctx_attribute = context.split("/", 1)
dependency = SimpleDependency(ctx_component, ctx_attribute)

if not self._let_stack:
self.expected_components.add(ctx_component)

dependency = SimpleDependency(component=ctx_component,
attribute=ctx_attribute,
expression=expr)

self._let_stack.append(expr)
value = self.visit(expr.expression)
self._let_stack.pop()

if isinstance(value, Dependency):
dependency = DependencySet(dependency, *value.dependencies)

return dependency

def visit_Sorted(self, expr):
key_component, _ = expr.binding.split("/", 1)
dependency = SimpleDependency(key_component)
dependency = SimpleDependency(component=key_component,
expression=expr)

value = self.visit(expr.expression)
if isinstance(value, Dependency):
Expand All @@ -291,7 +313,7 @@ def visit_Complement(self, expr):
# If the above didn't return then we can't do anything smart about this.
return self.visit_Expression(expr)

def _solve_Equivalence(self, dependency, value):
def _solve_Equivalence(self, dependency, value, expr):
attribute_path = "%s/%s" % (dependency.component, dependency.attribute)

# Suggest that the manager build an index for component/attribute.
Expand All @@ -306,6 +328,7 @@ def _solve_Equivalence(self, dependency, value):
else:
dependency.value = value

dependency.expression = expr
return dependency

def visit_Equivalence(self, expr):
Expand All @@ -317,10 +340,10 @@ def visit_Equivalence(self, expr):
y = self.visit(expr.children[1])
if (isinstance(x, SimpleDependency)
and not isinstance(y, Dependency)):
return self._solve_Equivalence(x, y)
return self._solve_Equivalence(x, y, expr)
elif (isinstance(y, SimpleDependency)
and not isinstance(x, Dependency)):
return self._solve_Equivalence(y, x)
return self._solve_Equivalence(y, x, expr)

# If the above doesn't return then we can't infer much here and fall
# through to the default behavior.
Expand Down Expand Up @@ -392,10 +415,12 @@ def visit_Intersection(self, expr):
results.add(dependency)

for component in simple:
results.add(SimpleDependency(component))
results.add(SimpleDependency(component=component,
expression=expr))

for component in weak:
results.add(SimpleDependency(component, weak=True))
results.add(SimpleDependency(component=component, weak=True,
expression=expr))

return DependencySet(*results)

Expand Down
Loading

0 comments on commit c7fa181

Please sign in to comment.