Skip to content

Commit 4ab32d7

Browse files
author
casework
committed
Added session exporting.
1 parent 20f684f commit 4ab32d7

File tree

4 files changed

+106
-27
lines changed

4 files changed

+106
-27
lines changed

case.py

Lines changed: 37 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
"""An API to the CASE ontology."""
22

3+
import datetime
34
import rdflib
45
from rdflib import RDF, XSD
56
import rdflib.term
@@ -29,25 +30,31 @@ def _json_ld_context(self):
2930
context['@vocab'] = str(CASE)
3031
return context
3132

33+
def create_uco_object(self, type=None, **properties):
34+
"""Creates and returns a UcoObject."""
35+
return UcoObject(self.graph, rdf_type=type, **properties)
36+
37+
# Make separate function for creating Trace because it's very common.
3238
def create_trace(self, **properties):
3339
"""Creates and returns a Trace object."""
3440
return Trace(self.graph, **properties)
3541

36-
def create_relationship(self, **properties):
37-
"""Creates and returns a Relationship object."""
38-
return Relationship(self.graph, **properties)
39-
4042
# Manually specify properties to help enforce that both properties are supplied.
4143
def create_hash(self, hashMethod, hashValue):
42-
return Node(
43-
self.graph, rdf_type=CASE.Hash, hashMethod=hashMethod, hashValue=hashValue)
44+
return self.create_node(CASE.Hash, hashMethod=hashMethod, hashValue=hashValue)
45+
46+
def create_node(self, type=None, **properties):
47+
return Node(self.graph, rdf_type=type, **properties)
4448

4549
# We are going to default to json-ld instead of rdflib's default of xml.
4650
def serialize(self, format='json-ld', **kwargs):
4751
"""Serializes the document's graph to a destination.
4852
(Follows same arguments as rdflib.Graph().serialize())"""
49-
if format == 'json-ld' and 'context' not in kwargs:
50-
kwargs['context'] = self._json_ld_context()
53+
if format == 'json-ld':
54+
if 'context' not in kwargs:
55+
kwargs['context'] = self._json_ld_context()
56+
if 'auto_compact' not in kwargs:
57+
kwargs['auto_compact'] = True
5158
return self.graph.serialize(format=format, **kwargs)
5259

5360
def validate(self):
@@ -78,10 +85,12 @@ def __init__(self, graph, rdf_type=None, **properties):
7885
super(Node, self).__init__()
7986
self._node = rdflib.BNode()
8087
self._graph = graph
81-
if rdf_type:
82-
self.add(RDF.type, rdf_type)
83-
elif self.RDF_TYPE:
84-
self.add(RDF.type, self.RDF_TYPE)
88+
if not rdf_type:
89+
rdf_type = self.RDF_TYPE
90+
# Add namespace prefix to non URIRef to allow abstraction from rdflib.
91+
if not isinstance(rdf_type, rdflib.term.Node):
92+
rdf_type = self.NAMESPACE[rdf_type]
93+
self.add(RDF.type, rdf_type)
8594
for key, value in iter(properties.items()):
8695
self.add(key, value)
8796

@@ -119,6 +128,21 @@ def add(self, property, value):
119128
class UcoObject(Node):
120129
RDF_TYPE = CASE.UcoObject
121130

131+
def __init__(self, graph, rdf_type=None, **properties):
132+
"""Initializes and adds a node to the graph.
133+
NOTE: At least the type or a property must be supplied for the Node
134+
to exist in the graph.
135+
136+
Args:
137+
graph: The graph to add this node to. (instance of rdflib.Graph)
138+
rdf_type: The RDF type to set this node to.
139+
properties: Extra properties to add to this node.
140+
(More properties can be set after initialization by using the add() function.)
141+
"""
142+
super(UcoObject, self).__init__(graph, rdf_type=rdf_type, **properties)
143+
self.add('createdTime', datetime.datetime.now())
144+
# TODO: Add "createdBy" property.
145+
122146
def create_property_bundle(self, type=None, **properties):
123147
"""Convenience function for adding property bundles to this Trace.
124148
@@ -129,21 +153,15 @@ def create_property_bundle(self, type=None, **properties):
129153
Returns:
130154
The property bundle created (instance of PropertyBundle).
131155
"""
132-
# Add case prefix to non URIRef to allow abstraction from rdflib.
133-
if not isinstance(type, rdflib.term.Node):
134-
type = CASE[type]
135156
pb = PropertyBundle(self._graph, rdf_type=type, **properties)
136157
self.add(CASE.propertyBundle, pb)
137158
return pb
138159

139160

161+
# TODO: Do we need these extra classes?
140162
class Trace(UcoObject):
141163
RDF_TYPE = CASE.Trace
142164

143165

144-
class Relationship(UcoObject):
145-
RDF_TYPE = CASE.Relationship
146-
147-
148166
class PropertyBundle(Node):
149167
RDF_TYPE = CASE.PropertyBundle

case_plaso/event_exporters/filestat.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,8 @@ def export_path_spec(self, path_spec):
5252
# object pointing to its parent.
5353
if path_spec.HasParent():
5454
parent_trace, _ = self.export_path_spec(path_spec.parent)
55-
relationship = self.document.create_relationship(
55+
relationship = self.document.create_uco_object(
56+
'Relationship',
5657
source=trace,
5758
target=parent_trace,
5859
kindOfRelationship=mappings.kindOfRelationship.get(
@@ -99,7 +100,7 @@ def export_event(self, event):
99100
content_data = self._content_data_pbs[event.pathspec]
100101
for name, value in event.GetAttributes():
101102
if name in mappings.HashMethod and (content_data, name, value) not in self._processed_hashes:
102-
# Keep trace of processed hashes, so we don't add the same hash twice.
103+
# Keep track of processed hashes, so we don't add the same hash twice.
103104
# TODO: Refactor this out when github.com/log2timeline/plaso/issues/910 is solved.
104105
self._processed_hashes.add((content_data, name, value))
105106
hash = self.document.create_hash(

case_plaso/plaso_exporter.py

Lines changed: 59 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11

22
from dfvfs.lib import definitions as dfvfs_definitions
3+
from plaso.engine.knowledge_base import KnowledgeBase
34
from plaso.storage import zip_file
45

5-
from case_plaso import PLASO
6+
from case_plaso import PLASO, lib
67
from case_plaso.event_exporter import EventExporter
78

89
# Import event exporters to get them registered.
@@ -12,6 +13,18 @@
1213
class PlasoExporter(object):
1314
"""Exports plaso data into a RDF graph using the CASE ontology."""
1415

16+
# Configuration attributes stored in a plaso Session object.
17+
_CONFIGURATION_ATTRIBUTES = [
18+
'identifier',
19+
'command_line_arguments',
20+
'debug_mode',
21+
'enabled_parser_names',
22+
'filter_expression',
23+
'filter_file',
24+
'parser_filter_expression',
25+
'preferred_encoding',
26+
'preferred_year']
27+
1528
def __init__(self, document):
1629
"""Initializes PlasoExporter.
1730
@@ -59,12 +72,55 @@ def export_event(self, event):
5972
event_exporter = self.get_event_exporter(event.data_type)
6073
event_exporter.export_event(event)
6174

75+
def export_session(self, session):
76+
"""Exports the given plaso storage Session into the graph."""
77+
instrument = self.document.create_uco_object(
78+
'Tool',
79+
name=session.product_name,
80+
version=session.product_version,
81+
toolType='parser?',
82+
creator='Joachim Metz')
83+
config = instrument.create_property_bundle('ToolConfiguration')
84+
for attribute in self._CONFIGURATION_ATTRIBUTES:
85+
if hasattr(session, attribute):
86+
value = getattr(session, attribute)
87+
if value is None:
88+
# None is technically a configuration, but we don't want to print "None".
89+
value = ''
90+
value = str(value)
91+
setting = self.document.create_node(
92+
'ConfigurationSetting', itemName=attribute, itemValue=value)
93+
config.add('configurationSetting', setting)
94+
95+
# TODO: How do we know who performed the Plaso action? That information
96+
# is not in the plaso storage file...
97+
performer = self.document.create_uco_object('Identity')
98+
performer.create_property_bundle(
99+
'SimpleName',
100+
givenName='John',
101+
familyName='Doe')
102+
103+
action = self.document.create_uco_object(
104+
'ForensicAction',
105+
startTime=lib.convert_timestamp(session.start_time),
106+
endTime=lib.convert_timestamp(session.completion_time))
107+
action.create_property_bundle(
108+
'ActionReferences',
109+
performer=performer,
110+
instrument=instrument,
111+
result=None, # TODO: We can't fill this in because we don't know what session created what event objects...
112+
location=None) # TODO: How am I supposed to be able to get this information?
113+
62114
def export_storage_file(self, storage_file):
63115
"""Extracts and exports plaso event data and sources into the graph."""
64116
with zip_file.ZIPStorageFileReader(storage_file) as storage_reader:
65-
# TODO: Do stuff with metadata
117+
knowledge_base = KnowledgeBase()
118+
storage_reader.ReadPreprocessingInformation(knowledge_base)
119+
# TODO: Export knowledge base.
120+
121+
for session in storage_reader._storage_file.GetSessions():
122+
self.export_session(session)
66123

67-
# Convert path specs into CASE Traces containgin file-type property bundles.
68124
for source in storage_reader.GetEventSources():
69125
self.export_event_source(source)
70126

notes.md

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22

33
- Why is `fileSystemType` in both `File` and `FileSystem` property bundles?
44
- Clarification on when to use "PhoneAccount" or "EmailAccount" vs. "Contact" is needed.
5-
- Why is there a separate `isRead` for `SMSMessage`? A regular message like "WhatsApp" could
6-
also have this field.
7-
- Why is there only a `sentTime` timestamp in `Message`? I have other potential timestamps such as "created", "received", "uploaded", "downloaded", "deleted", etc. Having specific hardcoded timestamps in property bundles makes this very limiting. And having some hardcoded timestamps and some not (using Action traces?) is very discombobulated for timelined data. I suggest rethinking this approach to separating timestamp information from data.
5+
- Why is there a separate `isRead` for `SMSMessage`? A regular message like "WhatsApp" could also have this field.
6+
- Why is there only a `sentTime` timestamp in `Message`? I have other potential timestamps such as "created", "received", "uploaded", "downloaded", "deleted", etc. Having specific hardcoded timestamps in property bundles makes this very limiting. And having some hardcoded timestamps and some not (using Action traces?) is very discombobulated for timelined data. I suggest rethinking this approach to separating timestamp information from data.
7+
- What do some of the UcoObject properties mean, e.g. `granularMarking` and `objectMarking`?
8+
- Why is there an `id` and `type` property in `UcoObject`? Those properties already exist: `id` is the URI of the node itself, and `type` is the "rdf:type" property.
9+
- I still can't remember the difference between `result` and `object` properties...
10+
- Why do all UcoObjects have a `name` property? That property only makes sense for things like `Tool`. It doesn't make sense for things like `Trace` and `Action`.
11+
- Why does the `description` property exist? It already exists with "rdfs:comment".

0 commit comments

Comments
 (0)