Skip to content

Commit

Permalink
better comments and docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
GwynHannay committed Dec 25, 2021
1 parent def2b6d commit 530a76d
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 94 deletions.
24 changes: 14 additions & 10 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@ def conversion(csv_file: str):
Parameters
----------
csv_file : str
Name / location of the CSV file to be processed.
Name and location of the CSV file to be processed.
"""
first_pass, headers, json_array = [], [], []

# load CSV file
# Load CSV file.
try:
with open(csv_file, encoding='utf-8') as csvf:
csv_reader = csv.reader(csvf)

# each row is either a header row, a values row, or a noise recording row
# if this is a header row, store it in a new array
# if this is a values row, combine it with the previous row's headers
# if this is a noise recording row, skip it
# Each row is either a header row, a values row, or a noise recording row.
# If this is a header row, store it in a new array to be merged with its values.
# If this is a values row, combine it with the previous row's headers.
# If this is a noise recording row, skip it. We don't process these in this version.
for row in csv_reader:
if row[0] == 'Id':
headers = cps.csv_headers(row)
Expand All @@ -35,9 +35,10 @@ def conversion(csv_file: str):
else:
continue

# now that we have a dictionary of headers and values
# let's identify each part and convert it into
# something much more useable
# Now that we have a dictionary of headers and values, let's identify each part and
# convert it into something much more usable.
# Process each record in the dictionary and write it into a JSON file every time the
# month changes.
suffix, previous_suffix = 'append', 'append'
i, records = 0, len(first_pass)

Expand All @@ -47,6 +48,9 @@ def conversion(csv_file: str):
suffix = cps.get_suffix()

if suffix != previous_suffix and previous_suffix != 'append':
# Since the Sleep as Android data is stored with the most recent record at
# the top, once we have a completed month we should reverse the order so it starts
# at day one.
json_array.reverse()
result = cps.build_records(json_array)

Expand All @@ -68,7 +72,7 @@ def conversion(csv_file: str):


if __name__ == "__main__":
# get our CSV filename and send it to our main function
# Set our CSV filename and send it to our main function.
csv_file = r'sleep-as-android/csv/sleep-export.csv'

conversion(csv_file)
60 changes: 32 additions & 28 deletions utils/csv_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

def csv_headers(headers: list[str]) -> list[str]:
"""Processes header rows in the CSV document.
Specifically, we want to append a number to the 'Event' fields so
a dictionary doesn't overwrite each with the next one.
Specifically, we want to append an incrementing integer to the 'Event'
fields so that a dictionary doesn't overwrite each event with the next one.
Parameters
----------
Expand All @@ -14,7 +14,7 @@ def csv_headers(headers: list[str]) -> list[str]:
Returns
-------
list[str]
The row of headers now processed.
The modified list of headers which can now be easily merged with its details row.
"""
processed, i = [], 0
for header in headers:
Expand Down Expand Up @@ -50,12 +50,14 @@ def combine_record(headers: list[str], row: list[str]) -> dict[str, str]:


def saa_field_parser(record: dict[str, str]) -> dict[str, str]:
"""Handles the processing of all records from the CSV file.
"""Processes a single record from the CSV file, retrieving instructions
on how to handle each field and sending them off to be transformed before
returning a fully formatted record.
Parameters
----------
record : dict[str, str]
A single record from the CSV file, i.e. a single sleep session.
A record of headers and values from the CSV file, i.e. a single sleep session.
Returns
-------
Expand All @@ -69,21 +71,23 @@ def saa_field_parser(record: dict[str, str]) -> dict[str, str]:
value = record[key]

if header.startswith('Event'):
datatype = get_instructions('Event')
result = follow_instructions(header, value, datatype)
instructions = get_instructions('Event')
result = follow_instructions(header, value, instructions)
events.append(result[1])

elif header[0].isdigit():
datatype = get_instructions('Actigraphy')
result = follow_instructions(header, value, datatype)
instructions = get_instructions('Actigraphy')
result = follow_instructions(header, value, instructions)
actigraphies.append(result[1])

else:
datatype = get_instructions(header)
result = follow_instructions(header, value, datatype)
instructions = get_instructions(header)
result = follow_instructions(header, value, instructions)
headers.append(result[0])
entries.append(result[1])

# Actigraphic data and event data will be nested, so add them
# under a single header.
if len(actigraphies) > 0:
headers.append('actigraphy')
entries.append(actigraphies)
Expand All @@ -99,7 +103,7 @@ def saa_field_parser(record: dict[str, str]) -> dict[str, str]:

def get_instructions(header: str) -> dict:
"""Receives a field name and returns with a dictionary of instructions
from the global dictionary defining each field.
from the global variable that defines each field.
Parameters
----------
Expand All @@ -116,57 +120,57 @@ def get_instructions(header: str) -> dict:
return instruction


def follow_instructions(header: str, value: str, datatype: dict) -> tuple:
"""Renames each field and handles its contents based on the information
sent with the header and value in the dictionary.
def follow_instructions(header: str, value: str, field_details: dict) -> tuple:
"""Receives a field name, value, and instructions on how to handle this
field, then follows them accordingly.
Parameters
----------
header : str
Original field name from the CSV file.
value : str
Value accompanying the field name.
datatype : dict
field_details : dict
'Instructions', i.e. new field name and end data type.
Returns
-------
tuple
Completed 'entry' for the record: processed field name and value.
Processed header and value for the record.
"""
entry = ()
field_name = datatype['name']
d_type = datatype['type']
field = ()
field_name = field_details['name']
d_type = field_details['type']

if d_type == 'pk':
pk_value = df.process_pk(value)
entry = (field_name, pk_value)
field = (field_name, pk_value)

elif d_type == 'datetime':
dt_value = df.process_dates(value)
entry = (field_name, dt_value)
field = (field_name, dt_value)

elif d_type == 'float':
f_value = df.process_float(value)
entry = (field_name, f_value)
field = (field_name, f_value)

elif d_type == 'integer':
i_value = df.process_integer(value)
entry = (field_name, i_value)
field = (field_name, i_value)

elif d_type == 'string':
entry = (field_name, value)
field = (field_name, value)

elif d_type == 'array':
if field_name == 'actigraphy':
act = df.process_actigraphy(header, value, globals.start_time)
entry = (field_name, act)
field = (field_name, act)

elif field_name == 'events':
event = df.process_event(value)
entry = (field_name, event)
field = (field_name, event)

return entry
return field


def build_records(records: list) -> str:
Expand Down
124 changes: 68 additions & 56 deletions utils/data_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,18 @@ def process_pk(key: str) -> int:


def process_dates(detail: str) -> str:
"""Parses a string datetime from one format, then returns it as a string
in a better format.
"""Receives a datetime string in one format, then returns it as a string
in another format which is more easily read and understood internationally.
Parameters
----------
detail : str
Original datetime string: day. month. year hour:minute
Original datetime string: 'day. month. year hour:minute'
Returns
-------
str
New datetime string: year-month-day hour:minute
New datetime string: 'year-month-day hour:minute'
"""
datetime_value = datetime.strptime(detail, '%d. %m. %Y %H:%M')
datetime_string = datetime.strftime(datetime_value, '%Y-%m-%d %H:%M')
Expand All @@ -77,7 +77,7 @@ def process_float(detail: str) -> float:
Returns
-------
float
Field as a float.
String field converted into a float.
"""
value = float(detail)

Expand All @@ -95,17 +95,63 @@ def process_integer(detail: str) -> int:
Returns
-------
int
Field as an integer.
String field converted into an integer.
"""
value = int(detail)

return value


def process_actigraphy(time: str, value: str, start_time) -> dict[str, str]:
    """Specifically handles actigraphic events from Sleep as Android.

    The header fields for these are made of the time (not including date)
    of the data recorded, so we want to get the global start time and
    use this to add a timestamp to each data point.

    Parameters
    ----------
    time : str
        Hour and minute in string format, e.g. '23:30'.
    value : str
        Actigraphic value.
    start_time : datetime
        Global start time of this sleep record.

    Returns
    -------
    dict[str, str]
        Completed dictionary of actigraphic event with the datetime recorded
        and value recorded.
    """
    act_time_part = datetime.strptime(time, '%H:%M').time()
    start_time_part = start_time.time()
    start_time_date = start_time.date()
    next_day_date = start_time_date + timedelta(days=1)

    # The date isn't included in the actigraphic header. A recording whose
    # clock time is at or after the session's start time belongs on the same
    # calendar day; an earlier clock time means we have crossed midnight, so
    # it belongs on the next day. '>=' (rather than '>') keeps a sample taken
    # at the exact start minute on the start date.
    #
    # TODO: Handle edge case for a sleep session that can pass over 2
    # days. This can be done by adding 1 day to the start date every time we
    # cross over midnight.
    if act_time_part >= start_time_part:
        act_datetime = datetime.combine(start_time_date, act_time_part)
    else:
        act_datetime = datetime.combine(next_day_date, act_time_part)

    act_dict = {
        'actigraphic_time': act_datetime.strftime('%Y-%m-%d %H:%M'),
        'actigraphic_value': value
    }

    return act_dict


def process_event(event: str) -> dict:
"""Specifically handles 'Event' fields from Sleep as Android.
This involves splitting the event type, the Unix timestamp, and
the event's value if it has one.
This involves splitting the event type, the Unix timestamp, and the event's
value if it has one.
Parameters
----------
Expand All @@ -115,18 +161,26 @@ def process_event(event: str) -> dict:
Returns
-------
dict
Completed dictionary with event split into type, datetime, and value (if
exists).
Completed dictionary with event split into event type, datetime, and value
(if one exists).
"""
event_parts = event.split('-', 2)

event_type = event_parts[0]

timestamp = datetime.fromtimestamp(int(event_parts[1])/1000)
# we want milliseconds, because the DHA event occurs every 1 millisecond
# until you fall asleep
event_time = timestamp.strftime('%Y-%m-%d %H:%M:%S.%f')
# We want the event time in milliseconds, because the DHA event occurs every 1
# millisecond until you fall asleep.
event_time = timestamp.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]

# Some events have a second hyphen if there is data to be included in it. Include
# an event value if this is the case, otherwise set it to none and don't include
# a field for it at all.

# Additionally, we know that HR events are heart rates with a float value, so let's
# convert that.

# TODO: Detect and transform various value data types.
if len(event_parts) > 2:
if event_type == 'HR':
event_value = float(event_parts[2])
Expand All @@ -149,48 +203,6 @@ def process_event(event: str) -> dict:
return event_dict


def process_actigraphy(time: str, value: str, start_time) -> dict[str, str]:
    """Specifically handles actigraphic events from Sleep as Android.

    Actigraphy headers carry only a clock time (no date), so the session's
    global start time is used to work out which calendar day each data
    point belongs to.

    Parameters
    ----------
    time : str
        Hour and minute in string format.
    value : str
        Actigraphic value.
    start_time : datetime
        Global start time of this sleep record.

    Returns
    -------
    dict[str, str]
        Completed dictionary of actigraphic event, ready to be inserted into the
        record.
    """
    recorded_time = datetime.strptime(time, '%H:%M').time()
    session_date = start_time.date()

    # No date comes with the header: a clock time later than the session's
    # start time is treated as the same day, anything else as the day after
    # (we assume midnight was crossed).
    if recorded_time > start_time.time():
        recorded_date = session_date
    else:
        recorded_date = session_date + timedelta(days=1)

    stamped = datetime.combine(recorded_date, recorded_time)

    return {
        'actigraphic_time': stamped.strftime('%Y-%m-%d %H:%M'),
        'actigraphic_value': value
    }


def process_array(records: list) -> str:
"""Receives an array and converts it into a JSON string.
Expand All @@ -202,7 +214,7 @@ def process_array(records: list) -> str:
Returns
-------
str
A JSON string.
The records now converted into a JSON string.
"""
json_string = json.dumps(records)

Expand Down
Loading

0 comments on commit 530a76d

Please sign in to comment.