Skip to content

Commit

Permalink
better comments and docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
GwynHannay committed Dec 25, 2021
1 parent def2b6d commit 530a76d
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 94 deletions.
24 changes: 14 additions & 10 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,19 @@ def conversion(csv_file: str):
Parameters
----------
csv_file : str
Name / location of the CSV file to be processed.
Name and location of the CSV file to be processed.
"""
first_pass, headers, json_array = [], [], []

# load CSV file
# Load CSV file.
try:
with open(csv_file, encoding='utf-8') as csvf:
csv_reader = csv.reader(csvf)

# each row is either a header row, a values row, or a noise recording row
# if this is a header row, store it in a new array
# if this is a values row, combine it with the previous row's headers
# if this is a noise recording row, skip it
# Each row is either a header row, a values row, or a noise recording row.
# If this is a header row, store it in a new array to be merged with its values.
# If this is a values row, combine it with the previous row's headers.
# If this is a noise recording row, skip it. We don't process these in this version.
for row in csv_reader:
if row[0] == 'Id':
headers = cps.csv_headers(row)
Expand All @@ -35,9 +35,10 @@ def conversion(csv_file: str):
else:
continue

# now that we have a dictionary of headers and values
# let's identify each part and convert it into
# something much more useable
# Now that we have a dictionary of headers and values, let's identify each part and
# convert it into something much more usable.
# Process each record in the dictionary and write it into a JSON file every time the
# month changes.
suffix, previous_suffix = 'append', 'append'
i, records = 0, len(first_pass)

Expand All @@ -47,6 +48,9 @@ def conversion(csv_file: str):
suffix = cps.get_suffix()

if suffix != previous_suffix and previous_suffix != 'append':
# Since the Sleep as Android data is stored with the most recent record at
# the top, once we have a completed month we should reverse the order so it starts
# at day one.
json_array.reverse()
result = cps.build_records(json_array)

Expand All @@ -68,7 +72,7 @@ def conversion(csv_file: str):


if __name__ == "__main__":
# get our CSV filename and send it to our main function
# Set our CSV filename and send it to our main function.
csv_file = r'sleep-as-android/csv/sleep-export.csv'

conversion(csv_file)
60 changes: 32 additions & 28 deletions utils/csv_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

def csv_headers(headers: list[str]) -> list[str]:
"""Processes header rows in the CSV document.
Specifically, we want to append a number to the 'Event' fields so
a dictionary doesn't overwrite each with the next one.
Specifically, we want to append an incrementing integer to the 'Event'
fields so that a dictionary doesn't overwrite each event with the next one.
Parameters
----------
Expand All @@ -14,7 +14,7 @@ def csv_headers(headers: list[str]) -> list[str]:
Returns
-------
list[str]
The row of headers now processed.
The modified list of headers which can now be easily merged with its details row.
"""
processed, i = [], 0
for header in headers:
Expand Down Expand Up @@ -50,12 +50,14 @@ def combine_record(headers: list[str], row: list[str]) -> dict[str, str]:


def saa_field_parser(record: dict[str, str]) -> dict[str, str]:
"""Handles the processing of all records from the CSV file.
"""Processes a single record from the CSV file, retrieving instructions
on how to handle each field and sending them off to be transformed before
returning a fully formatted record.
Parameters
----------
record : dict[str, str]
A single record from the CSV file, i.e. a single sleep session.
A record of headers and values from the CSV file, i.e. a single sleep session.
Returns
-------
Expand All @@ -69,21 +71,23 @@ def saa_field_parser(record: dict[str, str]) -> dict[str, str]:
value = record[key]

if header.startswith('Event'):
datatype = get_instructions('Event')
result = follow_instructions(header, value, datatype)
instructions = get_instructions('Event')
result = follow_instructions(header, value, instructions)
events.append(result[1])

elif header[0].isdigit():
datatype = get_instructions('Actigraphy')
result = follow_instructions(header, value, datatype)
instructions = get_instructions('Actigraphy')
result = follow_instructions(header, value, instructions)
actigraphies.append(result[1])

else:
datatype = get_instructions(header)
result = follow_instructions(header, value, datatype)
instructions = get_instructions(header)
result = follow_instructions(header, value, instructions)
headers.append(result[0])
entries.append(result[1])

# Actigraphic data and event data will be nested, so add them
# under a single header.
if len(actigraphies) > 0:
headers.append('actigraphy')
entries.append(actigraphies)
Expand All @@ -99,7 +103,7 @@ def saa_field_parser(record: dict[str, str]) -> dict[str, str]:

def get_instructions(header: str) -> dict:
"""Receives a field name and returns with a dictionary of instructions
from the global dictionary defining each field.
from the global variable that defines each field.
Parameters
----------
Expand All @@ -116,57 +120,57 @@ def get_instructions(header: str) -> dict:
return instruction


def follow_instructions(header: str, value: str, datatype: dict) -> tuple:
"""Renames each field and handles its contents based on the information
sent with the header and value in the dictionary.
def follow_instructions(header: str, value: str, field_details: dict) -> tuple:
"""Receives a field name, value, and instructions on how to handle this
field, then follows them accordingly.
Parameters
----------
header : str
Original field name from the CSV file.
value : str
Value accompanying the field name.
datatype : dict
field_details : dict
'Instructions', i.e. new field name and end data type.
Returns
-------
tuple
Completed 'entry' for the record: processed field name and value.
Processed header and value for the record.
"""
entry = ()
field_name = datatype['name']
d_type = datatype['type']
field = ()
field_name = field_details['name']
d_type = field_details['type']

if d_type == 'pk':
pk_value = df.process_pk(value)
entry = (field_name, pk_value)
field = (field_name, pk_value)

elif d_type == 'datetime':
dt_value = df.process_dates(value)
entry = (field_name, dt_value)
field = (field_name, dt_value)

elif d_type == 'float':
f_value = df.process_float(value)
entry = (field_name, f_value)
field = (field_name, f_value)

elif d_type == 'integer':
i_value = df.process_integer(value)
entry = (field_name, i_value)
field = (field_name, i_value)

elif d_type == 'string':
entry = (field_name, value)
field = (field_name, value)

elif d_type == 'array':
if field_name == 'actigraphy':
act = df.process_actigraphy(header, value, globals.start_time)
entry = (field_name, act)
field = (field_name, act)

elif field_name == 'events':
event = df.process_event(value)
entry = (field_name, event)
field = (field_name, event)

return entry
return field


def build_records(records: list) -> str:
Expand Down
124 changes: 68 additions & 56 deletions utils/data_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,18 @@ def process_pk(key: str) -> int:


def process_dates(detail: str) -> str:
"""Parses a string datetime from one format, then returns it as a string
in a better format.
"""Receives a datetime string in one format, then returns it as a string
in another format which is more easily read and understood internationally.
Parameters
----------
detail : str
Original datetime string: day. month. year hour:minute
Original datetime string: 'day. month. year hour:minute'
Returns
-------
str
New datetime string: year-month-day hour:minute
New datetime string: 'year-month-day hour:minute'
"""
datetime_value = datetime.strptime(detail, '%d. %m. %Y %H:%M')
datetime_string = datetime.strftime(datetime_value, '%Y-%m-%d %H:%M')
Expand All @@ -77,7 +77,7 @@ def process_float(detail: str) -> float:
Returns
-------
float
Field as a float.
String field converted into a float.
"""
value = float(detail)

Expand All @@ -95,17 +95,63 @@ def process_integer(detail: str) -> int:
Returns
-------
int
Field as an integer.
String field converted into an integer.
"""
value = int(detail)

return value


def process_actigraphy(time: str, value: str, start_time) -> dict[str, str]:
    """Specifically handles actigraphic events from Sleep as Android.

    The header fields for these are made of the time (not including date)
    of the data recorded, so we want to get the global start time and
    use this to add a timestamp to each data point.

    Parameters
    ----------
    time : str
        Hour and minute in string format, e.g. '23:30'.
    value : str
        Actigraphic value.
    start_time : datetime
        Global start time of this sleep record.

    Returns
    -------
    dict[str, str]
        Completed dictionary of actigraphic event with the datetime recorded
        and value recorded.
    """
    act_time_part = datetime.strptime(time, '%H:%M').time()
    start_time_part = start_time.time()
    start_time_date = start_time.date()
    next_day_date = start_time_date + timedelta(days=1)

    # The date isn't included in the actigraphic header. A recording whose
    # clock time is at or after the session's start time belongs on the same
    # calendar day; an earlier clock time means we have crossed midnight, so
    # it belongs on the next day. '>=' (rather than '>') keeps a sample taken
    # at the exact start minute on the start date.
    #
    # TODO: Handle edge case for a sleep session that can pass over 2
    # days. This can be done by adding 1 day to the start date every time we
    # cross over midnight.
    if act_time_part >= start_time_part:
        act_datetime = datetime.combine(start_time_date, act_time_part)
    else:
        act_datetime = datetime.combine(next_day_date, act_time_part)

    act_dict = {
        'actigraphic_time': act_datetime.strftime('%Y-%m-%d %H:%M'),
        'actigraphic_value': value
    }

    return act_dict


def process_event(event: str) -> dict:
"""Specifically handles 'Event' fields from Sleep as Android.
This involves splitting the event type, the Unix timestamp, and
the event's value if it has one.
This involves splitting the event type, the Unix timestamp, and the event's
value if it has one.
Parameters
----------
Expand All @@ -115,18 +161,26 @@ def process_event(event: str) -> dict:
Returns
-------
dict
Completed dictionary with event split into type, datetime, and value (if
exists).
Completed dictionary with event split into event type, datetime, and value
(if one exists).
"""
event_parts = event.split('-', 2)

event_type = event_parts[0]

timestamp = datetime.fromtimestamp(int(event_parts[1])/1000)
# we want milliseconds, because the DHA event occurs every 1 millisecond
# until you fall asleep
event_time = timestamp.strftime('%Y-%m-%d %H:%M:%S.%f')
# We want the event time in milliseconds, because the DHA event occurs every 1
# millisecond until you fall asleep.
event_time = timestamp.strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]

# Some events have a second hyphen if there is data to be included in it. Include
# an event value if this is the case, otherwise set it to none and don't include
# a field for it at all.

# Additionally, we know that HR events are heart rates with a float value, so let's
# convert that.

# TODO: Detect and transform various value data types.
if len(event_parts) > 2:
if event_type == 'HR':
event_value = float(event_parts[2])
Expand All @@ -149,48 +203,6 @@ def process_event(event: str) -> dict:
return event_dict


def process_actigraphy(time: str, value: str, start_time) -> dict[str, str]:
    """Specifically handles actigraphic events from Sleep as Android.

    Actigraphy headers carry only a clock time (no date), so the session's
    global start time is used to work out which calendar day each data
    point belongs to.

    Parameters
    ----------
    time : str
        Hour and minute in string format.
    value : str
        Actigraphic value.
    start_time : datetime
        Global start time of this sleep record.

    Returns
    -------
    dict[str, str]
        Completed dictionary of actigraphic event, ready to be inserted into the
        record.
    """
    recorded_time = datetime.strptime(time, '%H:%M').time()
    session_date = start_time.date()

    # No date comes with the header: a clock time later than the session's
    # start time is treated as the same day, anything else as the day after
    # (we assume midnight was crossed).
    if recorded_time > start_time.time():
        recorded_date = session_date
    else:
        recorded_date = session_date + timedelta(days=1)

    stamped = datetime.combine(recorded_date, recorded_time)

    return {
        'actigraphic_time': stamped.strftime('%Y-%m-%d %H:%M'),
        'actigraphic_value': value
    }


def process_array(records: list) -> str:
"""Receives an array and converts it into a JSON string.
Expand All @@ -202,7 +214,7 @@ def process_array(records: list) -> str:
Returns
-------
str
A JSON string.
The records now converted into a JSON string.
"""
json_string = json.dumps(records)

Expand Down
Loading

0 comments on commit 530a76d

Please sign in to comment.