From ae510105f518797ba25a3112220bb5735707d7a9 Mon Sep 17 00:00:00 2001 From: Shing Chan Date: Wed, 23 Oct 2024 15:16:52 +0100 Subject: [PATCH] refactor: improve argument parsing of dates inputs: startTime, endTime and csvStartTime The changes in this commit allow more flexible date inputs e.g. '2000-12-31T23:59:59', '2000-12-31 23:59:59', '12/31/2000 23:59:59' etc. should all work. --- src/accelerometer/accProcess.py | 44 ++++++------------------------ src/accelerometer/device.py | 13 +++++++-- src/accelerometer/summarisation.py | 8 ++---- 3 files changed, 21 insertions(+), 44 deletions(-) diff --git a/src/accelerometer/accProcess.py b/src/accelerometer/accProcess.py index f6e0cb60..754d8868 100644 --- a/src/accelerometer/accProcess.py +++ b/src/accelerometer/accProcess.py @@ -44,13 +44,13 @@ def main(): # noqa: C901 minutes. Not to be confused with timezone offsets. (default : %(default)s""") parser.add_argument('--startTime', - metavar='e.g. 1991-01-01T23:59', default=None, - type=str2date, help="""removes data before this + metavar='e.g. 2000-12-31 23:59:59', default=None, + type=str, help="""removes data before this time (local) in the final analysis (default : %(default)s)""") parser.add_argument('--endTime', - metavar='e.g 1991-01-01T23:59', default=None, - type=str2date, help="""removes data after this + metavar='e.g 2000-12-31 23:59:59', default=None, + type=str, help="""removes data after this time (local) in the final analysis (default : %(default)s)""") parser.add_argument('--processInputFile', @@ -75,8 +75,8 @@ def main(): # noqa: C901 help="""Filter ENMOtrunc values? (default : %(default)s)""") parser.add_argument('--csvStartTime', - metavar='e.g. 2020-01-01T00:01', default=None, - type=str2date, help="""start time for csv file + metavar='e.g. 
2000-12-31 23:59:59', default=None, + type=str, help="""start time for csv file when time column is not available (default : %(default)s)""") parser.add_argument('--csvSampleRate', @@ -281,10 +281,10 @@ def deleteIntermediateFiles(): # Check user-specified end time is not before start time if args.startTime and args.endTime: - assert args.startTime <= args.endTime, ( + assert pd.Timestamp(args.startTime) <= pd.Timestamp(args.endTime), ( "startTime and endTime arguments are invalid!\n" - f"startTime: {args.startTime.strftime('%Y-%m-%dT%H:%M')}\n" - f"endTime:, {args.endTime.strftime('%Y-%m-%dT%H:%M')}\n" + f"startTime: {args.startTime}\n" + f"endTime: {args.endTime}\n" ) # Print processing options to screen @@ -367,32 +367,6 @@ def str2bool(v): return v.lower() in ("yes", "true", "t", "1") -def str2date(v): - """ - Used to parse date values from the command line. E.g. "1994-11-30T12:00" -> time.datetime - """ - - eg = "1994-11-30T12:00" # example date - if v.count("-") != eg.count("-"): - print("ERROR: Not enough dashes in date") - elif v.count("T") != eg.count("T"): - print("ERROR: No T seperator in date") - elif v.count(":") != eg.count(":"): - print("ERROR: No ':' seperator in date") - elif len(v.split("-")[0]) != 4: - print("ERROR: Year in date must be 4 numbers") - elif len(v.split("-")[1]) != 2 and len(v.split("-")[1]) != 1: - print("ERROR: Month in date must be 1-2 numbers") - elif len(v.split("-")[2].split("T")[0]) != 2 and len(v.split("-")[2].split("T")[0]) != 1: - print("ERROR: Day in date must be 1-2 numbers") - else: - return pd.datetime.strptime(v, "%Y-%m-%dT%H:%M") - print("Please change your input date:") - print('"' + v + '"') - print("to match the example date format:") - print('"' + eg + '"') - raise ValueError("Date in incorrect format") - if __name__ == '__main__': main() # Standard boilerplate to call the main() function to begin the program. 
diff --git a/src/accelerometer/device.py b/src/accelerometer/device.py index 9e2351e5..ada4933b 100644 --- a/src/accelerometer/device.py +++ b/src/accelerometer/device.py @@ -4,6 +4,7 @@ import gzip import numpy as np import os +import dateutil.parser import pandas as pd import statsmodels.api as sm import struct @@ -104,7 +105,7 @@ def processInputFileToEpoch( # noqa: C901 if javaHeapSpace: commandArgs.insert(1, javaHeapSpace) if csvStartTime: - commandArgs.append("csvStartTime:" + csvStartTime.strftime("%Y-%m-%dT%H:%M")) + commandArgs.append("csvStartTime:" + to_iso_datetime(csvStartTime)) if csvSampleRate: commandArgs.append("csvSampleRate:" + str(csvSampleRate)) if csvTimeFormat: @@ -163,9 +164,9 @@ def processInputFileToEpoch( # noqa: C901 if javaHeapSpace: commandArgs.insert(1, javaHeapSpace) if startTime: - commandArgs.append("startTime:" + startTime.strftime("%Y-%m-%dT%H:%M")) + commandArgs.append("startTime:" + to_iso_datetime(startTime)) if endTime: - commandArgs.append("endTime:" + endTime.strftime("%Y-%m-%dT%H:%M")) + commandArgs.append("endTime:" + to_iso_datetime(endTime)) if csvStartTime: - commandArgs.append("csvStartTime:" + csvStartTime.strftime("%Y-%m-%dT%H:%M")) + commandArgs.append("csvStartTime:" + to_iso_datetime(csvStartTime)) if csvSampleRate: @@ -531,3 +532,9 @@ def getGT3XDeviceId(gt3xFile): this usually occurs when the file is not an Actigraph .gt3x accelerometer file. Exiting...""") sys.exit(-8) + + +def to_iso_datetime(dt): + """ Given input string representing a datetime, return its ISO formatted + datetime string. 
""" + return dateutil.parser.parse(dt).isoformat() diff --git a/src/accelerometer/summarisation.py b/src/accelerometer/summarisation.py index d5fa1b30..eded68f4 100644 --- a/src/accelerometer/summarisation.py +++ b/src/accelerometer/summarisation.py @@ -1,6 +1,5 @@ """Module to generate overall activity summary from epoch data.""" import sys -import pytz import numpy as np import pandas as pd from pandas.tseries.frequencies import to_offset @@ -73,13 +72,10 @@ def getActivitySummary( # noqa: C901 # Remove data before/after user specified start/end times rows = data.shape[0] - tz = pytz.timezone(timeZone) if startTime: - localStartTime = tz.localize(startTime) - data = data[data.index >= localStartTime] + data = data.loc[pd.Timestamp(startTime, tz=timeZone):] if endTime: - localEndTime = tz.localize(endTime) - data = data[data.index <= localEndTime] + data = data.loc[:pd.Timestamp(endTime, tz=timeZone)] # Quit if no data left if data.shape[0] == 0: print("No rows remaining after start/end time removal")