Skip to content

Commit

Permalink
refactor: improve argument parsing of date inputs: startTime, endTim…
Browse files Browse the repository at this point in the history
…e and csvStartTime

The changes in this commit allow more flexible date inputs: for example, '2000-12-31T23:59:59', '2000-12-31 23:59:59' and '12/31/2000 23:59:59' should all work.
  • Loading branch information
chanshing committed Oct 23, 2024
1 parent 79c1750 commit ae51010
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 44 deletions.
44 changes: 9 additions & 35 deletions src/accelerometer/accProcess.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ def main(): # noqa: C901
minutes. Not to be confused with timezone offsets.
(default : %(default)s""")
parser.add_argument('--startTime',
metavar='e.g. 1991-01-01T23:59', default=None,
type=str2date, help="""removes data before this
metavar='e.g. 2000-12-31 23:59:59', default=None,
type=str, help="""removes data before this
time (local) in the final analysis
(default : %(default)s)""")
parser.add_argument('--endTime',
metavar='e.g 1991-01-01T23:59', default=None,
type=str2date, help="""removes data after this
metavar='e.g 2000-12-31 23:59:59', default=None,
type=str, help="""removes data after this
time (local) in the final analysis
(default : %(default)s)""")
parser.add_argument('--processInputFile',
Expand All @@ -75,8 +75,8 @@ def main(): # noqa: C901
help="""Filter ENMOtrunc values?
(default : %(default)s)""")
parser.add_argument('--csvStartTime',
metavar='e.g. 2020-01-01T00:01', default=None,
type=str2date, help="""start time for csv file
metavar='e.g. 2000-12-31 23:59:59', default=None,
type=str, help="""start time for csv file
when time column is not available
(default : %(default)s)""")
parser.add_argument('--csvSampleRate',
Expand Down Expand Up @@ -281,10 +281,10 @@ def deleteIntermediateFiles():

# Check user-specified end time is not before start time
if args.startTime and args.endTime:
assert args.startTime <= args.endTime, (
assert pd.Timestamp(args.startTime) <= pd.Timestamp(args.endTime), (
"startTime and endTime arguments are invalid!\n"
f"startTime: {args.startTime.strftime('%Y-%m-%dT%H:%M')}\n"
f"endTime:, {args.endTime.strftime('%Y-%m-%dT%H:%M')}\n"
f"startTime: {args.startTime}\n"
f"endTime: {args.endTime}\n"
)

# Print processing options to screen
Expand Down Expand Up @@ -367,32 +367,6 @@ def str2bool(v):
return v.lower() in ("yes", "true", "t", "1")


def str2date(v):
    """
    Parse a date value given on the command line.

    E.g. "1994-11-30T12:00" -> datetime.datetime(1994, 11, 30, 12, 0)

    The string is first checked against the shape of the example date (same
    number of '-', 'T' and ':' characters, 4-character year, 1-2 character
    month and day) and then parsed with the format "%Y-%m-%dT%H:%M".

    :param str v: Date string in the form YYYY-MM-DDTHH:MM
    :return: The parsed (timezone-naive) datetime
    :rtype: datetime.datetime
    :raises ValueError: If the string does not match the expected shape (a
        diagnostic is printed first), or if strptime rejects the field values
    """
    # Imported locally so the function does not depend on `pd.datetime`, an
    # alias that was deprecated in pandas 1.0 and removed in pandas 2.0.
    from datetime import datetime

    eg = "1994-11-30T12:00"  # example date
    if v.count("-") != eg.count("-"):
        print("ERROR: Not enough dashes in date")
    elif v.count("T") != eg.count("T"):
        print("ERROR: No T seperator in date")
    elif v.count(":") != eg.count(":"):
        print("ERROR: No ':' seperator in date")
    elif len(v.split("-")[0]) != 4:
        print("ERROR: Year in date must be 4 numbers")
    elif len(v.split("-")[1]) != 2 and len(v.split("-")[1]) != 1:
        print("ERROR: Month in date must be 1-2 numbers")
    elif len(v.split("-")[2].split("T")[0]) != 2 and len(v.split("-")[2].split("T")[0]) != 1:
        print("ERROR: Day in date must be 1-2 numbers")
    else:
        return datetime.strptime(v, "%Y-%m-%dT%H:%M")

    # Only reached when one of the shape checks above failed: show the user
    # what they typed next to the expected format before raising.
    print("Please change your input date:")
    print('"' + v + '"')
    print("to match the example date format:")
    print('"' + eg + '"')
    raise ValueError("Date in incorrect format")


if __name__ == '__main__':
main() # Standard boilerplate to call the main() function to begin the program.
13 changes: 10 additions & 3 deletions src/accelerometer/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import gzip
import numpy as np
import os
import dateutil
import pandas as pd
import statsmodels.api as sm
import struct
Expand Down Expand Up @@ -104,7 +105,7 @@ def processInputFileToEpoch( # noqa: C901
if javaHeapSpace:
commandArgs.insert(1, javaHeapSpace)
if csvStartTime:
commandArgs.append("csvStartTime:" + csvStartTime.strftime("%Y-%m-%dT%H:%M"))
commandArgs.append("csvStartTime:" + to_iso_datetime(csvStartTime))
if csvSampleRate:
commandArgs.append("csvSampleRate:" + str(csvSampleRate))
if csvTimeFormat:
Expand Down Expand Up @@ -163,9 +164,9 @@ def processInputFileToEpoch( # noqa: C901
if javaHeapSpace:
commandArgs.insert(1, javaHeapSpace)
if startTime:
commandArgs.append("startTime:" + startTime.strftime("%Y-%m-%dT%H:%M"))
commandArgs.append("startTime:" + to_iso_datetime(startTime))
if endTime:
commandArgs.append("endTime:" + endTime.strftime("%Y-%m-%dT%H:%M"))
commandArgs.append("endTime:" + to_iso_datetime(endTime))
if csvStartTime:
commandArgs.append("csvStartTime:" + csvStartTime.strftime("%Y-%m-%dT%H:%M"))
if csvSampleRate:
Expand Down Expand Up @@ -531,3 +532,9 @@ def getGT3XDeviceId(gt3xFile):
this usually occurs when the file is not an Actigraph .gt3x accelerometer
file. Exiting...""")
sys.exit(-8)


def to_iso_datetime(dt):
    """ Given input string representing a datetime, return its ISO formatted
    datetime string.

    :param str dt: Datetime string in any format dateutil's parser accepts,
        e.g. '2000-12-31 23:59:59' or '12/31/2000 23:59:59'
    :return: The parsed datetime rendered with datetime.isoformat(),
        e.g. '2000-12-31T23:59:59'
    :rtype: str
    """
    # Import the submodule explicitly: a bare `import dateutil` does not
    # guarantee that `dateutil.parser` is loaded as an attribute of the package.
    from dateutil import parser as dateutil_parser

    return dateutil_parser.parse(dt).isoformat()
8 changes: 2 additions & 6 deletions src/accelerometer/summarisation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Module to generate overall activity summary from epoch data."""
import sys
import pytz
import numpy as np
import pandas as pd
from pandas.tseries.frequencies import to_offset
Expand Down Expand Up @@ -73,13 +72,10 @@ def getActivitySummary( # noqa: C901

# Remove data before/after user specified start/end times
rows = data.shape[0]
tz = pytz.timezone(timeZone)
if startTime:
localStartTime = tz.localize(startTime)
data = data[data.index >= localStartTime]
data = data.loc[pd.Timestamp(startTime, tz=timeZone):]
if endTime:
localEndTime = tz.localize(endTime)
data = data[data.index <= localEndTime]
data = data.loc[:pd.Timestamp(endTime, tz=timeZone)]
# Quit if no data left
if data.shape[0] == 0:
print("No rows remaining after start/end time removal")
Expand Down

0 comments on commit ae51010

Please sign in to comment.