Skip to content

Commit

Permalink
simplified plain table script
Browse files Browse the repository at this point in the history
  • Loading branch information
msouff committed May 28, 2020
1 parent 24208bf commit 5571999
Showing 1 changed file with 25 additions and 134 deletions.
159 changes: 25 additions & 134 deletions ecflow/spt_extract_plain_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# File: spt_extract_plain_table.py
# Author(s): Michael Souffront, Wade Roberts, Spencer McDonald
# Date: 03/07/2018
# Last Updated: 05/28/2020
# Purpose: Calculate basic statistics for GloFAS-RAPID files and
# extract them to a summary table; interpolate forecast
# values for time steps other than 3 hrs
Expand All @@ -16,7 +17,6 @@
import subprocess as sp
import netCDF4 as nc
import datetime as dt
import numpy as np
import pandas as pd
import logging

Expand Down Expand Up @@ -61,7 +61,7 @@ def extract_summary_table(workspace):
try:
with open(os.path.join(workspace, file_name), 'w') as f:
# writes header
# f.write('comid,timestamp,max,min,style,flow_class\n')
# f.write('comid,timestamp,max,mean,color,thickness\n')

# extracts forecast COMIDS and formatted dates into lists
comids = nc.Dataset(nclist[0], 'r').variables['rivid'][:].tolist()
Expand All @@ -73,7 +73,6 @@ def extract_summary_table(workspace):
# creates empty lists with forecast stats
maxlist = []
meanlist = []
minlist = []

# loops through the stat netcdf files to populate lists created above
for ncfile in sorted(nclist):
Expand All @@ -85,137 +84,41 @@ def extract_summary_table(workspace):
maxlist.append(res.variables['Qout'][index, 0:49].tolist())
elif 'avg' in ncfile:
meanlist.append(res.variables['Qout'][index, 0:49].tolist())
elif 'min' in ncfile:
minlist.append(res.variables['Qout'][index, 0:49].tolist())

# creates step order list
step_order = range(1, 50)
# step_order = range(1, 200)

# creates watershed and subbasin names
watershed_name = full_name.split('-')[0]
subbasin_name = full_name.split('-')[1]

# creates unique id
count = 1

# loops through COMIDs again to add rows to csv file
for index, comid in enumerate(comids):
for step, date, max, mean, min in zip(step_order, dates, maxlist[index], meanlist[index],
minlist[index]):
# define style
if mean > rp_df.loc[comid, 'return_20']:
style = 'purple'
elif mean > rp_df[comid, 'return_10']:
style = 'red'
elif mean > rp_df[comid, 'return_2']:
style = 'yellow'
for f_date, f_max, f_mean in zip(dates, maxlist[index], meanlist[index]):
# define reach color based on return periods
if f_mean > rp_df.loc[comid, 'return_20']:
color = 'purple'
elif f_mean > rp_df.loc[comid, 'return_10']:
color = 'red'
elif f_mean > rp_df.loc[comid, 'return_2']:
color = 'yellow'
else:
style = 'blue'

# define flow_class
if mean < 20:
flow_class = '1'
elif 20 <= mean < 250:
flow_class = '2'
elif 250 <= mean < 1500:
flow_class = '3'
elif 1500 <= mean < 10000:
flow_class = '4'
elif 10000 <= mean < 30000:
flow_class = '5'
color = 'blue'

# define reach thickness based on flow magnitude
if f_mean < 20:
thickness = '1'
elif 20 <= f_mean < 250:
thickness = '2'
elif 250 <= f_mean < 1500:
thickness = '3'
elif 1500 <= f_mean < 10000:
thickness = '4'
elif 10000 <= f_mean < 30000:
thickness = '5'
else:
flow_class = '6'
thickness = '6'

f.write(','.join([str(comid), date, str(max), str(mean), style, flow_class + '\n']))
count += 1
f.write(','.join([str(comid), f_date, str(f_max), str(f_mean), color, thickness + '\n']))

return 'Stat Success'
except Exception as e:
logging.debug(e)


# function to take a given csv and interpolate all time series in it
def interpolate_table(path):
# importing the table
print('working on interpolation')
df = pd.read_csv(path, index_col=8)
interpolated_df = pd.DataFrame([])
if len(df.index) % 85 == 0:
n = 85
for i in range(int(len(df.index) / 85)):
# making a temporay df to interpolate in
df_temp = df.iloc[n - 85: n]

# resetting the index to datetime type
df_temp.index = pd.to_datetime(df_temp.index, infer_datetime_format=True)

# making a temporary dataframe for the 6 hour gap time series
df_temp_6_hr = df_temp.iloc[48:, :]

# making a new index with 3 hour time intervals rather than 6 hour
new_index = pd.date_range(df_temp_6_hr.index[0], df_temp_6_hr.index[len(df_temp_6_hr.index) - 1], freq='3H')

# reindexing the 6 hour df to a 3 hr df
df_temp_3_hr = df_temp_6_hr.reindex(new_index)

# filling the constant values with a forward fill
for col in ["watershed", "subbasin", "comid", "return2", "return10", "return20"]:
df_temp_3_hr[col].ffill(inplace=True)

# making a new index column
df_temp_3_hr['index'] = np.linspace(49, 121, len(df_temp_3_hr.index))

# using a pchip spline to interpolate the values in the new time interval
for col in ['max', 'mean', 'min']:
df_temp_3_hr[col] = df_temp_3_hr[col].interpolate('pchip')

# creating a variable to combine the new interpolated values to the dataframe
frames = [df_temp.iloc[:48], df_temp_3_hr]
# concatenating the variable
df_temp = pd.concat(frames)

# rearranging the dataframe to match how it was before
df_temp['timestamp'] = df_temp.index
df_temp.index = df_temp['id']
df_temp = df_temp.drop(['id'], axis=1)
cols = ['watershed', 'subbasin', 'comid', 'return2', 'return10', 'return20', 'index', 'timestamp', 'max',
'mean', 'min', 'style', 'flow_class']
df_temp = df_temp[cols]

# appending this section of the table back to the entire table
interpolated_df = interpolated_df.append(df_temp)

n += 85

# resetting the id column
interpolated_df.index = np.linspace(1, len(interpolated_df.index), len(interpolated_df.index), dtype=np.int16)

# changing the data types to match what was originally in the table
interpolated_df.index = interpolated_df.index.astype(np.int16)
interpolated_df['timestamp'] = interpolated_df['timestamp'].dt.strftime("%m/%d/%y %H:%M")
interpolated_df['index'] = interpolated_df['index'].astype(np.int16)
interpolated_df['comid'] = interpolated_df['comid'].astype(np.int64)

# logical indexing the styles column to fill the interpolated values with corresponding colors
interpolated_df.ix[(interpolated_df['mean'] > interpolated_df['return2']), ['style']] = 'yellow'
interpolated_df.ix[(interpolated_df['mean'] > interpolated_df['return10']), ['style']] = 'red'
interpolated_df.ix[(interpolated_df['mean'] > interpolated_df['return20']), ['style']] = 'purple'
interpolated_df.ix[(interpolated_df['mean'] <= interpolated_df['return2']), ['style']] = 'blue'

# logical indexing the flow class column to fill the interpolated values with corresponding values
interpolated_df.ix[(interpolated_df['mean'] < 20), ['flow_class']] = '1'
interpolated_df.ix[(interpolated_df['mean'] >= 20) & (interpolated_df['mean'] < 250), ['flow_class']] = '2'
interpolated_df.ix[(interpolated_df['mean'] >= 250) & (interpolated_df['mean'] < 1500), ['flow_class']] = '3'
interpolated_df.ix[(interpolated_df['mean'] >= 1500) & (interpolated_df['mean'] < 10000), ['flow_class']] = '4'
interpolated_df.ix[(interpolated_df['mean'] >= 10000) & (interpolated_df['mean'] < 30000), ['flow_class']] = '5'
interpolated_df.ix[(interpolated_df['mean'] > 30000), ['flow_class']] = '6'

# overwrite csv table with interpolated values, leaving header out
interpolated_df.to_csv(path, index_label='id', header=False)
return ('Interpolation Success')


# runs function on file execution
if __name__ == "__main__":
# output directory
Expand All @@ -240,15 +143,3 @@ def interpolate_table(path):
pool.close()
pool.join()
logging.debug('Finished')

# # populate interpolation list
# date_list = os.listdir(date)
# for file in date_list:
# if file.startswith("summary_table"):
# interpolation_list.append(os.path.join(date, file))
#
# # run interpolation
# for csv_path in interpolation_list:
# interpolate_table(
# path=csv_path
# )

0 comments on commit 5571999

Please sign in to comment.