forked from david-edu-morales/TAAP_py
-
Notifications
You must be signed in to change notification settings - Fork 0
/
taap_mca_seasonalSum.py
170 lines (131 loc) · 8.14 KB
/
taap_mca_seasonalSum.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# %%
import datetime as dt
import matplotlib.pyplot as plt
import pandas as pd
from random import randint
from sklearn import linear_model
import seaborn as sns
sns.set(rc={'figure.figsize':(11, 4)})
# %%
# Set up variables
keylist_mx = [26013, 26057, 26164] # create list of climate station keys
varsSum_mx = ['precip'] # specify variables to be resampled
seasons = ['summer', 'winter'] # specify seasons to be graphed
csvFile = 'data/historicalTrends_precipSum.csv' # csv filename to collect linRegCoefs
headerList = ['key', 'season', 'coef'] # header names for csv of linRegCoefs
# %%
# *** MEXICAN CLIMATE STATIONS ***
# Read the files into a df
# Create a dictionary of keys and filenames to call dataframes into another dictionary
filenameDict = {keylist_mx[key]: 'data/'+str(keylist_mx[key])+'_seasonalSum.csv' for key in range(len(keylist_mx))}
# Create a dictionary of keys and corresponding dataframes
dictSeasonPrecip = {key: pd.read_csv(filename,
index_col = 'year',
parse_dates=True)
for (key, filename) in filenameDict.items()}
# %%
# Monte Carlo function to generate randomized sample of observed values for given timeframe.
def monteCarloGenerator(obsValueList):
global coef # linear regression variable will be added to list in iterator
tX = [] # list to collect count of years for x-axis
vY = [] # list to collect randomized observed values for y-axis
currentYear = 1 # counter to keep track of years
listLen = len(obsValueList) # create limit of rolls and range of position values
# not all station/variable/month datasets are same size
while currentYear <= listLen:
roll = randint(0,(listLen-1)) # generate random index place value
selectedValue = obsValueList[roll] # select corresponding precip value from list
tX.append(currentYear) # add year value to list
vY.append(selectedValue) # add precip value to list
currentYear += 1
# Calculate the linear regression
# merge selectedValue and counter lists into dict
selValDf = pd.DataFrame({'year':tX,'variable':vY}) # convert dictionary into dataframe
selValDf = selValDf.dropna() # drop NaN-bearing rows from dataframe
x_data = selValDf['year'].values.reshape(selValDf.shape[0],1) # prep data for linreg
y_data = selValDf['variable'].values.reshape(selValDf.shape[0],1)
reg = linear_model.LinearRegression().fit(x_data, y_data)
coef = reg.coef_ * listLen # define linreg trend as coef to be plot and saved in iterator
# multiply coef by listLen variable for variables w/ < 40 yrs of data
# %%
# Return recorded coefficients into dictionary
dfCoef = pd.read_csv(csvFile, delimiter=',', usecols=headerList)
dictCoef = {key: dfCoef[dfCoef['key'] == key] for key in keylist_mx}
# Reset index for each dataframe to make appending easier
for key in keylist_mx:
dictCoef[key].reset_index(drop=True, inplace=True)
# %%
# Automate the iterator to run through all station/variable/month MCA distributions
# get start datetime
startTime = dt.datetime.now()
for key in keylist_mx:
dfKey = dictSeasonPrecip[key] # Set object name for ease of reading
lrcSdList = [] # reset SD list for each key and append to dictCoef
lrcMeanList = [] # reset mean list for each key and append to dictCoef
chanceList = []
for var in varsSum_mx:
for s in range(len(seasons)):
df = dfKey[dfKey.season == seasons[s]]
end = df.index.year[-1]
start = start = end - 39
dataset = df.loc[str(start):str(end)].precipSum.values.tolist()
# Select data from observed record for the iterator
#dataset = df[df.season == seasons[s]].precipSum.tail(40).values.tolist()
# Select observed linreg coef to plot on MCA distribution
obsCoef = dictCoef[key][dictCoef[key].season==seasons[s]]['coef'].values[0]
sampSize = 100000 # number of iterations for MCA
counter = 1 # counter to keep track of iterated distributions
linRegCoef = [] # create list for storing generated linreg coefs
# Iterate Monte Carlo simulator code
while counter <= sampSize: # iterate to chosen sample size
monteCarloGenerator(dataset)
linRegCoef.append(coef[0,0])
counter += 1
# Collect information about generated linreg statistics (SD and mean)
coefSeries = pd.Series(linRegCoef) # linregCoef to Series
lrcSD, lrcMean = coefSeries.std(), coefSeries.mean() # define SD and mean
lrcSdList.append(lrcSD) # append SD to list
lrcMeanList.append(lrcMean) # append mean to list
# Calculate percent chance of generating observed trend, or higher
chanceCount = 0
for i in range(len(coefSeries)):
if obsCoef < 0: # for (-) historical trends
if obsCoef > coefSeries[i]:
chanceCount += 1
else: # for (+) historical trends
if obsCoef < coefSeries[i]:
chanceCount += 1
percentChance = chanceCount/len(coefSeries)*100
chanceList.append(percentChance) # add to chanceList
# plot distribution of coefficients onto histogram
ax = coefSeries.plot.hist(bins=50, label='_nolegend_') # generate histogram of linregCoef
ax.axvline(obsCoef, color='r', label='historical trend') # plots corresponding linregCoef
textstr = '\n'.join((
r'historical trend = %.2f%s' % (obsCoef, 'mm/40yr'),
r'MC-generation chance = %.2f%s' % (percentChance, '%'),
r'$\sigma=%.2f$' % (lrcSD, )))
props = dict(boxstyle='round', facecolor='lightsteelblue', alpha=0.5)
# place a text box in upper left in axes coords
ax.text(0.05, 0.92, textstr, transform=ax.transAxes, fontsize=12,
verticalalignment='top', bbox=props)
ax.set_xlabel(var)
ax.set_ylabel('Count')
ax.set_title('Monte Carlo Analysis of '+seasons[s].capitalize()+' '+var.capitalize()+\
'\nClimate Station '+str(key)+', n='+str(sampSize), fontsize=12)
plt.legend(loc='upper right')
plt.savefig('graphs/mcaPlots/'+str(key)+'-'+var+'-'+seasons[s]+'_mca')
plt.show()
print(str(key)+'/'+var+'/'+seasons[s]+': stats completed')
print('_________________________')
# add SD and mean to dictCoef dataframes
dfStats = pd.DataFrame({'sd': lrcSdList, 'mean': lrcMeanList, 'chance': chanceList}) # convert sd/mean lists to df
dictCoef[key] = dictCoef[key].join(dfStats, how='left') # join above df to dictCoef
#dictCoef[key][['key', 'month']] = dictCoef[key][['key','month']].astype(int) # reset key/month to ints
endTime = dt.datetime.now()
elapsedTime = endTime - startTime
print('Execution time:', elapsedTime)
# %%
# Create cleaned-data csv files
for key in keylist_mx:
dictCoef[key].to_csv('data/'+str(key)+'_coefData-sum.csv')
# %%