
v1.1 updates
fixed data.drop bug (the drop() result was never assigned)
fixed sim bug
added print sim data (per-ensemble-member test results are now written to disk)
extended Bayesian optimization iterations (initial random points 12 -> 25; iteration limits 25/50 -> 50/150)
AndreasWunsch committed Oct 23, 2020
1 parent 38d7c56 commit f74e360
Showing 12 changed files with 319 additions and 230 deletions.
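The data.drop fix that recurs throughout the diff below comes down to pandas semantics: DataFrame.drop returns a new DataFrame and leaves the caller untouched unless inplace=True is passed, so a bare data.drop(columns='rH') is a silent no-op. A minimal sketch of the failure mode and the fix, using the same column names as the scripts (the sample values are made up):

    import pandas as pd

    data = pd.DataFrame({'GWL': [1.0, 2.0], 'rH': [55.0, 60.0]})

    data.drop(columns='rH')          # buggy: the returned DataFrame is discarded
    assert 'rH' in data.columns      # the column is still present

    data = data.drop(columns='rH')   # fixed, as in this commit: keep the result
    assert 'rH' not in data.columns  # now it is gone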
75 changes: 40 additions & 35 deletions CNN - Python Code/CNN_seq2seq.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
Created on Wed Jul 1 12:53:39 2020
+updated on Thu Oct 15 17:15:45 2020
@author: Andreas Wunsch
"""

@@ -15,7 +15,7 @@
from bayes_opt import BayesianOptimization
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
-# from bayes_opt.util import load_logs #needed for: load existing optimizer states
+from bayes_opt.util import load_logs #needed for: load existing optimizer states
import os
import glob
import pandas as pd
@@ -24,16 +24,12 @@
from scipy import stats
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
-# from sklearn.preprocessing import StandardScaler
-import tensorflow as tf
-tf.config.optimizer.set_jit(True)

-from tensorflow.keras.mixed_precision import experimental as mixed_precision
-policy = mixed_precision.Policy('mixed_float16')
-mixed_precision.set_policy(policy)
+import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')


def load_RM_GW_and_HYRAS_Data(i):
pathGW = "./GWData"
pathHYRAS = "./MeteoData"
@@ -166,11 +162,11 @@ def bayesOpt_function_with_discrete_params(pp,densesize_int, seqlength_int, batc

# inputs
if rH == 0:
-    data.drop(columns='rH')
+    data = data.drop(columns='rH')
if T == 0:
-    data.drop(columns='T')
+    data = data.drop(columns='T')
if Tsin == 0:
-    data.drop(columns='Tsin')
+    data = data.drop(columns='Tsin')

#scale data
scaler = MinMaxScaler(feature_range=(-1, 1))
@@ -200,10 +196,6 @@ def bayesOpt_function_with_discrete_params(pp,densesize_int, seqlength_int, batc
for ini in range(inimax):
print("BayesOpt-Iteration {} - ini-Ensemblemember {}".format(len(optimizer.res)+1, ini+1))

-# f = open('log_full.txt', "a")
-# print("BayesOpt-Iteration {} - ini-Ensemblemember {}".format(len(optimizer.res)+1, ini+1), file = f)
-# f.close()

model = gwmodel(ini,GLOBAL_SETTINGS,X_train, Y_train, X_stop, Y_stop)

idx = 0
@@ -242,9 +234,6 @@ def bayesOpt_function_with_discrete_params(pp,densesize_int, seqlength_int, batc

print("total elapsed time = {}".format(datetime.datetime.now()-time1))
print("(pp) elapsed time = {}".format(datetime.datetime.now()-time_single))
-# f = open('log_full.txt', "a")
-# print("elapsed time = {}".format(datetime.datetime.now()-time1), file = f)
-# f.close()

return m_error[0,0]+m_error[0,1]

@@ -272,15 +261,14 @@ def simulate_testset(pp,densesize_int, seqlength_int, batchsize_int, filters_int

# inputs
if rH == 0:
-    data.drop(columns='rH')
+    data = data.drop(columns='rH')
if T == 0:
-    data.drop(columns='T')
+    data = data.drop(columns='T')
if Tsin == 0:
-    data.drop(columns='Tsin')
+    data = data.drop(columns='Tsin')

#scale data
scaler = MinMaxScaler(feature_range=(-1, 1))
-# scaler = StandardScaler()
scaler_gwl = MinMaxScaler(feature_range=(-1, 1))
scaler_gwl.fit(pd.DataFrame(data['GWL']))
data_n = pd.DataFrame(scaler.fit_transform(data), index=data.index, columns=data.columns)
@@ -382,6 +370,13 @@ def simulate_testset(pp,densesize_int, seqlength_int, batchsize_int, filters_int
np.savetxt('./ensemble_member_errors_'+Well_ID+'_PIop.txt',errors_members[:,:,6].transpose(),delimiter=';', fmt = '%.4f')

return scores, TestData, inimax, testresults_members, test_sim_median, Well_ID

+class newJSONLogger(JSONLogger):
+    # bypass JSONLogger.__init__, which would delete an existing log file,
+    # and call the parent class initializer directly so logging can append
+    def __init__(self, path):
+        self._path = None
+        super(JSONLogger, self).__init__()
+        self._path = path if path[-5:] == ".json" else path + ".json"

"""###########################################################################
@@ -420,23 +415,28 @@ def simulate_testset(pp,densesize_int, seqlength_int, batchsize_int, filters_int
verbose = 0 # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent, verbose = 2 prints everything
)

-# #load existing optimizer
-# load_logs(optimizer, logs=["./logs_CNN_seq2seq_"+Well_ID+".json"]);
-# print("\nExisting optimizer is already aware of {} points.".format(len(optimizer.space)))
+#load existing optimizer
+log_already_available = 0
+if os.path.isfile("./logs_CNN_seq2seq_"+Well_ID+".json"):
+    load_logs(optimizer, logs=["./logs_CNN_seq2seq_"+Well_ID+".json"]);
+    print("\nExisting optimizer is already aware of {} points.".format(len(optimizer.space)))
+    log_already_available = 1

# Saving progress
-logger = JSONLogger(path="./logs_CNN_seq2seq_"+Well_ID+".json")
+logger = newJSONLogger(path="./logs_CNN_seq2seq_"+Well_ID+".json")
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

# random exploration as a start
f = open('./timelog_CNN_seq2seq_'+Well_ID+'.txt', "w")
print("Start time of first iteration: {}\n".format(datetime.datetime.now()), file = f)#this is not logged in the json file
-optimizer.maximize(
-    init_points=12, #steps of random exploration
-    n_iter=0, # steps of Bayesian optimization
-    acq="ei",# ei = expected improvement (probably the most common acquisition function)
-    xi=0.05 # Prefer exploitation (xi=0.0) / Prefer exploration (xi=0.1)
-    )

+if log_already_available == 0:
+    optimizer.maximize(
+        init_points=25, #steps of random exploration (random starting points before Bayesian optimization)
+        n_iter=0, # steps of Bayesian optimization
+        acq="ei",# ei = expected improvement (probably the most common acquisition function)
+        xi=0.05 # Prefer exploitation (xi=0.0) / Prefer exploration (xi=0.1)
+        )

# optimize while improvement during last 20 steps
current_step = len(optimizer.res)
@@ -446,7 +446,7 @@ def simulate_testset(pp,densesize_int, seqlength_int, batchsize_int, filters_int
step = step + 1
beststep = optimizer.res[step] == optimizer.max #search for best iteration step

-while current_step < 25: #below < 25 iterations, no termination
+while current_step < 50: #below < 50 iterations, no termination
current_step = len(optimizer.res)
beststep = False
step = -1
@@ -461,7 +461,7 @@ def simulate_testset(pp,densesize_int, seqlength_int, batchsize_int, filters_int
xi=0.05 # Prefer exploitation (xi=0.0) / Prefer exploration (xi=0.1)
)

-while (step + 10 > current_step and current_step < 50): # termination after 50 steps or after 10 steps without improvement
+while (step + 20 > current_step and current_step < 150): # termination after 150 steps or after 20 steps without improvement
current_step = len(optimizer.res)
beststep = False
step = -1
@@ -535,4 +535,9 @@ def simulate_testset(pp,densesize_int, seqlength_int, batchsize_int, filters_int
print("max iteration = {}\n".format(len(optimizer.res)), file = f)
for i, res in enumerate(optimizer.res):
print("Iteration {}: \t{}".format(i+1, res), file = f)
f.close()

+#print sim data
+for i in range(inimax):
+    printdf = pd.DataFrame(data=testresults_members[:,:,i],index=TestData.index)
+    printdf.to_csv("./ensemble_member"+str(i) +"_values_CNN_"+Well_ID+'.txt',sep=';', float_format = '%.4f')
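The while loops above implement the extended stopping rule behind "extended Bayesian optimization iterations": single optimization steps are requested until at least 50 iterations have run, and then until either 150 iterations are reached or 20 consecutive steps bring no new maximum. A condensed, standalone sketch of that control flow, assuming the 2020-era bayes_opt API used here (maximize accepting acq and xi); the objective function and bounds are stand-ins, not taken from the scripts:

    from bayes_opt import BayesianOptimization

    optimizer = BayesianOptimization(
        f=lambda x: -(x - 2) ** 2,     # stand-in objective for this sketch
        pbounds={'x': (-5.0, 5.0)},
        random_state=1,
    )
    optimizer.maximize(init_points=25, n_iter=0)  # random exploration first

    def best_step(opt):
        # index of the iteration that produced the current maximum
        for i, res in enumerate(opt.res):
            if res == opt.max:
                return i
        return -1

    current_step = len(optimizer.res)
    while current_step < 50:  # no termination below 50 iterations
        optimizer.maximize(init_points=0, n_iter=1, acq="ei", xi=0.05)
        current_step = len(optimizer.res)

    step = best_step(optimizer)
    while step + 20 > current_step and current_step < 150:
        # stop after 150 steps, or once 20 steps pass without a new best
        optimizer.maximize(init_points=0, n_iter=1, acq="ei", xi=0.05)
        current_step = len(optimizer.res)
        step = best_step(optimizer)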
66 changes: 40 additions & 26 deletions CNN - Python Code/CNN_seq2seq_GWLshift.py
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 29 14:21:02 2020
+updated on Thu Oct 15 17:57:46 2020
@author: Andreas Wunsch
"""
@@ -15,7 +16,7 @@
from bayes_opt import BayesianOptimization
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
-# from bayes_opt.util import load_logs #needed for: load existing optimizer states
+from bayes_opt.util import load_logs #needed for: load existing optimizer states
import os
import glob
import pandas as pd
@@ -24,13 +25,8 @@
from scipy import stats
from matplotlib import pyplot
from sklearn.preprocessing import MinMaxScaler
-# from sklearn.preprocessing import StandardScaler
-import tensorflow as tf
-tf.config.optimizer.set_jit(True)

-from tensorflow.keras.mixed_precision import experimental as mixed_precision
-policy = mixed_precision.Policy('mixed_float16')
-mixed_precision.set_policy(policy)
+import tensorflow as tf

gpus = tf.config.experimental.list_physical_devices('GPU')

@@ -174,11 +170,11 @@ def bayesOpt_function_with_discrete_params(pp,densesize_int, seqlength_int, batc

# inputs
if rH == 0:
-    data.drop(columns='rH')
+    data = data.drop(columns='rH')
if T == 0:
-    data.drop(columns='T')
+    data = data.drop(columns='T')
if Tsin == 0:
-    data.drop(columns='Tsin')
+    data = data.drop(columns='Tsin')

#scale data
scaler = MinMaxScaler(feature_range=(-1, 1))
@@ -281,11 +277,11 @@ def simulate_testset(pp,densesize_int, seqlength_int, batchsize_int, filters_int

# inputs
if rH == 0:
-    data.drop(columns='rH')
+    data = data.drop(columns='rH')
if T == 0:
-    data.drop(columns='T')
+    data = data.drop(columns='T')
if Tsin == 0:
-    data.drop(columns='Tsin')
+    data = data.drop(columns='Tsin')

#scale data
scaler = MinMaxScaler(feature_range=(-1, 1))
@@ -393,6 +389,13 @@ def simulate_testset(pp,densesize_int, seqlength_int, batchsize_int, filters_int

return scores, TestData, inimax, testresults_members, test_sim_median, Well_ID

+class newJSONLogger(JSONLogger):
+    # bypass JSONLogger.__init__, which would delete an existing log file,
+    # and call the parent class initializer directly so logging can append
+    def __init__(self, path):
+        self._path = None
+        super(JSONLogger, self).__init__()
+        self._path = path if path[-5:] == ".json" else path + ".json"

"""###########################################################################
above only functions
@@ -430,23 +433,28 @@ def simulate_testset(pp,densesize_int, seqlength_int, batchsize_int, filters_int
verbose = 0 # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent, verbose = 2 prints everything
)

-# #load existing optimizer
-# load_logs(optimizer, logs=["./logs_CNN_seq2seq_GWLt-1_"+Well_ID+".json"]);
-# print("\nExisting optimizer is already aware of {} points.".format(len(optimizer.space)))
+#load existing optimizer
+log_already_available = 0
+if os.path.isfile("./logs_CNN_seq2seq_GWLt-1_"+Well_ID+".json"):
+    load_logs(optimizer, logs=["./logs_CNN_seq2seq_GWLt-1_"+Well_ID+".json"]);
+    print("\nExisting optimizer is already aware of {} points.".format(len(optimizer.space)))
+    log_already_available = 1

# Saving progress
-logger = JSONLogger(path="./logs_CNN_seq2seq_GWLt-1_"+Well_ID+".json")
+logger = newJSONLogger(path="./logs_CNN_seq2seq_GWLt-1_"+Well_ID+".json")
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

# random exploration as a start
f = open('./timelog_CNN_seq2seq_GWLt-1_'+Well_ID+'.txt', "w")
print("Start time of first iteration: {}\n".format(datetime.datetime.now()), file = f)#this is not logged in the json file
-optimizer.maximize(
-    init_points=12, #steps of random exploration
-    n_iter=0, # steps of Bayesian optimization
-    acq="ei",# ei = expected improvement (probably the most common acquisition function)
-    xi=0.05 # Prefer exploitation (xi=0.0) / Prefer exploration (xi=0.1)
-    )

+if log_already_available == 0:
+    optimizer.maximize(
+        init_points=25, #steps of random exploration (random starting points before Bayesian optimization)
+        n_iter=0, # steps of Bayesian optimization
+        acq="ei",# ei = expected improvement (probably the most common acquisition function)
+        xi=0.05 # Prefer exploitation (xi=0.0) / Prefer exploration (xi=0.1)
+        )

# optimize while improvement during last 20 steps
current_step = len(optimizer.res)
@@ -456,7 +464,7 @@ def simulate_testset(pp,densesize_int, seqlength_int, batchsize_int, filters_int
step = step + 1
beststep = optimizer.res[step] == optimizer.max #search for best iteration step

-while current_step < 25: #below < 25 iterations, no termination
+while current_step < 50: #below < 50 iterations, no termination
current_step = len(optimizer.res)
beststep = False
step = -1
@@ -471,7 +479,7 @@ def simulate_testset(pp,densesize_int, seqlength_int, batchsize_int, filters_int
xi=0.05 # Prefer exploitation (xi=0.0) / Prefer exploration (xi=0.1)
)

-while (step + 10 > current_step and current_step < 50): # termination after 50 steps or after 10 steps without improvement
+while (step + 20 > current_step and current_step < 150): # termination after 150 steps or after 20 steps without improvement
current_step = len(optimizer.res)
beststep = False
step = -1
@@ -545,4 +553,10 @@ def simulate_testset(pp,densesize_int, seqlength_int, batchsize_int, filters_int
print("max iteration = {}\n".format(len(optimizer.res)), file = f)
for i, res in enumerate(optimizer.res):
print("Iteration {}: \t{}".format(i+1, res), file = f)
f.close()

+#print sim data
+for i in range(inimax):
+    printdf = pd.DataFrame(data=testresults_members[:,:,i],index=TestData.index)
+    printdf.to_csv("./ensemble_member"+str(i) +"_values_CNN_"+Well_ID+'.txt',sep=';', float_format = '%.4f')
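Both scripts gain the same resume mechanism: when a JSON log from an earlier run exists, load_logs replays it into the optimizer and the initial random exploration is skipped, while the newJSONLogger subclass keeps appending new iterations to the same file (the stock JSONLogger of that bayes_opt generation deletes an existing log when it is constructed). A minimal standalone sketch of the pattern; the log path and objective are stand-ins, not the scripts' own:

    import os
    from bayes_opt import BayesianOptimization
    from bayes_opt.logger import JSONLogger
    from bayes_opt.event import Events
    from bayes_opt.util import load_logs

    class newJSONLogger(JSONLogger):
        # bypass JSONLogger.__init__ so an existing log file is not deleted
        def __init__(self, path):
            self._path = None
            super(JSONLogger, self).__init__()
            self._path = path if path[-5:] == ".json" else path + ".json"

    logpath = "./logs_demo.json"  # hypothetical path for this sketch
    optimizer = BayesianOptimization(f=lambda x: -(x - 2) ** 2,  # stand-in objective
                                     pbounds={'x': (-5.0, 5.0)},
                                     random_state=1)

    log_already_available = os.path.isfile(logpath)
    if log_already_available:
        load_logs(optimizer, logs=[logpath])  # replay earlier iterations
        print("Existing optimizer is already aware of {} points.".format(len(optimizer.space)))

    optimizer.subscribe(Events.OPTIMIZATION_STEP, newJSONLogger(logpath))

    if not log_already_available:
        optimizer.maximize(init_points=5, n_iter=0)  # fresh start: random exploration
    optimizer.maximize(init_points=0, n_iter=3)      # these steps are appended to the log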