From 7ef544416a13b64400e9b2237f17c9da9c7d3c5b Mon Sep 17 00:00:00 2001 From: Christian Kehl Date: Mon, 28 Dec 2020 17:02:08 +0100 Subject: [PATCH 1/5] benchmarking - after seeing that memory trends between sync and async memory measures are equivalent, we disable the async mode to reduce compute times --- parcels/particleset_benchmark.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/parcels/particleset_benchmark.py b/parcels/particleset_benchmark.py index fb9a55ce7..5bc903acb 100644 --- a/parcels/particleset_benchmark.py +++ b/parcels/particleset_benchmark.py @@ -48,7 +48,7 @@ def measure_mem_usage(): return rsc.ru_maxrss*1024 return rsc.ru_maxrss -USE_ASYNC_MEMLOG = True +USE_ASYNC_MEMLOG = False USE_RUSE_SYNC_MEMLOG = False # can be faulty class ParticleSet_Benchmark(ParticleSet): @@ -206,6 +206,7 @@ def execute(self, pyfunc=AdvectionRK4, endtime=None, runtime=None, dt=1., if verbose_progress: pbar = self._create_progressbar_(_starttime, endtime) + mem_used_start = 0 if USE_ASYNC_MEMLOG: self.async_mem_log.measure_func = measure_mem mem_used_start = measure_mem() From 7d50c0a4743fb84a40c4b1f4a0775ea6fd415e46 Mon Sep 17 00:00:00 2001 From: Christian Kehl Date: Tue, 12 May 2020 12:04:45 +0200 Subject: [PATCH 2/5] resolved the two main list-comprehension slow-downs --- parcels/kernel.py | 48 +++++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/parcels/kernel.py b/parcels/kernel.py index 95a785e4f..304c2d9c6 100644 --- a/parcels/kernel.py +++ b/parcels/kernel.py @@ -343,12 +343,15 @@ def execute_python(self, pset, endtime, dt): break def remove_deleted(self, pset, output_file, endtime): - """Utility to remove all particles that signalled deletion""" - indices = [i for i, p in enumerate(pset.particles) - if p.state in [ErrorCode.Delete]] - if len(indices) > 0 and output_file is not None: - output_file.write(pset[indices], endtime, deleted_only=True) - pset.remove(indices) + """Utility to remove all particles that signalled deletion""" + states = np.array([p.state for p in pset.particles]) + delstates = states==int(ErrorCode.Delete) + indices = np.nonzero(delstates) + n_deleted = np.count_nonzero(delstates) + if n_deleted > 0 and output_file is not None: + output_file.write(pset[indices], endtime, deleted_only=True) + pset.remove(indices) + def execute(self, pset, endtime, dt, recovery=None, output_file=None, execute_once=False): """Execute this Kernel over a ParticleSet for several timesteps""" @@ -358,14 +361,6 @@ def execute(self, pset, endtime, dt, recovery=None, output_file=None, execute_on if abs(dt) < 1e-6 and not execute_once: logger.warning_once("'dt' is too small, causing numerical accuracy limit problems. Please chose a higher 'dt' and rather scale the 'time' axis of the field accordingly. (related issue #762)") - # def remove_deleted(pset): - # """Utility to remove all particles that signalled deletion""" - # indices = [i for i, p in enumerate(pset.particles) - # if p.state in [ErrorCode.Delete]] - # if len(indices) > 0 and output_file is not None: - # output_file.write(pset[indices], endtime, deleted_only=True) - # pset.remove(indices) - if recovery is None: recovery = {} elif ErrorCode.ErrorOutOfBounds in recovery and ErrorCode.ErrorThroughSurface not in recovery: @@ -387,9 +382,16 @@ def execute(self, pset, endtime, dt, recovery=None, output_file=None, execute_on self.remove_deleted(pset, output_file=output_file, endtime=endtime) # Identify particles that threw errors - error_particles = [p for p in pset.particles if p.state not in [ErrorCode.Success, ErrorCode.Evaluate]] - - while len(error_particles) > 0: + states = np.array([p.state for p in pset.particles]) + estates = np.zeros(states.shape, dtype=np.bool) + for ecval in {ErrorCode.Success, ErrorCode.Evaluate}: + estates |= states==int(ecval) + estates = np.invert(estates) + error_indices = np.nonzero(estates) + n_errors = np.count_nonzero(estates) + error_particles = pset[error_indices] + + while n_errors > 0: # Apply recovery kernel for p in error_particles: if p.state == ErrorCode.StopExecution: @@ -397,7 +399,6 @@ def execute(self, pset, endtime, dt, recovery=None, output_file=None, execute_on if p.state == ErrorCode.Repeat: p.reset_state() elif p.state == ErrorCode.Delete: - # p.delete() pass elif p.state in recovery_map: recovery_kernel = recovery_map[p.state] @@ -407,7 +408,6 @@ def execute(self, pset, endtime, dt, recovery=None, output_file=None, execute_on p.reset_state() else: logger.warning_once('Deleting particle {} because of non-recoverable error'.format(p.id)) - # logger.warning('Deleting particle because of bug in #749 and #737 - particle state: {}'.format(ErrorCode.toString(p.state))) p.delete() # Remove all particles that signalled deletion @@ -419,7 +419,15 @@ def execute(self, pset, endtime, dt, recovery=None, output_file=None, execute_on else: self.execute_python(pset, endtime, dt) - error_particles = [p for p in pset.particles if p.state not in [ErrorCode.Success, ErrorCode.Evaluate]] + # error_particles = [p for p in pset.particles if p.state not in [ErrorCode.Success, ErrorCode.Evaluate]] + states = np.array([p.state for p in pset.particles]) + estates = np.zeros(states.shape, dtype=np.bool) + for ecval in {ErrorCode.Success, ErrorCode.Evaluate}: + estates |= states == int(ecval) + estates = np.invert(estates) + error_indices = np.nonzero(estates) + n_errors = np.count_nonzero(estates) + error_particles = pset[error_indices] def merge(self, kernel): funcname = self.funcname + kernel.funcname From e9a9c357305523a178c8cdeeb644ee6378cf8ccd Mon Sep 17 00:00:00 2001 From: Christian Kehl Date: Tue, 12 May 2020 13:34:17 +0200 Subject: [PATCH 3/5] avoid unnecessary ndarray copies by initialising them in C-contiguous order - to be tested --- parcels/field.py | 19 ++++++++++++------- parcels/grid.py | 30 ++++++++++++++++++++++-------- parcels/kernel.py | 12 ++++++++---- 3 files changed, 42 insertions(+), 19 deletions(-) diff --git a/parcels/field.py b/parcels/field.py index 339497833..fce2ba86a 100644 --- a/parcels/field.py +++ b/parcels/field.py @@ -179,8 +179,8 @@ def __init__(self, name, data, lon=None, lat=None, depth=None, time=None, grid=N # (data_full_zdim = grid.zdim if no indices are used, for A- and C-grids and for some B-grids). It is used for the B-grid, # since some datasets do not provide the deeper level of data (which is ignored by the interpolation). self.data_full_zdim = kwargs.pop('data_full_zdim', None) - self.data_chunks = [] - self.c_data_chunks = [] + self.data_chunks = [] # the data buffer of the FileBuffer raw loaded data - shall be a list of C-contiguous arrays + self.c_data_chunks = [] # C-pointers to the data_chunks array self.nchunks = [] self.chunk_set = False self.filebuffers = [None] * 3 @@ -990,7 +990,8 @@ def chunk_setup(self): self.data_chunks = [None] * npartitions self.c_data_chunks = [None] * npartitions - self.grid.load_chunk = np.zeros(npartitions, dtype=c_int) + # self.grid.load_chunk = np.zeros(npartitions, dtype=c_int) + self.grid.load_chunk = np.zeros(npartitions, dtype=c_int, order='C') # self.grid.chunk_info format: number of dimensions (without tdim); number of chunks per dimensions; # chunksizes (the 0th dim sizes for all chunk of dim[0], then so on for next dims self.grid.chunk_info = [[len(self.nchunks)-1], list(self.nchunks[1:]), sum(list(list(ci) for ci in chunks[1:]), [])] @@ -1009,7 +1010,8 @@ def chunk_data(self): for block_id in range(len(self.grid.load_chunk)): if self.grid.load_chunk[block_id] == 1 or self.grid.load_chunk[block_id] > 1 and self.data_chunks[block_id] is None: block = self.get_block(block_id) - self.data_chunks[block_id] = np.array(self.data.blocks[(slice(self.grid.tdim),) + block]) + # self.data_chunks[block_id] = np.array(self.data.blocks[(slice(self.grid.tdim),) + block]) + self.data_chunks[block_id] = np.array(self.data.blocks[(slice(self.grid.tdim),) + block], order='C') elif self.grid.load_chunk[block_id] == 0: if isinstance(self.data_chunks, list): self.data_chunks[block_id] = None @@ -1022,8 +1024,10 @@ def chunk_data(self): else: self.data_chunks[0, :] = None self.c_data_chunks[0] = None + self.grid.load_chunk[0] = 2 - self.data_chunks[0] = np.array(self.data) + # self.data_chunks[0] = np.array(self.data) + self.data_chunks[0] = np.array(self.data, order='C') @property def ctypes_struct(self): @@ -1046,8 +1050,9 @@ class CField(Structure): if self.grid.load_chunk[i] == 1: raise ValueError('data_chunks should have been loaded by now if requested. grid.load_chunk[bid] cannot be 1') if self.grid.load_chunk[i] > 1: - if not self.data_chunks[i].flags.c_contiguous: - self.data_chunks[i] = self.data_chunks[i].copy() + if not self.data_chunks[i].flags['C_CONTIGUOUS']: + # self.data_chunks[i] = self.data_chunks[i].copy() + self.data_chunks[i] = np.array(self.data_chunks[i], order='C') self.c_data_chunks[i] = self.data_chunks[i].ctypes.data_as(POINTER(POINTER(c_float))) else: self.c_data_chunks[i] = None diff --git a/parcels/grid.py b/parcels/grid.py index 1ae31fa77..a4afd63d6 100644 --- a/parcels/grid.py +++ b/parcels/grid.py @@ -35,9 +35,15 @@ class Grid(object): """ def __init__(self, lon, lat, time, time_origin, mesh): - self.lon = lon - self.lat = lat - self.time = np.zeros(1, dtype=np.float64) if time is None else time + self.lon = lon # should be a C-contiguous array of floats + if not self.lon.flags['C_CONTIGUOUS']: + self.lon = np.array(self.lon, order='C') + self.lat = lat # should be a C-contiguous array of floats + if not self.lat.flags['C_CONTIGUOUS']: + self.lat = np.array(self.lat, order='C') + self.time = np.zeros(1, dtype=np.float64) if time is None else time # should be a C-contiguous array of float64 + if not self.time.flags['C_CONTIGUOUS']: + self.time = np.array(self.time, order='C') if not self.lon.dtype == np.float32: logger.warning_once("Casting lon data to np.float32") self.lon = self.lon.astype(np.float32) @@ -61,7 +67,7 @@ def __init__(self, lon, lat, time, time_origin, mesh): self.defer_load = False self.lonlat_minmax = np.array([np.nanmin(lon), np.nanmax(lon), np.nanmin(lat), np.nanmax(lat)], dtype=np.float32) self.periods = 0 - self.load_chunk = [] + self.load_chunk = [] # should be a C-contiguous array of ints self.chunk_info = None self.master_chunksize = None self._add_last_periodic_data_timestep = False @@ -303,7 +309,9 @@ def __init__(self, lon, lat, depth=None, time=None, time_origin=None, mesh='flat assert(len(depth.shape) == 1), 'depth is not a vector' self.gtype = GridCode.RectilinearZGrid - self.depth = np.zeros(1, dtype=np.float32) if depth is None else depth + self.depth = np.zeros(1, dtype=np.float32) if depth is None else depth # should be a C-contiguous array of floats + if not self.depth.flags['C_CONTIGUOUS']: + self.depth = np.array(self.depth, order='C') self.zdim = self.depth.size self.z4d = -1 # only used in RectilinearSGrid if not self.depth.dtype == np.float32: @@ -339,7 +347,9 @@ def __init__(self, lon, lat, depth, time=None, time_origin=None, mesh='flat'): assert(isinstance(depth, np.ndarray) and len(depth.shape) in [3, 4]), 'depth is not a 3D or 4D numpy array' self.gtype = GridCode.RectilinearSGrid - self.depth = depth + self.depth = depth # should be a C-contiguous array of floats + if not self.depth.flags['C_CONTIGUOUS']: + self.depth = np.array(self.depth, order='C') self.zdim = self.depth.shape[-3] self.z4d = len(self.depth.shape) == 4 if self.z4d: @@ -435,7 +445,9 @@ def __init__(self, lon, lat, depth=None, time=None, time_origin=None, mesh='flat assert(len(depth.shape) == 1), 'depth is not a vector' self.gtype = GridCode.CurvilinearZGrid - self.depth = np.zeros(1, dtype=np.float32) if depth is None else depth + self.depth = np.zeros(1, dtype=np.float32) if depth is None else depth # should be a C-contiguous array of floats + if not self.depth.flags['C_CONTIGUOUS']: + self.depth = np.array(self.depth, order='C') self.zdim = self.depth.size self.z4d = -1 # only for SGrid if not self.depth.dtype == np.float32: @@ -470,7 +482,9 @@ def __init__(self, lon, lat, depth, time=None, time_origin=None, mesh='flat'): assert(isinstance(depth, np.ndarray) and len(depth.shape) in [3, 4]), 'depth is not a 4D numpy array' self.gtype = GridCode.CurvilinearSGrid - self.depth = depth + self.depth = depth # should be a C-contiguous array of floats + if not self.depth.flags['C_CONTIGUOUS']: + self.depth = np.array(self.depth, order='C') self.zdim = self.depth.shape[-3] self.z4d = len(self.depth.shape) == 4 if self.z4d: diff --git a/parcels/kernel.py b/parcels/kernel.py index 304c2d9c6..e8fc121ee 100644 --- a/parcels/kernel.py +++ b/parcels/kernel.py @@ -240,13 +240,17 @@ def execute_jit(self, pset, endtime, dt): g.load_chunk = np.where(g.load_chunk == 1, 2, g.load_chunk) if len(g.load_chunk) > 0: # not the case if a field in not called in the kernel if not g.load_chunk.flags.c_contiguous: - g.load_chunk = g.load_chunk.copy() + # g.load_chunk = g.load_chunk.copy() + g.load_chunk = np.array(g.load_chunk, order='C') if not g.depth.flags.c_contiguous: - g.depth = g.depth.copy() + # g.depth = g.depth.copy() + g.depth = np.array(g.depth, order='C') if not g.lon.flags.c_contiguous: - g.lon = g.lon.copy() + # g.lon = g.lon.copy() + g.lon = np.array(g.lon, order='C') if not g.lat.flags.c_contiguous: - g.lat = g.lat.copy() + # g.lat = g.lat.copy() + g.lat = np.array(g.lat, order='C') fargs = [byref(f.ctypes_struct) for f in self.field_args.values()] fargs += [c_double(f) for f in self.const_args.values()] From b103fcac854f4aee42cc245b0bc45aab152fa4d0 Mon Sep 17 00:00:00 2001 From: Christian Kehl Date: Fri, 23 Oct 2020 12:53:44 +0200 Subject: [PATCH 4/5] merged conflicts in the stommel benchmark --- performance/benchmark_stommel.py | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/performance/benchmark_stommel.py b/performance/benchmark_stommel.py index 93e0cf958..a20dc60bd 100644 --- a/performance/benchmark_stommel.py +++ b/performance/benchmark_stommel.py @@ -418,31 +418,6 @@ def Age(particle, fieldset, time): #endtime = ostime.time() endtime = ostime.process_time() - # if MPI: - # mpi_comm = MPI.COMM_WORLD - # if mpi_comm.Get_rank() == 0: - # dt_time = [] - # for i in range(len(perflog.times_steps)): - # if i==0: - # dt_time.append( (perflog.times_steps[i]-global_t_0) ) - # else: - # dt_time.append( (perflog.times_steps[i]-perflog.times_steps[i-1]) ) - # sys.stdout.write("final # particles: {}\n".format(perflog.Nparticles_step[len(perflog.Nparticles_step)-1])) - # sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime-starttime)) - # avg_time = np.mean(np.array(dt_time, dtype=np.float64)) - # sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time*1000.0)) - # else: - # dt_time = [] - # for i in range(len(perflog.times_steps)): - # if i == 0: - # dt_time.append((perflog.times_steps[i] - global_t_0)) - # else: - # dt_time.append((perflog.times_steps[i] - perflog.times_steps[i - 1])) - # sys.stdout.write("final # particles: {}\n".format(perflog.Nparticles_step[len(perflog.Nparticles_step)-1])) - # sys.stdout.write("Time of pset.execute(): {} sec.\n".format(endtime - starttime)) - # avg_time = np.mean(np.array(dt_time, dtype=np.float64)) - # sys.stdout.write("Avg. kernel update time: {} msec.\n".format(avg_time * 1000.0)) - if MPI: mpi_comm = MPI.COMM_WORLD mpi_comm.Barrier() From 77730ef69855d6f95648652b851c01a8e8b8faf4 Mon Sep 17 00:00:00 2001 From: Christian Kehl Date: Fri, 23 Oct 2020 13:20:46 +0200 Subject: [PATCH 5/5] Merged with benchmark rebase --- parcels/kernel_benchmark.py | 12 ++++++++---- parcels/particleset_benchmark.py | 8 -------- performance/benchmark_CMEMS.py | 1 - performance/benchmark_deep_migration_NPacific.py | 3 +-- performance/benchmark_galapagos_backwards.py | 1 - performance/benchmark_palaeo_Y2K.py | 1 - performance/benchmark_perlin.py | 16 ---------------- performance/benchmark_stommel.py | 9 +-------- 8 files changed, 10 insertions(+), 41 deletions(-) diff --git a/parcels/kernel_benchmark.py b/parcels/kernel_benchmark.py index 52ea86ff2..efdabfa03 100644 --- a/parcels/kernel_benchmark.py +++ b/parcels/kernel_benchmark.py @@ -108,13 +108,17 @@ def execute_jit(self, pset, endtime, dt): g.load_chunk = np.where(g.load_chunk == 1, 2, g.load_chunk) if len(g.load_chunk) > 0: # not the case if a field in not called in the kernel if not g.load_chunk.flags.c_contiguous: - g.load_chunk = g.load_chunk.copy() + # g.load_chunk = g.load_chunk.copy() + g.load_chunk = np.array(g.load_chunk, order='C') if not g.depth.flags.c_contiguous: - g.depth = g.depth.copy() + # g.depth = g.depth.copy() + g.depth = np.array(g.depth, order='C') if not g.lon.flags.c_contiguous: - g.lon = g.lon.copy() + # g.lon = g.lon.copy() + g.lon = np.array(g.lon, order='C') if not g.lat.flags.c_contiguous: - g.lat = g.lat.copy() + # g.lat = g.lat.copy() + g.lat = np.array(g.lat, order='C') self._mem_io_timings.stop_timing() self._mem_io_timings.accumulate_timing() diff --git a/parcels/particleset_benchmark.py b/parcels/particleset_benchmark.py index 5bc903acb..573bc7a0e 100644 --- a/parcels/particleset_benchmark.py +++ b/parcels/particleset_benchmark.py @@ -302,14 +302,6 @@ def execute(self, pyfunc=AdvectionRK4, endtime=None, runtime=None, dt=1., self.total_log.stop_timing() self.total_log.accumulate_timing() mem_B_used_total = 0 - # if MPI: - # mpi_comm = MPI.COMM_WORLD - # mem_B_used = self.process.memory_info().rss - # mem_B_used_total = mpi_comm.reduce(mem_B_used, op=MPI.SUM, root=0) - # else: - # mem_B_used_total = self.process.memory_info().rss - # mem_B_used_total = self.process.memory_info().rss - mem_B_used_total = 0 if USE_RUSE_SYNC_MEMLOG: mem_B_used_total = measure_mem_usage() else: diff --git a/performance/benchmark_CMEMS.py b/performance/benchmark_CMEMS.py index e31bb4057..39f0276dd 100644 --- a/performance/benchmark_CMEMS.py +++ b/performance/benchmark_CMEMS.py @@ -347,7 +347,6 @@ def perIterGC(): if MPI: mpi_comm = MPI.COMM_WORLD - # mpi_comm.Barrier() Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0) Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0) if mpi_comm.Get_rank() == 0: diff --git a/performance/benchmark_deep_migration_NPacific.py b/performance/benchmark_deep_migration_NPacific.py index 22b9c5681..c872973c9 100644 --- a/performance/benchmark_deep_migration_NPacific.py +++ b/performance/benchmark_deep_migration_NPacific.py @@ -189,7 +189,7 @@ def DeleteParticle(particle, fieldset, time): def perIterGC(): gc.collect() -def getclosest_ij(lats,lons,latpt,lonpt): +def getclosest_ij(lats,lons,latpt,lonpt): """Function to find the index of the closest point to a certain lon/lat value.""" dist_sq = (lats-latpt)**2 + (lons-lonpt)**2 # find squared distance of every point on grid minindex_flattened = dist_sq.argmin() # 1D index of minimum dist_sq element @@ -436,7 +436,6 @@ def Profiles(particle, fieldset, time): if MPI: mpi_comm = MPI.COMM_WORLD - # mpi_comm.Barrier() Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0) Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0) if mpi_comm.Get_rank() == 0: diff --git a/performance/benchmark_galapagos_backwards.py b/performance/benchmark_galapagos_backwards.py index ab55302d6..c30452080 100644 --- a/performance/benchmark_galapagos_backwards.py +++ b/performance/benchmark_galapagos_backwards.py @@ -200,7 +200,6 @@ def periodicBC(particle, fieldSet, time): if MPI: mpi_comm = MPI.COMM_WORLD - # mpi_comm.Barrier() Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0) Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0) if mpi_comm.Get_rank() == 0: diff --git a/performance/benchmark_palaeo_Y2K.py b/performance/benchmark_palaeo_Y2K.py index 415ff69bc..32e3a93e6 100644 --- a/performance/benchmark_palaeo_Y2K.py +++ b/performance/benchmark_palaeo_Y2K.py @@ -361,7 +361,6 @@ class DinoParticle(JITParticle): if MPI: mpi_comm = MPI.COMM_WORLD - # mpi_comm.Barrier() Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0) Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0) if mpi_comm.Get_rank() == 0: diff --git a/performance/benchmark_perlin.py b/performance/benchmark_perlin.py index cb1089d73..ec3be8804 100644 --- a/performance/benchmark_perlin.py +++ b/performance/benchmark_perlin.py @@ -88,30 +88,15 @@ def perlin_fieldset_from_numpy(periodic_wrap=False): # Coordinates of the test fieldset (on A-grid in deg) lon = np.linspace(-a*0.5, a*0.5, img_shape[0], dtype=np.float32) - # sys.stdout.write("lon field: {}\n".format(lon.size)) lat = np.linspace(-b*0.5, b*0.5, img_shape[1], dtype=np.float32) - # sys.stdout.write("lat field: {}\n".format(lat.size)) totime = tsteps*tscale*24.0*60.0*60.0 time = np.linspace(0., totime, tsteps, dtype=np.float64) - # sys.stdout.write("time field: {}\n".format(time.size)) # Define arrays U (zonal), V (meridional) U = perlin2d.generate_fractal_noise_temporal2d(img_shape, tsteps, (perlinres[1], perlinres[2]), noctaves, perlin_persistence, max_shift=((-1, 2), (-1, 2))) U = np.transpose(U, (0,2,1)) - # U = np.swapaxes(U, 1, 2) - # print("U-statistics - min: {:10.7f}; max: {:10.7f}; avg. {:10.7f}; std_dev: {:10.7f}".format(U.min(initial=0), U.max(initial=0), U.mean(), U.std())) V = perlin2d.generate_fractal_noise_temporal2d(img_shape, tsteps, (perlinres[1], perlinres[2]), noctaves, perlin_persistence, max_shift=((-1, 2), (-1, 2))) V = np.transpose(V, (0,2,1)) - # V = np.swapaxes(V, 1, 2) - # print("V-statistics - min: {:10.7f}; max: {:10.7f}; avg. {:10.7f}; std_dev: {:10.7f}".format(V.min(initial=0), V.max(initial=0), V.mean(), V.std())) - - # U = perlin3d.generate_fractal_noise_3d(img_shape, perlinres, noctaves, perlin_persistence) * scalefac - # U = np.transpose(U, (0,2,1)) - # sys.stdout.write("U field shape: {} - [tdim][ydim][xdim]=[{}][{}][{}]\n".format(U.shape, time.shape[0], lat.shape[0], lon.shape[0])) - # V = perlin3d.generate_fractal_noise_3d(img_shape, perlinres, noctaves, perlin_persistence) * scalefac - # V = np.transpose(V, (0,2,1)) - # sys.stdout.write("V field shape: {} - [tdim][ydim][xdim]=[{}][{}][{}]\n".format(V.shape, time.shape[0], lat.shape[0], lon.shape[0])) - data = {'U': U, 'V': V} dimensions = {'time': time, 'lon': lon, 'lat': lat} if periodic_wrap: @@ -424,7 +409,6 @@ def Age(particle, fieldset, time): if MPI: mpi_comm = MPI.COMM_WORLD - # mpi_comm.Barrier() Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0) Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0) if mpi_comm.Get_rank() == 0: diff --git a/performance/benchmark_stommel.py b/performance/benchmark_stommel.py index a20dc60bd..c1c3f14a6 100644 --- a/performance/benchmark_stommel.py +++ b/performance/benchmark_stommel.py @@ -6,8 +6,8 @@ from parcels import AdvectionEE, AdvectionRK45, AdvectionRK4 from parcels import FieldSet, ScipyParticle, JITParticle, Variable, AdvectionRK4, ErrorCode from parcels.particleset_benchmark import ParticleSet_Benchmark as ParticleSet -# from parcels.kernel_benchmark import Kernel_Benchmark as Kernel from parcels.field import VectorField, NestedField, SummedField +# from parcels.kernel_benchmark import Kernel_Benchmark as Kernel # from parcels import plotTrajectoriesFile_loadedField from datetime import timedelta as delta import math @@ -383,11 +383,8 @@ def Age(particle, fieldset, time): mpi_comm = MPI.COMM_WORLD mpi_rank = mpi_comm.Get_rank() if mpi_rank==0: - # starttime = ostime.time() - # starttime = MPI.Wtime() starttime = ostime.process_time() else: - # starttime = ostime.time() starttime = ostime.process_time() kernels = pset.Kernel(AdvectionRK4,delete_cfiles=True) if agingParticles: @@ -411,11 +408,8 @@ def Age(particle, fieldset, time): mpi_comm = MPI.COMM_WORLD mpi_rank = mpi_comm.Get_rank() if mpi_rank==0: - # endtime = ostime.time() - # endtime = MPI.Wtime() endtime = ostime.process_time() else: - #endtime = ostime.time() endtime = ostime.process_time() if MPI: @@ -451,7 +445,6 @@ def Age(particle, fieldset, time): if MPI: mpi_comm = MPI.COMM_WORLD - # mpi_comm.Barrier() Nparticles = mpi_comm.reduce(np.array(pset.nparticle_log.get_params()), op=MPI.SUM, root=0) Nmem = mpi_comm.reduce(np.array(pset.mem_log.get_params()), op=MPI.SUM, root=0) if mpi_comm.Get_rank() == 0: