Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed
- By default, batch downloads will skip files that already exist locally. To force re-downloading and replace existing files, pass the `replace_existing=True` argument to `Batch.load()`, `Batch.download()`, or `BatchData.load()`.
- The `BatchData.load_sim_data()` method now overwrites any previously downloaded simulation data file instead of skipping the download.

### Fixed
- Giving opposite boundaries different names no longer causes a symmetry validator failure.

Expand Down
51 changes: 35 additions & 16 deletions tidy3d/web/api/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,12 +431,7 @@ def load_sim_data(self, task_name: str) -> SimulationDataType:
task_id = self.task_ids[task_name]
web.get_info(task_id)

return web.load(
task_id=task_id,
path=task_data_path,
replace_existing=False,
verbose=False,
)
return web.load(task_id=task_id, path=task_data_path, verbose=False)

def __getitem__(self, task_name: TaskName) -> SimulationDataType:
"""Get the simulation data object for a given ``task_name``."""
Expand All @@ -451,14 +446,16 @@ def __len__(self):
return len(self.task_paths)

@classmethod
def load(cls, path_dir: str = DEFAULT_DATA_DIR) -> BatchData:
def load(cls, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = False) -> BatchData:
"""Load :class:`Batch` from file, download results, and load them.

Parameters
----------
path_dir : str = './'
Base directory where data will be downloaded, by default current working directory.
A `batch.hdf5` file must be present in the directory.
replace_existing : bool = False
Downloads the data even if path exists (overwriting the existing).

Returns
------
Expand All @@ -469,7 +466,7 @@ def load(cls, path_dir: str = DEFAULT_DATA_DIR) -> BatchData:

batch_file = Batch._batch_path(path_dir=path_dir)
batch = Batch.from_file(batch_file)
return batch.load(path_dir=path_dir)
return batch.load(path_dir=path_dir, replace_existing=replace_existing)


class Batch(WebContainer):
Expand Down Expand Up @@ -606,7 +603,6 @@ def run(self, path_dir: str = DEFAULT_DATA_DIR) -> BatchData:
self.upload()
self.start()
self.monitor()
self.download(path_dir=path_dir)
return self.load(path_dir=path_dir)

@cached_property
Expand Down Expand Up @@ -900,13 +896,15 @@ def _batch_path(path_dir: str = DEFAULT_DATA_DIR):
"""
return os.path.join(path_dir, "batch.hdf5")

def download(self, path_dir: str = DEFAULT_DATA_DIR) -> None:
def download(self, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = False) -> None:
"""Download results of each task.

Parameters
----------
path_dir : str = './'
Base directory where data will be downloaded, by default the current working directory.
replace_existing : bool = False
Downloads the data even if path exists (overwriting the existing).

Note
----
Expand All @@ -919,17 +917,36 @@ def download(self, path_dir: str = DEFAULT_DATA_DIR) -> None:
self._check_path_dir(path_dir=path_dir)
self.to_file(self._batch_path(path_dir=path_dir))

num_existing = 0
for _, job in self.jobs.items():
job_path_str = self._job_data_path(task_id=job.task_id, path_dir=path_dir)
if os.path.exists(job_path_str):
num_existing += 1
if num_existing > 0:
files_plural = "files have" if num_existing > 1 else "file has"
log.warning(
f"{num_existing} {files_plural} already been downloaded "
f"and will be skipped. To forcibly overwrite existing files, invoke "
"the load or download function with `replace_existing=True`.",
log_once=True,
)

with ThreadPoolExecutor(max_workers=self.num_workers) as executor:
fns = []
for task_name, job in self.jobs.items():
job_path = self._job_data_path(task_id=job.task_id, path_dir=path_dir)

job_path_str = self._job_data_path(task_id=job.task_id, path_dir=path_dir)
if os.path.exists(job_path_str):
if replace_existing:
log.info(f"File '{job_path_str}' already exists. Overwriting.")
else:
log.info(f"File '{job_path_str}' already exists. Skipping.")
continue
if "error" in job.status:
log.warning(f"Not downloading '{task_name}' as the task errored.")
continue

def fn(job=job, job_path=job_path) -> None:
return job.download(path=job_path)
def fn(job=job, job_path_str=job_path_str) -> None:
return job.download(path=job_path_str)

fns.append(fn)

Expand All @@ -951,13 +968,15 @@ def fn(job=job, job_path=job_path) -> None:
completed += 1
progress.update(pbar, completed=completed)

def load(self, path_dir: str = DEFAULT_DATA_DIR) -> BatchData:
def load(self, path_dir: str = DEFAULT_DATA_DIR, replace_existing: bool = False) -> BatchData:
"""Download results and load them into :class:`.BatchData` object.

Parameters
----------
path_dir : str = './'
Base directory where data will be downloaded, by default current working directory.
replace_existing : bool = False
Downloads the data even if path exists (overwriting the existing).

Returns
------
Expand All @@ -969,7 +988,7 @@ def load(self, path_dir: str = DEFAULT_DATA_DIR) -> BatchData:
allowing one to load this :class:`Batch` later using ``batch = Batch.from_file()``.
"""
self._check_path_dir(path_dir=path_dir)
self.to_file(self._batch_path(path_dir=path_dir))
self.download(path_dir=path_dir, replace_existing=replace_existing)

if self.jobs is None:
raise DataError("Can't load batch results, hasn't been uploaded.")
Expand Down
2 changes: 1 addition & 1 deletion tidy3d/web/api/webapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -806,7 +806,7 @@ def load(
Unique identifier of task on server. Returned by :meth:`upload`.
path : str
Download path to .hdf5 data file (including filename).
replace_existing: bool = True
replace_existing : bool = True
Downloads the data even if path exists (overwriting the existing).
verbose : bool = True
If ``True``, will print progressbars and status, otherwise, will run silently.
Expand Down
Loading