Skip to content

data_analyzer error handling #6766

Closed
@wyli

Description

@wyli

Describe the bug

if self.label_key is not None:
label = batch_data[self.label_key]
label = torch.argmax(label, dim=0) if label.shape[0] > 1 else label[0]
batch_data[self.label_key] = label.to(device)
d = summarizer(batch_data)
except BaseException as err:
if "image_meta_dict" in batch_data.keys():
filename = batch_data["image_meta_dict"]["filename_or_obj"]
else:
filename = batch_data[self.image_key].meta["filename_or_obj"]
logger.info(f"Unable to process data {filename} on {device}. {err}")
if self.device.type == "cuda":
logger.info("DataAnalyzer `device` set to GPU execution hit an exception. Falling back to `cpu`.")
batch_data[self.image_key] = batch_data[self.image_key].to("cpu")
if self.label_key is not None:
label = batch_data[self.label_key]
label = torch.argmax(label, dim=0) if label.shape[0] > 1 else label[0]

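`torch.argmax(label, dim=0)` may run twice during error handling: once in the `try` block, which writes the already-reduced label back into `batch_data[self.label_key]`, and again in the CPU fallback branch. The second call collapses one more dimension of the label, which leads to the label/image shape mismatch (`ValueError`) shown in the traceback below.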

Also, the filename of the case being processed is not properly tracked in this case.

Traceback (most recent call last):
  File "/opt/monai/monai/transforms/transform.py", line 141, in apply_transform
    return _apply_transform(transform, data, unpack_items, lazy, overrides, log_stats)
  File "/opt/monai/monai/transforms/transform.py", line 98, in _apply_transform
    return transform(data, lazy=lazy) if isinstance(transform, LazyTrait) else transform(data)
  File "/opt/monai/monai/auto3dseg/analyzer.py", line 484, in __call__
    label_dict[LabelStatsKeys.IMAGE_INTST] = [
  File "/opt/monai/monai/auto3dseg/analyzer.py", line 485, in <listcomp>
    self.ops[LabelStatsKeys.IMAGE_INTST].evaluate(nda_m) for nda_m in nda_masks
  File "/opt/monai/monai/auto3dseg/operations.py", line 94, in evaluate
    ret = super().evaluate(data, **kwargs)
  File "/opt/monai/monai/auto3dseg/operations.py", line 41, in evaluate
    return {k: v(data, **kwargs) for k, v in self.data.items() if callable(v)}
  File "/opt/monai/monai/auto3dseg/operations.py", line 41, in <dictcomp>
    return {k: v(data, **kwargs) for k, v in self.data.items() if callable(v)}
  File "/opt/monai/monai/transforms/utils_pytorch_numpy_unification.py", line 505, in median
    ret = np.median(x, **kwargs) if isinstance(x, (np.ndarray, list)) else torch.median(x, **kwargs)  # type: ignore
  File "/opt/monai/monai/data/meta_tensor.py", line 276, in __torch_function__
    ret = super().__torch_function__(func, types, args, kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/_tensor.py", line 1298, in __torch_function__
    ret = func(*args, **kwargs)
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 2.91 GiB. GPU 0 has a total capacty of 15.78 GiB of which 1.45 GiB is free. Process 3353954 has 14.27 GiB memory in use. Of the allocated memory 8.76 GiB is allocated by PyTorch, and 5.16 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/opt/monai/monai/apps/auto3dseg/data_analyzer.py", line 337, in _get_all_case_stats
    d = summarizer(batch_data)
  File "/opt/monai/monai/transforms/compose.py", line 322, in __call__
    result = execute_compose(
  File "/opt/monai/monai/transforms/compose.py", line 111, in execute_compose
    data = apply_transform(
  File "/opt/monai/monai/transforms/transform.py", line 171, in apply_transform
    raise RuntimeError(f"applying transform {transform}") from e
RuntimeError: applying transform <monai.auto3dseg.analyzer.LabelStats object at 0x7efe93b7d960>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/opt/monai/monai/transforms/transform.py", line 141, in apply_transform
    return _apply_transform(transform, data, unpack_items, lazy, overrides, log_stats)
  File "/opt/monai/monai/transforms/transform.py", line 98, in _apply_transform
    return transform(data, lazy=lazy) if isinstance(transform, LazyTrait) else transform(data)
  File "/opt/monai/monai/auto3dseg/analyzer.py", line 335, in __call__
    raise ValueError(f"Label shape {ndas_label.shape} is different from image shape {ndas[0].shape}")
ValueError: Label shape torch.Size([512, 987]) is different from image shape torch.Size([512, 512, 987])

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "/usr/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/usr/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/opt/monai/monai/apps/auto3dseg/__main__.py", line 24, in <module>
    fire.Fire(
  File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 141, in Fire
    component_trace = _Fire(component, args, parsed_flag_args, context, name)
  File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 475, in _Fire
    component, remaining_args = _CallAndUpdateTrace(
  File "/usr/local/lib/python3.10/dist-packages/fire/core.py", line 691, in _CallAndUpdateTrace
    component = fn(*varargs, **kwargs)
  File "/opt/monai/monai/apps/auto3dseg/data_analyzer.py", line 227, in get_all_case_stats
    result_bycase = self._get_all_case_stats(0, 1, None, key, transform_list)
  File "/opt/monai/monai/apps/auto3dseg/data_analyzer.py", line 351, in _get_all_case_stats
    d = summarizer(batch_data)
  File "/opt/monai/monai/transforms/compose.py", line 322, in __call__
    result = execute_compose(
  File "/opt/monai/monai/transforms/compose.py", line 111, in execute_compose
    data = apply_transform(
  File "/opt/monai/monai/transforms/transform.py", line 171, in apply_transform
    raise RuntimeError(f"applying transform {transform}") from e
RuntimeError: applying transform <monai.auto3dseg.analyzer.FgImageStats object at 0x7efe93b7e230>

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions