Open
Description
Bug description
With the mlflow logger, I have the following error:
Traceback (most recent call last):
File "/opt/venvs/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 48, in _call_and_handle_interrupt
return trainer_fn(*args, **kwargs)
File "/opt/venvs/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 599, in _fit_impl
self._run(model, ckpt_path=ckpt_path)
File "/opt/venvs/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 1025, in _run
call._call_teardown_hook(self)
File "/opt/venvs/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 148, in _call_teardown_hook
logger.finalize("success")
File "/opt/venvs/lib/python3.10/site-packages/lightning_utilities/core/rank_zero.py", line 41, in wrapped_fn
return fn(*args, **kwargs)
File "/opt/venvs/lib/python3.10/site-packages/pytorch_lightning/loggers/mlflow.py", line 289, in finalize
self._scan_and_log_checkpoints(self._checkpoint_callback)
File "/opt/venvs/lib/python3.10/site-packages/pytorch_lightning/loggers/mlflow.py", line 369, in _scan_and_log_checkpoints
self.experiment.log_artifact(self._run_id, p, artifact_path)
File "/opt/venvs/lib/python3.10/site-packages/mlflow/tracking/client.py", line 2379, in log_artifact
self._tracking_client.log_artifact(run_id, local_path, artifact_path)
File "/opt/venvs/lib/python3.10/site-packages/mlflow/tracking/_tracking_service/client.py", line 931, in log_artifact
artifact_repo.log_artifact(local_path, artifact_path)
File "/opt/venvs/lib/python3.10/site-packages/mlflow/store/artifact/local_artifact_repo.py", line 33, in log_artifact
verify_artifact_path(artifact_path)
File "/opt/venvs/lib/python3.10/site-packages/mlflow/store/artifact/artifact_repo.py", line 464, in verify_artifact_path
raise MlflowException(
mlflow.exceptions.MlflowException: Invalid artifact path: 'ordering-model-epoch=09-val_loss=0.00'. Names may be treated as files in certain cases, and must not resolve to other names when treated as such. This name would resolve to 'ordering-model-epoch=09-val_loss=0.00'
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/lib/python3.10/pdb.py", line 1726, in main
pdb._runscript(mainpyfile)
File "/usr/lib/python3.10/pdb.py", line 1586, in _runscript
self.run(statement)
File "/usr/lib/python3.10/bdb.py", line 597, in run
exec(cmd, globals, locals)
File "<string>", line 1, in <module>
File "/workspace/perso/train.py", line 66, in <module>
train_model(config)
File "/workspace/vrsgen/training/train.py", line 145, in train_model
trainer.fit(model, train_loader, val_loader)
File "/opt/venvs/lib/python3.10/site-packages/pytorch_lightning/trainer/trainer.py", line 561, in fit
call._call_and_handle_interrupt(
File "/opt/venvs/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 68, in _call_and_handle_interrupt
_interrupt(trainer, exception)
File "/opt/venvs/lib/python3.10/site-packages/pytorch_lightning/trainer/call.py", line 82, in _interrupt
logger.finalize("failed")
File "/opt/venvs/lib/python3.10/site-packages/lightning_utilities/core/rank_zero.py", line 41, in wrapped_fn
return fn(*args, **kwargs)
File "/opt/venvs/lib/python3.10/site-packages/pytorch_lightning/loggers/mlflow.py", line 289, in finalize
self._scan_and_log_checkpoints(self._checkpoint_callback)
File "/opt/venvs/lib/python3.10/site-packages/pytorch_lightning/loggers/mlflow.py", line 369, in _scan_and_log_checkpoints
self.experiment.log_artifact(self._run_id, p, artifact_path)
File "/opt/venvs/lib/python3.10/site-packages/mlflow/tracking/client.py", line 2379, in log_artifact
self._tracking_client.log_artifact(run_id, local_path, artifact_path)
File "/opt/venvs/lib/python3.10/site-packages/mlflow/tracking/_tracking_service/client.py", line 931, in log_artifact
artifact_repo.log_artifact(local_path, artifact_path)
File "/opt/venvs/lib/python3.10/site-packages/mlflow/store/artifact/local_artifact_repo.py", line 33, in log_artifact
verify_artifact_path(artifact_path)
File "/opt/venvs/lib/python3.10/site-packages/mlflow/store/artifact/artifact_repo.py", line 464, in verify_artifact_path
raise MlflowException(
mlflow.exceptions.MlflowException: Invalid artifact path: 'ordering-model-epoch=09-val_loss=0.00'. Names may be treated as files in certain cases, and must not resolve to other names when treated as such. This name would resolve to 'ordering-model-epoch=09-val_loss=0.00'
Uncaught exception. Entering post mortem debugging
Running 'cont' or 'step' will restart the program
The problem comes from the path_not_unique function (line 168 in mlflow.utils.validation), which works only with str but here receives a pathlib.Path object: posixpath.normpath(name) returns a str even when given a Path as input, so the comparison norm != name
is always true (a str never compares equal to a Path) and the artifact path is rejected as invalid.
To correct this, in the MLflow logger (pytorch_lightning.loggers.mlflow), line 369 should convert artifact_path to a str before passing it on:
self.experiment.log_artifact(self._run_id, p, str(artifact_path))
What version are you seeing the problem on?
v2.5
Reproduced in studio
No response
How to reproduce the bug
Error messages and logs
# Error messages and logs here please
Environment
Current environment
#- PyTorch Lightning Version (e.g., 2.5.0):
#- PyTorch Version (e.g., 2.5):
#- Python version (e.g., 3.12):
#- OS (e.g., Linux):
#- CUDA/cuDNN version:
#- GPU models and configuration:
#- How you installed Lightning(`conda`, `pip`, source):
More info
No response