From 9e5da2cf6a020fd48e6484bd89338ae10024d54b Mon Sep 17 00:00:00 2001 From: Kailash Gogineni Date: Fri, 12 Sep 2025 15:30:09 -0700 Subject: [PATCH 01/17] Update datasets_utils.py Patch for token captions --- src/data/datasets_utils.py | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/data/datasets_utils.py b/src/data/datasets_utils.py index afb5a1f..78768b1 100644 --- a/src/data/datasets_utils.py +++ b/src/data/datasets_utils.py @@ -27,11 +27,27 @@ def tokenize_captions( captions = [] if "prompt" in examples.keys(): captions = examples["prompt"] + # else: + # for example in examples["image"]: + # path = example.filename + # filename = os.path.splitext(os.path.basename(path))[0] + # caption = filename.replace("_", " ") + # captions.append(caption) else: - for example in examples["image"]: - path = example.filename - filename = os.path.splitext(os.path.basename(path))[0] - caption = filename.replace("_", " ") + for i, img in enumerate(examples["image"]): + # try several likely places for a path + path = getattr(img, "filename", None) # PIL Image opened from disk + if path is None and isinstance(img, dict): + path = img.get("path") # HF datasets when decode=False + if path is None and "image_path" in examples: + path = examples["image_path"][i] # custom parallel column if you have one + + if path: + filename = os.path.splitext(os.path.basename(path))[0] + caption = filename.replace("_", " ") + else: + caption = f"image_{i}" + captions.append(caption) inputs = tokenizer(captions) From f17bc7238c141554c6e3f1ee83ccbddfbaf2d1b1 Mon Sep 17 00:00:00 2001 From: Kailash Gogineni Date: Fri, 12 Sep 2025 16:14:27 -0700 Subject: [PATCH 02/17] Create stable_diffusion.yaml Add stable diffusion model --- config/stable_diffusion.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 config/stable_diffusion.yaml diff --git a/config/stable_diffusion.yaml b/config/stable_diffusion.yaml new file mode 100644 index 
0000000..48f57dc --- /dev/null +++ b/config/stable_diffusion.yaml @@ -0,0 +1,20 @@ +defaults: + - launcher: defaults + - accelerate_config: fsdp_config + - train_args: stable-diffusion-xl + - _self_ + +accelerate_config: + dynamo_config: + dynamo_backend: ["no", "inductor"] + +train_args: + train_batch_size: [1, 10, 17] + num_iterations: 20 + logging_dir: outputs/stable_diffusion_xl + +# Override Hydra's run dir to be the same as the logging dir. Not setting this may result +# in errors or unexpected behavior because Hydra by default uses a run dir `./