|
| 1 | +#!/usr/bin/env python3 |
| 2 | +import argparse |
| 3 | +import base64 |
| 4 | +import json |
| 5 | +import os |
| 6 | +import re |
| 7 | +import sys |
| 8 | +import urllib.request |
| 9 | +import urllib.error |
| 10 | + |
def parse_arguments():
    """Parse CLI arguments and split them into utility and generation options.

    Returns:
        tuple[dict, dict]: ``(util_opts, gen_opts)``. ``util_opts`` holds
        client-side settings (verbose, server_url, output, output_begin_idx);
        ``gen_opts`` holds everything forwarded to the server. Only options
        the user actually supplied (value is not ``None``) are included.
    """
    ap = argparse.ArgumentParser(
        description="Client for stable-diffusion.cpp sd-server",
        allow_abbrev=False
    )

    ap.add_argument("--server-url", default=os.environ.get("SD_SERVER_URL"),
                    help="URL of the sd-server OpenAI-compatible endpoint. Defaults to SD_SERVER_URL env var.")

    ap.add_argument("-o", "--output", default="./output.png",
                    help="path to write result image to. You can use printf-style %%d format specifiers for image sequences (default: ./output.png) (e.g., output_%%03d.png).")
    ap.add_argument("--output-begin-idx", type=int, default=None,
                    help="starting index for output image sequence, must be non-negative (default 0 if specified %%d in output path, 1 otherwise).")
    ap.add_argument("-v", "--verbose", action="store_true",
                    help="print extra info.")

    ap.add_argument("-p", "--prompt", default="",
                    help="the prompt to render")
    ap.add_argument("-n", "--negative-prompt", dest="negative_prompt", default=None,
                    help="the negative prompt (default: \"\")")
    ap.add_argument("-H", "--height", type=int,
                    help="image height, in pixel space (default: 512)")
    ap.add_argument("-W", "--width", type=int,
                    help="image width, in pixel space (default: 512)")
    ap.add_argument("--clip-skip", type=int, dest="clip_skip",
                    help="ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1). <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x")
    ap.add_argument("-b", "--batch-count", type=int, dest="batch_count",
                    help="batch count")
    ap.add_argument("--video-frames", type=int, dest="video_frames",
                    help="video frames (default: 1)")
    ap.add_argument("--fps", type=int,
                    help="fps (default: 24)")
    ap.add_argument("--upscale-repeats", type=int, dest="upscale_repeats",
                    help="Run the ESRGAN upscaler this many times (default: 1)")
    ap.add_argument("--cfg-scale", type=float, dest="cfg_scale",
                    help="unconditional guidance scale (default: 7.0)")
    ap.add_argument("--img-cfg-scale", type=float, dest="img_cfg_scale",
                    help="image guidance scale for inpaint or instruct-pix2pix models (default: same as --cfg-scale)")
    ap.add_argument("--guidance", type=float,
                    help="distilled guidance scale for models with guidance input (default: 3.5)")
    ap.add_argument("--strength", type=float,
                    help="strength for noising/unnoising (default: 0.75)")
    ap.add_argument("--pm-style-strength", type=float, dest="pm_style_strength",
                    help="PhotoMaker style strength")
    ap.add_argument("--control-strength", type=float, dest="control_strength",
                    help="strength to apply Control Net (default: 0.9). 1.0 corresponds to full destruction of information in init image")
    ap.add_argument("--moe-boundary", type=float, dest="moe_boundary",
                    help="timestep boundary for Wan2.2 MoE model (default: 0.875). Only enabled if --high-noise-steps is set to -1")
    ap.add_argument("--vace-strength", type=float, dest="vace_strength",
                    help="wan vace strength")
    ap.add_argument("--increase-ref-index", action="store_true", dest="increase_ref_index", default=None,
                    help="automatically increase the indices of references images based on the order they are listed (starting with 1)")
    ap.add_argument("--disable-auto-resize-ref-image", action="store_false", dest="auto_resize_ref_image", default=None,
                    help="disable auto resize of ref images")
    ap.add_argument("-s", "--seed", type=int,
                    help="RNG seed (default: 42, use random seed for < 0)")
    ap.add_argument("--skip-layers", dest="skip_layers", default=None,
                    help="layers to skip for SLG steps (default: [7,8,9]).")
    ap.add_argument("--high-noise-skip-layers", dest="high_noise_skip_layers", default=None,
                    help="(high noise) layers to skip for SLG steps (default: [7,8,9])")
    ap.add_argument("--cache-mode", dest="cache_mode",
                    help="caching method: 'easycache' (DiT), 'ucache' (UNET), 'dbcache'/'taylorseer'/'cache-dit' (DiT block-level).")
    ap.add_argument("--cache-option", dest="cache_option",
                    help="named cache params (key=value format, comma-separated). easycache/ucache: threshold=,start=,end=,decay=,relative=,reset=; dbcache/taylorseer/cache-dit: Fn=,Bn=,threshold=,warmup=. Examples: \"threshold=0.25\" or \"threshold=1.5,reset=0\"")
    ap.add_argument("--cache-preset", dest="cache_preset",
                    help="cache-dit preset: 'slow'/'s', 'medium'/'m', 'fast'/'f', 'ultra'/'u'")
    ap.add_argument("--scm-mask", dest="scm_mask",
                    help="SCM steps mask for cache-dit: comma-separated 0/1 (e.g., \"1,1,1,0,0,1,0,0,1,0\") - 1=compute, 0=can cache")

    args, unknown = ap.parse_known_args()

    # Unknown options are tolerated (forward compatibility with newer servers),
    # but warn on stderr so warnings don't pollute piped stdout.
    for u_arg in unknown:
        print(f"Warning: Unsupported argument '{u_arg}' will be ignored.", file=sys.stderr)

    args_dict = vars(args)

    # Accept both "7,8,9" and "[7,8,9]" (the latter is the format shown in
    # the help text); reject anything non-numeric with a proper CLI error
    # instead of an unhandled ValueError traceback.
    for arg in ["skip_layers", "high_noise_skip_layers"]:
        raw = args_dict.get(arg)
        if raw is not None:
            try:
                args_dict[arg] = [int(x) for x in raw.strip().strip('[]').split(',')]
            except ValueError:
                ap.error(f"invalid layer list for --{arg.replace('_', '-')}: {raw!r}")

    # Enforce the documented "must be non-negative" contract.
    if args_dict.get("output_begin_idx") is not None and args_dict["output_begin_idx"] < 0:
        ap.error("--output-begin-idx must be non-negative")

    # Derive the server-side output format from the output file extension.
    if args_dict.get("output"):
        output_ext = os.path.splitext(args_dict['output'])[-1].lower()
        args_dict["output_format"] = 'jpeg' if output_ext in ('.jpg', '.jpeg', '.jpe') else 'png'

    # Keys consumed locally by this client rather than sent to the server.
    util_keys = {'verbose', 'server_url', 'output', 'output_begin_idx'}

    util_opts = {k: v for k, v in args_dict.items() if k in util_keys and v is not None}
    gen_opts = {k: v for k, v in args_dict.items() if k not in util_keys and v is not None}

    return util_opts, gen_opts
| 105 | + |
def build_openai_payload(gen_opts, util_opts):
    """Translate generation options into an OpenAI images-API request body.

    Options that have no native field in the OpenAI schema are tunneled to
    sd-server inside the prompt via an ``<sd_cpp_extra_args>`` JSON envelope.

    Args:
        gen_opts: generation options (only user-supplied keys present).
        util_opts: client-side options (unused here; kept for signature parity).

    Returns:
        dict ready to be JSON-encoded and POSTed.
    """
    extension_keys = (
        "negative_prompt", "seed", "video_frames", "fps",
        "cfg_scale", "img_cfg_scale", "guidance", "strength",
        "clip_skip", "upscale_repeats", "moe_boundary",
        "control_strength", "pm_style_strength", "vace_strength",
        "cache_mode", "cache_option", "cache_preset", "scm_mask",
        "increase_ref_index", "auto_resize_ref_image",
        "skip_layers", "high_noise_skip_layers",
    )

    # Everything sd-server understands but the OpenAI schema doesn't.
    extras = {k: gen_opts[k] for k in extension_keys if gen_opts.get(k) is not None}

    payload = {}

    w = gen_opts.get("width")
    h = gen_opts.get("height")
    if w and h:
        # The native "size" field requires both dimensions.
        payload["size"] = f"{w}x{h}"
    elif w:
        # A lone dimension has no native field; send it via the envelope.
        extras["width"] = w
    elif h:
        extras["height"] = h

    if gen_opts.get("output_format"):
        payload["output_format"] = gen_opts["output_format"]

    if gen_opts.get("batch_count"):
        payload["n"] = gen_opts["batch_count"]

    # Smuggle the extras through the prompt field.
    base_prompt = gen_opts.get('prompt', '')
    payload["prompt"] = (
        f"{base_prompt}<sd_cpp_extra_args>{json.dumps(extras)}</sd_cpp_extra_args>"
    )

    return payload
| 144 | + |
| 145 | + |
def decode_openai_response(response_body):
    """Decode an OpenAI-style image-generation response into raw image bytes.

    Args:
        response_body: JSON text of the HTTP response body.

    Returns:
        list[bytes]: decoded image payloads, in response order.

    Raises:
        ValueError: if the body is not valid JSON, lacks the 'data' key,
            or any entry is missing or has undecodable 'b64_json' data.
    """
    try:
        data = json.loads(response_body)
    except json.JSONDecodeError as e:
        raise ValueError(f"Invalid JSON response: {e}") from e

    # Guard the type too: a JSON array/string body would otherwise raise a
    # confusing TypeError on the 'in' check below.
    if not isinstance(data, dict) or 'data' not in data:
        raise ValueError("Unexpected response format (no 'data' key)")

    decoded_images = []

    for i, img_data in enumerate(data['data']):
        # Non-dict entries fall through to the same "no image data" error
        # instead of raising AttributeError on .get().
        b64_data = img_data.get("b64_json") if isinstance(img_data, dict) else None
        if not b64_data:
            raise ValueError(f"No image data found for item {i}")
        try:
            decoded_images.append(base64.b64decode(b64_data))
        except ValueError as e:
            # binascii.Error (what b64decode raises) is a documented ValueError
            # subclass; this avoids relying on the undocumented base64.binascii.
            raise ValueError(f"Failed to decode base64 data for item {i}: {e}") from e

    return decoded_images
| 168 | + |
| 169 | + |
def save_images(image_list, util_opts):
    """Write each image byte string in ``image_list`` to disk.

    The output path may contain a printf-style %d specifier, in which case
    every image is numbered. Otherwise the first image uses the path
    verbatim and subsequent images get a "_<n>" suffix before the extension.

    Args:
        image_list: raw image bytes, one entry per file to write.
        util_opts: client options (uses 'output', 'output_begin_idx', 'verbose').
    """
    verbose = util_opts.get("verbose", False)
    output = util_opts.get("output", "./output.png")
    begin_idx = util_opts.get("output_begin_idx")

    out_dir, out_name = os.path.split(output)
    spec_match = re.search(r'%\d*d', out_name)

    if spec_match is not None:
        # Explicit sequence: split the filename around the %d specifier.
        prefix = out_name[:spec_match.start()]
        spec = spec_match.group()
        suffix = out_name[spec_match.end():]
        first_idx = 0
    else:
        # No specifier: extras get "_<n>" inserted before the extension.
        prefix, suffix = os.path.splitext(out_name)
        spec = '_%d'
        first_idx = 1

    if begin_idx is not None:
        first_idx = begin_idx

    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir, exist_ok=True)

    for offset, img_bytes in enumerate(image_list):
        if spec_match is None and offset == 0:
            # First image without a specifier keeps the requested path as-is.
            target = output
        else:
            target = os.path.join(out_dir, prefix + (spec % (offset + first_idx)) + suffix)

        with open(target, "wb") as fh:
            fh.write(img_bytes)

        if verbose:
            print(f"Saved image to {target}")
| 209 | + |
| 210 | + |
def main():
    """Entry point: parse args, POST one generation request, save the images.

    Exits with status 1 on any configuration, transport, or decode error.
    """
    util_opts, gen_opts = parse_arguments()

    verbose = bool(util_opts.get("verbose"))

    server_url = util_opts.get("server_url")
    if not server_url:
        print("Error: --server-url not provided and SD_SERVER_URL env var not found.", file=sys.stderr)
        sys.exit(1)

    # Normalize so the endpoint join never produces a missing slash.
    if not server_url.endswith('/'):
        server_url += '/'
    endpoint = server_url + "v1/images/generations"

    api_payload = build_openai_payload(gen_opts, util_opts)

    if verbose:
        print(f"Sending request to: {endpoint}")
        print(f"Payload: {json.dumps(api_payload, indent=2)}")

    req_data = json.dumps(api_payload).encode('utf-8')
    req = urllib.request.Request(endpoint, data=req_data, headers={'Content-Type': 'application/json'})

    response_body = None
    try:
        with urllib.request.urlopen(req) as response:
            response_body = response.read().decode('utf-8')
    # All error diagnostics go to stderr (matching the server-url check above)
    # so stdout stays clean for scripted use.
    except urllib.error.HTTPError as e:
        print(f"HTTP Error {e.code}: {e.reason}", file=sys.stderr)
        sys.exit(1)
    except urllib.error.URLError as e:
        print(f"URL Error: {e.reason}", file=sys.stderr)
        sys.exit(1)
    except Exception as e:
        print(f"Request Error: {e}", file=sys.stderr)
        sys.exit(1)

    try:
        images = decode_openai_response(response_body)
    except ValueError as e:
        print(f"Error decoding response: {e}", file=sys.stderr)
        sys.exit(1)

    save_images(images, util_opts)
| 255 | + |
| 256 | + |
# Script entry point; guarded so importing this file has no side effects.
if __name__ == "__main__":
    main()
| 259 | + |
0 commit comments