Skip to content

Commit 321286e

Browse files
authored
Merge branch 'leejet:master' into sd-server
2 parents 172e79c + 2e9242e commit 321286e

33 files changed

+144070
-700
lines changed

.github/workflows/build.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ jobs:
149149
runs-on: windows-2025
150150

151151
env:
152-
VULKAN_VERSION: 1.3.261.1
152+
VULKAN_VERSION: 1.4.328.1
153153

154154
strategy:
155155
matrix:
@@ -199,9 +199,9 @@ jobs:
199199
version: 1.11.1
200200
- name: Install Vulkan SDK
201201
id: get_vulkan
202-
if: ${{ matrix.build == 'vulkan' }}
202+
if: ${{ matrix.build == 'vulkan' }}
203203
run: |
204-
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
204+
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
205205
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
206206
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
207207
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"

Dockerfile

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,21 @@
11
ARG UBUNTU_VERSION=22.04
22

3-
FROM ubuntu:$UBUNTU_VERSION as build
3+
FROM ubuntu:$UBUNTU_VERSION AS build
44

5-
RUN apt-get update && apt-get install -y build-essential git cmake
5+
RUN apt-get update && apt-get install -y --no-install-recommends build-essential git cmake
66

77
WORKDIR /sd.cpp
88

99
COPY . .
1010

11-
RUN mkdir build && cd build && cmake .. && cmake --build . --config Release
11+
RUN cmake . -B ./build
12+
RUN cmake --build ./build --config Release --parallel
1213

13-
FROM ubuntu:$UBUNTU_VERSION as runtime
14+
FROM ubuntu:$UBUNTU_VERSION AS runtime
15+
16+
RUN apt-get update && \
17+
apt-get install --yes --no-install-recommends libgomp1 && \
18+
apt-get clean
1419

1520
COPY --from=build /sd.cpp/build/bin/sd /sd
1621

README.md

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@ API and command-line option may change frequently.***
2121
- [SD3/SD3.5](./docs/sd3.md)
2222
- [Flux-dev/Flux-schnell](./docs/flux.md)
2323
- [Chroma](./docs/chroma.md)
24+
- [Qwen Image](./docs/qwen_image.md)
2425
- Image Edit Models
2526
- [FLUX.1-Kontext-dev](./docs/kontext.md)
27+
- [Qwen Image Edit/Qwen Image Edit 2509](./docs/qwen_image_edit.md)
2628
- Video Models
2729
- [Wan2.1/Wan2.2](./docs/wan.md)
2830
- [PhotoMaker](https://github.com/TencentARC/PhotoMaker) support.
@@ -285,7 +287,7 @@ usage: ./bin/sd [arguments]
285287
286288
arguments:
287289
-h, --help show this help message and exit
288-
-M, --mode [MODE] run mode, one of: [img_gen, vid_gen, convert], default: img_gen
290+
-M, --mode [MODE] run mode, one of: [img_gen, vid_gen, upscale, convert], default: img_gen
289291
-t, --threads N number of threads to use during computation (default: -1)
290292
If threads <= 0, then threads will be set to the number of CPU physical cores
291293
--offload-to-cpu place the weights in RAM to save VRAM, and automatically load them into VRAM when needed
@@ -296,11 +298,13 @@ arguments:
296298
--clip_g path to the clip-g text encoder
297299
--clip_vision path to the clip-vision encoder
298300
--t5xxl path to the t5xxl text encoder
301+
--qwen2vl path to the qwen2vl text encoder
302+
--qwen2vl_vision path to the qwen2vl vit
299303
--vae [VAE] path to vae
300304
--taesd [TAESD_PATH] path to taesd. Using Tiny AutoEncoder for fast decoding (low quality)
301305
--control-net [CONTROL_PATH] path to control net model
302306
--embd-dir [EMBEDDING_PATH] path to embeddings
303-
--upscale-model [ESRGAN_PATH] path to esrgan model. Upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now
307+
--upscale-model [ESRGAN_PATH] path to esrgan model. For img_gen mode, upscale images after generate, just RealESRGAN_x4plus_anime_6B supported by now
304308
--upscale-repeats Run the ESRGAN upscaler this many times (default 1)
305309
--type [TYPE] weight type (examples: f32, f16, q4_0, q4_1, q5_0, q5_1, q8_0, q2_K, q3_K, q4_K)
306310
If not specified, the default is the type of the weight file
@@ -449,6 +453,7 @@ These projects use `stable-diffusion.cpp` as a backend for their image generatio
449453
- [Local Diffusion](https://github.com/rmatif/Local-Diffusion)
450454
- [sd.cpp-webui](https://github.com/daniandtheweb/sd.cpp-webui)
451455
- [LocalAI](https://github.com/mudler/LocalAI)
456+
- [Neural-Pixel](https://github.com/Luiz-Alcantara/Neural-Pixel)
452457
453458
## Contributors
454459
@@ -463,6 +468,7 @@ Thank you to all the people who have already contributed to stable-diffusion.cpp
463468
## References
464469
465470
- [ggml](https://github.com/ggerganov/ggml)
471+
- [diffusers](https://github.com/huggingface/diffusers)
466472
- [stable-diffusion](https://github.com/CompVis/stable-diffusion)
467473
- [sd3-ref](https://github.com/Stability-AI/sd3-ref)
468474
- [stable-diffusion-stability-ai](https://github.com/Stability-AI/stablediffusion)
@@ -473,4 +479,4 @@ Thank you to all the people who have already contributed to stable-diffusion.cpp
473479
- [generative-models](https://github.com/Stability-AI/generative-models/)
474480
- [PhotoMaker](https://github.com/TencentARC/PhotoMaker)
475481
- [Wan2.1](https://github.com/Wan-Video/Wan2.1)
476-
- [Wan2.2](https://github.com/Wan-Video/Wan2.2)
482+
- [Wan2.2](https://github.com/Wan-Video/Wan2.2)

assets/qwen/example.png

1.35 MB
Loading

assets/qwen/qwen_image_edit.png

457 KB
Loading
415 KB
Loading

clip.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
/*================================================== CLIPTokenizer ===================================================*/
88

9-
std::pair<std::unordered_map<std::string, float>, std::string> extract_and_remove_lora(std::string text) {
9+
__STATIC_INLINE__ std::pair<std::unordered_map<std::string, float>, std::string> extract_and_remove_lora(std::string text) {
1010
std::regex re("<lora:([^:]+):([^>]+)>");
1111
std::smatch matches;
1212
std::unordered_map<std::string, float> filename2multiplier;
@@ -31,7 +31,7 @@ std::pair<std::unordered_map<std::string, float>, std::string> extract_and_remov
3131
return std::make_pair(filename2multiplier, text);
3232
}
3333

34-
std::vector<std::pair<int, std::u32string>> bytes_to_unicode() {
34+
__STATIC_INLINE__ std::vector<std::pair<int, std::u32string>> bytes_to_unicode() {
3535
std::vector<std::pair<int, std::u32string>> byte_unicode_pairs;
3636
std::set<int> byte_set;
3737
for (int b = static_cast<int>('!'); b <= static_cast<int>('~'); ++b) {

common.hpp

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ class ResBlock : public GGMLBlock {
177177
}
178178
};
179179

180-
class GEGLU : public GGMLBlock {
180+
class GEGLU : public UnaryBlock {
181181
protected:
182182
int64_t dim_in;
183183
int64_t dim_out;
@@ -216,23 +216,57 @@ class GEGLU : public GGMLBlock {
216216
}
217217
};
218218

219+
class GELU : public UnaryBlock {
220+
public:
221+
GELU(int64_t dim_in, int64_t dim_out, bool bias = true) {
222+
blocks["proj"] = std::shared_ptr<GGMLBlock>(new Linear(dim_in, dim_out, bias));
223+
}
224+
225+
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
226+
// x: [ne3, ne2, ne1, dim_in]
227+
// return: [ne3, ne2, ne1, dim_out]
228+
auto proj = std::dynamic_pointer_cast<Linear>(blocks["proj"]);
229+
230+
x = proj->forward(ctx, x);
231+
x = ggml_gelu_inplace(ctx, x);
232+
return x;
233+
}
234+
};
235+
219236
class FeedForward : public GGMLBlock {
220237
public:
238+
enum class Activation {
239+
GEGLU,
240+
GELU
241+
};
221242
FeedForward(int64_t dim,
222243
int64_t dim_out,
223-
int64_t mult = 4) {
244+
int64_t mult = 4,
245+
Activation activation = Activation::GEGLU,
246+
bool precision_fix = false) {
224247
int64_t inner_dim = dim * mult;
248+
if (activation == Activation::GELU) {
249+
blocks["net.0"] = std::shared_ptr<GGMLBlock>(new GELU(dim, inner_dim));
250+
} else {
251+
blocks["net.0"] = std::shared_ptr<GGMLBlock>(new GEGLU(dim, inner_dim));
252+
}
225253

226-
blocks["net.0"] = std::shared_ptr<GGMLBlock>(new GEGLU(dim, inner_dim));
227254
// net_1 is nn.Dropout(), skip for inference
228-
blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out));
255+
float scale = 1.f;
256+
if (precision_fix) {
257+
scale = 1.f / 128.f;
258+
}
259+
// The purpose of the scale here is to prevent NaN issues in certain situations.
260+
// For example, when using Vulkan without enabling force_prec_f32,
261+
// or when using CUDA but the weights are k-quants.
262+
blocks["net.2"] = std::shared_ptr<GGMLBlock>(new Linear(inner_dim, dim_out, true, false, false, scale));
229263
}
230264

231265
struct ggml_tensor* forward(struct ggml_context* ctx, struct ggml_tensor* x) {
232266
// x: [ne3, ne2, ne1, dim]
233267
// return: [ne3, ne2, ne1, dim_out]
234268

235-
auto net_0 = std::dynamic_pointer_cast<GEGLU>(blocks["net.0"]);
269+
auto net_0 = std::dynamic_pointer_cast<UnaryBlock>(blocks["net.0"]);
236270
auto net_2 = std::dynamic_pointer_cast<Linear>(blocks["net.2"]);
237271

238272
x = net_0->forward(ctx, x); // [ne3, ne2, ne1, inner_dim]

0 commit comments

Comments
 (0)