-
Notifications
You must be signed in to change notification settings - Fork 33
/
Copy pathace_plus_fft.yaml
192 lines (188 loc) · 6.49 KB
/
ace_plus_fft.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# Top-level settings of this configuration (file name shown by the page
# header: ace_plus_fft.yaml).
# NOTE(review): NAME presumably selects the inference pipeline class in the
# consuming framework — confirm against the loader.
NAME: ACEInference
# Numeric precision identifier; the text-encoder sub-configs below use the
# same value for their D_TYPE keys.
DTYPE: bfloat16
# Variant tag; matches the `fft` suffix of the file name.
VERSION: fft
IS_DEFAULT: True
# NOTE(review): presumably an upper bound on the token sequence length —
# confirm the unit with the consumer.
MAX_SEQ_LEN: 4096
# Model definition. Each sub-component (DIFFUSION, DIFFUSION_MODEL,
# FIRST_STAGE_MODEL, COND_STAGE_MODEL) is a child mapping of MODEL and carries
# its own NAME key, so the nesting below is significant: without it the
# repeated keys (NAME, PRETRAINED_MODEL, IGNORE_KEYS, ...) would collide.
MODEL:
  NAME: LatentDiffusionACEPlus
  PARAMETERIZATION: rf
  TIMESTEPS: 1000
  GUIDE_SCALE: 1.0
  # Empty value parses as null; the weights are referenced by the
  # sub-components below (DIFFUSION_MODEL / FIRST_STAGE_MODEL).
  # NOTE(review): confirm no checkpoint is expected at this level.
  PRETRAINED_MODEL:
  IGNORE_KEYS: [ ]
  USE_EMA: False
  EVAL_EMA: False
  SIZE_FACTOR: 8
  DIFFUSION:
    NAME: DiffusionFluxRF
    PREDICTION_TYPE: raw
    NOISE_NORM: True
    # NOISE_SCHEDULER DESCRIPTION: TYPE: default: ''
    NOISE_SCHEDULER:
      NAME: FlowMatchFluxShiftScheduler
      SHIFT: False
      PRE_T_SAMPLE: True
      PRE_T_SAMPLE_FOLD: 1
      SIGMOID_SCALE: 1
      BASE_SHIFT: 0.5
      MAX_SHIFT: 1.15
    # Same scheduler class as NOISE_SCHEDULER, but with SHIFT enabled and
    # PRE_T_SAMPLE disabled.
    SAMPLER_SCHEDULER:
      NAME: FlowMatchFluxShiftScheduler
      SHIFT: True
      PRE_T_SAMPLE: False
      SIGMOID_SCALE: 1
      BASE_SHIFT: 0.5
      MAX_SHIFT: 1.15
  #
  DIFFUSION_MODEL:
    # NAME DESCRIPTION: TYPE: default: 'Flux'
    NAME: FluxMRModiACEPlus
    # Resolved from the ACE_PLUS_FFT_MODEL placeholder.
    # NOTE(review): presumably an environment variable — confirm how the
    # loader substitutes ${...}.
    PRETRAINED_MODEL: ${ACE_PLUS_FFT_MODEL}
    # IN_CHANNELS DESCRIPTION: model's input channels. TYPE: int default: 64
    IN_CHANNELS: 448
    # OUT_CHANNELS DESCRIPTION: model's output channels. TYPE: int default: 64
    OUT_CHANNELS: 64
    # HIDDEN_SIZE DESCRIPTION: model's hidden size. TYPE: int default: 1024
    HIDDEN_SIZE: 3072
    REDUX_DIM: 1152
    # NUM_HEADS DESCRIPTION: number of heads in the transformer. TYPE: int default: 16
    NUM_HEADS: 24
    # AXES_DIM DESCRIPTION: dimensions of the axes of the positional encoding. TYPE: list default: [16, 56, 56]
    AXES_DIM: [ 16, 56, 56 ]
    # THETA DESCRIPTION: theta for positional encoding. TYPE: int default: 10000
    THETA: 10000
    # VEC_IN_DIM DESCRIPTION: dimension of the vector input. TYPE: int default: 768
    VEC_IN_DIM: 768
    # GUIDANCE_EMBED DESCRIPTION: whether to use guidance embedding. TYPE: bool default: False
    GUIDANCE_EMBED: True
    # CONTEXT_IN_DIM DESCRIPTION: dimension of the context input. TYPE: int default: 4096
    CONTEXT_IN_DIM: 4096
    # MLP_RATIO DESCRIPTION: ratio of mlp hidden size to hidden size. TYPE: float default: 4.0
    MLP_RATIO: 4.0
    # QKV_BIAS DESCRIPTION: whether to use bias in qkv projection. TYPE: bool default: True
    QKV_BIAS: True
    # DEPTH DESCRIPTION: number of transformer blocks. TYPE: int default: 19
    DEPTH: 19
    # DEPTH_SINGLE_BLOCKS DESCRIPTION: number of transformer blocks in the single stream block. TYPE: int default: 38
    DEPTH_SINGLE_BLOCKS: 38
    ATTN_BACKEND: flash_attn
  #
  FIRST_STAGE_MODEL:
    NAME: AutoencoderKLFlux
    EMBED_DIM: 16
    # `<root>@<relative path>` form, also used by the text-encoder paths below.
    PRETRAINED_MODEL: ${FLUX_FILL_PATH}@ae.safetensors
    IGNORE_KEYS: [ ]
    BATCH_SIZE: 8
    USE_CONV: False
    SCALE_FACTOR: 0.3611
    SHIFT_FACTOR: 0.1159
    #
    ENCODER:
      NAME: Encoder
      CH: 128
      OUT_CH: 3
      NUM_RES_BLOCKS: 2
      IN_CHANNELS: 3
      ATTN_RESOLUTIONS: [ ]
      CH_MULT: [ 1, 2, 4, 4 ]
      Z_CHANNELS: 16
      DOUBLE_Z: True
      DROPOUT: 0.0
      RESAMP_WITH_CONV: True
    #
    DECODER:
      NAME: Decoder
      CH: 128
      OUT_CH: 3
      NUM_RES_BLOCKS: 2
      IN_CHANNELS: 3
      ATTN_RESOLUTIONS: [ ]
      CH_MULT: [ 1, 2, 4, 4 ]
      Z_CHANNELS: 16
      DROPOUT: 0.0
      RESAMP_WITH_CONV: True
      GIVE_PRE_END: False
      TANH_OUT: False
  #
  COND_STAGE_MODEL:
    # NAME DESCRIPTION: TYPE: default: 'T5PlusClipFluxEmbedder'
    NAME: T5PlusClipFluxEmbedder
    # T5_MODEL DESCRIPTION: TYPE: default: ''
    T5_MODEL:
      # NAME DESCRIPTION: TYPE: default: 'HFEmbedder'
      NAME: HFEmbedder
      # HF_MODEL_CLS DESCRIPTION: huggingface cls in transformers TYPE: NoneType default: None
      HF_MODEL_CLS: T5EncoderModel
      # MODEL_PATH DESCRIPTION: model folder path TYPE: NoneType default: None
      MODEL_PATH: ${FLUX_FILL_PATH}@text_encoder_2/
      # HF_TOKENIZER_CLS DESCRIPTION: huggingface cls in transformers TYPE: NoneType default: None
      HF_TOKENIZER_CLS: T5Tokenizer
      # TOKENIZER_PATH DESCRIPTION: tokenizer folder path TYPE: NoneType default: None
      TOKENIZER_PATH: ${FLUX_FILL_PATH}@tokenizer_2/
      # Placeholder strings; quoted because several start with `{`, which
      # would otherwise open a YAML flow mapping.
      ADDED_IDENTIFIER: [ '<img>','{image}', '{caption}', '{mask}', '{ref_image}', '{image1}', '{image2}', '{image3}', '{image4}', '{image5}', '{image6}', '{image7}', '{image8}', '{image9}' ]
      # MAX_LENGTH DESCRIPTION: max length of input TYPE: int default: 77
      MAX_LENGTH: 512
      # OUTPUT_KEY DESCRIPTION: output key TYPE: str default: 'last_hidden_state'
      OUTPUT_KEY: last_hidden_state
      # D_TYPE DESCRIPTION: dtype TYPE: str default: 'bfloat16'
      D_TYPE: bfloat16
      # BATCH_INFER DESCRIPTION: batch infer TYPE: bool default: False
      BATCH_INFER: False
      CLEAN: whitespace
    # CLIP_MODEL DESCRIPTION: TYPE: default: ''
    CLIP_MODEL:
      # NAME DESCRIPTION: TYPE: default: 'HFEmbedder'
      NAME: HFEmbedder
      # HF_MODEL_CLS DESCRIPTION: huggingface cls in transformers TYPE: NoneType default: None
      HF_MODEL_CLS: CLIPTextModel
      # MODEL_PATH DESCRIPTION: model folder path TYPE: NoneType default: None
      MODEL_PATH: ${FLUX_FILL_PATH}@text_encoder/
      # HF_TOKENIZER_CLS DESCRIPTION: huggingface cls in transformers TYPE: NoneType default: None
      HF_TOKENIZER_CLS: CLIPTokenizer
      # TOKENIZER_PATH DESCRIPTION: tokenizer folder path TYPE: NoneType default: None
      TOKENIZER_PATH: ${FLUX_FILL_PATH}@tokenizer/
      # MAX_LENGTH DESCRIPTION: max length of input TYPE: int default: 77
      MAX_LENGTH: 77
      # OUTPUT_KEY DESCRIPTION: output key TYPE: str default: 'last_hidden_state'
      # Unlike T5_MODEL, this embedder reads `pooler_output`.
      OUTPUT_KEY: pooler_output
      # D_TYPE DESCRIPTION: dtype TYPE: str default: 'bfloat16'
      D_TYPE: bfloat16
      # BATCH_INFER DESCRIPTION: batch infer TYPE: bool default: False
      BATCH_INFER: True
      CLEAN: whitespace
# List of preprocessing modes. Each entry pairs a TYPE with a
# REPAINTING_SCALE and an ANNOTATOR sub-config; REPAINTING_SCALE,
# ANNOTATOR and the annotator parameters must stay nested inside their list
# item so they are not parsed as top-level keys.
PREPROCESSOR:
  - TYPE: repainting
    REPAINTING_SCALE: 1.0
    # Empty value parses as null: no annotator is configured for this mode.
    ANNOTATOR:
  - TYPE: no_preprocess
    REPAINTING_SCALE: 0.0
    # Empty value parses as null: no annotator is configured for this mode.
    ANNOTATOR:
  - TYPE: mosaic_repainting
    REPAINTING_SCALE: 0.0
    ANNOTATOR:
      NAME: ColorAnnotator
      RATIO: 64
  - TYPE: contour_repainting
    REPAINTING_SCALE: 0.0
    ANNOTATOR:
      NAME: InfoDrawContourAnnotator
      INPUT_NC: 3
      OUTPUT_NC: 1
      N_RESIDUAL_BLOCKS: 3
      SIGMOID: True
      # NOTE(review): `ms://` is presumably a remote model-hub scheme
      # resolved by the consuming framework — confirm.
      PRETRAINED_MODEL: "ms://iic/scepter_annotator@annotator/ckpts/informative_drawing_contour_style.pth"
  - TYPE: depth_repainting
    REPAINTING_SCALE: 0.0
    ANNOTATOR:
      NAME: MidasDetector
      PRETRAINED_MODEL: "ms://iic/scepter_annotator@annotator/ckpts/dpt_hybrid-midas-501f0c75.pt"
  - TYPE: recolorizing
    REPAINTING_SCALE: 0.0
    ANNOTATOR:
      NAME: GrayAnnotator
# Sampling arguments. The keys below are children of SAMPLE_ARGS; keeping
# them nested prevents GUIDE_SCALE here from becoming a top-level key.
SAMPLE_ARGS:
  SAMPLE_STEPS: 28
  SAMPLER: flow_euler
  SEED: 42
  # Two-element list. NOTE(review): confirm whether the order is
  # [height, width] or [width, height]; both values are 1024 here.
  IMAGE_SIZE: [ 1024, 1024 ]
  # NOTE(review): MODEL.GUIDE_SCALE is 1.0 while this value is 50 — confirm
  # which one the sampler actually consumes.
  GUIDE_SCALE: 50