Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added DeepDanbooru interrogator #1752

Merged
merged 12 commits into from
Oct 9, 2022
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ Check the [custom scripts](https://github.com/AUTOMATIC1111/stable-diffusion-web
- separate prompts using uppercase `AND`
- also supports weights for prompts: `a cat :1.2 AND a dog AND a penguin :2.2`
- No token limit for prompts (original stable diffusion lets you use up to 75 tokens)
- DeepDanbooru integration, creates danbooru style tags for anime prompts (add --deepdanbooru to commandline args)

## Installation and Running
Make sure the required [dependencies](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Dependencies) are met and follow the instructions available for both [NVidia](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-NVidia-GPUs) (recommended) and [AMD](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Install-and-Run-on-AMD-GPUs) GPUs.
Expand Down Expand Up @@ -123,4 +124,5 @@ The documentation was moved from this README over to the project's [wiki](https:
- Noise generation for outpainting mk2 - https://github.com/parlance-zz/g-diffuser-bot
- CLIP interrogator idea and borrowing some code - https://github.com/pharmapsychotic/clip-interrogator
- Initial Gradio script - posted on 4chan by an Anonymous user. Thank you Anonymous user.
- DeepDanbooru - interrogator for anime diffusors https://github.com/KichangKim/DeepDanbooru
- (You)
4 changes: 4 additions & 0 deletions launch.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def extract_arg(args, name):

args, skip_torch_cuda_test = extract_arg(args, '--skip-torch-cuda-test')
xformers = '--xformers' in args
deepdanbooru = '--deepdanbooru' in args


def repo_dir(name):
Expand Down Expand Up @@ -132,6 +133,9 @@ def git_clone(url, dir, name, commithash=None):
elif platform.system() == "Linux":
run_pip("install xformers", "xformers")

if not is_installed("deepdanbooru") and deepdanbooru:
run_pip("install git+https://github.com/KichangKim/DeepDanbooru.git@edf73df4cdaeea2cf00e9ac08bd8a9026b7a7b26#egg=deepdanbooru[tensorflow] tensorflow==2.10.0 tensorflow-io==0.27.0", "deepdanbooru")

os.makedirs(dir_repos, exist_ok=True)

git_clone("https://github.com/CompVis/stable-diffusion.git", repo_dir('stable-diffusion'), "Stable Diffusion", stable_diffusion_commit_hash)
Expand Down
Empty file.
73 changes: 73 additions & 0 deletions modules/deepbooru.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import os.path
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import get_context


def _load_tf_and_return_tags(pil_image, threshold):
import deepdanbooru as dd
import tensorflow as tf
import numpy as np

this_folder = os.path.dirname(__file__)
model_path = os.path.abspath(os.path.join(this_folder, '..', 'models', 'deepbooru'))
if not os.path.exists(os.path.join(model_path, 'project.json')):
# there is no point importing these every time
import zipfile
from basicsr.utils.download_util import load_file_from_url
load_file_from_url(r"https://github.com/KichangKim/DeepDanbooru/releases/download/v3-20211112-sgd-e28/deepdanbooru-v3-20211112-sgd-e28.zip",
model_path)
with zipfile.ZipFile(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"), "r") as zip_ref:
zip_ref.extractall(model_path)
os.remove(os.path.join(model_path, "deepdanbooru-v3-20211112-sgd-e28.zip"))

tags = dd.project.load_tags_from_project(model_path)
model = dd.project.load_model_from_project(
model_path, compile_model=True
)

width = model.input_shape[2]
height = model.input_shape[1]
image = np.array(pil_image)
image = tf.image.resize(
image,
size=(height, width),
method=tf.image.ResizeMethod.AREA,
preserve_aspect_ratio=True,
)
image = image.numpy() # EagerTensor to np.array
image = dd.image.transform_and_pad_image(image, width, height)
image = image / 255.0
image_shape = image.shape
image = image.reshape((1, image_shape[0], image_shape[1], image_shape[2]))

y = model.predict(image)[0]

result_dict = {}

for i, tag in enumerate(tags):
result_dict[tag] = y[i]
result_tags_out = []
result_tags_print = []
for tag in tags:
if result_dict[tag] >= threshold:
if tag.startswith("rating:"):
continue
result_tags_out.append(tag)
result_tags_print.append(f'{result_dict[tag]} {tag}')

print('\n'.join(sorted(result_tags_print, reverse=True)))

return ', '.join(result_tags_out).replace('_', ' ').replace(':', ' ')


def subprocess_init_no_cuda():
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"


def get_deepbooru_tags(pil_image, threshold=0.5):
context = get_context('spawn')
with ProcessPoolExecutor(initializer=subprocess_init_no_cuda, mp_context=context) as executor:
f = executor.submit(_load_tf_and_return_tags, pil_image, threshold, )
ret = f.result() # will rethrow any exceptions
return ret
1 change: 1 addition & 0 deletions modules/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
parser.add_argument("--ldsr-models-path", type=str, help="Path to directory with LDSR model file(s).", default=os.path.join(models_path, 'LDSR'))
parser.add_argument("--xformers", action='store_true', help="enable xformers for cross attention layers")
parser.add_argument("--force-enable-xformers", action='store_true', help="enable xformers for cross attention layers regardless of whether the checking code thinks you can run it; do not make bug reports if this fails to work")
parser.add_argument("--deepdanbooru", action='store_true', help="enable deepdanbooru interrogator")
parser.add_argument("--opt-split-attention", action='store_true', help="force-enables cross-attention layer optimization. By default, it's on for torch.cuda and off for other torch devices.")
parser.add_argument("--disable-opt-split-attention", action='store_true', help="force-disables cross-attention layer optimization")
parser.add_argument("--opt-split-attention-v1", action='store_true', help="enable older version of split attention optimization that does not consume all the VRAM it can find")
Expand Down
29 changes: 24 additions & 5 deletions modules/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
from modules import sd_hijack
from modules.paths import script_path
from modules.shared import opts, cmd_opts
if cmd_opts.deepdanbooru:
from modules.deepbooru import get_deepbooru_tags
import modules.shared as shared
from modules.sd_samplers import samplers, samplers_for_img2img
from modules.sd_hijack import model_hijack
Expand Down Expand Up @@ -292,6 +294,11 @@ def interrogate(image):
return gr_show(True) if prompt is None else prompt


def interrogate_deepbooru(image):
prompt = get_deepbooru_tags(image)
return gr_show(True) if prompt is None else prompt


def create_seed_inputs():
with gr.Row():
with gr.Box():
Expand Down Expand Up @@ -428,15 +435,20 @@ def create_toprow(is_img2img):
outputs=[],
)

with gr.Row():
with gr.Row(scale=1):
if is_img2img:
interrogate = gr.Button('Interrogate', elem_id="interrogate")
interrogate = gr.Button('Interrogate\nCLIP', elem_id="interrogate")
if cmd_opts.deepdanbooru:
deepbooru = gr.Button('Interrogate\nDeepBooru', elem_id="deepbooru")
else:
deepbooru = None
else:
interrogate = None
deepbooru = None
prompt_style_apply = gr.Button('Apply style', elem_id="style_apply")
save_style = gr.Button('Create style', elem_id="style_create")

return prompt, roll, prompt_style, negative_prompt, prompt_style2, submit, interrogate, prompt_style_apply, save_style, paste, token_counter, token_button
return prompt, roll, prompt_style, negative_prompt, prompt_style2, submit, interrogate, deepbooru, prompt_style_apply, save_style, paste, token_counter, token_button


def setup_progressbar(progressbar, preview, id_part, textinfo=None):
Expand Down Expand Up @@ -465,7 +477,7 @@ def create_ui(wrap_gradio_gpu_call):
import modules.txt2img

with gr.Blocks(analytics_enabled=False) as txt2img_interface:
txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, txt2img_prompt_style_apply, txt2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=False)
txt2img_prompt, roll, txt2img_prompt_style, txt2img_negative_prompt, txt2img_prompt_style2, submit, _, _, txt2img_prompt_style_apply, txt2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=False)
dummy_component = gr.Label(visible=False)

with gr.Row(elem_id='txt2img_progress_row'):
Expand Down Expand Up @@ -617,7 +629,7 @@ def create_ui(wrap_gradio_gpu_call):
token_button.click(fn=update_token_counter, inputs=[txt2img_prompt, steps], outputs=[token_counter])

with gr.Blocks(analytics_enabled=False) as img2img_interface:
img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_prompt_style_apply, img2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=True)
img2img_prompt, roll, img2img_prompt_style, img2img_negative_prompt, img2img_prompt_style2, submit, img2img_interrogate, img2img_deepbooru, img2img_prompt_style_apply, img2img_save_style, paste, token_counter, token_button = create_toprow(is_img2img=True)

with gr.Row(elem_id='img2img_progress_row'):
with gr.Column(scale=1):
Expand Down Expand Up @@ -774,6 +786,13 @@ def create_ui(wrap_gradio_gpu_call):
outputs=[img2img_prompt],
)

if cmd_opts.deepdanbooru:
img2img_deepbooru.click(
fn=interrogate_deepbooru,
inputs=[init_img],
outputs=[img2img_prompt],
)

save.click(
fn=wrap_gradio_call(save_files),
_js="(x, y, z) => [x, y, selected_gallery_index()]",
Expand Down
7 changes: 6 additions & 1 deletion style.css
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,12 @@

#style_apply, #style_create, #interrogate{
margin: 0.75em 0.25em 0.25em 0.25em;
min-width: 3em;
min-width: 5em;
}

#style_apply, #style_create, #deepbooru{
margin: 0.75em 0.25em 0.25em 0.25em;
min-width: 5em;
}

#style_pos_col, #style_neg_col{
Expand Down