Merge branch 'dev2' into logging

silveroxides · Jun 2, 2023 · 7ef00f5 · 7ef00f5
2 parents 85f882e + 5ebc697
commit 7ef00f5
Show file tree

Hide file tree

Showing 64 changed files with 4,804 additions and 851 deletions.
diff --git a/.dockerignore b/.dockerignore
@@ -0,0 +1,7 @@
+.cache/
+cudnn_windows/
+bitsandbytes_windows/
+bitsandbytes_windows_deprecated/
+dataset/
+__pycache__/
+venv/
diff --git a/.gitignore b/.gitignore
@@ -11,4 +11,6 @@ gui-user.bat
 gui-user.ps1
 .vscode
 wandb
-setup.log
+setup.log
+logs
+SmilingWolf
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,47 @@
+# Image for the kohya_ss GUI, built on top of NVIDIA's PyTorch container.
+FROM nvcr.io/nvidia/pytorch:23.04-py3 AS base
+# Build-time only: keeps apt non-interactive without leaking into the runtime environment.
+ARG DEBIAN_FRONTEND=noninteractive
+ENV TZ=Europe/London
+
+# Add the deadsnakes PPA and install Python 3.10 plus the required system packages.
+# Everything runs in a single layer so the apt lists removed at the end never persist in the image.
+RUN apt-get update && \
+    apt-get install -y software-properties-common && \
+    add-apt-repository -y ppa:deadsnakes/ppa && \
+    apt-get update && \
+    apt-get install -y git curl libgl1 libglib2.0-0 libgoogle-perftools-dev \
+    python3.10-dev python3.10-tk python3-html5lib python3-apt python3-pip python3.10-distutils && \
+    rm -rf /var/lib/apt/lists/*
+
+# Set python 3.10 as default (--set is non-interactive, unlike --config)
+RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 3 && \
+    update-alternatives --set python3 /usr/bin/python3.10
+
+# Bootstrap pip for python 3.10. The script is downloaded to a file first so that a
+# failed download aborts the build instead of piping empty input into python3.
+RUN curl -fsSL https://bootstrap.pypa.io/get-pip.py -o /tmp/get-pip.py && \
+    python3 /tmp/get-pip.py --no-cache-dir && \
+    rm -f /tmp/get-pip.py
+
+WORKDIR /app
+RUN python3 -m pip install --no-cache-dir wheel
+
+# Todo: Install torch 2.1.0 for cu121 support (only available as nightly as of writing)
+## RUN python3 -m pip install --pre torch ninja setuptools --extra-index-url https://download.pytorch.org/whl/nightly/cu121
+
+# Todo: Install xformers nightly for Torch 2.1.0 support
+## RUN python3 -m pip install -v -U git+https://github.com/facebookresearch/xformers.git@main#egg=xformers
+
+# Install requirements. Only the manifests are copied here so this layer stays cached
+# until requirements.txt or setup.py change.
+COPY requirements.txt setup.py ./
+RUN python3 -m pip install --no-cache-dir --use-pep517 -r requirements.txt xformers
+
+# Replace pillow with pillow-simd
+RUN python3 -m pip uninstall -y pillow && \
+    CC="cc -mavx2" python3 -m pip install --no-cache-dir -U --force-reinstall pillow-simd
+
+# Fix missing libnvinfer7
+USER root
+RUN ln -s /usr/lib/x86_64-linux-gnu/libnvinfer.so /usr/lib/x86_64-linux-gnu/libnvinfer.so.7 && \
+    ln -s /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so /usr/lib/x86_64-linux-gnu/libnvinfer_plugin.so.7
+
+# Create the unprivileged runtime user and hand it /app. WORKDIR created /app as root,
+# so without the chown the application could not create files in its working directory.
+RUN useradd -m -s /bin/bash appuser && \
+    chown -R appuser: /app
+USER appuser
+COPY --chown=appuser . .
+
+# Port the GUI listens on (documentation only; it still has to be published at run time).
+EXPOSE 7860
+STOPSIGNAL SIGINT
+ENV LD_PRELOAD=libtcmalloc.so
+ENV PATH="$PATH:/home/appuser/.local/bin"
+# A shell is needed to expand ${CLI_ARGS} at container start; `exec` then replaces that
+# shell so python3 becomes PID 1 and receives the STOPSIGNAL directly.
+CMD ["/bin/sh", "-c", "exec python3 ./kohya_gui.py ${CLI_ARGS} --listen 0.0.0.0 --server_port 7860"]
diff --git a/README-ja.md b/README-ja.md
@@ -16,13 +16,13 @@ GUIやPowerShellスクリプトなど、より使いやすくする機能が[bma
 
 当リポジトリ内およびnote.comに記事がありますのでそちらをご覧ください（将来的にはすべてこちらへ移すかもしれません）。
 
-* [学習について、共通編](./train_README-ja.md) : データ整備やオプションなど
-    * [データセット設定](./config_README-ja.md)
-* [DreamBoothの学習について](./train_db_README-ja.md)
-* [fine-tuningのガイド](./fine_tune_README_ja.md):
-* [LoRAの学習について](./train_network_README-ja.md)
-* [Textual Inversionの学習について](./train_ti_README-ja.md)
-* note.com [画像生成スクリプト](https://note.com/kohya_ss/n/n2693183a798e)
+* [学習について、共通編](./docs/train_README-ja.md) : データ整備やオプションなど
+    * [データセット設定](./docs/config_README-ja.md)
+* [DreamBoothの学習について](./docs/train_db_README-ja.md)
+* [fine-tuningのガイド](./docs/fine_tune_README_ja.md):
+* [LoRAの学習について](./docs/train_network_README-ja.md)
+* [Textual Inversionの学習について](./docs/train_ti_README-ja.md)
+* [画像生成スクリプト](./docs/gen_img_README-ja.md)
 * note.com [モデル変換スクリプト](https://note.com/kohya_ss/n/n374f316fe4ad)
 
 ## Windowsでの動作に必要なプログラム
@@ -115,6 +115,16 @@ accelerate configの質問には以下のように答えてください。（bf1
 
 他のバージョンでは学習がうまくいかない場合があるようです。特に他の理由がなければ指定のバージョンをお使いください。
 
+### オプション：Lion8bitを使う
+
+Lion8bitを使う場合には`bitsandbytes`を0.38.0以降にアップグレードする必要があります。`bitsandbytes`をアンインストールし、Windows環境では例えば[こちら](https://github.com/jllllll/bitsandbytes-windows-webui)などからWindows版のwhlファイルをインストールしてください。たとえば以下のような手順になります。
+
+```powershell
+pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.38.1-py3-none-any.whl
+```
+
+アップグレード時には`pip install .`でこのリポジトリを更新し、必要に応じて他のパッケージもアップグレードしてください。
+
 ## アップグレード
 
 新しいリリースがあった場合、以下のコマンドで更新できます。

diff --git a/README.md b/README.md
@@ -2,14 +2,22 @@
 
 This repository provides a Windows-focused Gradio GUI for [Kohya's Stable Diffusion trainers](https://github.com/kohya-ss/sd-scripts). The GUI allows you to set the training parameters and generate and run the required CLI commands to train the model.
 
-If you run on Linux and would like to use the GUI, there is now a port of it as a docker container. You can find the project [here](https://github.com/P2Enjoy/kohya_ss-docker).
-
 ### Table of Contents
 
 - [Tutorials](#tutorials)
+* [Training guide - common](./docs/train_README-ja.md) : data preparation, options etc... 
+  * [Chinese version](./docs/train_README-zh.md)
+  * [Dataset config](./docs/config_README-ja.md) 
+  * [DreamBooth training guide](./docs/train_db_README-ja.md)
+  * [Step by Step fine-tuning guide](./docs/fine_tune_README_ja.md):
+  * [Training LoRA](./docs/train_network_README-ja.md)
+  * [Training Textual Inversion](./docs/train_ti_README-ja.md)
+  * [Image generation](./docs/gen_img_README-ja.md)
+  * [Model conversion](https://note.com/kohya_ss/n/n374f316fe4ad)
 - [Required Dependencies](#required-dependencies)
   - [Linux/macOS](#linux-and-macos-dependencies)
 - [Installation](#installation)
+    - [Docker](#docker)
     - [Linux/macOS](#linux-and-macos)
       - [Default Install Locations](#install-location)
     - [Windows](#windows)
@@ -45,6 +53,10 @@ Newer Tutorial: [Generate Studio Quality Realistic Photos By Kohya LoRA Stable D
 
 [![Newer Tutorial: Generate Studio Quality Realistic Photos By Kohya LoRA Stable Diffusion Training](https://user-images.githubusercontent.com/19240467/235306147-85dd8126-f397-406b-83f2-368927fa0281.png)](https://www.youtube.com/watch?v=TpuDOsuKIBo)
 
+Newer Tutorial: [How To Install And Use Kohya LoRA GUI / Web UI on RunPod IO](https://www.youtube.com/watch?v=3uzCNrQao3o):
+
+[![How To Install And Use Kohya LoRA GUI / Web UI on RunPod IO With Stable Diffusion & Automatic1111](https://github-production-user-asset-6210df.s3.amazonaws.com/19240467/238678226-0c9c3f7d-c308-4793-b790-999fdc271372.png)](https://www.youtube.com/watch?v=3uzCNrQao3o)
+
 ## Required Dependencies
 
 - Install [Python 3.10](https://www.python.org/ftp/python/3.10.9/python-3.10.9-amd64.exe) 
@@ -61,10 +73,34 @@ These dependencies are taken care of via `setup.sh` in the installation section.
 ### Runpod
 Follow the instructions found in this discussion: https://github.com/bmaltais/kohya_ss/discussions/379
 
+### Docker
+Docker is supported on Windows and Linux distributions. However, this method currently only supports Nvidia GPUs.
+Run the following commands in your OS shell after installing [git](https://git-scm.com/download/) and [docker](https://www.docker.com/products/docker-desktop/):
+```bash
+git clone https://github.com/bmaltais/kohya_ss.git
+cd kohya_ss
+docker compose build
+docker compose run --service-ports kohya-ss-gui
+```
+
+This will take a while (up to 20 minutes) on the first run.
+
+The following limitations apply:
+* All training data must be added to the `dataset` subdirectory, the docker container cannot access any other files
+* The file picker does not work
+  * Cannot select folders; the folder path must be set manually, e.g. /dataset/my_lora/img
+  * Cannot select a config file; it must be loaded via its path instead, e.g. /dataset/my_config.json
+* Dialogs do not work
+  * Make sure your file names are unique, because the confirmation dialog that normally asks whether an existing file should be overwritten cannot be displayed
+* No auto-update support. Must run update scripts outside docker manually and then rebuild with `docker compose build`.
+
+
+If you run on Linux, there is an alternative docker container port with fewer limitations. You can find the project [here](https://github.com/P2Enjoy/kohya_ss-docker).
+
 ### Linux and macOS
 In the terminal, run
 
-```
+```bash
 git clone https://github.com/bmaltais/kohya_ss.git
 cd kohya_ss
 # May need to chmod +x ./setup.sh if you're on a machine with stricter security.
@@ -259,7 +295,7 @@ The LoRA supported by `train_network.py` has been named to avoid confusion. The
     
 LoRA-LierLa is the default LoRA type for `train_network.py` (without `conv_dim` network arg). LoRA-LierLa can be used with [our extension](https://github.com/kohya-ss/sd-webui-additional-networks) for AUTOMATIC1111's Web UI, or with the built-in LoRA feature of the Web UI.
 
-To use LoRA-C3Liar with Web UI, please use our extension.
+To use LoRA-C3Lier with Web UI, please use our extension.
 
 ## Sample image generation during training
 A prompt file might look like this, for example
@@ -309,45 +345,57 @@ This will store a backup file with your current locally installed pip packages a
 
 ## Change History
 
-* 2023/04/25 (v21.5.7)
-  - `tag_images_by_wd14_tagger.py` can now get arguments from outside. [PR #453](https://github.com/kohya-ss/sd-scripts/pull/453) Thanks to mio2333!
-  - Added `--save_every_n_steps` option to each training script. The model is saved every specified steps.
-    - `--save_last_n_steps` option can be used to save only the specified number of models (old models will be deleted).
-    - If you specify the `--save_state` option, the state will also be saved at the same time. You can specify the number of steps to keep the state with the `--save_last_n_steps_state` option (the same value as `--save_last_n_steps` is used if omitted).
-    - You can use the epoch-based model saving and state saving options together.
-    - Not tested in multi-GPU environment. Please report any bugs.
-  - `--cache_latents_to_disk` option automatically enables `--cache_latents` option when specified. [#438](https://github.com/kohya-ss/sd-scripts/issues/438)
-  - Fixed a bug in `gen_img_diffusers.py` where latents upscaler would fail with a batch size of 2 or more.
-  - Fix issue with using earlier version than python 3.10 in Linux. Thanks @Whyjsee
-* 2023/04/24 (v21.5.6)
-    - Fix triton error
-    - Fix issue with merge lora path with spaces
-    - Added support for logging to wandb. Please refer to PR #428. Thank you p1atdev!
-      - wandb installation is required. Please install it with pip install wandb. Login to wandb with wandb login command, or set --wandb_api_key option for automatic login.
-      - Please let me know if you find any bugs as the test is not complete.
-    - You can automatically login to wandb by setting the --wandb_api_key option. Please be careful with the handling of API Key. PR #435 Thank you Linaqruf!
-    - Improved the behavior of --debug_dataset on non-Windows environments. PR #429 Thank you tsukimiya!
-    - Fixed --face_crop_aug option not working in Fine tuning method.
-    - Prepared code to use any upscaler in gen_img_diffusers.py.
-    - Fixed to log to TensorBoard when --logging_dir is specified and --log_with is not specified.
-* 2023/04/22 (v21.5.5)
-    - Update LoRA merge GUI to support SD checkpoint merge and up to 4 LoRA merging
-    - Fixed `lora_interrogator.py` not working. Please refer to [PR #392](https://github.com/kohya-ss/sd-scripts/pull/392) for details. Thank you A2va and heyalexchoi!
-    - Fixed the handling of tags containing `_` in `tag_images_by_wd14_tagger.py`.
-    - Add new Extract DyLoRA gui to the Utilities tab.
-    - Add new Merge LyCORIS models into checkpoint gui to the Utilities tab.
-    - Add new info on startup to help debug things
-* 2023/04/17 (v21.5.4)
-    - Fixed a bug that caused an error when loading DyLoRA with the `--network_weight` option in `train_network.py`.
-    - Added the `--recursive` option to each script in the `finetune` folder to process folders recursively. Please refer to [PR #400](https://github.com/kohya-ss/sd-scripts/pull/400/) for details. Thanks to Linaqruf!
-    - Upgrade Gradio to latest release
-    - Fix issue when Adafactor is used as optimizer and LR Warmup is not 0: https://github.com/bmaltais/kohya_ss/issues/617
-    - Added support for DyLoRA in `train_network.py`. Please refer to [here](./train_network_README-ja.md#dylora) for details (currently only in Japanese).
-    - Added support for caching latents to disk in each training script. Please specify __both__ `--cache_latents` and `--cache_latents_to_disk` options.
-        - The files are saved in the same folder as the images with the extension `.npz`. If you specify the `--flip_aug` option, the files with `_flip.npz` will also be saved.
-        - Multi-GPU training has not been tested.
-        - This feature is not tested with all combinations of datasets and training scripts, so there may be bugs.
-    - Added workaround for an error that occurs when training with `fp16` or `bf16` in `fine_tune.py`.
-    - Implemented DyLoRA GUI support. There will now be a new 'DyLoRA Unit` slider when the LoRA type is selected as `kohya DyLoRA` to specify the desired Unit value for DyLoRA training.
-    - Update gui.bat and gui.ps1 based on: https://github.com/bmaltais/kohya_ss/issues/188
-    - Update `setup.bat` to install torch 2.0.0 instead of 1.2.1. If you want to upgrade from 1.2.1 to 2.0.0 run setup.bat again, select 1 to uninstall the previous torch modules, then select 2 for torch 2.0.0
+* 2023/05/28 (v21.5.15)
+- Show warning when image caption file does not exist during training. [PR #533](https://github.com/kohya-ss/sd-scripts/pull/533) Thanks to TingTingin!
+  - Warning is also displayed when using class+identifier dataset. Please ignore if it is intended.
+- `train_network.py` now supports merging network weights before training. [PR #542](https://github.com/kohya-ss/sd-scripts/pull/542) Thanks to u-haru!
+  - `--base_weights` option specifies LoRA or other model files (multiple files are allowed) to merge.
+  - `--base_weights_multiplier` option specifies multiplier of the weights to merge (multiple values are allowed). If omitted or less than `base_weights`, 1.0 is used.
+  - This is useful for incremental learning. See PR for details.
+- Show warning and continue training when uploading to HuggingFace fails.
+* 2023/05/28 (v21.5.14)
+- Add Create Group tool and GUI
+* 2023/05/24 (v21.5.13)
+- Upgrade gradio release to fix issue with UI refresh on config load.
+- [D-Adaptation v3.0](https://github.com/facebookresearch/dadaptation) is now supported. [PR #530](https://github.com/kohya-ss/sd-scripts/pull/530) Thanks to sdbds!
+  - `--optimizer_type` now accepts `DAdaptAdamPreprint`, `DAdaptAdanIP`, and `DAdaptLion`.
+  - `DAdaptAdam` is now new. The old `DAdaptAdam` is available with `DAdaptAdamPreprint`.
+  - Simply specifying `DAdaptation` will use `DAdaptAdamPreprint` (same behavior as before).
+  - You need to install D-Adaptation v3.0. After activating venv, please do `pip install -U dadaptation`.
+  - See PR and D-Adaptation documentation for details.
+* 2023/05/22 (v21.5.12)
+- Fixed several bugs.
+  - The state is saved even when the `--save_state` option is not specified in `fine_tune.py` and `train_db.py`. [PR #521](https://github.com/kohya-ss/sd-scripts/pull/521) Thanks to akshaal!
+  - Cannot load LoRA without `alpha`. [PR #527](https://github.com/kohya-ss/sd-scripts/pull/527) Thanks to Manjiz!
+  - Minor changes to console output during sample generation. [PR #515](https://github.com/kohya-ss/sd-scripts/pull/515) Thanks to yanhuifair!
+- The generation script now uses xformers for VAE as well.
+- Fixed an issue where an error would occur if the encoding of the prompt file was different from the default. [PR #510](https://github.com/kohya-ss/sd-scripts/pull/510) Thanks to sdbds!
+  - Please save the prompt file in UTF-8.
+* 2023/05/15 (v21.5.11)
+  - Added an option `--dim_from_weights` to `train_network.py` to automatically determine the dim(rank) from the weight file. [PR #491](https://github.com/kohya-ss/sd-scripts/pull/491) Thanks to AI-Casanova!
+    - It is useful in combination with `resize_lora.py`. Please see the PR for details.
+  - Fixed a bug where the noise resolution was incorrect with Multires noise. [PR #489](https://github.com/kohya-ss/sd-scripts/pull/489) Thanks to sdbds!
+    - Please see the PR for details.
+  - The image generation scripts can now use img2img and highres fix at the same time.
+  - Fixed a bug where the hint image of ControlNet was incorrectly BGR instead of RGB in the image generation scripts.
+  - Added a feature to the image generation scripts to use the memory-efficient VAE.
+    - If you specify a number with the `--vae_slices` option, the memory-efficient VAE will be used. The maximum output size will be larger, but it will be slower. Please specify a value of about `16` or `32`.
+    - The implementation of the VAE is in `library/slicing_vae.py`.
+  - Fix for wandb #ebabchick
+  - Added [English translation of documents](https://github.com/darkstorm2150/sd-scripts#links-to-usage-documentation) by darkstorm2150. Thank you very much!
+  - The prompt for sample generation during training can now be specified in `.toml` or `.json`. [PR #504](https://github.com/kohya-ss/sd-scripts/pull/504) Thanks to Linaqruf!
+    - For details on prompt description, please see the PR.
+* 2023/05/07 (v21.5.10)
+  - Fix issue https://github.com/bmaltais/kohya_ss/issues/734
+  - The documentation has been moved to the `docs` folder. If you have links, please change them.
+  - DAdaptAdaGrad, DAdaptAdan, and DAdaptSGD are now supported by DAdaptation. [PR#455](https://github.com/kohya-ss/sd-scripts/pull/455) Thanks to sdbds!
+    - DAdaptation needs to be installed. Also, depending on the optimizer, DAdaptation may need to be updated. Please update with `pip install --upgrade dadaptation`.
+  - Added support for pre-calculation of LoRA weights in image generation scripts. Specify `--network_pre_calc`.
+    - The prompt option `--am` is available. Also, it is disabled when Regional LoRA is used.
+  - Added Adaptive noise scale to each training script. Specify a number with `--adaptive_noise_scale` to enable it.
+    - __Experimental option. It may be removed or changed in the future.__
+    - This is an original implementation that automatically adjusts the value of the noise offset according to the absolute value of the mean of each channel of the latents. It is expected that appropriate noise offsets will be set for bright and dark images, respectively.
+    - Specify it together with `--noise_offset`.
+    - The actual value of the noise offset is calculated as `noise_offset + abs(mean(latents, dim=(2,3))) * adaptive_noise_scale`. Since the latent is close to a normal distribution, it may be a good idea to specify a value of about 1/10 to the same as the noise offset.
+    - Negative values can also be specified, in which case the noise offset will be clipped to 0 or more.
+  - Other minor fixes.
diff --git a/docker-compose.yaml b/docker-compose.yaml
@@ -0,0 +1,30 @@
+# Compose definition for building and running the kohya_ss GUI container on one NVIDIA GPU.
+version: "3.8"
+services:
+  kohya-ss-gui:
+    container_name: kohya-ss-gui
+    image: kohya-ss-gui:latest
+    build:
+      context: .
+    ports:
+      # Published on the host's loopback interface only.
+      - 127.0.0.1:7860:7860
+    tty: true
+    ipc: host
+    environment:
+      # Extra arguments inserted into the GUI command line by the image's CMD.
+      CLI_ARGS: ""
+      SAFETENSORS_FAST_GPU: 1
+    tmpfs:
+      - /tmp
+    volumes:
+      # Training data directory shared with the container.
+      - ./dataset:/dataset
+      # Per-user cache/config directories, persisted on the host under ./.cache.
+      - ./.cache/user:/home/appuser/.cache
+      - ./.cache/triton:/home/appuser/.triton
+      # appuser's home is /home/appuser (created by `useradd -m`), not /app/appuser.
+      - ./.cache/config:/home/appuser/.config
+      - ./.cache/nv:/home/appuser/.nv
+      - ./.cache/keras:/home/appuser/.keras
+    deploy:
+      resources:
+        reservations:
+          devices:
+            # Reserve the first NVIDIA GPU (device id 0) for the container.
+            - driver: nvidia
+              device_ids: ['0']
+              capabilities: [gpu]
diff --git a/config_README-ja.md → docs/config_README-ja.md b/config_README-ja.md → docs/config_README-ja.md
diff --git a/fine_tune_README_ja.md → docs/fine_tune_README_ja.md b/fine_tune_README_ja.md → docs/fine_tune_README_ja.md