Commit 29d2477: minor fixes, better README(s)

martinobettucci committed Feb 12, 2023 (1 parent: 5874ea3)

Showing 4 changed files with 23 additions and 7 deletions.
12 changes: 11 additions & 1 deletion README.md
@@ -1,7 +1,11 @@
# kohya_ss-docker
This is the companion repository for running the kohya_ss training webui, converted to work on Linux.

Read the data sections for [wheels](kohya_ss/data) and [packages](kohya_ss/data/libs) prior to compiling the image or IT WILL FAIL.

## Nvidia Docker Extensions

You will need the Docker NVIDIA extensions; please refer to the installation details here: https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html#setting-up-nvidia-container-toolkit
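As a quick orientation only (the authoritative steps are in the guide above; the CUDA image tag below is an assumption chosen to match the cu116 wheels this project builds against), the apt-based setup boils down to:

```bash
# After adding NVIDIA's apt repository as described in the linked guide:
sudo apt-get update
sudo apt-get install -y nvidia-container-toolkit
sudo systemctl restart docker

# Quick check that containers can see the GPU:
docker run --rm --gpus all nvidia/cuda:11.6.2-base-ubuntu20.04 nvidia-smi
```

If `nvidia-smi` prints your GPU table from inside the container, the toolkit is wired up correctly.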

## Tk and Xorg

@@ -13,5 +17,11 @@
Remember to allow Docker containers as clients of your X server.
xhost +
```

Once you have compiled (or downloaded) the wheels and the Debian packages, you can simply run it via `docker compose --profile kohya up --build` and wait for the build to finish.
A message will notify you that the build is complete, and you can then access the GUI via the link printed on the console.
```bash
kohya-docker-kohya-1 | Running on local URL: http://127.0.0.1:7680
```

Happy training on Linux!!!

8 changes: 5 additions & 3 deletions docker-compose.yml
@@ -11,16 +11,18 @@ x-gpu-base-service: &gpu_service

x-base_service: &base_service
user: "${UID:-0}:${GID:-0}"
-#network_mode: "host"
-ports:
-  - "7680:7680"
+network_mode: "host"
+#ports:
+#  - "7680:7680"
build:
context: ./kohya_ss
args:
# Compile time args
TORCH_COMMAND: /bin/bash /docker/install-container-dep.sh /docker/torch-*.whl /docker/torchvision-*.whl /docker/tensorflow-*.whl
PIP_REPOSITORY: https://download.pytorch.org/whl/cu116
PYTORCH_CUDA_ALLOC_CONF: garbage_collection_threshold:0.9,max_split_size_mb:256
TORCH_CUDA_ARCH_LIST: 7.5
DS_BUILD_OPS: 1
MAX_GCC_VERSION: 10
JAX: False
TPU: False
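Note the pairing at work with these build `args`: a value listed under `build.args` in docker-compose only reaches the image if the Dockerfile declares a matching `ARG`, and it must additionally be re-exported as an `ENV` to remain visible in the running container (which is exactly what this commit adds to `kohya_ss/Dockerfile`). A minimal sketch of the pattern with one variable:

```dockerfile
# Build-time value supplied by docker-compose `build.args`
ARG TORCH_CUDA_ARCH_LIST
# Re-export as an environment variable so it also exists at run time
ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
```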
4 changes: 4 additions & 0 deletions kohya_ss/Dockerfile
@@ -10,7 +10,11 @@ ARG MAX_GCC_VERSION
ARG JAX
ARG TPU
ARG PIP_REPOSITORY
ARG TORCH_CUDA_ARCH_LIST
ARG DS_BUILD_OPS

ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
ENV DS_BUILD_OPS=${DS_BUILD_OPS}
ENV PIP_REPOSITORY=${PIP_REPOSITORY}
ENV TORCH_COMMAND=${TORCH_COMMAND}
ENV PYTORCH_CUDA_ALLOC_CONF=${PYTORCH_CUDA_ALLOC_CONF}
6 changes: 3 additions & 3 deletions kohya_ss/scripts/run.sh
@@ -5,7 +5,7 @@ set -Eeuo pipefail
. /docker/mount.sh

# Load environment
-source ${ROOT}/kohya_venv/bin/activate
+source "${ROOT}/kohya_venv/bin/activate"

# check python
python --version
@@ -30,8 +30,8 @@ for devid in range(0,torch.cuda.device_count()):
print(torch.cuda.get_device_name(devid))
EOF

-cd ${ROOT} && git pull
-if [[ ! -z "${ACCELERATE}" ]] && [[ "${ACCELERATE}" = "True" ]] && [[ -x "$(command -v accelerate)" ]]
+cd "${ROOT}" && git pull
+if [[ -n "${ACCELERATE}" ]] && [[ "${ACCELERATE}" = "True" ]] && [[ -x "$(command -v accelerate)" ]]
then
echo "Accelerating SD with distributed GPU+CPU..."
accelerate launch --num_cpu_threads_per_process=6 "$@"
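One quoting subtlety worth noting here: just as `[[ -n … ]]` is the tidier equivalent of `[[ ! -z … ]]`, `$@` should be quoted, because an unquoted `$@` re-splits any argument containing spaces. A small self-contained demo (the `count_args` helper is hypothetical, for illustration only):

```shell
#!/usr/bin/env bash
# "$@" preserves argument boundaries; bare $@ word-splits them.
count_args() { echo "$#"; }

set -- "one arg" "two"   # simulate two positional parameters
count_args "$@"          # prints 2: boundaries preserved
count_args $@            # prints 3: "one arg" is split into two words
```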
