update file

Fjallraven-hc · Nov 13, 2023 · 6b60875 · 6b60875
1 parent bc44492
commit 6b60875
Show file tree

Hide file tree

Showing 88 changed files with 8,323 additions and 2 deletions.
diff --git a/OneFlow.yaml b/OneFlow.yaml
@@ -0,0 +1,80 @@
+name: OneFlow
+channels:
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2023.08.22=h06a4308_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - libffi=3.4.4=h6a678d5_0
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libuuid=1.41.5=h5eee18b_0
+  - ncurses=6.4=h6a678d5_0
+  - openssl=3.0.12=h7f8727e_0
+  - pip=23.3=py310h06a4308_0
+  - python=3.10.13=h955ad1f_0
+  - readline=8.2=h5eee18b_0
+  - setuptools=68.0.0=py310h06a4308_0
+  - sqlite=3.41.2=h5eee18b_0
+  - tk=8.6.12=h1ccaba5_0
+  - tzdata=2023c=h04d1e81_0
+  - wheel=0.41.2=py310h06a4308_0
+  - xz=5.4.2=h5eee18b_0
+  - zlib=1.2.13=h5eee18b_0
+  - pip:
+      - accelerate==0.24.1
+      - certifi==2023.7.22
+      - charset-normalizer==3.3.2
+      - diffusers==0.19.3
+      - filelock==3.13.1
+      - fsspec==2023.10.0
+      - huggingface-hub==0.18.0
+      - idna==3.4
+      - importlib-metadata==6.8.0
+      - jinja2==3.1.2
+      - markdown-it-py==3.0.0
+      - markupsafe==2.1.3
+      - mdurl==0.1.2
+      - mpmath==1.3.0
+      - networkx==3.2.1
+      - numpy==1.26.1
+      - nvidia-cublas-cu11==11.11.3.6
+      - nvidia-cublas-cu12==12.1.3.1
+      - nvidia-cuda-cupti-cu12==12.1.105
+      - nvidia-cuda-nvrtc-cu11==11.8.89
+      - nvidia-cuda-nvrtc-cu12==12.1.105
+      - nvidia-cuda-runtime-cu12==12.1.105
+      - nvidia-cudnn-cu11==8.9.6.50
+      - nvidia-cudnn-cu12==8.9.2.26
+      - nvidia-cufft-cu12==11.0.2.54
+      - nvidia-curand-cu12==10.3.2.106
+      - nvidia-cusolver-cu12==11.4.5.107
+      - nvidia-cusparse-cu12==12.1.0.106
+      - nvidia-nccl-cu12==2.18.1
+      - nvidia-nvjitlink-cu12==12.3.52
+      - nvidia-nvtx-cu12==12.1.105
+      - oneflow==0.9.1+cu118.git.165bb7a
+      - onefx==0.0.3
+      - packaging==23.2
+      - pillow==10.1.0
+      - protobuf==3.20.3
+      - psutil==5.9.6
+      - pygments==2.16.1
+      - pyyaml==6.0.1
+      - regex==2023.10.3
+      - requests==2.31.0
+      - rich==13.6.0
+      - safetensors==0.4.0
+      - sympy==1.12
+      - tokenizers==0.13.3
+      - torch==2.1.0
+      - tqdm==4.66.1
+      - transformers==4.27.1
+      - triton==2.1.0
+      - typing-extensions==4.8.0
+      - urllib3==2.0.7
+      - zipp==3.17.0
+prefix: /home/yhc/miniconda3/envs/OneFlow
diff --git a/README.md b/README.md
@@ -1,8 +1,32 @@
 # Stable Diffusion Serving System
-A stable diffusion serving system integrated with most advanced features.
+A low-latency, high-throughput Stable Diffusion serving system integrated with the most advanced features.
 ## Features
 - SLOs-aware iteration scheduling.
 - multi-model/LoRA concurrent serving.
 - Co-schedule inference/finetune task.
-- [xformers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), major speed increase for select cards.
+- [xFormers](https://github.com/AUTOMATIC1111/stable-diffusion-webui/wiki/Xformers), Toolbox to Accelerate Research on transformers, developed by Meta AI.
+- [DeepSpeed](https://github.com/microsoft/DeepSpeed), Extreme Speed and Scale for DL Training and Inference, developed by Microsoft Research.
+- [OneFlow](https://github.com/Oneflow-Inc/oneflow), a deep learning framework designed to be user-friendly, scalable and efficient.
 - Machine Learning Compilation optimization.
+## Environment install
+For the HuggingFace diffusers pipeline, xFormers, and DeepSpeed, use `base-env.yaml`.  
+For OneFlow, use `OneFlow.yaml`. After installation, replace the `diffusers.models.unet_2d_condition.forward` function with the code in `unet_forward_with_different_timesteps.py`.
+## Performance
+Numbers are collected on Ubuntu 20.04.6 LTS with RTX 4090 24GB, CUDA=11.8.  
+Inference setting:   
+```
+{
+    "prompt": "an astronaut riding horse on the moon",
+    "num_inference_steps": 50,
+    "height": 512,
+    "width": 512,
+    "guidance_scale": 7.5
+}
+```  
+[Note](https://huggingface.co/docs/diffusers/optimization/memory#memory-efficient-attention): if you have PyTorch >= 2.0 installed, you should not expect a speed-up for inference when enabling xFormers.
+| batch_size | PyTorch=2.1.0+diffusers=0.14.0 | OneFlow=0.9.0 |  xFormers=0.0.22 | DeepSpeed=0.12.2 |
+|:----:|:------:|:---:|:---:|:---:|
+| 1 | 1.660253 | 0.907718 | 1.837109 | 1.444413 |
+| 2 | 2.154117 | 1.481392 | 2.294451 | 2.094967 |
+| 4 | 3.949180 | 2.621291 | 4.086211 | 3.907683 |
+| 8 | 7.741389 | 5.011853 | 7.610301 | 7.674476 |
diff --git a/a cute cat-DeepSpeed.jpg b/a cute cat-DeepSpeed.jpg
diff --git a/a cute cat-OneFlow.jpg b/a cute cat-OneFlow.jpg
diff --git a/a cute cat-xFormers.jpg b/a cute cat-xFormers.jpg
diff --git a/a cute cat.jpg b/a cute cat.jpg
diff --git a/an astronaut riding horse on the moon-DeepSpeed.jpg b/an astronaut riding horse on the moon-DeepSpeed.jpg
diff --git a/an astronaut riding horse on the moon-OneFlow.jpg b/an astronaut riding horse on the moon-OneFlow.jpg
diff --git a/an astronaut riding horse on the moon-xFormers.jpg b/an astronaut riding horse on the moon-xFormers.jpg
diff --git a/an astronaut riding horse on the moon.jpg b/an astronaut riding horse on the moon.jpg
diff --git a/base-env.yaml b/base-env.yaml
@@ -0,0 +1,222 @@
+name: lora
+channels:
+  - xformers
+  - conda-forge
+  - pytorch
+  - nvidia
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - asttokens=2.0.5=pyhd3eb1b0_0
+  - backcall=0.2.0=pyhd3eb1b0_0
+  - blas=1.0=mkl
+  - brotli-python=1.0.9=py310h6a678d5_7
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2023.08.22=h06a4308_0
+  - certifi=2023.7.22=py310h06a4308_0
+  - cffi=1.15.1=py310h74dc2b5_0
+  - cryptography=41.0.3=py310h130f0dd_0
+  - cuda-cudart=11.8.89=0
+  - cuda-cupti=11.8.87=0
+  - cuda-libraries=11.8.0=0
+  - cuda-nvrtc=11.8.89=0
+  - cuda-nvtx=11.8.86=0
+  - cuda-runtime=11.8.0=0
+  - debugpy=1.6.7=py310h6a678d5_0
+  - decorator=5.1.1=pyhd3eb1b0_0
+  - exceptiongroup=1.0.4=py310h06a4308_0
+  - executing=0.8.3=pyhd3eb1b0_0
+  - ffmpeg=4.3=hf484d3e_0
+  - freetype=2.12.1=h4a9f257_0
+  - giflib=5.2.1=h5eee18b_3
+  - gmp=6.2.1=h295c915_3
+  - gmpy2=2.1.2=py310heeb90bb_0
+  - gnutls=3.6.15=he1e5248_0
+  - idna=3.4=py310h06a4308_0
+  - intel-openmp=2021.4.0=h06a4308_3561
+  - ipykernel=6.15.0=pyh210e3f2_0
+  - ipython=8.15.0=py310h06a4308_0
+  - jedi=0.18.1=py310h06a4308_1
+  - jinja2=3.1.2=py310h06a4308_0
+  - jpeg=9e=h5eee18b_1
+  - jupyter_client=8.5.0=py310h06a4308_0
+  - jupyter_core=5.5.0=py310h06a4308_0
+  - lame=3.100=h7b6447c_0
+  - lcms2=2.12=h3be6417_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - lerc=3.0=h295c915_0
+  - libcublas=11.11.3.6=0
+  - libcufft=10.9.0.58=0
+  - libcufile=1.8.0.34=0
+  - libcurand=10.3.4.52=0
+  - libcusolver=11.4.1.48=0
+  - libcusparse=11.7.5.86=0
+  - libdeflate=1.17=h5eee18b_1
+  - libffi=3.3=he6710b0_2
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libiconv=1.16=h7f8727e_2
+  - libidn2=2.3.4=h5eee18b_0
+  - libjpeg-turbo=2.0.0=h9bf148f_0
+  - libnpp=11.8.0.86=0
+  - libnvjpeg=11.9.0.86=0
+  - libpng=1.6.39=h5eee18b_0
+  - libsodium=1.0.18=h7b6447c_0
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libtasn1=4.19.0=h5eee18b_0
+  - libtiff=4.5.1=h6a678d5_0
+  - libunistring=0.9.10=h27cfd23_0
+  - libuuid=1.41.5=h5eee18b_0
+  - libwebp=1.3.2=h11a3e52_0
+  - libwebp-base=1.3.2=h5eee18b_0
+  - llvm-openmp=14.0.6=h9e868ea_0
+  - lz4-c=1.9.4=h6a678d5_0
+  - matplotlib-inline=0.1.6=py310h06a4308_0
+  - mkl=2021.4.0=h06a4308_640
+  - mkl-service=2.4.0=py310h7f8727e_0
+  - mkl_fft=1.3.1=py310hd6ae3a3_0
+  - mkl_random=1.2.2=py310h00e6091_0
+  - mpc=1.1.0=h10f8cd9_1
+  - mpfr=4.0.2=hb69a4c5_1
+  - mpmath=1.3.0=py310h06a4308_0
+  - ncurses=6.4=h6a678d5_0
+  - nest-asyncio=1.5.6=py310h06a4308_0
+  - nettle=3.7.3=hbbd107a_1
+  - networkx=3.1=py310h06a4308_0
+  - numpy-base=1.24.3=py310h8e6c178_0
+  - openh264=2.1.1=h4ff587b_0
+  - openjpeg=2.4.0=h3ad879b_0
+  - openssl=1.1.1w=h7f8727e_0
+  - packaging=23.1=py310h06a4308_0
+  - parso=0.8.3=pyhd3eb1b0_0
+  - pexpect=4.8.0=pyhd3eb1b0_3
+  - pickleshare=0.7.5=pyhd3eb1b0_1003
+  - platformdirs=3.10.0=py310h06a4308_0
+  - prompt-toolkit=3.0.36=py310h06a4308_0
+  - ptyprocess=0.7.0=pyhd3eb1b0_2
+  - pure_eval=0.2.2=pyhd3eb1b0_0
+  - pycparser=2.21=pyhd3eb1b0_0
+  - pygments=2.15.1=py310h06a4308_1
+  - pyopenssl=23.2.0=py310h06a4308_0
+  - pysocks=1.7.1=py310h06a4308_0
+  - python=3.10.3=h12debd9_5
+  - python-dateutil=2.8.2=pyhd3eb1b0_0
+  - pytorch=2.1.0=py3.10_cuda11.8_cudnn8.7.0_0
+  - pytorch-cuda=11.8=h7e8668a_5
+  - pytorch-mutex=1.0=cuda
+  - pyyaml=6.0.1=py310h5eee18b_0
+  - pyzmq=25.1.0=py310h6a678d5_0
+  - readline=8.2=h5eee18b_0
+  - requests=2.31.0=py310h06a4308_0
+  - setuptools=68.0.0=py310h06a4308_0
+  - six=1.16.0=pyhd3eb1b0_1
+  - sqlite=3.41.2=h5eee18b_0
+  - stack_data=0.2.0=pyhd3eb1b0_0
+  - tk=8.6.12=h1ccaba5_0
+  - torchaudio=2.1.0=py310_cu118
+  - torchtriton=2.1.0=py310
+  - tornado=6.3.3=py310h5eee18b_0
+  - traitlets=5.7.1=py310h06a4308_0
+  - typing_extensions=4.7.1=py310h06a4308_0
+  - tzdata=2023c=h04d1e81_0
+  - wcwidth=0.2.5=pyhd3eb1b0_0
+  - wheel=0.41.2=py310h06a4308_0
+  - xformers=0.0.22.post7=py310_cu11.8.0_pyt2.1.0
+  - xz=5.4.2=h5eee18b_0
+  - yaml=0.2.5=h7b6447c_0
+  - zeromq=4.3.4=h2531618_0
+  - zlib=1.2.13=h5eee18b_0
+  - zstd=1.5.5=hc292b87_0
+  - pip:
+      - absl-py==1.4.0
+      - accelerate==0.21.0
+      - aiohttp==3.8.6
+      - aiosignal==1.3.1
+      - annotated-types==0.6.0
+      - antlr4-python3-runtime==4.9.3
+      - appdirs==1.4.4
+      - async-timeout==4.0.3
+      - attrs==23.1.0
+      - chardet==5.2.0
+      - charset-normalizer==3.2.0
+      - click==8.1.6
+      - cmake==3.27.0
+      - contourpy==1.1.0
+      - cycler==0.11.0
+      - deepspeed==0.12.2
+      - diffusers==0.14.0
+      - docker-pycreds==0.4.0
+      - einops==0.7.0
+      - filelock==3.12.2
+      - fire==0.5.0
+      - flatbuffers==23.5.26
+      - fonttools==4.41.1
+      - frozenlist==1.4.0
+      - fsspec==2023.6.0
+      - ftfy==6.1.1
+      - gitdb==4.0.10
+      - gitpython==3.1.32
+      - hjson==3.1.0
+      - huggingface-hub==0.16.4
+      - importlib-metadata==6.8.0
+      - kiwisolver==1.4.4
+      - kornia==0.7.0
+      - lightning-utilities==0.9.0
+      - lit==16.0.6
+      - lora-diffusion==0.1.7
+      - markupsafe==2.1.3
+      - matplotlib==3.7.2
+      - mediapipe==0.10.2
+      - multidict==6.0.4
+      - ninja==1.11.1.1
+      - numpy==1.25.1
+      - nvidia-cublas-cu11==11.10.3.66
+      - nvidia-cuda-cupti-cu11==11.7.101
+      - nvidia-cuda-nvrtc-cu11==11.7.99
+      - nvidia-cuda-runtime-cu11==11.7.99
+      - nvidia-cudnn-cu11==8.5.0.96
+      - nvidia-cufft-cu11==10.9.0.58
+      - nvidia-curand-cu11==10.2.10.91
+      - nvidia-cusolver-cu11==11.4.0.1
+      - nvidia-cusparse-cu11==11.7.4.91
+      - nvidia-nccl-cu11==2.14.3
+      - nvidia-nvtx-cu11==11.7.91
+      - omegaconf==2.3.0
+      - open-clip-torch==2.23.0
+      - opencv-contrib-python==4.8.0.74
+      - opencv-python==4.8.0.74
+      - pathtools==0.1.2
+      - pillow==10.0.0
+      - pip==23.3.1
+      - protobuf==3.20.3
+      - psutil==5.9.5
+      - py-cpuinfo==9.0.0
+      - pydantic==2.4.2
+      - pydantic-core==2.10.1
+      - pynvml==11.5.0
+      - pyparsing==3.0.9
+      - pytorch-lightning==2.1.0
+      - regex==2023.6.3
+      - safetensors==0.3.1
+      - scipy==1.11.1
+      - sentencepiece==0.1.99
+      - sentry-sdk==1.28.1
+      - setproctitle==1.3.2
+      - smmap==5.0.0
+      - sounddevice==0.4.6
+      - sympy==1.12
+      - termcolor==2.3.0
+      - timm==0.9.8
+      - tokenizers==0.13.3
+      - torch==2.0.1
+      - torchmetrics==1.2.0
+      - torchvision==0.15.2
+      - tqdm==4.65.0
+      - transformers==4.31.0
+      - triton==2.0.0
+      - urllib3==2.0.4
+      - wandb==0.15.5
+      - yarl==1.9.2
+      - zipp==3.16.2
+prefix: /home/yhc/miniconda3/envs/lora
diff --git a/block_stage_file/__init__.py b/block_stage_file/__init__.py
@@ -0,0 +1,3 @@
+from .stage_CLIP import *
+from .stage_UNET import *
+from .stage_vae_and_safety_checker import *
diff --git a/block_stage_file/__pycache__/__init__.cpython-310.pyc b/block_stage_file/__pycache__/__init__.cpython-310.pyc
diff --git a/block_stage_file/__pycache__/block_feature_extractor.cpython-310.pyc b/block_stage_file/__pycache__/block_feature_extractor.cpython-310.pyc
diff --git a/block_stage_file/__pycache__/block_safety_checker.cpython-310.pyc b/block_stage_file/__pycache__/block_safety_checker.cpython-310.pyc
diff --git a/block_stage_file/__pycache__/block_scheduler.cpython-310.pyc b/block_stage_file/__pycache__/block_scheduler.cpython-310.pyc
diff --git a/block_stage_file/__pycache__/block_text_encoder.cpython-310.pyc b/block_stage_file/__pycache__/block_text_encoder.cpython-310.pyc
diff --git a/block_stage_file/__pycache__/block_tokenizer.cpython-310.pyc b/block_stage_file/__pycache__/block_tokenizer.cpython-310.pyc
diff --git a/block_stage_file/__pycache__/block_unet.cpython-310.pyc b/block_stage_file/__pycache__/block_unet.cpython-310.pyc
diff --git a/block_stage_file/__pycache__/block_vae.cpython-310.pyc b/block_stage_file/__pycache__/block_vae.cpython-310.pyc
diff --git a/block_stage_file/__pycache__/modified_unet_2d_condition.cpython-310.pyc b/block_stage_file/__pycache__/modified_unet_2d_condition.cpython-310.pyc
diff --git a/block_stage_file/__pycache__/stage_CLIP.cpython-310.pyc b/block_stage_file/__pycache__/stage_CLIP.cpython-310.pyc
diff --git a/block_stage_file/__pycache__/stage_UNET.cpython-310.pyc b/block_stage_file/__pycache__/stage_UNET.cpython-310.pyc
diff --git a/block_stage_file/__pycache__/stage_vae_and_safety_checker.cpython-310.pyc b/block_stage_file/__pycache__/stage_vae_and_safety_checker.cpython-310.pyc
diff --git a/block_stage_file/block_feature_extractor.py b/block_stage_file/block_feature_extractor.py
@@ -0,0 +1,17 @@
+import torch
+from transformers.models.clip.image_processing_clip import CLIPImageProcessor
+
+def FEATURE_EXTRACTOR():
+    clip_image_processor = CLIPImageProcessor(
+        crop_size=224,
+        do_center_crop=True,
+        do_convert_rgb=True,
+        do_normalize=True,
+        do_resize=True,
+        image_mean=[0.48145466, 0.4578275, 0.40821073],
+        image_std=[0.26862954, 0.26130258, 0.27577711],
+        resample=3,
+        size=224
+    )
+    clip_image_processor.feature_extractor_type = "CLIPFeatureExtractor"
+    return clip_image_processor
diff --git a/block_stage_file/block_safety_checker.py b/block_stage_file/block_safety_checker.py
@@ -0,0 +1,9 @@
+import torch
+from diffusers.pipelines.stable_diffusion.safety_checker import StableDiffusionSafetyChecker, CLIPConfig
+
+def SAFETY_CHECKER():
+    config_path = "/data/yhc/stable-diffusion-v1-5/safety_checker/config.json"
+    parameter_path = "/data/yhc/stable-diffusion-v1-5/safety_checker/yhc_saved_safety_checker.bin"
+    safety_checker = StableDiffusionSafetyChecker(CLIPConfig.from_pretrained(config_path))
+    safety_checker.load_state_dict(torch.load(parameter_path, map_location='cpu'))
+    return safety_checker