
Specify python package dependencies in requirements.txt #78

Merged · 8 commits · May 7, 2023
11 changes: 3 additions & 8 deletions README.md
@@ -1,10 +1,10 @@
 # CacheFlow

-## Installation
+## Build from source

 ```bash
-pip install ninja psutil numpy sentencepiece ray torch transformers xformers
-pip install -e .
+pip install -r requirements.txt
+pip install -e .  # This may take several minutes.
 ```

 ## Test simple server
@@ -21,11 +21,6 @@ python simple_server.py --help

 ## FastAPI server

-Install the following additional dependencies:
-```bash
-pip install fastapi uvicorn
-```
-
 To start the server:
 ```bash
 ray start --head
10 changes: 10 additions & 0 deletions requirements.txt
@@ -0,0 +1,10 @@
+ninja # For faster builds.
+psutil
+ray
+sentencepiece # Required for LLaMA tokenizer.
+numpy
+torch >= 2.0.0
+transformers >= 4.28.0 # Required for LLaMA.
+xformers >= 0.0.19
+fastapi
+uvicorn
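
As context for the version-constrained entries above (not part of this PR), here is a minimal sketch of how a line such as `torch >= 2.0.0` is interpreted, using the `packaging` library that pip and setuptools build on:

```python
# Illustrative only: parse one requirement line the way pip/setuptools do.
from packaging.requirements import Requirement

req = Requirement("torch >= 2.0.0")
print(req.name)       # torch
print(req.specifier)  # >=2.0.0
# Check whether a given installed version satisfies the constraint.
print(req.specifier.contains("2.0.1"))  # True
```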
72 changes: 44 additions & 28 deletions setup.py
@@ -1,67 +1,83 @@
+from typing import List
+
 import setuptools
 import torch
-from torch.utils import cpp_extension
+from torch.utils.cpp_extension import BuildExtension, CUDAExtension
+from torch.utils.cpp_extension import CUDA_HOME


-CXX_FLAGS = ['-g']
-NVCC_FLAGS = ['-O2']
+# Build custom operators.
+CXX_FLAGS = ["-g"]
+# TODO(woosuk): Should we use -O3?
+NVCC_FLAGS = ["-O2"]

 if not torch.cuda.is_available():
     raise RuntimeError(
-        f'Cannot find CUDA at CUDA_HOME: {cpp_extension.CUDA_HOME}. '
-        'CUDA must be available in order to build the package.')
+        f"Cannot find CUDA at CUDA_HOME: {CUDA_HOME}. "
+        "CUDA must be available in order to build the package.")

 # FIXME(woosuk): Consider the case where the machine has multiple GPUs with
 # different compute capabilities.
 compute_capability = torch.cuda.get_device_capability()
 major, minor = compute_capability
 # Enable bfloat16 support if the compute capability is >= 8.0.
 if major >= 8:
-    NVCC_FLAGS.append('-DENABLE_BF16')
+    NVCC_FLAGS.append("-DENABLE_BF16")

 ext_modules = []

 # Cache operations.
-cache_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.cache_ops',
-    sources=['csrc/cache.cpp', 'csrc/cache_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+cache_extension = CUDAExtension(
+    name="cacheflow.cache_ops",
+    sources=["csrc/cache.cpp", "csrc/cache_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(cache_extension)

 # Attention kernels.
-attention_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.attention_ops',
-    sources=['csrc/attention.cpp', 'csrc/attention/attention_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+attention_extension = CUDAExtension(
+    name="cacheflow.attention_ops",
+    sources=["csrc/attention.cpp", "csrc/attention/attention_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(attention_extension)

 # Positional encoding kernels.
-positional_encoding_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.pos_encoding_ops',
-    sources=['csrc/pos_encoding.cpp', 'csrc/pos_encoding_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+positional_encoding_extension = CUDAExtension(
+    name="cacheflow.pos_encoding_ops",
+    sources=["csrc/pos_encoding.cpp", "csrc/pos_encoding_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(positional_encoding_extension)

 # Layer normalization kernels.
-layernorm_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.layernorm_ops',
-    sources=['csrc/layernorm.cpp', 'csrc/layernorm_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+layernorm_extension = CUDAExtension(
+    name="cacheflow.layernorm_ops",
+    sources=["csrc/layernorm.cpp", "csrc/layernorm_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(layernorm_extension)

 # Activation kernels.
-activation_extension = cpp_extension.CUDAExtension(
-    name='cacheflow.activation_ops',
-    sources=['csrc/activation.cpp', 'csrc/activation_kernels.cu'],
-    extra_compile_args={'cxx': CXX_FLAGS, 'nvcc': NVCC_FLAGS},
+activation_extension = CUDAExtension(
+    name="cacheflow.activation_ops",
+    sources=["csrc/activation.cpp", "csrc/activation_kernels.cu"],
+    extra_compile_args={"cxx": CXX_FLAGS, "nvcc": NVCC_FLAGS},
 )
 ext_modules.append(activation_extension)


+def get_requirements() -> List[str]:
+    """Get Python package dependencies from requirements.txt."""
+    with open("requirements.txt") as f:
+        requirements = f.read().strip().split("\n")
+    return requirements
+
+
 setuptools.setup(
-    name='cacheflow',
+    name="cacheflow",
+    python_requires=">=3.8",
+    install_requires=get_requirements(),
     ext_modules=ext_modules,
-    cmdclass={'build_ext': cpp_extension.BuildExtension},
+    cmdclass={"build_ext": BuildExtension},
 )
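
With `install_requires` wired to `get_requirements()`, an editable install records these dependencies in the package metadata, so `pip install -e .` alone pulls them in. A minimal sketch (assuming the package has been installed) of checking that with the standard library:

```python
# Illustrative only: list the Requires-Dist entries recorded for cacheflow.
from importlib.metadata import requires  # stdlib in Python >= 3.8

for dep in requires("cacheflow") or []:
    print(dep)  # e.g. torch>=2.0.0, transformers>=4.28.0, ...
```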