flake.nix : rewrite (ggerganov#4605)
* flake.lock: update to hotfix CUDA::cuda_driver

  Required to support ggerganov#4606

* flake.nix: rewrite

  1. Split into separate files per output.
  2. Added overlays, so that this flake can be integrated into others. The names in the overlay are `llama-cpp`, `llama-cpp-opencl`, `llama-cpp-cuda`, and `llama-cpp-rocm` so that they fit into the broader set of Nix packages from [nixpkgs](https://github.com/nixos/nixpkgs).
  3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/) rather than `with pkgs;` so that there's dependency injection rather than dependency lookup.
  4. Add a description and meta information for each package. The description notes which backend each package is accelerated with.
  5. Use specific CUDA packages instead of cudatoolkit, on the advice of SomeoneSerge.
  6. Format with `serokell/nixfmt` for a consistent style.
  7. Update `flake.lock` with the latest goods.

* flake.nix: use finalPackage instead of passing it manually
* nix: unclutter darwin support
* nix: pass most darwin frameworks unconditionally, for simplicity
* *.nix: nixfmt

  nix shell github:piegamesde/nixfmt/rfc101-style --command \
      nixfmt flake.nix .devops/nix/*.nix

* flake.nix: add maintainers
* nix: move meta down to follow Nixpkgs style more closely
* nix: add missing meta attributes

  nix: clarify the interpretation of meta.maintainers
  nix: clarify the meaning of "broken" and "badPlatforms"
  nix: passthru: expose the use* flags for inspection

  E.g.:

  ```
  ❯ nix eval .#cuda.useCuda
  true
  ```

* flake.nix: avoid re-evaluating nixpkgs too many times
* flake.nix: use flake-parts
* nix: migrate to pname+version
* flake.nix: overlay: expose both the namespace and the default attribute
* ci: add the (Nix) flakestry workflow
* nix: cmakeFlags: explicit OFF bools
* nix: cuda: reduce runtime closure
* nix: fewer rebuilds
* nix: respect config.cudaCapabilities
* nix: add the impure driver's location to the DT_RUNPATHs
* nix: clean sources more thoroughly, so that outPaths change less frequently and there are fewer rebuilds
* nix: explicit mpi support
* nix: explicit jetson support
* flake.nix: darwin: only expose the default

Co-authored-by: Someone Serge <sergei.kozlukov@aalto.fi>
1 parent 97bbca6 · commit 68eccbd
Showing 9 changed files with 524 additions and 159 deletions.
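For context, the overlay described in the commit message can be consumed from another flake roughly as sketched below. This is a minimal sketch, not something guaranteed by this commit: the input name `llama-cpp` and the `overlays.default` attribute are assumptions.

```nix
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    llama-cpp.url = "github:ggerganov/llama.cpp";
  };

  outputs = { self, nixpkgs, llama-cpp, ... }:
    let
      pkgs = import nixpkgs {
        system = "x86_64-linux";
        # The overlay is expected to add llama-cpp, llama-cpp-opencl,
        # llama-cpp-cuda, and llama-cpp-rocm to the package set.
        overlays = [ llama-cpp.overlays.default ];
      };
    in
    {
      packages.x86_64-linux.default = pkgs.llama-cpp;
    };
}
```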
@@ -0,0 +1,22 @@
```nix
{
  perSystem =
    { config, lib, ... }:
    {
      apps =
        let
          inherit (config.packages) default;
          binaries = [
            "llama"
            "llama-embedding"
            "llama-server"
            "quantize"
            "train-text-from-scratch"
          ];
          mkApp = name: {
            type = "app";
            program = "${default}/bin/${name}";
          };
        in
        lib.genAttrs binaries mkApp;
    };
}
```
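For illustration, `lib.genAttrs binaries mkApp` produces one flake app per binary, so `nix run .#llama-server` should resolve to the server binary of the default package. A sketch of the resulting value (store paths abbreviated):

```nix
# Illustrative expansion of `lib.genAttrs binaries mkApp`
# (store paths abbreviated; the real paths come from the default package).
{
  llama = {
    type = "app";
    program = "/nix/store/…-llama-cpp-0.0.0/bin/llama";
  };
  llama-server = {
    type = "app";
    program = "/nix/store/…-llama-cpp-0.0.0/bin/llama-server";
  };
  # …and likewise for llama-embedding, quantize, and train-text-from-scratch.
}
```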
@@ -0,0 +1,13 @@
```nix
{
  perSystem =
    { config, lib, ... }:
    {
      devShells =
        lib.concatMapAttrs
          (name: package: {
            ${name} = package.passthru.shell;
            ${name + "-extra"} = package.passthru.shell-extra;
          })
          config.packages;
    };
}
```
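As a sketch of what this produces: assuming `config.packages` contains packages named `default` and `cuda`, `lib.concatMapAttrs` merges the per-package attribute sets into one `devShells` set, so `nix develop .#default-extra` would enter the heavier Python shell.

```nix
# Illustrative result of the concatMapAttrs call above,
# assuming packages named `default` and `cuda` exist:
{
  default = config.packages.default.passthru.shell;
  default-extra = config.packages.default.passthru.shell-extra;
  cuda = config.packages.cuda.passthru.shell;
  cuda-extra = config.packages.cuda.passthru.shell-extra;
}
```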
@@ -0,0 +1,32 @@
```nix
{ inputs, ... }:
{
  perSystem =
    {
      config,
      system,
      lib,
      pkgsCuda,
      ...
    }:
    lib.optionalAttrs (system == "aarch64-linux") {
      packages =
        let
          caps.jetson-xavier = "7.2";
          caps.jetson-orin = "8.7";
          caps.jetson-nano = "5.3";

          pkgsFor =
            cap:
            import inputs.nixpkgs {
              inherit system;
              config = {
                cudaSupport = true;
                cudaCapabilities = [ cap ];
                cudaEnableForwardCompat = false;
                inherit (pkgsCuda.config) allowUnfreePredicate;
              };
            };
        in
        builtins.mapAttrs (name: cap: ((pkgsFor cap).callPackage ./scope.nix { }).llama-cpp) caps;
    };
}
```
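The `builtins.mapAttrs` call is roughly equivalent to spelling out one package per Jetson board, each pinned to a single CUDA capability:

```nix
# Roughly equivalent expansion of the mapAttrs call above:
{
  jetson-xavier = ((pkgsFor "7.2").callPackage ./scope.nix { }).llama-cpp;
  jetson-orin = ((pkgsFor "8.7").callPackage ./scope.nix { }).llama-cpp;
  jetson-nano = ((pkgsFor "5.3").callPackage ./scope.nix { }).llama-cpp;
}
```

On an aarch64-linux machine these should then be buildable as, e.g., `nix build .#jetson-orin`.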
@@ -0,0 +1,35 @@
```nix
{ inputs, ... }:
{
  # The _module.args definitions are passed on to modules as arguments. E.g.
  # the module `{ pkgs ... }: { /* config */ }` implicitly uses
  # `_module.args.pkgs` (defined in this case by flake-parts).
  perSystem =
    { system, ... }:
    {
      _module.args = {
        pkgsCuda = import inputs.nixpkgs {
          inherit system;
          # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
          # and ucx are built with CUDA support)
          config.cudaSupport = true;
          config.allowUnfreePredicate =
            p:
            builtins.all
              (
                license:
                license.free
                || builtins.elem license.shortName [
                  "CUDA EULA"
                  "cuDNN EULA"
                ]
              )
              (p.meta.licenses or [ p.meta.license ]);
        };
        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
          inherit system;
          config.rocmSupport = true;
        };
      };
    };
}
```
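The predicate admits a package only when every one of its licenses is either free or one of the two listed EULAs. A small sketch with hypothetical license metadata shows how it decides:

```nix
# Hypothetical evaluation of the predicate above:
let
  allowed =
    license:
    license.free || builtins.elem license.shortName [ "CUDA EULA" "cuDNN EULA" ];
in
{
  # Only license is the CUDA EULA -> allowed:
  cudaRuntime = builtins.all allowed [ { free = false; shortName = "CUDA EULA"; } ]; # true
  # Some other unfree license -> rejected:
  otherUnfree = builtins.all allowed [ { free = false; shortName = "other"; } ]; # false
}
```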
@@ -0,0 +1,265 @@
```nix
{
  lib,
  config,
  stdenv,
  mkShell,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  openblas, # TODO: Use the generic `blas` so users could switch between alternative implementations
  cudaPackages,
  darwin,
  rocmPackages,
  clblast,
  useBlas ? builtins.all (x: !x) [
    useCuda
    useMetalKit
    useOpenCL
    useRocm
  ],
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useOpenCL ? false,
  useRocm ? config.rocmSupport,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
}@inputs:

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    strings
    versionOlder
    ;

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  stdenv = throw "Use effectiveStdenv instead";
  effectiveStdenv = if useCuda then cudaPackages.backendStdenv else inputs.stdenv;

  suffices =
    lib.optionals useBlas [ "BLAS" ]
    ++ lib.optionals useCuda [ "CUDA" ]
    ++ lib.optionals useMetalKit [ "MetalKit" ]
    ++ lib.optionals useMpi [ "MPI" ]
    ++ lib.optionals useOpenCL [ "OpenCL" ]
    ++ lib.optionals useRocm [ "ROCm" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix =
    strings.optionalString (suffices != [ ])
      ", accelerated with ${strings.concatStringsSep ", " suffices}";

  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  llama-python = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
    ]
  );

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
      ps.torchWithoutCuda
      ps.transformers
    ]
  );

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart.dev
    cuda_cudart.lib
    cuda_cudart.static
    libcublas.dev
    libcublas.lib
    libcublas.static
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];
in

effectiveStdenv.mkDerivation (
  finalAttrs: {
    pname = "llama-cpp${pnameSuffix}";
    version = llamaVersion;

    src = lib.cleanSourceWith {
      filter =
        name: type:
        !(builtins.any (_: _) [
          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
          (name == "README.md") # Ignore *.md changes when computing outPaths
          (lib.hasPrefix "." name) # Skip hidden files and directories
        ]);
      src = lib.cleanSource ../../.;
    };

    postPatch = ''
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"

      # TODO: Package up each Python script or service appropriately.
      # If we were to migrate to buildPythonPackage and prepare the `pyproject.toml`,
      # we could make those *.py into setuptools' entrypoints
      substituteInPlace ./*.py --replace "/usr/bin/env python" "${llama-python}/bin/python"
    '';

    nativeBuildInputs =
      [
        cmake
        ninja
        pkg-config
        git
      ]
      ++ optionals useCuda [
        cudaPackages.cuda_nvcc

        # TODO: Replace with autoAddDriverRunpath
        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
        cudaPackages.autoAddOpenGLRunpathHook
      ];

    buildInputs =
      optionals effectiveStdenv.isDarwin darwinBuildInputs
      ++ optionals useCuda cudaBuildInputs
      ++ optionals useMpi [ mpi ]
      ++ optionals useOpenCL [ clblast ]
      ++ optionals useRocm rocmBuildInputs;

    cmakeFlags =
      [
        (cmakeBool "LLAMA_NATIVE" true)
        (cmakeBool "LLAMA_BUILD_SERVER" true)
        (cmakeBool "BUILD_SHARED_LIBS" true)
        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
        (cmakeBool "LLAMA_BLAS" useBlas)
        (cmakeBool "LLAMA_CLBLAST" useOpenCL)
        (cmakeBool "LLAMA_CUBLAS" useCuda)
        (cmakeBool "LLAMA_HIPBLAS" useRocm)
        (cmakeBool "LLAMA_METAL" useMetalKit)
        (cmakeBool "LLAMA_MPI" useMpi)
      ]
      ++ optionals useCuda [
        (
          with cudaPackages.flags;
          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
          )
        )
      ]
      ++ optionals useRocm [
        (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
        (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

        # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
        # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
        # and select the line that matches the current nixpkgs version of rocBLAS.
        # Should likely use `rocmPackages.clr.gpuTargets`.
        "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
      ]
      ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") ]
      ++ optionals useBlas [ (lib.cmakeFeature "LLAMA_BLAS_VENDOR" "OpenBLAS") ];

    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
    # if they haven't been added yet.
    postInstall = ''
      mv $out/bin/main $out/bin/llama
      mv $out/bin/server $out/bin/llama-server
      mkdir -p $out/include
      cp $src/llama.h $out/include/
    '';

    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
    passthru = {
      inherit
        useBlas
        useCuda
        useMetalKit
        useMpi
        useOpenCL
        useRocm
        ;

      shell = mkShell {
        name = "shell-${finalAttrs.finalPackage.name}";
        description = "contains numpy and sentencepiece";
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };

      shell-extra = mkShell {
        name = "shell-extra-${finalAttrs.finalPackage.name}";
        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
        buildInputs = [ llama-python-extra ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };
    };

    meta = {
      # Configurations we don't want even the CI to evaluate. Results in the
      # "unsupported platform" messages. This is mostly a no-op, because
      # cudaPackages would've refused to evaluate anyway.
      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
      broken = (useMetalKit && !effectiveStdenv.isDarwin);

      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
      homepage = "https://github.com/ggerganov/llama.cpp/";
      license = lib.licenses.mit;

      # Accommodates `nix run` and `lib.getExe`
      mainProgram = "llama";

      # These people might respond, on the best effort basis, if you ping them
      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
      # Consider adding yourself to this list if you want to ensure this flake
      # stays maintained and you're willing to invest your time. Do not add
      # other people without their consent. Consider removing people after
      # they've been unreachable for long periods of time.

      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
      # an attrset following the same format as in
      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
      maintainers = with lib.maintainers; [
        philiptaron
        SomeoneSerge
      ];

      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
  }
)
```
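Because the acceleration switches are ordinary function arguments resolved by `callPackage`, variants can be derived with `override`. A minimal sketch, assuming `llama-cpp` is already in scope (for example via the overlay shown earlier):

```nix
# Minimal sketch: deriving variants by toggling the use* flags.
{
  llama-cpp-opencl = llama-cpp.override { useOpenCL = true; };
  llama-cpp-mpi = llama-cpp.override { useMpi = true; };
  # With every accelerator switched off, useBlas falls back to true
  # (see the useBlas default at the top of the file).
  llama-cpp-blas = llama-cpp.override {
    useCuda = false;
    useRocm = false;
  };
}
```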
@@ -0,0 +1,12 @@
```nix
{
  lib,
  newScope,
  llamaVersion ? "0.0.0",
}:

lib.makeScope newScope (
  self: {
    inherit llamaVersion;
    llama-cpp = self.callPackage ./package.nix { };
  }
)
```
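This is the scope that the Jetson module instantiates once per CUDA capability; it can also be called directly from any nixpkgs package set. A minimal sketch, where `pkgs` and the version string are assumptions for illustration:

```nix
# Minimal sketch: instantiating the scope from an existing package set `pkgs`.
let
  llamaScope = pkgs.callPackage ./scope.nix { llamaVersion = "0.0.0"; };
in
llamaScope.llama-cpp
```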