flake.nix: rewrite
1. Split into separate files per output.

2. Added overlays, so that this flake can be integrated into others;
   a consumption sketch follows this list. The names in the overlay are
   `llama-cpp`, `llama-cpp-opencl`, `llama-cpp-cuda`, and `llama-cpp-rocm`,
   so that they fit into the broader set of Nix packages from
   [nixpkgs](https://github.com/nixos/nixpkgs).

3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/)
   rather than `with pkgs;` so that there's dependency injection rather
   than dependency lookup.

4. Add a description and meta information for each package.
   The description includes a bit about what each one is trying to accelerate.
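
For illustration, a downstream flake might consume the new overlay roughly as follows; the flake URL, the chosen system, and the input name here are placeholders rather than anything defined by this commit:

```nix
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    # Placeholder pin; point this at wherever you track llama.cpp.
    llama-cpp.url = "github:ggerganov/llama.cpp";
  };

  outputs = { self, nixpkgs, llama-cpp }:
    let
      # Applying the overlay makes pkgs.llama-cpp, pkgs.llama-cpp-opencl,
      # pkgs.llama-cpp-cuda, and pkgs.llama-cpp-rocm available.
      pkgs = import nixpkgs {
        system = "x86_64-linux";
        overlays = [ llama-cpp.overlays.default ];
      };
    in
    {
      packages.x86_64-linux.default = pkgs.llama-cpp;
    };
}
```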
philiptaron committed Dec 22, 2023
1 parent 7082d24 commit 2e53df7
Showing 6 changed files with 231 additions and 171 deletions.
14 changes: 14 additions & 0 deletions apps.nix
@@ -0,0 +1,14 @@
names: pkgs:

let
  default = builtins.elemAt names 0;
  mkApp = name: {
    ${name} = {
      type = "app";
      program = "${pkgs.llama-cpp}/bin/${name}";
    };
  };
  result = builtins.foldl' (acc: name: (mkApp name) // acc) {} names;
in

result // { default = result.${default}; }
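
For orientation, this is roughly the attribute set apps.nix evaluates to for one system if it were called with only two names, say `names = [ "llama" "llama-server" ]` (the real call in flake.nix below passes five); the `program` strings are shown uninterpolated and become store paths at evaluation time:

```nix
{
  default      = { type = "app"; program = "${pkgs.llama-cpp}/bin/llama"; };
  llama        = { type = "app"; program = "${pkgs.llama-cpp}/bin/llama"; };
  llama-server = { type = "app"; program = "${pkgs.llama-cpp}/bin/llama-server"; };
}
```

Because `mkApp` builds a one-attribute set keyed by each name and `builtins.foldl'` merges them, `default` ends up as an alias for the first name in the list.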
32 changes: 32 additions & 0 deletions devshells.nix
@@ -0,0 +1,32 @@
pkgs:

let
  llama-python = pkgs.python3.withPackages (ps: [
    ps.numpy
    ps.sentencepiece
  ]);

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = pkgs.python3.withPackages (ps: [
    ps.numpy
    ps.sentencepiece
    ps.torchWithoutCuda
    ps.transformers
  ]);
in

{
  default = pkgs.mkShell {
    name = "default";
    description = "contains numpy and sentencepiece";
    inputsFrom = [ pkgs.llama-cpp ];
    buildInputs = [ llama-python ];
  };

  extra = pkgs.mkShell {
    name = "extra";
    description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
    inputsFrom = [ pkgs.llama-cpp ];
    buildInputs = [ llama-python-extra ];
  };
}
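
Once exposed through the flake's `devShells` output (see flake.nix below), these environments should be reachable with `nix develop` for the default shell and `nix develop .#extra` for the heavier one; both pull in llama.cpp's own build dependencies via `inputsFrom`.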
40 changes: 3 additions & 37 deletions flake.lock

Some generated files are not rendered by default.
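(The lock file changes are not shown here, but since the rewritten flake.nix below drops the flake-utils input, the deletions are presumably mostly the flake-utils entry and its transitive lock entries.)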

178 changes: 44 additions & 134 deletions flake.nix
@@ -1,139 +1,49 @@
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };
  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        name = "llama.cpp";
        src = ./.;
        meta.mainProgram = "llama";
        inherit (pkgs.stdenv) isAarch32 isAarch64 isDarwin;
        buildInputs = with pkgs; [ openmpi ];
        osSpecific = with pkgs; buildInputs ++ (
          if isAarch64 && isDarwin then
            with pkgs.darwin.apple_sdk_11_0.frameworks; [
              Accelerate
              MetalKit
            ]
          else if isAarch32 && isDarwin then
            with pkgs.darwin.apple_sdk.frameworks; [
              Accelerate
              CoreGraphics
              CoreVideo
            ]
          else if isDarwin then
            with pkgs.darwin.apple_sdk.frameworks; [
              Accelerate
              CoreGraphics
              CoreVideo
            ]
          else
            with pkgs; [ openblas ]
        );
        pkgs = import nixpkgs { inherit system; };
        nativeBuildInputs = with pkgs; [ cmake ninja pkg-config ];
        cudatoolkit_joined = with pkgs; symlinkJoin {
          # HACK(Green-Sky): nix currently has issues with cmake findcudatoolkit
          # see https://github.com/NixOS/nixpkgs/issues/224291
          # copied from jaxlib
          name = "${cudaPackages.cudatoolkit.name}-merged";
          paths = [
            cudaPackages.cudatoolkit.lib
            cudaPackages.cudatoolkit.out
          ] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [
            # for some reason some of the required libs are in the targets/x86_64-linux
            # directory; not sure why but this works around it
            "${cudaPackages.cudatoolkit}/targets/${system}"
          ];
        };
        llama-python =
          pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]);
        # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
        llama-python-extra =
          pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]);
        postPatch = ''
          substituteInPlace ./ggml-metal.m \
            --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
          substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python'
        '';
        postInstall = ''
          mv $out/bin/main $out/bin/llama
          mv $out/bin/server $out/bin/llama-server
          mkdir -p $out/include
          cp ${src}/llama.h $out/include/
        '';
        cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ];
      in
      {
        packages.default = pkgs.stdenv.mkDerivation {
          inherit name src meta postPatch nativeBuildInputs postInstall;
          buildInputs = osSpecific;
          cmakeFlags = cmakeFlags
            ++ (if isAarch64 && isDarwin then [
              "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
              "-DLLAMA_METAL=ON"
            ] else [
              "-DLLAMA_BLAS=ON"
              "-DLLAMA_BLAS_VENDOR=OpenBLAS"
            ]);
        };
        packages.opencl = pkgs.stdenv.mkDerivation {
          inherit name src meta postPatch nativeBuildInputs postInstall;
          buildInputs = with pkgs; buildInputs ++ [ clblast ];
          cmakeFlags = cmakeFlags ++ [
            "-DLLAMA_CLBLAST=ON"
          ];
        };
        packages.cuda = pkgs.stdenv.mkDerivation {
          inherit name src meta postPatch nativeBuildInputs postInstall;
          buildInputs = with pkgs; buildInputs ++ [ cudatoolkit_joined ];
          cmakeFlags = cmakeFlags ++ [
            "-DLLAMA_CUBLAS=ON"
          ];
        };
        packages.rocm = pkgs.stdenv.mkDerivation {
          inherit name src meta postPatch nativeBuildInputs postInstall;
          buildInputs = with pkgs.rocmPackages; buildInputs ++ [ clr hipblas rocblas ];
          cmakeFlags = cmakeFlags ++ [
            "-DLLAMA_HIPBLAS=1"
            "-DCMAKE_C_COMPILER=hipcc"
            "-DCMAKE_CXX_COMPILER=hipcc"
            # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
            # in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
            # and select the line that matches the current nixpkgs version of rocBLAS.
            "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
          ];
        };
        apps.llama-server = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/llama-server";
        };
        apps.llama-embedding = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/embedding";
        };
        apps.llama = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/llama";
        };
        apps.quantize = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/quantize";
        };
        apps.train-text-from-scratch = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/train-text-from-scratch";
        };
        apps.default = self.apps.${system}.llama;
        devShells.default = pkgs.mkShell {
          buildInputs = [ llama-python ];
          packages = nativeBuildInputs ++ osSpecific;
        };
        devShells.extra = pkgs.mkShell {
          buildInputs = [ llama-python-extra ];
          packages = nativeBuildInputs ++ osSpecific;
        };
      });

  outputs = { self, nixpkgs }:

    let
      inherit (nixpkgs.lib) genAttrs;
      overlays = import ./overlays.nix;
      importNixpkgs = system: import nixpkgs {
        inherit system;
        overlays = [ overlays ];
      };
      systems = [ "aarch64-darwin" "aarch64-linux" "x86_64-darwin" "x86_64-linux" ];
      withSystemPackages = f: genAttrs systems (system: f (importNixpkgs system));
    in

    {
      # These define the various ways to build the llama.cpp project.
      # Integrate them into your flake.nix configuration by adding this
      # overlay to nixpkgs.overlays.
      overlays = {
        default = overlays;
      };

      # These use the definitions from ./overlays.nix and expose them as installables.
      packages = withSystemPackages (pkgs: {
        default = pkgs.llama-cpp;
        opencl = pkgs.llama-cpp-opencl;
        cuda = pkgs.llama-cpp-cuda;
        rocm = pkgs.llama-cpp-rocm;
      });

      # These use the definition of llama-cpp from ./overlays.nix and expose various
      # binaries as apps so that they're able to be run with `nix run`.
      apps = withSystemPackages (import ./apps.nix [
        "llama"
        "llama-embedding"
        "llama-server"
        "quantize"
        "train-text-from-scratch"
      ]);

      # These expose a build environment for either a "default" or an "extra" set of
      # dependencies.
      devShells = withSystemPackages (import ./devshells.nix);
    };
}
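
The diff for ./overlays.nix is not included in the excerpt above. Based on the commit message (a single overlay providing `llama-cpp`, `llama-cpp-opencl`, `llama-cpp-cuda`, and `llama-cpp-rocm`, packaged via `callPackage`), a minimal sketch of its shape might look like this; the `./llama-cpp.nix` path and the per-variant arguments are assumptions, not the actual file contents:

```nix
# Hypothetical reconstruction of ./overlays.nix; the real file may differ.
final: prev:

{
  llama-cpp        = final.callPackage ./llama-cpp.nix { };
  llama-cpp-opencl = final.callPackage ./llama-cpp.nix { useOpenCL = true; };
  llama-cpp-cuda   = final.callPackage ./llama-cpp.nix { useCuda = true; };
  llama-cpp-rocm   = final.callPackage ./llama-cpp.nix { useRocm = true; };
}
```

Whatever its exact contents, the rewritten flake.nix only requires that `import ./overlays.nix` evaluate to a single overlay function, since that value is placed directly into the `overlays` list passed to `import nixpkgs` and re-exported as `overlays.default`.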
