flake.nix: rewrite
1. Split into separate files per output.

2. Added overlays, so that this flake can be integrated into others;
   a consumption sketch follows this list. The names in the overlay are
   `llama-cpp`, `llama-cpp-opencl`, `llama-cpp-cuda`, and `llama-cpp-rocm`,
   so that they fit into the broader set of Nix packages from
   [nixpkgs](https://github.com/nixos/nixpkgs).

3. Use [callPackage](https://summer.nixos.org/blog/callpackage-a-tool-for-the-lazy/)
   rather than `with pkgs;` so that there's dependency injection rather
   than dependency lookup.

4. Add a description and meta information for each package.
   The description includes a bit about what each one is trying to accelerate.
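
For illustration, a downstream flake might consume the new overlay roughly as follows; the flake URL, the chosen system, and the input name here are placeholders rather than anything defined by this commit:

```nix
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    # Placeholder pin; point this at wherever you track llama.cpp.
    llama-cpp.url = "github:ggerganov/llama.cpp";
  };

  outputs = { self, nixpkgs, llama-cpp }:
    let
      # Applying the overlay makes pkgs.llama-cpp, pkgs.llama-cpp-opencl,
      # pkgs.llama-cpp-cuda, and pkgs.llama-cpp-rocm available.
      pkgs = import nixpkgs {
        system = "x86_64-linux";
        overlays = [ llama-cpp.overlays.default ];
      };
    in
    {
      packages.x86_64-linux.default = pkgs.llama-cpp;
    };
}
```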
philiptaron committed Dec 22, 2023
1 parent 7082d24 commit 2e53df7
Showing 6 changed files with 231 additions and 171 deletions.
14 changes: 14 additions & 0 deletions apps.nix
@@ -0,0 +1,14 @@
names: pkgs:

let
  default = builtins.elemAt names 0;
  mkApp = name: {
    ${name} = {
      type = "app";
      program = "${pkgs.llama-cpp}/bin/${name}";
    };
  };
  result = builtins.foldl' (acc: name: (mkApp name) // acc) {} names;
in

result // { default = result.${default}; }
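
For orientation, this is roughly the attribute set apps.nix evaluates to for one system if it were called with only two names, say `names = [ "llama" "llama-server" ]` (the real call in flake.nix below passes five); the `program` strings are shown uninterpolated and become store paths at evaluation time:

```nix
{
  default      = { type = "app"; program = "${pkgs.llama-cpp}/bin/llama"; };
  llama        = { type = "app"; program = "${pkgs.llama-cpp}/bin/llama"; };
  llama-server = { type = "app"; program = "${pkgs.llama-cpp}/bin/llama-server"; };
}
```

Because `mkApp` builds a one-attribute set keyed by each name and `builtins.foldl'` merges them, `default` ends up as an alias for the first name in the list.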
32 changes: 32 additions & 0 deletions devshells.nix
@@ -0,0 +1,32 @@
pkgs:

let
  llama-python = pkgs.python3.withPackages (ps: [
    ps.numpy
    ps.sentencepiece
  ]);

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = pkgs.python3.withPackages (ps: [
    ps.numpy
    ps.sentencepiece
    ps.torchWithoutCuda
    ps.transformers
  ]);
in

{
  default = pkgs.mkShell {
    name = "default";
    description = "contains numpy and sentencepiece";
    inputsFrom = [ pkgs.llama-cpp ];
    buildInputs = [ llama-python ];
  };

  extra = pkgs.mkShell {
    name = "extra";
    description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
    inputsFrom = [ pkgs.llama-cpp ];
    buildInputs = [ llama-python-extra ];
  };
}
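
Once exposed through the flake's `devShells` output (see flake.nix below), these environments should be reachable with `nix develop` for the default shell and `nix develop .#extra` for the heavier one; both pull in llama.cpp's own build dependencies via `inputsFrom`.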
40 changes: 3 additions & 37 deletions flake.lock

Some generated files are not rendered by default.
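(The lock file changes are not shown here, but since the rewritten flake.nix below drops the flake-utils input, the deletions are presumably mostly the flake-utils entry and its transitive lock entries.)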

178 changes: 44 additions & 134 deletions flake.nix
@@ -1,139 +1,49 @@
{
  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-utils.url = "github:numtide/flake-utils";
  };
  outputs = { self, nixpkgs, flake-utils }:
    flake-utils.lib.eachDefaultSystem (system:
      let
        name = "llama.cpp";
        src = ./.;
        meta.mainProgram = "llama";
        inherit (pkgs.stdenv) isAarch32 isAarch64 isDarwin;
        buildInputs = with pkgs; [ openmpi ];
        osSpecific = with pkgs; buildInputs ++ (
          if isAarch64 && isDarwin then
            with pkgs.darwin.apple_sdk_11_0.frameworks; [
              Accelerate
              MetalKit
            ]
          else if isAarch32 && isDarwin then
            with pkgs.darwin.apple_sdk.frameworks; [
              Accelerate
              CoreGraphics
              CoreVideo
            ]
          else if isDarwin then
            with pkgs.darwin.apple_sdk.frameworks; [
              Accelerate
              CoreGraphics
              CoreVideo
            ]
          else
            with pkgs; [ openblas ]
        );
        pkgs = import nixpkgs { inherit system; };
        nativeBuildInputs = with pkgs; [ cmake ninja pkg-config ];
        cudatoolkit_joined = with pkgs; symlinkJoin {
          # HACK(Green-Sky): nix currently has issues with cmake findcudatoolkit
          # see https://github.com/NixOS/nixpkgs/issues/224291
          # copied from jaxlib
          name = "${cudaPackages.cudatoolkit.name}-merged";
          paths = [
            cudaPackages.cudatoolkit.lib
            cudaPackages.cudatoolkit.out
          ] ++ lib.optionals (lib.versionOlder cudaPackages.cudatoolkit.version "11") [
            # for some reason some of the required libs are in the targets/x86_64-linux
            # directory; not sure why but this works around it
            "${cudaPackages.cudatoolkit}/targets/${system}"
          ];
        };
        llama-python =
          pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece ]);
        # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
        llama-python-extra =
          pkgs.python3.withPackages (ps: with ps; [ numpy sentencepiece torchWithoutCuda transformers ]);
        postPatch = ''
          substituteInPlace ./ggml-metal.m \
            --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
          substituteInPlace ./*.py --replace '/usr/bin/env python' '${llama-python}/bin/python'
        '';
        postInstall = ''
          mv $out/bin/main $out/bin/llama
          mv $out/bin/server $out/bin/llama-server
          mkdir -p $out/include
          cp ${src}/llama.h $out/include/
        '';
        cmakeFlags = [ "-DLLAMA_NATIVE=OFF" "-DLLAMA_BUILD_SERVER=ON" "-DBUILD_SHARED_LIBS=ON" "-DCMAKE_SKIP_BUILD_RPATH=ON" ];
      in
      {
        packages.default = pkgs.stdenv.mkDerivation {
          inherit name src meta postPatch nativeBuildInputs postInstall;
          buildInputs = osSpecific;
          cmakeFlags = cmakeFlags
            ++ (if isAarch64 && isDarwin then [
              "-DCMAKE_C_FLAGS=-D__ARM_FEATURE_DOTPROD=1"
              "-DLLAMA_METAL=ON"
            ] else [
              "-DLLAMA_BLAS=ON"
              "-DLLAMA_BLAS_VENDOR=OpenBLAS"
            ]);
        };
        packages.opencl = pkgs.stdenv.mkDerivation {
          inherit name src meta postPatch nativeBuildInputs postInstall;
          buildInputs = with pkgs; buildInputs ++ [ clblast ];
          cmakeFlags = cmakeFlags ++ [
            "-DLLAMA_CLBLAST=ON"
          ];
        };
        packages.cuda = pkgs.stdenv.mkDerivation {
          inherit name src meta postPatch nativeBuildInputs postInstall;
          buildInputs = with pkgs; buildInputs ++ [ cudatoolkit_joined ];
          cmakeFlags = cmakeFlags ++ [
            "-DLLAMA_CUBLAS=ON"
          ];
        };
        packages.rocm = pkgs.stdenv.mkDerivation {
          inherit name src meta postPatch nativeBuildInputs postInstall;
          buildInputs = with pkgs.rocmPackages; buildInputs ++ [ clr hipblas rocblas ];
          cmakeFlags = cmakeFlags ++ [
            "-DLLAMA_HIPBLAS=1"
            "-DCMAKE_C_COMPILER=hipcc"
            "-DCMAKE_CXX_COMPILER=hipcc"
            # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
            # in github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
            # and select the line that matches the current nixpkgs version of rocBLAS.
            "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
          ];
        };
        apps.llama-server = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/llama-server";
        };
        apps.llama-embedding = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/embedding";
        };
        apps.llama = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/llama";
        };
        apps.quantize = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/quantize";
        };
        apps.train-text-from-scratch = {
          type = "app";
          program = "${self.packages.${system}.default}/bin/train-text-from-scratch";
        };
        apps.default = self.apps.${system}.llama;
        devShells.default = pkgs.mkShell {
          buildInputs = [ llama-python ];
          packages = nativeBuildInputs ++ osSpecific;
        };
        devShells.extra = pkgs.mkShell {
          buildInputs = [ llama-python-extra ];
          packages = nativeBuildInputs ++ osSpecific;
        };
      });

  outputs = { self, nixpkgs }:

    let
      inherit (nixpkgs.lib) genAttrs;
      overlays = import ./overlays.nix;
      importNixpkgs = system: import nixpkgs {
        inherit system;
        overlays = [ overlays ];
      };
      systems = [ "aarch64-darwin" "aarch64-linux" "x86_64-darwin" "x86_64-linux" ];
      withSystemPackages = f: genAttrs systems (system: f (importNixpkgs system));
    in

    {
      # These define the various ways to build the llama.cpp project.
      # Integrate them into your flake.nix configuration by adding this
      # overlay to nixpkgs.overlays.
      overlays = {
        default = overlays;
      };

      # These use the definitions from ./overlays.nix and expose them as installables.
      packages = withSystemPackages (pkgs: {
        default = pkgs.llama-cpp;
        opencl = pkgs.llama-cpp-opencl;
        cuda = pkgs.llama-cpp-cuda;
        rocm = pkgs.llama-cpp-rocm;
      });

      # These use the definition of llama-cpp from ./overlays.nix and expose various
      # binaries as apps so that they're able to be run with `nix run`.
      apps = withSystemPackages (import ./apps.nix [
        "llama"
        "llama-embedding"
        "llama-server"
        "quantize"
        "train-text-from-scratch"
      ]);

      # These expose a build environment for either a "default" or an "extra" set of
      # dependencies.
      devShells = withSystemPackages (import ./devshells.nix);
    };
}
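
The diff for ./overlays.nix is not included in the excerpt above. Based on the commit message (a single overlay providing `llama-cpp`, `llama-cpp-opencl`, `llama-cpp-cuda`, and `llama-cpp-rocm`, packaged via `callPackage`), a minimal sketch of its shape might look like this; the `./llama-cpp.nix` path and the per-variant arguments are assumptions, not the actual file contents:

```nix
# Hypothetical reconstruction of ./overlays.nix; the real file may differ.
final: prev:

{
  llama-cpp        = final.callPackage ./llama-cpp.nix { };
  llama-cpp-opencl = final.callPackage ./llama-cpp.nix { useOpenCL = true; };
  llama-cpp-cuda   = final.callPackage ./llama-cpp.nix { useCuda = true; };
  llama-cpp-rocm   = final.callPackage ./llama-cpp.nix { useRocm = true; };
}
```

Whatever its exact contents, the rewritten flake.nix only requires that `import ./overlays.nix` evaluate to a single overlay function, since that value is placed directly into the `overlays` list passed to `import nixpkgs` and re-exported as `overlays.default`.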
