ELS-RD · pommedeterresautee · Jan 31, 2023 · Jan 29, 2023 · Jan 29, 2023 · Jan 29, 2023
diff --git a/Dockerfile b/Dockerfile
@@ -1,4 +1,4 @@
-FROM nvidia/cuda:11.6.1-devel-ubuntu20.04
+FROM nvidia/cuda:12.0.0-devel-ubuntu22.04
 ENV DEBIAN_FRONTEND=noninteractive
 
 ENV CUDA_INSTALL_PATH=/usr/local/cuda/
@@ -21,16 +21,16 @@ RUN apt-get install -y git \
     python3.9-dev \
     nano
 
-RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.8 1 && \
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.10 1 && \
   update-alternatives --install /usr/bin/python python /usr/bin/python3.9 2 && \
   update-alternatives --set python /usr/bin/python3.9 && \
-  update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.8 1 && \
+  update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1 && \
   update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.9 2 && \
   update-alternatives --set python3 /usr/bin/python3.9
 
 RUN python3.9 -m ensurepip --default-pip --upgrade
 
-RUN pip install --pre torch==2.0.0.dev20230119+cu117 --extra-index-url https://download.pytorch.org/whl/nightly/cu117
+RUN pip install --pre torch==2.0.0.dev20230128+cu117 --extra-index-url https://download.pytorch.org/whl/nightly/cu117
 
 
 WORKDIR /syncback

diff --git a/README.md b/README.md
@@ -35,7 +35,6 @@ If you prefer `Docker`:
 
 ```shell
 # build
-make docker_build
 DOCKER_BUILDKIT=1 docker build -t kernl .
 # run
 docker run --rm -it --gpus all -v $(pwd):/kernl kernl

diff --git a/experimental/whisper/README.md b/experimental/whisper/README.md
@@ -7,6 +7,6 @@ To run the notebook through shell, use the following command:
 DOCKER_BUILDKIT=1 docker build -t kernl .
 docker run --rm -it --gpus all -v $(pwd):/kernl kernl
 apt install libsndfile1-dev # used by a Python audio dependency
-pip install datasets soundfile librosa -q
+pip install datasets soundfile librosa jupyter notebook
 jupyter nbconvert --execute --clear-output experimental/whisper/speedup.ipynb --log-level=10
 ```
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
 triton==2.0.0.dev20221202
-torch==2.0.0.dev20230119+cu117
+torch==2.0.0.dev20230128+cu117
 pytest
 tabulate
 termcolor

diff --git a/src/kernl/optimizer/cuda_graph.py b/src/kernl/optimizer/cuda_graph.py
@@ -72,7 +72,7 @@ def cuda_graphs_wrapper(model: Callable, inputs: Union[list[torch.Tensor], tuple
         f = cudagraphify_impl(
             model=lambda args: model(*args), inputs=inputs, static_input_idxs=tuple(range(len(inputs)))
         )
-        return lambda args: f(get_static_inputs(args))
+        return lambda *args: f(get_static_inputs(args))
 
     compiled_fn = None
 

diff --git a/test/test_attention.py b/test/test_attention.py
@@ -206,7 +206,7 @@ def test_benchmark_skinny_cross_attention(benchmark, implementation, shape):
     output = torch.empty_like(q)
     fn = implementations_skinny_cross_attention[implementation](output, sm_scale)
     r = cuda_graphs_wrapper(fn, [q, k, v])
-    _ = r([q, k, v])[0]
-    result = benchmark(r, [q, k, v])[0]
+    _ = r(q, k, v)[0]
+    result = benchmark(r, q, k, v)[0]
 
     assert_all_close(a=expected, b=result.float(), atol=1e-2)
diff --git a/test/test_layer_norm.py b/test/test_layer_norm.py
@@ -67,7 +67,7 @@ def test_benchmark_layer_norm(benchmark, shape: int, dtype, cuda_graphs: bool, i
     if cuda_graphs:
         run = cuda_graphs_wrapper(model=fn, inputs=[x])
         # CUDA graphs wraps output in a tuple
-        fn = lambda tensor: run([tensor])[0]  # noqa: E731
+        fn = lambda tensor: run(tensor)[0]  # noqa: E731
 
     value = benchmark(fn, x)
     assert_all_close(value.float(), expected, atol=1e-1)
@@ -101,7 +101,7 @@ def test_benchmark_rms_norm(benchmark, shape: int, dtype, cuda_graphs: bool, imp
     if cuda_graphs:
         run = cuda_graphs_wrapper(model=fn, inputs=[x])
         # CUDA graphs wraps output in a tuple
-        fn = lambda tensor: run([tensor])[0]  # noqa: E731
+        fn = lambda tensor: run(tensor)[0]  # noqa: E731
 
     value = benchmark(fn, x)
     assert_all_close(value.float(), expected, atol=1e-1)

diff --git a/test/test_linear_layer.py b/test/test_linear_layer.py
@@ -92,7 +92,7 @@ def test_benchmark(
     if cuda_graphs:
         run = cuda_graphs_wrapper(model=fn, inputs=[x])
         # CUDA graphs wraps output in a tuple
-        fn = lambda tensor: run([tensor])[0]  # noqa: E731
+        fn = lambda tensor: run(tensor)[0]  # noqa: E731
 
     value = benchmark(fn, x)