diff --git a/Dockerfile b/Dockerfile
index de030f85..8c2efa85 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,5 +1,5 @@
# syntax=docker/dockerfile:1.7-labs
-FROM nvcr.io/nvidia/pytorch:24.07-py3
+FROM nvcr.io/nvidia/pytorch:24.11-py3
# Install dependencies.
RUN apt-get update \
@@ -20,9 +20,9 @@ RUN mkdir -m 777 /app/Megatron-LM /app/examples /app/fast_llm /app/tests /app/to
/usr/local \
/usr/local/bin \
/usr/local/lib \
- /usr/local/lib/python3.10 \
- /usr/local/lib/python3.10/dist-packages \
- /usr/local/lib/python3.10/dist-packages/__pycache__
+ /usr/local/lib/python3.12 \
+ /usr/local/lib/python3.12/dist-packages \
+ /usr/local/lib/python3.12/dist-packages/__pycache__
# Copy dependency files with universal write permissions for all users.
COPY --chmod=777 setup.py setup.cfg pyproject.toml ./
diff --git a/README.md b/README.md
index 9da114bb..d02e7f95 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-

+

[![Docker][ci-badge]][ci-workflow]
[![Documentation][docs-badge]][docs-workflow]
diff --git a/docs/assets/images/logo.png b/docs/assets/images/logo.png
deleted file mode 100644
index 6141c4dd..00000000
Binary files a/docs/assets/images/logo.png and /dev/null differ
diff --git a/docs/assets/images/logo.svg b/docs/assets/images/logo.svg
new file mode 100644
index 00000000..bd534f63
--- /dev/null
+++ b/docs/assets/images/logo.svg
@@ -0,0 +1 @@
+
\ No newline at end of file
diff --git a/fast_llm/__init__.py b/fast_llm/__init__.py
index 3dc1f76b..d3ec452c 100644
--- a/fast_llm/__init__.py
+++ b/fast_llm/__init__.py
@@ -1 +1 @@
-__version__ = "0.1.0"
+__version__ = "0.2.0"
diff --git a/fast_llm/functional/triton/mlp.py b/fast_llm/functional/triton/mlp.py
index db8188d7..ac01d362 100644
--- a/fast_llm/functional/triton/mlp.py
+++ b/fast_llm/functional/triton/mlp.py
@@ -25,6 +25,9 @@
from fast_llm.tensor import param_get_and_unset_is_zero
from triton import language as tl
+# Triton requires global variables referenced inside jitted kernels to be annotated with `tl.constexpr`.
+_TritonActivationType: tl.constexpr = ActivationType
+
@triton.jit
def triton_mlp_activation_forward_kernel(
@@ -47,15 +50,15 @@ def triton_mlp_activation_forward_kernel(
input_ = tl.load(input_ptr, mask=mask).to(tl.float32)
- if activation_type == ActivationType.gelu:
+ if activation_type == _TritonActivationType.gelu.value:
tanh_input = 0.79788456 * input_ * (1 + 0.044715 * input_ * input_)
tanh = 1 - 2 / (1 + tl.exp(2 * tanh_input))
out = input_ * 0.5 * (1.0 + tanh)
- elif activation_type == ActivationType.silu:
+ elif activation_type == _TritonActivationType.silu.value:
out = input_ / (1 + tl.exp(-input_))
- elif activation_type == ActivationType.relu:
+ elif activation_type == _TritonActivationType.relu.value:
out = tl.where(input_ > 0, input_, 0)
- elif activation_type == ActivationType.squared_relu:
+    elif activation_type == _TritonActivationType.squared_relu.value:
relu_out = tl.where(input_ > 0, input_, 0)
out = relu_out * relu_out
else:
@@ -95,23 +98,23 @@ def triton_mlp_activation_backward_kernel(
input_ = tl.load(input_ptr, mask=mask).to(tl.float32)
output_grad = tl.load(grad_output_ptr + output_offsets, mask=mask).to(tl.float32)
- if activation_type == ActivationType.gelu:
+ if activation_type == _TritonActivationType.gelu:
tanh_input = 0.79788456 * input_ * (1 + 0.044715 * input_ * input_)
tanh = 1 - 2 / (1 + tl.exp(2 * tanh_input))
grad = 0.5 * input_ * ((1 - tanh * tanh) * (0.79788456 + 0.1070322243 * input_ * input_)) + 0.5 * (1 + tanh)
if gated or recompute:
out = input_ * 0.5 * (1.0 + tanh)
- elif activation_type == ActivationType.silu:
+ elif activation_type == _TritonActivationType.silu:
exp = tl.exp(-input_)
sigma = 1 / (1 + exp)
grad = sigma * sigma + (1 + input_) / (2 + exp + 1 / exp)
if gated or recompute:
out = input_ * sigma
- elif activation_type == ActivationType.relu:
+ elif activation_type == _TritonActivationType.relu:
grad = tl.where(input_ > 0, 1, 0)
if gated or recompute:
out = tl.where(input_ > 0, input_, 0)
- elif activation_type == ActivationType.squared_relu:
+ elif activation_type == _TritonActivationType.squared_relu:
relu_out = tl.where(input_ > 0, input_, 0)
grad = 2 * relu_out
if gated or recompute:
@@ -148,7 +151,7 @@ def triton_mlp_activation_forward(
input_,
output,
gated=gated, # noqa
- activation_type=activation_type, # noqa
+ activation_type=activation_type.value, # noqa
n_cols=n_cols, # noqa
block_size=TritonConfig.POINTWISE_BLOCK_SIZE,
)
diff --git a/mkdocs.yaml b/mkdocs.yaml
index 4a137fcf..eaec87d4 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -18,8 +18,8 @@ copyright: Copyright 2024 ServiceNow, Inc.
theme:
name: material
custom_dir: docs/overrides
- logo: assets/images/logo.png
- favicon: assets/images/logo.png
+ logo: assets/images/logo.svg
+ favicon: assets/images/logo.svg
icon:
repo: fontawesome/brands/github
features:
@@ -58,15 +58,15 @@ theme:
name: Switch to light mode
- media: "(prefers-color-scheme: light)"
scheme: default
- primary: indigo
- accent: indigo
+ primary: white
+ accent: white
toggle:
icon: material/toggle-switch
name: Switch to dark mode
- media: "(prefers-color-scheme: dark)"
scheme: slate
primary: black
- accent: indigo
+ accent: white
toggle:
icon: material/toggle-switch-off
name: Switch to system preference
diff --git a/setup.cfg b/setup.cfg
index 5429dc91..95ec3b69 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,7 +1,7 @@
[metadata]
name = fast_llm
# TODO: Take from __init__.py instead?
-version = 0.1.0
+version = 0.2.0
[options]
packages = find_namespace:
@@ -25,7 +25,7 @@ CORE =
# Used for checkpoints
safetensors>=0.4.4
# Update the base image (version fixed to ensure there is a wheel for the base image), may need --no-build-isolation
- flash-attn==2.6.3
+ flash-attn==2.7.2.post1
# Required for some optional features and tools.
OPTIONAL =
@@ -45,6 +45,8 @@ OPTIONAL =
DEV =
pytest>=8.3.2
pytest-depends>=1.0.1
+ # Somehow needed for Megatron to work with base image 24.11
+ setuptools>=75.6.0
# Required for building the documentation
DOCS =