Commit ff9c574

Re-enable llama3 runtime (#164)
* Re-enable the llama3 runtime. This is a copy of #163, but pointing at main.
* Add back the verbose flag; it's useful for debugging.
1 parent: 38ab40e

1 file changed: +13 −13 lines

examples/example_llama3.py

@@ -192,23 +192,23 @@ def add_tp_constraints(autop):
     if enable_manual_constraint:
         add_tp_constraints(autop)
 
     t = time.time()
-    sharding_placement = autop.optimize_placement(verbose=False)
+    sharding_placement = autop.optimize_placement(verbose=True)
     print(f"Took {time.time() - t:.2f} s")
     parallel_mod = autop.apply_placement(sharding_placement)
 
     # run weight init on our sharded DTensor params
-    # parallel_mod.to_empty(device="cuda")
-    # parallel_mod.init_weights()
+    parallel_mod.to_empty(device="cuda")
+    parallel_mod.init_weights()
 
     # now let's run it
-    # x = (
-    #     torch.randint(
-    #         0,
-    #         vocab_size,
-    #         (batch_size // mesh.shape[0], seqlen),
-    #         device=torch.device("cuda"),
-    #     ),
-    # )
-    # out = parallel_mod(*x)
-    # out.backward(torch.randn_like(out))
+    x = (
+        torch.randint(
+            0,
+            vocab_size,
+            (batch_size // mesh.shape[0], seqlen),
+            device=torch.device("cuda"),
+        ),
+    )
+    out = parallel_mod(*x)
+    out.backward(torch.randn_like(out))
     print("All good!")
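For reference, here is a sketch of how the affected section of examples/example_llama3.py reads after this commit. It is not standalone: `autop` (the auto-parallel optimizer object), `mesh`, `vocab_size`, `batch_size`, and `seqlen` are all constructed earlier in the example and are assumed here; the surrounding indentation (the `if enable_manual_constraint:` guard) is likewise a plausible reconstruction, not shown in this hunk.

import time

import torch

# Assumed context: autop, mesh, vocab_size, batch_size, and seqlen are
# defined earlier in examples/example_llama3.py and are not redefined here.

t = time.time()
# verbose=True was added back by this commit; the extra output helps debugging.
sharding_placement = autop.optimize_placement(verbose=True)
print(f"Took {time.time() - t:.2f} s")
parallel_mod = autop.apply_placement(sharding_placement)

# Run weight init on the sharded DTensor params (re-enabled by this commit).
parallel_mod.to_empty(device="cuda")
parallel_mod.init_weights()

# Forward/backward smoke test with random token ids; the batch is split
# across the first mesh dimension (also re-enabled by this commit).
x = (
    torch.randint(
        0,
        vocab_size,
        (batch_size // mesh.shape[0], seqlen),
        device=torch.device("cuda"),
    ),
)
out = parallel_mod(*x)
out.backward(torch.randn_like(out))
print("All good!")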
