
Functorch nvfuser revisions #363

Closed
wants to merge 27 commits
Changes from 1 commit (of 27 commits)
9474fc3
added nvfuser implementation, benchmark for biasReluDropout
Jul 7, 2022
5ea028e
reformatted fuse pattern
Jul 8, 2022
8453069
revised benchmarking, nvfused patterns
Jul 11, 2022
fdd6b16
adds BiasDropoutRes and BiasDropoutResLayernorm patterns, minor edits
Jul 13, 2022
291f439
unit testing for all fused patterns, minor edits
Jul 19, 2022
5004562
benchmarking for all nvfused patterns
Jul 19, 2022
ea85ea4
mypy wip
Jul 19, 2022
568c09a
benchmarking nvfuser patterns, adding plots, minor testing changes
Jul 22, 2022
7c7f6de
fixing mypy errors
Jul 25, 2022
8c59bb9
fixed benchmarking bug, minor test change
Jul 25, 2022
fd82a43
final benchmark plots, benchmark edits
Jul 25, 2022
bd4499a
nvfuser documentation, minor edits
Jul 26, 2022
b004d87
fixing functorch version error, documentation revisions
Jul 26, 2022
14cc332
Merge branch 'main' into op_fusion_functorch
yuanandonly Jul 26, 2022
9ea013a
fixing circleci functorch errors, mypy errors
Jul 26, 2022
c774755
circleci config wip
Jul 27, 2022
4f18220
circleci test wip
Jul 27, 2022
d5e0765
wip2
Jul 27, 2022
477c208
testing revisions, circleci fixes, minor changes
Jul 27, 2022
7d9d659
changelog changes, fixes functorch flag bug
Jul 27, 2022
339a556
circle-ci fix
Jul 27, 2022
5d8221d
circle-ci spacing fix
Jul 27, 2022
d9199f0
build error wip
Jul 27, 2022
bcf746e
revised documentation, reverted circleci config
Jul 27, 2022
bd5b799
Fix functorch errors, circleci issue, testing changes
yuanandonly Jul 27, 2022
a6f3221
updating changelog
yuanandonly Jul 28, 2022
33431d0
added mlp plots, mlp functionality to switch weights to nvfused mlp
yuanandonly Aug 11, 2022
fixed benchmarking bug, minor test change
Chris Yuan committed Jul 25, 2022
commit 8c59bb9318736a811d35e26848a71e1d56a339f1
8 changes: 7 additions & 1 deletion tests/test_nvfuser.py
@@ -4,6 +4,8 @@
 # LICENSE file in the root directory of this source tree.


+import logging
+
 import pytest
 import torch
 import torch.nn as nn
@@ -15,13 +17,17 @@

 _gpu_available = torch.cuda.is_available()

-if xformers._is_functorch_available:
+xformers._is_functorch_available = True
+
+try:
     from xformers.components.nvfuser import (
         NVFusedBiasActivationDropout,
         NVFusedBiasDropoutRes,
         NVFusedBiasDropoutResLayerNorm,
     )
     from xformers.components.nvfuser.utils import build_nvfused
+except ImportError as e:
+    logging.warning(f"Functorch is not available in test_nvfuser.py. \nError {e}")

 FUSED_PATTERNS = (
     [
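The hunk above swaps a hard flag check for an optional-import guard: the imports are attempted, and on failure the test module logs a warning instead of failing collection. A minimal standalone sketch of that pattern (the module name `fancy_fused_ops` and the flag `fused_available` are illustrative, not xformers names):

```python
import logging

# Optional-import guard: try to pull in an accelerator-specific module and fall
# back gracefully when it is missing, so the rest of the module still loads.
# "fancy_fused_ops" is a hypothetical optional dependency.
fused_available = True
try:
    import fancy_fused_ops  # noqa: F401
except ImportError as e:
    fused_available = False
    logging.warning(f"Fused ops are not available, skipping related tests. Error: {e}")

print(fused_available)
```

Tests that need the fused ops can then be gated on `fused_available` (e.g. with `pytest.mark.skipif`) rather than erroring at import time.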
10 changes: 5 additions & 5 deletions xformers/benchmarks/benchmark_nvfuser.py
@@ -183,14 +183,14 @@ def step(fn, residual, x):
     for testcase in testcases:
         torch.cuda.empty_cache()
         torch.cuda.reset_peak_memory_stats()
-        # torch.cuda.synchronize()
+        torch.cuda.synchronize()

         time = triton.testing.do_bench(
             lambda: testcase.function(x=a), grad_to_none=[a, b]
         )[0]

-        # torch.cuda.synchronize()
-        max_memory = torch.cuda.max_memory_allocated() / 2**20
+        torch.cuda.synchronize()
+        max_memory = torch.cuda.max_memory_allocated() // 2**20

         key = f"B={B}, M={M}, K={K}"
         if key not in results:
@@ -211,7 +211,7 @@ def step(fn, residual, x):
             units="GB/s",
         )
         pretty_print(
-            results,
+            results_mem,
             title="\n --- PEAK MEMORY Type: {} {} --- ".format(pattern_str, dtype),
             units="MB",
         )
@@ -230,7 +230,7 @@ def step(fn, residual, x):
             legend_loc="upper left",
         )
         pretty_plot(
-            results,
+            results_mem,
             title="MAXMEM-{}-FW{}-{}{}-{}{}".format(
                 pattern_str,
                 "+BW" if backward else "",
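The benchmarking bug this commit fixes is a bookkeeping slip: the peak-memory table and plot were fed the bandwidth dict (`results`) instead of the memory dict (`results_mem`), and the byte count was converted with float division (`/`) rather than floor division (`//`), so "MB" values printed as floats. A small self-contained sketch of the corrected bookkeeping; `to_mib` and `record` are illustrative helpers, not xformers APIs:

```python
def to_mib(num_bytes: int) -> int:
    # Whole mebibytes, matching the diff's switch from / to // (floor division).
    return num_bytes // 2**20

def record(table: dict, key: str, name: str, value) -> None:
    # Each table maps a shape key like "B=8, M=512, K=1024" to {testcase: measurement}.
    table.setdefault(key, {})[name] = value

results: dict = {}      # bandwidth numbers (GB/s) -> pass to the bandwidth report
results_mem: dict = {}  # peak memory (MiB)        -> pass to the memory report

key = "B=8, M=512, K=1024"
record(results, key, "eager", 123.4)
record(results_mem, key, "eager", to_mib(300 * 2**20 + 12345))

print(results_mem[key]["eager"])  # 300
```

Keeping the two dicts strictly separated, and naming them distinctly at every `pretty_print`/`pretty_plot` call site, is exactly what the `results` → `results_mem` change above restores.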