@@ -15803,7 +15803,7 @@ def test_softmax_results(self, device, dtype):
     @largeTensorTest("20GB")
     @largeTensorTest("90GB", "cpu")
     @precisionOverride({torch.half: 0.001})
-    def test_softmax_64bit_indexing(self, device, dtype):
+    def test_warp_softmax_64bit_indexing(self, device, dtype):
         def run_test(*shape):
             x = torch.randn(shape, device="cuda", dtype=torch.float16, requires_grad=True)
             y = F.log_softmax(x, dim=-1, dtype=dtype)
@@ -15818,6 +15818,22 @@ def run_test(*shape):
         run_test(1100000000, 2) # Illegal memory access https://github.com/pytorch/pytorch/issues/52715
         run_test(2200000000, 1) # invalid configuration argument https://github.com/pytorch/pytorch/issues/52716
 
+    @onlyCUDA
+    @dtypes(torch.half)
+    @largeTensorTest("20GB")
+    @largeTensorTest("90GB", "cpu")
+    @precisionOverride({torch.half: 0.001})
+    def test_softmax_64bit_indexing(self, device, dtype):
+        def run_test(*shape):
+            x = torch.ones(shape, device=device, dtype=dtype, requires_grad=True)
+            y = F.log_softmax(x, dim=-1, dtype=dtype)
+            y.backward(y)
+            self.assertEqual(y[0], y[-1])
+            self.assertEqual(x.grad[0], x.grad[-1])
+
+        run_test(1024 * 256 + 1, 8192) # https://github.com/pytorch/pytorch/issues/84144
+
+
     @dtypes(torch.float)
     @dtypesIfCUDA(torch.float, torch.half)
     def test_log_softmax_big(self, device, dtype):
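
For reference, a minimal standalone sketch of what the added test_softmax_64bit_indexing exercises outside the test harness. It assumes a CUDA GPU with roughly 20GB of free memory; the shape mirrors the regression case from https://github.com/pytorch/pytorch/issues/84144, where a rowwise log_softmax over more than 2**31 elements overflowed 32-bit indexing, and the 1e-3 tolerance mirrors the test's precisionOverride for half.

import torch
import torch.nn.functional as F

# Just over 2**31 total elements, which forces the kernel onto the
# 64-bit indexing path: 262145 * 8192 = 2147491840 > 2**31.
shape = (1024 * 256 + 1, 8192)

# torch.ones makes every row identical, so every row of the output and
# of the gradient should match; an indexing overflow corrupts tail rows.
x = torch.ones(shape, device="cuda", dtype=torch.half, requires_grad=True)
y = F.log_softmax(x, dim=-1, dtype=torch.half)
y.backward(y)

# Mirror the test's first-row/last-row assertions.
assert torch.allclose(y[0], y[-1], atol=1e-3)
assert torch.allclose(x.grad[0], x.grad[-1], atol=1e-3)

Using torch.ones rather than the torch.randn of the existing test keeps rows bit-identical, which is presumably what lets the simple first-row/last-row comparison detect corruption anywhere in the tensor.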