|
47 | 47 | ParallelForExtraCode, ParallelForWrongLoopCount,
|
48 | 48 | omp_in_parallel, omp_get_level, omp_get_active_level,
|
49 | 49 | omp_get_team_size, omp_get_ancestor_thread_num,
|
50 |
| - omp_get_team_num, omp_get_num_teams, omp_in_final) |
| 50 | + omp_get_team_num, omp_get_num_teams, omp_in_final, omp_shared_array) |
51 | 51 | import cmath
|
52 | 52 | import unittest
|
53 | 53 |
|
@@ -3796,6 +3796,52 @@ def test_impl():
|
3796 | 3796 | r = test_impl()
|
3797 | 3797 | np.testing.assert_array_equal(r, np.arange(10) * 10)
|
3798 | 3798 |
|
def target_teams_shared_array_2d(self, device):
    """Check a 2-D array allocated inside a ``target teams`` region.

    Launches a ``target teams`` region requesting 10 teams on ``device``.
    Each team allocates a 2x2 int32 array inside the region, fills it with
    its own team number and copies it into ``a[team]``. The test then
    asserts that every 2x2 slice ``a[t]`` holds the value ``t``.

    Args:
        device: Value interpolated into the ``device(...)`` clause of the
            OpenMP pragma.
    """
    target_pragma = f"target teams num_teams(10) map(tofrom: a) device({device})"

    @njit
    def test_impl():
        a = np.zeros((10, 2, 2), dtype=np.int32)

        with openmp(target_pragma):
            # Allocated inside the teams region on purpose: the allocation
            # itself is what this test exercises.
            team_shared_array = np.empty((2, 2), dtype=np.int32)
            team = omp_get_team_num()
            for i in range(2):
                for j in range(2):
                    team_shared_array[i, j] = team

            # Copy back element by element so the result reaches the
            # mapped output array.
            for i in range(2):
                for j in range(2):
                    a[team, i, j] = team_shared_array[i, j]
        return a

    a = test_impl()
    # Slice t of the expected result is a 2x2 block filled with t; use the
    # same dtype as the kernel output instead of np.empty's float64 default.
    expected = np.broadcast_to(
        np.arange(10, dtype=np.int32)[:, None, None], (10, 2, 2)
    )
    np.testing.assert_array_equal(a, expected)
| 3822 | + |
def target_local_array(self, device):
    """Check a 1-D array allocated inside a nested ``parallel`` region.

    Launches a ``target teams`` region requesting one team on ``device``
    and, inside it, a ``parallel`` region requesting 32 threads. Each
    thread allocates a 10-element int32 array, fills it with its own
    thread number and copies it into row ``a[tid]``. The test then asserts
    that row ``t`` of the result holds the value ``t`` for all 32 rows, so
    it expects the runtime to actually supply 32 threads.

    Args:
        device: Value interpolated into the ``device(...)`` clause of the
            OpenMP pragma.
    """
    target_pragma = f"target teams num_teams(1) map(tofrom: a) device({device})"

    @njit
    def test_impl():
        a = np.zeros((32, 10), dtype=np.int32)
        with openmp(target_pragma):
            with openmp("parallel num_threads(32)"):
                # Allocated inside the parallel region on purpose: the
                # allocation itself is what this test exercises.
                local_array = np.empty(10, dtype=np.int32)
                tid = omp_get_thread_num()
                for i in range(10):
                    local_array[i] = tid
                # Route the value through local_array rather than writing
                # tid directly, so the array's contents are verified.
                for i in range(10):
                    a[tid, i] = local_array[i]
        return a

    a = test_impl()
    # Row t of the expected result is ten copies of t.
    expected = np.broadcast_to(
        np.arange(32, dtype=np.int32)[:, None], (32, 10)
    )
    np.testing.assert_array_equal(a, expected)
| 3844 | + |
3799 | 3845 | def target_teams_parallel_shared_array(self, device):
|
3800 | 3846 | target_pragma = f"target teams num_teams(10) map(tofrom: outside) device({device})"
|
3801 | 3847 | @njit
|
|
0 commit comments