Skip to content
This repository was archived by the owner on Apr 23, 2020. It is now read-only.

Commit 923dcc5

Browse files
Factor architecture dependent code out of loop.cu
Summary: [libomptarget] Factor architecture-dependent code out of loop.cu. Related to the patch series starting at D64217; subscribers to that series have been added as reviewers. This effort is smaller in scope: the patch factors out just enough architecture-dependent code from loop.cu to allow the same source to be used with amdgcn, given a different target_impl.h. Testing: the same bitcode (modulo variable names) is generated for libomptarget before and after the refactor, for both nvptx and the out-of-tree amdgcn. Reviewers: jdoerfert, ABataev, bollu, jfb, tra, grokos, Hahnfeld, guansong, xtian, gregrodgers, ronlieb, hfinkel, gtbercea, guraypp, arpith-jacob Reviewed By: jdoerfert, ABataev Subscribers: dexonsmith, openmp-commits Tags: #openmp Differential Revision: https://reviews.llvm.org/D65836 git-svn-id: https://llvm.org/svn/llvm-project/openmp/trunk@368751 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent ee9c0c5 commit 923dcc5

File tree

2 files changed

+49
-9
lines changed

2 files changed

+49
-9
lines changed

libomptarget/deviceRTLs/nvptx/src/loop.cu

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
//===----------------------------------------------------------------------===//
1414

1515
#include "omptarget-nvptx.h"
16+
#include "target_impl.h"
1617

1718
////////////////////////////////////////////////////////////////////////////////
1819
////////////////////////////////////////////////////////////////////////////////
@@ -381,20 +382,18 @@ public:
381382

382383
INLINE static int64_t Shuffle(unsigned active, int64_t val, int leader) {
383384
int lo, hi;
384-
asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
385+
__kmpc_impl_unpack(val, lo, hi);
385386
hi = __SHFL_SYNC(active, hi, leader);
386387
lo = __SHFL_SYNC(active, lo, leader);
387-
asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
388-
return val;
388+
return __kmpc_impl_pack(lo, hi);
389389
}
390390

391391
INLINE static uint64_t NextIter() {
392-
unsigned int active = __ACTIVEMASK();
393-
int leader = __ffs(active) - 1;
394-
int change = __popc(active);
395-
unsigned lane_mask_lt;
396-
asm("mov.u32 %0, %%lanemask_lt;" : "=r"(lane_mask_lt));
397-
unsigned int rank = __popc(active & lane_mask_lt);
392+
__kmpc_impl_lanemask_t active = __ACTIVEMASK();
393+
int leader = __kmpc_impl_ffs(active) - 1;
394+
int change = __kmpc_impl_popc(active);
395+
__kmpc_impl_lanemask_t lane_mask_lt = __kmpc_impl_lanemask_lt();
396+
unsigned int rank = __kmpc_impl_popc(active & lane_mask_lt);
398397
uint64_t warp_res;
399398
if (rank == 0) {
400399
warp_res = atomicAdd(
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//===------------ target_impl.h - NVPTX OpenMP GPU options ------- CUDA -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// Definitions of target-specific functions
10+
//
11+
//===----------------------------------------------------------------------===//
12+
#ifndef _TARGET_IMPL_H_
13+
#define _TARGET_IMPL_H_
14+
15+
#include <stdint.h>
16+
17+
#include "option.h"
18+
19+
// Splits the 64-bit `val` into two 32-bit values, written through the `lo` and
// `hi` reference parameters, using one PTX `mov.b64` with a two-element
// destination vector.
// NOTE(review): per the PTX ISA mov unpacking rules the first destination
// (`lo`) should receive the low-order 32 bits and the second (`hi`) the
// high-order 32 bits -- confirm against the PTX documentation.
INLINE void __kmpc_impl_unpack(int64_t val, int32_t &lo, int32_t &hi) {
20+
asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
21+
}
22+
23+
// Combines the two 32-bit values `lo` and `hi` into a single 64-bit result
// using one PTX `mov.b64` with a two-element source vector, and returns it.
// This is the inverse operand arrangement of __kmpc_impl_unpack.
// NOTE(review): per the PTX ISA mov packing rules the first source (`lo`)
// should supply the low-order 32 bits and the second (`hi`) the high-order
// 32 bits -- confirm against the PTX documentation.
INLINE int64_t __kmpc_impl_pack(int32_t lo, int32_t hi) {
24+
int64_t val;
25+
asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
26+
return val;
27+
}
28+
29+
// Type used to hold a lane mask (e.g. the value returned by
// __kmpc_impl_lanemask_lt); a 32-bit unsigned integer in this implementation.
typedef uint32_t __kmpc_impl_lanemask_t;
30+
31+
// Returns the current value of the PTX special register %lanemask_lt.
// NOTE(review): the PTX ISA describes %lanemask_lt as a mask with bits set in
// the positions below the calling thread's lane number -- confirm there.
INLINE __kmpc_impl_lanemask_t __kmpc_impl_lanemask_lt() {
32+
__kmpc_impl_lanemask_t res;
33+
// Copy the special register into a 32-bit register operand.
asm("mov.u32 %0, %%lanemask_lt;" : "=r"(res));
34+
return res;
35+
}
36+
37+
// Target wrapper that forwards `x` to the CUDA `__ffs` intrinsic and returns
// its result unchanged.
// NOTE(review): `__ffs` is documented as returning the 1-based position of the
// least significant set bit (0 when no bit is set) -- confirm in the CUDA
// integer intrinsics reference.
INLINE int __kmpc_impl_ffs(uint32_t x) { return __ffs(x); }
38+
39+
// Target wrapper that forwards `x` to the CUDA `__popc` intrinsic and returns
// its result unchanged.
// NOTE(review): `__popc` is documented as counting the bits set to 1 in a
// 32-bit integer -- confirm in the CUDA integer intrinsics reference.
INLINE int __kmpc_impl_popc(uint32_t x) { return __popc(x); }
40+
41+
#endif

0 commit comments

Comments
 (0)