Skip to content
This repository was archived by the owner on Apr 23, 2025. It is now read-only.

Commit 690ca53

Browse files
committed
Fix reductions and cmake building
- Using atomicrmw for FP is problematic on aarch64; switch to codegen using cmpxchg
- Template the reduction implementation for extensibility
- Update the CMake build (set the C++ standard, dynamically link with llvmdev)
- Unify error handling
1 parent ddcf7b8 commit 690ca53

File tree

6 files changed

+301
-99
lines changed

6 files changed

+301
-99
lines changed

ffi/CGIntrinsicsOpenMP.cpp

Lines changed: 101 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -128,8 +128,7 @@ Function *CGIntrinsicsOpenMP::createOutlinedFunction(
128128
assert(V->getName().startswith(".") &&
129129
"Expected Numba temporary value, named starting with .");
130130
if (!V->getName().startswith("."))
131-
report_fatal_error(
132-
"Expected Numba temporary value, named starting with .");
131+
FATAL_ERROR("Expected Numba temporary value, named starting with .");
133132
Privates.push_back(V);
134133
continue;
135134
}
@@ -155,10 +154,12 @@ Function *CGIntrinsicsOpenMP::createOutlinedFunction(
155154
CapturedShared.push_back(V);
156155
break;
157156
case DSA_REDUCTION_ADD:
157+
case DSA_REDUCTION_SUB:
158+
case DSA_REDUCTION_MUL:
158159
Reductions.push_back(V);
159160
break;
160161
default:
161-
report_fatal_error("Unexpected DSA type");
162+
FATAL_ERROR("Unexpected DSA type");
162163
}
163164
}
164165

@@ -343,26 +344,30 @@ Function *CGIntrinsicsOpenMP::createOutlinedFunction(
343344
if (VMap)
344345
(*VMap)[V] = AI;
345346

346-
if (DSAValueMap[V].Type == DSA_REDUCTION_ADD) {
347-
Type *VTy = V->getType()->getPointerElementType();
348-
Value *Priv = CreateAllocaAtEntry(VTy, /* ArraySize */ nullptr,
349-
V->getName() + ".red.priv");
347+
InsertPointTy AllocaIP(OutlinedEntryBB,
348+
OutlinedEntryBB->getFirstInsertionPt());
350349

351-
// Store idempotent value based on operation and type.
352-
// TODO: create templated emitInitAndAppendInfo in CGReduction
353-
if (VTy->isIntegerTy())
354-
OMPBuilder.Builder.CreateStore(ConstantInt::get(VTy, 0), Priv);
355-
else if (VTy->isFloatTy() || VTy->isDoubleTy())
356-
OMPBuilder.Builder.CreateStore(ConstantFP::get(VTy, 0.0), Priv);
357-
else
358-
assert(false &&
359-
"Unsupported type to init with idempotent reduction value");
360350

361-
ReductionInfos.push_back({VTy, AI, Priv, CGReduction::sumReduction,
362-
CGReduction::sumAtomicReduction});
363-
ReplaceUses(Uses, Priv);
364-
} else
365-
llvm_unreachable("Unsupported reduction");
351+
Value *Priv = nullptr;
352+
switch (DSAValueMap[V].Type) {
353+
case DSA_REDUCTION_ADD:
354+
Priv = CGReduction::emitInitAndAppendInfo<DSA_REDUCTION_ADD>(
355+
OMPBuilder.Builder, AllocaIP, AI, ReductionInfos);
356+
break;
357+
case DSA_REDUCTION_SUB:
358+
Priv = CGReduction::emitInitAndAppendInfo<DSA_REDUCTION_SUB>(
359+
OMPBuilder.Builder, AllocaIP, AI, ReductionInfos);
360+
break;
361+
case DSA_REDUCTION_MUL:
362+
Priv = CGReduction::emitInitAndAppendInfo<DSA_REDUCTION_MUL>(
363+
OMPBuilder.Builder, AllocaIP, AI, ReductionInfos);
364+
break;
365+
default:
366+
FATAL_ERROR("Unsupported reduction");
367+
}
368+
369+
assert(Priv && "Expected non-null private reduction variable");
370+
ReplaceUses(Uses, Priv);
366371

367372
++AI;
368373
}
@@ -388,7 +393,7 @@ Function *CGIntrinsicsOpenMP::createOutlinedFunction(
388393
<< *OutlinedFn << "=== End of Dump OutlinedFn\n");
389394

390395
if (verifyFunction(*OutlinedFn, &errs()))
391-
report_fatal_error("Verification of OutlinedFn failed!");
396+
FATAL_ERROR("Verification of OutlinedFn failed!");
392397

393398
CapturedVars.append(CapturedShared);
394399
CapturedVars.append(CapturedFirstprivate);
@@ -571,7 +576,7 @@ void CGIntrinsicsOpenMP::emitOMPParallelHostRuntime(
571576
<< *Fn << "=== End of Dump OuterFn\n");
572577

573578
if (verifyFunction(*Fn, &errs()))
574-
report_fatal_error("Verification of OuterFn failed!");
579+
FATAL_ERROR("Verification of OuterFn failed!");
575580
}
576581

577582
#if 0
@@ -649,7 +654,7 @@ void CGIntrinsicsOpenMP::emitOMPParallelHostRuntimeOMPIRBuilder(
649654

650655
OMPBuilder.Builder.restoreIP(CodeGenIP);
651656
// Store the identity (neutral) value for the reduction, based on operation and type.
652-
// TODO: create templated emitInitAndAppendInfo in CGReduction
657+
// TODO: use emitInitAndAppendInfo in CGReduction
653658
if (VTy->isIntegerTy())
654659
OMPBuilder.Builder.CreateStore(ConstantInt::get(VTy, 0), V);
655660
else if (VTy->isFloatTy() || VTy->isDoubleTy())
@@ -822,7 +827,7 @@ void CGIntrinsicsOpenMP::emitOMPParallelDeviceRuntime(
822827
OMPBuilder.Builder.CreateRetVoid();
823828

824829
if (verifyFunction(*OutlinedWrapperFn, &errs()))
825-
report_fatal_error("Verification of OutlinedWrapperFn failed!");
830+
FATAL_ERROR("Verification of OutlinedWrapperFn failed!");
826831

827832
DEBUG_ENABLE(dbgs() << "=== Dump OutlinedWrapper\n"
828833
<< *OutlinedWrapperFn
@@ -980,7 +985,7 @@ void CGIntrinsicsOpenMP::emitOMPParallelDeviceRuntime(
980985
<< *Fn << "=== End of Dump OuterFn\n");
981986

982987
if (verifyFunction(*Fn, &errs()))
983-
report_fatal_error("Verification of OuterFn failed!");
988+
FATAL_ERROR("Verification of OuterFn failed!");
984989
}
985990

986991
FunctionCallee CGIntrinsicsOpenMP::getKmpcForStaticInit(Type *Ty) {
@@ -993,7 +998,8 @@ FunctionCallee CGIntrinsicsOpenMP::getKmpcForStaticInit(Type *Ty) {
993998
if (Bitwidth == 64)
994999
return OMPBuilder.getOrCreateRuntimeFunction(
9951000
M, OMPRTL___kmpc_for_static_init_8u);
996-
llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1001+
1002+
FATAL_ERROR("unknown OpenMP loop iterator bitwidth");
9971003
}
9981004

9991005
FunctionCallee CGIntrinsicsOpenMP::getKmpcDistributeStaticInit(Type *Ty) {
@@ -1006,7 +1012,8 @@ FunctionCallee CGIntrinsicsOpenMP::getKmpcDistributeStaticInit(Type *Ty) {
10061012
if (Bitwidth == 64)
10071013
return OMPBuilder.getOrCreateRuntimeFunction(
10081014
M, OMPRTL___kmpc_distribute_static_init_8u);
1009-
llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1015+
1016+
FATAL_ERROR("unknown OpenMP loop iterator bitwidth");
10101017
}
10111018

10121019
void CGIntrinsicsOpenMP::emitOMPFor(DSAValueMapTy &DSAValueMap,
@@ -1108,26 +1115,17 @@ void CGIntrinsicsOpenMP::emitOMPFor(DSAValueMapTy &DSAValueMap,
11081115
} else
11091116
OMPBuilder.Builder.CreateStore(V, ReplacementValue);
11101117
} else if (DSA == DSA_REDUCTION_ADD) {
1111-
ReplacementValue = OMPBuilder.Builder.CreateAlloca(
1112-
VTy, /* ArraySize */ nullptr, Orig->getName() + ".red.priv");
1113-
1114-
// Store idempotent value based on operation and type.
1115-
// TODO: create templated emitInitAndAppendInfo in CGReduction
1116-
if (VTy->isIntegerTy())
1117-
OMPBuilder.Builder.CreateStore(ConstantInt::get(VTy, 0),
1118-
ReplacementValue);
1119-
else if (VTy->isFloatTy() || VTy->isDoubleTy())
1120-
OMPBuilder.Builder.CreateStore(ConstantFP::get(VTy, 0.0),
1121-
ReplacementValue);
1122-
else
1123-
report_fatal_error(
1124-
"Unsupported type to init with idempotent reduction value");
1125-
1126-
ReductionInfos.push_back({VTy, Orig, ReplacementValue,
1127-
CGReduction::sumReduction,
1128-
CGReduction::sumAtomicReduction});
1118+
ReplacementValue =
1119+
CGReduction::emitInitAndAppendInfo<DSA_REDUCTION_ADD>(
1120+
OMPBuilder.Builder, OMPBuilder.Builder.saveIP(), Orig,
1121+
ReductionInfos);
1122+
} else if (DSA == DSA_REDUCTION_SUB) {
1123+
ReplacementValue =
1124+
CGReduction::emitInitAndAppendInfo<DSA_REDUCTION_SUB>(
1125+
OMPBuilder.Builder, OMPBuilder.Builder.saveIP(), Orig,
1126+
ReductionInfos);
11291127
} else
1130-
assert(false && "Unsupported privatization");
1128+
FATAL_ERROR("Unsupported privatization");
11311129

11321130
assert(ReplacementValue && "Expected non-null ReplacementValue");
11331131

@@ -1266,7 +1264,7 @@ void CGIntrinsicsOpenMP::emitOMPFor(DSAValueMapTy &DSAValueMap,
12661264
}
12671265

12681266
if (verifyFunction(*PreHeader->getParent(), &errs()))
1269-
report_fatal_error("Verification of omp for lowering failed!");
1267+
FATAL_ERROR("Verification of omp for lowering failed!");
12701268
}
12711269

12721270
void CGIntrinsicsOpenMP::emitOMPTask(DSAValueMapTy &DSAValueMap, Function *Fn,
@@ -1308,7 +1306,7 @@ void CGIntrinsicsOpenMP::emitOMPTask(DSAValueMapTy &DSAValueMap, Function *Fn,
13081306
// Store a copy of the value, thus get the pointer element type.
13091307
PrivatesTy.push_back(OriginalValue->getType()->getPointerElementType());
13101308
} else
1311-
assert(false && "Unknown DSA type");
1309+
FATAL_ERROR("Unknown DSA type");
13121310
}
13131311

13141312
StructType *KmpSharedsTTy = nullptr;
@@ -1528,7 +1526,7 @@ void CGIntrinsicsOpenMP::emitOMPTask(DSAValueMapTy &DSAValueMap, Function *Fn,
15281526
ReplacementValue = FirstprivateGEP;
15291527
++PrivatesGEPIdx;
15301528
} else
1531-
assert(false && "Unknown DSA type");
1529+
FATAL_ERROR("Unknown DSA type");
15321530

15331531
assert(ReplacementValue && "Expected non-null ReplacementValue");
15341532
SmallVector<User *, 8> Users(OriginalValue->users());
@@ -1671,8 +1669,7 @@ void CGIntrinsicsOpenMP::emitOMPOffloadingMappings(
16711669
// do nothing
16721670
break;
16731671
default:
1674-
assert(false && "Unknown mapping type");
1675-
report_fatal_error("Unknown mapping type");
1672+
FATAL_ERROR("Unknown mapping type");
16761673
}
16771674

16781675
return MapType;
@@ -1772,8 +1769,7 @@ void CGIntrinsicsOpenMP::emitOMPOffloadingMappings(
17721769
break;
17731770
}
17741771
default:
1775-
assert(false && "Unknown mapping type");
1776-
report_fatal_error("Unknown mapping type");
1772+
FATAL_ERROR("Unknown mapping type");
17771773
}
17781774
}
17791775

@@ -1889,7 +1885,7 @@ void CGIntrinsicsOpenMP::emitOMPCritical(Function *Fn, BasicBlock *BBEntry,
18891885
BodyGenCallbackTy BodyGenCB,
18901886
FinalizeCallbackTy FiniCB) {
18911887
if (isOpenMPDeviceRuntime())
1892-
report_fatal_error("Critical regions are not (yet) implemented on device");
1888+
FATAL_ERROR("Critical regions are not (yet) implemented on device");
18931889

18941890
const DebugLoc DL = BBEntry->getTerminator()->getDebugLoc();
18951891
BBEntry->getTerminator()->eraseFromParent();
@@ -2394,7 +2390,7 @@ void CGIntrinsicsOpenMP::emitOMPTeamsDeviceRuntime(
23942390
<< *Fn << "=== End of Dump OuterFn\n");
23952391

23962392
if (verifyFunction(*Fn, &errs()))
2397-
report_fatal_error("Verification of OuterFn failed!");
2393+
FATAL_ERROR("Verification of OuterFn failed!");
23982394
}
23992395

24002396
void CGIntrinsicsOpenMP::emitOMPTeams(DSAValueMapTy &DSAValueMap,
@@ -2494,7 +2490,7 @@ void CGIntrinsicsOpenMP::emitOMPTeamsHostRuntime(
24942490
<< *Fn << "=== End of Dump OuterFn\n");
24952491

24962492
if (verifyFunction(*Fn, &errs()))
2497-
report_fatal_error("Verification of OuterFn failed!");
2493+
FATAL_ERROR("Verification of OuterFn failed!");
24982494
}
24992495

25002496
void CGIntrinsicsOpenMP::emitOMPTargetEnterData(
@@ -2702,7 +2698,7 @@ void CGIntrinsicsOpenMP::emitOMPDistribute(DSAValueMapTy &DSAValueMap,
27022698
} else
27032699
OMPBuilder.Builder.CreateStore(V, ReplacementValue);
27042700
} else
2705-
report_fatal_error("Unsupported privatization");
2701+
FATAL_ERROR("Unsupported privatization");
27062702

27072703
assert(ReplacementValue && "Expected non-null ReplacementValue");
27082704

@@ -3071,7 +3067,7 @@ void CGIntrinsicsOpenMP::emitOMPDistributeParallelFor(
30713067
DSAValueMap.erase(PUpperBound);
30723068

30733069
if (verifyFunction(*Fn, &errs()))
3074-
report_fatal_error(
3070+
FATAL_ERROR(
30753071
"Verification of DistributeParallelFor lowering failed!");
30763072

30773073
DEBUG_ENABLE(dbgs() << "=== Dump DistributeParallelFor\n"
@@ -3172,3 +3168,49 @@ bool CGIntrinsicsOpenMP::isOpenMPDeviceRuntime() {
31723168

31733169
return false;
31743170
}
3171+
3172+
/// Emit the combining instruction for an additive reduction.
///
/// \param IRB builder used to create the instruction at its current
///            insertion point.
/// \param LHS left operand of the addition.
/// \param RHS right operand of the addition; its type selects the opcode.
/// \returns the value produced by the emitted integer or floating-point add,
///          named "red.add".
///
/// Only integer, float and double operands are handled; any other type ends
/// in FATAL_ERROR.
template <>
3173+
Value *CGReduction::emitOperation<DSA_REDUCTION_ADD>(IRBuilderBase &IRB,
3174+
Value *LHS, Value *RHS) {
3175+
// Only RHS is inspected; assumes LHS has the same type — TODO confirm at the
// call sites.
Type *VTy = RHS->getType();
3176+
if (VTy->isIntegerTy())
3177+
return IRB.CreateAdd(LHS, RHS, "red.add");
3178+
else if (VTy->isFloatTy() || VTy->isDoubleTy())
3179+
return IRB.CreateFAdd(LHS, RHS, "red.add");
3180+
else
3181+
// NOTE(review): no return follows; presumably FATAL_ERROR does not return —
// confirm against the macro definition.
FATAL_ERROR("Unsupported type for reduction operation");
3182+
}
3183+
3184+
// OpenMP 5.1, 2.21.5, sub is the same as add.
3185+
/// Emit the combining instruction for a subtraction reduction.
///
/// Forwards unchanged to the DSA_REDUCTION_ADD specialization, so the emitted
/// instruction and the supported operand types are exactly those of the add
/// case.
///
/// \param IRB builder forwarded to the add specialization.
/// \param LHS left operand, forwarded as is.
/// \param RHS right operand, forwarded as is.
/// \returns whatever the DSA_REDUCTION_ADD specialization returns.
template <>
3186+
Value *CGReduction::emitOperation<DSA_REDUCTION_SUB>(IRBuilderBase &IRB,
3187+
Value *LHS, Value *RHS) {
3188+
return emitOperation<DSA_REDUCTION_ADD>(IRB, LHS, RHS);
3189+
}
3190+
3191+
/// Emit the combining instruction for a multiplicative reduction.
///
/// \param IRB builder used to create the instruction at its current
///            insertion point.
/// \param LHS left operand of the multiplication.
/// \param RHS right operand of the multiplication; its type selects the
///            opcode.
/// \returns the value produced by the emitted integer or floating-point
///          multiply, named "red.mul".
///
/// Only integer, float and double operands are handled; any other type ends
/// in FATAL_ERROR.
template <>
3192+
Value *CGReduction::emitOperation<DSA_REDUCTION_MUL>(IRBuilderBase &IRB,
3193+
Value *LHS, Value *RHS) {
3194+
// Only RHS is inspected; assumes LHS has the same type — TODO confirm at the
// call sites.
Type *VTy = RHS->getType();
3195+
if (VTy->isIntegerTy())
3196+
return IRB.CreateMul(LHS, RHS, "red.mul");
3197+
else if (VTy->isFloatTy() || VTy->isDoubleTy())
3198+
return IRB.CreateFMul(LHS, RHS, "red.mul");
3199+
else
3200+
// NOTE(review): no return follows; presumably FATAL_ERROR does not return —
// confirm against the macro definition.
FATAL_ERROR("Unsupported type for reduction operation");
3201+
}
3202+
3203+
/// Emit an atomic read-modify-write that adds a partial result into the
/// reduction variable.
///
/// \param IRB     builder used to create the instruction.
/// \param LHS     passed as the pointer operand of the atomicrmw (the location
///                that is updated).
/// \param Partial passed as the value operand (the amount that is added).
/// \returns the builder's insertion point as saved right after the atomicrmw
///          was created.
template <>
3204+
InsertPointTy CGReduction::emitAtomicOperationRMW<DSA_REDUCTION_ADD>(
3205+
IRBuilderBase &IRB, Value *LHS, Value *Partial) {
3206+
// NOTE(review): AtomicRMWInst::Add is the integer add operation; presumably
// floating-point reductions are routed through a different (cmpxchg-based)
// path by the caller — confirm.
// `None` is passed for the alignment argument; presumably this lets the
// builder choose the alignment — confirm against the LLVM version in use.
IRB.CreateAtomicRMW(AtomicRMWInst::Add, LHS, Partial, None,
3207+
AtomicOrdering::Monotonic);
3208+
return IRB.saveIP();
3209+
}
3210+
3211+
// OpenMP 5.1, 2.21.5, sub is the same as add.
3212+
/// Emit the atomic read-modify-write for a subtraction reduction.
///
/// Forwards unchanged to the DSA_REDUCTION_ADD specialization, so the emitted
/// instruction is the same atomic add.
///
/// \param IRB     builder forwarded to the add specialization.
/// \param LHS     forwarded as is.
/// \param Partial forwarded as is.
/// \returns the insertion point returned by the DSA_REDUCTION_ADD
///          specialization.
template <>
3213+
InsertPointTy CGReduction::emitAtomicOperationRMW<DSA_REDUCTION_SUB>(
3214+
IRBuilderBase &IRB, Value *LHS, Value *Partial) {
3215+
return emitAtomicOperationRMW<DSA_REDUCTION_ADD>(IRB, LHS, Partial);
3216+
}

0 commit comments

Comments
 (0)