@@ -22,6 +22,7 @@ include "mlir/Dialect/OpenMP/OpenMPOpBase.td"
2222include "mlir/Interfaces/ControlFlowInterfaces.td"
2323include "mlir/Interfaces/SideEffectInterfaces.td"
2424include "mlir/IR/EnumAttr.td"
25+ include "mlir/IR/OpAsmInterface.td"
2526include "mlir/IR/OpBase.td"
2627include "mlir/IR/SymbolInterfaces.td"
2728
@@ -356,6 +357,212 @@ def SingleOp : OpenMP_Op<"single", traits = [
356357 let hasVerifier = 1;
357358}
358359
360+ //===---------------------------------------------------------------------===//
361+ // OpenMP Canonical Loop Info Type
362+ //===---------------------------------------------------------------------===//
363+
// Opaque handle type (mnemonic `cli`) through which loop transformation
// operations refer to a canonical loop. The description below states which
// uses of such a value are permitted.
def CanonicalLoopInfoType : OpenMP_Type<"CanonicalLoopInfo", "cli"> {
  let summary = "Type for representing a reference to a canonical loop";
  let description = [{
    A variable of type CanonicalLoopInfo refers to an OpenMP-compatible
    canonical loop in the same function. Values of this type are not
    available at runtime and therefore cannot be used by the program itself,
    i.e. it is an opaque type. It is similar to the transform dialect's
    `!transform.interface` type, but instead of implementing an interface
    for each transformation, the OpenMP dialect itself defines possible
    operations on this type.

    A value of type CanonicalLoopInfoType (in the following: CLI) can be

    1. created by omp.new_cli.
    2. passed to omp.canonical_loop to associate the loop with that CLI. A CLI
       can only be associated once.
    3. passed to an omp loop transformation operation that modifies the loop
       associated with the CLI. The CLI is the "applyee" and the operation is
       the consumer. A CLI can only be consumed once.
    4. passed to an omp loop transformation operation to associate the CLI with
       a result of that transformation. The CLI is the "generatee" and the
       operation is the generator.

    A CLI cannot

    1. be returned from a function.
    2. be passed to operations that are not specifically designed to take a
       CanonicalLoopInfoType, including AnyType.

    A CLI directly corresponds to an object of
    OpenMPIRBuilder's CanonicalLoopInfo struct when lowering to LLVM-IR.
  }];
}
397+
398+ //===---------------------------------------------------------------------===//
399+ // OpenMP Canonical Loop Info Creation
400+ //===---------------------------------------------------------------------===//
401+
// `omp.new_cli`: takes no operands and yields a single CanonicalLoopInfoType
// value. Declares the OpAsmOpInterface method `getAsmResultNames`.
def NewCliOp : OpenMP_Op<"new_cli", [
    DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>
  ]> {
  let summary = "Create a new Canonical Loop Info value.";
  let description = [{
    Create a new CLI that can be passed as an argument to a CanonicalLoopOp
    and to loop transformation operations to handle dependencies between
    loop transformation operations.
  }];

  // No operands; the only result is the freshly created CLI handle.
  let arguments = (ins);
  let results = (outs CanonicalLoopInfoType:$result);

  // Only the attribute dictionary is part of the declarative syntax; no
  // result type is spelled out in the textual form.
  let assemblyFormat = [{
    attr-dict
  }];

  // Builder declaration that takes no arguments beyond the implicit ones.
  let builders = [OpBuilder<(ins)>];

  let hasVerifier = 1;
}
423+
424+ //===---------------------------------------------------------------------===//
425+ // OpenMP Canonical Loop Operation
426+ //===---------------------------------------------------------------------===//
// `omp.canonical_loop`: a loop described solely by its trip count, optionally
// associated with a CLI value so that loop transformation operations can
// refer to it. Declares the OpAsmOpInterface methods `getAsmBlockNames` and
// `getAsmBlockArgumentNames`.
def CanonicalLoopOp : OpenMPTransform_Op<"canonical_loop", [
    DeclareOpInterfaceMethods<OpAsmOpInterface,
                              ["getAsmBlockNames", "getAsmBlockArgumentNames"]>
  ]> {
  let summary = "OpenMP Canonical Loop Operation";
  let description = [{
    All loops that conform to OpenMP's definition of a canonical loop can be
    simplified to a CanonicalLoopOp. In particular, there are no loop-carried
    variables and the number of iterations it will execute is known before the
    operation. This allows e.g. to determine the number of threads and chunks
    the iteration space is split into before executing any iteration. More
    restrictions may apply in cases such as (collapsed) loop nests, doacross
    loops, etc.

    In contrast to other loop operations such as `scf.for`, the number of
    iterations is determined by only a single variable, the trip-count. The
    induction variable value is the logical iteration number of that iteration,
    which OpenMP defines to be between 0 and the trip-count (exclusive).
    Loop representations having lower-bound, upper-bound, and step-size
    operands require passes to do more work than necessary, including handling
    special cases such as upper-bound smaller than lower-bound, upper-bound
    equal to the integer type's maximal value, negative step size, etc. This
    complexity is better handled only once by the front-end, which can apply
    its own semantics for such cases while still being able to represent any
    kind of loop, which is kind of the point of a mid-end intermediate
    representation. User-defined types such as random-access iterators in C++
    could not directly be represented anyway.

    The induction variable is always of the same type as the tripcount argument.
    Since it can never be negative, tripcount is always interpreted as an
    unsigned integer. It is the caller's responsibility to ensure the tripcount
    is not negative when its interpretation is signed, i.e.
    `%tripcount = max(0,%tripcount)`.

    An optional argument to an omp.canonical_loop that can be passed in
    is a CanonicalLoopInfo value that can be used to refer to the canonical
    loop to apply transformations -- such as tiling, unrolling, or
    work-sharing -- to the loop, similar to the transform dialect but
    with OpenMP-specific semantics. Because it is optional, it has to be the
    last of the operands, but appears first in the pretty format printing.

    The pretty assembly format is inspired by python syntax, where `range(n)`
    returns an iterator that runs from `0` to `n-1`. The pretty assembly syntax
    is one of:

    ```
    omp.canonical_loop(%cli) %iv : !type in range(%tripcount)
    omp.canonical_loop %iv : !type in range(%tripcount)
    ```

    A CanonicalLoopOp is lowered to LLVM-IR using
    `OpenMPIRBuilder::createCanonicalLoop`.

    #### Examples

    Translation from lower-bound, upper-bound, step-size to trip-count.
    ```c
    for (int i = 3; i < 42; i+=2) {
      B[i] = A[i];
    }
    ```

    The trip count is the distance between the bounds divided by the step,
    rounded up: ceil((42 - 3) / 2) = 20 iterations (i = 3, 5, ..., 41).

    ```mlir
    %lb = arith.constant 3 : i32
    %ub = arith.constant 42 : i32
    %step = arith.constant 2 : i32
    %range = arith.subi %ub, %lb : i32
    %tripcount = arith.ceildivui %range, %step : i32
    omp.canonical_loop %iv : i32 in range(%tripcount) {
      %offset = arith.muli %iv, %step : i32
      %i = arith.addi %offset, %lb : i32
      %a = load %arrA[%i] : memref<?xf32>
      store %a, %arrB[%i] : memref<?xf32>
    }
    ```

    Nested canonical loop with transformation of the inner loop.
    ```mlir
    %outer = omp.new_cli
    %inner = omp.new_cli
    omp.canonical_loop(%outer) %iv1 : i32 in range(%tc1) {
      omp.canonical_loop(%inner) %iv2 : i32 in range(%tc2) {
        %a = load %arrA[%iv1, %iv2] : memref<?x?xf32>
        store %a, %arrB[%iv1, %iv2] : memref<?x?xf32>
      }
    }
    omp.unroll_heuristic(%inner)
    ```
  }];

  // The optional CLI has to come last among the operands (see description),
  // even though the pretty syntax prints it first.
  let arguments = (ins IntLikeType:$tripCount,
                       Optional<CanonicalLoopInfoType>:$cli);
  let regions = (region AnyRegion:$region);

  let extraClassDeclaration = [{
    ::mlir::Value getInductionVar();
  }];

  // One builder without and one with an associated CLI value.
  let builders = [
    OpBuilder<(ins "::mlir::Value":$tripCount)>,
    OpBuilder<(ins "::mlir::Value":$tripCount, "::mlir::Value":$cli)>,
  ];

  // The `... in range(...)` syntax is not expressed declaratively here.
  let hasCustomAssemblyFormat = 1;
  let hasVerifier = 1;
}
530+
531+ //===----------------------------------------------------------------------===//
532+ // OpenMP unroll_heuristic operation
533+ //===----------------------------------------------------------------------===//
534+
// `omp.unroll_heuristic`: loop transformation that consumes a single CLI (the
// applyee) and produces no generatees.
def UnrollHeuristicOp : OpenMPTransform_Op<"unroll_heuristic", []> {
  let summary = "OpenMP heuristic unroll operation";
  let description = [{
    Represents a `#pragma omp unroll` construct introduced in OpenMP 5.1.

    The operation has one applyee and no generatees. The applyee is unrolled
    according to implementation-defined heuristics. Implementations may choose
    to not unroll the loop, partially unroll by a chosen factor, or fully
    unroll it. Even if the implementation chooses to partially unroll the
    applyee, the resulting unrolled loop is not accessible as a generatee. Use
    omp.unroll_partial if a generatee is required.

    The lowering is implemented using `OpenMPIRBuilder::unrollLoopHeuristic`,
    which just attaches `llvm.loop.unroll.enable` metadata to the loop so the
    unrolling is carried out by LLVM's LoopUnroll pass. That is, unrolling is
    only actually performed in optimized builds.

    Assembly formats:

    ```
    omp.unroll_heuristic(%cli)
    omp.unroll_heuristic(%cli) -> ()
    ```
  }];

  // The loop to be unrolled, referenced through its CLI handle.
  let arguments = (ins CanonicalLoopInfoType:$applyee);

  let builders = [
    OpBuilder<(ins "::mlir::Value":$cli)>,
  ];

  // Both textual forms listed in the description are accepted, so the syntax
  // is not expressed as a declarative assemblyFormat here.
  let hasCustomAssemblyFormat = 1;
}
565+
359566//===----------------------------------------------------------------------===//
360567// 2.8.3 Workshare Construct
361568//===----------------------------------------------------------------------===//
0 commit comments