-
Notifications
You must be signed in to change notification settings - Fork 299
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Arc] Add InitialOp and lowering support for FirReg preset values. #7480
Changes from 8 commits
d59eafe
ee28446
3eb8751
06ef1d7
796f58a
0924a13
c227d18
bb4e2dc
f3bb752
2202b72
fdbf6a3
25a3f85
53565fa
9b41101
f3ffd72
e77578d
85a4d96
d8e2f88
870edda
747b792
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -134,6 +134,8 @@ def StateOp : ArcOp<"state", [ | |
DeclareOpInterfaceMethods<SymbolUserOpInterface>, | ||
AttrSizedOperandSegments, | ||
DeclareOpInterfaceMethods<ClockedOpInterface>, | ||
PredOpTrait<"types of initial arguments must match result types", CPred< | ||
[{getInitials().empty() || llvm::equal(getInitials().getType(), getResults().getType())}]>> | ||
]> { | ||
let summary = "State transfer arc"; | ||
|
||
|
@@ -143,12 +145,13 @@ def StateOp : ArcOp<"state", [ | |
Optional<I1>:$enable, | ||
Optional<I1>:$reset, | ||
I32Attr:$latency, | ||
Variadic<AnyType>:$inputs); | ||
Variadic<AnyType>:$inputs, | ||
Variadic<AnyType>:$initials); | ||
let results = (outs Variadic<AnyType>:$outputs); | ||
|
||
let assemblyFormat = [{ | ||
$arc `(` $inputs `)` (`clock` $clock^)? (`enable` $enable^)? | ||
(`reset` $reset^)? `latency` $latency attr-dict | ||
(`reset` $reset^)? ( `initial` ` ` `(` $initials^ `:` type($initials) `)`)? `latency` $latency attr-dict | ||
`:` functional-type($inputs, results) | ||
}]; | ||
|
||
|
@@ -157,21 +160,23 @@ def StateOp : ArcOp<"state", [ | |
|
||
let builders = [ | ||
OpBuilder<(ins "DefineOp":$arc, "mlir::Value":$clock, "mlir::Value":$enable, | ||
"unsigned":$latency, CArg<"mlir::ValueRange", "{}">:$inputs), [{ | ||
"unsigned":$latency, CArg<"mlir::ValueRange", "{}">:$inputs, | ||
CArg<"mlir::ValueRange", "{}">:$initials), [{ | ||
build($_builder, $_state, mlir::SymbolRefAttr::get(arc), | ||
arc.getFunctionType().getResults(), clock, enable, latency, | ||
inputs); | ||
inputs, initials); | ||
}]>, | ||
OpBuilder<(ins "mlir::SymbolRefAttr":$arc, "mlir::TypeRange":$results, | ||
"mlir::Value":$clock, "mlir::Value":$enable, "unsigned":$latency, | ||
CArg<"mlir::ValueRange", "{}">:$inputs | ||
CArg<"mlir::ValueRange", "{}">:$inputs, CArg<"mlir::ValueRange", "{}">:$initials | ||
), [{ | ||
build($_builder, $_state, arc, results, clock, enable, Value(), latency, | ||
inputs); | ||
inputs, initials); | ||
}]>, | ||
OpBuilder<(ins "mlir::SymbolRefAttr":$arc, "mlir::TypeRange":$results, | ||
"mlir::Value":$clock, "mlir::Value":$enable, "mlir::Value":$reset, | ||
"unsigned":$latency, CArg<"mlir::ValueRange", "{}">:$inputs | ||
"unsigned":$latency, CArg<"mlir::ValueRange", "{}">:$inputs, | ||
CArg<"mlir::ValueRange", "{}">:$initials | ||
), [{ | ||
if (clock) | ||
$_state.addOperands(clock); | ||
|
@@ -180,30 +185,32 @@ def StateOp : ArcOp<"state", [ | |
if (reset) | ||
$_state.addOperands(reset); | ||
$_state.addOperands(inputs); | ||
$_state.addOperands(initials); | ||
$_state.addAttribute("arc", arc); | ||
$_state.addAttribute("latency", $_builder.getI32IntegerAttr(latency)); | ||
$_state.addAttribute(getOperandSegmentSizeAttr(), | ||
$_builder.getDenseI32ArrayAttr({ | ||
clock ? 1 : 0, | ||
enable ? 1 : 0, | ||
reset ? 1 : 0, | ||
static_cast<int32_t>(inputs.size())})); | ||
static_cast<int32_t>(inputs.size()), | ||
static_cast<int32_t>(initials.size())})); | ||
$_state.addTypes(results); | ||
}]>, | ||
OpBuilder<(ins "mlir::StringAttr":$arc, "mlir::TypeRange":$results, | ||
"mlir::Value":$clock, "mlir::Value":$enable, "unsigned":$latency, | ||
CArg<"mlir::ValueRange", "{}">:$inputs | ||
CArg<"mlir::ValueRange", "{}">:$inputs, CArg<"mlir::ValueRange", "{}">:$initials | ||
), [{ | ||
build($_builder, $_state, mlir::SymbolRefAttr::get(arc), results, clock, | ||
enable, latency, inputs); | ||
enable, latency, inputs, initials); | ||
}]>, | ||
OpBuilder<(ins "mlir::StringRef":$arc, "mlir::TypeRange":$results, | ||
"mlir::Value":$clock, "mlir::Value":$enable, "unsigned":$latency, | ||
CArg<"mlir::ValueRange", "{}">:$inputs | ||
CArg<"mlir::ValueRange", "{}">:$inputs, CArg<"mlir::ValueRange", "{}">:$initials | ||
), [{ | ||
build($_builder, $_state, | ||
mlir::StringAttr::get($_builder.getContext(), arc), | ||
results, clock, enable, latency, inputs); | ||
results, clock, enable, latency, inputs, initials); | ||
}]> | ||
]; | ||
let skipDefaultBuilders = 1; | ||
|
@@ -452,6 +459,20 @@ def PassThroughOp : ArcOp<"passthrough", [NoTerminator, NoRegionArguments]> { | |
}]; | ||
} | ||
|
||
def InitialOp : ArcOp<"initial", [RecursiveMemoryEffects, NoTerminator, NoRegionArguments]> { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we add There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it makes sense to restrict it. Again, probably same for PassThroughOp? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Good point! I think it also makes sense to add it for PassThroughOp. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This looks looks like more than 80 cols to me. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can proudly announce that I have now enabled the vertical ruler for TableGen files in my editor. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @fabianschuiki has probably thought more about this than I since he suggested we'd make this a nested op, but is the only reason this is not isolated from above to access the results of the alloc ops? Are there other use-cases? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As mentioned above, I shortly had it as isolated from above. But for sake of simplicity and to keep it in line with the PassthroughOp I decided not to keep that. What would be the benefit of making it isolated form above? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Since you just compute the initial values in this region it doesn't make sense to use SSA values from arbitrary places in the design. Making it isolated provides additional compile-time guarantees. I'd just leave it as is for now. We are still thinking about how we can better represent the storage rather than hardcoding integer offsets. Maybe we'll unify the alloc ops and storage argument at some point (I have an experimental branch doing that, but it makes use of symbols which is a bit of an issue in verifier time). If something like that makes it in, one block argument would be enough and we could make it isolated from above, but at that point it will be just a small change. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. With @uenoku's work on initial values in Seq we'll probably get initial values on registers that depend on the same SSA value. I guess with a single initial op you could stuff them all into that one op without any issues. But the idea has come up to make the LowerState pass less monolithic, and possibly break it up into multiple partial lowerings. In that case it might be beneficial to plan for |
||
let summary = "Clock-less logic called at the start of simulation"; | ||
let regions = (region SizedRegion<1>:$body); | ||
let assemblyFormat = [{ | ||
attr-dict-with-keyword $body | ||
}]; | ||
let extraClassDeclaration = [{ | ||
mlir::Block &getBodyBlock() { return getBody().front(); } | ||
}]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice, thanks. This was just copy pasted from the PassThroughOp, so I guess we could add |
||
let builders = [ | ||
OpBuilder<(ins), "build($_builder, $_state, ValueRange{});"> | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are you sure this builder is not generated by default? What would the ValueRange do if we'd pass one? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're right That was a left-over from a point where I had InitialOp |
||
]; | ||
} | ||
|
||
//===----------------------------------------------------------------------===// | ||
// Storage Allocation | ||
//===----------------------------------------------------------------------===// | ||
|
@@ -659,11 +680,12 @@ def ModelOp : ArcOp<"model", [RegionKindInterface, IsolatedFromAbove, | |
specifies the I/O of the module associated to this model. | ||
}]; | ||
let arguments = (ins SymbolNameAttr:$sym_name, | ||
TypeAttrOf<ModuleType>:$io); | ||
TypeAttrOf<ModuleType>:$io, | ||
OptionalAttr<FlatSymbolRefAttr>:$initialFn); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SymbolUserOpInterface is necessary if we want to add initializer as a function. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks! Missed that one. |
||
let regions = (region SizedRegion<1>:$body); | ||
|
||
let assemblyFormat = [{ | ||
$sym_name `io` $io attr-dict-with-keyword $body | ||
$sym_name `io` $io (`initializer` $initialFn^)? attr-dict-with-keyword $body | ||
}]; | ||
|
||
let extraClassDeclaration = [{ | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Super-nit: remove empty line for consistency There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops. |
||
// RUN: arcilator %s --run --jit-entry=main | FileCheck %s | ||
// REQUIRES: arcilator-jit | ||
|
||
// CHECK-LABEL: output = ca | ||
// CHECK-NEXT: output = ca | ||
// CHECK-NEXT: output = 0 | ||
// CHECK-NEXT: output = fe | ||
// CHECK-NEXT: output = ff | ||
|
||
module { | ||
|
||
hw.module @shiftreg(in %clock : i1, in %reset : i1, in %en : i1, in %din : i8, out dout : i8) { | ||
%seq_clk = seq.to_clock %clock | ||
%srA = seq.firreg %0 clock %seq_clk preset 0xFE : i8 | ||
%srB = seq.firreg %1 clock %seq_clk : i8 | ||
%srC = seq.firreg %2 clock %seq_clk preset 0xCA : i8 | ||
%0 = comb.mux bin %en, %din, %srA : i8 | ||
%1 = comb.mux bin %en, %srA, %srB : i8 | ||
%2 = comb.mux bin %en, %srB, %srC : i8 | ||
hw.output %srC : i8 | ||
} | ||
|
||
func.func @main() { | ||
%ff = arith.constant 0xFF : i8 | ||
%false = arith.constant 0 : i1 | ||
%true = arith.constant 1 : i1 | ||
|
||
arc.sim.instantiate @shiftreg as %model { | ||
arc.sim.set_input %model, "en" = %false : i1, !arc.sim.instance<@shiftreg> | ||
arc.sim.set_input %model, "reset" = %false : i1, !arc.sim.instance<@shiftreg> | ||
arc.sim.set_input %model, "din" = %ff : i8, !arc.sim.instance<@shiftreg> | ||
|
||
%res0 = arc.sim.get_port %model, "dout" : i8, !arc.sim.instance<@shiftreg> | ||
arc.sim.emit "output", %res0 : i8 | ||
|
||
arc.sim.set_input %model, "clock" = %true : i1, !arc.sim.instance<@shiftreg> | ||
arc.sim.step %model : !arc.sim.instance<@shiftreg> | ||
arc.sim.set_input %model, "clock" = %false : i1, !arc.sim.instance<@shiftreg> | ||
arc.sim.step %model : !arc.sim.instance<@shiftreg> | ||
|
||
%res1 = arc.sim.get_port %model, "dout" : i8, !arc.sim.instance<@shiftreg> | ||
arc.sim.emit "output", %res1 : i8 | ||
|
||
arc.sim.set_input %model, "en" = %true : i1, !arc.sim.instance<@shiftreg> | ||
|
||
arc.sim.set_input %model, "clock" = %true : i1, !arc.sim.instance<@shiftreg> | ||
arc.sim.step %model : !arc.sim.instance<@shiftreg> | ||
arc.sim.set_input %model, "clock" = %false : i1, !arc.sim.instance<@shiftreg> | ||
arc.sim.step %model : !arc.sim.instance<@shiftreg> | ||
%res2 = arc.sim.get_port %model, "dout" : i8, !arc.sim.instance<@shiftreg> | ||
arc.sim.emit "output", %res2 : i8 | ||
|
||
arc.sim.set_input %model, "clock" = %true : i1, !arc.sim.instance<@shiftreg> | ||
arc.sim.step %model : !arc.sim.instance<@shiftreg> | ||
arc.sim.set_input %model, "clock" = %false : i1, !arc.sim.instance<@shiftreg> | ||
arc.sim.step %model : !arc.sim.instance<@shiftreg> | ||
%res3 = arc.sim.get_port %model, "dout" : i8, !arc.sim.instance<@shiftreg> | ||
arc.sim.emit "output", %res3 : i8 | ||
|
||
arc.sim.set_input %model, "clock" = %true : i1, !arc.sim.instance<@shiftreg> | ||
arc.sim.step %model : !arc.sim.instance<@shiftreg> | ||
arc.sim.set_input %model, "clock" = %false : i1, !arc.sim.instance<@shiftreg> | ||
arc.sim.step %model : !arc.sim.instance<@shiftreg> | ||
%res4 = arc.sim.get_port %model, "dout" : i8, !arc.sim.instance<@shiftreg> | ||
arc.sim.emit "output", %res4 : i8 | ||
} | ||
return | ||
} | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks for adding such a nice integration test! 🎉 There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah this is really fantastic! |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
// RUN: arcilator %s --run --jit-entry=main 2>&1 >/dev/null | FileCheck %s | ||
// REQUIRES: arcilator-jit | ||
|
||
// CHECK: - Init - | ||
|
||
module { | ||
llvm.func @_arc_env_get_print_stream(i32) -> !llvm.ptr | ||
llvm.func @_arc_libc_fputs(!llvm.ptr, !llvm.ptr) -> i32 | ||
llvm.mlir.global internal constant @global_init_str(" - Init -\0A\00") {addr_space = 0 : i32} | ||
|
||
arc.model @initmodel io !hw.modty<> { | ||
^bb0(%arg0: !arc.storage): | ||
arc.passthrough { | ||
%dummy = llvm.mlir.constant(0 : i32) : i32 | ||
} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't quite get what this is supposed to test? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It was a ham-fisted way of creating a non-empty block. It's gone now. |
||
arc.initial { | ||
%cst0 = llvm.mlir.constant(0 : i32) : i32 | ||
%stderr = llvm.call @_arc_env_get_print_stream(%cst0) : (i32) -> !llvm.ptr | ||
%str = llvm.mlir.addressof @global_init_str : !llvm.ptr | ||
%0 = llvm.call @_arc_libc_fputs(%str, %stderr) : (!llvm.ptr, !llvm.ptr) -> i32 | ||
} | ||
Comment on lines
+13
to
+18
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Very nice! I guess a future |
||
} | ||
func.func @main() { | ||
arc.sim.instantiate @initmodel as %arg0 { | ||
} | ||
return | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -105,6 +105,7 @@ LogicalResult circt::arc::collectStates(Value storage, unsigned offset, | |
|
||
LogicalResult circt::arc::collectModels(mlir::ModuleOp module, | ||
SmallVector<ModelInfo> &models) { | ||
|
||
for (auto modelOp : module.getOps<ModelOp>()) { | ||
auto storageArg = modelOp.getBody().getArgument(0); | ||
auto storageType = cast<StorageType>(storageArg.getType()); | ||
|
@@ -115,7 +116,7 @@ LogicalResult circt::arc::collectModels(mlir::ModuleOp module, | |
llvm::sort(states, [](auto &a, auto &b) { return a.offset < b.offset; }); | ||
|
||
models.emplace_back(std::string(modelOp.getName()), storageType.getSize(), | ||
std::move(states)); | ||
std::move(states), modelOp.getInitialFnAttr()); | ||
} | ||
|
||
return success(); | ||
|
@@ -130,6 +131,7 @@ void circt::arc::serializeModelInfoToJson(llvm::raw_ostream &outputStream, | |
json.object([&] { | ||
json.attribute("name", model.name); | ||
json.attribute("numStateBytes", model.numStateBytes); | ||
json.attribute("hasInitialFn", !!model.initialFnSym); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we also add the name of the function if it is present? |
||
json.attributeArray("states", [&] { | ||
for (const auto &state : model.states) { | ||
json.object([&] { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If the latency of >1, don't we need even more initials? Like a VariadicOfVariadic because an
arc.state
with 3 outputs and latency 3 consists of originally 9 registers. I'm not requesting you to change this, just pointing out the (not so big) limitation. (we don't support latency >1 arcs yet anyway)There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah... this is not something I have considered so far. It makes me again wonder if an initializer region for StateOps would have been the better choice. But I suppose VariadicOfVariadic would also work (can't say I've ever used it though).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's probably a good idea to revisit this once @uenoku is finished with his register initialization work to make sure it works well with that.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If I recall correctly, latency >1 is always a result of some form of optimization within the Arc dialect. We could just declare the initial value on the
arc.state
op to be used for every step in the latency pipeline, such that the first N cycles will produce the initial value. So basically you don't get to pick different initial values for the different time steps. And then we only merge multiple state ops into a >1 latency one if they have the same initial value.