Skip to content

Commit

Permalink
Fix timing merge (#81)
Browse files Browse the repository at this point in the history
* MSHRBuf: change SRAM to Reg

* Remove B MergeTask feature
It severely affects timing and has never actually been triggered (SPEC 0.3);
even if it were utilized, it would bring little performance improvement

* Timing: Exclude s1 info in GrantBuf noSpaceForMSHRReq
assume the s1 request is always present, i.e. always reserve one entry for it
  • Loading branch information
Ivyfeather authored Nov 13, 2023
1 parent aa545af commit c4b6330
Show file tree
Hide file tree
Showing 8 changed files with 28 additions and 139 deletions.
4 changes: 1 addition & 3 deletions src/main/scala/coupledL2/Common.scala
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,8 @@ class MSHRInfo(implicit p: Parameters) extends L2Bundle {
val metaTag = UInt(tagBits.W)
val dirHit = Bool()

// decide whether can nest B (req same-addr) or merge B with release (meta same-addr)
// decide whether can nest B (req same-addr)
val nestB = Bool()
val mergeB = Bool()

// to drop duplicate prefetch reqs
val isAcqOrPrefetch = Bool()
Expand Down Expand Up @@ -208,7 +207,6 @@ class FSMState(implicit p: Parameters) extends L2Bundle {
val s_release = Bool() // release downwards
val s_probeack = Bool() // respond probeack downwards
val s_refill = Bool() // respond grant upwards
val s_merge_probeack = Bool() // respond probeack downwards, Probe merge into A-replacement-Release
// val s_grantack = Bool() // respond grantack downwards, moved to GrantBuf
// val s_triggerprefetch = prefetchOpt.map(_ => Bool())

Expand Down
3 changes: 3 additions & 0 deletions src/main/scala/coupledL2/DataStorage.scala
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ class DSBeat(implicit p: Parameters) extends L2Bundle {

/** One whole block of data, held as a single flat UInt of `blockBytes * 8` bits. */
class DSBlock(implicit p: Parameters) extends L2Bundle {
// Raw block payload.
val data = UInt((blockBytes * 8).W)

// WARNING:TODO: check this
// Reinterprets the block as a Vec of `beatSize` DSBeat elements, after passing `data` through `Reverse`.
// NOTE(review): if this is chisel3.util.Reverse, it reverses the order of individual bits, not of beats,
// so each resulting beat would presumably be bit-reversed too. MSHRBuffer slices beat i as
// data(beatBytes * 8 * (i+1) - 1, beatBytes * 8 * i) with no reversal; confirm which layout is intended
// before using this helper.
def toBeats: Vec[DSBeat] = Reverse(data).asTypeOf(Vec(beatSize, new DSBeat))
}

class DataStorage(implicit p: Parameters) extends L2Module {
Expand Down
9 changes: 5 additions & 4 deletions src/main/scala/coupledL2/GrantBuffer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -249,19 +249,20 @@ class GrantBuffer(implicit p: Parameters) extends L2Module {
val noSpaceForSinkReq = PopCount(VecInit(io.pipeStatusVec.tail.map { case s =>
s.valid && (s.bits.fromA || s.bits.fromC)
}).asUInt) + grantQueueCnt >= mshrsAll.U
val noSpaceForMSHRReq = PopCount(VecInit(io.pipeStatusVec.map { case s =>
// for timing consideration, drop s1 info, so always reserve one entry for it
val noSpaceForMSHRReq = PopCount(VecInit(io.pipeStatusVec.tail.map { case s =>
s.valid && (s.bits.fromA || s.bits.fromC)
}).asUInt) + grantQueueCnt >= mshrsAll.U
}).asUInt) + grantQueueCnt >= (mshrsAll-1).U
// pftRespQueue also requires back pressure to ensure that it will not exceed capacity
// Ideally, it should only block Prefetch from entering MainPipe
// But since it is extremely rare for a pftRespQueue of 10 entries to be full, we simply block all entrances here to keep the logic simple
// TODO: consider optimizing this
val noSpaceForSinkPft = prefetchOpt.map(_ => PopCount(VecInit(io.pipeStatusVec.tail.map { case s =>
s.valid && s.bits.fromA
}).asUInt) + pftRespQueue.get.io.count >= pftQueueLen.U)
val noSpaceForMSHRPft = prefetchOpt.map(_ => PopCount(VecInit(io.pipeStatusVec.map { case s =>
val noSpaceForMSHRPft = prefetchOpt.map(_ => PopCount(VecInit(io.pipeStatusVec.tail.map { case s =>
s.valid && s.bits.fromA
}).asUInt) + pftRespQueue.get.io.count >= pftQueueLen.U)
}).asUInt) + pftRespQueue.get.io.count >= (pftQueueLen-1).U)

io.toReqArb.blockSinkReqEntrance.blockA_s1 := noSpaceForSinkReq || noSpaceForSinkPft.getOrElse(false.B)
io.toReqArb.blockSinkReqEntrance.blockB_s1 := Cat(inflightGrant.map(g => g.valid &&
Expand Down
87 changes: 7 additions & 80 deletions src/main/scala/coupledL2/MSHR.scala
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ class MSHR(implicit p: Parameters) extends L2Module {
val nestedwb = Input(new NestedWriteback)
val nestedwbData = Output(Bool())
val aMergeTask = Flipped(ValidIO(new TaskBundle))
val bMergeTask = Flipped(ValidIO(new BMergeTask))
val replResp = Flipped(ValidIO(new ReplacerResult))
})

Expand Down Expand Up @@ -109,17 +108,14 @@ class MSHR(implicit p: Parameters) extends L2Module {
// Theoretically, data to be released is saved in ReleaseBuffer, so Acquire can be sent as soon as req enters mshr
io.tasks.source_a.valid := !state.s_acquire
io.tasks.source_b.valid := !state.s_pprobe || !state.s_rprobe
val mp_release_valid = !state.s_release && state.w_rprobeacklast && !io.bMergeTask.valid &&
state.w_grantlast &&
val mp_release_valid = !state.s_release && state.w_rprobeacklast && state.w_grantlast &&
state.w_replResp // release after Grant to L1 sent and replRead returns

val mp_probeack_valid = !state.s_probeack && state.w_pprobeacklast
val mp_merge_probeack_valid = !state.s_merge_probeack && state.w_rprobeacklast
val mp_grant_valid = !state.s_refill && state.w_grantlast && state.w_rprobeacklast // [Alias] grant after rprobe done
io.tasks.mainpipe.valid := mp_release_valid || mp_probeack_valid || mp_merge_probeack_valid || mp_grant_valid
io.tasks.mainpipe.valid := mp_release_valid || mp_probeack_valid || mp_grant_valid
// io.tasks.prefetchTrain.foreach(t => t.valid := !state.s_triggerprefetch.getOrElse(true.B))


val a_task = {
val oa = io.tasks.source_a.bits
oa.tag := req.tag
Expand Down Expand Up @@ -160,7 +156,7 @@ class MSHR(implicit p: Parameters) extends L2Module {
ob.alias.foreach(_ := meta.alias.getOrElse(0.U))
ob
}
val mp_release, mp_probeack, mp_merge_probeack, mp_grant = Wire(new TaskBundle)
val mp_release, mp_probeack, mp_grant = Wire(new TaskBundle)
val mp_release_task = {
mp_release.channel := req.channel
mp_release.tag := dirResult.tag
Expand Down Expand Up @@ -265,56 +261,6 @@ class MSHR(implicit p: Parameters) extends L2Module {
mp_probeack
}

// merge_probeack also serves the function of MSHR-Release
val mp_merge_probeack_task = {
val task = RegEnable(io.bMergeTask.bits.task, 0.U.asTypeOf(new TaskBundle), io.bMergeTask.valid)
mp_merge_probeack.channel := task.channel
mp_merge_probeack.tag := task.tag
mp_merge_probeack.set := task.set
mp_merge_probeack.off := task.off
mp_merge_probeack.opcode := Mux(
meta.dirty && isT(meta.state) || probeDirty || task.needProbeAckData,
ProbeAckData,
ProbeAck
)
mp_merge_probeack.param := ParallelLookUp(
Cat(isT(meta.state), task.param(bdWidth - 1, 0)),
Seq(
Cat(false.B, toN) -> BtoN,
Cat(true.B, toN) -> TtoN,
Cat(true.B, toB) -> TtoB
)
)
mp_merge_probeack.mshrTask := true.B
mp_merge_probeack.mshrId := io.id
// mp_merge_probeack definitely read releaseBuf and refillBuf at ReqArb
// and it needs to write refillData to DS, so useProbeData is set false according to DS.wdata logic
mp_merge_probeack.useProbeData := false.B
mp_merge_probeack.way := dirResult.way
mp_merge_probeack.dirty := meta.dirty && meta.state =/= INVALID || probeDirty
mp_merge_probeack.metaWen := false.B
mp_merge_probeack.meta := MetaEntry()
mp_merge_probeack.tagWen := false.B
mp_merge_probeack.dsWen := true.B // write refillData to DS

// unused, set to default
mp_merge_probeack.alias.foreach(_ := 0.U)
mp_merge_probeack.vaddr.foreach(_ := 0.U)
mp_merge_probeack.aliasTask.foreach(_ := false.B)
mp_merge_probeack.size := offsetBits.U
mp_merge_probeack.sourceId := 0.U
mp_merge_probeack.bufIdx := 0.U
mp_merge_probeack.needProbeAckData := false.B
mp_merge_probeack.fromL2pft.foreach(_ := false.B)
mp_merge_probeack.needHint.foreach(_ := false.B)
mp_merge_probeack.wayMask := Fill(cacheParams.ways, "b1".U)
mp_merge_probeack.replTask := true.B
mp_merge_probeack.reqSource := MemReqSource.NoWhere.id.U
mp_merge_probeack.mergeA := false.B
mp_merge_probeack.aMergeTask := 0.U.asTypeOf(new MergeTaskBundle)
mp_merge_probeack
}

val mergeA = RegInit(false.B)
when(io.aMergeTask.valid) {
mergeA := true.B
Expand Down Expand Up @@ -435,8 +381,7 @@ class MSHR(implicit p: Parameters) extends L2Module {
Seq(
mp_grant_valid -> mp_grant,
mp_release_valid -> mp_release,
mp_probeack_valid -> mp_probeack,
mp_merge_probeack_valid -> mp_merge_probeack
mp_probeack_valid -> mp_probeack
)
)
io.tasks.mainpipe.bits.reqSource := req.reqSource
Expand All @@ -458,9 +403,7 @@ class MSHR(implicit p: Parameters) extends L2Module {
state.s_rprobe := true.B
}
when (io.tasks.mainpipe.ready) {
when (mp_merge_probeack_valid) {
state.s_merge_probeack := true.B
}.elsewhen (mp_grant_valid) {
when (mp_grant_valid) {
state.s_refill := true.B
}.elsewhen (mp_release_valid) {
state.s_release := true.B
Expand Down Expand Up @@ -547,7 +490,7 @@ class MSHR(implicit p: Parameters) extends L2Module {
timer := timer + 1.U
}

val no_schedule = state.s_refill && state.s_probeack && state.s_merge_probeack && state.s_release // && state.s_triggerprefetch.getOrElse(true.B)
val no_schedule = state.s_refill && state.s_probeack && state.s_release // && state.s_triggerprefetch.getOrElse(true.B)
val no_wait = state.w_rprobeacklast && state.w_pprobeacklast && state.w_grantlast && state.w_releaseack && state.w_replResp
val will_free = no_schedule && no_wait
when (will_free && req_valid) {
Expand All @@ -558,11 +501,8 @@ class MSHR(implicit p: Parameters) extends L2Module {
// when grant not received, B can nest A
val nestB = !state.w_grantfirst

// mergeB is only allowed when release not sent
//(TODO: or we could just blockB, since Release will be sent to MP very shortly and have no deadlock problem)
val mergeB = !state.s_release
// alias: should protect meta from being accessed or occupied
val releaseNotSent = !state.s_release || !state.s_merge_probeack || io.bMergeTask.valid
val releaseNotSent = !state.s_release
io.status.valid := req_valid
io.status.bits.channel := req.channel
io.status.bits.set := req.set
Expand All @@ -587,7 +527,6 @@ class MSHR(implicit p: Parameters) extends L2Module {
io.msInfo.bits.metaTag := dirResult.tag
io.msInfo.bits.willFree := will_free
io.msInfo.bits.nestB := nestB
io.msInfo.bits.mergeB := mergeB
io.msInfo.bits.isAcqOrPrefetch := req_acquire || req_prefetch
io.msInfo.bits.isPrefetch := req_prefetch
io.msInfo.bits.s_refill := state.s_refill
Expand All @@ -597,18 +536,6 @@ class MSHR(implicit p: Parameters) extends L2Module {
assert(!(c_resp.valid && !io.status.bits.w_c_resp))
assert(!(d_resp.valid && !io.status.bits.w_d_resp))

/* ======== Handling Nested B ======== */
when (io.bMergeTask.valid) {
state.s_merge_probeack := false.B
state.s_release := true.B
state.w_releaseack := true.B
when (meta.clients.orR) {
state.s_rprobe := false.B
state.w_rprobeackfirst := false.B
state.w_rprobeacklast := false.B
}
}

/* ======== Handling Nested C ======== */
// for A miss, only when replResp do we finally choose a way, allowing nested C
// for A-alias, always allowing nested C (state.w_replResp === true.B)
Expand Down
28 changes: 9 additions & 19 deletions src/main/scala/coupledL2/MSHRBuffer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -48,14 +48,10 @@ class MSHRBuffer(wPorts: Int = 1)(implicit p: Parameters) extends L2Module {
val w = Vec(wPorts, new MSHRBufWrite)
})

val buffer = Seq.fill(mshrsAll) {
Seq.fill(beatSize) {
Module(new SRAMTemplate(new DSBeat(), set = 1, way = 1, singlePort = true))
}
}
val valids = RegInit(VecInit(Seq.fill(mshrsAll) {
VecInit(Seq.fill(beatSize)(false.B))
}))
val buffer = Reg(Vec(mshrsAll, Vec(beatSize, new DSBeat())))

io.w.foreach {
case w =>
Expand All @@ -82,25 +78,19 @@ class MSHRBuffer(wPorts: Int = 1)(implicit p: Parameters) extends L2Module {

val w_beat_sel = PriorityMux(wens, io.w.map(_.beat_sel))
val w_data = PriorityMux(wens, io.w.map(_.data))
val ren = io.r.valid && io.r.id === i.U
block.zipWithIndex.foreach {
case (entry, j) =>
entry.io.w.req.valid := wens.orR && w_beat_sel(j)
entry.io.w.req.bits.apply(
data = w_data.data((j + 1) * beatBytes * 8 - 1, j * beatBytes * 8).asTypeOf(new DSBeat),
setIdx = 0.U,
waymask = 1.U
)
entry.io.r.req.valid := ren
entry.io.r.req.bits.apply(0.U)

when(wens.orR) {
(0 until beatSize).map { i =>
when(w_beat_sel(i)) {
block(i) := w_data.data(beatBytes * 8 * (i+1) - 1, beatBytes * 8 * i).asTypeOf(new DSBeat)
}
}
}
}

io.r.ready := true.B
io.w.foreach(_.ready := true.B)

val ridReg = RegNext(io.r.id, 0.U.asTypeOf(io.r.id))
io.r.data.data := VecInit(buffer.map {
case block => VecInit(block.map(_.io.r.resp.data.asUInt)).asUInt
})(ridReg)
io.r.data.data := buffer(ridReg).asUInt
}
3 changes: 0 additions & 3 deletions src/main/scala/coupledL2/MSHRCtl.scala
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@ class MSHRCtl(implicit p: Parameters) extends L2Module {
/* to SinkB, to merge nested B req */
val msInfo = Vec(mshrsAll, ValidIO(new MSHRInfo))
val aMergeTask = Flipped(ValidIO(new AMergeTask))
val bMergeTask = Flipped(ValidIO(new BMergeTask))

/* refill read replacer result */
val replResp = Flipped(ValidIO(new ReplacerResult))
Expand Down Expand Up @@ -130,8 +129,6 @@ class MSHRCtl(implicit p: Parameters) extends L2Module {
m.io.nestedwb := io.nestedwb
m.io.aMergeTask.valid := io.aMergeTask.valid && io.aMergeTask.bits.id === i.U
m.io.aMergeTask.bits := io.aMergeTask.bits.task
m.io.bMergeTask.valid := io.bMergeTask.valid && io.bMergeTask.bits.id === i.U
m.io.bMergeTask.bits := io.bMergeTask.bits
}

io.toReqArb.blockC_s1 := false.B
Expand Down
32 changes: 3 additions & 29 deletions src/main/scala/coupledL2/SinkB.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,11 @@ import freechips.rocketchip.tilelink.TLPermissions._
import coupledL2.utils.XSPerfAccumulate
import utility.MemReqSource

/** A B-channel request paired with the id of the MSHR it should be merged into (SinkB -> MSHRCtl -> MSHR). */
class BMergeTask(implicit p: Parameters) extends L2Bundle {
// Index of the target MSHR; SinkB drives it with OHToUInt of its merge mask, and MSHRCtl compares it against each MSHR index.
val id = UInt(mshrBits.W)
// The incoming B request, already converted to a TaskBundle by SinkB.
val task = new TaskBundle()
}

class SinkB(implicit p: Parameters) extends L2Module {
val io = IO(new Bundle() {
val b = Flipped(DecoupledIO(new TLBundleB(edgeIn.bundle)))
val task = DecoupledIO(new TaskBundle)
val msInfo = Vec(mshrsAll, Flipped(ValidIO(new MSHRInfo)))
val bMergeTask = ValidIO(new BMergeTask)
})

def fromTLBtoTaskBundle(b: TLBundleB): TaskBundle = {
Expand Down Expand Up @@ -74,39 +68,19 @@ class SinkB(implicit p: Parameters) extends L2Module {
}
val task = fromTLBtoTaskBundle(io.b.bits)

/* ======== Merge Nested-B req ======== */
// unable to accept incoming B req because same-addr as some MSHR REQ
val addrConflict = VecInit(io.msInfo.map(s =>
s.valid && s.bits.set === task.set && s.bits.reqTag === task.tag && !s.bits.willFree && !s.bits.nestB
)).asUInt.orR

// unable to accept incoming B req because same-addr as some MSHR replaced block and cannot nest
val replaceConflictMask = VecInit(io.msInfo.map(s =>
s.valid && s.bits.set === task.set && s.bits.metaTag === task.tag && s.bits.releaseNotSent && !s.bits.mergeB
s.valid && s.bits.set === task.set && s.bits.metaTag === task.tag && s.bits.releaseNotSent
)).asUInt
val replaceConflict = replaceConflictMask.orR

// incoming B can be merged with some MSHR replaced block and able to be accepted
val mergeBMask = VecInit(io.msInfo.map(s =>
s.valid && s.bits.set === task.set && s.bits.metaTag === task.tag && s.bits.mergeB
)).asUInt

assert(PopCount(replaceConflictMask) <= 1.U)
assert(PopCount(mergeBMask) <= 1.U)

val mergeB = mergeBMask.orR && task.param === toN // only toN can merge with MSHR-Release
val mergeBId = OHToUInt(mergeBMask)

// when conflict, we block B req from entering SinkB
// when !conflict and mergeB , we merge B req to MSHR
io.task.valid := io.b.valid && !addrConflict && !replaceConflict && !mergeB
io.task.valid := io.b.valid && !addrConflict && !replaceConflict
io.task.bits := task
io.b.ready := mergeB || (io.task.ready && !addrConflict && !replaceConflict)

io.bMergeTask.valid := io.b.valid && mergeB
io.bMergeTask.bits.id := mergeBId
io.bMergeTask.bits.task := task

XSPerfAccumulate(cacheParams, "mergeBTask", io.bMergeTask.valid)
//!!WARNING: TODO: if this is zero, it means the function [Probe merge into MSHR-Release] has never been tested and may have flaws
io.b.ready := io.task.ready && !addrConflict && !replaceConflict
}
1 change: 0 additions & 1 deletion src/main/scala/coupledL2/Slice.scala
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,6 @@ class Slice()(implicit p: Parameters) extends L2Module {
mshrCtl.io.resps.sourceC := sourceC.io.resp
mshrCtl.io.nestedwb := mainPipe.io.nestedwb
mshrCtl.io.aMergeTask := a_reqBuf.io.aMergeTask
mshrCtl.io.bMergeTask := sinkB.io.bMergeTask
mshrCtl.io.replResp <> directory.io.replResp
mainPipe.io.replResp <> directory.io.replResp

Expand Down

0 comments on commit c4b6330

Please sign in to comment.