@@ -24,7 +24,8 @@ import chisel3._
24
24
import chisel3 .util ._
25
25
import freechips .rocketchip .tilelink ._
26
26
import freechips .rocketchip .tilelink .TLMessages .{AcquireBlock , AcquirePerm , ReleaseAck }
27
- import huancun .utils .SReg
27
+ import huancun .utils ._
28
+
28
29
29
30
class SourceD (implicit p : Parameters ) extends HuanCunModule {
30
31
/*
@@ -47,16 +48,16 @@ class SourceD(implicit p: Parameters) extends HuanCunModule {
47
48
val bs_wdata = Output (new DSData )
48
49
// data hazards
49
50
val sourceD_r_hazard = ValidIO (new SourceDHazard )
51
+ // putbuffer interface
52
+ val pb_pop = DecoupledIO (new PutBufferPop )
53
+ val pb_beat = Input (new PutBufferBeatEntry )
50
54
})
51
55
52
- io.bs_waddr.valid := false .B
53
- io.bs_waddr.bits := DontCare
54
- io.bs_wdata := DontCare
55
-
56
56
val d = io.d
57
57
val s1_valid = Wire (Bool ())
58
58
val s2_valid, s2_ready = Wire (Bool ())
59
59
val s3_valid, s3_ready = Wire (Bool ())
60
+ val s4_ready = Wire (Bool ())
60
61
61
62
// stage1
62
63
val busy = RegInit (false .B )
@@ -65,8 +66,10 @@ class SourceD(implicit p: Parameters) extends HuanCunModule {
65
66
val s1_req = Mux (busy, s1_req_reg, io.task.bits)
66
67
// s1_needData: the stage-1 request came from channel A (s1_req.fromA) and its response opcode is
// GrantData, AccessAckData or AccessAck. s1_total_beats is totalBeats(s1_req.size) when this is set
// and 0 otherwise.
val s1_needData = s1_req.fromA && (
67
68
s1_req.opcode === TLMessages .GrantData ||
68
- s1_req.opcode === TLMessages .AccessAckData
69
+ s1_req.opcode === TLMessages .AccessAckData ||
70
+ s1_req.opcode === TLMessages .AccessAck // A Put (answered with AccessAck) must also read the stored data. TODO: the read is unnecessary for a full-sized PutFullData.
69
71
)
72
// s1_need_pb: channel-A request answered with AccessAck; this flag is what later arms the
// put-buffer pop (s2_valid_pb) and the data-array write (s4_need_pb).
+ val s1_need_pb = s1_req.fromA && (s1_req.opcode === TLMessages .AccessAck )
70
73
val s1_counter = RegInit (0 .U (beatBits.W )) // how many beats have been sent
71
74
val s1_total_beats = Mux (s1_needData, totalBeats(s1_req.size), 0 .U (beatBits.W ))
72
75
val s1_beat = startBeat(s1_req.off) | s1_counter
@@ -126,13 +129,29 @@ class SourceD(implicit p: Parameters) extends HuanCunModule {
126
129
val s2_latch = s1_valid && s2_ready
127
130
val s2_req = RegEnable (s1_req, s2_latch)
128
131
val s2_needData = RegEnable (s1_needData, s2_latch)
132
+ val s2_last = RegEnable (s1_last, s2_latch)
133
+ val s2_counter = RegEnable (s1_counter, s2_latch)
129
134
val s2_full = RegInit (false .B )
130
135
val s2_releaseAck = s2_req.opcode === ReleaseAck
131
136
val s2_bypass_hit = RegEnable (
132
137
Mux (busy, s1_bypass_hit_reg, s1_bypass_hit_wire),
133
138
false .B , s2_latch
134
139
)
135
140
val s2_d = Wire (io.d.cloneType)
141
// Stage-2 put-buffer handshake. Every register below is captured from stage 1 on s2_latch.
+ val s2_need_pb = RegEnable (s1_need_pb, s2_latch)
142
+ val s2_need_d = RegEnable (! s1_need_pb || s1_counter === 0 .U , s2_latch) // The AccessAck for a Put must be sent only once, so only the beat with s1_counter === 0 asks for a D response.
143
+ val s2_valid_pb = RegInit (false .B ) // A put-buffer pop is pending for the beat held in stage 2; cleared once io.pb_pop.ready is seen.
144
+ val s2_pdata_raw = io.pb_beat
145
+ val pb_ready = io.pb_pop.ready
146
// NOTE(review): HoldUnless comes from huancun.utils and is not visible here; presumably it passes
// s2_pdata_raw through while s2_valid_pb is high and holds the last such value otherwise. Confirm that
// io.pb_beat is valid in the same cycle as the pop, otherwise the wrong beat is captured.
+ val s2_pdata = HoldUnless (s2_pdata_raw, s2_valid_pb)
147
+
148
// Pop request: selects the buffer entry (bufIdx) and beat (count) belonging to the stage-2 request.
+ io.pb_pop.valid := s2_valid_pb && s2_req.fromA
149
+ io.pb_pop.bits.bufIdx := s2_req.bufIdx
150
+ io.pb_pop.bits.count := s2_counter
151
+ io.pb_pop.bits.last := s2_last
152
+
153
// s2_valid_pb is cleared whenever the put buffer signals ready (ready alone, not valid && ready) and
// re-armed on s2_latch; the s2_latch assignment comes last, so it wins when both fire in one cycle.
+ when (pb_ready) { s2_valid_pb := false .B }
154
+ when (s2_latch) { s2_valid_pb := s1_need_pb }
136
155
137
156
s1_queue.io.deq.ready := s2_full && s2_bypass_hit && s2_d.ready
138
157
s2_d.valid := s2_full && ((s2_bypass_hit && s1_queue.io.deq.valid) || ! s2_needData)
@@ -146,33 +165,37 @@ class SourceD(implicit p: Parameters) extends HuanCunModule {
146
165
s2_d.bits.corrupt := false .B
147
166
s2_d.bits.echo.lift(DirtyKey ).foreach(_ := s2_req.dirty)
148
167
149
- val s2_can_go = Mux (s2_d.valid, s2_d.ready, s3_ready)
168
+ val s2_can_go = Mux (s2_d.valid, s2_d.ready, s3_ready && ( ! s2_valid_pb || pb_ready) )
150
169
when(s2_full && s2_can_go) { s2_full := false .B }
151
170
when(s2_latch) { s2_full := true .B }
152
171
153
- s2_valid := s2_full && ! s2_d.valid
172
+ s2_valid := s2_full && ! s2_d.valid && ( ! s2_valid_pb || pb_ready)
154
173
s2_ready := ! s2_full || s2_can_go
155
174
156
175
// stage3
157
176
val s3_latch = s2_valid && s3_ready
158
- val s3_full = RegInit (false .B )
177
+ val s3_valid_d = RegInit (false .B )
178
+
159
179
// wait counter for sram data
160
180
val s3_wait = Reg (UInt (log2Ceil(cacheParams.sramCycleFactor).W ))
161
181
val s3_needData = RegInit (false .B )
162
182
val s3_req = RegEnable (s2_req, s3_latch)
183
+ val s3_counter = RegEnable (s2_counter, s3_latch)
184
+ val s3_pdata = RegEnable (s2_pdata, s3_latch)
185
+ val s3_need_pb = RegEnable (s2_need_pb, s3_latch)
163
186
val s3_releaseAck = RegEnable (s2_releaseAck, s3_latch)
164
187
val s3_d = Wire (io.d.cloneType)
165
188
val s3_queue = Module (new Queue (new DSData , 3 , flow = true ))
166
189
val s3_can_go = if (cacheParams.sramCycleFactor == 1 ) true .B else s3_wait === 0 .U
167
190
168
- assert(! s3_full || s3_needData, " Only data task can go to stage3!" )
191
+ assert(! s3_valid_d || s3_needData, " Only data task can go to stage3!" )
169
192
170
193
when(s3_d.ready && s3_can_go) {
171
- s3_full := false .B
194
+ s3_valid_d := false .B
172
195
s3_needData := false .B
173
196
}
174
197
when(s3_latch) {
175
- s3_full := true . B
198
+ s3_valid_d := s2_need_d
176
199
s3_needData := s2_needData
177
200
}
178
201
s3_wait := Mux (s3_latch,
@@ -181,7 +204,7 @@ class SourceD(implicit p: Parameters) extends HuanCunModule {
181
204
)
182
205
183
206
val s3_rdata = s3_queue.io.deq.bits.data
184
- s3_d.valid := s3_valid && s3_can_go
207
+ s3_d.valid := s3_valid_d && s3_can_go
185
208
s3_d.bits.opcode := s3_req.opcode
186
209
s3_d.bits.param := Mux (s3_releaseAck, 0 .U , s3_req.param)
187
210
s3_d.bits.sink := s3_req.sinkId
@@ -198,10 +221,37 @@ class SourceD(implicit p: Parameters) extends HuanCunModule {
198
221
), false .B )
199
222
s3_queue.io.enq.bits := io.bs_rdata
200
223
assert(! s3_queue.io.enq.valid || s3_queue.io.enq.ready)
201
- s3_queue.io.deq.ready := s3_d.ready && s3_needData && s3_valid && s3_can_go
224
// Dequeue one beat of SRAM read data when the stage-3 D beat is accepted.
// NOTE(review): s3_valid is now an alias of s3_valid_d (assigned below), which is loaded from
// s2_need_d and is therefore false for Put beats other than the first. For those beats this
// expression never dequeues s3_queue, so their read data appears to stay queued -- confirm, and
// consider tracking stage-3 occupancy separately from "needs a D response".
+ s3_queue.io.deq.ready := s3_d.ready && s3_needData && s3_valid && s3_can_go // TODO: inspect this (see review note above)
225
+
226
// Stage 3 accepts a new beat when it holds no pending D response, or when that response leaves this cycle.
+ s3_ready := ! s3_valid_d || s3_d.ready && s3_can_go
227
// s3_valid also gates the hand-off into stage 4 (s4_latch = s3_valid && s4_ready).
+ s3_valid := s3_valid_d
228
+
229
+ // stage4: merge put-buffer data into the data read from the array and write it back through io.bs_waddr / io.bs_wdata
230
// NOTE(review): s4_latch is gated by s3_valid, which equals s3_valid_d, and s3_valid_d is loaded from
// s2_need_d (false for Put beats whose s1_counter was non-zero). From the visible hunks it looks like
// only the first beat of a multi-beat Put can reach this stage -- confirm.
// NOTE(review): s4_latch does not wait for s3_can_go or s3_queue.io.deq.valid, so s3_rdata may be
// captured before the SRAM data is available, and the same beat may be latched repeatedly while
// s3_valid_d stays high -- confirm.
+ val s4_latch = s3_valid && s4_ready
231
+ val s4_req = RegEnable (s3_req, s4_latch)
232
+ val s4_rdata = RegEnable (s3_rdata, s4_latch)
233
+ val s4_pdata = RegEnable (s3_pdata, s4_latch)
234
+ val s4_need_pb = RegEnable (s3_need_pb, s4_latch)
235
+ val s4_beat = RegEnable (s3_counter, s4_latch)
236
+ val s4_full = RegInit (false .B )
237
+
238
// s4_full is released when the write address is accepted or when no write is needed (!s4_need_pb);
// the s4_latch assignment comes last, so a new beat arriving in the same cycle keeps the stage full.
+ when (io.bs_waddr.ready || ! s4_need_pb) { s4_full := false .B }
239
+ when (s4_latch) { s4_full := true .B }
240
+
241
// One select bit per byte lane, taken from the put mask.
+ val selects = s4_pdata.mask.asBools
242
// For byte lane i, a mask bit of 0 keeps the byte of s4_rdata (Vec index 0) and a mask bit of 1 takes
// the byte of s4_pdata.data (Vec index 1). Cat places its first argument in the most significant
// position, hence the reverse so that byte 0 ends up in the least significant byte.
+ val mergedData = Cat (selects.zipWithIndex.map { case (s, i) =>
243
+ VecInit (Seq (s4_rdata, s4_pdata.data).map(_((i + 1 ) * 8 - 1 , i * 8 )))(s)
244
+ }.reverse) // merge read data and put data byte by byte according to the mask
245
+
246
// Write request to the data array: issued only while stage 4 holds a beat that needs put-buffer data.
+ io.bs_waddr.valid := s4_full && s4_need_pb
247
+ io.bs_waddr.bits.noop := false .B
248
+ io.bs_waddr.bits.way := s4_req.way
249
+ io.bs_waddr.bits.set := s4_req.set
250
+ io.bs_waddr.bits.beat := s4_beat
251
+ io.bs_waddr.bits.write := true .B
252
+ io.bs_wdata.data := mergedData
202
253
203
- s3_ready := ! s3_valid || s3_d.ready && s3_can_go
204
- s3_valid := s3_full
254
// Stage 4 can take a new beat when it is empty, when its write is being accepted, or when the beat it
// holds does not need a write at all.
+ s4_ready := ! s4_full || io.bs_waddr.ready || ! s4_need_pb
205
255
206
256
TLArbiter .lowest(edgeIn, io.d, s3_d, s2_d)
207
257
0 commit comments