Skip to content

Commit a655f48

Browse files
Sebastian Andrzej SiewiorFelipe Balbi
authored andcommitted
usb: musb: musb_cppi41: handle pre-mature TX complete interrupt
The TX-complete interrupt of the CPPI41 on AM335x fires too early. Adding a loop and counting how long it takes until the MUSB_TXCSR_TXPKTRDY bit is cleared I see FS: |musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadc54002, len=1514 is_tx=1 |cppi41_dma_callback() 74 loops |musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8802, len=1514 is_tx=1 |cppi41_dma_callback() 66 loops |musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadcd8002, len=1514 is_tx=1 |cppi41_dma_callback() 136 loops |musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=64, mode=0, dma_addr=0xadf55802, len=1514 is_tx=1 |cppi41_dma_callback() 136 loops avg: 110 - 150us HS: |musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xaca6f002, len=1514 is_tx=1 |cppi41_dma_callback() 0 loops |musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f802, len=1514 is_tx=1 |cppi41_dma_callback() 2 loops |musb-hdrc musb-hdrc.0.auto: configure ep1/80 packet_sz=512, mode=0, dma_addr=0xadd6f002, len=1514 is_tx=1 |cppi41_dma_callback() 13 loops avg: 2us for the same test case. One loop means a udelay(1). The delay seems to depend on the packet size. On HS the bit is always cleared for small packet sizes while on FS it is never the case, it mostly around 110us. This testing has been performed with g_ether (musb as device) and using BULK transfers. INTR transfers are way more fun: during init the gadget sends a INT packet to the host and cppi41 says "transfer done" shortly after. The MUSB_TXCSR_TXPKTRDY bit is set even seconds later. The reason is that the host did not try to receive it, it does so after the interface (on host side) has been configured. Until this happens, that packet remains in musb's FIFO. To fix this, two things are done: - No DMA transfers for INT based endpoints. These transfer are usually very small and rare so it is likely better to skip the DMA engine and stuff the four bytes directly into the FIFO - on HS we poll up to 25us and hope that bit goes away. If not we setup a hrtimer to poll for it. The 140us delay is a rule of thumb. In FS the command | ping 10.10.10.10 -c1 -s65130 creates about 44 1514bytes transfers. About 19 of them need a second timer to complete. Reported-by: Bin Liu <b-liu@ti.com> Cc: stable@vger.kernel.org Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de> Signed-off-by: Felipe Balbi <balbi@ti.com>
1 parent d373a85 commit a655f48

File tree

1 file changed

+108
-5
lines changed

1 file changed

+108
-5
lines changed

drivers/usb/musb/musb_cppi41.c

Lines changed: 108 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ struct cppi41_dma_channel {
3838
u32 prog_len;
3939
u32 transferred;
4040
u32 packet_sz;
41+
struct list_head tx_check;
4142
};
4243

4344
#define MUSB_DMA_NUM_CHANNELS 15
@@ -47,6 +48,8 @@ struct cppi41_dma_controller {
4748
struct cppi41_dma_channel rx_channel[MUSB_DMA_NUM_CHANNELS];
4849
struct cppi41_dma_channel tx_channel[MUSB_DMA_NUM_CHANNELS];
4950
struct musb *musb;
51+
struct hrtimer early_tx;
52+
struct list_head early_tx_list;
5053
u32 rx_mode;
5154
u32 tx_mode;
5255
u32 auto_req;
@@ -96,11 +99,23 @@ static void update_rx_toggle(struct cppi41_dma_channel *cppi41_channel)
9699
cppi41_channel->usb_toggle = toggle;
97100
}
98101

102+
static bool musb_is_tx_fifo_empty(struct musb_hw_ep *hw_ep)
103+
{
104+
u8 epnum = hw_ep->epnum;
105+
struct musb *musb = hw_ep->musb;
106+
void __iomem *epio = musb->endpoints[epnum].regs;
107+
u16 csr;
108+
109+
csr = musb_readw(epio, MUSB_TXCSR);
110+
if (csr & MUSB_TXCSR_TXPKTRDY)
111+
return false;
112+
return true;
113+
}
114+
99115
static void cppi41_dma_callback(void *private_data);
100116

101-
static void cppi41_trans_done(struct dma_channel *channel)
117+
static void cppi41_trans_done(struct cppi41_dma_channel *cppi41_channel)
102118
{
103-
struct cppi41_dma_channel *cppi41_channel = channel->private_data;
104119
struct musb_hw_ep *hw_ep = cppi41_channel->hw_ep;
105120
struct musb *musb = hw_ep->musb;
106121

@@ -138,7 +153,7 @@ static void cppi41_trans_done(struct dma_channel *channel)
138153
return;
139154

140155
dma_desc->callback = cppi41_dma_callback;
141-
dma_desc->callback_param = channel;
156+
dma_desc->callback_param = &cppi41_channel->channel;
142157
cppi41_channel->cookie = dma_desc->tx_submit(dma_desc);
143158
dma_async_issue_pending(dc);
144159

@@ -150,6 +165,41 @@ static void cppi41_trans_done(struct dma_channel *channel)
150165
}
151166
}
152167

168+
static enum hrtimer_restart cppi41_recheck_tx_req(struct hrtimer *timer)
169+
{
170+
struct cppi41_dma_controller *controller;
171+
struct cppi41_dma_channel *cppi41_channel, *n;
172+
struct musb *musb;
173+
unsigned long flags;
174+
enum hrtimer_restart ret = HRTIMER_NORESTART;
175+
176+
controller = container_of(timer, struct cppi41_dma_controller,
177+
early_tx);
178+
musb = controller->musb;
179+
180+
spin_lock_irqsave(&musb->lock, flags);
181+
list_for_each_entry_safe(cppi41_channel, n, &controller->early_tx_list,
182+
tx_check) {
183+
bool empty;
184+
struct musb_hw_ep *hw_ep = cppi41_channel->hw_ep;
185+
186+
empty = musb_is_tx_fifo_empty(hw_ep);
187+
if (empty) {
188+
list_del_init(&cppi41_channel->tx_check);
189+
cppi41_trans_done(cppi41_channel);
190+
}
191+
}
192+
193+
if (!list_empty(&controller->early_tx_list)) {
194+
ret = HRTIMER_RESTART;
195+
hrtimer_forward_now(&controller->early_tx,
196+
ktime_set(0, 150 * NSEC_PER_USEC));
197+
}
198+
199+
spin_unlock_irqrestore(&musb->lock, flags);
200+
return ret;
201+
}
202+
153203
static void cppi41_dma_callback(void *private_data)
154204
{
155205
struct dma_channel *channel = private_data;
@@ -159,6 +209,7 @@ static void cppi41_dma_callback(void *private_data)
159209
unsigned long flags;
160210
struct dma_tx_state txstate;
161211
u32 transferred;
212+
bool empty;
162213

163214
spin_lock_irqsave(&musb->lock, flags);
164215

@@ -177,8 +228,52 @@ static void cppi41_dma_callback(void *private_data)
177228
transferred < cppi41_channel->packet_sz)
178229
cppi41_channel->prog_len = 0;
179230

180-
cppi41_trans_done(channel);
181-
231+
empty = musb_is_tx_fifo_empty(hw_ep);
232+
if (empty) {
233+
cppi41_trans_done(cppi41_channel);
234+
} else {
235+
struct cppi41_dma_controller *controller;
236+
/*
237+
* On AM335x it has been observed that the TX interrupt fires
238+
* too early that means the TXFIFO is not yet empty but the DMA
239+
* engine says that it is done with the transfer. We don't
240+
* receive a FIFO empty interrupt so the only thing we can do is
241+
* to poll for the bit. On HS it usually takes 2us, on FS around
242+
* 110us - 150us depending on the transfer size.
243+
* We spin on HS (no longer than than 25us and setup a timer on
244+
* FS to check for the bit and complete the transfer.
245+
*/
246+
controller = cppi41_channel->controller;
247+
248+
if (musb->g.speed == USB_SPEED_HIGH) {
249+
unsigned wait = 25;
250+
251+
do {
252+
empty = musb_is_tx_fifo_empty(hw_ep);
253+
if (empty)
254+
break;
255+
wait--;
256+
if (!wait)
257+
break;
258+
udelay(1);
259+
} while (1);
260+
261+
empty = musb_is_tx_fifo_empty(hw_ep);
262+
if (empty) {
263+
cppi41_trans_done(cppi41_channel);
264+
goto out;
265+
}
266+
}
267+
list_add_tail(&cppi41_channel->tx_check,
268+
&controller->early_tx_list);
269+
if (!hrtimer_active(&controller->early_tx)) {
270+
hrtimer_start_range_ns(&controller->early_tx,
271+
ktime_set(0, 140 * NSEC_PER_USEC),
272+
40 * NSEC_PER_USEC,
273+
HRTIMER_MODE_REL);
274+
}
275+
}
276+
out:
182277
spin_unlock_irqrestore(&musb->lock, flags);
183278
}
184279

@@ -377,6 +472,8 @@ static int cppi41_is_compatible(struct dma_channel *channel, u16 maxpacket,
377472
WARN_ON(1);
378473
return 1;
379474
}
475+
if (cppi41_channel->hw_ep->ep_in.type != USB_ENDPOINT_XFER_BULK)
476+
return 0;
380477
if (cppi41_channel->is_tx)
381478
return 1;
382479
/* AM335x Advisory 1.0.13. No workaround for device RX mode */
@@ -401,6 +498,7 @@ static int cppi41_dma_channel_abort(struct dma_channel *channel)
401498
if (cppi41_channel->channel.status == MUSB_DMA_STATUS_FREE)
402499
return 0;
403500

501+
list_del_init(&cppi41_channel->tx_check);
404502
if (is_tx) {
405503
csr = musb_readw(epio, MUSB_TXCSR);
406504
csr &= ~MUSB_TXCSR_DMAENAB;
@@ -508,6 +606,7 @@ static int cppi41_dma_controller_start(struct cppi41_dma_controller *controller)
508606
cppi41_channel->controller = controller;
509607
cppi41_channel->port_num = port;
510608
cppi41_channel->is_tx = is_tx;
609+
INIT_LIST_HEAD(&cppi41_channel->tx_check);
511610

512611
musb_dma = &cppi41_channel->channel;
513612
musb_dma->private_data = cppi41_channel;
@@ -533,6 +632,7 @@ void dma_controller_destroy(struct dma_controller *c)
533632
struct cppi41_dma_controller *controller = container_of(c,
534633
struct cppi41_dma_controller, controller);
535634

635+
hrtimer_cancel(&controller->early_tx);
536636
cppi41_dma_controller_stop(controller);
537637
kfree(controller);
538638
}
@@ -552,6 +652,9 @@ struct dma_controller *dma_controller_create(struct musb *musb,
552652
if (!controller)
553653
goto kzalloc_fail;
554654

655+
hrtimer_init(&controller->early_tx, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
656+
controller->early_tx.function = cppi41_recheck_tx_req;
657+
INIT_LIST_HEAD(&controller->early_tx_list);
555658
controller->musb = musb;
556659

557660
controller->controller.channel_alloc = cppi41_dma_channel_allocate;

0 commit comments

Comments
 (0)