 #include "xe_macros.h"
 #include "xe_observation.h"
 #include "xe_pm.h"
+#include "xe_trace.h"
 
 #include "regs/xe_eu_stall_regs.h"
 #include "regs/xe_gt_regs.h"
 
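+/* Period in milliseconds at which the poll work checks the buffer write pointers */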
+#define POLL_PERIOD_MS 5
+
 static size_t per_xecore_buf_size = SZ_512K;
 
 struct per_xecore_buf {
@@ -37,22 +40,27 @@ struct per_xecore_buf {
 };
 
 struct xe_eu_stall_data_stream {
+        bool pollin;
         bool enabled;
         int wait_num_reports;
         int sampling_rate_mult;
+        wait_queue_head_t poll_wq;
         size_t data_record_size;
         size_t per_xecore_buf_size;
 
         struct xe_gt *gt;
         struct xe_bo *bo;
         struct per_xecore_buf *xecore_buf;
+        struct delayed_work buf_poll_work;
 };
 
 struct xe_eu_stall_gt {
         /* Lock to protect stream */
         struct mutex stream_lock;
         /* EU stall data stream */
         struct xe_eu_stall_data_stream *stream;
+        /* Workqueue to schedule buffer pointers polling work */
+        struct workqueue_struct *buf_ptr_poll_wq;
 };
 
 /**
@@ -114,6 +122,7 @@ static void xe_eu_stall_fini(void *arg)
 {
         struct xe_gt *gt = arg;
 
+        destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
         mutex_destroy(&gt->eu_stall->stream_lock);
         kfree(gt->eu_stall);
 }
@@ -139,11 +148,19 @@ int xe_eu_stall_init(struct xe_gt *gt)
 
         mutex_init(&gt->eu_stall->stream_lock);
 
+        gt->eu_stall->buf_ptr_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0);
+        if (!gt->eu_stall->buf_ptr_poll_wq) {
+                ret = -ENOMEM;
+                goto exit_free;
+        }
+
         ret = devm_add_action_or_reset(xe->drm.dev, xe_eu_stall_fini, gt);
         if (ret)
-                goto exit_free;
+                goto exit_destroy;
 
         return 0;
+exit_destroy:
+        destroy_workqueue(gt->eu_stall->buf_ptr_poll_wq);
 exit_free:
         mutex_destroy(&gt->eu_stall->stream_lock);
         kfree(gt->eu_stall);
@@ -248,14 +265,214 @@ static int xe_eu_stall_user_extensions(struct xe_device *xe, u64 extension,
         return 0;
 }
 
+/**
+ * buf_data_size - Calculate the number of bytes in a circular buffer
+ *                 given the read and write pointers and the size of
+ *                 the buffer.
+ *
+ * @buf_size: Size of the circular buffer
+ * @read_ptr: Read pointer with an additional overflow bit
+ * @write_ptr: Write pointer with an additional overflow bit
+ *
+ * Since the read and write pointers have an additional overflow bit,
+ * this function calculates the offsets from the pointers and uses the
+ * offsets to calculate the data size in the buffer.
+ *
+ * Returns: number of bytes of data in the buffer
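+ *
+ * For example (illustrative values, not from the driver): with a 256 KiB
+ * buffer, read_ptr == 0x3F000 and write_ptr == 0x41000 (overflow bit set
+ * after the write side wrapped), the masked offsets are 0x3F000 and 0x1000,
+ * so this returns 0x40000 - 0x3F000 + 0x1000 = 0x2000 bytes.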
+ */
+static u32 buf_data_size(size_t buf_size, u32 read_ptr, u32 write_ptr)
+{
+        u32 read_offset, write_offset, size = 0;
+
+        if (read_ptr == write_ptr)
+                goto exit;
+
+        read_offset = read_ptr & (buf_size - 1);
+        write_offset = write_ptr & (buf_size - 1);
+
+        if (write_offset > read_offset)
+                size = write_offset - read_offset;
+        else
+                size = buf_size - read_offset + write_offset;
+exit:
+        return size;
+}
+
+/**
+ * eu_stall_data_buf_poll - Poll for EU stall data in the buffer.
+ *
+ * @stream: xe EU stall data stream instance
+ *
+ * Returns: true if the EU stall buffer contains at least the minimum amount
+ * of stall data specified by the event report count, else false.
+ */
+static bool eu_stall_data_buf_poll(struct xe_eu_stall_data_stream *stream)
+{
+        u32 read_ptr, write_ptr_reg, write_ptr, total_data = 0;
+        u32 buf_size = stream->per_xecore_buf_size;
+        struct per_xecore_buf *xecore_buf;
+        struct xe_gt *gt = stream->gt;
+        bool min_data_present = false;
+        u16 group, instance;
+        unsigned int xecore;
+
+        mutex_lock(&gt->eu_stall->stream_lock);
+        for_each_dss_steering(xecore, gt, group, instance) {
+                xecore_buf = &stream->xecore_buf[xecore];
+                read_ptr = xecore_buf->read;
+                write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT,
+                                                       group, instance);
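+                /* The register reports the write pointer in 64-byte units;
+                 * convert it to a byte offset and keep the extra overflow bit.
+                 */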
+                write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
+                write_ptr <<= 6;
+                write_ptr &= ((buf_size << 1) - 1);
+                if (!min_data_present) {
+                        total_data += buf_data_size(buf_size, read_ptr, write_ptr);
+                        if (num_data_rows(total_data) >= stream->wait_num_reports)
+                                min_data_present = true;
+                }
+                xecore_buf->write = write_ptr;
+        }
+        mutex_unlock(&gt->eu_stall->stream_lock);
+
+        return min_data_present;
+}
+
+static int xe_eu_stall_data_buf_read(struct xe_eu_stall_data_stream *stream,
+                                     char __user *buf, size_t count,
+                                     size_t *total_data_size, struct xe_gt *gt,
+                                     u16 group, u16 instance, unsigned int xecore)
+{
+        size_t read_data_size, copy_size, buf_size;
+        u32 read_ptr_reg, read_ptr, write_ptr;
+        u8 *xecore_start_vaddr, *read_vaddr;
+        struct per_xecore_buf *xecore_buf;
+        u32 read_offset, write_offset;
+
+        /* Hardware increments the read and write pointers such that they can
+         * overflow into one additional bit. For example, a 256KB size buffer
+         * offset pointer needs 18 bits. But HW uses 19 bits for the read and
+         * write pointers. This technique avoids wasting a slot in the buffer.
+         * Read and write offsets are calculated from the pointers in order to
+         * check if the write pointer has wrapped around the array.
+         */
+        xecore_buf = &stream->xecore_buf[xecore];
+        xecore_start_vaddr = xecore_buf->vaddr;
+        read_ptr = xecore_buf->read;
+        write_ptr = xecore_buf->write;
+        buf_size = stream->per_xecore_buf_size;
+
+        read_data_size = buf_data_size(buf_size, read_ptr, write_ptr);
+        /* Read only the data that the user space buffer can accommodate */
+        read_data_size = min_t(size_t, count - *total_data_size, read_data_size);
+        if (read_data_size == 0)
+                return 0;
+
+        read_offset = read_ptr & (buf_size - 1);
+        write_offset = write_ptr & (buf_size - 1);
+        read_vaddr = xecore_start_vaddr + read_offset;
+
+        if (write_offset > read_offset) {
+                if (copy_to_user(buf + *total_data_size, read_vaddr, read_data_size))
+                        return -EFAULT;
+        } else {
+                if (read_data_size >= buf_size - read_offset)
+                        copy_size = buf_size - read_offset;
+                else
+                        copy_size = read_data_size;
+                if (copy_to_user(buf + *total_data_size, read_vaddr, copy_size))
+                        return -EFAULT;
+                if (copy_to_user(buf + *total_data_size + copy_size,
+                                 xecore_start_vaddr, read_data_size - copy_size))
+                        return -EFAULT;
+        }
+
+        *total_data_size += read_data_size;
+        read_ptr += read_data_size;
+
+        /* Read pointer can overflow into one additional bit */
+        read_ptr &= (buf_size << 1) - 1;
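+        /* Convert the byte offset back to the 64-byte units the register
+         * expects; the masked-field write updates only the read pointer bits.
+         */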
+        read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, (read_ptr >> 6));
+        read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
+        xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
+        xecore_buf->read = read_ptr;
+        trace_xe_eu_stall_data_read(group, instance, read_ptr, write_ptr,
+                                    read_data_size, *total_data_size);
+        return 0;
+}
+
+/**
+ * xe_eu_stall_stream_read_locked - copy EU stall counters data from the
+ *                                  per xecore buffers to the userspace buffer
+ * @stream: A stream opened for EU stall count metrics
+ * @file: An xe EU stall data stream file
+ * @buf: destination buffer given by userspace
+ * @count: the number of bytes userspace wants to read
+ *
+ * Returns: Number of bytes copied or a negative error code
+ * If we've successfully copied any data then reporting that takes
+ * precedence over any internal error status, so the data isn't lost.
+ */
+static ssize_t xe_eu_stall_stream_read_locked(struct xe_eu_stall_data_stream *stream,
+                                              struct file *file, char __user *buf,
+                                              size_t count)
+{
+        struct xe_gt *gt = stream->gt;
+        size_t total_size = 0;
+        u16 group, instance;
+        unsigned int xecore;
+        int ret = 0;
+
+        for_each_dss_steering(xecore, gt, group, instance) {
+                ret = xe_eu_stall_data_buf_read(stream, buf, count, &total_size,
+                                                gt, group, instance, xecore);
+                if (ret || count == total_size)
+                        break;
+        }
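+        /* Bytes copied win over errors; -EAGAIN means no data is available yet */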
+        return total_size ?: (ret ?: -EAGAIN);
+}
+
 /*
  * Userspace must enable the EU stall stream with DRM_XE_OBSERVATION_IOCTL_ENABLE
  * before calling read().
  */
 static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
                                        size_t count, loff_t *ppos)
 {
-        ssize_t ret = 0;
+        struct xe_eu_stall_data_stream *stream = file->private_data;
+        struct xe_gt *gt = stream->gt;
+        ssize_t ret, aligned_count;
+
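+        /* Only whole data records can be read; round the request down to
+         * a multiple of the record size.
+         */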
+        aligned_count = ALIGN_DOWN(count, stream->data_record_size);
+        if (aligned_count == 0)
+                return -EINVAL;
+
+        if (!stream->enabled) {
+                xe_gt_dbg(gt, "EU stall data stream not enabled to read\n");
+                return -EINVAL;
+        }
+
+        if (!(file->f_flags & O_NONBLOCK)) {
+                do {
+                        ret = wait_event_interruptible(stream->poll_wq, stream->pollin);
+                        if (ret)
+                                return -EINTR;
+
+                        mutex_lock(&gt->eu_stall->stream_lock);
+                        ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
+                        mutex_unlock(&gt->eu_stall->stream_lock);
+                } while (ret == -EAGAIN);
+        } else {
+                mutex_lock(&gt->eu_stall->stream_lock);
+                ret = xe_eu_stall_stream_read_locked(stream, file, buf, aligned_count);
+                mutex_unlock(&gt->eu_stall->stream_lock);
+        }
+
+        /*
+         * This may not work correctly if the user buffer is very small.
+         * We don't want to block the next read() when there is data in the
+         * buffer now that couldn't be accommodated in the small user buffer.
+         */
+        stream->pollin = false;
 
         return ret;
 }
@@ -348,6 +565,21 @@ static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
         return 0;
 }
 
+static void eu_stall_data_buf_poll_work_fn(struct work_struct *work)
+{
+        struct xe_eu_stall_data_stream *stream =
+                container_of(work, typeof(*stream), buf_poll_work.work);
+        struct xe_gt *gt = stream->gt;
+
+        if (eu_stall_data_buf_poll(stream)) {
+                stream->pollin = true;
+                wake_up(&stream->poll_wq);
+        }
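+        /* Re-arm the work so the write pointers keep being polled every POLL_PERIOD_MS */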
+        queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
+                           &stream->buf_poll_work,
+                           msecs_to_jiffies(POLL_PERIOD_MS));
+}
+
 static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
                                    struct eu_stall_open_properties *props)
 {
@@ -372,6 +604,9 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
                           max_wait_num_reports);
                 return -EINVAL;
         }
+
+        init_waitqueue_head(&stream->poll_wq);
+        INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
         stream->per_xecore_buf_size = per_xecore_buf_size;
         stream->sampling_rate_mult = props->sampling_rate_mult;
         stream->wait_num_reports = props->wait_num_reports;
@@ -389,15 +624,35 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
         return 0;
 }
 
+static __poll_t xe_eu_stall_stream_poll_locked(struct xe_eu_stall_data_stream *stream,
+                                               struct file *file, poll_table *wait)
+{
+        __poll_t events = 0;
+
+        poll_wait(file, &stream->poll_wq, wait);
+
+        if (stream->pollin)
+                events |= EPOLLIN;
+
+        return events;
+}
+
 static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait)
 {
-        __poll_t ret = 0;
+        struct xe_eu_stall_data_stream *stream = file->private_data;
+        struct xe_gt *gt = stream->gt;
+        __poll_t ret;
+
+        mutex_lock(&gt->eu_stall->stream_lock);
+        ret = xe_eu_stall_stream_poll_locked(stream, file, wait);
+        mutex_unlock(&gt->eu_stall->stream_lock);
 
         return ret;
 }
 
 static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
 {
+        struct xe_gt *gt = stream->gt;
         int ret = 0;
 
         if (stream->enabled)
@@ -406,6 +661,10 @@ static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
         stream->enabled = true;
 
         ret = xe_eu_stall_stream_enable(stream);
+
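+        /* Start the periodic buffer pointers poll work for this stream */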
+        queue_delayed_work(gt->eu_stall->buf_ptr_poll_wq,
+                           &stream->buf_poll_work,
+                           msecs_to_jiffies(POLL_PERIOD_MS));
         return ret;
 }
 
@@ -420,6 +679,8 @@ static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
 
         xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0);
 
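+        /* Stop polling before the force wake and runtime PM references are dropped */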
+        cancel_delayed_work_sync(&stream->buf_poll_work);
+
         xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER);
         xe_pm_runtime_put(gt_to_xe(gt));
 