Skip to content

Commit 332dd01

Browse files
mbrost05 and rodrigovivi
authored and committed
drm/xe: Add range based TLB invalidations
If the platform supports range-based TLB invalidations, use them. Hide these details in the xe_gt_tlb_invalidation layer.

Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Niranjana Vishwanathapura <niranjana.vishwanathapura@intel.com>
1 parent 9d25e28 commit 332dd01

File tree

5 files changed

+84
-25
lines changed

5 files changed

+84
-25
lines changed

drivers/gpu/drm/xe/xe_gt_pagefault.c

+1-6
Original file line numberDiff line numberDiff line change
@@ -240,12 +240,7 @@ static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
240240
goto retry_userptr;
241241

242242
if (!ret) {
243-
/*
244-
* FIXME: Doing a full TLB invalidation for now, likely could
245-
* defer TLB invalidate + fault response to a callback of fence
246-
* too
247-
*/
248-
ret = xe_gt_tlb_invalidation(gt, NULL);
243+
ret = xe_gt_tlb_invalidation(gt, NULL, vma);
249244
if (ret >= 0)
250245
ret = 0;
251246
}

drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c

+73-14
Original file line numberDiff line numberDiff line change
@@ -92,16 +92,10 @@ int xe_gt_tlb_invalidation_init(struct xe_gt *gt)
9292
}
9393

9494
static int send_tlb_invalidation(struct xe_guc *guc,
95-
struct xe_gt_tlb_invalidation_fence *fence)
95+
struct xe_gt_tlb_invalidation_fence *fence,
96+
u32 *action, int len)
9697
{
9798
struct xe_gt *gt = guc_to_gt(guc);
98-
u32 action[] = {
99-
XE_GUC_ACTION_TLB_INVALIDATION,
100-
0,
101-
XE_GUC_TLB_INVAL_FULL << XE_GUC_TLB_INVAL_TYPE_SHIFT |
102-
XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT |
103-
XE_GUC_TLB_INVAL_FLUSH_CACHE,
104-
};
10599
int seqno;
106100
int ret;
107101
bool queue_work;
@@ -125,7 +119,7 @@ static int send_tlb_invalidation(struct xe_guc *guc,
125119
TLB_INVALIDATION_SEQNO_MAX;
126120
if (!gt->tlb_invalidation.seqno)
127121
gt->tlb_invalidation.seqno = 1;
128-
ret = xe_guc_ct_send_locked(&guc->ct, action, ARRAY_SIZE(action),
122+
ret = xe_guc_ct_send_locked(&guc->ct, action, len,
129123
G2H_LEN_DW_TLB_INVALIDATE, 1);
130124
if (!ret && fence) {
131125
fence->invalidation_time = ktime_get();
@@ -146,18 +140,83 @@ static int send_tlb_invalidation(struct xe_guc *guc,
146140
* @gt: graphics tile
147141
* @fence: invalidation fence which will be signal on TLB invalidation
148142
* completion, can be NULL
143+
* @vma: VMA to invalidate
149144
*
150-
* Issue a full TLB invalidation on the GT. Completion of TLB is asynchronous
151-
* and caller can either use the invalidation fence or seqno +
152-
* xe_gt_tlb_invalidation_wait to wait for completion.
145+
* Issue a range based TLB invalidation if supported, if not fallback to a full
146+
* TLB invalidation. Completion of TLB is asynchronous and caller can either use
147+
* the invalidation fence or seqno + xe_gt_tlb_invalidation_wait to wait for
148+
* completion.
153149
*
154150
* Return: Seqno which can be passed to xe_gt_tlb_invalidation_wait on success,
155151
* negative error code on error.
156152
*/
157153
int xe_gt_tlb_invalidation(struct xe_gt *gt,
158-
struct xe_gt_tlb_invalidation_fence *fence)
154+
struct xe_gt_tlb_invalidation_fence *fence,
155+
struct xe_vma *vma)
159156
{
160-
return send_tlb_invalidation(&gt->uc.guc, fence);
157+
struct xe_device *xe = gt_to_xe(gt);
158+
#define MAX_TLB_INVALIDATION_LEN 7
159+
u32 action[MAX_TLB_INVALIDATION_LEN];
160+
int len = 0;
161+
162+
XE_BUG_ON(!vma);
163+
164+
if (!xe->info.has_range_tlb_invalidation) {
165+
action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
166+
action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
167+
#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
168+
XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
169+
XE_GUC_TLB_INVAL_FLUSH_CACHE)
170+
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
171+
} else {
172+
u64 start = vma->start;
173+
u64 length = vma->end - vma->start + 1;
174+
u64 align, end;
175+
176+
if (length < SZ_4K)
177+
length = SZ_4K;
178+
179+
/*
180+
* We need to invalidate a higher granularity if start address
181+
* is not aligned to length. When start is not aligned with
182+
* length we need to find the length large enough to create an
183+
* address mask covering the required range.
184+
*/
185+
align = roundup_pow_of_two(length);
186+
start = ALIGN_DOWN(vma->start, align);
187+
end = ALIGN(vma->start + length, align);
188+
length = align;
189+
while (start + length < end) {
190+
length <<= 1;
191+
start = ALIGN_DOWN(vma->start, length);
192+
}
193+
194+
/*
195+
* Minimum invalidation size for a 2MB page that the hardware
196+
* expects is 16MB
197+
*/
198+
if (length >= SZ_2M) {
199+
length = max_t(u64, SZ_16M, length);
200+
start = ALIGN_DOWN(vma->start, length);
201+
}
202+
203+
XE_BUG_ON(length < SZ_4K);
204+
XE_BUG_ON(!is_power_of_2(length));
205+
XE_BUG_ON(length & GENMASK(ilog2(SZ_16M) - 1, ilog2(SZ_2M) + 1));
206+
XE_BUG_ON(!IS_ALIGNED(start, length));
207+
208+
action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
209+
action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
210+
action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
211+
action[len++] = vma->vm->usm.asid;
212+
action[len++] = lower_32_bits(start);
213+
action[len++] = upper_32_bits(start);
214+
action[len++] = ilog2(length) - ilog2(SZ_4K);
215+
}
216+
217+
XE_BUG_ON(len > MAX_TLB_INVALIDATION_LEN);
218+
219+
return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
161220
}
162221

163222
static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)

drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,13 @@
1212

1313
struct xe_gt;
1414
struct xe_guc;
15+
struct xe_vma;
1516

1617
int xe_gt_tlb_invalidation_init(struct xe_gt *gt);
1718
void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
1819
int xe_gt_tlb_invalidation(struct xe_gt *gt,
19-
struct xe_gt_tlb_invalidation_fence *fence);
20+
struct xe_gt_tlb_invalidation_fence *fence,
21+
struct xe_vma *vma);
2022
int xe_gt_tlb_invalidation_wait(struct xe_gt *gt, int seqno);
2123
int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
2224

drivers/gpu/drm/xe/xe_pt.c

+6-3
Original file line numberDiff line numberDiff line change
@@ -1466,6 +1466,7 @@ static const struct xe_migrate_pt_update_ops userptr_unbind_ops = {
14661466
struct invalidation_fence {
14671467
struct xe_gt_tlb_invalidation_fence base;
14681468
struct xe_gt *gt;
1469+
struct xe_vma *vma;
14691470
struct dma_fence *fence;
14701471
struct dma_fence_cb cb;
14711472
struct work_struct work;
@@ -1505,12 +1506,13 @@ static void invalidation_fence_work_func(struct work_struct *w)
15051506
container_of(w, struct invalidation_fence, work);
15061507

15071508
trace_xe_gt_tlb_invalidation_fence_work_func(&ifence->base);
1508-
xe_gt_tlb_invalidation(ifence->gt, &ifence->base);
1509+
xe_gt_tlb_invalidation(ifence->gt, &ifence->base, ifence->vma);
15091510
}
15101511

15111512
static int invalidation_fence_init(struct xe_gt *gt,
15121513
struct invalidation_fence *ifence,
1513-
struct dma_fence *fence)
1514+
struct dma_fence *fence,
1515+
struct xe_vma *vma)
15141516
{
15151517
int ret;
15161518

@@ -1528,6 +1530,7 @@ static int invalidation_fence_init(struct xe_gt *gt,
15281530
dma_fence_get(&ifence->base.base); /* Ref for caller */
15291531
ifence->fence = fence;
15301532
ifence->gt = gt;
1533+
ifence->vma = vma;
15311534

15321535
INIT_WORK(&ifence->work, invalidation_fence_work_func);
15331536
ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
@@ -1614,7 +1617,7 @@ __xe_pt_unbind_vma(struct xe_gt *gt, struct xe_vma *vma, struct xe_engine *e,
16141617
int err;
16151618

16161619
/* TLB invalidation must be done before signaling unbind */
1617-
err = invalidation_fence_init(gt, ifence, fence);
1620+
err = invalidation_fence_init(gt, ifence, fence, vma);
16181621
if (err) {
16191622
dma_fence_put(fence);
16201623
kfree(ifence);

drivers/gpu/drm/xe/xe_vm.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -3349,7 +3349,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
33493349
if (xe_pt_zap_ptes(gt, vma)) {
33503350
gt_needs_invalidate |= BIT(id);
33513351
xe_device_wmb(xe);
3352-
seqno[id] = xe_gt_tlb_invalidation(gt, NULL);
3352+
seqno[id] = xe_gt_tlb_invalidation(gt, NULL, vma);
33533353
if (seqno[id] < 0)
33543354
return seqno[id];
33553355
}

0 commit comments

Comments (0)