Skip to content

Commit 090bad3

Browse files
aik authored and mpe committed
powerpc/powernv: Add indirect levels to it_userspace
We want to support sparse memory and therefore huge chunks of DMA windows do not need to be mapped. If a DMA window is big enough to require 2 or more indirect levels, and a DMA window is used to map all RAM (which is the default case for a 64bit window), we can actually save some memory by not allocating TCEs for regions which we are not going to map anyway. The hardware tables already support indirect levels but we also keep a host-physical-to-userspace translation array which is allocated by vmalloc() and is a flat array which might use quite some memory. This converts it_userspace from a vmalloc'ed array to a multi-level table. As the format becomes platform dependent, this replaces the direct access to it_userspace with an iommu_table_ops::useraddrptr hook which returns a pointer to the userspace copy of a TCE; a future extension will return NULL if the level was not allocated. This should not change non-KVM handling of TCE tables and it_userspace will not be allocated for non-KVM tables. Reviewed-by: David Gibson <david@gibson.dropbear.id.au> Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
1 parent 00a5c58 commit 090bad3

File tree

6 files changed

+73
-78
lines changed

6 files changed

+73
-78
lines changed

arch/powerpc/include/asm/iommu.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ struct iommu_table_ops {
6969
long index,
7070
unsigned long *hpa,
7171
enum dma_data_direction *direction);
72+
73+
__be64 *(*useraddrptr)(struct iommu_table *tbl, long index);
7274
#endif
7375
void (*clear)(struct iommu_table *tbl,
7476
long index, long npages);
@@ -123,9 +125,7 @@ struct iommu_table {
123125
};
124126

125127
#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
126-
((tbl)->it_userspace ? \
127-
&((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \
128-
NULL)
128+
((tbl)->it_ops->useraddrptr((tbl), (entry)))
129129

130130
/* Pure 2^n version of get_order */
131131
static inline __attribute_const__

arch/powerpc/kvm/book3s_64_vio_hv.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -206,10 +206,6 @@ static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
206206
/* it_userspace allocation might be delayed */
207207
return H_TOO_HARD;
208208

209-
pua = (void *) vmalloc_to_phys(pua);
210-
if (WARN_ON_ONCE_RM(!pua))
211-
return H_HARDWARE;
212-
213209
mem = mm_iommu_lookup_rm(kvm->mm, be64_to_cpu(*pua), pgsize);
214210
if (!mem)
215211
return H_TOO_HARD;
@@ -282,10 +278,6 @@ static long kvmppc_rm_tce_iommu_do_map(struct kvm *kvm, struct iommu_table *tbl,
282278
if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
283279
return H_HARDWARE;
284280

285-
pua = (void *) vmalloc_to_phys(pua);
286-
if (WARN_ON_ONCE_RM(!pua))
287-
return H_HARDWARE;
288-
289281
if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
290282
return H_CLOSED;
291283

arch/powerpc/platforms/powernv/pci-ioda-tce.c

Lines changed: 51 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@ void pnv_pci_setup_iommu_table(struct iommu_table *tbl,
3131
tbl->it_type = TCE_PCI;
3232
}
3333

34-
static __be64 *pnv_tce(struct iommu_table *tbl, long idx)
34+
static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx)
3535
{
36-
__be64 *tmp = ((__be64 *)tbl->it_base);
36+
__be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base;
3737
int level = tbl->it_indirect_levels;
3838
const long shift = ilog2(tbl->it_level_size);
3939
unsigned long mask = (tbl->it_level_size - 1) << (level * shift);
@@ -67,7 +67,7 @@ int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
6767
((rpn + i) << tbl->it_page_shift);
6868
unsigned long idx = index - tbl->it_offset + i;
6969

70-
*(pnv_tce(tbl, idx)) = cpu_to_be64(newtce);
70+
*(pnv_tce(tbl, false, idx)) = cpu_to_be64(newtce);
7171
}
7272

7373
return 0;
@@ -86,12 +86,21 @@ int pnv_tce_xchg(struct iommu_table *tbl, long index,
8686
if (newtce & TCE_PCI_WRITE)
8787
newtce |= TCE_PCI_READ;
8888

89-
oldtce = be64_to_cpu(xchg(pnv_tce(tbl, idx), cpu_to_be64(newtce)));
89+
oldtce = be64_to_cpu(xchg(pnv_tce(tbl, false, idx),
90+
cpu_to_be64(newtce)));
9091
*hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
9192
*direction = iommu_tce_direction(oldtce);
9293

9394
return 0;
9495
}
96+
97+
__be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index)
98+
{
99+
if (WARN_ON_ONCE(!tbl->it_userspace))
100+
return NULL;
101+
102+
return pnv_tce(tbl, true, index - tbl->it_offset);
103+
}
95104
#endif
96105

97106
void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
@@ -101,13 +110,15 @@ void pnv_tce_free(struct iommu_table *tbl, long index, long npages)
101110
for (i = 0; i < npages; i++) {
102111
unsigned long idx = index - tbl->it_offset + i;
103112

104-
*(pnv_tce(tbl, idx)) = cpu_to_be64(0);
113+
*(pnv_tce(tbl, false, idx)) = cpu_to_be64(0);
105114
}
106115
}
107116

108117
unsigned long pnv_tce_get(struct iommu_table *tbl, long index)
109118
{
110-
return be64_to_cpu(*(pnv_tce(tbl, index - tbl->it_offset)));
119+
__be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset);
120+
121+
return be64_to_cpu(*ptce);
111122
}
112123

113124
static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr,
@@ -144,6 +155,10 @@ void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl)
144155

145156
pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size,
146157
tbl->it_indirect_levels);
158+
if (tbl->it_userspace) {
159+
pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size,
160+
tbl->it_indirect_levels);
161+
}
147162
}
148163

149164
static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
@@ -191,10 +206,11 @@ static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift,
191206

192207
long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
193208
__u32 page_shift, __u64 window_size, __u32 levels,
194-
struct iommu_table *tbl)
209+
bool alloc_userspace_copy, struct iommu_table *tbl)
195210
{
196-
void *addr;
211+
void *addr, *uas = NULL;
197212
unsigned long offset = 0, level_shift, total_allocated = 0;
213+
unsigned long total_allocated_uas = 0;
198214
const unsigned int window_shift = ilog2(window_size);
199215
unsigned int entries_shift = window_shift - page_shift;
200216
unsigned int table_shift = max_t(unsigned int, entries_shift + 3,
@@ -228,10 +244,20 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
228244
* we did not allocate as much as we wanted,
229245
* release partially allocated table.
230246
*/
231-
if (offset < tce_table_size) {
232-
pnv_pci_ioda2_table_do_free_pages(addr,
233-
1ULL << (level_shift - 3), levels - 1);
234-
return -ENOMEM;
247+
if (offset < tce_table_size)
248+
goto free_tces_exit;
249+
250+
/* Allocate userspace view of the TCE table */
251+
if (alloc_userspace_copy) {
252+
offset = 0;
253+
uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift,
254+
levels, tce_table_size, &offset,
255+
&total_allocated_uas);
256+
if (!uas)
257+
goto free_tces_exit;
258+
if (offset < tce_table_size ||
259+
total_allocated_uas != total_allocated)
260+
goto free_uas_exit;
235261
}
236262

237263
/* Setup linux iommu table */
@@ -240,11 +266,22 @@ long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
240266
tbl->it_level_size = 1ULL << (level_shift - 3);
241267
tbl->it_indirect_levels = levels - 1;
242268
tbl->it_allocated_size = total_allocated;
269+
tbl->it_userspace = uas;
243270

244-
pr_devel("Created TCE table: ws=%08llx ts=%lx @%08llx\n",
245-
window_size, tce_table_size, bus_offset);
271+
pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d\n",
272+
window_size, tce_table_size, bus_offset, tbl->it_base,
273+
tbl->it_userspace, levels);
246274

247275
return 0;
276+
277+
free_uas_exit:
278+
pnv_pci_ioda2_table_do_free_pages(uas,
279+
1ULL << (level_shift - 3), levels - 1);
280+
free_tces_exit:
281+
pnv_pci_ioda2_table_do_free_pages(addr,
282+
1ULL << (level_shift - 3), levels - 1);
283+
284+
return -ENOMEM;
248285
}
249286

250287
static void pnv_iommu_table_group_link_free(struct rcu_head *head)

arch/powerpc/platforms/powernv/pci-ioda.c

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2036,6 +2036,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
20362036
#ifdef CONFIG_IOMMU_API
20372037
.exchange = pnv_ioda1_tce_xchg,
20382038
.exchange_rm = pnv_ioda1_tce_xchg_rm,
2039+
.useraddrptr = pnv_tce_useraddrptr,
20392040
#endif
20402041
.clear = pnv_ioda1_tce_free,
20412042
.get = pnv_tce_get,
@@ -2200,6 +2201,7 @@ static struct iommu_table_ops pnv_ioda2_iommu_ops = {
22002201
#ifdef CONFIG_IOMMU_API
22012202
.exchange = pnv_ioda2_tce_xchg,
22022203
.exchange_rm = pnv_ioda2_tce_xchg_rm,
2204+
.useraddrptr = pnv_tce_useraddrptr,
22032205
#endif
22042206
.clear = pnv_ioda2_tce_free,
22052207
.get = pnv_tce_get,
@@ -2455,7 +2457,7 @@ void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable)
24552457

24562458
static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
24572459
int num, __u32 page_shift, __u64 window_size, __u32 levels,
2458-
struct iommu_table **ptbl)
2460+
bool alloc_userspace_copy, struct iommu_table **ptbl)
24592461
{
24602462
struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
24612463
table_group);
@@ -2472,7 +2474,7 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
24722474

24732475
ret = pnv_pci_ioda2_table_alloc_pages(nid,
24742476
bus_offset, page_shift, window_size,
2475-
levels, tbl);
2477+
levels, alloc_userspace_copy, tbl);
24762478
if (ret) {
24772479
iommu_tce_table_put(tbl);
24782480
return ret;
@@ -2505,7 +2507,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
25052507
rc = pnv_pci_ioda2_create_table(&pe->table_group, 0,
25062508
IOMMU_PAGE_SHIFT_4K,
25072509
window_size,
2508-
POWERNV_IOMMU_DEFAULT_LEVELS, &tbl);
2510+
POWERNV_IOMMU_DEFAULT_LEVELS, false, &tbl);
25092511
if (rc) {
25102512
pe_err(pe, "Failed to create 32-bit TCE table, err %ld",
25112513
rc);
@@ -2592,7 +2594,16 @@ static unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
25922594
tce_table_size, direct_table_size);
25932595
}
25942596

2595-
return bytes;
2597+
return bytes + bytes; /* one for HW table, one for userspace copy */
2598+
}
2599+
2600+
static long pnv_pci_ioda2_create_table_userspace(
2601+
struct iommu_table_group *table_group,
2602+
int num, __u32 page_shift, __u64 window_size, __u32 levels,
2603+
struct iommu_table **ptbl)
2604+
{
2605+
return pnv_pci_ioda2_create_table(table_group,
2606+
num, page_shift, window_size, levels, true, ptbl);
25962607
}
25972608

25982609
static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
@@ -2621,7 +2632,7 @@ static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
26212632

26222633
static struct iommu_table_group_ops pnv_pci_ioda2_ops = {
26232634
.get_table_size = pnv_pci_ioda2_get_table_size,
2624-
.create_table = pnv_pci_ioda2_create_table,
2635+
.create_table = pnv_pci_ioda2_create_table_userspace,
26252636
.set_window = pnv_pci_ioda2_set_window,
26262637
.unset_window = pnv_pci_ioda2_unset_window,
26272638
.take_ownership = pnv_ioda2_take_ownership,
@@ -2726,7 +2737,7 @@ static void pnv_ioda2_npu_take_ownership(struct iommu_table_group *table_group)
27262737

27272738
static struct iommu_table_group_ops pnv_pci_ioda2_npu_ops = {
27282739
.get_table_size = pnv_pci_ioda2_get_table_size,
2729-
.create_table = pnv_pci_ioda2_create_table,
2740+
.create_table = pnv_pci_ioda2_create_table_userspace,
27302741
.set_window = pnv_pci_ioda2_npu_set_window,
27312742
.unset_window = pnv_pci_ioda2_npu_unset_window,
27322743
.take_ownership = pnv_ioda2_npu_take_ownership,

arch/powerpc/platforms/powernv/pci.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,11 +267,12 @@ extern int pnv_tce_build(struct iommu_table *tbl, long index, long npages,
267267
extern void pnv_tce_free(struct iommu_table *tbl, long index, long npages);
268268
extern int pnv_tce_xchg(struct iommu_table *tbl, long index,
269269
unsigned long *hpa, enum dma_data_direction *direction);
270+
extern __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index);
270271
extern unsigned long pnv_tce_get(struct iommu_table *tbl, long index);
271272

272273
extern long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset,
273274
__u32 page_shift, __u64 window_size, __u32 levels,
274-
struct iommu_table *tbl);
275+
bool alloc_userspace_copy, struct iommu_table *tbl);
275276
extern void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl);
276277

277278
extern long pnv_pci_link_table_and_group(int node, int num,

drivers/vfio/vfio_iommu_spapr_tce.c

Lines changed: 0 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -211,44 +211,6 @@ static long tce_iommu_register_pages(struct tce_container *container,
211211
return 0;
212212
}
213213

214-
static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl,
215-
struct mm_struct *mm)
216-
{
217-
unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
218-
tbl->it_size, PAGE_SIZE);
219-
unsigned long *uas;
220-
long ret;
221-
222-
BUG_ON(tbl->it_userspace);
223-
224-
ret = try_increment_locked_vm(mm, cb >> PAGE_SHIFT);
225-
if (ret)
226-
return ret;
227-
228-
uas = vzalloc(cb);
229-
if (!uas) {
230-
decrement_locked_vm(mm, cb >> PAGE_SHIFT);
231-
return -ENOMEM;
232-
}
233-
tbl->it_userspace = (__be64 *) uas;
234-
235-
return 0;
236-
}
237-
238-
static void tce_iommu_userspace_view_free(struct iommu_table *tbl,
239-
struct mm_struct *mm)
240-
{
241-
unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
242-
tbl->it_size, PAGE_SIZE);
243-
244-
if (!tbl->it_userspace)
245-
return;
246-
247-
vfree(tbl->it_userspace);
248-
tbl->it_userspace = NULL;
249-
decrement_locked_vm(mm, cb >> PAGE_SHIFT);
250-
}
251-
252214
static bool tce_page_is_contained(struct page *page, unsigned page_shift)
253215
{
254216
/*
@@ -599,12 +561,6 @@ static long tce_iommu_build_v2(struct tce_container *container,
599561
unsigned long hpa;
600562
enum dma_data_direction dirtmp;
601563

602-
if (!tbl->it_userspace) {
603-
ret = tce_iommu_userspace_view_alloc(tbl, container->mm);
604-
if (ret)
605-
return ret;
606-
}
607-
608564
for (i = 0; i < pages; ++i) {
609565
struct mm_iommu_table_group_mem_t *mem = NULL;
610566
__be64 *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry + i);
@@ -685,7 +641,6 @@ static void tce_iommu_free_table(struct tce_container *container,
685641
{
686642
unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
687643

688-
tce_iommu_userspace_view_free(tbl, container->mm);
689644
iommu_tce_table_put(tbl);
690645
decrement_locked_vm(container->mm, pages);
691646
}
@@ -1200,7 +1155,6 @@ static void tce_iommu_release_ownership(struct tce_container *container,
12001155
continue;
12011156

12021157
tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
1203-
tce_iommu_userspace_view_free(tbl, container->mm);
12041158
if (tbl->it_map)
12051159
iommu_release_ownership(tbl);
12061160

0 commit comments

Comments
 (0)