Skip to content

Coreforge's AMD Radeon driver work on 5.15.y Linux branch #6

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 10 commits into from
Closed
7 changes: 7 additions & 0 deletions drivers/gpu/drm/radeon/radeon.h
Original file line number Diff line number Diff line change
Expand Up @@ -2237,6 +2237,13 @@ void radeon_agp_disable(struct radeon_device *rdev);
int radeon_asic_init(struct radeon_device *rdev);


/*
 * memcpy_fromio/memcpy_toio/memset_io variants that only use 8-bit and
 * 32-bit wide accesses, so that they work on a Raspberry Pi 4.
 */
void memcpy_fromio_pcie(void *to, const volatile void __iomem *from, size_t count);
void memcpy_toio_pcie(volatile void __iomem *to, const void *from, size_t count);
void memset_io_pcie(volatile void __iomem *dst, int c, size_t count);

/*
* IOCTL.
*/
Expand Down
6 changes: 3 additions & 3 deletions drivers/gpu/drm/radeon/radeon_bios.c
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ static bool igp_read_bios_from_vram(struct radeon_device *rdev)
iounmap(bios);
return false;
}
memcpy_fromio(rdev->bios, bios, size);
memcpy_fromio_pcie(rdev->bios, bios, size);
iounmap(bios);
return true;
}
Expand Down Expand Up @@ -101,7 +101,7 @@ static bool radeon_read_bios(struct radeon_device *rdev)
pci_unmap_rom(rdev->pdev, bios);
return false;
}
memcpy_fromio(rdev->bios, bios, size);
memcpy_fromio_pcie(rdev->bios, bios, size);
pci_unmap_rom(rdev->pdev, bios);
return true;
}
Expand All @@ -125,7 +125,7 @@ static bool radeon_read_platform_bios(struct radeon_device *rdev)
if (!bios)
goto free_bios;

memcpy_fromio(rdev->bios, bios, romlen);
memcpy_fromio_pcie(rdev->bios, bios, romlen);
iounmap(bios);

if (rdev->bios[0] != 0x55 || rdev->bios[1] != 0xaa)
Expand Down
94 changes: 93 additions & 1 deletion drivers/gpu/drm/radeon/radeon_device.c
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,98 @@ static const char radeon_family_name[][16] = {
"LAST",
};

/**
 * memcpy_fromio_pcie - copy from I/O memory using only 8-bit and 32-bit reads
 * @to: destination buffer
 * @from: source address in I/O memory
 * @count: number of bytes to copy
 *
 * Like memcpy_fromio(), but restricted to 8-bit and 32-bit wide accesses so
 * that it works on a Raspberry Pi 4.
 *
 * The copy runs in three phases: single bytes until @from is 8-byte aligned,
 * then 32-bit words while at least four bytes remain, then single bytes for
 * whatever is left.
 */
void memcpy_fromio_pcie(void *to, const volatile void __iomem *from, size_t count)
{
	const volatile u8 __iomem *src = from;
	u8 *dst = to;

	/* Head: byte reads until the I/O address reaches an 8-byte boundary. */
	for (; count && !IS_ALIGNED((unsigned long)src, 8); count--) {
		*dst = __raw_readb(src);
		src++;
		dst++;
	}

	/* Body: 32-bit reads; the destination is stored through a u32 pointer. */
	for (; count >= sizeof(u32); count -= sizeof(u32)) {
		*(u32 *)dst = __raw_readl(src);
		src += sizeof(u32);
		dst += sizeof(u32);
	}

	/* Tail: at most three remaining bytes. */
	for (; count; count--) {
		*dst = __raw_readb(src);
		src++;
		dst++;
	}
}

/**
 * memcpy_toio_pcie - copy to I/O memory using only 8-bit and 32-bit writes
 * @to: destination address in I/O memory
 * @from: source buffer
 * @count: number of bytes to copy
 *
 * Like memcpy_toio(), but restricted to 8-bit and 32-bit wide accesses so
 * that it works on a Raspberry Pi 4.
 *
 * The copy runs in three phases: single bytes until @to is 8-byte aligned,
 * then 32-bit words while at least four bytes remain, then single bytes for
 * whatever is left.
 */
void memcpy_toio_pcie(volatile void __iomem *to, const void *from, size_t count)
{
	volatile u8 __iomem *dst = to;
	const u8 *src = from;

	/* Head: byte writes until the I/O address reaches an 8-byte boundary. */
	while (count && !IS_ALIGNED((unsigned long)dst, 8)) {
		__raw_writeb(*src, dst);
		src++;
		dst++;
		count--;
	}

	/*
	 * Body: 32-bit writes. Load exactly 32 bits from the source: a 64-bit
	 * load truncated to 32 bits would read four bytes beyond the data being
	 * copied (past the end of the buffer on the last word) and would select
	 * the wrong half on big-endian machines.
	 */
	while (count >= sizeof(u32)) {
		__raw_writel(*(const u32 *)src, dst);
		src += sizeof(u32);
		dst += sizeof(u32);
		count -= sizeof(u32);
	}

	/* Tail: at most three remaining bytes. */
	while (count) {
		__raw_writeb(*src, dst);
		src++;
		dst++;
		count--;
	}
}

/**
 * memset_io_pcie - fill I/O memory using only 8-bit and 32-bit writes
 * @dst: start address in I/O memory
 * @c: fill value; only the low 8 bits are used
 * @count: number of bytes to fill
 *
 * Like memset_io(), but restricted to 8-bit and 32-bit wide accesses so
 * that it works on a Raspberry Pi 4.
 *
 * The fill runs in three phases: single bytes until @dst is 8-byte aligned,
 * then 32-bit words while at least four bytes remain, then single bytes for
 * whatever is left.
 */
void memset_io_pcie(volatile void __iomem *dst, int c, size_t count)
{
	volatile u8 __iomem *p = dst;
	/* Replicate the fill byte into all four byte lanes of a 32-bit word. */
	const u32 word = (u8)c * 0x01010101U;

	/* Head: byte writes until the I/O address reaches an 8-byte boundary. */
	for (; count && !IS_ALIGNED((unsigned long)p, 8); count--) {
		__raw_writeb(c, p);
		p++;
	}

	/* Body: 32-bit writes of the replicated pattern. */
	for (; count >= sizeof(u32); count -= sizeof(u32)) {
		__raw_writel(word, p);
		p += sizeof(u32);
	}

	/* Tail: at most three remaining bytes. */
	for (; count; count--) {
		__raw_writeb(c, p);
		p++;
	}
}

#if defined(CONFIG_VGA_SWITCHEROO)
bool radeon_has_atpx_dgpu_power_cntl(void);
bool radeon_is_atpx_hybrid(void);
Expand Down Expand Up @@ -490,7 +582,7 @@ int radeon_wb_init(struct radeon_device *rdev)
}

/* clear wb memory */
memset((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE);
memset_io_pcie((char *)rdev->wb.wb, 0, RADEON_GPU_PAGE_SIZE);
/* disable event_write fences */
rdev->wb.use_event = false;
/* disabled via module param */
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/radeon/radeon_fb.c
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ static int radeonfb_create(struct drm_fb_helper *helper,
/* setup helper */
rfbdev->helper.fb = fb;

memset_io(rbo->kptr, 0x0, radeon_bo_size(rbo));
memset_io_pcie(rbo->kptr, 0x0, radeon_bo_size(rbo));

info->fbops = &radeonfb_ops;

Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/radeon/radeon_gem.c
Original file line number Diff line number Diff line change
Expand Up @@ -711,7 +711,7 @@ int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
* otherwise we will endup with broken userspace and we won't be able
* to enable this feature without adding new interface
*/
invalid_flags = RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_SYSTEM;
invalid_flags = RADEON_VM_PAGE_VALID | RADEON_VM_PAGE_SYSTEM | RADEON_VM_PAGE_SNOOPED;
if ((args->flags & invalid_flags)) {
dev_err(dev->dev, "invalid flags 0x%08X vs 0x%08X\n",
args->flags, invalid_flags);
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/radeon/radeon_ib.c
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
r = radeon_sa_bo_manager_init(rdev, &rdev->ring_tmp_bo,
RADEON_IB_POOL_SIZE*64*1024,
RADEON_GPU_PAGE_SIZE,
RADEON_GEM_DOMAIN_GTT, 0);
RADEON_GEM_DOMAIN_GTT, RADEON_GEM_GTT_UC);
}
if (r) {
return r;
Expand Down
15 changes: 10 additions & 5 deletions drivers/gpu/drm/radeon/radeon_object.c
Original file line number Diff line number Diff line change
Expand Up @@ -109,29 +109,29 @@ void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
rbo->placements[c].fpfn =
rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
rbo->placements[c].mem_type = TTM_PL_VRAM;
rbo->placements[c++].flags = 0;
rbo->placements[c++].flags = RADEON_GEM_GTT_UC;
}

rbo->placements[c].fpfn = 0;
rbo->placements[c].mem_type = TTM_PL_VRAM;
rbo->placements[c++].flags = 0;
rbo->placements[c++].flags = RADEON_GEM_GTT_UC;
}

if (domain & RADEON_GEM_DOMAIN_GTT) {
rbo->placements[c].fpfn = 0;
rbo->placements[c].mem_type = TTM_PL_TT;
rbo->placements[c++].flags = 0;
rbo->placements[c++].flags = RADEON_GEM_GTT_UC;
}

if (domain & RADEON_GEM_DOMAIN_CPU) {
rbo->placements[c].fpfn = 0;
rbo->placements[c].mem_type = TTM_PL_SYSTEM;
rbo->placements[c++].flags = 0;
rbo->placements[c++].flags = RADEON_GEM_GTT_UC;
}
if (!c) {
rbo->placements[c].fpfn = 0;
rbo->placements[c].mem_type = TTM_PL_SYSTEM;
rbo->placements[c++].flags = 0;
rbo->placements[c++].flags = RADEON_GEM_GTT_UC;
}

rbo->placement.num_placement = c;
Expand Down Expand Up @@ -184,6 +184,7 @@ int radeon_bo_create(struct radeon_device *rdev,

bo->flags = flags;
/* PCI GART is always snooped */
/* Don't be so sure. TODO */
if (!(rdev->flags & RADEON_IS_PCIE))
bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

Expand Down Expand Up @@ -220,6 +221,10 @@ int radeon_bo_create(struct radeon_device *rdev,
bo->flags &= ~RADEON_GEM_GTT_WC;
#endif

//Write combining may cause issues on the raspberry pi
bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
bo->flags |= RADEON_GEM_GTT_UC;

radeon_ttm_placement_from_domain(bo, domain);
/* Kernel allocation are uninterruptible */
down_read(&rdev->pm.mclk_lock);
Expand Down
2 changes: 1 addition & 1 deletion drivers/gpu/drm/radeon/radeon_ring.c
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
r = radeon_bo_create(rdev, ring->ring_size, PAGE_SIZE, true,
RADEON_GEM_DOMAIN_GTT, 0, NULL,
RADEON_GEM_DOMAIN_GTT, RADEON_GEM_GTT_UC, NULL,
NULL, &ring->ring_obj);
if (r) {
dev_err(rdev->dev, "(%d) ring create failed\n", r);
Expand Down
8 changes: 5 additions & 3 deletions drivers/gpu/drm/radeon/radeon_ttm.c
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,7 @@ static int radeon_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resourc
return -EINVAL;
mem->bus.offset += rdev->mc.aper_base;
mem->bus.is_iomem = true;
mem->bus.caching = ttm_write_combined;
mem->bus.caching = ttm_uncached;
#ifdef __alpha__
/*
* Alpha: use bus.addr to hold the ioremap() return,
Expand Down Expand Up @@ -456,7 +456,8 @@ static int radeon_ttm_backend_bind(struct ttm_device *bdev,
ttm->num_pages, bo_mem, ttm);
}
if (ttm->caching == ttm_cached)
flags |= RADEON_GART_PAGE_SNOOP;
printk("TTM Page would've been snooped\n");
// flags |= RADEON_GART_PAGE_SNOOP;
r = radeon_gart_bind(rdev, gtt->offset, ttm->num_pages,
ttm->pages, gtt->ttm.dma_address, flags);
if (r) {
Expand Down Expand Up @@ -515,13 +516,14 @@ static struct ttm_tt *radeon_ttm_tt_create(struct ttm_buffer_object *bo,
return NULL;
}

// TODO: Highly suspect.
if (rbo->flags & RADEON_GEM_GTT_UC)
caching = ttm_uncached;
else if (rbo->flags & RADEON_GEM_GTT_WC)
caching = ttm_write_combined;
else
caching = ttm_cached;

caching = ttm_uncached;
if (ttm_sg_tt_init(&gtt->ttm, bo, page_flags, caching)) {
kfree(gtt);
return NULL;
Expand Down
4 changes: 2 additions & 2 deletions drivers/gpu/drm/radeon/radeon_uvd.c
Original file line number Diff line number Diff line change
Expand Up @@ -288,15 +288,15 @@ int radeon_uvd_resume(struct radeon_device *rdev)
if (rdev->uvd.vcpu_bo == NULL)
return -EINVAL;

memcpy_toio((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);
memcpy_toio_pcie((void __iomem *)rdev->uvd.cpu_addr, rdev->uvd_fw->data, rdev->uvd_fw->size);

size = radeon_bo_size(rdev->uvd.vcpu_bo);
size -= rdev->uvd_fw->size;

ptr = rdev->uvd.cpu_addr;
ptr += rdev->uvd_fw->size;

memset_io((void __iomem *)ptr, 0, size);
memset_io_pcie((void __iomem *)ptr, 0, size);

return 0;
}
Expand Down
3 changes: 2 additions & 1 deletion drivers/gpu/drm/radeon/radeon_vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -952,7 +952,8 @@ int radeon_vm_bo_update(struct radeon_device *rdev,
if (mem->mem_type == TTM_PL_TT) {
bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
if (!(bo_va->bo->flags & (RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC)))
bo_va->flags |= RADEON_VM_PAGE_SNOOPED;
printk("VM Page would've been set to snooped\n");
//bo_va->flags |= RADEON_VM_PAGE_SNOOPED;

} else {
addr += rdev->vm_manager.vram_base_offset;
Expand Down
9 changes: 6 additions & 3 deletions drivers/gpu/drm/radeon/rs600.c
Original file line number Diff line number Diff line change
Expand Up @@ -651,16 +651,19 @@ uint64_t rs600_gart_get_page_entry(uint64_t addr, uint32_t flags)
addr |= R600_PTE_READABLE;
if (flags & RADEON_GART_PAGE_WRITE)
addr |= R600_PTE_WRITEABLE;
if (flags & RADEON_GART_PAGE_SNOOP)
addr |= R600_PTE_SNOOPED;
// if (flags & RADEON_GART_PAGE_SNOOP) // no snooping around
// addr |= R600_PTE_SNOOPED;
return addr;
}

void rs600_gart_set_page(struct radeon_device *rdev, unsigned i,
uint64_t entry)
{
void __iomem *ptr = (void *)rdev->gart.ptr;
writeq(entry, ptr + (i * 8));
uint32_t high = entry >> 32;
writel(entry,ptr+(i*8));
writel(high,ptr + (i*8) + 4);
//writeq(entry, ptr + (i * 8));
}

int rs600_irq_set(struct radeon_device *rdev)
Expand Down
16 changes: 14 additions & 2 deletions drivers/video/fbdev/core/cfbcopyarea.c
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,22 @@
# define FB_WRITEL fb_writel
# define FB_READL fb_readl
#else
# define FB_WRITEL fb_writeq
# define FB_READL fb_readq
# define FB_WRITEL fb_writel_writeq
# define FB_READL fb_readl_readq
#endif

/*
 * Store a 64-bit value to framebuffer memory as two 32-bit writes:
 * the low 32 bits at addr, then the high 32 bits at addr + 4.
 */
static void fb_writel_writeq(u64 val, volatile void __iomem *addr)
{
	u32 lo = (u32)val;
	u32 hi = (u32)(val >> 32);

	fb_writel(lo, addr);
	fb_writel(hi, addr + 4);
}

/*
 * Load a 64-bit value from framebuffer memory as two 32-bit reads:
 * the low 32 bits from addr, then the high 32 bits from addr + 4.
 *
 * Returns the two words combined into a u64.
 */
static u64 fb_readl_readq(volatile void __iomem *addr)
{
	u64 val;

	val = fb_readl(addr);
	/*
	 * Widen to 64 bits before shifting: shifting the 32-bit result of
	 * fb_readl() left by 32 is undefined and would drop the high word.
	 */
	val |= (u64)fb_readl(addr + 4) << 32;
	return val;
}

/*
* Generic bitwise copy algorithm
*/
Expand Down
16 changes: 14 additions & 2 deletions drivers/video/fbdev/core/cfbfillrect.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,22 @@
# define FB_WRITEL fb_writel
# define FB_READL fb_readl
#else
# define FB_WRITEL fb_writeq
# define FB_READL fb_readq
# define FB_WRITEL fb_writel_writeq
# define FB_READL fb_readl_readq
#endif

/*
 * Store a 64-bit value to framebuffer memory as two 32-bit writes:
 * the low 32 bits at addr, then the high 32 bits at addr + 4.
 */
static void fb_writel_writeq(u64 val, volatile void __iomem *addr)
{
	u32 lo = (u32)val;
	u32 hi = (u32)(val >> 32);

	fb_writel(lo, addr);
	fb_writel(hi, addr + 4);
}

/*
 * Load a 64-bit value from framebuffer memory as two 32-bit reads:
 * the low 32 bits from addr, then the high 32 bits from addr + 4.
 *
 * Returns the two words combined into a u64.
 */
static u64 fb_readl_readq(volatile void __iomem *addr)
{
	u64 val;

	val = fb_readl(addr);
	/*
	 * Widen to 64 bits before shifting: shifting the 32-bit result of
	 * fb_readl() left by 32 is undefined and would drop the high word.
	 */
	val |= (u64)fb_readl(addr + 4) << 32;
	return val;
}

/*
* Aligned pattern fill using 32/64-bit memory accesses
*/
Expand Down