Skip to content

Commit

Permalink
Merge tag 'amd-drm-next-6.12-2024-08-26' of https://gitlab.freedeskto…
Browse files Browse the repository at this point in the history
…p.org/agd5f/linux into drm-next

amd-drm-next-6.12-2024-08-26:

amdgpu:
- SDMA devcoredump support
- DCN 4.0.1 updates
- DC SUBVP fixes
- Refactor OPP in DC
- Refactor MMHUBBUB in DC
- DC DML 2.1 updates
- DC FAMS2 updates
- RAS updates
- GFX12 updates
- VCN 4.0.3 updates
- JPEG 4.0.3 updates
- Enable wave kill (soft recovery) for compute queues
- Clean up CP error interrupt handling
- Enable CP bad opcode interrupts
- VCN 4.x fixes
- VCN 5.x fixes
- GPU reset fixes
- Fix vbios embedded EDID size handling
- SMU 14.x updates
- Misc code cleanups and spelling fixes
- VCN devcoredump support
- ISP MFD i2c support
- DC vblank fixes
- GFX 12 fixes
- PSR fixes
- Convert vbios embedded EDID to drm_edid
- DCN 3.5 updates
- DMCUB updates
- Cursor fixes
- Overdrive support for SMU 14.x
- GFX CP padding optimizations
- DCC fixes
- DSC fixes
- Preliminary per queue reset infrastructure
- Initial per queue reset support for GFX 9
- Initial per queue reset support for GFX 7, 8
- DCN 3.2 fixes
- DP MST fixes
- SR-IOV fixes
- GFX 9.4.3/4 devcoredump support
- Add process isolation framework
- Enable process isolation support for GFX 9.4.3/4
- Take IOMMU remapping into account for P2P DMA checks

amdkfd:
- CRIU fixes
- Improved input validation for user queues
- HMM fix
- Enable process isolation support for GFX 9.4.3/4
- Initial per queue reset support for GFX 9
- Allow users to target recommended SDMA engines

radeon:
- remove .load and drm_dev_alloc
- Fix vbios embedded EDID size handling
- Convert vbios embedded EDID to drm_edid
- Use GEM references instead of TTM
- r100 cp init cleanup
- Fix potential overflows in evergreen CS offset tracking

UAPI:
- KFD support for targeting queues on recommended SDMA engines
  Proposed userspace:
  ROCm/ROCR-Runtime@2f588a2
  ROCm/ROCR-Runtime@eb30a5b

drm/buddy:
- Add start address support for trim function

From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240826201528.55307-1-alexander.deucher@amd.com
  • Loading branch information
danvet committed Aug 27, 2024
2 parents 4461e9e + 3376f92 commit e55ef65
Show file tree
Hide file tree
Showing 438 changed files with 13,108 additions and 4,958 deletions.
32 changes: 9 additions & 23 deletions drivers/gpu/drm/amd/amdgpu/amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@

#define MAX_GPU_INSTANCE 64

/* GFX slice period: 250 ms, converted to jiffies via msecs_to_jiffies(). */
#define GFX_SLICE_PERIOD msecs_to_jiffies(250)

struct amdgpu_gpu_instance {
struct amdgpu_device *adev;
int mgpu_fan_enabled;
Expand Down Expand Up @@ -347,9 +349,9 @@ enum amdgpu_kiq_irq {
AMDGPU_CP_KIQ_IRQ_DRIVER0 = 0,
AMDGPU_CP_KIQ_IRQ_LAST
};
#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT (amdgpu_sriov_vf(adev) ? 50000 : 5000) /* in usecs, extend for VF */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define SRIOV_USEC_TIMEOUT 1200000 /* wait 12 * 100ms for SRIOV */
#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */
#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */
#define MAX_KIQ_REG_TRY 1000

int amdgpu_device_ip_set_clockgating_state(void *dev,
Expand Down Expand Up @@ -823,17 +825,6 @@ struct amdgpu_mqd {
struct amdgpu_reset_domain;
struct amdgpu_fru_info;

/*
 * Reset-related state: a register list/value pair with an entry count and,
 * when CONFIG_DEV_COREDUMP is enabled, a pointer to coredump information.
 */
struct amdgpu_reset_info {
/*
 * Reset dump registers.
 * NOTE(review): presumably reset_dump_reg_list and reset_dump_reg_value are
 * parallel arrays of num_regs entries - confirm against the users.
 */
u32 *reset_dump_reg_list;
u32 *reset_dump_reg_value;
int num_regs;

/* Only present when device coredump support is compiled in. */
#ifdef CONFIG_DEV_COREDUMP
struct amdgpu_coredump_info *coredump_info;
#endif
};

/*
* Non-zero (true) if the GPU has VRAM. Zero (false) otherwise.
*/
Expand Down Expand Up @@ -1157,8 +1148,6 @@ struct amdgpu_device {

struct mutex benchmark_mutex;

struct amdgpu_reset_info reset_info;

bool scpm_enabled;
uint32_t scpm_status;

Expand All @@ -1175,6 +1164,10 @@ struct amdgpu_device {
bool debug_disable_soft_recovery;
bool debug_use_vram_fw_buf;
bool debug_enable_ras_aca;

bool enforce_isolation[MAX_XCP];
/* Added this mutex for cleaner shader isolation between GFX and compute processes */
struct mutex enforce_isolation_mutex;
};

static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev,
Expand Down Expand Up @@ -1587,13 +1580,6 @@ static inline bool amdgpu_acpi_is_s3_active(struct amdgpu_device *adev) { return
static inline void amdgpu_choose_low_power_state(struct amdgpu_device *adev) { }
#endif

#if defined(CONFIG_DRM_AMD_DC)
int amdgpu_dm_display_resume(struct amdgpu_device *adev );
#else
static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; }
#endif


void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);

Expand Down
16 changes: 13 additions & 3 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ static void aca_banks_release(struct aca_banks *banks)
{
struct aca_bank_node *node, *tmp;

if (list_empty(&banks->list))
return;

list_for_each_entry_safe(node, tmp, &banks->list, node) {
list_del(&node->node);
kvfree(node);
Expand Down Expand Up @@ -453,13 +456,13 @@ static int aca_log_aca_error_data(struct aca_bank_error *bank_error, enum aca_er

switch (type) {
case ACA_ERROR_TYPE_UE:
amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, NULL, count);
amdgpu_ras_error_statistic_ue_count(err_data, &mcm_info, count);
break;
case ACA_ERROR_TYPE_CE:
amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, NULL, count);
amdgpu_ras_error_statistic_ce_count(err_data, &mcm_info, count);
break;
case ACA_ERROR_TYPE_DEFERRED:
amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, NULL, count);
amdgpu_ras_error_statistic_de_count(err_data, &mcm_info, count);
break;
default:
break;
Expand Down Expand Up @@ -562,9 +565,13 @@ static void aca_error_fini(struct aca_error *aerr)
struct aca_bank_error *bank_error, *tmp;

/* Hold the lock while the bank error list is inspected and emptied. */
mutex_lock(&aerr->lock);
if (list_empty(&aerr->list))
goto out_unlock;

list_for_each_entry_safe(bank_error, tmp, &aerr->list, node)
aca_bank_error_remove(aerr, bank_error);

out_unlock:
/*
 * Drop the lock before destroying it: both the empty-list path and the
 * normal path arrive here with the mutex held, and mutex_destroy() must
 * not be called on a locked mutex.
 */
mutex_unlock(&aerr->lock);
mutex_destroy(&aerr->lock);
}

Expand Down Expand Up @@ -680,6 +687,9 @@ static void aca_manager_fini(struct aca_handle_manager *mgr)
{
struct aca_handle *handle, *tmp;

if (list_empty(&mgr->list))
return;

list_for_each_entry_safe(handle, tmp, &mgr->list, node)
amdgpu_aca_remove_handle(handle);
}
Expand Down
48 changes: 25 additions & 23 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
Original file line number Diff line number Diff line change
Expand Up @@ -364,15 +364,15 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
return r;
}

void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj)
void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj)
{
struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj;
struct amdgpu_bo **bo = (struct amdgpu_bo **) mem_obj;

amdgpu_bo_reserve(bo, true);
amdgpu_bo_kunmap(bo);
amdgpu_bo_unpin(bo);
amdgpu_bo_unreserve(bo);
amdgpu_bo_unref(&(bo));
amdgpu_bo_reserve(*bo, true);
amdgpu_bo_kunmap(*bo);
amdgpu_bo_unpin(*bo);
amdgpu_bo_unreserve(*bo);
amdgpu_bo_unref(bo);
}

int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
Expand Down Expand Up @@ -783,22 +783,6 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
return 0;
}

/*
 * Query the UTCL2 poison status of one hub instance.
 *
 * A hub_type of zero selects the gfxhub callbacks, any non-zero value selects
 * the mmhub callbacks. Returns false when the selected hub provides no
 * query_utcl2_poison_status callback; otherwise returns whatever the callback
 * reports for hub_inst.
 */
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev,
			int hub_inst, int hub_type)
{
	if (hub_type) {
		if (!adev->mmhub.funcs->query_utcl2_poison_status)
			return false;

		return adev->mmhub.funcs->query_utcl2_poison_status(adev, hub_inst);
	}

	if (!adev->gfxhub.funcs->query_utcl2_poison_status)
		return false;

	return adev->gfxhub.funcs->query_utcl2_poison_status(adev, hub_inst);
}

int amdgpu_amdkfd_check_and_lock_kfd(struct amdgpu_device *adev)
{
return kgd2kfd_check_and_lock_kfd();
Expand Down Expand Up @@ -887,3 +871,21 @@ int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,

return r;
}

/*
 * Stop scheduling on KFD.
 *
 * Forwards to kgd2kfd_stop_sched() for @node_id and returns its result.
 * When KFD initialization has not completed (adev->kfd.init_complete is
 * false) nothing is called and 0 is returned.
 */
int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id)
{
	if (adev->kfd.init_complete)
		return kgd2kfd_stop_sched(adev->kfd.dev, node_id);

	return 0;
}

/*
 * Start scheduling on KFD.
 *
 * Forwards to kgd2kfd_start_sched() for @node_id and returns its result.
 * When KFD initialization has not completed (adev->kfd.init_complete is
 * false) nothing is called and 0 is returned.
 */
int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id)
{
	if (adev->kfd.init_complete)
		return kgd2kfd_start_sched(adev->kfd.dev, node_id);

	return 0;
}
22 changes: 17 additions & 5 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ int amdgpu_amdkfd_bo_validate_and_fence(struct amdgpu_bo *bo,
int amdgpu_amdkfd_alloc_gtt_mem(struct amdgpu_device *adev, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void *mem_obj);
void amdgpu_amdkfd_free_gtt_mem(struct amdgpu_device *adev, void **mem_obj);
int amdgpu_amdkfd_alloc_gws(struct amdgpu_device *adev, size_t size,
void **mem_obj);
void amdgpu_amdkfd_free_gws(struct amdgpu_device *adev, void *mem_obj);
Expand Down Expand Up @@ -264,6 +264,8 @@ int amdgpu_amdkfd_send_close_event_drain_irq(struct amdgpu_device *adev,
uint32_t *payload);
int amdgpu_amdkfd_unmap_hiq(struct amdgpu_device *adev, u32 doorbell_off,
u32 inst);
int amdgpu_amdkfd_start_sched(struct amdgpu_device *adev, uint32_t node_id);
int amdgpu_amdkfd_stop_sched(struct amdgpu_device *adev, uint32_t node_id);

/* Read user wptr from a specified user address space with page fault
* disabled. The memory must be pinned and mapped to the hardware when
Expand Down Expand Up @@ -322,7 +324,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
void **kptr, uint64_t *size);
void amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(struct kgd_mem *mem);

int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo);
int amdgpu_amdkfd_map_gtt_bo_to_gart(struct amdgpu_bo *bo, struct amdgpu_bo **bo_gart);

int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence __rcu **ef);
Expand All @@ -345,11 +347,9 @@ void amdgpu_amdkfd_ras_pasid_poison_consumption_handler(struct amdgpu_device *ad
pasid_notify pasid_fn, void *data, uint32_t reset);

bool amdgpu_amdkfd_is_fed(struct amdgpu_device *adev);
bool amdgpu_amdkfd_bo_mapped_to_dev(struct amdgpu_device *adev, struct kgd_mem *mem);
bool amdgpu_amdkfd_bo_mapped_to_dev(void *drm_priv, struct kgd_mem *mem);
void amdgpu_amdkfd_block_mmu_notifications(void *p);
int amdgpu_amdkfd_criu_resume(void *p);
bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev,
int hub_inst, int hub_type);
int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag, int8_t xcp_id);
void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev,
Expand Down Expand Up @@ -426,6 +426,8 @@ void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd);
void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask);
int kgd2kfd_check_and_lock_kfd(void);
void kgd2kfd_unlock_kfd(void);
int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id);
int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id);
#else
static inline int kgd2kfd_init(void)
{
Expand Down Expand Up @@ -496,5 +498,15 @@ static inline int kgd2kfd_check_and_lock_kfd(void)
static inline void kgd2kfd_unlock_kfd(void)
{
}

/*
 * Fallback for the preprocessor branch in which the real
 * kgd2kfd_start_sched() is not declared: ignores its arguments and
 * reports success (0).
 */
static inline int kgd2kfd_start_sched(struct kfd_dev *kfd,
				      uint32_t node_id)
{
	return 0;
}

/*
 * Fallback for the preprocessor branch in which the real
 * kgd2kfd_stop_sched() is not declared: ignores its arguments and
 * reports success (0).
 */
static inline int kgd2kfd_stop_sched(struct kfd_dev *kfd,
				     uint32_t node_id)
{
	return 0;
}
#endif
#endif /* AMDGPU_AMDKFD_H_INCLUDED */
2 changes: 2 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
Original file line number Diff line number Diff line change
Expand Up @@ -191,4 +191,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v9_hqd_reset,
};
4 changes: 3 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
Original file line number Diff line number Diff line change
Expand Up @@ -418,5 +418,7 @@ const struct kfd2kgd_calls arcturus_kfd2kgd = {
.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v9_hqd_reset
};
4 changes: 3 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gc_9_4_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -541,5 +541,7 @@ const struct kfd2kgd_calls gc_9_4_3_kfd2kgd = {
kgd_gfx_v9_4_3_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_aldebaran_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v9_4_3_set_address_watch,
.clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch
.clear_address_watch = kgd_gfx_v9_4_3_clear_address_watch,
.hqd_get_pq_addr = kgd_gfx_v9_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v9_hqd_reset
};
16 changes: 16 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
Original file line number Diff line number Diff line change
Expand Up @@ -1070,6 +1070,20 @@ static void program_trap_handler_settings(struct amdgpu_device *adev,
unlock_srbm(adev);
}

/*
 * Placeholder hqd_get_pq_addr callback: no argument is examined and the
 * returned address is always 0.
 */
uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
				     uint32_t pipe_id,
				     uint32_t queue_id,
				     uint32_t inst)
{
	return 0;
}

/*
 * Placeholder hqd_reset callback: no argument is examined, nothing is
 * reset, and the return value is always 0.
 */
uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
			       uint32_t pipe_id,
			       uint32_t queue_id,
			       uint32_t inst,
			       unsigned int utimeout)
{
	return 0;
}

const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
Expand Down Expand Up @@ -1097,4 +1111,6 @@ const struct kfd2kgd_calls gfx_v10_kfd2kgd = {
.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
.program_trap_handler_settings = program_trap_handler_settings,
.hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v10_hqd_reset
};
9 changes: 9 additions & 0 deletions drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,12 @@ void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
uint32_t grace_period,
uint32_t *reg_offset,
uint32_t *reg_data);
uint64_t kgd_gfx_v10_hqd_get_pq_addr(struct amdgpu_device *adev,
uint32_t pipe_id,
uint32_t queue_id,
uint32_t inst);
uint64_t kgd_gfx_v10_hqd_reset(struct amdgpu_device *adev,
uint32_t pipe_id,
uint32_t queue_id,
uint32_t inst,
unsigned int utimeout);
4 changes: 3 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
Original file line number Diff line number Diff line change
Expand Up @@ -680,5 +680,7 @@ const struct kfd2kgd_calls gfx_v10_3_kfd2kgd = {
.set_wave_launch_trap_override = kgd_gfx_v10_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_gfx_v10_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v10_set_address_watch,
.clear_address_watch = kgd_gfx_v10_clear_address_watch
.clear_address_watch = kgd_gfx_v10_clear_address_watch,
.hqd_get_pq_addr = kgd_gfx_v10_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v10_hqd_reset
};
18 changes: 17 additions & 1 deletion drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v11.c
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,20 @@ static uint32_t kgd_gfx_v11_clear_address_watch(struct amdgpu_device *adev,
return 0;
}

/*
 * Placeholder hqd_get_pq_addr callback: no argument is examined and the
 * returned address is always 0.
 */
static uint64_t kgd_gfx_v11_hqd_get_pq_addr(struct amdgpu_device *adev,
					    uint32_t pipe_id,
					    uint32_t queue_id,
					    uint32_t inst)
{
	return 0;
}

/*
 * Placeholder hqd_reset callback: no argument is examined, nothing is
 * reset, and the return value is always 0.
 */
static uint64_t kgd_gfx_v11_hqd_reset(struct amdgpu_device *adev,
				      uint32_t pipe_id,
				      uint32_t queue_id,
				      uint32_t inst,
				      unsigned int utimeout)
{
	return 0;
}

const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
.program_sh_mem_settings = program_sh_mem_settings_v11,
.set_pasid_vmid_mapping = set_pasid_vmid_mapping_v11,
Expand All @@ -808,5 +822,7 @@ const struct kfd2kgd_calls gfx_v11_kfd2kgd = {
.set_wave_launch_trap_override = kgd_gfx_v11_set_wave_launch_trap_override,
.set_wave_launch_mode = kgd_gfx_v11_set_wave_launch_mode,
.set_address_watch = kgd_gfx_v11_set_address_watch,
.clear_address_watch = kgd_gfx_v11_clear_address_watch
.clear_address_watch = kgd_gfx_v11_clear_address_watch,
.hqd_get_pq_addr = kgd_gfx_v11_hqd_get_pq_addr,
.hqd_reset = kgd_gfx_v11_hqd_reset
};
Loading

0 comments on commit e55ef65

Please sign in to comment.