Skip to content

Commit 8faf0e0

Browse files
committed
drm/amdgpu: clean up init sequence for failures
If we fail during device init, record what state each block is in so that we can tear down cleanly. Fixes various problems on device init failure. Reviewed-by: Christian König <christian.koenig@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
1 parent 0a90a0c commit 8faf0e0

File tree

3 files changed

+31
-19
lines changed

3 files changed

+31
-19
lines changed

drivers/gpu/drm/amd/amdgpu/amdgpu.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1866,6 +1866,12 @@ typedef void (*amdgpu_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
18661866
typedef uint32_t (*amdgpu_block_rreg_t)(struct amdgpu_device*, uint32_t, uint32_t);
18671867
typedef void (*amdgpu_block_wreg_t)(struct amdgpu_device*, uint32_t, uint32_t, uint32_t);
18681868

1869+
struct amdgpu_ip_block_status { /* per-IP-block init state, one entry per block in adev->ip_block_status[] */
1870+
bool valid; /* set by amdgpu_early_init: false if masked off via amdgpu_ip_block_mask or early_init returned -ENOENT; cleared by amdgpu_fini after sw_fini */
1871+
bool sw; /* set by amdgpu_init once sw_init has succeeded; gates and is cleared by the sw_fini pass in amdgpu_fini */
1872+
bool hw; /* set by amdgpu_init once the block's hardware init path has succeeded; gates and is cleared by the hw_fini pass in amdgpu_fini */
1873+
};
1874+
18691875
struct amdgpu_device {
18701876
struct device *dev;
18711877
struct drm_device *ddev;
@@ -2008,7 +2014,7 @@ struct amdgpu_device {
20082014

20092015
const struct amdgpu_ip_block_version *ip_blocks;
20102016
int num_ip_blocks;
2011-
bool *ip_block_enabled;
2017+
struct amdgpu_ip_block_status *ip_block_status;
20122018
struct mutex mn_lock;
20132019
DECLARE_HASHTABLE(mn_hash, 7);
20142020

drivers/gpu/drm/amd/amdgpu/amdgpu_device.c

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1191,8 +1191,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
11911191
return -EINVAL;
11921192
}
11931193

1194-
adev->ip_block_enabled = kcalloc(adev->num_ip_blocks, sizeof(bool), GFP_KERNEL);
1195-
if (adev->ip_block_enabled == NULL)
1194+
adev->ip_block_status = kcalloc(adev->num_ip_blocks,
1195+
sizeof(struct amdgpu_ip_block_status), GFP_KERNEL);
1196+
if (adev->ip_block_status == NULL)
11961197
return -ENOMEM;
11971198

11981199
if (adev->ip_blocks == NULL) {
@@ -1203,18 +1204,18 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
12031204
for (i = 0; i < adev->num_ip_blocks; i++) {
12041205
if ((amdgpu_ip_block_mask & (1 << i)) == 0) {
12051206
DRM_ERROR("disabled ip block: %d\n", i);
1206-
adev->ip_block_enabled[i] = false;
1207+
adev->ip_block_status[i].valid = false;
12071208
} else {
12081209
if (adev->ip_blocks[i].funcs->early_init) {
12091210
r = adev->ip_blocks[i].funcs->early_init((void *)adev);
12101211
if (r == -ENOENT)
1211-
adev->ip_block_enabled[i] = false;
1212+
adev->ip_block_status[i].valid = false;
12121213
else if (r)
12131214
return r;
12141215
else
1215-
adev->ip_block_enabled[i] = true;
1216+
adev->ip_block_status[i].valid = true;
12161217
} else {
1217-
adev->ip_block_enabled[i] = true;
1218+
adev->ip_block_status[i].valid = true;
12181219
}
12191220
}
12201221
}
@@ -1227,11 +1228,12 @@ static int amdgpu_init(struct amdgpu_device *adev)
12271228
int i, r;
12281229

12291230
for (i = 0; i < adev->num_ip_blocks; i++) {
1230-
if (!adev->ip_block_enabled[i])
1231+
if (!adev->ip_block_status[i].valid)
12311232
continue;
12321233
r = adev->ip_blocks[i].funcs->sw_init((void *)adev);
12331234
if (r)
12341235
return r;
1236+
adev->ip_block_status[i].sw = true;
12351237
/* need to do gmc hw init early so we can allocate gpu mem */
12361238
if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_GMC) {
12371239
r = amdgpu_vram_scratch_init(adev);
@@ -1243,18 +1245,20 @@ static int amdgpu_init(struct amdgpu_device *adev)
12431245
r = amdgpu_wb_init(adev);
12441246
if (r)
12451247
return r;
1248+
adev->ip_block_status[i].hw = true;
12461249
}
12471250
}
12481251

12491252
for (i = 0; i < adev->num_ip_blocks; i++) {
1250-
if (!adev->ip_block_enabled[i])
1253+
if (!adev->ip_block_status[i].sw)
12511254
continue;
12521255
/* gmc hw init is done early */
12531256
if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_GMC)
12541257
continue;
12551258
r = adev->ip_blocks[i].funcs->hw_init((void *)adev);
12561259
if (r)
12571260
return r;
1261+
adev->ip_block_status[i].hw = true;
12581262
}
12591263

12601264
return 0;
@@ -1265,7 +1269,7 @@ static int amdgpu_late_init(struct amdgpu_device *adev)
12651269
int i = 0, r;
12661270

12671271
for (i = 0; i < adev->num_ip_blocks; i++) {
1268-
if (!adev->ip_block_enabled[i])
1272+
if (!adev->ip_block_status[i].valid)
12691273
continue;
12701274
/* enable clockgating to save power */
12711275
r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
@@ -1287,7 +1291,7 @@ static int amdgpu_fini(struct amdgpu_device *adev)
12871291
int i, r;
12881292

12891293
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
1290-
if (!adev->ip_block_enabled[i])
1294+
if (!adev->ip_block_status[i].hw)
12911295
continue;
12921296
if (adev->ip_blocks[i].type == AMD_IP_BLOCK_TYPE_GMC) {
12931297
amdgpu_wb_fini(adev);
@@ -1300,14 +1304,16 @@ static int amdgpu_fini(struct amdgpu_device *adev)
13001304
return r;
13011305
r = adev->ip_blocks[i].funcs->hw_fini((void *)adev);
13021306
/* XXX handle errors */
1307+
adev->ip_block_status[i].hw = false;
13031308
}
13041309

13051310
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
1306-
if (!adev->ip_block_enabled[i])
1311+
if (!adev->ip_block_status[i].sw)
13071312
continue;
13081313
r = adev->ip_blocks[i].funcs->sw_fini((void *)adev);
13091314
/* XXX handle errors */
1310-
adev->ip_block_enabled[i] = false;
1315+
adev->ip_block_status[i].sw = false;
1316+
adev->ip_block_status[i].valid = false;
13111317
}
13121318

13131319
return 0;
@@ -1318,7 +1324,7 @@ static int amdgpu_suspend(struct amdgpu_device *adev)
13181324
int i, r;
13191325

13201326
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
1321-
if (!adev->ip_block_enabled[i])
1327+
if (!adev->ip_block_status[i].valid)
13221328
continue;
13231329
/* ungate blocks so that suspend can properly shut them down */
13241330
r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
@@ -1336,7 +1342,7 @@ static int amdgpu_resume(struct amdgpu_device *adev)
13361342
int i, r;
13371343

13381344
for (i = 0; i < adev->num_ip_blocks; i++) {
1339-
if (!adev->ip_block_enabled[i])
1345+
if (!adev->ip_block_status[i].valid)
13401346
continue;
13411347
r = adev->ip_blocks[i].funcs->resume(adev);
13421348
if (r)
@@ -1582,8 +1588,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
15821588
amdgpu_fence_driver_fini(adev);
15831589
amdgpu_fbdev_fini(adev);
15841590
r = amdgpu_fini(adev);
1585-
kfree(adev->ip_block_enabled);
1586-
adev->ip_block_enabled = NULL;
1591+
kfree(adev->ip_block_status);
1592+
adev->ip_block_status = NULL;
15871593
adev->accel_working = false;
15881594
/* free i2c buses */
15891595
amdgpu_i2c_fini(adev);

drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
235235

236236
for (i = 0; i < adev->num_ip_blocks; i++) {
237237
if (adev->ip_blocks[i].type == type &&
238-
adev->ip_block_enabled[i]) {
238+
adev->ip_block_status[i].valid) {
239239
ip.hw_ip_version_major = adev->ip_blocks[i].major;
240240
ip.hw_ip_version_minor = adev->ip_blocks[i].minor;
241241
ip.capabilities_flags = 0;
@@ -274,7 +274,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
274274

275275
for (i = 0; i < adev->num_ip_blocks; i++)
276276
if (adev->ip_blocks[i].type == type &&
277-
adev->ip_block_enabled[i] &&
277+
adev->ip_block_status[i].valid &&
278278
count < AMDGPU_HW_IP_INSTANCE_MAX_COUNT)
279279
count++;
280280

0 commit comments

Comments
 (0)