1818#include " xenia/base/byte_stream.h"
1919#include " xenia/base/logging.h"
2020#include " xenia/base/math.h"
21+ #include " xenia/base/memory.h"
2122#include " xenia/base/profiling.h"
2223#include " xenia/base/ring_buffer.h"
2324#include " xenia/gpu/gpu_flags.h"
@@ -334,27 +335,29 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
334335 return ;
335336 }
336337
337- regs.values [index].u32 = value;
338+ // Volatile for the WAIT_REG_MEM loop.
339+ const_cast <volatile uint32_t &>(regs.values [index]) = value;
338340 if (!regs.GetRegisterInfo (index)) {
339341 XELOGW (" GPU: Write to unknown register ({:04X} = {:08X})" , index, value);
340342 }
341343
342344 // Scratch register writeback.
343345 if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
344346 uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
345- if ((1 << scratch_reg) & regs.values [XE_GPU_REG_SCRATCH_UMSK]. u32 ) {
347+ if ((1 << scratch_reg) & regs.values [XE_GPU_REG_SCRATCH_UMSK]) {
346348 // Enabled - write to address.
347- uint32_t scratch_addr = regs.values [XE_GPU_REG_SCRATCH_ADDR]. u32 ;
349+ uint32_t scratch_addr = regs.values [XE_GPU_REG_SCRATCH_ADDR];
348350 uint32_t mem_addr = scratch_addr + (scratch_reg * 4 );
349351 xe::store_and_swap<uint32_t >(memory_->TranslatePhysical (mem_addr), value);
350352 }
351353 } else {
352354 switch (index) {
353355 // If this is a COHER register, set the dirty flag.
354- // This will block the command processor the next time it WAIT_MEM_REGs
356+ // This will block the command processor the next time it WAIT_REG_MEMs
355357 // and allow us to synchronize the memory.
356358 case XE_GPU_REG_COHER_STATUS_HOST: {
357- regs.values [index].u32 |= UINT32_C (0x80000000 );
359+ const_cast <volatile uint32_t &>(regs.values [index]) |=
360+ UINT32_C (0x80000000 );
358361 } break ;
359362
360363 case XE_GPU_REG_DC_LUT_RW_INDEX: {
@@ -365,12 +368,12 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
365368
366369 case XE_GPU_REG_DC_LUT_SEQ_COLOR: {
367370 // Should be in the 256-entry table writing mode.
368- assert_zero (regs[XE_GPU_REG_DC_LUT_RW_MODE]. u32 & 0b1 );
371+ assert_zero (regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1 );
369372 auto & gamma_ramp_rw_index = regs.Get <reg::DC_LUT_RW_INDEX>();
370373 // DC_LUT_SEQ_COLOR is in the red, green, blue order, but the write
371374 // enable mask is blue, green, red.
372375 bool write_gamma_ramp_component =
373- (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK]. u32 &
376+ (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
374377 (UINT32_C (1 ) << (2 - gamma_ramp_rw_component_))) != 0 ;
375378 if (write_gamma_ramp_component) {
376379 reg::DC_LUT_30_COLOR& gamma_ramp_entry =
@@ -401,14 +404,14 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
401404
402405 case XE_GPU_REG_DC_LUT_PWL_DATA: {
403406 // Should be in the PWL writing mode.
404- assert_not_zero (regs[XE_GPU_REG_DC_LUT_RW_MODE]. u32 & 0b1 );
407+ assert_not_zero (regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1 );
405408 auto & gamma_ramp_rw_index = regs.Get <reg::DC_LUT_RW_INDEX>();
406409 // Bit 7 of the index is ignored for PWL.
407410 uint32_t gamma_ramp_rw_index_pwl = gamma_ramp_rw_index.rw_index & 0x7F ;
408411 // DC_LUT_PWL_DATA is likely in the red, green, blue order because
409412 // DC_LUT_SEQ_COLOR is, but the write enable mask is blue, green, red.
410413 bool write_gamma_ramp_component =
411- (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK]. u32 &
414+ (regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] &
412415 (UINT32_C (1 ) << (2 - gamma_ramp_rw_component_))) != 0 ;
413416 if (write_gamma_ramp_component) {
414417 reg::DC_LUT_PWL_DATA& gamma_ramp_entry =
@@ -436,10 +439,10 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
436439
437440 case XE_GPU_REG_DC_LUT_30_COLOR: {
438441 // Should be in the 256-entry table writing mode.
439- assert_zero (regs[XE_GPU_REG_DC_LUT_RW_MODE]. u32 & 0b1 );
442+ assert_zero (regs[XE_GPU_REG_DC_LUT_RW_MODE] & 0b1 );
440443 auto & gamma_ramp_rw_index = regs.Get <reg::DC_LUT_RW_INDEX>();
441444 uint32_t gamma_ramp_write_enable_mask =
442- regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK]. u32 & 0b111 ;
445+ regs[XE_GPU_REG_DC_LUT_WRITE_EN_MASK] & 0b111 ;
443446 if (gamma_ramp_write_enable_mask) {
444447 reg::DC_LUT_30_COLOR& gamma_ramp_entry =
445448 gamma_ramp_256_entry_table_[gamma_ramp_rw_index.rw_index ];
@@ -479,10 +482,12 @@ void CommandProcessor::MakeCoherent() {
479482 // https://web.archive.org/web/20160711162346/https://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf
480483 // https://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454
481484
482- RegisterFile* regs = register_file_;
483- auto & status_host = regs->Get <reg::COHER_STATUS_HOST>();
484- auto base_host = regs->values [XE_GPU_REG_COHER_BASE_HOST].u32 ;
485- auto size_host = regs->values [XE_GPU_REG_COHER_SIZE_HOST].u32 ;
485+ // Volatile because this may be called from the WAIT_REG_MEM loop.
486+ volatile uint32_t * regs_volatile = register_file_->values ;
487+ auto status_host = xe::memory::Reinterpret<reg::COHER_STATUS_HOST>(
488+ uint32_t (regs_volatile[XE_GPU_REG_COHER_STATUS_HOST]));
489+ uint32_t base_host = regs_volatile[XE_GPU_REG_COHER_BASE_HOST];
490+ uint32_t size_host = regs_volatile[XE_GPU_REG_COHER_SIZE_HOST];
486491
487492 if (!status_host.status ) {
488493 return ;
@@ -502,7 +507,7 @@ void CommandProcessor::MakeCoherent() {
502507 base_host + size_host, size_host, action);
503508
504509 // Mark coherent.
505- status_host. status = 0 ;
510+ regs_volatile[XE_GPU_REG_COHER_STATUS_HOST] = 0 ;
506511}
507512
508513void CommandProcessor::PrepareForWait () { trace_writer_.Flush (); }
@@ -940,28 +945,33 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingBuffer* reader,
940945 SCOPE_profile_cpu_f (" gpu" );
941946
942947 // wait until a register or memory location is a specific value
948+
943949 uint32_t wait_info = reader->ReadAndSwap <uint32_t >();
944950 uint32_t poll_reg_addr = reader->ReadAndSwap <uint32_t >();
945951 uint32_t ref = reader->ReadAndSwap <uint32_t >();
946952 uint32_t mask = reader->ReadAndSwap <uint32_t >();
947953 uint32_t wait = reader->ReadAndSwap <uint32_t >();
954+
955+ bool is_memory = (wait_info & 0x10 ) != 0 ;
956+
957+ assert_true (is_memory || poll_reg_addr < RegisterFile::kRegisterCount );
958+ const volatile uint32_t & value_ref =
959+ is_memory ? *reinterpret_cast <uint32_t *>(memory_->TranslatePhysical (
960+ poll_reg_addr & ~uint32_t (0x3 )))
961+ : register_file_->values [poll_reg_addr];
962+
948963 bool matched = false ;
949964 do {
950- uint32_t value;
951- if (wait_info & 0x10 ) {
952- // Memory.
953- auto endianness = static_cast <xenos::Endian>(poll_reg_addr & 0x3 );
954- poll_reg_addr &= ~0x3 ;
955- value = xe::load<uint32_t >(memory_->TranslatePhysical (poll_reg_addr));
956- value = GpuSwap (value, endianness);
957- trace_writer_.WriteMemoryRead (CpuToGpu (poll_reg_addr), 4 );
965+ uint32_t value = value_ref;
966+ if (is_memory) {
967+ trace_writer_.WriteMemoryRead (CpuToGpu (poll_reg_addr & ~uint32_t (0x3 )),
968+ sizeof (uint32_t ));
969+ value = xenos::GpuSwap (value,
970+ static_cast <xenos::Endian>(poll_reg_addr & 0x3 ));
958971 } else {
959- // Register.
960- assert_true (poll_reg_addr < RegisterFile::kRegisterCount );
961- value = register_file_->values [poll_reg_addr].u32 ;
962972 if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) {
963973 MakeCoherent ();
964- value = register_file_-> values [poll_reg_addr]. u32 ;
974+ value = value_ref ;
965975 }
966976 }
967977 switch (wait_info & 0x7 ) {
@@ -1024,17 +1034,17 @@ bool CommandProcessor::ExecutePacketType3_REG_RMW(RingBuffer* reader,
10241034 uint32_t rmw_info = reader->ReadAndSwap <uint32_t >();
10251035 uint32_t and_mask = reader->ReadAndSwap <uint32_t >();
10261036 uint32_t or_mask = reader->ReadAndSwap <uint32_t >();
1027- uint32_t value = register_file_->values [rmw_info & 0x1FFF ]. u32 ;
1037+ uint32_t value = register_file_->values [rmw_info & 0x1FFF ];
10281038 if ((rmw_info >> 31 ) & 0x1 ) {
10291039 // & reg
1030- value &= register_file_->values [and_mask & 0x1FFF ]. u32 ;
1040+ value &= register_file_->values [and_mask & 0x1FFF ];
10311041 } else {
10321042 // & imm
10331043 value &= and_mask;
10341044 }
10351045 if ((rmw_info >> 30 ) & 0x1 ) {
10361046 // | reg
1037- value |= register_file_->values [or_mask & 0x1FFF ]. u32 ;
1047+ value |= register_file_->values [or_mask & 0x1FFF ];
10381048 } else {
10391049 // | imm
10401050 value |= or_mask;
@@ -1055,7 +1065,7 @@ bool CommandProcessor::ExecutePacketType3_REG_TO_MEM(RingBuffer* reader,
10551065 uint32_t reg_val;
10561066
10571067 assert_true (reg_addr < RegisterFile::kRegisterCount );
1058- reg_val = register_file_->values [reg_addr]. u32 ;
1068+ reg_val = register_file_->values [reg_addr];
10591069
10601070 auto endianness = static_cast <xenos::Endian>(mem_addr & 0x3 );
10611071 mem_addr &= ~0x3 ;
@@ -1105,7 +1115,7 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingBuffer* reader,
11051115 } else {
11061116 // Register.
11071117 assert_true (poll_reg_addr < RegisterFile::kRegisterCount );
1108- value = register_file_->values [poll_reg_addr]. u32 ;
1118+ value = register_file_->values [poll_reg_addr];
11091119 }
11101120 bool matched = false ;
11111121 switch (wait_info & 0x7 ) {
@@ -1240,7 +1250,7 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_ZPD(RingBuffer* reader,
12401250 if (fake_sample_count >= 0 ) {
12411251 auto * pSampleCounts =
12421252 memory_->TranslatePhysical <xe_gpu_depth_sample_counts*>(
1243- register_file_->values [XE_GPU_REG_RB_SAMPLE_COUNT_ADDR]. u32 );
1253+ register_file_->values [XE_GPU_REG_RB_SAMPLE_COUNT_ADDR]);
12441254 // 0xFFFFFEED is written to this two locations by D3D only on D3DISSUE_END
12451255 // and used to detect a finished query.
12461256 bool is_end_via_z_pass = pSampleCounts->ZPass_A == kQueryFinished &&
@@ -1599,10 +1609,10 @@ bool CommandProcessor::ExecutePacketType3_VIZ_QUERY(RingBuffer* reader,
15991609 // The scan converter writes the internal result back to the register here.
16001610 // We just fake it and say it was visible in case it is read back.
16011611 if (id < 32 ) {
1602- register_file_->values [XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0]. u32 |=
1603- uint32_t ( 1 ) << id;
1612+ register_file_->values [XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_0] |= uint32_t ( 1 )
1613+ << id;
16041614 } else {
1605- register_file_->values [XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1]. u32 |=
1615+ register_file_->values [XE_GPU_REG_PA_SC_VIZ_QUERY_STATUS_1] |=
16061616 uint32_t (1 ) << (id - 32 );
16071617 }
16081618 }
@@ -1614,9 +1624,8 @@ void CommandProcessor::InitializeTrace() {
16141624 // Write the initial register values, to be loaded directly into the
16151625 // RegisterFile since all registers, including those that may have side
16161626 // effects on setting, will be saved.
1617- trace_writer_.WriteRegisters (
1618- 0 , reinterpret_cast <const uint32_t *>(register_file_->values ),
1619- RegisterFile::kRegisterCount , false );
1627+ trace_writer_.WriteRegisters (0 , register_file_->values ,
1628+ RegisterFile::kRegisterCount , false );
16201629
16211630 trace_writer_.WriteGammaRamp (gamma_ramp_256_entry_table (),
16221631 gamma_ramp_pwl_rgb (), gamma_ramp_rw_component_);
0 commit comments