diff options
Diffstat (limited to 'src/core/hw')
-rw-r--r-- | src/core/hw/gpu.cpp | 186 | ||||
-rw-r--r-- | src/core/hw/gpu.h | 65 | ||||
-rw-r--r-- | src/core/hw/hw.cpp | 3 |
3 files changed, 149 insertions, 105 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 947365da..88002352 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -9,6 +9,7 @@ #include "core/settings.h" #include "core/core.h" #include "core/mem_map.h" +#include "core/core_timing.h" #include "core/hle/hle.h" #include "core/hle/service/gsp_gpu.h" @@ -25,14 +26,17 @@ namespace GPU { Regs g_regs; -bool g_skip_frame = false; ///< True if the current frame was skipped +/// True if the current frame was skipped +bool g_skip_frame = false; -static u64 frame_ticks = 0; ///< 268MHz / gpu_refresh_rate frames per second -static u64 line_ticks = 0; ///< Number of ticks for a screen line -static u32 cur_line = 0; ///< Current screen line -static u64 last_update_tick = 0; ///< CPU ticl count from last GPU update -static u64 frame_count = 0; ///< Number of frames drawn -static bool last_skip_frame = false; ///< True if the last frame was skipped +/// 268MHz / gpu_refresh_rate frames per second +static u64 frame_ticks; +/// Event id for CoreTiming +static int vblank_event; +/// Total number of frames drawn +static u64 frame_count; +/// True if the last frame was skipped +static bool last_skip_frame = false; template <typename T> inline void Read(T &var, const u32 raw_addr) { @@ -64,22 +68,43 @@ inline void Write(u32 addr, const T data) { switch (index) { // Memory fills are triggered once the fill value is written. - // NOTE: This is not verified. - case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].value, 0x00004 + 0x3): - case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].value, 0x00008 + 0x3): + case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].trigger, 0x00004 + 0x3): + case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].trigger, 0x00008 + 0x3): { - const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].value)); - const auto& config = g_regs.memory_fill_config[is_second_filler]; - - // TODO: Not sure if this check should be done at GSP level instead - if (config.address_start) { - // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all - u32* start = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetStartAddress())); - u32* end = (u32*)Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetEndAddress())); - for (u32* ptr = start; ptr < end; ++ptr) - *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation + const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger)); + auto& config = g_regs.memory_fill_config[is_second_filler]; + + if (config.address_start && config.trigger) { + u8* start = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetStartAddress())); + u8* end = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetEndAddress())); + + if (config.fill_24bit) { + // fill with 24-bit values + for (u8* ptr = start; ptr < end; ptr += 3) { + ptr[0] = config.value_24bit_b; + ptr[1] = config.value_24bit_g; + ptr[2] = config.value_24bit_r; + } + } else if (config.fill_32bit) { + // fill with 32-bit values + for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr) + *ptr = config.value_32bit; + } else { + // fill with 16-bit values + for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr) + *ptr = config.value_16bit; + } LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); + + config.trigger = 0; + config.finished = 1; + + if (!is_second_filler) { + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); + } else { + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); + } } break; } @@ -91,26 +116,28 @@ inline void Write(u32 addr, const T data) { u8* source_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalInputAddress())); u8* dest_pointer = Memory::GetPointer(Memory::PhysicalToVirtualAddress(config.GetPhysicalOutputAddress())); + // Cheap emulation of horizontal scaling: Just skip each second pixel of the + // input framebuffer. We keep track of this in the pixel_skip variable. + unsigned pixel_skip = (config.scale_horizontally != 0) ? 2 : 1; + + u32 output_width = config.output_width / pixel_skip; + for (u32 y = 0; y < config.output_height; ++y) { // TODO: Why does the register seem to hold twice the framebuffer width? - for (u32 x = 0; x < config.output_width; ++x) { + + for (u32 x = 0; x < output_width; ++x) { struct { int r, g, b, a; } source_color = { 0, 0, 0, 0 }; - // Cheap emulation of horizontal scaling: Just skip each second pixel of the - // input framebuffer. We keep track of this in the pixel_skip variable. - unsigned pixel_skip = (config.scale_horizontally != 0) ? 2 : 1; - switch (config.input_format) { case Regs::PixelFormat::RGBA8: { - // TODO: Most likely got the component order messed up. - u8* srcptr = source_pointer + x * 4 * pixel_skip + y * config.input_width * 4 * pixel_skip; - source_color.r = srcptr[0]; // blue - source_color.g = srcptr[1]; // green - source_color.b = srcptr[2]; // red - source_color.a = srcptr[3]; // alpha + u8* srcptr = source_pointer + (x * pixel_skip + y * config.input_width) * 4; + source_color.r = srcptr[3]; // red + source_color.g = srcptr[2]; // green + source_color.b = srcptr[1]; // blue + source_color.a = srcptr[0]; // alpha break; } @@ -153,11 +180,10 @@ inline void Write(u32 addr, const T data) { case Regs::PixelFormat::RGB8: { - // TODO: Most likely got the component order messed up. - u8* dstptr = dest_pointer + x * 3 + y * config.output_width * 3; - dstptr[0] = source_color.r; // blue + u8* dstptr = dest_pointer + (x + y * output_width) * 3; + dstptr[2] = source_color.r; // red dstptr[1] = source_color.g; // green - dstptr[2] = source_color.b; // red + dstptr[0] = source_color.b; // blue break; } @@ -185,10 +211,12 @@ inline void Write(u32 addr, const T data) { } LOG_TRACE(HW_GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%ux%u)-> 0x%08x(%ux%u), dst format %x", - config.output_height * config.output_width * 4, + config.output_height * output_width * 4, config.GetPhysicalInputAddress(), (u32)config.input_width, (u32)config.input_height, - config.GetPhysicalOutputAddress(), (u32)config.output_width, (u32)config.output_height, + config.GetPhysicalOutputAddress(), (u32)output_width, (u32)config.output_height, config.output_format.Value()); + + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); } break; } @@ -223,51 +251,37 @@ template void Write<u16>(u32 addr, const u16 data); template void Write<u8>(u32 addr, const u8 data); /// Update hardware -void Update() { - auto& framebuffer_top = g_regs.framebuffer_config[0]; - - // Synchronize GPU on a thread reschedule: Because we cannot accurately predict a vertical - // blank, we need to simulate it. Based on testing, it seems that retail applications work more - // accurately when this is signalled between thread switches. - - if (HLE::g_reschedule) { - u64 current_ticks = Core::g_app_core->GetTicks(); - u32 num_lines = static_cast<u32>((current_ticks - last_update_tick) / line_ticks); - - // Synchronize line... - if (num_lines > 0) { - GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); - cur_line += num_lines; - last_update_tick += (num_lines * line_ticks); - } - - // Synchronize frame... - if (cur_line >= framebuffer_top.height) { - cur_line = 0; - frame_count++; - last_skip_frame = g_skip_frame; - g_skip_frame = (frame_count & Settings::values.frame_skip) != 0; - - // Swap buffers based on the frameskip mode, which is a little bit tricky. When - // a frame is being skipped, nothing is being rendered to the internal framebuffer(s). - // So, we should only swap frames if the last frame was rendered. The rules are: - // - If frameskip == 0 (disabled), always swap buffers - // - If frameskip == 1, swap buffers every other frame (starting from the first frame) - // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame) - if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) || - Settings::values.frame_skip == 0) { - VideoCore::g_renderer->SwapBuffers(); - } - - // Signal to GSP that GPU interrupt has occurred - GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1); - - // TODO(bunnei): Fake a DSP interrupt on each frame. This does not belong here, but - // until we can emulate DSP interrupts, this is probably the only reasonable place to do - // this. Certain games expect this to be periodically signaled. - DSP_DSP::SignalInterrupt(); - } +static void VBlankCallback(u64 userdata, int cycles_late) { + frame_count++; + last_skip_frame = g_skip_frame; + g_skip_frame = (frame_count & Settings::values.frame_skip) != 0; + + // Swap buffers based on the frameskip mode, which is a little bit tricky. When + // a frame is being skipped, nothing is being rendered to the internal framebuffer(s). + // So, we should only swap frames if the last frame was rendered. The rules are: + // - If frameskip == 0 (disabled), always swap buffers + // - If frameskip == 1, swap buffers every other frame (starting from the first frame) + // - If frameskip > 1, swap buffers every frameskip^n frames (starting from the second frame) + if ((((Settings::values.frame_skip != 1) ^ last_skip_frame) && last_skip_frame != g_skip_frame) || + Settings::values.frame_skip == 0) { + VideoCore::g_renderer->SwapBuffers(); } + + // Signal to GSP that GPU interrupt has occurred + // TODO(yuriks): hwtest to determine if PDC0 is for the Top screen and PDC1 for the Sub + // screen, or if both use the same interrupts and these two instead determine the + // beginning and end of the VBlank period. If needed, split the interrupt firing into + // two different intervals. + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1); + + // TODO(bunnei): Fake a DSP interrupt on each frame. This does not belong here, but + // until we can emulate DSP interrupts, this is probably the only reasonable place to do + // this. Certain games expect this to be periodically signaled. + DSP_DSP::SignalInterrupt(); + + // Reschedule recurrent event + CoreTiming::ScheduleEvent(frame_ticks - cycles_late, vblank_event); } /// Initialize hardware @@ -284,8 +298,8 @@ void Init() { framebuffer_top.address_right1 = 0x18273000; framebuffer_top.address_right2 = 0x182B9800; framebuffer_sub.address_left1 = 0x1848F000; - //framebuffer_sub.address_left2 = unknown; - framebuffer_sub.address_right1 = 0x184C7800; + framebuffer_sub.address_left2 = 0x184C7800; + //framebuffer_sub.address_right1 = unknown; //framebuffer_sub.address_right2 = unknown; framebuffer_top.width = 240; @@ -301,12 +315,12 @@ void Init() { framebuffer_sub.active_fb = 0; frame_ticks = 268123480 / Settings::values.gpu_refresh_rate; - line_ticks = (GPU::frame_ticks / framebuffer_top.height); - cur_line = 0; - last_update_tick = Core::g_app_core->GetTicks(); last_skip_frame = false; g_skip_frame = false; + vblank_event = CoreTiming::RegisterEvent("GPU::VBlankCallback", VBlankCallback); + CoreTiming::ScheduleEvent(frame_ticks, vblank_event); + LOG_DEBUG(HW_GPU, "initialized OK"); } diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 7de05523..75f52446 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -34,13 +34,6 @@ namespace GPU { // MMIO region 0x1EFxxxxx struct Regs { -// helper macro to properly align structure members. -// Calling INSERT_PADDING_WORDS will add a new member variable with a name like "pad121", -// depending on the current source line to make sure variable names are unique. -#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y -#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y) -#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)] - // helper macro to make sure the defined structures are of the expected size. #if defined(_MSC_VER) // TODO: MSVC does not support using sizeof() on non-static data members even though this @@ -53,6 +46,7 @@ struct Regs { "Structure size and register block length don't match") #endif + // All of those formats are described in reverse byte order, since the 3DS is little-endian. enum class PixelFormat : u32 { RGBA8 = 0, RGB8 = 1, @@ -61,13 +55,57 @@ struct Regs { RGBA4 = 4, }; + /** + * Returns the number of bytes per pixel. + */ + static int BytesPerPixel(PixelFormat format) { + switch (format) { + case PixelFormat::RGBA8: + return 4; + case PixelFormat::RGB8: + return 3; + case PixelFormat::RGB565: + case PixelFormat::RGB5A1: + case PixelFormat::RGBA4: + return 2; + default: + UNIMPLEMENTED(); + } + } + INSERT_PADDING_WORDS(0x4); struct { u32 address_start; - u32 address_end; // ? - u32 size; - u32 value; // ? + u32 address_end; + + union { + u32 value_32bit; + + BitField<0, 16, u32> value_16bit; + + // TODO: Verify component order + BitField< 0, 8, u32> value_24bit_r; + BitField< 8, 8, u32> value_24bit_g; + BitField<16, 8, u32> value_24bit_b; + }; + + union { + u32 control; + + // Setting this field to 1 triggers the memory fill. + // This field also acts as a status flag, and gets reset to 0 upon completion. + BitField<0, 1, u32> trigger; + + // Set to 1 upon completion. + BitField<0, 1, u32> finished; + + // 0: fill with 16- or 32-bit wide values; 1: fill with 24-bit wide values + BitField<8, 1, u32> fill_24bit; + + // 0: fill with 16-bit wide values; 1: fill with 32-bit wide values + BitField<9, 1, u32> fill_32bit; + }; inline u32 GetStartAddress() const { return DecodeAddressRegister(address_start); @@ -193,10 +231,6 @@ struct Regs { INSERT_PADDING_WORDS(0x9c3); -#undef INSERT_PADDING_WORDS_HELPER1 -#undef INSERT_PADDING_WORDS_HELPER2 -#undef INSERT_PADDING_WORDS - static inline size_t NumIds() { return sizeof(Regs) / sizeof(u32); } @@ -252,9 +286,6 @@ void Read(T &var, const u32 addr); template <typename T> void Write(u32 addr, const T data); -/// Update hardware -void Update(); - /// Initialize hardware void Init(); diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp index 848ab534..a63ba6ee 100644 --- a/src/core/hw/hw.cpp +++ b/src/core/hw/hw.cpp @@ -75,7 +75,6 @@ template void Write<u8>(u32 addr, const u8 data); /// Update hardware void Update() { - GPU::Update(); } /// Initialize hardware @@ -89,4 +88,4 @@ void Shutdown() { LOG_DEBUG(HW, "shutdown OK"); } -}
\ No newline at end of file +} |