diff options
Diffstat (limited to 'src/core/hw')
-rw-r--r-- | src/core/hw/gpu.cpp | 214 | ||||
-rw-r--r-- | src/core/hw/gpu.h | 2 | ||||
-rw-r--r-- | src/core/hw/hw.cpp | 30 | ||||
-rw-r--r-- | src/core/hw/lcd.cpp | 12 | ||||
-rw-r--r-- | src/core/hw/lcd.h | 1 | ||||
-rw-r--r-- | src/core/hw/y2r.cpp | 6 |
6 files changed, 185 insertions, 80 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 7471def5..3ccbc03b 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -2,17 +2,18 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cstring> +#include <type_traits> + #include "common/color.h" #include "common/common_types.h" - -#include "core/arm/arm_interface.h" +#include "common/logging/log.h" +#include "common/vector_math.h" #include "core/settings.h" -#include "core/core.h" #include "core/memory.h" #include "core/core_timing.h" -#include "core/hle/hle.h" #include "core/hle/service/gsp_gpu.h" #include "core/hle/service/dsp_dsp.h" #include "core/hle/service/hid/hid.h" @@ -20,10 +21,17 @@ #include "core/hw/hw.h" #include "core/hw/gpu.h" +#include "core/tracer/recorder.h" + #include "video_core/command_processor.h" +#include "video_core/hwrasterizer_base.h" +#include "video_core/renderer_base.h" #include "video_core/utils.h" #include "video_core/video_core.h" +#include "video_core/debug_utils/debug_utils.h" + + namespace GPU { Regs g_regs; @@ -53,6 +61,29 @@ inline void Read(T &var, const u32 raw_addr) { var = g_regs[addr / 4]; } +static Math::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_pixel) { + switch (input_format) { + case Regs::PixelFormat::RGBA8: + return Color::DecodeRGBA8(src_pixel); + + case Regs::PixelFormat::RGB8: + return Color::DecodeRGB8(src_pixel); + + case Regs::PixelFormat::RGB565: + return Color::DecodeRGB565(src_pixel); + + case Regs::PixelFormat::RGB5A1: + return Color::DecodeRGB5A1(src_pixel); + + case Regs::PixelFormat::RGBA4: + return Color::DecodeRGBA4(src_pixel); + + default: + LOG_ERROR(HW_GPU, "Unknown source framebuffer format %x", input_format); + return {0, 0, 0, 0}; + } +} + template <typename T> inline void Write(u32 addr, const T data) { addr -= HW::VADDR_GPU; @@ -75,39 +106,43 @@ inline void Write(u32 addr, const T data) { const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger)); auto& config = g_regs.memory_fill_config[is_second_filler]; - if (config.address_start && config.trigger) { - u8* start = Memory::GetPhysicalPointer(config.GetStartAddress()); - u8* end = Memory::GetPhysicalPointer(config.GetEndAddress()); - - if (config.fill_24bit) { - // fill with 24-bit values - for (u8* ptr = start; ptr < end; ptr += 3) { - ptr[0] = config.value_24bit_r; - ptr[1] = config.value_24bit_g; - ptr[2] = config.value_24bit_b; + if (config.trigger) { + if (config.address_start) { // Some games pass invalid values here + u8* start = Memory::GetPhysicalPointer(config.GetStartAddress()); + u8* end = Memory::GetPhysicalPointer(config.GetEndAddress()); + + if (config.fill_24bit) { + // fill with 24-bit values + for (u8* ptr = start; ptr < end; ptr += 3) { + ptr[0] = config.value_24bit_r; + ptr[1] = config.value_24bit_g; + ptr[2] = config.value_24bit_b; + } + } else if (config.fill_32bit) { + // fill with 32-bit values + for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr) + *ptr = config.value_32bit; + } else { + // fill with 16-bit values + for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr) + *ptr = config.value_16bit; } - } else if (config.fill_32bit) { - // fill with 32-bit values - for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr) - *ptr = config.value_32bit; - } else { - // fill with 16-bit values - for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr) - *ptr = config.value_16bit; - } - LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); + LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); - config.trigger = 0; - config.finished = 1; + if (!is_second_filler) { + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); + } else { + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); + } - if (!is_second_filler) { - GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0); - } else { - GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1); + VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress()); } - VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress()); + // Reset "trigger" flag and set the "finish" flag + // NOTE: This was confirmed to happen on hardware even if "address_start" is zero. + config.trigger = 0; + config.finished = 1; } break; } @@ -116,6 +151,10 @@ inline void Write(u32 addr, const T data) { { const auto& config = g_regs.display_transfer_config; if (config.trigger & 1) { + + if (Pica::g_debug_context) + Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr); + u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress()); u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress()); @@ -125,11 +164,18 @@ inline void Write(u32 addr, const T data) { break; } - unsigned horizontal_scale = (config.scaling != config.NoScale) ? 2 : 1; - unsigned vertical_scale = (config.scaling == config.ScaleXY) ? 2 : 1; + if (config.output_tiled && + (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) { + LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input"); + UNIMPLEMENTED(); + break; + } - u32 output_width = config.output_width / horizontal_scale; - u32 output_height = config.output_height / vertical_scale; + bool horizontal_scale = config.scaling != config.NoScale; + bool vertical_scale = config.scaling == config.ScaleXY; + + u32 output_width = config.output_width >> horizontal_scale; + u32 output_height = config.output_height >> vertical_scale; u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format); u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format); @@ -153,16 +199,14 @@ inline void Write(u32 addr, const T data) { break; } - // TODO(Subv): Implement the box filter when scaling is enabled - // right now we're just skipping the extra pixels. for (u32 y = 0; y < output_height; ++y) { for (u32 x = 0; x < output_width; ++x) { - Math::Vec4<u8> src_color = { 0, 0, 0, 0 }; + Math::Vec4<u8> src_color; // Calculate the [x,y] position of the input image // based on the current output position and the scale - u32 input_x = x * horizontal_scale; - u32 input_y = y * vertical_scale; + u32 input_x = x << horizontal_scale; + u32 input_y = y << vertical_scale; if (config.flip_vertically) { // Flip the y value of the output data, @@ -177,46 +221,49 @@ inline void Write(u32 addr, const T data) { u32 dst_offset; if (config.output_tiled) { - // Interpret the input as linear and the output as tiled - u32 coarse_y = y & ~7; - u32 stride = output_width * dst_bytes_per_pixel; - - src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; - dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; + if (!config.dont_swizzle) { + // Interpret the input as linear and the output as tiled + u32 coarse_y = y & ~7; + u32 stride = output_width * dst_bytes_per_pixel; + + src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; + dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride; + } else { + // Both input and output are linear + src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel; + dst_offset = (x + y * output_width) * dst_bytes_per_pixel; + } } else { - // Interpret the input as tiled and the output as linear - u32 coarse_y = input_y & ~7; - u32 stride = config.input_width * src_bytes_per_pixel; - - src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; - dst_offset = (x + y * output_width) * dst_bytes_per_pixel; + if (!config.dont_swizzle) { + // Interpret the input as tiled and the output as linear + u32 coarse_y = input_y & ~7; + u32 stride = config.input_width * src_bytes_per_pixel; + + src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride; + dst_offset = (x + y * output_width) * dst_bytes_per_pixel; + } else { + // Both input and output are tiled + u32 out_coarse_y = y & ~7; + u32 out_stride = output_width * dst_bytes_per_pixel; + + u32 in_coarse_y = input_y & ~7; + u32 in_stride = config.input_width * src_bytes_per_pixel; + + src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride; + dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride; + } } const u8* src_pixel = src_pointer + src_offset; - switch (config.input_format) { - case Regs::PixelFormat::RGBA8: - src_color = Color::DecodeRGBA8(src_pixel); - break; - - case Regs::PixelFormat::RGB8: - src_color = Color::DecodeRGB8(src_pixel); - break; - - case Regs::PixelFormat::RGB565: - src_color = Color::DecodeRGB565(src_pixel); - break; - - case Regs::PixelFormat::RGB5A1: - src_color = Color::DecodeRGB5A1(src_pixel); - break; - - case Regs::PixelFormat::RGBA4: - src_color = Color::DecodeRGBA4(src_pixel); - break; - - default: - LOG_ERROR(HW_GPU, "Unknown source framebuffer format %x", config.input_format.Value()); - break; + src_color = DecodePixel(config.input_format, src_pixel); + if (config.scaling == config.ScaleX) { + Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel); + src_color = ((src_color + pixel) / 2).Cast<u8>(); + } else if (config.scaling == config.ScaleXY) { + Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel); + Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel); + Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel); + src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>(); } u8* dst_pixel = dst_pointer + dst_offset; @@ -254,6 +301,7 @@ inline void Write(u32 addr, const T data) { config.GetPhysicalOutputAddress(), output_width, output_height, config.output_format.Value(), config.flags); + g_regs.display_transfer_config.trigger = 0; GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF); VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size); @@ -268,7 +316,14 @@ inline void Write(u32 addr, const T data) { if (config.trigger & 1) { u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress()); + + if (Pica::g_debug_context && Pica::g_debug_context->recorder) { + Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress()); + } + Pica::CommandProcessor::ProcessCommandList(buffer, config.size); + + g_regs.command_processor_config.trigger = 0; } break; } @@ -276,6 +331,13 @@ inline void Write(u32 addr, const T data) { default: break; } + + // Notify tracer about the register write + // This is happening *after* handling the write to make sure we properly catch all memory reads. + if (Pica::g_debug_context && Pica::g_debug_context->recorder) { + // addr + GPU VBase - IO VBase + IO PBase + Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data); + } } // Explicitly instantiate template functions because we aren't defining this in the header: diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h index 699bcd2a..daad506f 100644 --- a/src/core/hw/gpu.h +++ b/src/core/hw/gpu.h @@ -5,6 +5,7 @@ #pragma once #include <cstddef> +#include <type_traits> #include "common/assert.h" #include "common/bit_field.h" @@ -202,6 +203,7 @@ struct Regs { BitField< 0, 1, u32> flip_vertically; // flips input data vertically BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing + BitField< 5, 1, u32> dont_swizzle; BitField< 8, 3, PixelFormat> input_format; BitField<12, 3, PixelFormat> output_format; diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp index c7006a49..b5fdbf9c 100644 --- a/src/core/hw/hw.cpp +++ b/src/core/hw/hw.cpp @@ -15,6 +15,21 @@ template <typename T> inline void Read(T &var, const u32 addr) { switch (addr & 0xFFFFF000) { case VADDR_GPU: + case VADDR_GPU + 0x1000: + case VADDR_GPU + 0x2000: + case VADDR_GPU + 0x3000: + case VADDR_GPU + 0x4000: + case VADDR_GPU + 0x5000: + case VADDR_GPU + 0x6000: + case VADDR_GPU + 0x7000: + case VADDR_GPU + 0x8000: + case VADDR_GPU + 0x9000: + case VADDR_GPU + 0xA000: + case VADDR_GPU + 0xB000: + case VADDR_GPU + 0xC000: + case VADDR_GPU + 0xD000: + case VADDR_GPU + 0xE000: + case VADDR_GPU + 0xF000: GPU::Read(var, addr); break; case VADDR_LCD: @@ -29,6 +44,21 @@ template <typename T> inline void Write(u32 addr, const T data) { switch (addr & 0xFFFFF000) { case VADDR_GPU: + case VADDR_GPU + 0x1000: + case VADDR_GPU + 0x2000: + case VADDR_GPU + 0x3000: + case VADDR_GPU + 0x4000: + case VADDR_GPU + 0x5000: + case VADDR_GPU + 0x6000: + case VADDR_GPU + 0x7000: + case VADDR_GPU + 0x8000: + case VADDR_GPU + 0x9000: + case VADDR_GPU + 0xA000: + case VADDR_GPU + 0xB000: + case VADDR_GPU + 0xC000: + case VADDR_GPU + 0xD000: + case VADDR_GPU + 0xE000: + case VADDR_GPU + 0xF000: GPU::Write(addr, data); break; case VADDR_LCD: diff --git a/src/core/hw/lcd.cpp b/src/core/hw/lcd.cpp index 963c8d98..6f93709e 100644 --- a/src/core/hw/lcd.cpp +++ b/src/core/hw/lcd.cpp @@ -7,11 +7,12 @@ #include "common/common_types.h" #include "common/logging/log.h" -#include "core/arm/arm_interface.h" -#include "core/hle/hle.h" #include "core/hw/hw.h" #include "core/hw/lcd.h" +#include "core/tracer/recorder.h" +#include "video_core/debug_utils/debug_utils.h" + namespace LCD { Regs g_regs; @@ -42,6 +43,13 @@ inline void Write(u32 addr, const T data) { } g_regs[index] = static_cast<u32>(data); + + // Notify tracer about the register write + // This is happening *after* handling the write to make sure we properly catch all memory reads. + if (Pica::g_debug_context && Pica::g_debug_context->recorder) { + // addr + GPU VBase - IO VBase + IO PBase + Pica::g_debug_context->recorder->RegisterWritten<T>(addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data); + } } // Explicitly instantiate template functions because we aren't defining this in the header: diff --git a/src/core/hw/lcd.h b/src/core/hw/lcd.h index 8631eb20..bcce6d8c 100644 --- a/src/core/hw/lcd.h +++ b/src/core/hw/lcd.h @@ -5,6 +5,7 @@ #pragma once #include <cstddef> +#include <type_traits> #include "common/bit_field.h" #include "common/common_funcs.h" diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp index 5b7fb39e..f80e26ec 100644 --- a/src/core/hw/y2r.cpp +++ b/src/core/hw/y2r.cpp @@ -2,8 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> #include <array> -#include <numeric> +#include <cstddef> +#include <memory> #include "common/assert.h" #include "common/color.h" @@ -109,7 +111,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data while (output < unit_end) { u32 color = *input++; Math::Vec4<u8> col_vec{ - (color >> 24) & 0xFF, (color >> 16) & 0xFF, (color >> 8) & 0xFF, alpha, + (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha }; switch (output_format) { |