about | summary | refs | log | tree | commit | diff | homepage
path: root/src/core/hw
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/hw')
-rw-r--r-- src/core/hw/gpu.cpp 214
-rw-r--r-- src/core/hw/gpu.h 2
-rw-r--r-- src/core/hw/hw.cpp 30
-rw-r--r-- src/core/hw/lcd.cpp 12
-rw-r--r-- src/core/hw/lcd.h 1
-rw-r--r-- src/core/hw/y2r.cpp 6
6 files changed, 185 insertions, 80 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 7471def5..3ccbc03b 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -2,17 +2,18 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cstring>
+#include <type_traits>
+
#include "common/color.h"
#include "common/common_types.h"
-
-#include "core/arm/arm_interface.h"
+#include "common/logging/log.h"
+#include "common/vector_math.h"
#include "core/settings.h"
-#include "core/core.h"
#include "core/memory.h"
#include "core/core_timing.h"
-#include "core/hle/hle.h"
#include "core/hle/service/gsp_gpu.h"
#include "core/hle/service/dsp_dsp.h"
#include "core/hle/service/hid/hid.h"
@@ -20,10 +21,17 @@
#include "core/hw/hw.h"
#include "core/hw/gpu.h"
+#include "core/tracer/recorder.h"
+
#include "video_core/command_processor.h"
+#include "video_core/hwrasterizer_base.h"
+#include "video_core/renderer_base.h"
#include "video_core/utils.h"
#include "video_core/video_core.h"
+#include "video_core/debug_utils/debug_utils.h"
+
+
namespace GPU {
Regs g_regs;
@@ -53,6 +61,29 @@ inline void Read(T &var, const u32 raw_addr) {
var = g_regs[addr / 4];
}
+static Math::Vec4<u8> DecodePixel(Regs::PixelFormat input_format, const u8* src_pixel) { // Decodes the pixel at src_pixel according to input_format and returns it as a Math::Vec4<u8>.
+ switch (input_format) { // Each known format forwards to the matching Color::Decode* helper.
+ case Regs::PixelFormat::RGBA8:
+ return Color::DecodeRGBA8(src_pixel);
+
+ case Regs::PixelFormat::RGB8:
+ return Color::DecodeRGB8(src_pixel);
+
+ case Regs::PixelFormat::RGB565:
+ return Color::DecodeRGB565(src_pixel);
+
+ case Regs::PixelFormat::RGB5A1:
+ return Color::DecodeRGB5A1(src_pixel);
+
+ case Regs::PixelFormat::RGBA4:
+ return Color::DecodeRGBA4(src_pixel);
+
+ default: // Any format value not listed above.
+ LOG_ERROR(HW_GPU, "Unknown source framebuffer format %x", input_format);
+ return {0, 0, 0, 0}; // After logging, fall back to an all-zero color so callers still get a value.
+ }
+}
+
template <typename T>
inline void Write(u32 addr, const T data) {
addr -= HW::VADDR_GPU;
@@ -75,39 +106,43 @@ inline void Write(u32 addr, const T data) {
const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
auto& config = g_regs.memory_fill_config[is_second_filler];
- if (config.address_start && config.trigger) {
- u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
- u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
-
- if (config.fill_24bit) {
- // fill with 24-bit values
- for (u8* ptr = start; ptr < end; ptr += 3) {
- ptr[0] = config.value_24bit_r;
- ptr[1] = config.value_24bit_g;
- ptr[2] = config.value_24bit_b;
+ if (config.trigger) {
+ if (config.address_start) { // Some games pass invalid values here
+ u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
+ u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
+
+ if (config.fill_24bit) {
+ // fill with 24-bit values
+ for (u8* ptr = start; ptr < end; ptr += 3) {
+ ptr[0] = config.value_24bit_r;
+ ptr[1] = config.value_24bit_g;
+ ptr[2] = config.value_24bit_b;
+ }
+ } else if (config.fill_32bit) {
+ // fill with 32-bit values
+ for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
+ *ptr = config.value_32bit;
+ } else {
+ // fill with 16-bit values
+ for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
+ *ptr = config.value_16bit;
}
- } else if (config.fill_32bit) {
- // fill with 32-bit values
- for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
- *ptr = config.value_32bit;
- } else {
- // fill with 16-bit values
- for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
- *ptr = config.value_16bit;
- }
- LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
+ LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
- config.trigger = 0;
- config.finished = 1;
+ if (!is_second_filler) {
+ GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
+ } else {
+ GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
+ }
- if (!is_second_filler) {
- GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
- } else {
- GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
+ VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
}
- VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
+ // Reset "trigger" flag and set the "finish" flag
+ // NOTE: This was confirmed to happen on hardware even if "address_start" is zero.
+ config.trigger = 0;
+ config.finished = 1;
}
break;
}
@@ -116,6 +151,10 @@ inline void Write(u32 addr, const T data) {
{
const auto& config = g_regs.display_transfer_config;
if (config.trigger & 1) {
+
+ if (Pica::g_debug_context)
+ Pica::g_debug_context->OnEvent(Pica::DebugContext::Event::IncomingDisplayTransfer, nullptr);
+
u8* src_pointer = Memory::GetPhysicalPointer(config.GetPhysicalInputAddress());
u8* dst_pointer = Memory::GetPhysicalPointer(config.GetPhysicalOutputAddress());
@@ -125,11 +164,18 @@ inline void Write(u32 addr, const T data) {
break;
}
- unsigned horizontal_scale = (config.scaling != config.NoScale) ? 2 : 1;
- unsigned vertical_scale = (config.scaling == config.ScaleXY) ? 2 : 1;
+ if (config.output_tiled &&
+ (config.scaling == config.ScaleXY || config.scaling == config.ScaleX)) {
+ LOG_CRITICAL(HW_GPU, "Scaling is only implemented on tiled input");
+ UNIMPLEMENTED();
+ break;
+ }
- u32 output_width = config.output_width / horizontal_scale;
- u32 output_height = config.output_height / vertical_scale;
+ bool horizontal_scale = config.scaling != config.NoScale;
+ bool vertical_scale = config.scaling == config.ScaleXY;
+
+ u32 output_width = config.output_width >> horizontal_scale;
+ u32 output_height = config.output_height >> vertical_scale;
u32 input_size = config.input_width * config.input_height * GPU::Regs::BytesPerPixel(config.input_format);
u32 output_size = output_width * output_height * GPU::Regs::BytesPerPixel(config.output_format);
@@ -153,16 +199,14 @@ inline void Write(u32 addr, const T data) {
break;
}
- // TODO(Subv): Implement the box filter when scaling is enabled
- // right now we're just skipping the extra pixels.
for (u32 y = 0; y < output_height; ++y) {
for (u32 x = 0; x < output_width; ++x) {
- Math::Vec4<u8> src_color = { 0, 0, 0, 0 };
+ Math::Vec4<u8> src_color;
// Calculate the [x,y] position of the input image
// based on the current output position and the scale
- u32 input_x = x * horizontal_scale;
- u32 input_y = y * vertical_scale;
+ u32 input_x = x << horizontal_scale;
+ u32 input_y = y << vertical_scale;
if (config.flip_vertically) {
// Flip the y value of the output data,
@@ -177,46 +221,49 @@ inline void Write(u32 addr, const T data) {
u32 dst_offset;
if (config.output_tiled) {
- // Interpret the input as linear and the output as tiled
- u32 coarse_y = y & ~7;
- u32 stride = output_width * dst_bytes_per_pixel;
-
- src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
- dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
+ if (!config.dont_swizzle) {
+ // Interpret the input as linear and the output as tiled
+ u32 coarse_y = y & ~7;
+ u32 stride = output_width * dst_bytes_per_pixel;
+
+ src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
+ dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + coarse_y * stride;
+ } else {
+ // Both input and output are linear
+ src_offset = (input_x + input_y * config.input_width) * src_bytes_per_pixel;
+ dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
+ }
} else {
- // Interpret the input as tiled and the output as linear
- u32 coarse_y = input_y & ~7;
- u32 stride = config.input_width * src_bytes_per_pixel;
-
- src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
- dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
+ if (!config.dont_swizzle) {
+ // Interpret the input as tiled and the output as linear
+ u32 coarse_y = input_y & ~7;
+ u32 stride = config.input_width * src_bytes_per_pixel;
+
+ src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + coarse_y * stride;
+ dst_offset = (x + y * output_width) * dst_bytes_per_pixel;
+ } else {
+ // Both input and output are tiled
+ u32 out_coarse_y = y & ~7;
+ u32 out_stride = output_width * dst_bytes_per_pixel;
+
+ u32 in_coarse_y = input_y & ~7;
+ u32 in_stride = config.input_width * src_bytes_per_pixel;
+
+ src_offset = VideoCore::GetMortonOffset(input_x, input_y, src_bytes_per_pixel) + in_coarse_y * in_stride;
+ dst_offset = VideoCore::GetMortonOffset(x, y, dst_bytes_per_pixel) + out_coarse_y * out_stride;
+ }
}
const u8* src_pixel = src_pointer + src_offset;
- switch (config.input_format) {
- case Regs::PixelFormat::RGBA8:
- src_color = Color::DecodeRGBA8(src_pixel);
- break;
-
- case Regs::PixelFormat::RGB8:
- src_color = Color::DecodeRGB8(src_pixel);
- break;
-
- case Regs::PixelFormat::RGB565:
- src_color = Color::DecodeRGB565(src_pixel);
- break;
-
- case Regs::PixelFormat::RGB5A1:
- src_color = Color::DecodeRGB5A1(src_pixel);
- break;
-
- case Regs::PixelFormat::RGBA4:
- src_color = Color::DecodeRGBA4(src_pixel);
- break;
-
- default:
- LOG_ERROR(HW_GPU, "Unknown source framebuffer format %x", config.input_format.Value());
- break;
+ src_color = DecodePixel(config.input_format, src_pixel);
+ if (config.scaling == config.ScaleX) {
+ Math::Vec4<u8> pixel = DecodePixel(config.input_format, src_pixel + src_bytes_per_pixel);
+ src_color = ((src_color + pixel) / 2).Cast<u8>();
+ } else if (config.scaling == config.ScaleXY) {
+ Math::Vec4<u8> pixel1 = DecodePixel(config.input_format, src_pixel + 1 * src_bytes_per_pixel);
+ Math::Vec4<u8> pixel2 = DecodePixel(config.input_format, src_pixel + 2 * src_bytes_per_pixel);
+ Math::Vec4<u8> pixel3 = DecodePixel(config.input_format, src_pixel + 3 * src_bytes_per_pixel);
+ src_color = (((src_color + pixel1) + (pixel2 + pixel3)) / 4).Cast<u8>();
}
u8* dst_pixel = dst_pointer + dst_offset;
@@ -254,6 +301,7 @@ inline void Write(u32 addr, const T data) {
config.GetPhysicalOutputAddress(), output_width, output_height,
config.output_format.Value(), config.flags);
+ g_regs.display_transfer_config.trigger = 0;
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size);
@@ -268,7 +316,14 @@ inline void Write(u32 addr, const T data) {
if (config.trigger & 1)
{
u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress());
+
+ if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
+ Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress());
+ }
+
Pica::CommandProcessor::ProcessCommandList(buffer, config.size);
+
+ g_regs.command_processor_config.trigger = 0;
}
break;
}
@@ -276,6 +331,13 @@ inline void Write(u32 addr, const T data) {
default:
break;
}
+
+ // Notify tracer about the register write
+ // This is happening *after* handling the write to make sure we properly catch all memory reads.
+ if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
+ // addr + GPU VBase - IO VBase + IO PBase
+ Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data);
+ }
}
// Explicitly instantiate template functions because we aren't defining this in the header:
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 699bcd2a..daad506f 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -5,6 +5,7 @@
#pragma once
#include <cstddef>
+#include <type_traits>
#include "common/assert.h"
#include "common/bit_field.h"
@@ -202,6 +203,7 @@ struct Regs {
BitField< 0, 1, u32> flip_vertically; // flips input data vertically
BitField< 1, 1, u32> output_tiled; // Converts from linear to tiled format
BitField< 3, 1, u32> raw_copy; // Copies the data without performing any processing
+ BitField< 5, 1, u32> dont_swizzle;
BitField< 8, 3, PixelFormat> input_format;
BitField<12, 3, PixelFormat> output_format;
diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp
index c7006a49..b5fdbf9c 100644
--- a/src/core/hw/hw.cpp
+++ b/src/core/hw/hw.cpp
@@ -15,6 +15,21 @@ template <typename T>
inline void Read(T &var, const u32 addr) {
switch (addr & 0xFFFFF000) {
case VADDR_GPU:
+ case VADDR_GPU + 0x1000:
+ case VADDR_GPU + 0x2000:
+ case VADDR_GPU + 0x3000:
+ case VADDR_GPU + 0x4000:
+ case VADDR_GPU + 0x5000:
+ case VADDR_GPU + 0x6000:
+ case VADDR_GPU + 0x7000:
+ case VADDR_GPU + 0x8000:
+ case VADDR_GPU + 0x9000:
+ case VADDR_GPU + 0xA000:
+ case VADDR_GPU + 0xB000:
+ case VADDR_GPU + 0xC000:
+ case VADDR_GPU + 0xD000:
+ case VADDR_GPU + 0xE000:
+ case VADDR_GPU + 0xF000:
GPU::Read(var, addr);
break;
case VADDR_LCD:
@@ -29,6 +44,21 @@ template <typename T>
inline void Write(u32 addr, const T data) {
switch (addr & 0xFFFFF000) {
case VADDR_GPU:
+ case VADDR_GPU + 0x1000:
+ case VADDR_GPU + 0x2000:
+ case VADDR_GPU + 0x3000:
+ case VADDR_GPU + 0x4000:
+ case VADDR_GPU + 0x5000:
+ case VADDR_GPU + 0x6000:
+ case VADDR_GPU + 0x7000:
+ case VADDR_GPU + 0x8000:
+ case VADDR_GPU + 0x9000:
+ case VADDR_GPU + 0xA000:
+ case VADDR_GPU + 0xB000:
+ case VADDR_GPU + 0xC000:
+ case VADDR_GPU + 0xD000:
+ case VADDR_GPU + 0xE000:
+ case VADDR_GPU + 0xF000:
GPU::Write(addr, data);
break;
case VADDR_LCD:
diff --git a/src/core/hw/lcd.cpp b/src/core/hw/lcd.cpp
index 963c8d98..6f93709e 100644
--- a/src/core/hw/lcd.cpp
+++ b/src/core/hw/lcd.cpp
@@ -7,11 +7,12 @@
#include "common/common_types.h"
#include "common/logging/log.h"
-#include "core/arm/arm_interface.h"
-#include "core/hle/hle.h"
#include "core/hw/hw.h"
#include "core/hw/lcd.h"
+#include "core/tracer/recorder.h"
+#include "video_core/debug_utils/debug_utils.h"
+
namespace LCD {
Regs g_regs;
@@ -42,6 +43,13 @@ inline void Write(u32 addr, const T data) {
}
g_regs[index] = static_cast<u32>(data);
+
+ // Notify tracer about the register write
+ // This is happening *after* handling the write to make sure we properly catch all memory reads.
+ if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
+ // addr + LCD VBase - IO VBase + IO PBase
+ Pica::g_debug_context->recorder->RegisterWritten<T>(addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data);
+ }
}
// Explicitly instantiate template functions because we aren't defining this in the header:
diff --git a/src/core/hw/lcd.h b/src/core/hw/lcd.h
index 8631eb20..bcce6d8c 100644
--- a/src/core/hw/lcd.h
+++ b/src/core/hw/lcd.h
@@ -5,6 +5,7 @@
#pragma once
#include <cstddef>
+#include <type_traits>
#include "common/bit_field.h"
#include "common/common_funcs.h"
diff --git a/src/core/hw/y2r.cpp b/src/core/hw/y2r.cpp
index 5b7fb39e..f80e26ec 100644
--- a/src/core/hw/y2r.cpp
+++ b/src/core/hw/y2r.cpp
@@ -2,8 +2,10 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <algorithm>
#include <array>
-#include <numeric>
+#include <cstddef>
+#include <memory>
#include "common/assert.h"
#include "common/color.h"
@@ -109,7 +111,7 @@ static void SendData(const u32* input, ConversionBuffer& buf, int amount_of_data
while (output < unit_end) {
u32 color = *input++;
Math::Vec4<u8> col_vec{
- (color >> 24) & 0xFF, (color >> 16) & 0xFF, (color >> 8) & 0xFF, alpha,
+ (u8)(color >> 24), (u8)(color >> 16), (u8)(color >> 8), alpha
};
switch (output_format) {