aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/video_core/rasterizer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core/rasterizer.cpp')
-rw-r--r--src/video_core/rasterizer.cpp189
1 files changed, 174 insertions, 15 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 59d156ee..68b7cc05 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -126,6 +126,30 @@ static u32 GetDepth(int x, int y) {
}
}
+// Returns the stencil value stored for the pixel at (x, y) in the depth/stencil buffer.
+// Only the D24S8 format is decoded; for every other depth format a warning is logged and 0 is
+// returned.
+static u8 GetStencil(int x, int y) {
+    const auto& framebuffer = g_state.regs.framebuffer;
+    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
+    u8* depth_buffer = Memory::GetPhysicalPointer(addr);
+
+    // Flip the row index against the framebuffer height before addressing the buffer
+    y = framebuffer.height - y;
+
+    // y rounded down to a multiple of 8; multiplied by the row stride below it selects a band of
+    // 8 rows. NOTE(review): GetMortonOffset presumably yields the offset inside that band - confirm.
+    const u32 coarse_y = y & ~7;
+    u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format);
+    u32 stride = framebuffer.width * bytes_per_pixel;
+
+    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+    u8* src_pixel = depth_buffer + src_offset;
+
+    switch (framebuffer.depth_format) {
+    case Regs::DepthFormat::D24S8:
+        // The stencil value is the second (.y) component of the decoded depth/stencil pair
+        return Color::DecodeD24S8(src_pixel).y;
+
+    default:
+        // The message names the depth *format* (not "function"), matching the value it prints
+        LOG_WARNING(HW_GPU, "GetStencil called for format which doesn't have a stencil component (format %u)", framebuffer.depth_format);
+        return 0;
+    }
+}
+
+
static void SetDepth(int x, int y, u32 value) {
const auto& framebuffer = g_state.regs.framebuffer;
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
@@ -144,13 +168,15 @@ static void SetDepth(int x, int y, u32 value) {
case Regs::DepthFormat::D16:
Color::EncodeD16(value, dst_pixel);
break;
+
case Regs::DepthFormat::D24:
Color::EncodeD24(value, dst_pixel);
break;
+
case Regs::DepthFormat::D24S8:
- // TODO(Subv): Implement the stencil buffer
- Color::EncodeD24S8(value, 0, dst_pixel);
+ Color::EncodeD24X8(value, dst_pixel);
break;
+
default:
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
UNIMPLEMENTED();
@@ -158,6 +184,53 @@ static void SetDepth(int x, int y, u32 value) {
}
}
+// Writes `value` as the stencil component of the pixel at (x, y) in the depth/stencil buffer.
+// Nothing is written for the D16 and D24 formats; D24S8 gets its stencil byte updated via
+// Color::EncodeX24S8; any other format is reported as unimplemented.
+static void SetStencil(int x, int y, u8 value) {
+    using DepthFormat = Pica::Regs::DepthFormat;
+
+    const auto& fb = g_state.regs.framebuffer;
+    const PAddr buffer_addr = fb.GetDepthBufferPhysicalAddress();
+    u8* const buffer_base = Memory::GetPhysicalPointer(buffer_addr);
+
+    // Flip the row index against the framebuffer height before addressing the buffer
+    y = fb.height - y;
+
+    const u32 band_start = y & ~7; // y rounded down to a multiple of 8
+    const u32 pixel_size = Pica::Regs::BytesPerDepthPixel(fb.depth_format);
+    const u32 row_stride = fb.width * pixel_size;
+
+    const u32 pixel_offset = VideoCore::GetMortonOffset(x, y, pixel_size) + band_start * row_stride;
+    u8* const target = buffer_base + pixel_offset;
+
+    switch (fb.depth_format) {
+    case DepthFormat::D24S8:
+        Color::EncodeX24S8(value, target);
+        return;
+
+    case DepthFormat::D16:
+    case DepthFormat::D24:
+        // Nothing to do
+        return;
+
+    default:
+        LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", fb.depth_format);
+        UNIMPLEMENTED();
+        return;
+    }
+}
+
+
+// Computes the stencil value that results from applying `action` to the current stencil value
+// `dest`, using `ref` as the second operand where the action needs one.
+// Unrecognized actions are logged as critical, flagged as unimplemented, and produce 0.
+// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not!
+static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) {
+    // Keep: the stored value is left untouched
+    if (action == Regs::StencilAction::Keep) {
+        return dest;
+    }
+
+    // Xor: combine the stored value with the second operand bit by bit
+    if (action == Regs::StencilAction::Xor) {
+        return dest ^ ref;
+    }
+
+    LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", static_cast<int>(action));
+    UNIMPLEMENTED();
+    return 0;
+}
+
+
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
struct Fix12P4 {
Fix12P4() {}
@@ -276,6 +349,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
auto textures = regs.GetTextures();
auto tev_stages = regs.GetTevStages();
+ bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8;
+ const auto stencil_test = g_state.regs.output_merger.stencil_test;
+
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
// TODO: Not sure if looping through x first might be faster
for (u16 y = min_y + 8; y < max_y; y += 0x10) {
@@ -349,6 +425,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
val = std::min(val, (int)size - 1);
return val;
+ case Regs::TextureConfig::ClampToBorder:
+ return val;
+
case Regs::TextureConfig::Repeat:
return (int)((unsigned)val % size);
@@ -367,17 +446,26 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
}
};
- // Textures are laid out from bottom to top, hence we invert the t coordinate.
- // NOTE: This may not be the right place for the inversion.
- // TODO: Check if this applies to ETC textures, too.
- s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
- t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
-
- u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
- auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
-
- texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
- DebugUtils::DumpTexture(texture.config, texture_data);
+ if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && (s < 0 || s >= texture.config.width))
+ || (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && (t < 0 || t >= texture.config.height))) {
+ auto border_color = texture.config.border_color;
+ texture_color[i] = { border_color.r, border_color.g, border_color.b, border_color.a };
+ } else {
+ // Textures are laid out from bottom to top, hence we invert the t coordinate.
+ // NOTE: This may not be the right place for the inversion.
+ // TODO: Check if this applies to ETC textures, too.
+ s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width);
+ t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height);
+
+ u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
+ auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format);
+
+ // TODO: Apply the min and mag filters to the texture
+ texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info);
+#if PICA_DUMP_TEXTURES
+ DebugUtils::DumpTexture(texture.config, texture_data);
+#endif
+ }
}
// Texture environment - consists of 6 stages of color and alpha combining.
@@ -556,7 +644,18 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
result = (result * input[2].Cast<int>()) / 255;
return result.Cast<u8>();
}
-
+ case Operation::Dot3_RGB:
+ {
+ // Not fully accurate.
+ // Worst case scenario seems to yield a +/-3 error
+ // Some HW results indicate that the per-component computation can't have a higher precision than 1/256,
+ // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( (0x80,g0,b0),(0x80,g1,b1) ) give different results
+ int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 +
+ ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 +
+ ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256;
+ result = std::max(0, std::min(255, result));
+ return { (u8)result, (u8)result, (u8)result };
+ }
default:
LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
UNIMPLEMENTED();
@@ -638,6 +737,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
}
const auto& output_merger = regs.output_merger;
+ // TODO: Does alpha testing happen before or after stencil?
if (output_merger.alpha_test.enable) {
bool pass = false;
@@ -679,6 +779,54 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
continue;
}
+ u8 old_stencil = 0;
+ if (stencil_action_enable) {
+ old_stencil = GetStencil(x >> 4, y >> 4);
+ u8 dest = old_stencil & stencil_test.mask;
+ u8 ref = stencil_test.reference_value & stencil_test.mask;
+
+ bool pass = false;
+ switch (stencil_test.func) {
+ case Regs::CompareFunc::Never:
+ pass = false;
+ break;
+
+ case Regs::CompareFunc::Always:
+ pass = true;
+ break;
+
+ case Regs::CompareFunc::Equal:
+ pass = (ref == dest);
+ break;
+
+ case Regs::CompareFunc::NotEqual:
+ pass = (ref != dest);
+ break;
+
+ case Regs::CompareFunc::LessThan:
+ pass = (ref < dest);
+ break;
+
+ case Regs::CompareFunc::LessThanOrEqual:
+ pass = (ref <= dest);
+ break;
+
+ case Regs::CompareFunc::GreaterThan:
+ pass = (ref > dest);
+ break;
+
+ case Regs::CompareFunc::GreaterThanOrEqual:
+ pass = (ref >= dest);
+ break;
+ }
+
+ if (!pass) {
+ u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value);
+ SetStencil(x >> 4, y >> 4, new_stencil);
+ continue;
+ }
+ }
+
// TODO: Does depth indeed only get written even if depth testing is enabled?
if (output_merger.depth_test_enable) {
unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
@@ -723,11 +871,22 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
break;
}
- if (!pass)
+ if (!pass) {
+ if (stencil_action_enable) {
+ u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value);
+ SetStencil(x >> 4, y >> 4, new_stencil);
+ }
continue;
+ }
if (output_merger.depth_write_enable)
SetDepth(x >> 4, y >> 4, z);
+
+ if (stencil_action_enable) {
+ // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway?
+ u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value);
+ SetStencil(x >> 4, y >> 4, new_stencil);
+ }
}
auto dest = GetPixel(x >> 4, y >> 4);