diff options
Diffstat (limited to 'src/video_core/rasterizer.cpp')
-rw-r--r-- | src/video_core/rasterizer.cpp | 189 |
1 files changed, 174 insertions, 15 deletions
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 59d156ee..68b7cc05 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -126,6 +126,30 @@ static u32 GetDepth(int x, int y) { } } +static u8 GetStencil(int x, int y) { + const auto& framebuffer = g_state.regs.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; + + u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; + u8* src_pixel = depth_buffer + src_offset; + + switch (framebuffer.depth_format) { + case Regs::DepthFormat::D24S8: + return Color::DecodeD24S8(src_pixel).y; + + default: + LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); + return 0; + } +} + static void SetDepth(int x, int y, u32 value) { const auto& framebuffer = g_state.regs.framebuffer; const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); @@ -144,13 +168,15 @@ static void SetDepth(int x, int y, u32 value) { case Regs::DepthFormat::D16: Color::EncodeD16(value, dst_pixel); break; + case Regs::DepthFormat::D24: Color::EncodeD24(value, dst_pixel); break; + case Regs::DepthFormat::D24S8: - // TODO(Subv): Implement the stencil buffer - Color::EncodeD24S8(value, 0, dst_pixel); + Color::EncodeD24X8(value, dst_pixel); break; + default: LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); UNIMPLEMENTED(); @@ -158,6 +184,53 @@ static void SetDepth(int x, int y, u32 value) { } } +static void SetStencil(int x, int y, u8 value) { + const auto& framebuffer = g_state.regs.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; + + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; + u8* dst_pixel = depth_buffer + dst_offset; + + switch (framebuffer.depth_format) { + case Pica::Regs::DepthFormat::D16: + case Pica::Regs::DepthFormat::D24: + // Nothing to do + break; + + case Pica::Regs::DepthFormat::D24S8: + Color::EncodeX24S8(value, dst_pixel); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); + UNIMPLEMENTED(); + break; + } +} + +// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not! +static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) { + switch (action) { + case Regs::StencilAction::Keep: + return dest; + + case Regs::StencilAction::Xor: + return dest ^ ref; + + default: + LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); + UNIMPLEMENTED(); + return 0; + } +} + // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values struct Fix12P4 { Fix12P4() {} @@ -276,6 +349,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, auto textures = regs.GetTextures(); auto tev_stages = regs.GetTevStages(); + bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; + const auto stencil_test = g_state.regs.output_merger.stencil_test; + // Enter rasterization loop, starting at the center of the topleft bounding box corner. // TODO: Not sure if looping through x first might be faster for (u16 y = min_y + 8; y < max_y; y += 0x10) { @@ -349,6 +425,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, val = std::min(val, (int)size - 1); return val; + case Regs::TextureConfig::ClampToBorder: + return val; + case Regs::TextureConfig::Repeat: return (int)((unsigned)val % size); @@ -367,17 +446,26 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, } }; - // Textures are laid out from bottom to top, hence we invert the t coordinate. - // NOTE: This may not be the right place for the inversion. - // TODO: Check if this applies to ETC textures, too. - s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); - t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); - - u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); - auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); - - texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); - DebugUtils::DumpTexture(texture.config, texture_data); + if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && (s < 0 || s >= texture.config.width)) + || (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && (t < 0 || t >= texture.config.height))) { + auto border_color = texture.config.border_color; + texture_color[i] = { border_color.r, border_color.g, border_color.b, border_color.a }; + } else { + // Textures are laid out from bottom to top, hence we invert the t coordinate. + // NOTE: This may not be the right place for the inversion. + // TODO: Check if this applies to ETC textures, too. + s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); + t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); + + u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); + auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); + + // TODO: Apply the min and mag filters to the texture + texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); +#if PICA_DUMP_TEXTURES + DebugUtils::DumpTexture(texture.config, texture_data); +#endif + } } // Texture environment - consists of 6 stages of color and alpha combining. @@ -556,7 +644,18 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, result = (result * input[2].Cast<int>()) / 255; return result.Cast<u8>(); } - + case Operation::Dot3_RGB: + { + // Not fully accurate. + // Worst case scenario seems to yield a +/-3 error + // Some HW results indicate that the per-component computation can't have a higher precision than 1/256, + // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( (0x80,g0,b0),(0x80,g1,b1) ) give different results + int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + + ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + + ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; + result = std::max(0, std::min(255, result)); + return { (u8)result, (u8)result, (u8)result }; + } default: LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); UNIMPLEMENTED(); @@ -638,6 +737,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, } const auto& output_merger = regs.output_merger; + // TODO: Does alpha testing happen before or after stencil? if (output_merger.alpha_test.enable) { bool pass = false; @@ -679,6 +779,54 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, continue; } + u8 old_stencil = 0; + if (stencil_action_enable) { + old_stencil = GetStencil(x >> 4, y >> 4); + u8 dest = old_stencil & stencil_test.mask; + u8 ref = stencil_test.reference_value & stencil_test.mask; + + bool pass = false; + switch (stencil_test.func) { + case Regs::CompareFunc::Never: + pass = false; + break; + + case Regs::CompareFunc::Always: + pass = true; + break; + + case Regs::CompareFunc::Equal: + pass = (ref == dest); + break; + + case Regs::CompareFunc::NotEqual: + pass = (ref != dest); + break; + + case Regs::CompareFunc::LessThan: + pass = (ref < dest); + break; + + case Regs::CompareFunc::LessThanOrEqual: + pass = (ref <= dest); + break; + + case Regs::CompareFunc::GreaterThan: + pass = (ref > dest); + break; + + case Regs::CompareFunc::GreaterThanOrEqual: + pass = (ref >= dest); + break; + } + + if (!pass) { + u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value); + SetStencil(x >> 4, y >> 4, new_stencil); + continue; + } + } + // TODO: Does depth indeed only get written even if depth testing is enabled? if (output_merger.depth_test_enable) { unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); @@ -723,11 +871,22 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, break; } - if (!pass) + if (!pass) { + if (stencil_action_enable) { + u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value); + SetStencil(x >> 4, y >> 4, new_stencil); + } continue; + } if (output_merger.depth_write_enable) SetDepth(x >> 4, y >> 4, z); + + if (stencil_action_enable) { + // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway? + u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value); + SetStencil(x >> 4, y >> 4, new_stencil); + } } auto dest = GetPixel(x >> 4, y >> 4); |