From 1b42d55a9dcb27ac2374de0ed0d1d0ec8385b13e Mon Sep 17 00:00:00 2001 From: bunnei Date: Wed, 13 May 2015 23:29:27 -0400 Subject: Pica: Create 'State' structure and move state memory there. --- src/citra_qt/debugger/graphics_cmdlists.cpp | 10 +- src/citra_qt/debugger/graphics_framebuffer.cpp | 4 +- src/citra_qt/debugger/graphics_vertex_shader.cpp | 6 +- src/video_core/CMakeLists.txt | 1 + src/video_core/clipper.cpp | 13 +- src/video_core/command_processor.cpp | 47 +++-- src/video_core/debug_utils/debug_utils.cpp | 2 +- src/video_core/pica.cpp | 20 ++ src/video_core/pica.h | 186 +++++++++-------- src/video_core/rasterizer.cpp | 244 ++++++++++++----------- src/video_core/renderer_opengl/gl_rasterizer.cpp | 234 ++++++++++++---------- src/video_core/renderer_opengl/pica_to_gl.h | 12 +- src/video_core/vertex_shader.cpp | 96 +++------ src/video_core/vertex_shader.h | 11 - src/video_core/video_core.cpp | 13 +- 15 files changed, 461 insertions(+), 438 deletions(-) create mode 100644 src/video_core/pica.cpp diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp index 66e11dd5..804c735a 100644 --- a/src/citra_qt/debugger/graphics_cmdlists.cpp +++ b/src/citra_qt/debugger/graphics_cmdlists.cpp @@ -228,7 +228,7 @@ void GPUCommandListModel::OnPicaTraceFinished(const Pica::DebugUtils::PicaTrace& #define COMMAND_IN_RANGE(cmd_id, reg_name) \ (cmd_id >= PICA_REG_INDEX(reg_name) && \ - cmd_id < PICA_REG_INDEX(reg_name) + sizeof(decltype(Pica::registers.reg_name)) / 4) + cmd_id < PICA_REG_INDEX(reg_name) + sizeof(decltype(Pica::g_state.regs.reg_name)) / 4) void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) { const unsigned int command_id = list_widget->model()->data(index, GPUCommandListModel::CommandIdRole).toUInt(); @@ -244,8 +244,8 @@ void GPUCommandListWidget::OnCommandDoubleClicked(const QModelIndex& index) { } else { index = 2; } - auto config = Pica::registers.GetTextures()[index].config; - auto format = Pica::registers.GetTextures()[index].format; + auto config = Pica::g_state.regs.GetTextures()[index].config; + auto format = Pica::g_state.regs.GetTextures()[index].format; auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); // TODO: Instead, emit a signal here to be caught by the main window widget. @@ -270,8 +270,8 @@ void GPUCommandListWidget::SetCommandInfo(const QModelIndex& index) { } else { index = 2; } - auto config = Pica::registers.GetTextures()[index].config; - auto format = Pica::registers.GetTextures()[index].format; + auto config = Pica::g_state.regs.GetTextures()[index].config; + auto format = Pica::g_state.regs.GetTextures()[index].format; auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config, format); u8* src = Memory::GetPhysicalPointer(config.GetPhysicalAddress()); diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp index 0c1a3f47..e0734459 100644 --- a/src/citra_qt/debugger/graphics_framebuffer.cpp +++ b/src/citra_qt/debugger/graphics_framebuffer.cpp @@ -178,7 +178,7 @@ void GraphicsFramebufferWidget::OnUpdate() { // TODO: Store a reference to the registers in the debug context instead of accessing them directly... - const auto& framebuffer = Pica::registers.framebuffer; + const auto& framebuffer = Pica::g_state.regs.framebuffer; framebuffer_address = framebuffer.GetColorBufferPhysicalAddress(); framebuffer_width = framebuffer.GetWidth(); @@ -191,7 +191,7 @@ void GraphicsFramebufferWidget::OnUpdate() case Source::DepthBuffer: { - const auto& framebuffer = Pica::registers.framebuffer; + const auto& framebuffer = Pica::g_state.regs.framebuffer; framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress(); framebuffer_width = framebuffer.GetWidth(); diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp index 3b072d01..14d3f8f3 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp @@ -253,13 +253,13 @@ void GraphicsVertexShaderModel::OnUpdate() info.Clear(); - for (auto instr : Pica::VertexShader::GetShaderBinary()) + for (auto instr : Pica::g_state.vs.program_code) info.code.push_back({instr}); - for (auto pattern : Pica::VertexShader::GetSwizzlePatterns()) + for (auto pattern : Pica::g_state.vs.swizzle_data) info.swizzle_info.push_back({pattern}); - info.labels.insert({Pica::registers.vs_main_offset, "main"}); + info.labels.insert({ Pica::g_state.regs.vs_main_offset, "main" }); endResetModel(); } diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 9866078d..0258a325 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -9,6 +9,7 @@ set(SRCS debug_utils/debug_utils.cpp clipper.cpp command_processor.cpp + pica.cpp primitive_assembly.cpp rasterizer.cpp utils.cpp diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index ba3876a7..943f3eb3 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp @@ -58,12 +58,13 @@ static void InitScreenCoordinates(OutputVertex& vtx) float24 offset_z; } viewport; - viewport.halfsize_x = float24::FromRawFloat24(registers.viewport_size_x); - viewport.halfsize_y = float24::FromRawFloat24(registers.viewport_size_y); - viewport.offset_x = float24::FromFloat32(static_cast(registers.viewport_corner.x)); - viewport.offset_y = float24::FromFloat32(static_cast(registers.viewport_corner.y)); - viewport.zscale = float24::FromRawFloat24(registers.viewport_depth_range); - viewport.offset_z = float24::FromRawFloat24(registers.viewport_depth_far_plane); + const auto& regs = g_state.regs; + viewport.halfsize_x = float24::FromRawFloat24(regs.viewport_size_x); + viewport.halfsize_y = float24::FromRawFloat24(regs.viewport_size_y); + viewport.offset_x = float24::FromFloat32(static_cast(regs.viewport_corner.x)); + viewport.offset_y = float24::FromFloat32(static_cast(regs.viewport_corner.y)); + viewport.zscale = float24::FromRawFloat24(regs.viewport_depth_range); + viewport.offset_z = float24::FromRawFloat24(regs.viewport_depth_far_plane); float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; vtx.color *= inv_w; diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index 5c4c0440..100d8c7c 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -21,8 +21,6 @@ namespace Pica { -Regs registers; - namespace CommandProcessor { static int float_regs_counter = 0; @@ -36,8 +34,9 @@ static u32 default_attr_write_buffer[3]; Common::Profiling::TimingCategory category_drawing("Drawing"); static inline void WritePicaReg(u32 id, u32 value, u32 mask) { + auto& regs = g_state.regs; - if (id >= registers.NumIds()) + if (id >= regs.NumIds()) return; // If we're skipping this frame, only allow trigger IRQ @@ -45,13 +44,13 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { return; // TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value - u32 old_value = registers[id]; - registers[id] = (old_value & ~mask) | (value & mask); + u32 old_value = regs[id]; + regs[id] = (old_value & ~mask) | (value & mask); if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::CommandLoaded, reinterpret_cast(&id)); - DebugUtils::OnPicaRegWrite(id, registers[id]); + DebugUtils::OnPicaRegWrite(id, regs[id]); switch(id) { // Trigger IRQ @@ -65,12 +64,12 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { { Common::Profiling::ScopeTimer scope_timer(category_drawing); - DebugUtils::DumpTevStageConfig(registers.GetTevStages()); + DebugUtils::DumpTevStageConfig(regs.GetTevStages()); if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); - const auto& attribute_config = registers.vertex_attributes; + const auto& attribute_config = regs.vertex_attributes; const u32 base_address = attribute_config.GetPhysicalBaseAddress(); // Information about internal vertex attributes @@ -103,16 +102,16 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { // Load vertices bool is_indexed = (id == PICA_REG_INDEX(trigger_draw_indexed)); - const auto& index_info = registers.index_array; + const auto& index_info = regs.index_array; const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); const u16* index_address_16 = (u16*)index_address_8; bool index_u16 = index_info.format != 0; DebugUtils::GeometryDumper geometry_dumper; - PrimitiveAssembler primitive_assembler(registers.triangle_topology.Value()); - PrimitiveAssembler dumping_primitive_assembler(registers.triangle_topology.Value()); + PrimitiveAssembler primitive_assembler(regs.triangle_topology.Value()); + PrimitiveAssembler dumping_primitive_assembler(regs.triangle_topology.Value()); - for (unsigned int index = 0; index < registers.num_vertices; ++index) + for (unsigned int index = 0; index < regs.num_vertices; ++index) { unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index; @@ -131,7 +130,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) { // Load the default attribute if we're configured to do so, this data will be overwritten by the loader data if it's set if (attribute_config.IsDefaultAttribute(i)) { - input.attr[i] = VertexShader::GetDefaultAttribute(i); + input.attr[i] = g_state.vs.default_attributes[i]; LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)", i, vertex, index, input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(), @@ -216,7 +215,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX(vs_bool_uniforms): for (unsigned i = 0; i < 16; ++i) - VertexShader::GetBoolUniform(i) = (registers.vs_bool_uniforms.Value() & (1 << i)) != 0; + g_state.vs.uniforms.b[i] = (regs.vs_bool_uniforms.Value() & (1 << i)) != 0; break; @@ -226,8 +225,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): { int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); - auto values = registers.vs_int_uniforms[index]; - VertexShader::GetIntUniform(index) = Math::Vec4(values.x, values.y, values.z, values.w); + auto values = regs.vs_int_uniforms[index]; + g_state.vs.uniforms.i[index] = Math::Vec4(values.x, values.y, values.z, values.w); LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); break; @@ -242,7 +241,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7): case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8): { - auto& uniform_setup = registers.vs_uniform_setup; + auto& uniform_setup = regs.vs_uniform_setup; // TODO: Does actual hardware indeed keep an intermediate buffer or does // it directly write the values? @@ -255,7 +254,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { (float_regs_counter >= 3 && !uniform_setup.IsFloat32())) { float_regs_counter = 0; - auto& uniform = VertexShader::GetFloatUniform(uniform_setup.index); + auto& uniform = g_state.vs.uniforms.f[uniform_setup.index]; if (uniform_setup.index > 95) { LOG_ERROR(HW_GPU, "Invalid VS uniform index %d", (int)uniform_setup.index); @@ -299,14 +298,14 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { if (default_attr_counter >= 3) { default_attr_counter = 0; - auto& setup = registers.vs_default_attributes_setup; + auto& setup = regs.vs_default_attributes_setup; if (setup.index >= 16) { LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); break; } - Math::Vec4& attribute = VertexShader::GetDefaultAttribute(setup.index); + Math::Vec4& attribute = g_state.vs.default_attributes[setup.index]; // NOTE: The destination component order indeed is "backwards" attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); @@ -334,8 +333,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): { - VertexShader::SubmitShaderMemoryChange(registers.vs_program.offset, value); - registers.vs_program.offset++; + g_state.vs.program_code[regs.vs_program.offset] = value; + regs.vs_program.offset++; break; } @@ -349,8 +348,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): { - VertexShader::SubmitSwizzleDataChange(registers.vs_swizzle_patterns.offset, value); - registers.vs_swizzle_patterns.offset++; + g_state.vs.swizzle_data[regs.vs_swizzle_patterns.offset] = value; + regs.vs_swizzle_patterns.offset++; break; } diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 9da44ccd..7987b922 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -632,7 +632,7 @@ void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { info.width = texture_config.width; info.height = texture_config.height; info.stride = row_stride; - info.format = registers.texture0_format; + info.format = g_state.regs.texture0_format; Math::Vec4 texture_color = LookupTexture(data, x, y, info); buf[3 * x + y * row_stride ] = texture_color.r(); buf[3 * x + y * row_stride + 1] = texture_color.g(); diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp new file mode 100644 index 00000000..543d9c44 --- /dev/null +++ b/src/video_core/pica.cpp @@ -0,0 +1,20 @@ +// Copyright 2015 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include + +#include "pica.h" + +namespace Pica { + +State g_state; + +void Init() { +} + +void Shutdown() { + memset(&g_state, 0, sizeof(State)); +} + +} diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 503c09ec..b67dce1a 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -16,6 +16,8 @@ #include "common/common_types.h" #include "common/logging/log.h" +#include "math.h" + namespace Pica { // Returns index corresponding to the Regs member labeled by field_name @@ -356,50 +358,50 @@ struct Regs { tev_stage4, tev_stage5 }; }; - struct { - enum CompareFunc : u32 { - Never = 0, - Always = 1, - Equal = 2, - NotEqual = 3, - LessThan = 4, - LessThanOrEqual = 5, - GreaterThan = 6, - GreaterThanOrEqual = 7, - }; + enum class BlendEquation : u32 { + Add = 0, + Subtract = 1, + ReverseSubtract = 2, + Min = 3, + Max = 4, + }; + + enum class BlendFactor : u32 { + Zero = 0, + One = 1, + SourceColor = 2, + OneMinusSourceColor = 3, + DestColor = 4, + OneMinusDestColor = 5, + SourceAlpha = 6, + OneMinusSourceAlpha = 7, + DestAlpha = 8, + OneMinusDestAlpha = 9, + ConstantColor = 10, + OneMinusConstantColor = 11, + ConstantAlpha = 12, + OneMinusConstantAlpha = 13, + SourceAlphaSaturate = 14, + }; + enum class CompareFunc : u32 { + Never = 0, + Always = 1, + Equal = 2, + NotEqual = 3, + LessThan = 4, + LessThanOrEqual = 5, + GreaterThan = 6, + GreaterThanOrEqual = 7, + }; + + struct { union { // If false, logic blending is used BitField<8, 1, u32> alphablend_enable; }; union { - enum class BlendEquation : u32 { - Add = 0, - Subtract = 1, - ReverseSubtract = 2, - Min = 3, - Max = 4 - }; - - enum BlendFactor : u32 { - Zero = 0, - One = 1, - SourceColor = 2, - OneMinusSourceColor = 3, - DestColor = 4, - OneMinusDestColor = 5, - SourceAlpha = 6, - OneMinusSourceAlpha = 7, - DestAlpha = 8, - OneMinusDestAlpha = 9, - ConstantColor = 10, - OneMinusConstantColor = 11, - ConstantAlpha = 12, - OneMinusConstantAlpha = 13, - SourceAlphaSaturate = 14 - }; - BitField< 0, 8, BlendEquation> blend_equation_rgb; BitField< 8, 8, BlendEquation> blend_equation_a; @@ -454,49 +456,19 @@ struct Regs { INSERT_PADDING_WORDS(0x8); } output_merger; - enum DepthFormat : u32 { - D16 = 0, - - D24 = 2, - D24S8 = 3 + // Components are laid out in reverse byte order, most significant bits first. + enum class ColorFormat : u32 { + RGBA8 = 0, + RGB8 = 1, + RGB5A1 = 2, + RGB565 = 3, + RGBA4 = 4, }; - // Returns the number of bytes in the specified depth format - static u32 BytesPerDepthPixel(DepthFormat format) { - switch (format) { - case DepthFormat::D16: - return 2; - case DepthFormat::D24: - return 3; - case DepthFormat::D24S8: - return 4; - default: - LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); - UNIMPLEMENTED(); - } - } - - // Returns the number of bits per depth component of the specified depth format - static u32 DepthBitsPerPixel(DepthFormat format) { - switch (format) { - case DepthFormat::D16: - return 16; - case DepthFormat::D24: - case DepthFormat::D24S8: - return 24; - default: - LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); - UNIMPLEMENTED(); - } - } - - // Components are laid out in reverse byte order, most significant bits first. - enum ColorFormat : u32 { - RGBA8 = 0, - RGB8 = 1, - RGB5A1 = 2, - RGB565 = 3, - RGBA4 = 4, + enum class DepthFormat : u32 { + D16 = 0, + D24 = 2, + D24S8 = 3, }; // Returns the number of bytes in the specified color format @@ -554,6 +526,35 @@ struct Regs { } } framebuffer; + // Returns the number of bytes in the specified depth format + static u32 BytesPerDepthPixel(DepthFormat format) { + switch (format) { + case DepthFormat::D16: + return 2; + case DepthFormat::D24: + return 3; + case DepthFormat::D24S8: + return 4; + default: + LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); + UNIMPLEMENTED(); + } + } + + // Returns the number of bits per depth component of the specified depth format + static u32 DepthBitsPerPixel(DepthFormat format) { + switch (format) { + case DepthFormat::D16: + return 16; + case DepthFormat::D24: + case DepthFormat::D24S8: + return 24; + default: + LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format); + UNIMPLEMENTED(); + } + } + INSERT_PADDING_WORDS(0xe0); enum class VertexAttributeFormat : u64 { @@ -953,9 +954,6 @@ ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5); static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); -extern Regs registers; // TODO: Not sure if we want to have one global instance for this - - struct float24 { static float24 FromFloat32(float val) { float24 ret; @@ -1066,4 +1064,30 @@ union CommandHeader { BitField<31, 1, u32> group_commands; }; +/// Struct used to describe current Pica state +struct State { + Regs regs; + + struct { + struct { + Math::Vec4 f[96]; + std::array b; + std::array, 4> i; + } uniforms; + + Math::Vec4 default_attributes[16]; + + std::array program_code; + std::array swizzle_data; + } vs; +}; + +/// Initialize Pica state +void Init(); + +/// Shutdown Pica state +void Shutdown(); + +extern State g_state; ///< Current Pica state + } // namespace diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 362efe52..767ff420 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -24,72 +24,74 @@ namespace Pica { namespace Rasterizer { static void DrawPixel(int x, int y, const Math::Vec4& color) { - const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); + const auto& framebuffer = g_state.regs.framebuffer; + const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); // Similarly to textures, the render framebuffer is laid out from bottom to top, too. // NOTE: The framebuffer height register contains the actual FB height minus one. - y = (registers.framebuffer.height - y); + y = framebuffer.height - y; const u32 coarse_y = y & ~7; - u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); - u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel; + u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; u8* dst_pixel = Memory::GetPhysicalPointer(addr) + dst_offset; - switch (registers.framebuffer.color_format) { - case Pica::Regs::ColorFormat::RGBA8: + switch (framebuffer.color_format) { + case Regs::ColorFormat::RGBA8: Color::EncodeRGBA8(color, dst_pixel); break; - case Pica::Regs::ColorFormat::RGB8: + case Regs::ColorFormat::RGB8: Color::EncodeRGB8(color, dst_pixel); break; - case Pica::Regs::ColorFormat::RGB5A1: + case Regs::ColorFormat::RGB5A1: Color::EncodeRGB5A1(color, dst_pixel); break; - case Pica::Regs::ColorFormat::RGB565: + case Regs::ColorFormat::RGB565: Color::EncodeRGB565(color, dst_pixel); break; - case Pica::Regs::ColorFormat::RGBA4: + case Regs::ColorFormat::RGBA4: Color::EncodeRGBA4(color, dst_pixel); break; default: - LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value()); + LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); UNIMPLEMENTED(); } } static const Math::Vec4 GetPixel(int x, int y) { - const PAddr addr = registers.framebuffer.GetColorBufferPhysicalAddress(); + const auto& framebuffer = g_state.regs.framebuffer; + const PAddr addr = framebuffer.GetColorBufferPhysicalAddress(); - y = (registers.framebuffer.height - y); + y = framebuffer.height - y; const u32 coarse_y = y & ~7; - u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(registers.framebuffer.color_format.Value())); - u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * registers.framebuffer.width * bytes_per_pixel; + u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer.color_format.Value())); + u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer.width * bytes_per_pixel; u8* src_pixel = Memory::GetPhysicalPointer(addr) + src_offset; - switch (registers.framebuffer.color_format) { - case Pica::Regs::ColorFormat::RGBA8: + switch (framebuffer.color_format) { + case Regs::ColorFormat::RGBA8: return Color::DecodeRGBA8(src_pixel); - case Pica::Regs::ColorFormat::RGB8: + case Regs::ColorFormat::RGB8: return Color::DecodeRGB8(src_pixel); - case Pica::Regs::ColorFormat::RGB5A1: + case Regs::ColorFormat::RGB5A1: return Color::DecodeRGB5A1(src_pixel); - case Pica::Regs::ColorFormat::RGB565: + case Regs::ColorFormat::RGB565: return Color::DecodeRGB565(src_pixel); - case Pica::Regs::ColorFormat::RGBA4: + case Regs::ColorFormat::RGBA4: return Color::DecodeRGBA4(src_pixel); default: - LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", registers.framebuffer.color_format.Value()); + LOG_CRITICAL(Render_Software, "Unknown framebuffer color format %x", framebuffer.color_format.Value()); UNIMPLEMENTED(); } @@ -97,58 +99,60 @@ static const Math::Vec4 GetPixel(int x, int y) { } static u32 GetDepth(int x, int y) { - const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); + const auto& framebuffer = g_state.regs.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); u8* depth_buffer = Memory::GetPhysicalPointer(addr); - y = (registers.framebuffer.height - y); + y = framebuffer.height - y; const u32 coarse_y = y & ~7; - u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format); - u32 stride = registers.framebuffer.width * bytes_per_pixel; + u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; u8* src_pixel = depth_buffer + src_offset; - switch (registers.framebuffer.depth_format) { - case Pica::Regs::DepthFormat::D16: + switch (framebuffer.depth_format) { + case Regs::DepthFormat::D16: return Color::DecodeD16(src_pixel); - case Pica::Regs::DepthFormat::D24: + case Regs::DepthFormat::D24: return Color::DecodeD24(src_pixel); - case Pica::Regs::DepthFormat::D24S8: + case Regs::DepthFormat::D24S8: return Color::DecodeD24S8(src_pixel).x; default: - LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format); + LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); UNIMPLEMENTED(); return 0; } } static void SetDepth(int x, int y, u32 value) { - const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress(); + const auto& framebuffer = g_state.regs.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); u8* depth_buffer = Memory::GetPhysicalPointer(addr); - y = (registers.framebuffer.height - y); + y = framebuffer.height - y; const u32 coarse_y = y & ~7; - u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format); - u32 stride = registers.framebuffer.width * bytes_per_pixel; + u32 bytes_per_pixel = Regs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; u8* dst_pixel = depth_buffer + dst_offset; - switch (registers.framebuffer.depth_format) { - case Pica::Regs::DepthFormat::D16: + switch (framebuffer.depth_format) { + case Regs::DepthFormat::D16: Color::EncodeD16(value, dst_pixel); break; - case Pica::Regs::DepthFormat::D24: + case Regs::DepthFormat::D24: Color::EncodeD24(value, dst_pixel); break; - case Pica::Regs::DepthFormat::D24S8: + case Regs::DepthFormat::D24S8: // TODO(Subv): Implement the stencil buffer Color::EncodeD24S8(value, 0, dst_pixel); break; default: - LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format); + LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); UNIMPLEMENTED(); break; } @@ -200,6 +204,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, const VertexShader::OutputVertex& v2, bool reversed = false) { + const auto& regs = g_state.regs; Common::Profiling::ScopeTimer timer(rasterization_category); // vertex positions in rasterizer coordinates @@ -216,14 +221,14 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, ScreenToRasterizerCoordinates(v1.screenpos), ScreenToRasterizerCoordinates(v2.screenpos) }; - if (registers.cull_mode == Regs::CullMode::KeepAll) { + if (regs.cull_mode == Regs::CullMode::KeepAll) { // Make sure we always end up with a triangle wound counter-clockwise if (!reversed && SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), vtxpos[2].xy()) <= 0) { ProcessTriangleInternal(v0, v2, v1, true); return; } } else { - if (!reversed && registers.cull_mode == Regs::CullMode::KeepClockWise) { + if (!reversed && regs.cull_mode == Regs::CullMode::KeepClockWise) { // Reverse vertex order and use the CCW code path. ProcessTriangleInternal(v0, v2, v1, true); return; @@ -268,8 +273,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, auto w_inverse = Math::MakeVec(v0.pos.w, v1.pos.w, v2.pos.w); - auto textures = registers.GetTextures(); - auto tev_stages = registers.GetTevStages(); + auto textures = regs.GetTextures(); + auto tev_stages = regs.GetTevStages(); // Enter rasterization loop, starting at the center of the topleft bounding box corner. // TODO: Not sure if looping through x first might be faster @@ -384,8 +389,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, // analogously. Math::Vec4 combiner_output; Math::Vec4 combiner_buffer = { - registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g, - registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a + regs.tev_combiner_buffer_color.r, regs.tev_combiner_buffer_color.g, + regs.tev_combiner_buffer_color.b, regs.tev_combiner_buffer_color.a }; for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { @@ -609,51 +614,52 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier()); combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier()); - if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { + if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) { combiner_buffer.r() = combiner_output.r(); combiner_buffer.g() = combiner_output.g(); combiner_buffer.b() = combiner_output.b(); } - if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { + if (regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) { combiner_buffer.a() = combiner_output.a(); } } - if (registers.output_merger.alpha_test.enable) { + const auto& output_merger = regs.output_merger; + if (output_merger.alpha_test.enable) { bool pass = false; - switch (registers.output_merger.alpha_test.func) { - case registers.output_merger.Never: + switch (output_merger.alpha_test.func) { + case Regs::CompareFunc::Never: pass = false; break; - case registers.output_merger.Always: + case Regs::CompareFunc::Always: pass = true; break; - case registers.output_merger.Equal: - pass = combiner_output.a() == registers.output_merger.alpha_test.ref; + case Regs::CompareFunc::Equal: + pass = combiner_output.a() == output_merger.alpha_test.ref; break; - case registers.output_merger.NotEqual: - pass = combiner_output.a() != registers.output_merger.alpha_test.ref; + case Regs::CompareFunc::NotEqual: + pass = combiner_output.a() != output_merger.alpha_test.ref; break; - case registers.output_merger.LessThan: - pass = combiner_output.a() < registers.output_merger.alpha_test.ref; + case Regs::CompareFunc::LessThan: + pass = combiner_output.a() < output_merger.alpha_test.ref; break; - case registers.output_merger.LessThanOrEqual: - pass = combiner_output.a() <= registers.output_merger.alpha_test.ref; + case Regs::CompareFunc::LessThanOrEqual: + pass = combiner_output.a() <= output_merger.alpha_test.ref; break; - case registers.output_merger.GreaterThan: - pass = combiner_output.a() > registers.output_merger.alpha_test.ref; + case Regs::CompareFunc::GreaterThan: + pass = combiner_output.a() > output_merger.alpha_test.ref; break; - case registers.output_merger.GreaterThanOrEqual: - pass = combiner_output.a() >= registers.output_merger.alpha_test.ref; + case Regs::CompareFunc::GreaterThanOrEqual: + pass = combiner_output.a() >= output_merger.alpha_test.ref; break; } @@ -662,8 +668,8 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, } // TODO: Does depth indeed only get written even if depth testing is enabled? - if (registers.output_merger.depth_test_enable) { - unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format); + if (output_merger.depth_test_enable) { + unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 + v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); @@ -671,36 +677,36 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, bool pass = false; - switch (registers.output_merger.depth_test_func) { - case registers.output_merger.Never: + switch (output_merger.depth_test_func) { + case Regs::CompareFunc::Never: pass = false; break; - case registers.output_merger.Always: + case Regs::CompareFunc::Always: pass = true; break; - case registers.output_merger.Equal: + case Regs::CompareFunc::Equal: pass = z == ref_z; break; - case registers.output_merger.NotEqual: + case Regs::CompareFunc::NotEqual: pass = z != ref_z; break; - case registers.output_merger.LessThan: + case Regs::CompareFunc::LessThan: pass = z < ref_z; break; - case registers.output_merger.LessThanOrEqual: + case Regs::CompareFunc::LessThanOrEqual: pass = z <= ref_z; break; - case registers.output_merger.GreaterThan: + case Regs::CompareFunc::GreaterThan: pass = z > ref_z; break; - case registers.output_merger.GreaterThanOrEqual: + case Regs::CompareFunc::GreaterThanOrEqual: pass = z >= ref_z; break; } @@ -708,59 +714,59 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, if (!pass) continue; - if (registers.output_merger.depth_write_enable) + if (output_merger.depth_write_enable) SetDepth(x >> 4, y >> 4, z); } auto dest = GetPixel(x >> 4, y >> 4); Math::Vec4 blend_output = combiner_output; - if (registers.output_merger.alphablend_enable) { - auto params = registers.output_merger.alpha_blending; + if (output_merger.alphablend_enable) { + auto params = output_merger.alpha_blending; - auto LookupFactorRGB = [&](decltype(params)::BlendFactor factor) -> Math::Vec3 { + auto LookupFactorRGB = [&](Regs::BlendFactor factor) -> Math::Vec3 { switch (factor) { - case params.Zero: + case Regs::BlendFactor::Zero : return Math::Vec3(0, 0, 0); - case params.One: + case Regs::BlendFactor::One : return Math::Vec3(255, 255, 255); - case params.SourceColor: + case Regs::BlendFactor::SourceColor: return combiner_output.rgb(); - case params.OneMinusSourceColor: + case Regs::BlendFactor::OneMinusSourceColor: return Math::Vec3(255 - combiner_output.r(), 255 - combiner_output.g(), 255 - combiner_output.b()); - case params.DestColor: + case Regs::BlendFactor::DestColor: return dest.rgb(); - case params.OneMinusDestColor: + case Regs::BlendFactor::OneMinusDestColor: return Math::Vec3(255 - dest.r(), 255 - dest.g(), 255 - dest.b()); - case params.SourceAlpha: + case Regs::BlendFactor::SourceAlpha: return Math::Vec3(combiner_output.a(), combiner_output.a(), combiner_output.a()); - case params.OneMinusSourceAlpha: + case Regs::BlendFactor::OneMinusSourceAlpha: return Math::Vec3(255 - combiner_output.a(), 255 - combiner_output.a(), 255 - combiner_output.a()); - case params.DestAlpha: + case Regs::BlendFactor::DestAlpha: return Math::Vec3(dest.a(), dest.a(), dest.a()); - case params.OneMinusDestAlpha: + case Regs::BlendFactor::OneMinusDestAlpha: return Math::Vec3(255 - dest.a(), 255 - dest.a(), 255 - dest.a()); - case params.ConstantColor: - return Math::Vec3(registers.output_merger.blend_const.r, registers.output_merger.blend_const.g, registers.output_merger.blend_const.b); + case Regs::BlendFactor::ConstantColor: + return Math::Vec3(output_merger.blend_const.r, output_merger.blend_const.g, output_merger.blend_const.b); - case params.OneMinusConstantColor: - return Math::Vec3(255 - registers.output_merger.blend_const.r, 255 - registers.output_merger.blend_const.g, 255 - registers.output_merger.blend_const.b); + case Regs::BlendFactor::OneMinusConstantColor: + return Math::Vec3(255 - output_merger.blend_const.r, 255 - output_merger.blend_const.g, 255 - output_merger.blend_const.b); - case params.ConstantAlpha: - return Math::Vec3(registers.output_merger.blend_const.a, registers.output_merger.blend_const.a, registers.output_merger.blend_const.a); + case Regs::BlendFactor::ConstantAlpha: + return Math::Vec3(output_merger.blend_const.a, output_merger.blend_const.a, output_merger.blend_const.a); - case params.OneMinusConstantAlpha: - return Math::Vec3(255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a, 255 - registers.output_merger.blend_const.a); + case Regs::BlendFactor::OneMinusConstantAlpha: + return Math::Vec3(255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a, 255 - output_merger.blend_const.a); default: LOG_CRITICAL(HW_GPU, "Unknown color blend factor %x", factor); @@ -769,31 +775,31 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, } }; - auto LookupFactorA = [&](decltype(params)::BlendFactor factor) -> u8 { + auto LookupFactorA = [&](Regs::BlendFactor factor) -> u8 { switch (factor) { - case params.Zero: + case Regs::BlendFactor::Zero: return 0; - case params.One: + case Regs::BlendFactor::One: return 255; - case params.SourceAlpha: + case Regs::BlendFactor::SourceAlpha: return combiner_output.a(); - case params.OneMinusSourceAlpha: + case Regs::BlendFactor::OneMinusSourceAlpha: return 255 - combiner_output.a(); - case params.DestAlpha: + case Regs::BlendFactor::DestAlpha: return dest.a(); - case params.OneMinusDestAlpha: + case Regs::BlendFactor::OneMinusDestAlpha: return 255 - dest.a(); - case params.ConstantAlpha: - return registers.output_merger.blend_const.a; + case Regs::BlendFactor::ConstantAlpha: + return output_merger.blend_const.a; - case params.OneMinusConstantAlpha: - return 255 - registers.output_merger.blend_const.a; + case Regs::BlendFactor::OneMinusConstantAlpha: + return 255 - output_merger.blend_const.a; default: LOG_CRITICAL(HW_GPU, "Unknown alpha blend factor %x", factor); @@ -802,7 +808,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, } }; - using BlendEquation = decltype(params)::BlendEquation; + using BlendEquation = Regs::BlendEquation; static auto EvaluateBlendEquation = [](const Math::Vec4& src, const Math::Vec4& srcfactor, const Math::Vec4& dest, const Math::Vec4& destfactor, BlendEquation equation) { @@ -812,29 +818,29 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, auto dst_result = (dest * destfactor).Cast(); switch (equation) { - case BlendEquation::Add: + case Regs::BlendEquation::Add: result = (src_result + dst_result) / 255; break; - case BlendEquation::Subtract: + case Regs::BlendEquation::Subtract: result = (src_result - dst_result) / 255; break; - case BlendEquation::ReverseSubtract: + case Regs::BlendEquation::ReverseSubtract: result = (dst_result - src_result) / 255; break; // TODO: How do these two actually work? // OpenGL doesn't include the blend factors in the min/max computations, // but is this what the 3DS actually does? - case BlendEquation::Min: + case Regs::BlendEquation::Min: result.r() = std::min(src.r(), dest.r()); result.g() = std::min(src.g(), dest.g()); result.b() = std::min(src.b(), dest.b()); result.a() = std::min(src.a(), dest.a()); break; - case BlendEquation::Max: + case Regs::BlendEquation::Max: result.r() = std::max(src.r(), dest.r()); result.g() = std::max(src.g(), dest.g()); result.b() = std::max(src.b(), dest.b()); @@ -860,15 +866,15 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, blend_output = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_rgb); blend_output.a() = EvaluateBlendEquation(combiner_output, srcfactor, dest, dstfactor, params.blend_equation_a).a(); } else { - LOG_CRITICAL(HW_GPU, "logic op: %x", registers.output_merger.logic_op); + LOG_CRITICAL(HW_GPU, "logic op: %x", output_merger.logic_op); UNIMPLEMENTED(); } const Math::Vec4 result = { - registers.output_merger.red_enable ? blend_output.r() : dest.r(), - registers.output_merger.green_enable ? blend_output.g() : dest.g(), - registers.output_merger.blue_enable ? blend_output.b() : dest.b(), - registers.output_merger.alpha_enable ? blend_output.a() : dest.a() + output_merger.red_enable ? blend_output.r() : dest.r(), + output_merger.green_enable ? blend_output.g() : dest.g(), + output_merger.blue_enable ? blend_output.b() : dest.b(), + output_merger.alpha_enable ? blend_output.a() : dest.a() }; DrawPixel(x >> 4, y >> 4, result); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e4437554..4b7d099a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -46,7 +46,7 @@ void RasterizerOpenGL::InitObjects() { uniform_tev_combiner_buffer_color = glGetUniformLocation(shader.handle, "tev_combiner_buffer_color"); - const auto tev_stages = Pica::registers.GetTevStages(); + const auto tev_stages = Pica::g_state.regs.GetTevStages(); for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { auto& uniform_tev_cfg = uniform_tev_cfgs[tev_stage_index]; @@ -128,6 +128,8 @@ void RasterizerOpenGL::InitObjects() { } void RasterizerOpenGL::Reset() { + const auto& regs = Pica::g_state.regs; + SyncCullMode(); SyncBlendEnabled(); SyncBlendFuncs(); @@ -137,46 +139,46 @@ void RasterizerOpenGL::Reset() { SyncDepthTest(); // TEV stage 0 - SyncTevSources(0, Pica::registers.tev_stage0); - SyncTevModifiers(0, Pica::registers.tev_stage0); - SyncTevOps(0, Pica::registers.tev_stage0); - SyncTevColor(0, Pica::registers.tev_stage0); - SyncTevMultipliers(0, Pica::registers.tev_stage0); + SyncTevSources(0, regs.tev_stage0); + SyncTevModifiers(0, regs.tev_stage0); + SyncTevOps(0, regs.tev_stage0); + SyncTevColor(0, regs.tev_stage0); + SyncTevMultipliers(0, regs.tev_stage0); // TEV stage 1 - SyncTevSources(1, Pica::registers.tev_stage1); - SyncTevModifiers(1, Pica::registers.tev_stage1); - SyncTevOps(1, Pica::registers.tev_stage1); - SyncTevColor(1, Pica::registers.tev_stage1); - SyncTevMultipliers(1, Pica::registers.tev_stage1); + SyncTevSources(1, regs.tev_stage1); + SyncTevModifiers(1, regs.tev_stage1); + SyncTevOps(1, regs.tev_stage1); + SyncTevColor(1, regs.tev_stage1); + SyncTevMultipliers(1, regs.tev_stage1); // TEV stage 2 - SyncTevSources(2, Pica::registers.tev_stage2); - SyncTevModifiers(2, Pica::registers.tev_stage2); - SyncTevOps(2, Pica::registers.tev_stage2); - SyncTevColor(2, Pica::registers.tev_stage2); - SyncTevMultipliers(2, Pica::registers.tev_stage2); + SyncTevSources(2, regs.tev_stage2); + SyncTevModifiers(2, regs.tev_stage2); + SyncTevOps(2, regs.tev_stage2); + SyncTevColor(2, regs.tev_stage2); + SyncTevMultipliers(2, regs.tev_stage2); // TEV stage 3 - SyncTevSources(3, Pica::registers.tev_stage3); - SyncTevModifiers(3, Pica::registers.tev_stage3); - SyncTevOps(3, Pica::registers.tev_stage3); - SyncTevColor(3, Pica::registers.tev_stage3); - SyncTevMultipliers(3, Pica::registers.tev_stage3); + SyncTevSources(3, regs.tev_stage3); + SyncTevModifiers(3, regs.tev_stage3); + SyncTevOps(3, regs.tev_stage3); + SyncTevColor(3, regs.tev_stage3); + SyncTevMultipliers(3, regs.tev_stage3); // TEV stage 4 - SyncTevSources(4, Pica::registers.tev_stage4); - SyncTevModifiers(4, Pica::registers.tev_stage4); - SyncTevOps(4, Pica::registers.tev_stage4); - SyncTevColor(4, Pica::registers.tev_stage4); - SyncTevMultipliers(4, Pica::registers.tev_stage4); + SyncTevSources(4, regs.tev_stage4); + SyncTevModifiers(4, regs.tev_stage4); + SyncTevOps(4, regs.tev_stage4); + SyncTevColor(4, regs.tev_stage4); + SyncTevMultipliers(4, regs.tev_stage4); // TEV stage 5 - SyncTevSources(5, Pica::registers.tev_stage5); - SyncTevModifiers(5, Pica::registers.tev_stage5); - SyncTevOps(5, Pica::registers.tev_stage5); - SyncTevColor(5, Pica::registers.tev_stage5); - SyncTevMultipliers(5, Pica::registers.tev_stage5); + SyncTevSources(5, regs.tev_stage5); + SyncTevModifiers(5, regs.tev_stage5); + SyncTevOps(5, regs.tev_stage5); + SyncTevColor(5, regs.tev_stage5); + SyncTevMultipliers(5, regs.tev_stage5); SyncCombinerColor(); SyncCombinerWriteFlags(); @@ -210,6 +212,8 @@ void RasterizerOpenGL::CommitFramebuffer() { } void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { + const auto& regs = Pica::g_state.regs; + if (!Settings::values.use_hw_renderer) return; @@ -247,104 +251,104 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // TEV stage 0 case PICA_REG_INDEX(tev_stage0.color_source1): - SyncTevSources(0, Pica::registers.tev_stage0); + SyncTevSources(0, regs.tev_stage0); break; case PICA_REG_INDEX(tev_stage0.color_modifier1): - SyncTevModifiers(0, Pica::registers.tev_stage0); + SyncTevModifiers(0, regs.tev_stage0); break; case PICA_REG_INDEX(tev_stage0.color_op): - SyncTevOps(0, Pica::registers.tev_stage0); + SyncTevOps(0, regs.tev_stage0); break; case PICA_REG_INDEX(tev_stage0.const_r): - SyncTevColor(0, Pica::registers.tev_stage0); + SyncTevColor(0, regs.tev_stage0); break; case PICA_REG_INDEX(tev_stage0.color_scale): - SyncTevMultipliers(0, Pica::registers.tev_stage0); + SyncTevMultipliers(0, regs.tev_stage0); break; // TEV stage 1 case PICA_REG_INDEX(tev_stage1.color_source1): - SyncTevSources(1, Pica::registers.tev_stage1); + SyncTevSources(1, regs.tev_stage1); break; case PICA_REG_INDEX(tev_stage1.color_modifier1): - SyncTevModifiers(1, Pica::registers.tev_stage1); + SyncTevModifiers(1, regs.tev_stage1); break; case PICA_REG_INDEX(tev_stage1.color_op): - SyncTevOps(1, Pica::registers.tev_stage1); + SyncTevOps(1, regs.tev_stage1); break; case PICA_REG_INDEX(tev_stage1.const_r): - SyncTevColor(1, Pica::registers.tev_stage1); + SyncTevColor(1, regs.tev_stage1); break; case PICA_REG_INDEX(tev_stage1.color_scale): - SyncTevMultipliers(1, Pica::registers.tev_stage1); + SyncTevMultipliers(1, regs.tev_stage1); break; // TEV stage 2 case PICA_REG_INDEX(tev_stage2.color_source1): - SyncTevSources(2, Pica::registers.tev_stage2); + SyncTevSources(2, regs.tev_stage2); break; case PICA_REG_INDEX(tev_stage2.color_modifier1): - SyncTevModifiers(2, Pica::registers.tev_stage2); + SyncTevModifiers(2, regs.tev_stage2); break; case PICA_REG_INDEX(tev_stage2.color_op): - SyncTevOps(2, Pica::registers.tev_stage2); + SyncTevOps(2, regs.tev_stage2); break; case PICA_REG_INDEX(tev_stage2.const_r): - SyncTevColor(2, Pica::registers.tev_stage2); + SyncTevColor(2, regs.tev_stage2); break; case PICA_REG_INDEX(tev_stage2.color_scale): - SyncTevMultipliers(2, Pica::registers.tev_stage2); + SyncTevMultipliers(2, regs.tev_stage2); break; // TEV stage 3 case PICA_REG_INDEX(tev_stage3.color_source1): - SyncTevSources(3, Pica::registers.tev_stage3); + SyncTevSources(3, regs.tev_stage3); break; case PICA_REG_INDEX(tev_stage3.color_modifier1): - SyncTevModifiers(3, Pica::registers.tev_stage3); + SyncTevModifiers(3, regs.tev_stage3); break; case PICA_REG_INDEX(tev_stage3.color_op): - SyncTevOps(3, Pica::registers.tev_stage3); + SyncTevOps(3, regs.tev_stage3); break; case PICA_REG_INDEX(tev_stage3.const_r): - SyncTevColor(3, Pica::registers.tev_stage3); + SyncTevColor(3, regs.tev_stage3); break; case PICA_REG_INDEX(tev_stage3.color_scale): - SyncTevMultipliers(3, Pica::registers.tev_stage3); + SyncTevMultipliers(3, regs.tev_stage3); break; // TEV stage 4 case PICA_REG_INDEX(tev_stage4.color_source1): - SyncTevSources(4, Pica::registers.tev_stage4); + SyncTevSources(4, regs.tev_stage4); break; case PICA_REG_INDEX(tev_stage4.color_modifier1): - SyncTevModifiers(4, Pica::registers.tev_stage4); + SyncTevModifiers(4, regs.tev_stage4); break; case PICA_REG_INDEX(tev_stage4.color_op): - SyncTevOps(4, Pica::registers.tev_stage4); + SyncTevOps(4, regs.tev_stage4); break; case PICA_REG_INDEX(tev_stage4.const_r): - SyncTevColor(4, Pica::registers.tev_stage4); + SyncTevColor(4, regs.tev_stage4); break; case PICA_REG_INDEX(tev_stage4.color_scale): - SyncTevMultipliers(4, Pica::registers.tev_stage4); + SyncTevMultipliers(4, regs.tev_stage4); break; // TEV stage 5 case PICA_REG_INDEX(tev_stage5.color_source1): - SyncTevSources(5, Pica::registers.tev_stage5); + SyncTevSources(5, regs.tev_stage5); break; case PICA_REG_INDEX(tev_stage5.color_modifier1): - SyncTevModifiers(5, Pica::registers.tev_stage5); + SyncTevModifiers(5, regs.tev_stage5); break; case PICA_REG_INDEX(tev_stage5.color_op): - SyncTevOps(5, Pica::registers.tev_stage5); + SyncTevOps(5, regs.tev_stage5); break; case PICA_REG_INDEX(tev_stage5.const_r): - SyncTevColor(5, Pica::registers.tev_stage5); + SyncTevColor(5, regs.tev_stage5); break; case PICA_REG_INDEX(tev_stage5.color_scale): - SyncTevMultipliers(5, Pica::registers.tev_stage5); + SyncTevMultipliers(5, regs.tev_stage5); break; // TEV combiner buffer color @@ -360,16 +364,18 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { } void RasterizerOpenGL::NotifyPreRead(PAddr addr, u32 size) { + const auto& regs = Pica::g_state.regs; + if (!Settings::values.use_hw_renderer) return; - PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress(); - u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(Pica::registers.framebuffer.color_format) - * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); + PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); + u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) + * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); - PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress(); - u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(Pica::registers.framebuffer.depth_format) - * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); + PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); + u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) + * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); // If source memory region overlaps 3DS framebuffers, commit them before the copy happens if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) @@ -380,16 +386,18 @@ void RasterizerOpenGL::NotifyPreRead(PAddr addr, u32 size) { } void RasterizerOpenGL::NotifyFlush(PAddr addr, u32 size) { + const auto& regs = Pica::g_state.regs; + if (!Settings::values.use_hw_renderer) return; - PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress(); - u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(Pica::registers.framebuffer.color_format) - * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); + PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); + u32 cur_fb_color_size = Pica::Regs::BytesPerColorPixel(regs.framebuffer.color_format) + * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); - PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress(); - u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(Pica::registers.framebuffer.depth_format) - * Pica::registers.framebuffer.GetWidth() * Pica::registers.framebuffer.GetHeight(); + PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); + u32 cur_fb_depth_size = Pica::Regs::BytesPerDepthPixel(regs.framebuffer.depth_format) + * regs.framebuffer.GetWidth() * regs.framebuffer.GetHeight(); // If modified memory region overlaps 3DS framebuffers, reload their contents into OpenGL if (MathUtil::IntervalsIntersect(addr, size, cur_fb_color_addr, cur_fb_color_size)) @@ -501,14 +509,16 @@ void RasterizerOpenGL::ReconfigureDepthTexture(DepthTextureInfo& texture, Pica:: } void RasterizerOpenGL::SyncFramebuffer() { - PAddr cur_fb_color_addr = Pica::registers.framebuffer.GetColorBufferPhysicalAddress(); - Pica::Regs::ColorFormat new_fb_color_format = Pica::registers.framebuffer.color_format; + const auto& regs = Pica::g_state.regs; - PAddr cur_fb_depth_addr = Pica::registers.framebuffer.GetDepthBufferPhysicalAddress(); - Pica::Regs::DepthFormat new_fb_depth_format = Pica::registers.framebuffer.depth_format; + PAddr cur_fb_color_addr = regs.framebuffer.GetColorBufferPhysicalAddress(); + Pica::Regs::ColorFormat new_fb_color_format = regs.framebuffer.color_format; - bool fb_size_changed = fb_color_texture.width != Pica::registers.framebuffer.GetWidth() || - fb_color_texture.height != Pica::registers.framebuffer.GetHeight(); + PAddr cur_fb_depth_addr = regs.framebuffer.GetDepthBufferPhysicalAddress(); + Pica::Regs::DepthFormat new_fb_depth_format = regs.framebuffer.depth_format; + + bool fb_size_changed = fb_color_texture.width != regs.framebuffer.GetWidth() || + fb_color_texture.height != regs.framebuffer.GetHeight(); bool color_fb_prop_changed = fb_color_texture.format != new_fb_color_format || fb_size_changed; @@ -532,12 +542,12 @@ void RasterizerOpenGL::SyncFramebuffer() { // Reconfigure framebuffer textures if any property has changed if (color_fb_prop_changed) { ReconfigureColorTexture(fb_color_texture, new_fb_color_format, - Pica::registers.framebuffer.GetWidth(), Pica::registers.framebuffer.GetHeight()); + regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight()); } if (depth_fb_prop_changed) { ReconfigureDepthTexture(fb_depth_texture, new_fb_depth_format, - Pica::registers.framebuffer.GetWidth(), Pica::registers.framebuffer.GetHeight()); + regs.framebuffer.GetWidth(), regs.framebuffer.GetHeight()); // Only attach depth buffer as stencil if it supports stencil switch (new_fb_depth_format) { @@ -572,7 +582,9 @@ void RasterizerOpenGL::SyncFramebuffer() { } void RasterizerOpenGL::SyncCullMode() { - switch (Pica::registers.cull_mode) { + const auto& regs = Pica::g_state.regs; + + switch (regs.cull_mode) { case Pica::Regs::CullMode::KeepAll: state.cull.enabled = false; break; @@ -588,25 +600,26 @@ void RasterizerOpenGL::SyncCullMode() { break; default: - LOG_CRITICAL(Render_OpenGL, "Unknown cull mode %d", Pica::registers.cull_mode.Value()); + LOG_CRITICAL(Render_OpenGL, "Unknown cull mode %d", regs.cull_mode.Value()); UNIMPLEMENTED(); break; } } void RasterizerOpenGL::SyncBlendEnabled() { - state.blend.enabled = Pica::registers.output_merger.alphablend_enable; + state.blend.enabled = (Pica::g_state.regs.output_merger.alphablend_enable == 1); } void RasterizerOpenGL::SyncBlendFuncs() { - state.blend.src_rgb_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_source_rgb); - state.blend.dst_rgb_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_dest_rgb); - state.blend.src_a_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_source_a); - state.blend.dst_a_func = PicaToGL::BlendFunc(Pica::registers.output_merger.alpha_blending.factor_dest_a); + const auto& regs = Pica::g_state.regs; + state.blend.src_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_rgb); + state.blend.dst_rgb_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_rgb); + state.blend.src_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_source_a); + state.blend.dst_a_func = PicaToGL::BlendFunc(regs.output_merger.alpha_blending.factor_dest_a); } void RasterizerOpenGL::SyncBlendColor() { - auto blend_color = PicaToGL::ColorRGBA8((u8*)&Pica::registers.output_merger.blend_const.r); + auto blend_color = PicaToGL::ColorRGBA8((u8*)&Pica::g_state.regs.output_merger.blend_const.r); state.blend.color.red = blend_color[0]; state.blend.color.green = blend_color[1]; state.blend.color.blue = blend_color[2]; @@ -614,9 +627,10 @@ void RasterizerOpenGL::SyncBlendColor() { } void RasterizerOpenGL::SyncAlphaTest() { - glUniform1i(uniform_alphatest_enabled, Pica::registers.output_merger.alpha_test.enable); - glUniform1i(uniform_alphatest_func, Pica::registers.output_merger.alpha_test.func); - glUniform1f(uniform_alphatest_ref, Pica::registers.output_merger.alpha_test.ref / 255.0f); + const auto& regs = Pica::g_state.regs; + glUniform1i(uniform_alphatest_enabled, regs.output_merger.alpha_test.enable); + glUniform1i(uniform_alphatest_func, (GLint)regs.output_merger.alpha_test.func.Value()); + glUniform1f(uniform_alphatest_ref, regs.output_merger.alpha_test.ref / 255.0f); } void RasterizerOpenGL::SyncStencilTest() { @@ -624,9 +638,10 @@ void RasterizerOpenGL::SyncStencilTest() { } void RasterizerOpenGL::SyncDepthTest() { - state.depth.test_enabled = Pica::registers.output_merger.depth_test_enable; - state.depth.test_func = PicaToGL::CompareFunc(Pica::registers.output_merger.depth_test_func); - state.depth.write_mask = Pica::registers.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE; + const auto& regs = Pica::g_state.regs; + state.depth.test_enabled = (regs.output_merger.depth_test_enable == 1); + state.depth.test_func = PicaToGL::CompareFunc(regs.output_merger.depth_test_func); + state.depth.write_mask = regs.output_merger.depth_write_enable ? GL_TRUE : GL_FALSE; } void RasterizerOpenGL::SyncTevSources(unsigned stage_index, const Pica::Regs::TevStageConfig& config) { @@ -667,34 +682,37 @@ void RasterizerOpenGL::SyncTevMultipliers(unsigned stage_index, const Pica::Regs } void RasterizerOpenGL::SyncCombinerColor() { - auto combiner_color = PicaToGL::ColorRGBA8((u8*)&Pica::registers.tev_combiner_buffer_color.r); + auto combiner_color = PicaToGL::ColorRGBA8((u8*)&Pica::g_state.regs.tev_combiner_buffer_color.r); glUniform4fv(uniform_tev_combiner_buffer_color, 1, combiner_color.data()); } void RasterizerOpenGL::SyncCombinerWriteFlags() { - const auto tev_stages = Pica::registers.GetTevStages(); + const auto& regs = Pica::g_state.regs; + const auto tev_stages = regs.GetTevStages(); for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { glUniform2i(uniform_tev_cfgs[tev_stage_index].updates_combiner_buffer_color_alpha, - Pica::registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index), - Pica::registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)); + regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index), + regs.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)); } } void RasterizerOpenGL::SyncDrawState() { + const auto& regs = Pica::g_state.regs; + // Sync the viewport - GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(Pica::registers.viewport_size_x).ToFloat32() * 2; - GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(Pica::registers.viewport_size_y).ToFloat32() * 2; + GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_x).ToFloat32() * 2; + GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_y).ToFloat32() * 2; // OpenGL uses different y coordinates, so negate corner offset and flip origin // TODO: Ensure viewport_corner.x should not be negated or origin flipped // TODO: Use floating-point viewports for accuracy if supported - glViewport((GLsizei)static_cast(Pica::registers.viewport_corner.x), - -(GLsizei)static_cast(Pica::registers.viewport_corner.y) - + Pica::registers.framebuffer.GetHeight() - viewport_height, + glViewport((GLsizei)static_cast(regs.viewport_corner.x), + -(GLsizei)static_cast(regs.viewport_corner.y) + + regs.framebuffer.GetHeight() - viewport_height, viewport_width, viewport_height); // Sync bound texture(s), upload if not cached - const auto pica_textures = Pica::registers.GetTextures(); + const auto pica_textures = regs.GetTextures(); for (unsigned texture_index = 0; texture_index < pica_textures.size(); ++texture_index) { const auto& texture = pica_textures[texture_index]; @@ -707,7 +725,7 @@ void RasterizerOpenGL::SyncDrawState() { } // Skip processing TEV stages that simply pass the previous stage results through - const auto tev_stages = Pica::registers.GetTevStages(); + const auto tev_stages = regs.GetTevStages(); for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) { glUniform1i(uniform_tev_cfgs[tev_stage_index].enabled, !IsPassThroughTevStage(tev_stages[tev_stage_index])); } @@ -716,7 +734,7 @@ void RasterizerOpenGL::SyncDrawState() { } void RasterizerOpenGL::ReloadColorBuffer() { - u8* color_buffer = Memory::GetPhysicalPointer(Pica::registers.framebuffer.GetColorBufferPhysicalAddress()); + u8* color_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetColorBufferPhysicalAddress()); if (color_buffer == nullptr) return; @@ -748,7 +766,7 @@ void RasterizerOpenGL::ReloadColorBuffer() { void RasterizerOpenGL::ReloadDepthBuffer() { // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil - u8* depth_buffer = Memory::GetPhysicalPointer(Pica::registers.framebuffer.GetDepthBufferPhysicalAddress()); + u8* depth_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetDepthBufferPhysicalAddress()); if (depth_buffer == nullptr) { return; diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index 8369c649..f8763e71 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -41,7 +41,7 @@ inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) { return gl_mode; } -inline GLenum BlendFunc(u32 factor) { +inline GLenum BlendFunc(Pica::Regs::BlendFactor factor) { static const GLenum blend_func_table[] = { GL_ZERO, // BlendFactor::Zero GL_ONE, // BlendFactor::One @@ -61,17 +61,17 @@ inline GLenum BlendFunc(u32 factor) { }; // Range check table for input - if (factor >= ARRAY_SIZE(blend_func_table)) { + if ((unsigned)factor >= ARRAY_SIZE(blend_func_table)) { LOG_CRITICAL(Render_OpenGL, "Unknown blend factor %d", factor); UNREACHABLE(); return GL_ONE; } - return blend_func_table[factor]; + return blend_func_table[(unsigned)factor]; } -inline GLenum CompareFunc(u32 func) { +inline GLenum CompareFunc(Pica::Regs::CompareFunc func) { static const GLenum compare_func_table[] = { GL_NEVER, // CompareFunc::Never GL_ALWAYS, // CompareFunc::Always @@ -84,14 +84,14 @@ inline GLenum CompareFunc(u32 func) { }; // Range check table for input - if (func >= ARRAY_SIZE(compare_func_table)) { + if ((unsigned)func >= ARRAY_SIZE(compare_func_table)) { LOG_CRITICAL(Render_OpenGL, "Unknown compare function %d", func); UNREACHABLE(); return GL_ALWAYS; } - return compare_func_table[func]; + return compare_func_table[(unsigned)func]; } inline std::array ColorRGBA8(const u8* bytes) { diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 981d1a35..7d68998f 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -26,55 +26,8 @@ namespace Pica { namespace VertexShader { -static struct { - Math::Vec4 f[96]; - - std::array b; - - std::array,4> i; -} shader_uniforms; - -static Math::Vec4 vs_default_attributes[16]; - -// TODO: Not sure where the shader binary and swizzle patterns are supposed to be loaded to! -// For now, we just keep these local arrays around. -static std::array shader_memory; -static std::array swizzle_data; - -void SubmitShaderMemoryChange(u32 addr, u32 value) { - shader_memory[addr] = value; -} - -void SubmitSwizzleDataChange(u32 addr, u32 value) { - swizzle_data[addr] = value; -} - -Math::Vec4& GetFloatUniform(u32 index) { - return shader_uniforms.f[index]; -} - -bool& GetBoolUniform(u32 index) { - return shader_uniforms.b[index]; -} - -Math::Vec4& GetIntUniform(u32 index) { - return shader_uniforms.i[index]; -} - -Math::Vec4& GetDefaultAttribute(u32 index) { - return vs_default_attributes[index]; -} - -const std::array& GetShaderBinary() { - return shader_memory; -} - -const std::array& GetSwizzlePatterns() { - return swizzle_data; -} - struct VertexShaderState { - u32* program_counter; + const u32* program_counter; const float24* input_register_table[16]; Math::Vec4 output_registers[16]; @@ -109,6 +62,9 @@ struct VertexShaderState { }; static void ProcessShaderCode(VertexShaderState& state) { + const auto& uniforms = g_state.vs.uniforms; + const auto& swizzle_data = g_state.vs.swizzle_data; + const auto& program_code = g_state.vs.program_code; // Placeholder for invalid inputs static float24 dummy_vec4_float24[4]; @@ -116,14 +72,14 @@ static void ProcessShaderCode(VertexShaderState& state) { while (true) { if (!state.call_stack.empty()) { auto& top = state.call_stack.top(); - if (state.program_counter - shader_memory.data() == top.final_address) { + if (state.program_counter - program_code.data() == top.final_address) { state.address_registers[2] += top.loop_increment; if (top.repeat_counter-- == 0) { - state.program_counter = &shader_memory[top.return_address]; + state.program_counter = &program_code[top.return_address]; state.call_stack.pop(); } else { - state.program_counter = &shader_memory[top.loop_address]; + state.program_counter = &program_code[top.loop_address]; } // TODO: Is "trying again" accurate to hardware? @@ -135,12 +91,12 @@ static void ProcessShaderCode(VertexShaderState& state) { const Instruction& instr = *(const Instruction*)state.program_counter; const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; - static auto call = [](VertexShaderState& state, u32 offset, u32 num_instructions, + static auto call = [&program_code](VertexShaderState& state, u32 offset, u32 num_instructions, u32 return_offset, u8 repeat_count, u8 loop_increment) { - state.program_counter = &shader_memory[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset + state.program_counter = &program_code[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); }; - u32 binary_offset = state.program_counter - shader_memory.data(); + u32 binary_offset = state.program_counter - program_code.data(); state.debug.max_offset = std::max(state.debug.max_offset, 1 + binary_offset); @@ -153,7 +109,7 @@ static void ProcessShaderCode(VertexShaderState& state) { return &state.temporary_registers[source_reg.GetIndex()].x; case RegisterType::FloatUniform: - return &shader_uniforms.f[source_reg.GetIndex()].x; + return &uniforms.f[source_reg.GetIndex()].x; default: return dummy_vec4_float24; @@ -471,13 +427,13 @@ static void ProcessShaderCode(VertexShaderState& state) { case OpCode::Id::JMPC: if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { - state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; + state.program_counter = &program_code[instr.flow_control.dest_offset] - 1; } break; case OpCode::Id::JMPU: - if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { - state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1; + if (uniforms.b[instr.flow_control.bool_uniform_id]) { + state.program_counter = &program_code[instr.flow_control.dest_offset] - 1; } break; @@ -489,7 +445,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; case OpCode::Id::CALLU: - if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { + if (uniforms.b[instr.flow_control.bool_uniform_id]) { call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, @@ -510,7 +466,7 @@ static void ProcessShaderCode(VertexShaderState& state) { break; case OpCode::Id::IFU: - if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) { + if (uniforms.b[instr.flow_control.bool_uniform_id]) { call(state, binary_offset + 1, instr.flow_control.dest_offset - binary_offset - 1, @@ -545,14 +501,14 @@ static void ProcessShaderCode(VertexShaderState& state) { case OpCode::Id::LOOP: { - state.address_registers[2] = shader_uniforms.i[instr.flow_control.int_uniform_id].y; + state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; call(state, binary_offset + 1, instr.flow_control.dest_offset - binary_offset + 1, instr.flow_control.dest_offset + 1, - shader_uniforms.i[instr.flow_control.int_uniform_id].x, - shader_uniforms.i[instr.flow_control.int_uniform_id].z); + uniforms.i[instr.flow_control.int_uniform_id].x, + uniforms.i[instr.flow_control.int_uniform_id].z); break; } @@ -578,15 +534,17 @@ static Common::Profiling::TimingCategory shader_category("Vertex Shader"); OutputVertex RunShader(const InputVertex& input, int num_attributes) { Common::Profiling::ScopeTimer timer(shader_category); + const auto& regs = g_state.regs; + const auto& vs = g_state.vs; VertexShaderState state; - const u32* main = &shader_memory[registers.vs_main_offset]; + const u32* main = &vs.program_code[regs.vs_main_offset]; state.program_counter = (u32*)main; state.debug.max_offset = 0; state.debug.max_opdesc_id = 0; // Setup input register table - const auto& attribute_register_map = registers.vs_input_register_map; + const auto& attribute_register_map = regs.vs_input_register_map; float24 dummy_register; boost::fill(state.input_register_table, &dummy_register); @@ -611,16 +569,16 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) { state.conditional_code[1] = false; ProcessShaderCode(state); - DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(), - state.debug.max_opdesc_id, registers.vs_main_offset, - registers.vs_output_attributes); + DebugUtils::DumpShader(vs.program_code.data(), state.debug.max_offset, vs.swizzle_data.data(), + state.debug.max_opdesc_id, regs.vs_main_offset, + regs.vs_output_attributes); // Setup output data OutputVertex ret; // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to // figure out what those circumstances are and enable the remaining outputs then. for (int i = 0; i < 7; ++i) { - const auto& output_register_map = registers.vs_output_attributes[i]; + const auto& output_register_map = regs.vs_output_attributes[i]; u32 semantics[4] = { output_register_map.map_x, output_register_map.map_y, diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index c26709bb..7471a6de 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -66,19 +66,8 @@ struct OutputVertex { static_assert(std::is_pod::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); -void SubmitShaderMemoryChange(u32 addr, u32 value); -void SubmitSwizzleDataChange(u32 addr, u32 value); - OutputVertex RunShader(const InputVertex& input, int num_attributes); -Math::Vec4& GetFloatUniform(u32 index); -bool& GetBoolUniform(u32 index); -Math::Vec4& GetIntUniform(u32 index); -Math::Vec4& GetDefaultAttribute(u32 index); - -const std::array& GetShaderBinary(); -const std::array& GetSwizzlePatterns(); - } // namespace } // namespace diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp index d4d907d5..3becc426 100644 --- a/src/video_core/video_core.cpp +++ b/src/video_core/video_core.cpp @@ -8,9 +8,11 @@ #include "core/core.h" #include "core/settings.h" -#include "video_core/video_core.h" -#include "video_core/renderer_base.h" -#include "video_core/renderer_opengl/renderer_opengl.h" +#include "video_core.h" +#include "renderer_base.h" +#include "renderer_opengl/renderer_opengl.h" + +#include "pica.h" //////////////////////////////////////////////////////////////////////////////////////////////////// // Video Core namespace @@ -24,6 +26,8 @@ std::atomic g_hw_renderer_enabled; /// Initialize the video core void Init(EmuWindow* emu_window) { + Pica::Init(); + g_emu_window = emu_window; g_renderer = new RendererOpenGL(); g_renderer->SetWindow(g_emu_window); @@ -34,7 +38,10 @@ void Init(EmuWindow* emu_window) { /// Shutdown the video core void Shutdown() { + Pica::Shutdown(); + delete g_renderer; + LOG_DEBUG(Render, "shutdown OK"); } -- cgit v1.2.3