diff options
Diffstat (limited to 'src/video_core')
23 files changed, 801 insertions, 522 deletions
diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 5c7f4ae1..16210830 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -2,7 +2,6 @@ set(SRCS renderer_opengl/generated/gl_3_2_core.c renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer_cache.cpp - renderer_opengl/gl_resource_manager.cpp renderer_opengl/gl_shader_util.cpp renderer_opengl/gl_state.cpp renderer_opengl/renderer_opengl.cpp diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 943f3eb3..558b49d6 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp @@ -94,7 +94,7 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { // NOTE: We clip against a w=epsilon plane to guarantee that the output has a positive w value. // TODO: Not sure if this is a valid approach. Also should probably instead use the smallest // epsilon possible within float24 accuracy. - static const float24 EPSILON = float24::FromFloat32(0.00001); + static const float24 EPSILON = float24::FromFloat32(0.00001f); static const float24 f0 = float24::FromFloat32(0.0); static const float24 f1 = float24::FromFloat32(1.0); static const std::array<ClippingEdge, 7> clipping_edges = {{ @@ -153,7 +153,7 @@ void ProcessTriangle(OutputVertex &v0, OutputVertex &v1, OutputVertex &v2) { "Triangle %lu/%lu at position (%.3f, %.3f, %.3f, %.3f), " "(%.3f, %.3f, %.3f, %.3f), (%.3f, %.3f, %.3f, %.3f) and " "screen position (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f), (%.2f, %.2f, %.2f)", - i, output_list->size(), + i + 1, output_list->size() - 2, vtx0.pos.x.ToFloat32(), vtx0.pos.y.ToFloat32(), vtx0.pos.z.ToFloat32(), vtx0.pos.w.ToFloat32(), vtx1.pos.x.ToFloat32(), vtx1.pos.y.ToFloat32(), vtx1.pos.z.ToFloat32(), vtx1.pos.w.ToFloat32(), vtx2.pos.x.ToFloat32(), vtx2.pos.y.ToFloat32(), vtx2.pos.z.ToFloat32(), vtx2.pos.w.ToFloat32(), diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index b46fadd9..ef9584ab 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -6,18 +6,20 @@ #include "common/profiler.h" +#include "core/hle/service/gsp_gpu.h" +#include "core/hw/gpu.h" +#include "core/settings.h" + +#include "debug_utils/debug_utils.h" + #include "clipper.h" #include "command_processor.h" #include "math.h" #include "pica.h" #include "primitive_assembly.h" +#include "renderer_base.h" #include "vertex_shader.h" #include "video_core.h" -#include "core/hle/service/gsp_gpu.h" -#include "core/hw/gpu.h" -#include "core/settings.h" - -#include "debug_utils/debug_utils.h" namespace Pica { @@ -43,12 +45,12 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { if (GPU::g_skip_frame && id != PICA_REG_INDEX(trigger_irq)) return; - // TODO: Figure out how register masking acts on e.g. vs_uniform_setup.set_value + // TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value u32 old_value = regs[id]; regs[id] = (old_value & ~mask) | (value & mask); if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::CommandLoaded, reinterpret_cast<void*>(&id)); + g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded, reinterpret_cast<void*>(&id)); DebugUtils::OnPicaRegWrite(id, regs[id]); @@ -58,10 +60,50 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::P3D); break; + // Load default vertex input attributes + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): + case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): + { + // TODO: Does actual hardware indeed keep an intermediate buffer or does + // it directly write the values? + default_attr_write_buffer[default_attr_counter++] = value; + + // Default attributes are written in a packed format such that four float24 values are encoded in + // three 32-bit numbers. We write to internal memory once a full such vector is + // written. + if (default_attr_counter >= 3) { + default_attr_counter = 0; + + auto& setup = regs.vs_default_attributes_setup; + + if (setup.index >= 16) { + LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); + break; + } + + Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; + + // NOTE: The destination component order indeed is "backwards" + attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); + attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); + attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); + attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); + + LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, + attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), + attribute.w.ToFloat32()); + + // TODO: Verify that this actually modifies the register! + setup.index = setup.index + 1; + } + break; + } + case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[0], 0x23c): case PICA_REG_INDEX_WORKAROUND(command_buffer.trigger[1], 0x23d): { - unsigned index = id - PICA_REG_INDEX(command_buffer.trigger[0]); + unsigned index = static_cast<unsigned>(id - PICA_REG_INDEX(command_buffer.trigger[0])); u32* head_ptr = (u32*)Memory::GetPhysicalPointer(regs.command_buffer.GetPhysicalAddress(index)); g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr; g_state.cmd_list.length = regs.command_buffer.GetSize(index) / sizeof(u32); @@ -74,7 +116,9 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { { Common::Profiling::ScopeTimer scope_timer(category_drawing); +#if PICA_LOG_TEV DebugUtils::DumpTevStageConfig(regs.GetTevStages()); +#endif if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::IncomingPrimitiveBatch, nullptr); @@ -117,9 +161,50 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { const u16* index_address_16 = (u16*)index_address_8; bool index_u16 = index_info.format != 0; +#if PICA_DUMP_GEOMETRY DebugUtils::GeometryDumper geometry_dumper; - PrimitiveAssembler<VertexShader::OutputVertex> primitive_assembler(regs.triangle_topology.Value()); PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(regs.triangle_topology.Value()); +#endif + PrimitiveAssembler<VertexShader::OutputVertex> primitive_assembler(regs.triangle_topology.Value()); + + if (g_debug_context) { + for (int i = 0; i < 3; ++i) { + const auto texture = regs.GetTextures()[i]; + if (!texture.enabled) + continue; + + u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); + if (g_debug_context && Pica::g_debug_context->recorder) + g_debug_context->recorder->MemoryAccessed(texture_data, Pica::Regs::NibblesPerPixel(texture.format) * texture.config.width / 2 * texture.config.height, texture.config.GetPhysicalAddress()); + } + } + + class { + /// Combine overlapping and close ranges + void SimplifyRanges() { + for (auto it = ranges.begin(); it != ranges.end(); ++it) { + // NOTE: We add 32 to the range end address to make sure "close" ranges are combined, too + auto it2 = std::next(it); + while (it2 != ranges.end() && it->first + it->second + 32 >= it2->first) { + it->second = std::max(it->second, it2->first + it2->second - it->first); + it2 = ranges.erase(it2); + } + } + } + + public: + /// Record a particular memory access in the list + void AddAccess(u32 paddr, u32 size) { + // Create new range or extend existing one + ranges[paddr] = std::max(ranges[paddr], size); + + // Simplify ranges... + SimplifyRanges(); + } + + /// Map of accessed ranges (mapping start address to range size) + std::map<u32, u32> ranges; + } memory_accesses; for (unsigned int index = 0; index < regs.num_vertices; ++index) { @@ -127,6 +212,10 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { if (is_indexed) { // TODO: Implement some sort of vertex cache! + if (g_debug_context && Pica::g_debug_context->recorder) { + int size = index_u16 ? 2 : 1; + memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); + } } // Initialize data for the current vertex @@ -149,7 +238,14 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { // Load per-vertex data from the loader arrays for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) { - const u8* srcdata = Memory::GetPhysicalPointer(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]); + u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]; + const u8* srcdata = Memory::GetPhysicalPointer(source_addr); + + if (g_debug_context && Pica::g_debug_context->recorder) { + memory_accesses.AddAccess(source_addr, + (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4 + : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1); + } const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata : @@ -179,6 +275,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { if (g_debug_context) g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); +#if PICA_DUMP_GEOMETRY // NOTE: When dumping geometry, we simply assume that the first input attribute // corresponds to the position for now. DebugUtils::GeometryDumper::Vertex dumped_vertex = { @@ -188,9 +285,10 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { dumping_primitive_assembler.SubmitVertex(dumped_vertex, std::bind(&DebugUtils::GeometryDumper::AddTriangle, &geometry_dumper, _1, _2, _3)); +#endif // Send to vertex shader - VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes()); + VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes(), g_state.regs.vs, g_state.vs); if (is_indexed) { // TODO: Add processed vertex to vertex cache! @@ -211,47 +309,55 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { } } + for (auto& range : memory_accesses.ranges) { + g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first), + range.second, range.first); + } + if (Settings::values.use_hw_renderer) { VideoCore::g_renderer->hw_rasterizer->DrawTriangles(); } +#if PICA_DUMP_GEOMETRY geometry_dumper.Dump(); +#endif - if (g_debug_context) + if (g_debug_context) { g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); + } break; } - case PICA_REG_INDEX(vs_bool_uniforms): + case PICA_REG_INDEX(vs.bool_uniforms): for (unsigned i = 0; i < 16; ++i) - g_state.vs.uniforms.b[i] = (regs.vs_bool_uniforms.Value() & (1 << i)) != 0; + g_state.vs.uniforms.b[i] = (regs.vs.bool_uniforms.Value() & (1 << i)) != 0; break; - case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1): - case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[1], 0x2b2): - case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[2], 0x2b3): - case PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[3], 0x2b4): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[1], 0x2b2): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[2], 0x2b3): + case PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[3], 0x2b4): { - int index = (id - PICA_REG_INDEX_WORKAROUND(vs_int_uniforms[0], 0x2b1)); - auto values = regs.vs_int_uniforms[index]; + int index = (id - PICA_REG_INDEX_WORKAROUND(vs.int_uniforms[0], 0x2b1)); + auto values = regs.vs.int_uniforms[index]; g_state.vs.uniforms.i[index] = Math::Vec4<u8>(values.x, values.y, values.z, values.w); LOG_TRACE(HW_GPU, "Set integer uniform %d to %02x %02x %02x %02x", index, values.x.Value(), values.y.Value(), values.z.Value(), values.w.Value()); break; } - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[0], 0x2c1): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[1], 0x2c2): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[2], 0x2c3): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[3], 0x2c4): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[4], 0x2c5): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[5], 0x2c6): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[6], 0x2c7): - case PICA_REG_INDEX_WORKAROUND(vs_uniform_setup.set_value[7], 0x2c8): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[0], 0x2c1): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[1], 0x2c2): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[2], 0x2c3): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[3], 0x2c4): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[4], 0x2c5): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[5], 0x2c6): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[6], 0x2c7): + case PICA_REG_INDEX_WORKAROUND(vs.uniform_setup.set_value[7], 0x2c8): { - auto& uniform_setup = regs.vs_uniform_setup; + auto& uniform_setup = regs.vs.uniform_setup; // TODO: Does actual hardware indeed keep an intermediate buffer or does // it directly write the values? @@ -293,73 +399,33 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { break; } - // Load default vertex input attributes - case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[0], 0x233): - case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[1], 0x234): - case PICA_REG_INDEX_WORKAROUND(vs_default_attributes_setup.set_value[2], 0x235): - { - // TODO: Does actual hardware indeed keep an intermediate buffer or does - // it directly write the values? - default_attr_write_buffer[default_attr_counter++] = value; - - // Default attributes are written in a packed format such that four float24 values are encoded in - // three 32-bit numbers. We write to internal memory once a full such vector is - // written. - if (default_attr_counter >= 3) { - default_attr_counter = 0; - - auto& setup = regs.vs_default_attributes_setup; - - if (setup.index >= 16) { - LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); - break; - } - - Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index]; - - // NOTE: The destination component order indeed is "backwards" - attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8); - attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF)); - attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF)); - attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF); - - LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index, - attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(), - attribute.w.ToFloat32()); - - // TODO: Verify that this actually modifies the register! - setup.index = setup.index + 1; - } - break; - } - // Load shader program code - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[0], 0x2cc): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[1], 0x2cd): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[2], 0x2ce): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[3], 0x2cf): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[4], 0x2d0): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[5], 0x2d1): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[6], 0x2d2): - case PICA_REG_INDEX_WORKAROUND(vs_program.set_word[7], 0x2d3): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[0], 0x2cc): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[1], 0x2cd): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[2], 0x2ce): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[3], 0x2cf): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[4], 0x2d0): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[5], 0x2d1): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[6], 0x2d2): + case PICA_REG_INDEX_WORKAROUND(vs.program.set_word[7], 0x2d3): { - g_state.vs.program_code[regs.vs_program.offset] = value; - regs.vs_program.offset++; + g_state.vs.program_code[regs.vs.program.offset] = value; + regs.vs.program.offset++; break; } // Load swizzle pattern data - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[0], 0x2d6): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[1], 0x2d7): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[2], 0x2d8): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[3], 0x2d9): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[4], 0x2da): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[5], 0x2db): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[6], 0x2dc): - case PICA_REG_INDEX_WORKAROUND(vs_swizzle_patterns.set_word[7], 0x2dd): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[0], 0x2d6): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[1], 0x2d7): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[2], 0x2d8): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[3], 0x2d9): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[4], 0x2da): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[5], 0x2db): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[6], 0x2dc): + case PICA_REG_INDEX_WORKAROUND(vs.swizzle_patterns.set_word[7], 0x2dd): { - g_state.vs.swizzle_data[regs.vs_swizzle_patterns.offset] = value; - regs.vs_swizzle_patterns.offset++; + g_state.vs.swizzle_data[regs.vs.swizzle_patterns.offset] = value; + regs.vs.swizzle_patterns.offset++; break; } @@ -370,7 +436,7 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) { VideoCore::g_renderer->hw_rasterizer->NotifyPicaRegisterChanged(id); if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::CommandProcessed, reinterpret_cast<void*>(&id)); + g_debug_context->OnEvent(DebugContext::Event::PicaCommandProcessed, reinterpret_cast<void*>(&id)); } void ProcessCommandList(const u32* list, u32 size) { diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h index bb3d4150..022a71f5 100644 --- a/src/video_core/command_processor.h +++ b/src/video_core/command_processor.h @@ -4,11 +4,11 @@ #pragma once +#include <type_traits> + #include "common/bit_field.h" #include "common/common_types.h" -#include "pica.h" - namespace Pica { namespace CommandProcessor { diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index 7b8ab72b..e9a85841 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -23,6 +23,7 @@ #include "common/vector_math.h" #include "video_core/pica.h" +#include "video_core/renderer_base.h" #include "video_core/utils.h" #include "video_core/video_core.h" @@ -84,15 +85,11 @@ void GeometryDumper::AddTriangle(Vertex& v0, Vertex& v1, Vertex& v2) { vertices.push_back(v1); vertices.push_back(v2); - int num_vertices = vertices.size(); + int num_vertices = (int)vertices.size(); faces.push_back({ num_vertices-3, num_vertices-2, num_vertices-1 }); } void GeometryDumper::Dump() { - // NOTE: Permanently enabling this just trashes the hard disk for no reason. - // Hence, this is currently disabled. - return; - static int index = 0; std::string filename = std::string("geometry_dump") + std::to_string(++index) + ".obj"; @@ -115,10 +112,6 @@ void GeometryDumper::Dump() { void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data, u32 swizzle_size, u32 main_offset, const Regs::VSOutputAttributes* output_attributes) { - // NOTE: Permanently enabling this just trashes hard disks for no reason. - // Hence, this is currently disabled. - return; - struct StuffToWrite { u8* pointer; u32 size; @@ -240,8 +233,8 @@ void DumpShader(const u32* binary_data, u32 binary_size, const u32* swizzle_data dvle.main_offset_words = main_offset; dvle.output_register_table_offset = write_offset - dvlb.dvle_offset; - dvle.output_register_table_size = output_info_table.size(); - QueueForWriting((u8*)output_info_table.data(), output_info_table.size() * sizeof(OutputRegisterInfo)); + dvle.output_register_table_size = static_cast<uint32_t>(output_info_table.size()); + QueueForWriting((u8*)output_info_table.data(), static_cast<u32>(output_info_table.size() * sizeof(OutputRegisterInfo))); // TODO: Create a label table for "main" @@ -496,31 +489,31 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture // Lookup base value Math::Vec3<int> ret; if (differential_mode) { - ret.r() = differential.r; - ret.g() = differential.g; - ret.b() = differential.b; + ret.r() = static_cast<int>(differential.r); + ret.g() = static_cast<int>(differential.g); + ret.b() = static_cast<int>(differential.b); if (x >= 2) { - ret.r() += differential.dr; - ret.g() += differential.dg; - ret.b() += differential.db; + ret.r() += static_cast<int>(differential.dr); + ret.g() += static_cast<int>(differential.dg); + ret.b() += static_cast<int>(differential.db); } ret.r() = Color::Convert5To8(ret.r()); ret.g() = Color::Convert5To8(ret.g()); ret.b() = Color::Convert5To8(ret.b()); } else { if (x < 2) { - ret.r() = Color::Convert4To8(separate.r1); - ret.g() = Color::Convert4To8(separate.g1); - ret.b() = Color::Convert4To8(separate.b1); + ret.r() = Color::Convert4To8(static_cast<u8>(separate.r1)); + ret.g() = Color::Convert4To8(static_cast<u8>(separate.g1)); + ret.b() = Color::Convert4To8(static_cast<u8>(separate.b1)); } else { - ret.r() = Color::Convert4To8(separate.r2); - ret.g() = Color::Convert4To8(separate.g2); - ret.b() = Color::Convert4To8(separate.b2); + ret.r() = Color::Convert4To8(static_cast<u8>(separate.r2)); + ret.g() = Color::Convert4To8(static_cast<u8>(separate.g2)); + ret.b() = Color::Convert4To8(static_cast<u8>(separate.b2)); } } // Add modifier - unsigned table_index = (x < 2) ? table_index_1.Value() : table_index_2.Value(); + unsigned table_index = static_cast<int>((x < 2) ? table_index_1.Value() : table_index_2.Value()); static const std::array<std::array<u8, 2>, 8> etc1_modifier_table = {{ { 2, 8 }, { 5, 17 }, { 9, 29 }, { 13, 42 }, @@ -564,10 +557,6 @@ TextureInfo TextureInfo::FromPicaRegister(const Regs::TextureConfig& config, } void DumpTexture(const Pica::Regs::TextureConfig& texture_config, u8* data) { - // NOTE: Permanently enabling this just trashes hard disks for no reason. - // Hence, this is currently disabled. - return; - #ifndef HAVE_PNG return; #else diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index 7926d64e..81eea30a 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -14,6 +14,8 @@ #include "common/vector_math.h" +#include "core/tracer/recorder.h" + #include "video_core/pica.h" namespace Pica { @@ -23,11 +25,14 @@ public: enum class Event { FirstEvent = 0, - CommandLoaded = FirstEvent, - CommandProcessed, + PicaCommandLoaded = FirstEvent, + PicaCommandProcessed, IncomingPrimitiveBatch, FinishedPrimitiveBatch, VertexLoaded, + IncomingDisplayTransfer, + GSPCommandProcessed, + BufferSwapped, NumEvents }; @@ -129,6 +134,8 @@ public: Event active_breakpoint; bool at_breakpoint = false; + std::shared_ptr<CiTrace::Recorder> recorder = nullptr; + private: /** * Private default constructor to make sure people always construct this through Construct() @@ -150,6 +157,11 @@ extern std::shared_ptr<DebugContext> g_debug_context; // TODO: Get rid of this g namespace DebugUtils { +#define PICA_DUMP_GEOMETRY 0 +#define PICA_DUMP_SHADERS 0 +#define PICA_DUMP_TEXTURES 0 +#define PICA_LOG_TEV 0 + // Simple utility class for dumping geometry data to an OBJ file class GeometryDumper { public: diff --git a/src/video_core/hwrasterizer_base.h b/src/video_core/hwrasterizer_base.h index dec193f8..c8746c60 100644 --- a/src/video_core/hwrasterizer_base.h +++ b/src/video_core/hwrasterizer_base.h @@ -4,8 +4,13 @@ #pragma once -#include "common/emu_window.h" -#include "video_core/vertex_shader.h" +#include "common/common_types.h" + +namespace Pica { +namespace VertexShader { +struct OutputVertex; +} +} class HWRasterizer { public: diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index 543d9c44..17cb6678 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -2,7 +2,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <string.h> +#include <cstring> +#include <unordered_map> #include "pica.h" @@ -10,6 +11,75 @@ namespace Pica { State g_state; +std::string Regs::GetCommandName(int index) { + static std::unordered_map<u32, std::string> map; + + if (map.empty()) { + #define ADD_FIELD(name) \ + map.insert({static_cast<u32>(PICA_REG_INDEX(name)), #name}); \ + /* TODO: change to Regs::name when VS2015 and other compilers support it */ \ + for (u32 i = PICA_REG_INDEX(name) + 1; i < PICA_REG_INDEX(name) + sizeof(Regs().name) / 4; ++i) \ + map.insert({i, #name + std::string("+") + std::to_string(i-PICA_REG_INDEX(name))}); \ + + ADD_FIELD(trigger_irq); + ADD_FIELD(cull_mode); + ADD_FIELD(viewport_size_x); + ADD_FIELD(viewport_size_y); + ADD_FIELD(viewport_depth_range); + ADD_FIELD(viewport_depth_far_plane); + ADD_FIELD(viewport_corner); + ADD_FIELD(texture0_enable); + ADD_FIELD(texture0); + ADD_FIELD(texture0_format); + ADD_FIELD(texture1); + ADD_FIELD(texture1_format); + ADD_FIELD(texture2); + ADD_FIELD(texture2_format); + ADD_FIELD(tev_stage0); + ADD_FIELD(tev_stage1); + ADD_FIELD(tev_stage2); + ADD_FIELD(tev_stage3); + ADD_FIELD(tev_combiner_buffer_input); + ADD_FIELD(tev_stage4); + ADD_FIELD(tev_stage5); + ADD_FIELD(tev_combiner_buffer_color); + ADD_FIELD(output_merger); + ADD_FIELD(framebuffer); + ADD_FIELD(vertex_attributes); + ADD_FIELD(index_array); + ADD_FIELD(num_vertices); + ADD_FIELD(trigger_draw); + ADD_FIELD(trigger_draw_indexed); + ADD_FIELD(vs_default_attributes_setup); + ADD_FIELD(command_buffer); + ADD_FIELD(triangle_topology); + ADD_FIELD(gs.bool_uniforms); + ADD_FIELD(gs.int_uniforms); + ADD_FIELD(gs.main_offset); + ADD_FIELD(gs.input_register_map); + ADD_FIELD(gs.uniform_setup); + ADD_FIELD(gs.program); + ADD_FIELD(gs.swizzle_patterns); + ADD_FIELD(vs.bool_uniforms); + ADD_FIELD(vs.int_uniforms); + ADD_FIELD(vs.main_offset); + ADD_FIELD(vs.input_register_map); + ADD_FIELD(vs.uniform_setup); + ADD_FIELD(vs.program); + ADD_FIELD(vs.swizzle_patterns); + +#undef ADD_FIELD + } + + // Return empty string if no match is found + auto it = map.find(index); + if (it != map.end()) { + return it->second; + } else { + return std::string(); + } +} + void Init() { } diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 9628a758..34b02b2f 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -5,10 +5,9 @@ #pragma once #include <array> +#include <cmath> #include <cstddef> -#include <initializer_list> -#include <map> -#include <vector> +#include <string> #include "common/assert.h" #include "common/bit_field.h" @@ -114,11 +113,22 @@ struct Regs { struct TextureConfig { enum WrapMode : u32 { ClampToEdge = 0, + ClampToBorder = 1, Repeat = 2, MirroredRepeat = 3, }; - INSERT_PADDING_WORDS(0x1); + enum TextureFilter : u32 { + Nearest = 0, + Linear = 1 + }; + + union { + BitField< 0, 8, u32> r; + BitField< 8, 8, u32> g; + BitField<16, 8, u32> b; + BitField<24, 8, u32> a; + } border_color; union { BitField< 0, 16, u32> height; @@ -126,8 +136,10 @@ struct Regs { }; union { - BitField< 8, 2, WrapMode> wrap_s; - BitField<12, 2, WrapMode> wrap_t; + BitField< 1, 1, TextureFilter> mag_filter; + BitField< 2, 1, TextureFilter> min_filter; + BitField< 8, 2, WrapMode> wrap_t; + BitField<12, 2, WrapMode> wrap_s; }; INSERT_PADDING_WORDS(0x1); @@ -194,6 +206,7 @@ struct Regs { case TextureFormat::IA8: return 4; + case TextureFormat::I4: case TextureFormat::A4: return 1; @@ -284,6 +297,7 @@ struct Regs { AddSigned = 3, Lerp = 4, Subtract = 5, + Dot3_RGB = 6, MultiplyThenAdd = 8, AddThenMultiply = 9, @@ -414,6 +428,11 @@ struct Regs { GreaterThanOrEqual = 7, }; + enum class StencilAction : u32 { + Keep = 0, + Xor = 5, + }; + struct { union { // If false, logic blending is used @@ -448,15 +467,35 @@ struct Regs { BitField< 8, 8, u32> ref; } alpha_test; - union { - BitField< 0, 1, u32> stencil_test_enable; - BitField< 4, 3, CompareFunc> stencil_test_func; - BitField< 8, 8, u32> stencil_replacement_value; - BitField<16, 8, u32> stencil_reference_value; - BitField<24, 8, u32> stencil_mask; - } stencil_test; + struct { + union { + // If true, enable stencil testing + BitField< 0, 1, u32> enable; - INSERT_PADDING_WORDS(0x1); + // Comparison operation for stencil testing + BitField< 4, 3, CompareFunc> func; + + // Value to calculate the new stencil value from + BitField< 8, 8, u32> replacement_value; + + // Value to compare against for stencil testing + BitField<16, 8, u32> reference_value; + + // Mask to apply on stencil test inputs + BitField<24, 8, u32> mask; + }; + + union { + // Action to perform when the stencil test fails + BitField< 0, 3, StencilAction> action_stencil_fail; + + // Action to perform when stencil testing passed but depth testing fails + BitField< 4, 3, StencilAction> action_depth_fail; + + // Action to perform when both stencil and depth testing pass + BitField< 8, 3, StencilAction> action_depth_pass; + }; + } stencil_test; union { BitField< 0, 1, u32> depth_test_enable; @@ -506,7 +545,7 @@ struct Regs { struct { INSERT_PADDING_WORDS(0x6); - DepthFormat depth_format; + DepthFormat depth_format; // TODO: Should be a BitField! BitField<16, 3, ColorFormat> color_format; INSERT_PADDING_WORDS(0x4); @@ -752,171 +791,123 @@ struct Regs { INSERT_PADDING_WORDS(0x20); enum class TriangleTopology : u32 { - List = 0, - Strip = 1, - Fan = 2, - ListIndexed = 3, // TODO: No idea if this is correct + List = 0, + Strip = 1, + Fan = 2, + Shader = 3, // Programmable setup unit implemented in a geometry shader }; BitField<8, 2, TriangleTopology> triangle_topology; - INSERT_PADDING_WORDS(0x51); + INSERT_PADDING_WORDS(0x21); - BitField<0, 16, u32> vs_bool_uniforms; - union { - BitField< 0, 8, u32> x; - BitField< 8, 8, u32> y; - BitField<16, 8, u32> z; - BitField<24, 8, u32> w; - } vs_int_uniforms[4]; + struct ShaderConfig { + BitField<0, 16, u32> bool_uniforms; - INSERT_PADDING_WORDS(0x5); + union { + BitField< 0, 8, u32> x; + BitField< 8, 8, u32> y; + BitField<16, 8, u32> z; + BitField<24, 8, u32> w; + } int_uniforms[4]; - // Offset to shader program entry point (in words) - BitField<0, 16, u32> vs_main_offset; + INSERT_PADDING_WORDS(0x5); - union { - BitField< 0, 4, u64> attribute0_register; - BitField< 4, 4, u64> attribute1_register; - BitField< 8, 4, u64> attribute2_register; - BitField<12, 4, u64> attribute3_register; - BitField<16, 4, u64> attribute4_register; - BitField<20, 4, u64> attribute5_register; - BitField<24, 4, u64> attribute6_register; - BitField<28, 4, u64> attribute7_register; - BitField<32, 4, u64> attribute8_register; - BitField<36, 4, u64> attribute9_register; - BitField<40, 4, u64> attribute10_register; - BitField<44, 4, u64> attribute11_register; - BitField<48, 4, u64> attribute12_register; - BitField<52, 4, u64> attribute13_register; - BitField<56, 4, u64> attribute14_register; - BitField<60, 4, u64> attribute15_register; - - int GetRegisterForAttribute(int attribute_index) const { - u64 fields[] = { - attribute0_register, attribute1_register, attribute2_register, attribute3_register, - attribute4_register, attribute5_register, attribute6_register, attribute7_register, - attribute8_register, attribute9_register, attribute10_register, attribute11_register, - attribute12_register, attribute13_register, attribute14_register, attribute15_register, + // Offset to shader program entry point (in words) + BitField<0, 16, u32> main_offset; + + union { + BitField< 0, 4, u64> attribute0_register; + BitField< 4, 4, u64> attribute1_register; + BitField< 8, 4, u64> attribute2_register; + BitField<12, 4, u64> attribute3_register; + BitField<16, 4, u64> attribute4_register; + BitField<20, 4, u64> attribute5_register; + BitField<24, 4, u64> attribute6_register; + BitField<28, 4, u64> attribute7_register; + BitField<32, 4, u64> attribute8_register; + BitField<36, 4, u64> attribute9_register; + BitField<40, 4, u64> attribute10_register; + BitField<44, 4, u64> attribute11_register; + BitField<48, 4, u64> attribute12_register; + BitField<52, 4, u64> attribute13_register; + BitField<56, 4, u64> attribute14_register; + BitField<60, 4, u64> attribute15_register; + + int GetRegisterForAttribute(int attribute_index) const { + u64 fields[] = { + attribute0_register, attribute1_register, attribute2_register, attribute3_register, + attribute4_register, attribute5_register, attribute6_register, attribute7_register, + attribute8_register, attribute9_register, attribute10_register, attribute11_register, + attribute12_register, attribute13_register, attribute14_register, attribute15_register, + }; + return (int)fields[attribute_index]; + } + } input_register_map; + + // OUTMAP_MASK, 0x28E, CODETRANSFER_END + INSERT_PADDING_WORDS(0x3); + + struct { + enum Format : u32 + { + FLOAT24 = 0, + FLOAT32 = 1 }; - return (int)fields[attribute_index]; - } - } vs_input_register_map; - INSERT_PADDING_WORDS(0x3); + bool IsFloat32() const { + return format == FLOAT32; + } - struct { - enum Format : u32 - { - FLOAT24 = 0, - FLOAT32 = 1 - }; + union { + // Index of the next uniform to write to + // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices + // TODO: Maybe the uppermost index is for the geometry shader? Investigate! + BitField<0, 7, u32> index; - bool IsFloat32() const { - return format == FLOAT32; - } + BitField<31, 1, Format> format; + }; - union { - // Index of the next uniform to write to - // TODO: ctrulib uses 8 bits for this, however that seems to yield lots of invalid indices - BitField<0, 7, u32> index; + // Writing to these registers sets the current uniform. + u32 set_value[8]; - BitField<31, 1, Format> format; - }; + } uniform_setup; - // Writing to these registers sets the "current" uniform. - // TODO: It's not clear how the hardware stores what the "current" uniform is. - u32 set_value[8]; + INSERT_PADDING_WORDS(0x2); - } vs_uniform_setup; + struct { + // Offset of the next instruction to write code to. + // Incremented with each instruction write. + u32 offset; - INSERT_PADDING_WORDS(0x2); + // Writing to these registers sets the "current" word in the shader program. + u32 set_word[8]; + } program; - struct { - // Offset of the next instruction to write code to. - // Incremented with each instruction write. - u32 offset; + INSERT_PADDING_WORDS(0x1); - // Writing to these registers sets the "current" word in the shader program. - // TODO: It's not clear how the hardware stores what the "current" word is. - u32 set_word[8]; - } vs_program; + // This register group is used to load an internal table of swizzling patterns, + // which are indexed by each shader instruction to specify vector component swizzling. + struct { + // Offset of the next swizzle pattern to write code to. + // Incremented with each instruction write. + u32 offset; - INSERT_PADDING_WORDS(0x1); + // Writing to these registers sets the current swizzle pattern in the table. + u32 set_word[8]; + } swizzle_patterns; - // This register group is used to load an internal table of swizzling patterns, - // which are indexed by each shader instruction to specify vector component swizzling. - struct { - // Offset of the next swizzle pattern to write code to. - // Incremented with each instruction write. - u32 offset; + INSERT_PADDING_WORDS(0x2); + }; - // Writing to these registers sets the "current" swizzle pattern in the table. - // TODO: It's not clear how the hardware stores what the "current" swizzle pattern is. - u32 set_word[8]; - } vs_swizzle_patterns; + ShaderConfig gs; + ShaderConfig vs; - INSERT_PADDING_WORDS(0x22); + INSERT_PADDING_WORDS(0x20); // Map register indices to names readable by humans // Used for debugging purposes, so performance is not an issue here - static std::string GetCommandName(int index) { - std::map<u32, std::string> map; - - #define ADD_FIELD(name) \ - do { \ - map.insert({PICA_REG_INDEX(name), #name}); \ - /* TODO: change to Regs::name when VS2015 and other compilers support it */ \ - for (u32 i = PICA_REG_INDEX(name) + 1; i < PICA_REG_INDEX(name) + sizeof(Regs().name) / 4; ++i) \ - map.insert({i, #name + std::string("+") + std::to_string(i-PICA_REG_INDEX(name))}); \ - } while(false) - - ADD_FIELD(trigger_irq); - ADD_FIELD(cull_mode); - ADD_FIELD(viewport_size_x); - ADD_FIELD(viewport_size_y); - ADD_FIELD(viewport_depth_range); - ADD_FIELD(viewport_depth_far_plane); - ADD_FIELD(viewport_corner); - ADD_FIELD(texture0_enable); - ADD_FIELD(texture0); - ADD_FIELD(texture0_format); - ADD_FIELD(texture1); - ADD_FIELD(texture1_format); - ADD_FIELD(texture2); - ADD_FIELD(texture2_format); - ADD_FIELD(tev_stage0); - ADD_FIELD(tev_stage1); - ADD_FIELD(tev_stage2); - ADD_FIELD(tev_stage3); - ADD_FIELD(tev_combiner_buffer_input); - ADD_FIELD(tev_stage4); - ADD_FIELD(tev_stage5); - ADD_FIELD(tev_combiner_buffer_color); - ADD_FIELD(output_merger); - ADD_FIELD(framebuffer); - ADD_FIELD(vertex_attributes); - ADD_FIELD(index_array); - ADD_FIELD(num_vertices); - ADD_FIELD(trigger_draw); - ADD_FIELD(trigger_draw_indexed); - ADD_FIELD(vs_default_attributes_setup); - ADD_FIELD(command_buffer); - ADD_FIELD(triangle_topology); - ADD_FIELD(vs_bool_uniforms); - ADD_FIELD(vs_int_uniforms); - ADD_FIELD(vs_main_offset); - ADD_FIELD(vs_input_register_map); - ADD_FIELD(vs_uniform_setup); - ADD_FIELD(vs_program); - ADD_FIELD(vs_swizzle_patterns); - - #undef ADD_FIELD - - // Return empty string if no match is found - return map[index]; - } + static std::string GetCommandName(int index); static inline size_t NumIds() { return sizeof(Regs) / sizeof(u32); @@ -982,17 +973,14 @@ ASSERT_REG_POSITION(trigger_draw_indexed, 0x22f); ASSERT_REG_POSITION(vs_default_attributes_setup, 0x232); ASSERT_REG_POSITION(command_buffer, 0x238); ASSERT_REG_POSITION(triangle_topology, 0x25e); -ASSERT_REG_POSITION(vs_bool_uniforms, 0x2b0); -ASSERT_REG_POSITION(vs_int_uniforms, 0x2b1); -ASSERT_REG_POSITION(vs_main_offset, 0x2ba); -ASSERT_REG_POSITION(vs_input_register_map, 0x2bb); -ASSERT_REG_POSITION(vs_uniform_setup, 0x2c0); -ASSERT_REG_POSITION(vs_program, 0x2cb); -ASSERT_REG_POSITION(vs_swizzle_patterns, 0x2d5); +ASSERT_REG_POSITION(gs, 0x280); +ASSERT_REG_POSITION(vs, 0x2b0); #undef ASSERT_REG_POSITION #endif // !defined(_MSC_VER) +static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig structure has incorrect size"); + // The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway. static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be"); static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be"); @@ -1014,7 +1002,7 @@ struct float24 { u32 mantissa = hex & 0xFFFF; u32 exponent = (hex >> 16) & 0x7F; u32 sign = hex >> 23; - ret.value = powf(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * powf(2.0f, -16.f)); + ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f)); if (sign) ret.value = -ret.value; } @@ -1102,7 +1090,7 @@ struct State { Regs regs; /// Vertex shader memory - struct { + struct ShaderSetup { struct { Math::Vec4<float24> f[96]; std::array<bool, 16> b; @@ -1113,7 +1101,10 @@ struct State { std::array<u32, 1024> program_code; std::array<u32, 1024> swizzle_data; - } vs; + }; + + ShaderSetup vs; + ShaderSetup gs; /// Current Pica command list struct { diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp index 0120f289..2f22bdcc 100644 --- a/src/video_core/primitive_assembly.cpp +++ b/src/video_core/primitive_assembly.cpp @@ -20,8 +20,9 @@ template<typename VertexType> void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandler triangle_handler) { switch (topology) { + // TODO: Figure out what's different with TriangleTopology::Shader. case Regs::TriangleTopology::List: - case Regs::TriangleTopology::ListIndexed: + case Regs::TriangleTopology::Shader: if (buffer_index < 2) { buffer[buffer_index++] = vtx; } else { diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index 59d156ee..68b7cc05 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -126,6 +126,30 @@ static u32 GetDepth(int x, int y) { } } +static u8 GetStencil(int x, int y) { + const auto& framebuffer = g_state.regs.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; + + u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; + u8* src_pixel = depth_buffer + src_offset; + + switch (framebuffer.depth_format) { + case Regs::DepthFormat::D24S8: + return Color::DecodeD24S8(src_pixel).y; + + default: + LOG_WARNING(HW_GPU, "GetStencil called for function which doesn't have a stencil component (format %u)", framebuffer.depth_format); + return 0; + } +} + static void SetDepth(int x, int y, u32 value) { const auto& framebuffer = g_state.regs.framebuffer; const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); @@ -144,13 +168,15 @@ static void SetDepth(int x, int y, u32 value) { case Regs::DepthFormat::D16: Color::EncodeD16(value, dst_pixel); break; + case Regs::DepthFormat::D24: Color::EncodeD24(value, dst_pixel); break; + case Regs::DepthFormat::D24S8: - // TODO(Subv): Implement the stencil buffer - Color::EncodeD24S8(value, 0, dst_pixel); + Color::EncodeD24X8(value, dst_pixel); break; + default: LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); UNIMPLEMENTED(); @@ -158,6 +184,53 @@ static void SetDepth(int x, int y, u32 value) { } } +static void SetStencil(int x, int y, u8 value) { + const auto& framebuffer = g_state.regs.framebuffer; + const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress(); + u8* depth_buffer = Memory::GetPhysicalPointer(addr); + + y = framebuffer.height - y; + + const u32 coarse_y = y & ~7; + u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format); + u32 stride = framebuffer.width * bytes_per_pixel; + + u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride; + u8* dst_pixel = depth_buffer + dst_offset; + + switch (framebuffer.depth_format) { + case Pica::Regs::DepthFormat::D16: + case Pica::Regs::DepthFormat::D24: + // Nothing to do + break; + + case Pica::Regs::DepthFormat::D24S8: + Color::EncodeX24S8(value, dst_pixel); + break; + + default: + LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format); + UNIMPLEMENTED(); + break; + } +} + +// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not! +static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) { + switch (action) { + case Regs::StencilAction::Keep: + return dest; + + case Regs::StencilAction::Xor: + return dest ^ ref; + + default: + LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action); + UNIMPLEMENTED(); + return 0; + } +} + // NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values struct Fix12P4 { Fix12P4() {} @@ -276,6 +349,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, auto textures = regs.GetTextures(); auto tev_stages = regs.GetTevStages(); + bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8; + const auto stencil_test = g_state.regs.output_merger.stencil_test; + // Enter rasterization loop, starting at the center of the topleft bounding box corner. // TODO: Not sure if looping through x first might be faster for (u16 y = min_y + 8; y < max_y; y += 0x10) { @@ -349,6 +425,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, val = std::min(val, (int)size - 1); return val; + case Regs::TextureConfig::ClampToBorder: + return val; + case Regs::TextureConfig::Repeat: return (int)((unsigned)val % size); @@ -367,17 +446,26 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, } }; - // Textures are laid out from bottom to top, hence we invert the t coordinate. - // NOTE: This may not be the right place for the inversion. - // TODO: Check if this applies to ETC textures, too. - s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); - t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); - - u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); - auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); - - texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); - DebugUtils::DumpTexture(texture.config, texture_data); + if ((texture.config.wrap_s == Regs::TextureConfig::ClampToBorder && (s < 0 || s >= texture.config.width)) + || (texture.config.wrap_t == Regs::TextureConfig::ClampToBorder && (t < 0 || t >= texture.config.height))) { + auto border_color = texture.config.border_color; + texture_color[i] = { border_color.r, border_color.g, border_color.b, border_color.a }; + } else { + // Textures are laid out from bottom to top, hence we invert the t coordinate. + // NOTE: This may not be the right place for the inversion. + // TODO: Check if this applies to ETC textures, too. + s = GetWrappedTexCoord(texture.config.wrap_s, s, texture.config.width); + t = texture.config.height - 1 - GetWrappedTexCoord(texture.config.wrap_t, t, texture.config.height); + + u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress()); + auto info = DebugUtils::TextureInfo::FromPicaRegister(texture.config, texture.format); + + // TODO: Apply the min and mag filters to the texture + texture_color[i] = DebugUtils::LookupTexture(texture_data, s, t, info); +#if PICA_DUMP_TEXTURES + DebugUtils::DumpTexture(texture.config, texture_data); +#endif + } } // Texture environment - consists of 6 stages of color and alpha combining. @@ -556,7 +644,18 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, result = (result * input[2].Cast<int>()) / 255; return result.Cast<u8>(); } - + case Operation::Dot3_RGB: + { + // Not fully accurate. + // Worst case scenario seems to yield a +/-3 error + // Some HW results indicate that the per-component computation can't have a higher precision than 1/256, + // while dot3_rgb( (0x80,g0,b0),(0x7F,g1,b1) ) and dot3_rgb( (0x80,g0,b0),(0x80,g1,b1) ) give different results + int result = ((input[0].r() * 2 - 255) * (input[1].r() * 2 - 255) + 128) / 256 + + ((input[0].g() * 2 - 255) * (input[1].g() * 2 - 255) + 128) / 256 + + ((input[0].b() * 2 - 255) * (input[1].b() * 2 - 255) + 128) / 256; + result = std::max(0, std::min(255, result)); + return { (u8)result, (u8)result, (u8)result }; + } default: LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op); UNIMPLEMENTED(); @@ -638,6 +737,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, } const auto& output_merger = regs.output_merger; + // TODO: Does alpha testing happen before or after stencil? if (output_merger.alpha_test.enable) { bool pass = false; @@ -679,6 +779,54 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, continue; } + u8 old_stencil = 0; + if (stencil_action_enable) { + old_stencil = GetStencil(x >> 4, y >> 4); + u8 dest = old_stencil & stencil_test.mask; + u8 ref = stencil_test.reference_value & stencil_test.mask; + + bool pass = false; + switch (stencil_test.func) { + case Regs::CompareFunc::Never: + pass = false; + break; + + case Regs::CompareFunc::Always: + pass = true; + break; + + case Regs::CompareFunc::Equal: + pass = (ref == dest); + break; + + case Regs::CompareFunc::NotEqual: + pass = (ref != dest); + break; + + case Regs::CompareFunc::LessThan: + pass = (ref < dest); + break; + + case Regs::CompareFunc::LessThanOrEqual: + pass = (ref <= dest); + break; + + case Regs::CompareFunc::GreaterThan: + pass = (ref > dest); + break; + + case Regs::CompareFunc::GreaterThanOrEqual: + pass = (ref >= dest); + break; + } + + if (!pass) { + u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value); + SetStencil(x >> 4, y >> 4, new_stencil); + continue; + } + } + // TODO: Does depth indeed only get written even if depth testing is enabled? if (output_merger.depth_test_enable) { unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); @@ -723,11 +871,22 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0, break; } - if (!pass) + if (!pass) { + if (stencil_action_enable) { + u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value); + SetStencil(x >> 4, y >> 4, new_stencil); + } continue; + } if (output_merger.depth_write_enable) SetDepth(x >> 4, y >> 4, z); + + if (stencil_action_enable) { + // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway? + u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value); + SetStencil(x >> 4, y >> 4, new_stencil); + } } auto dest = GetPixel(x >> 4, y >> 4); diff --git a/src/video_core/renderer_base.h b/src/video_core/renderer_base.h index 5757ac75..6587bcf2 100644 --- a/src/video_core/renderer_base.h +++ b/src/video_core/renderer_base.h @@ -4,10 +4,14 @@ #pragma once +#include <memory> + #include "common/common_types.h" #include "video_core/hwrasterizer_base.h" +class EmuWindow; + class RendererBase : NonCopyable { public: diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 518f7933..2db845da 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -2,10 +2,15 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <cstring> +#include <memory> + #include "common/color.h" +#include "common/math_util.h" -#include "core/settings.h" #include "core/hw/gpu.h" +#include "core/memory.h" +#include "core/settings.h" #include "video_core/pica.h" #include "video_core/utils.h" @@ -16,8 +21,6 @@ #include "generated/gl_3_2_core.h" -#include <memory> - static bool IsPassThroughTevStage(const Pica::Regs::TevStageConfig& stage) { return (stage.color_op == Pica::Regs::TevStageConfig::Operation::Replace && stage.alpha_op == Pica::Regs::TevStageConfig::Operation::Replace && @@ -813,12 +816,16 @@ void RasterizerOpenGL::ReloadColorBuffer() { } void RasterizerOpenGL::ReloadDepthBuffer() { + PAddr depth_buffer_addr = Pica::g_state.regs.framebuffer.GetDepthBufferPhysicalAddress(); + + if (depth_buffer_addr == 0) + return; + // TODO: Appears to work, but double-check endianness of depth values and order of depth-stencil - u8* depth_buffer = Memory::GetPhysicalPointer(Pica::g_state.regs.framebuffer.GetDepthBufferPhysicalAddress()); + u8* depth_buffer = Memory::GetPhysicalPointer(depth_buffer_addr); - if (depth_buffer == nullptr) { + if (depth_buffer == nullptr) return; - } u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index d7d422b1..ae7b26fc 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -4,7 +4,12 @@ #pragma once +#include <vector> + +#include "common/common_types.h" + #include "video_core/hwrasterizer_base.h" +#include "video_core/vertex_shader.h" #include "gl_state.h" #include "gl_rasterizer_cache.h" diff --git a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp index 2e4110a8..dc3ffdf2 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer_cache.cpp @@ -31,12 +31,18 @@ void RasterizerCacheOpenGL::LoadAndBindTexture(OpenGLState &state, unsigned text state.texture_units[texture_unit].texture_2d = new_texture->texture.handle; state.Apply(); - // TODO: Need to choose filters that correspond to PICA once register is declared - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, PicaToGL::TextureFilterMode(config.config.mag_filter)); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, PicaToGL::TextureFilterMode(config.config.min_filter)); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, PicaToGL::WrapMode(config.config.wrap_s)); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, PicaToGL::WrapMode(config.config.wrap_t)); + GLenum wrap_s = PicaToGL::WrapMode(config.config.wrap_s); + GLenum wrap_t = PicaToGL::WrapMode(config.config.wrap_t); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, wrap_s); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, wrap_t); + + if (wrap_s == GL_CLAMP_TO_BORDER || wrap_t == GL_CLAMP_TO_BORDER) { + auto border_color = PicaToGL::ColorRGBA8((u8*)&config.config.border_color.r); + glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, border_color.data()); + } const auto info = Pica::DebugUtils::TextureInfo::FromPicaRegister(config.config, config.format); diff --git a/src/video_core/renderer_opengl/gl_resource_manager.cpp b/src/video_core/renderer_opengl/gl_resource_manager.cpp deleted file mode 100644 index 8f4ae28a..00000000 --- a/src/video_core/renderer_opengl/gl_resource_manager.cpp +++ /dev/null @@ -1,111 +0,0 @@ -// Copyright 2015 Citra Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include "video_core/renderer_opengl/gl_resource_manager.h" -#include "video_core/renderer_opengl/gl_shader_util.h" - -// Textures -OGLTexture::OGLTexture() : handle(0) { -} - -OGLTexture::~OGLTexture() { - Release(); -} - -void OGLTexture::Create() { - if (handle != 0) { - return; - } - - glGenTextures(1, &handle); -} - -void OGLTexture::Release() { - glDeleteTextures(1, &handle); - handle = 0; -} - -// Shaders -OGLShader::OGLShader() : handle(0) { -} - -OGLShader::~OGLShader() { - Release(); -} - -void OGLShader::Create(const char* vert_shader, const char* frag_shader) { - if (handle != 0) { - return; - } - - handle = ShaderUtil::LoadShaders(vert_shader, frag_shader); -} - -void OGLShader::Release() { - glDeleteProgram(handle); - handle = 0; -} - -// Buffer objects -OGLBuffer::OGLBuffer() : handle(0) { -} - -OGLBuffer::~OGLBuffer() { - Release(); -} - -void OGLBuffer::Create() { - if (handle != 0) { - return; - } - - glGenBuffers(1, &handle); -} - -void OGLBuffer::Release() { - glDeleteBuffers(1, &handle); - handle = 0; -} - -// Vertex array objects -OGLVertexArray::OGLVertexArray() : handle(0) { -} - -OGLVertexArray::~OGLVertexArray() { - Release(); -} - -void OGLVertexArray::Create() { - if (handle != 0) { - return; - } - - glGenVertexArrays(1, &handle); -} - -void OGLVertexArray::Release() { - glDeleteVertexArrays(1, &handle); - handle = 0; -} - -// Framebuffers -OGLFramebuffer::OGLFramebuffer() : handle(0) { -} - -OGLFramebuffer::~OGLFramebuffer() { - Release(); -} - -void OGLFramebuffer::Create() { - if (handle != 0) { - return; - } - - glGenFramebuffers(1, &handle); -} - -void OGLFramebuffer::Release() { - glDeleteFramebuffers(1, &handle); - handle = 0; -} diff --git a/src/video_core/renderer_opengl/gl_resource_manager.h b/src/video_core/renderer_opengl/gl_resource_manager.h index 975720d0..6f9dc012 100644 --- a/src/video_core/renderer_opengl/gl_resource_manager.h +++ b/src/video_core/renderer_opengl/gl_resource_manager.h @@ -4,76 +4,124 @@ #pragma once +#include <utility> + #include "common/common_types.h" -#include "generated/gl_3_2_core.h" +#include "video_core/renderer_opengl/generated/gl_3_2_core.h" +#include "video_core/renderer_opengl/gl_shader_util.h" -class OGLTexture : public NonCopyable { +class OGLTexture : private NonCopyable { public: - OGLTexture(); - ~OGLTexture(); + OGLTexture() = default; + OGLTexture(OGLTexture&& o) { std::swap(handle, o.handle); } + ~OGLTexture() { Release(); } + OGLTexture& operator=(OGLTexture&& o) { std::swap(handle, o.handle); return *this; } /// Creates a new internal OpenGL resource and stores the handle - void Create(); + void Create() { + if (handle != 0) return; + glGenTextures(1, &handle); + } /// Deletes the internal OpenGL resource - void Release(); + void Release() { + if (handle == 0) return; + glDeleteTextures(1, &handle); + handle = 0; + } - GLuint handle; + GLuint handle = 0; }; -class OGLShader : public NonCopyable { +class OGLShader : private NonCopyable { public: - OGLShader(); - ~OGLShader(); + OGLShader() = default; + OGLShader(OGLShader&& o) { std::swap(handle, o.handle); } + ~OGLShader() { Release(); } + OGLShader& operator=(OGLShader&& o) { std::swap(handle, o.handle); return *this; } /// Creates a new internal OpenGL resource and stores the handle - void Create(const char* vert_shader, const char* frag_shader); + void Create(const char* vert_shader, const char* frag_shader) { + if (handle != 0) return; + handle = ShaderUtil::LoadShaders(vert_shader, frag_shader); + } /// Deletes the internal OpenGL resource - void Release(); + void Release() { + if (handle == 0) return; + glDeleteProgram(handle); + handle = 0; + } - GLuint handle; + GLuint handle = 0; }; -class OGLBuffer : public NonCopyable { +class OGLBuffer : private NonCopyable { public: - OGLBuffer(); - ~OGLBuffer(); + OGLBuffer() = default; + OGLBuffer(OGLBuffer&& o) { std::swap(handle, o.handle); } + ~OGLBuffer() { Release(); } + OGLBuffer& operator=(OGLBuffer&& o) { std::swap(handle, o.handle); return *this; } /// Creates a new internal OpenGL resource and stores the handle - void Create(); + void Create() { + if (handle != 0) return; + glGenBuffers(1, &handle); + } /// Deletes the internal OpenGL resource - void Release(); + void Release() { + if (handle == 0) return; + glDeleteBuffers(1, &handle); + handle = 0; + } - GLuint handle; + GLuint handle = 0; }; -class OGLVertexArray : public NonCopyable { +class OGLVertexArray : private NonCopyable { public: - OGLVertexArray(); - ~OGLVertexArray(); + OGLVertexArray() = default; + OGLVertexArray(OGLVertexArray&& o) { std::swap(handle, o.handle); } + ~OGLVertexArray() { Release(); } + OGLVertexArray& operator=(OGLVertexArray&& o) { std::swap(handle, o.handle); return *this; } /// Creates a new internal OpenGL resource and stores the handle - void Create(); + void Create() { + if (handle != 0) return; + glGenVertexArrays(1, &handle); + } /// Deletes the internal OpenGL resource - void Release(); + void Release() { + if (handle == 0) return; + glDeleteVertexArrays(1, &handle); + handle = 0; + } - GLuint handle; + GLuint handle = 0; }; -class OGLFramebuffer : public NonCopyable { +class OGLFramebuffer : private NonCopyable { public: - OGLFramebuffer(); - ~OGLFramebuffer(); + OGLFramebuffer() = default; + OGLFramebuffer(OGLFramebuffer&& o) { std::swap(handle, o.handle); } + ~OGLFramebuffer() { Release(); } + OGLFramebuffer& operator=(OGLFramebuffer&& o) { std::swap(handle, o.handle); return *this; } /// Creates a new internal OpenGL resource and stores the handle - void Create(); + void Create() { + if (handle != 0) return; + glGenFramebuffers(1, &handle); + } /// Deletes the internal OpenGL resource - void Release(); + void Release() { + if (handle == 0) return; + glDeleteFramebuffers(1, &handle); + handle = 0; + } - GLuint handle; + GLuint handle = 0; }; diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 3526e16d..9efc1533 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -147,20 +147,17 @@ void OpenGLState::Apply() { // Textures for (unsigned texture_index = 0; texture_index < ARRAY_SIZE(texture_units); ++texture_index) { - if (texture_units[texture_index].enabled_2d != cur_state.texture_units[texture_index].enabled_2d) { + if (texture_units[texture_index].enabled_2d != cur_state.texture_units[texture_index].enabled_2d || + texture_units[texture_index].texture_2d != cur_state.texture_units[texture_index].texture_2d) { + glActiveTexture(GL_TEXTURE0 + texture_index); if (texture_units[texture_index].enabled_2d) { - glEnable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, texture_units[texture_index].texture_2d); } else { - glDisable(GL_TEXTURE_2D); + glBindTexture(GL_TEXTURE_2D, 0); } } - - if (texture_units[texture_index].texture_2d != cur_state.texture_units[texture_index].texture_2d) { - glActiveTexture(GL_TEXTURE0 + texture_index); - glBindTexture(GL_TEXTURE_2D, texture_units[texture_index].texture_2d); - } } // Framebuffer diff --git a/src/video_core/renderer_opengl/pica_to_gl.h b/src/video_core/renderer_opengl/pica_to_gl.h index e566f9f7..3b562da8 100644 --- a/src/video_core/renderer_opengl/pica_to_gl.h +++ b/src/video_core/renderer_opengl/pica_to_gl.h @@ -12,10 +12,37 @@ namespace PicaToGL { +inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) { + static const GLenum filter_mode_table[] = { + GL_NEAREST, // TextureFilter::Nearest + GL_LINEAR // TextureFilter::Linear + }; + + // Range check table for input + if (mode >= ARRAY_SIZE(filter_mode_table)) { + LOG_CRITICAL(Render_OpenGL, "Unknown texture filtering mode %d", mode); + UNREACHABLE(); + + return GL_LINEAR; + } + + GLenum gl_mode = filter_mode_table[mode]; + + // Check for dummy values indicating an unknown mode + if (gl_mode == 0) { + LOG_CRITICAL(Render_OpenGL, "Unknown texture filtering mode %d", mode); + UNIMPLEMENTED(); + + return GL_LINEAR; + } + + return gl_mode; +} + inline GLenum WrapMode(Pica::Regs::TextureConfig::WrapMode mode) { static const GLenum wrap_mode_table[] = { GL_CLAMP_TO_EDGE, // WrapMode::ClampToEdge - 0, // Unknown + GL_CLAMP_TO_BORDER,// WrapMode::ClampToBorder GL_REPEAT, // WrapMode::Repeat GL_MIRRORED_REPEAT // WrapMode::MirroredRepeat }; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 3399ca12..96e12839 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -2,22 +2,27 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include <algorithm> +#include <cstddef> +#include <cstdlib> + +#include "common/assert.h" +#include "common/emu_window.h" +#include "common/logging/log.h" +#include "common/profiler_reporting.h" + #include "core/hw/gpu.h" #include "core/hw/hw.h" #include "core/hw/lcd.h" #include "core/memory.h" #include "core/settings.h" -#include "common/emu_window.h" -#include "common/logging/log.h" -#include "common/profiler_reporting.h" - #include "video_core/video_core.h" #include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shaders.h" -#include <algorithm> +#include "video_core/debug_utils/debug_utils.h" /** * Vertex structure that the drawn screen rectangles are composed of. @@ -126,6 +131,10 @@ void RendererOpenGL::SwapBuffers() { hw_rasterizer->Reset(); } } + + if (Pica::g_debug_context && Pica::g_debug_context->recorder) { + Pica::g_debug_context->recorder->FrameFinished(); + } } /** diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp index 87006a83..960ae577 100644 --- a/src/video_core/vertex_shader.cpp +++ b/src/video_core/vertex_shader.cpp @@ -2,8 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include <stack> - +#include <boost/container/static_vector.hpp> #include <boost/range/algorithm.hpp> #include <common/file_util.h> @@ -27,7 +26,7 @@ namespace Pica { namespace VertexShader { struct VertexShaderState { - const u32* program_counter; + u32 program_counter; const float24* input_register_table[16]; Math::Vec4<float24> output_registers[16]; @@ -53,7 +52,7 @@ struct VertexShaderState { }; // TODO: Is there a maximal size for this? - std::stack<CallStackElement> call_stack; + boost::container::static_vector<CallStackElement, 16> call_stack; struct { u32 max_offset; // maximum program counter ever reached @@ -71,15 +70,15 @@ static void ProcessShaderCode(VertexShaderState& state) { while (true) { if (!state.call_stack.empty()) { - auto& top = state.call_stack.top(); - if (state.program_counter - program_code.data() == top.final_address) { + auto& top = state.call_stack.back(); + if (state.program_counter == top.final_address) { state.address_registers[2] += top.loop_increment; if (top.repeat_counter-- == 0) { - state.program_counter = &program_code[top.return_address]; - state.call_stack.pop(); + state.program_counter = top.return_address; + state.call_stack.pop_back(); } else { - state.program_counter = &program_code[top.loop_address]; + state.program_counter = top.loop_address; } // TODO: Is "trying again" accurate to hardware? @@ -88,17 +87,16 @@ static void ProcessShaderCode(VertexShaderState& state) { } bool exit_loop = false; - const Instruction& instr = *(const Instruction*)state.program_counter; - const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.common.operand_desc_id]; + const Instruction instr = { program_code[state.program_counter] }; + const SwizzlePattern swizzle = { swizzle_data[instr.common.operand_desc_id] }; - static auto call = [&program_code](VertexShaderState& state, u32 offset, u32 num_instructions, + static auto call = [](VertexShaderState& state, u32 offset, u32 num_instructions, u32 return_offset, u8 repeat_count, u8 loop_increment) { - state.program_counter = &program_code[offset] - 1; // -1 to make sure when incrementing the PC we end up at the correct offset - state.call_stack.push({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); + state.program_counter = offset - 1; // -1 to make sure when incrementing the PC we end up at the correct offset + ASSERT(state.call_stack.size() < state.call_stack.capacity()); + state.call_stack.push_back({ offset + num_instructions, return_offset, repeat_count, loop_increment, offset }); }; - u32 binary_offset = state.program_counter - program_code.data(); - - state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + binary_offset); + state.debug.max_offset = std::max<u32>(state.debug.max_offset, 1 + state.program_counter); auto LookupSourceRegister = [&](const SourceRegister& source_reg) -> const float24* { switch (source_reg.GetRegisterType()) { @@ -221,7 +219,7 @@ static void ProcessShaderCode(VertexShaderState& state) { for (int i = 0; i < num_components; ++i) dot = dot + src1[i] * src2[i]; - for (int i = 0; i < num_components; ++i) { + for (int i = 0; i < 4; ++i) { if (!swizzle.DestComponentEnabled(i)) continue; @@ -442,13 +440,13 @@ static void ProcessShaderCode(VertexShaderState& state) { case OpCode::Id::JMPC: if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { - state.program_counter = &program_code[instr.flow_control.dest_offset] - 1; + state.program_counter = instr.flow_control.dest_offset - 1; } break; case OpCode::Id::JMPU: if (uniforms.b[instr.flow_control.bool_uniform_id]) { - state.program_counter = &program_code[instr.flow_control.dest_offset] - 1; + state.program_counter = instr.flow_control.dest_offset - 1; } break; @@ -456,7 +454,7 @@ static void ProcessShaderCode(VertexShaderState& state) { call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, - binary_offset + 1, 0, 0); + state.program_counter + 1, 0, 0); break; case OpCode::Id::CALLU: @@ -464,7 +462,7 @@ static void ProcessShaderCode(VertexShaderState& state) { call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, - binary_offset + 1, 0, 0); + state.program_counter + 1, 0, 0); } break; @@ -473,7 +471,7 @@ static void ProcessShaderCode(VertexShaderState& state) { call(state, instr.flow_control.dest_offset, instr.flow_control.num_instructions, - binary_offset + 1, 0, 0); + state.program_counter + 1, 0, 0); } break; @@ -483,8 +481,8 @@ static void ProcessShaderCode(VertexShaderState& state) { case OpCode::Id::IFU: if (uniforms.b[instr.flow_control.bool_uniform_id]) { call(state, - binary_offset + 1, - instr.flow_control.dest_offset - binary_offset - 1, + state.program_counter + 1, + instr.flow_control.dest_offset - state.program_counter - 1, instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); } else { call(state, @@ -501,8 +499,8 @@ static void ProcessShaderCode(VertexShaderState& state) { if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) { call(state, - binary_offset + 1, - instr.flow_control.dest_offset - binary_offset - 1, + state.program_counter + 1, + instr.flow_control.dest_offset - state.program_counter - 1, instr.flow_control.dest_offset + instr.flow_control.num_instructions, 0, 0); } else { call(state, @@ -519,8 +517,8 @@ static void ProcessShaderCode(VertexShaderState& state) { state.address_registers[2] = uniforms.i[instr.flow_control.int_uniform_id].y; call(state, - binary_offset + 1, - instr.flow_control.dest_offset - binary_offset + 1, + state.program_counter + 1, + instr.flow_control.dest_offset - state.program_counter + 1, instr.flow_control.dest_offset + 1, uniforms.i[instr.flow_control.int_uniform_id].x, uniforms.i[instr.flow_control.int_uniform_id].z); @@ -546,20 +544,17 @@ static void ProcessShaderCode(VertexShaderState& state) { static Common::Profiling::TimingCategory shader_category("Vertex Shader"); -OutputVertex RunShader(const InputVertex& input, int num_attributes) { +OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup) { Common::Profiling::ScopeTimer timer(shader_category); - const auto& regs = g_state.regs; - const auto& vs = g_state.vs; VertexShaderState state; - const u32* main = &vs.program_code[regs.vs_main_offset]; - state.program_counter = (u32*)main; + state.program_counter = config.main_offset; state.debug.max_offset = 0; state.debug.max_opdesc_id = 0; // Setup input register table - const auto& attribute_register_map = regs.vs_input_register_map; + const auto& attribute_register_map = config.input_register_map; float24 dummy_register; boost::fill(state.input_register_table, &dummy_register); @@ -584,16 +579,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) { state.conditional_code[1] = false; ProcessShaderCode(state); - DebugUtils::DumpShader(vs.program_code.data(), state.debug.max_offset, vs.swizzle_data.data(), - state.debug.max_opdesc_id, regs.vs_main_offset, - regs.vs_output_attributes); +#if PICA_DUMP_SHADERS + DebugUtils::DumpShader(setup.program_code.data(), state.debug.max_offset, setup.swizzle_data.data(), + state.debug.max_opdesc_id, config.main_offset, + g_state.regs.vs_output_attributes); // TODO: Don't hardcode VS here +#endif // Setup output data OutputVertex ret; // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to // figure out what those circumstances are and enable the remaining outputs then. for (int i = 0; i < 7; ++i) { - const auto& output_register_map = regs.vs_output_attributes[i]; + const auto& output_register_map = g_state.regs.vs_output_attributes[i]; // TODO: Don't hardcode VS here u32 semantics[4] = { output_register_map.map_x, output_register_map.map_y, diff --git a/src/video_core/vertex_shader.h b/src/video_core/vertex_shader.h index 7471a6de..97f9250d 100644 --- a/src/video_core/vertex_shader.h +++ b/src/video_core/vertex_shader.h @@ -4,11 +4,10 @@ #pragma once -#include <initializer_list> +#include <type_traits> -#include <common/common_types.h> +#include "common/vector_math.h" -#include "math.h" #include "pica.h" namespace Pica { @@ -66,7 +65,7 @@ struct OutputVertex { static_assert(std::is_pod<OutputVertex>::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); -OutputVertex RunShader(const InputVertex& input, int num_attributes); +OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const State::ShaderSetup& setup); } // namespace diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h index 3f24df7b..14b33c9d 100644 --- a/src/video_core/video_core.h +++ b/src/video_core/video_core.h @@ -4,12 +4,11 @@ #pragma once -#include "common/emu_window.h" - -#include "renderer_base.h" - #include <atomic> +class EmuWindow; +class RendererBase; + //////////////////////////////////////////////////////////////////////////////////////////////////// // Video Core namespace |