about | summary | refs | log | tree | commit | diff | homepage
path: root/src/video_core
diff options
context:
space:
mode:
Diffstat (limited to 'src/video_core')
-rw-r--r-- src/video_core/command_processor.cpp 155
-rw-r--r-- src/video_core/renderer_opengl/gl_rasterizer.cpp 7
-rw-r--r-- src/video_core/vertex_shader.cpp 6
3 files changed, 102 insertions, 66 deletions
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index ef9584ab..243abe84 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -206,92 +206,115 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
std::map<u32, u32> ranges;
} memory_accesses;
+ // Simple circular-replacement vertex cache
+ // The size has been tuned for optimal balance between hit-rate and the cost of lookup
+ const size_t VERTEX_CACHE_SIZE = 32;
+ std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
+ std::array<VertexShader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache;
+
+ unsigned int vertex_cache_pos = 0;
+ vertex_cache_ids.fill(-1);
+
for (unsigned int index = 0; index < regs.num_vertices; ++index)
{
unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index;
+ // -1 is a common special value used for primitive restart. Since it's unknown if
+ // the PICA supports it, and it would mess up the caching, guard against it here.
+ ASSERT(vertex != -1);
+
+ bool vertex_cache_hit = false;
+ VertexShader::OutputVertex output;
+
if (is_indexed) {
- // TODO: Implement some sort of vertex cache!
if (g_debug_context && Pica::g_debug_context->recorder) {
int size = index_u16 ? 2 : 1;
memory_accesses.AddAccess(base_address + index_info.offset + size * index, size);
}
- }
-
- // Initialize data for the current vertex
- VertexShader::InputVertex input;
-
- // Load a debugging token to check whether this gets loaded by the running
- // application or not.
- static const float24 debug_token = float24::FromRawFloat24(0x00abcdef);
- input.attr[0].w = debug_token;
-
- for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
- // Load the default attribute if we're configured to do so, this data will be overwritten by the loader data if it's set
- if (attribute_config.IsDefaultAttribute(i)) {
- input.attr[i] = g_state.vs.default_attributes[i];
- LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
- i, vertex, index,
- input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
- input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
- }
- // Load per-vertex data from the loader arrays
- for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
- u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
- const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
-
- if (g_debug_context && Pica::g_debug_context->recorder) {
- memory_accesses.AddAccess(source_addr,
- (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
- : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
+ for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
+ if (vertex == vertex_cache_ids[i]) {
+ output = vertex_cache[i];
+ vertex_cache_hit = true;
+ break;
}
-
- const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata :
- (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata :
- (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata :
- *(float*)srcdata;
-
- input.attr[i][comp] = float24::FromFloat32(srcval);
- LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f",
- comp, i, vertex, index,
- attribute_config.GetPhysicalBaseAddress(),
- vertex_attribute_sources[i] - base_address,
- vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
- input.attr[i][comp].ToFloat32());
}
}
- // HACK: Some games do not initialize the vertex position's w component. This leads
- // to critical issues since it messes up perspective division. As a
- // workaround, we force the fourth component to 1.0 if we find this to be the
- // case.
- // To do this, we additionally have to assume that the first input attribute
- // is the vertex position, since there's no information about this other than
- // the empiric observation that this is usually the case.
- if (input.attr[0].w == debug_token)
- input.attr[0].w = float24::FromFloat32(1.0);
+ if (!vertex_cache_hit) {
+ // Initialize data for the current vertex
+ VertexShader::InputVertex input;
+
+ for (int i = 0; i < attribute_config.GetNumTotalAttributes(); ++i) {
+ if (vertex_attribute_elements[i] != 0) {
+ // Components the array does not supply (arrays with < 4 components) default to
+ // (0, 0, 0, 1). These values are *not* taken from the default attribute
+ // settings, even if those are enabled for this attribute.
+ static const float24 zero = float24::FromFloat32(0.0f);
+ static const float24 one = float24::FromFloat32(1.0f);
+ input.attr[i] = Math::Vec4<float24>(zero, zero, zero, one);
+
+ // Load per-vertex data from the loader arrays
+ for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
+ u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
+ const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
+
+ if (g_debug_context && Pica::g_debug_context->recorder) {
+ memory_accesses.AddAccess(source_addr,
+ (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
+ : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
+ }
+
+ const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata :
+ (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata :
+ (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? *(s16*)srcdata :
+ *(float*)srcdata;
+
+ input.attr[i][comp] = float24::FromFloat32(srcval);
+ LOG_TRACE(HW_GPU, "Loaded component %x of attribute %x for vertex %x (index %x) from 0x%08x + 0x%08lx + 0x%04lx: %f",
+ comp, i, vertex, index,
+ attribute_config.GetPhysicalBaseAddress(),
+ vertex_attribute_sources[i] - base_address,
+ vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i],
+ input.attr[i][comp].ToFloat32());
+ }
+ } else if (attribute_config.IsDefaultAttribute(i)) {
+ // Load the default attribute if we're configured to do so
+ input.attr[i] = g_state.vs.default_attributes[i];
+ LOG_TRACE(HW_GPU, "Loaded default attribute %x for vertex %x (index %x): (%f, %f, %f, %f)",
+ i, vertex, index,
+ input.attr[i][0].ToFloat32(), input.attr[i][1].ToFloat32(),
+ input.attr[i][2].ToFloat32(), input.attr[i][3].ToFloat32());
+ } else {
+ // TODO(yuriks): In this case, no data gets loaded and the vertex
+ // remains with the last value it had. This isn't currently maintained
+ // as global state, however, and so won't work in Citra yet.
+ }
+ }
- if (g_debug_context)
- g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
+ if (g_debug_context)
+ g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input);
#if PICA_DUMP_GEOMETRY
- // NOTE: When dumping geometry, we simply assume that the first input attribute
- // corresponds to the position for now.
- DebugUtils::GeometryDumper::Vertex dumped_vertex = {
- input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32()
- };
- using namespace std::placeholders;
- dumping_primitive_assembler.SubmitVertex(dumped_vertex,
- std::bind(&DebugUtils::GeometryDumper::AddTriangle,
- &geometry_dumper, _1, _2, _3));
+ // NOTE: When dumping geometry, we simply assume that the first input attribute
+ // corresponds to the position for now.
+ DebugUtils::GeometryDumper::Vertex dumped_vertex = {
+ input.attr[0][0].ToFloat32(), input.attr[0][1].ToFloat32(), input.attr[0][2].ToFloat32()
+ };
+ using namespace std::placeholders;
+ dumping_primitive_assembler.SubmitVertex(dumped_vertex,
+ std::bind(&DebugUtils::GeometryDumper::AddTriangle,
+ &geometry_dumper, _1, _2, _3));
#endif
- // Send to vertex shader
- VertexShader::OutputVertex output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes(), g_state.regs.vs, g_state.vs);
+ // Send to vertex shader
+ output = VertexShader::RunShader(input, attribute_config.GetNumTotalAttributes(), g_state.regs.vs, g_state.vs);
- if (is_indexed) {
- // TODO: Add processed vertex to vertex cache!
+ if (is_indexed) {
+ vertex_cache[vertex_cache_pos] = output;
+ vertex_cache_ids[vertex_cache_pos] = vertex;
+ vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
+ }
}
if (Settings::values.use_hw_renderer) {
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 2db845da..1fc4e56b 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -7,6 +7,7 @@
#include "common/color.h"
#include "common/math_util.h"
+#include "common/profiler.h"
#include "core/hw/gpu.h"
#include "core/memory.h"
@@ -873,11 +874,15 @@ void RasterizerOpenGL::ReloadDepthBuffer() {
state.Apply();
}
+Common::Profiling::TimingCategory buffer_commit_category("Framebuffer Commit");
+
void RasterizerOpenGL::CommitColorBuffer() {
if (last_fb_color_addr != 0) {
u8* color_buffer = Memory::GetPhysicalPointer(last_fb_color_addr);
if (color_buffer != nullptr) {
+ Common::Profiling::ScopeTimer timer(buffer_commit_category);
+
u32 bytes_per_pixel = Pica::Regs::BytesPerColorPixel(fb_color_texture.format);
std::unique_ptr<u8[]> temp_gl_color_buffer(new u8[fb_color_texture.width * fb_color_texture.height * bytes_per_pixel]);
@@ -913,6 +918,8 @@ void RasterizerOpenGL::CommitDepthBuffer() {
u8* depth_buffer = Memory::GetPhysicalPointer(last_fb_depth_addr);
if (depth_buffer != nullptr) {
+ Common::Profiling::ScopeTimer timer(buffer_commit_category);
+
u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(fb_depth_texture.format);
// OpenGL needs 4 bpp alignment for D24
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 960ae577..5f66f345 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -609,6 +609,12 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes, const Regs:
}
}
+ // The hardware takes the absolute value of each vertex color component and saturates it to 1.0 like this, *before* doing interpolation
+ for (int i = 0; i < 4; ++i) {
+ ret.color[i] = float24::FromFloat32(
+ std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
+ }
+
LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),