aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/video_core/vertex_shader.cpp
diff options
context:
space:
mode:
authorGravatar Tony Wasserka <neobrainx@gmail.com>2015-03-12 14:18:46 +0100
committerGravatar Tony Wasserka <neobrainx@gmail.com>2015-03-12 14:18:46 +0100
commite4f5ec6272016dd34afe4e8901a9e8027324ba21 (patch)
tree60f2db6f42538875bf8e552926e5df4dbddfab58 /src/video_core/vertex_shader.cpp
parented5b275d21612906e6eeb4b1f344aa0f1eb31c10 (diff)
Pica/VertexShader: Fix a bug caused by incorrect assumptions about consecutive output register tables.
We now create a temporary buffer for the output registers and copy all of them to the actual output vertex structure after the shader has run. This is technically not necessary, but it will make the code easier to vectorize in the future.
Diffstat (limited to 'src/video_core/vertex_shader.cpp')
-rw-r--r--src/video_core/vertex_shader.cpp44
1 files changed, 24 insertions, 20 deletions
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 4eb3e743..e8d86517 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -72,7 +72,7 @@ struct VertexShaderState {
u32* program_counter;
const float24* input_register_table[16];
- float24* output_register_table[7*4];
+ Math::Vec4<float24> output_registers[16];
Math::Vec4<float24> temporary_registers[16];
bool conditional_code[2];
@@ -198,8 +198,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
src2[3] = src2[3] * float24::FromFloat32(-1);
}
- float24* dest = (instr.common.dest.Value() < 0x08) ? state.output_register_table[4*instr.common.dest.Value().GetIndex()]
- : (instr.common.dest.Value() < 0x10) ? dummy_vec4_float24
+ float24* dest = (instr.common.dest.Value() < 0x10) ? &state.output_registers[instr.common.dest.Value().GetIndex()][0]
: (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
@@ -409,8 +408,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
src3[3] = src3[3] * float24::FromFloat32(-1);
}
- float24* dest = (instr.mad.dest.Value() < 0x08) ? state.output_register_table[4*instr.mad.dest.Value().GetIndex()]
- : (instr.mad.dest.Value() < 0x10) ? dummy_vec4_float24
+ float24* dest = (instr.mad.dest.Value() < 0x10) ? &state.output_registers[instr.mad.dest.Value().GetIndex()][0]
: (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
@@ -587,12 +585,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
if(num_attributes > 14) state.input_register_table[attribute_register_map.attribute14_register] = &input.attr[14].x;
if(num_attributes > 15) state.input_register_table[attribute_register_map.attribute15_register] = &input.attr[15].x;
- // Setup output register table
- OutputVertex ret;
- // Zero output so that attributes which aren't output won't have denormals in them, which will
- // slow us down later.
- memset(&ret, 0, sizeof(ret));
+ state.conditional_code[0] = false;
+ state.conditional_code[1] = false;
+
+ ProcessShaderCode(state);
+ DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
+ state.debug.max_opdesc_id, registers.vs_main_offset,
+ registers.vs_output_attributes);
+ // Setup output data
+ OutputVertex ret;
+ // TODO(neobrain): Under some circumstances, up to 16 attributes may be output. We need to
+ // figure out what those circumstances are and enable the remaining outputs then.
for (int i = 0; i < 7; ++i) {
const auto& output_register_map = registers.vs_output_attributes[i];
@@ -601,18 +605,18 @@ OutputVertex RunShader(const InputVertex& input, int num_attributes) {
output_register_map.map_z, output_register_map.map_w
};
- for (int comp = 0; comp < 4; ++comp)
- state.output_register_table[4*i+comp] = ((float24*)&ret) + semantics[comp];
+ for (int comp = 0; comp < 4; ++comp) {
+ float24* out = ((float24*)&ret) + semantics[comp];
+ if (semantics[comp] != Regs::VSOutputAttributes::INVALID) {
+ *out = state.output_registers[i][comp];
+ } else {
+ // Zero output so that attributes which aren't output won't have denormals in them,
+ // which would slow us down later.
+ memset(out, 0, sizeof(*out));
+ }
+ }
}
- state.conditional_code[0] = false;
- state.conditional_code[1] = false;
-
- ProcessShaderCode(state);
- DebugUtils::DumpShader(shader_memory.data(), state.debug.max_offset, swizzle_data.data(),
- state.debug.max_opdesc_id, registers.vs_main_offset,
- registers.vs_output_attributes);
-
LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),