aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
m---------externals/nihstro0
-rw-r--r--src/core/core.cpp4
-rw-r--r--src/core/hle/kernel/session.h10
-rw-r--r--src/core/hle/kernel/thread.cpp11
-rw-r--r--src/core/hle/kernel/thread.h8
-rw-r--r--src/video_core/pica.h67
-rw-r--r--src/video_core/rasterizer.cpp49
-rw-r--r--src/video_core/vertex_shader.cpp20
8 files changed, 143 insertions, 26 deletions
diff --git a/externals/nihstro b/externals/nihstro
-Subproject 4a78588b308564f7ebae193e0ae00d9a0d5741d
+Subproject 81f1804a43f625e3a1a20752c0db70a41341038
diff --git a/src/core/core.cpp b/src/core/core.cpp
index bb2ed7a9..b5c25823 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -61,10 +61,6 @@ int Init() {
g_sys_core = new ARM_DynCom(USER32MODE);
g_app_core = new ARM_DynCom(USER32MODE);
- // TODO: Whenever TLS is implemented, this should contain
- // the address of the 0x200-byte TLS
- g_app_core->SetCP15Register(CP15_THREAD_URO, Memory::TLS_AREA_VADDR);
-
LOG_DEBUG(Core, "Initialized OK");
return 0;
}
diff --git a/src/core/hle/kernel/session.h b/src/core/hle/kernel/session.h
index 0fd18148..8c3886ff 100644
--- a/src/core/hle/kernel/session.h
+++ b/src/core/hle/kernel/session.h
@@ -5,6 +5,7 @@
#pragma once
#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/thread.h"
#include "core/mem_map.h"
namespace Kernel {
@@ -12,12 +13,15 @@ namespace Kernel {
static const int kCommandHeaderOffset = 0x80; ///< Offset into command buffer of header
/**
- * Returns a pointer to the command buffer in kernel memory
+ * Returns a pointer to the command buffer in the current thread's TLS
+ * TODO(Subv): This is not entirely correct, the command buffer should be copied from
+ * the thread's TLS to an intermediate buffer in kernel memory, and then copied again to
+ * the service handler process' memory.
* @param offset Optional offset into command buffer
* @return Pointer to command buffer
*/
-inline static u32* GetCommandBuffer(const int offset=0) {
- return (u32*)Memory::GetPointer(Memory::TLS_AREA_VADDR + kCommandHeaderOffset + offset);
+inline static u32* GetCommandBuffer(const int offset = 0) {
+ return (u32*)Memory::GetPointer(GetCurrentThread()->GetTLSAddress() + kCommandHeaderOffset + offset);
}
/**
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 0a3fd7cb..5de8f9a7 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -197,6 +197,7 @@ static void SwitchContext(Thread* new_thread) {
new_thread->current_priority = new_thread->nominal_priority;
Core::g_app_core->LoadContext(new_thread->context);
+ Core::g_app_core->SetCP15Register(CP15_THREAD_URO, new_thread->GetTLSAddress());
} else {
current_thread = nullptr;
}
@@ -402,6 +403,12 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
thread->name = std::move(name);
thread->callback_handle = wakeup_callback_handle_table.Create(thread).MoveFrom();
+ VAddr tls_address = Memory::TLS_AREA_VADDR + (thread->thread_id - 1) * 0x200;
+
+ ASSERT_MSG(tls_address < Memory::TLS_AREA_VADDR_END, "Too many threads");
+
+ thread->tls_address = tls_address;
+
// TODO(peachum): move to ScheduleThread() when scheduler is added so selected core is used
// to initialize the context
Core::g_app_core->ResetContext(thread->context, stack_top, entry_point, arg);
@@ -495,6 +502,10 @@ void Thread::SetWaitSynchronizationOutput(s32 output) {
context.cpu_registers[1] = output;
}
+VAddr Thread::GetTLSAddress() const {
+ return tls_address;
+}
+
////////////////////////////////////////////////////////////////////////////////////////////////////
void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 9958b16e..6891c8c2 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -135,6 +135,12 @@ public:
*/
void Stop();
+ /*
+ * Returns the Thread Local Storage address of the current thread
+ * @returns VAddr of the thread's TLS
+ */
+ VAddr GetTLSAddress() const;
+
Core::ThreadContext context;
u32 thread_id;
@@ -150,6 +156,8 @@ public:
s32 processor_id;
+ VAddr tls_address; ///< Address of the Thread Local Storage of the thread
+
/// Mutexes currently held by this thread, which will be released when it exits.
boost::container::flat_set<SharedPtr<Mutex>> held_mutexes;
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index e4a91058..5e169ff6 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -226,7 +226,8 @@ struct Regs {
Texture1 = 0x4,
Texture2 = 0x5,
Texture3 = 0x6,
- // 0x7-0xc = primary color??
+
+ PreviousBuffer = 0xd,
Constant = 0xe,
Previous = 0xf,
};
@@ -299,7 +300,18 @@ struct Regs {
BitField<24, 8, u32> const_a;
};
- INSERT_PADDING_WORDS(0x1);
+ union {
+ BitField< 0, 2, u32> color_scale;
+ BitField<16, 2, u32> alpha_scale;
+ };
+
+ inline unsigned GetColorMultiplier() const {
+ return (color_scale < 3) ? (1 << color_scale) : 1;
+ }
+
+ inline unsigned GetAlphaMultiplier() const {
+ return (alpha_scale < 3) ? (1 << alpha_scale) : 1;
+ }
};
TevStageConfig tev_stage0;
@@ -309,11 +321,36 @@ struct Regs {
TevStageConfig tev_stage2;
INSERT_PADDING_WORDS(0x3);
TevStageConfig tev_stage3;
- INSERT_PADDING_WORDS(0x13);
+ INSERT_PADDING_WORDS(0x3);
+
+ union {
+ // Tev stages 0-3 write their output to the combiner buffer if the corresponding bit in
+ // these masks are set
+ BitField< 8, 4, u32> update_mask_rgb;
+ BitField<12, 4, u32> update_mask_a;
+
+ bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const {
+ return (stage_index < 4) && (update_mask_rgb & (1 << stage_index));
+ }
+
+ bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const {
+ return (stage_index < 4) && (update_mask_a & (1 << stage_index));
+ }
+ } tev_combiner_buffer_input;
+
+ INSERT_PADDING_WORDS(0xf);
TevStageConfig tev_stage4;
INSERT_PADDING_WORDS(0x3);
TevStageConfig tev_stage5;
- INSERT_PADDING_WORDS(0x3);
+
+ union {
+ BitField< 0, 8, u32> r;
+ BitField< 8, 8, u32> g;
+ BitField<16, 8, u32> b;
+ BitField<24, 8, u32> a;
+ } tev_combiner_buffer_color;
+
+ INSERT_PADDING_WORDS(0x2);
const std::array<Regs::TevStageConfig,6> GetTevStages() const {
return { tev_stage0, tev_stage1,
@@ -426,9 +463,7 @@ struct Regs {
D24S8 = 3
};
- /*
- * Returns the number of bytes in the specified depth format
- */
+ // Returns the number of bytes in the specified depth format
static u32 BytesPerDepthPixel(DepthFormat format) {
switch (format) {
case DepthFormat::D16:
@@ -443,6 +478,20 @@ struct Regs {
}
}
+ // Returns the number of bits per depth component of the specified depth format
+ static u32 DepthBitsPerPixel(DepthFormat format) {
+ switch (format) {
+ case DepthFormat::D16:
+ return 16;
+ case DepthFormat::D24:
+ case DepthFormat::D24S8:
+ return 24;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
+ UNIMPLEMENTED();
+ }
+ }
+
struct {
// Components are laid out in reverse byte order, most significant bits first.
enum ColorFormat : u32 {
@@ -784,8 +833,10 @@ struct Regs {
ADD_FIELD(tev_stage1);
ADD_FIELD(tev_stage2);
ADD_FIELD(tev_stage3);
+ ADD_FIELD(tev_combiner_buffer_input);
ADD_FIELD(tev_stage4);
ADD_FIELD(tev_stage5);
+ ADD_FIELD(tev_combiner_buffer_color);
ADD_FIELD(output_merger);
ADD_FIELD(framebuffer);
ADD_FIELD(vertex_attributes);
@@ -859,8 +910,10 @@ ASSERT_REG_POSITION(tev_stage0, 0xc0);
ASSERT_REG_POSITION(tev_stage1, 0xc8);
ASSERT_REG_POSITION(tev_stage2, 0xd0);
ASSERT_REG_POSITION(tev_stage3, 0xd8);
+ASSERT_REG_POSITION(tev_combiner_buffer_input, 0xe0);
ASSERT_REG_POSITION(tev_stage4, 0xf0);
ASSERT_REG_POSITION(tev_stage5, 0xf8);
+ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
ASSERT_REG_POSITION(output_merger, 0x100);
ASSERT_REG_POSITION(framebuffer, 0x110);
ASSERT_REG_POSITION(vertex_attributes, 0x200);
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 3b3fef48..46a326bb 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -90,7 +90,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
UNIMPLEMENTED();
}
- return {};
+ return {0, 0, 0, 0};
}
static u32 GetDepth(int x, int y) {
@@ -376,7 +376,13 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
// with some basic arithmetic. Alpha combiners can be configured separately but work
// analogously.
Math::Vec4<u8> combiner_output;
- for (const auto& tev_stage : tev_stages) {
+ Math::Vec4<u8> combiner_buffer = {
+ registers.tev_combiner_buffer_color.r, registers.tev_combiner_buffer_color.g,
+ registers.tev_combiner_buffer_color.b, registers.tev_combiner_buffer_color.a
+ };
+
+ for (unsigned tev_stage_index = 0; tev_stage_index < tev_stages.size(); ++tev_stage_index) {
+ const auto& tev_stage = tev_stages[tev_stage_index];
using Source = Regs::TevStageConfig::Source;
using ColorModifier = Regs::TevStageConfig::ColorModifier;
using AlphaModifier = Regs::TevStageConfig::AlphaModifier;
@@ -398,6 +404,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
case Source::Texture2:
return texture_color[2];
+ case Source::PreviousBuffer:
+ return combiner_buffer;
+
case Source::Constant:
return {tev_stage.const_r, tev_stage.const_g, tev_stage.const_b, tev_stage.const_a};
@@ -407,7 +416,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
default:
LOG_ERROR(HW_GPU, "Unknown color combiner source %d\n", (int)source);
UNIMPLEMENTED();
- return {};
+ return {0, 0, 0, 0};
}
};
@@ -490,6 +499,16 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
return result.Cast<u8>();
}
+ case Operation::AddSigned:
+ {
+ // TODO(bunnei): Verify that the color conversion from (float) 0.5f to (byte) 128 is correct
+ auto result = input[0].Cast<int>() + input[1].Cast<int>() - Math::MakeVec<int>(128, 128, 128);
+ result.r() = MathUtil::Clamp<int>(result.r(), 0, 255);
+ result.g() = MathUtil::Clamp<int>(result.g(), 0, 255);
+ result.b() = MathUtil::Clamp<int>(result.b(), 0, 255);
+ return result.Cast<u8>();
+ }
+
case Operation::Lerp:
return ((input[0] * input[2] + input[1] * (Math::MakeVec<u8>(255, 255, 255) - input[2]).Cast<u8>()) / 255).Cast<u8>();
@@ -524,7 +543,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
default:
LOG_ERROR(HW_GPU, "Unknown color combiner operation %d\n", (int)op);
UNIMPLEMENTED();
- return {};
+ return {0, 0, 0};
}
};
@@ -578,7 +597,20 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
};
auto alpha_output = AlphaCombine(tev_stage.alpha_op, alpha_result);
- combiner_output = Math::MakeVec(color_output, alpha_output);
+ combiner_output[0] = std::min((unsigned)255, color_output.r() * tev_stage.GetColorMultiplier());
+ combiner_output[1] = std::min((unsigned)255, color_output.g() * tev_stage.GetColorMultiplier());
+ combiner_output[2] = std::min((unsigned)255, color_output.b() * tev_stage.GetColorMultiplier());
+ combiner_output[3] = std::min((unsigned)255, alpha_output * tev_stage.GetAlphaMultiplier());
+
+ if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferColor(tev_stage_index)) {
+ combiner_buffer.r() = combiner_output.r();
+ combiner_buffer.g() = combiner_output.g();
+ combiner_buffer.b() = combiner_output.b();
+ }
+
+ if (registers.tev_combiner_buffer_input.TevStageUpdatesCombinerBufferAlpha(tev_stage_index)) {
+ combiner_buffer.a() = combiner_output.a();
+ }
}
if (registers.output_merger.alpha_test.enable) {
@@ -624,9 +656,10 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
// TODO: Does depth indeed only get written even if depth testing is enabled?
if (registers.output_merger.depth_test_enable) {
- u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
- v1.screenpos[2].ToFloat32() * w1 +
- v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
+ unsigned num_bits = Pica::Regs::DepthBitsPerPixel(registers.framebuffer.depth_format);
+ u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 +
+ v1.screenpos[2].ToFloat32() * w1 +
+ v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum);
u32 ref_z = GetDepth(x >> 4, y >> 4);
bool pass = false;
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index 51f4e58b..885b7de5 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -235,6 +235,15 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
+ case OpCode::Id::FLR:
+ for (int i = 0; i < 4; ++i) {
+ if (!swizzle.DestComponentEnabled(i))
+ continue;
+
+ dest[i] = float24::FromFloat32(std::floor(src1[i].ToFloat32()));
+ }
+ break;
+
case OpCode::Id::MAX:
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -366,12 +375,15 @@ static void ProcessShaderCode(VertexShaderState& state) {
case OpCode::Type::MultiplyAdd:
{
- if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) {
+ if ((instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) ||
+ (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI)) {
const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id];
- const float24* src1_ = LookupSourceRegister(instr.mad.src1);
- const float24* src2_ = LookupSourceRegister(instr.mad.src2);
- const float24* src3_ = LookupSourceRegister(instr.mad.src3);
+ bool is_inverted = (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MADI);
+
+ const float24* src1_ = LookupSourceRegister(instr.mad.GetSrc1(is_inverted));
+ const float24* src2_ = LookupSourceRegister(instr.mad.GetSrc2(is_inverted));
+ const float24* src3_ = LookupSourceRegister(instr.mad.GetSrc3(is_inverted));
const bool negate_src1 = ((bool)swizzle.negate_src1 != false);
const bool negate_src2 = ((bool)swizzle.negate_src2 != false);