aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar bunnei <bunneidev@gmail.com>2014-08-31 00:53:07 -0400
committerGravatar bunnei <bunneidev@gmail.com>2014-08-31 00:53:07 -0400
commit76372feb1959c0f53d02c2278ef4a14b794a808d (patch)
tree2c25fd17b90fac39fa1b1de238b7ffc15a01abae
parent038a51aac18df7fbb5fc58cff9811fc58aa8ee54 (diff)
parentaabfcfe6ad5b64016ceccfae4ac7d441dd2c2619 (diff)
Merge pull request #84 from bunnei/fix-hw-synchronization
Fix GPU/HW synchronization
-rw-r--r--src/citra/citra.cpp4
-rw-r--r--src/core/core.cpp26
-rw-r--r--src/core/core.h11
-rw-r--r--src/core/hw/gpu.cpp44
4 files changed, 51 insertions, 34 deletions
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index 9399ff29..7dc721dc 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -31,7 +31,9 @@ int __cdecl main(int argc, char **argv) {
return -1;
}
- Core::RunLoop();
+ while(true) {
+ Core::RunLoop();
+ }
delete emu_window;
diff --git a/src/core/core.cpp b/src/core/core.cpp
index fc990937..f21801e5 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -6,6 +6,8 @@
#include "common/log.h"
#include "common/symbols.h"
+#include "video_core/video_core.h"
+
#include "core/core.h"
#include "core/mem_map.h"
#include "core/hw/hw.h"
@@ -24,29 +26,17 @@ ARM_Interface* g_app_core = nullptr; ///< ARM11 application core
ARM_Interface* g_sys_core = nullptr; ///< ARM11 system (OS) core
/// Run the core CPU loop
-void RunLoop() {
- for (;;){
- // This function loops for 100 instructions in the CPU before trying to update hardware.
- // This is a little bit faster than SingleStep, and should be pretty much equivalent. The
- // number of instructions chosen is fairly arbitrary, however a large number will more
- // drastically affect the frequency of GSP interrupts and likely break things. The point of
- // this is to just loop in the CPU for more than 1 instruction to reduce overhead and make
- // it a little bit faster...
- g_app_core->Run(100);
- HW::Update();
- if (HLE::g_reschedule) {
- Kernel::Reschedule();
- }
+void RunLoop(int tight_loop) {
+ g_app_core->Run(tight_loop);
+ HW::Update();
+ if (HLE::g_reschedule) {
+ Kernel::Reschedule();
}
}
/// Step the CPU one instruction
void SingleStep() {
- g_app_core->Step();
- HW::Update();
- if (HLE::g_reschedule) {
- Kernel::Reschedule();
- }
+ RunLoop(1);
}
/// Halt the core
diff --git a/src/core/core.h b/src/core/core.h
index 4b42dabc..9c72c8b3 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -19,8 +19,15 @@ extern ARM_Interface* g_sys_core; ///< ARM11 system (OS) core
/// Start the core
void Start();
-/// Run the core CPU loop
-void RunLoop();
+/**
+ * Run the core CPU loop
+ * This function loops for tight_loop instructions (100 by default) in the CPU before trying to
+ * update hardware. This is a little bit faster than SingleStep, and should be pretty much
+ * equivalent. The default number of instructions is fairly arbitrary, however a large number will
+ * more drastically affect the frequency of GSP interrupts and likely break things. The point of
+ * this is to just loop in the CPU for more than 1 instruction to reduce overhead and make it faster.
+ */
+void RunLoop(int tight_loop=100);
/// Step the CPU one instruction
void SingleStep();
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index f1f3e7ab..8709b8eb 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -24,6 +24,7 @@ Regs g_regs;
u32 g_cur_line = 0; ///< Current vertical screen line
u64 g_last_line_ticks = 0; ///< CPU tick count from last vertical screen line
+u64 g_last_frame_ticks = 0; ///< CPU tick count from last frame
template <typename T>
inline void Read(T &var, const u32 raw_addr) {
@@ -179,27 +180,44 @@ void Update() {
auto& framebuffer_top = g_regs.framebuffer_config[0];
u64 current_ticks = Core::g_app_core->GetTicks();
- // Synchronize line...
- if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) {
- GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0);
- g_cur_line++;
- g_last_line_ticks = current_ticks;
- }
+ // Update the frame after a certain number of CPU ticks have elapsed. This assumes that the
+ // active frame in memory is always complete to render. There also may be issues with this
+ // becoming out-of-sync with GSP synchronization code (as follows). At this time, this seems to
+ // be the most effective solution for both homebrew and retail applications. With retail, this
+ // could be moved below (and probably would guarantee more accurate synchronization). However,
+ // primitive homebrew relies on a vertical blank interrupt to happen inevitably (regardless of a
+ // threading reschedule).
- // Synchronize frame...
- if (g_cur_line >= framebuffer_top.height) {
- g_cur_line = 0;
- GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
+ if ((current_ticks - g_last_frame_ticks) > GPU::kFrameTicks) {
VideoCore::g_renderer->SwapBuffers();
- Kernel::WaitCurrentThread(WAITTYPE_VBLANK);
- HLE::Reschedule(__func__);
+ g_last_frame_ticks = current_ticks;
+ }
+
+ // Synchronize GPU on a thread reschedule: Because we cannot accurately predict a vertical
+ // blank, we need to simulate it. Based on testing, it seems that retail applications work more
+ // accurately when this is signalled between thread switches.
+
+ if (HLE::g_reschedule) {
+
+ // Synchronize line...
+ if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) {
+ GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0);
+ g_cur_line++;
+ g_last_line_ticks = current_ticks;
+ }
+
+ // Synchronize frame...
+ if (g_cur_line >= framebuffer_top.height) {
+ g_cur_line = 0;
+ GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
+ }
}
}
/// Initialize hardware
void Init() {
g_cur_line = 0;
- g_last_line_ticks = Core::g_app_core->GetTicks();
+ g_last_frame_ticks = g_last_line_ticks = Core::g_app_core->GetTicks();
auto& framebuffer_top = g_regs.framebuffer_config[0];
auto& framebuffer_sub = g_regs.framebuffer_config[1];