aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/citra/CMakeLists.txt5
-rw-r--r--src/citra/citra.cpp40
-rw-r--r--src/citra_qt/CMakeLists.txt2
-rw-r--r--src/citra_qt/bootmanager.cpp2
-rw-r--r--src/citra_qt/bootmanager.h5
-rw-r--r--src/citra_qt/debugger/graphics_breakpoint_observer.h2
-rw-r--r--src/citra_qt/debugger/graphics_framebuffer.cpp62
-rw-r--r--src/citra_qt/debugger/graphics_framebuffer.h4
-rw-r--r--src/citra_qt/debugger/graphics_tracing.cpp170
-rw-r--r--src/citra_qt/debugger/graphics_tracing.h32
-rw-r--r--src/citra_qt/main.cpp9
-rw-r--r--src/common/color.h27
-rw-r--r--src/common/common_funcs.h6
-rw-r--r--src/common/file_util.h10
-rw-r--r--src/core/CMakeLists.txt3
-rw-r--r--src/core/hle/applets/applet.cpp9
-rw-r--r--src/core/hle/applets/applet.h6
-rw-r--r--src/core/hle/applets/swkbd.cpp8
-rw-r--r--src/core/hle/applets/swkbd.h3
-rw-r--r--src/core/hle/kernel/kernel.h1
-rw-r--r--src/core/hle/kernel/process.h1
-rw-r--r--src/core/hle/kernel/shared_memory.h3
-rw-r--r--src/core/hle/service/apt/apt.h9
-rw-r--r--src/core/hle/service/gsp_gpu.cpp2
-rw-r--r--src/core/hw/gpu.cpp78
-rw-r--r--src/core/hw/hw.cpp30
-rw-r--r--src/core/hw/lcd.cpp10
-rw-r--r--src/core/tracer/citrace.h101
-rw-r--r--src/core/tracer/recorder.cpp187
-rw-r--r--src/core/tracer/recorder.h90
-rw-r--r--src/video_core/command_processor.cpp60
-rw-r--r--src/video_core/debug_utils/debug_utils.h4
-rw-r--r--src/video_core/pica.h43
-rw-r--r--src/video_core/rasterizer.cpp142
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp6
35 files changed, 1107 insertions, 65 deletions
diff --git a/src/citra/CMakeLists.txt b/src/citra/CMakeLists.txt
index 5e8cbfa3..91868731 100644
--- a/src/citra/CMakeLists.txt
+++ b/src/citra/CMakeLists.txt
@@ -16,8 +16,11 @@ create_directory_groups(${SRCS} ${HEADERS})
add_executable(citra ${SRCS} ${HEADERS})
target_link_libraries(citra core common video_core)
target_link_libraries(citra ${GLFW_LIBRARIES} ${OPENGL_gl_LIBRARY} inih)
+if (MSVC)
+ target_link_libraries(citra getopt)
+endif()
target_link_libraries(citra ${PLATFORM_LIBRARIES})
if(${CMAKE_SYSTEM_NAME} MATCHES "Linux|FreeBSD|OpenBSD|NetBSD")
install(TARGETS citra RUNTIME DESTINATION "${CMAKE_INSTALL_PREFIX}/bin")
-endif()
+endif()
\ No newline at end of file
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index a59726c7..182646f4 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -3,6 +3,15 @@
// Refer to the license.txt file included.
#include <string>
+#include <thread>
+#include <iostream>
+
+#ifdef _MSC_VER
+#include <getopt.h>
+#else
+#include <unistd.h>
+#include <getopt.h>
+#endif
#include "common/logging/log.h"
#include "common/logging/backend.h"
@@ -18,12 +27,39 @@
#include "video_core/video_core.h"
+
+/// Prints a one-line usage summary to standard output.
+static void PrintHelp()
+{
+ std::cout << "Usage: citra <filename>" << std::endl;
+}
+
/// Application entry point
int main(int argc, char **argv) {
+    // Command line parsing: "-h"/"--help" prints the usage text and exits; any
+    // non-option argument is taken as the file to boot (the last one wins).
+    int option_index = 0;
+    std::string boot_filename;
+    static struct option long_options[] = {
+        { "help", no_argument, 0, 'h' },
+        { 0, 0, 0, 0 }
+    };
+
+    while (optind < argc) {
+        // getopt_long() returns an int. Storing it in a plain char breaks the
+        // comparison against -1 on platforms where char is unsigned.
+        int arg = getopt_long(argc, argv, ":h", long_options, &option_index);
+        if (arg != -1) {
+            switch (arg) {
+            case 'h':
+                PrintHelp();
+                return 0;
+            }
+        } else {
+            // getopt_long() may have advanced optind up to argc (e.g. for a
+            // trailing "--"); argv[argc] is a null pointer and must not be
+            // assigned to a std::string.
+            if (optind < argc) {
+                boot_filename = argv[optind];
+            }
+            optind++;
+        }
+    }
+
Log::Filter log_filter(Log::Level::Debug);
Log::SetFilter(&log_filter);
- if (argc < 2) {
+ if (boot_filename.empty()) {
LOG_CRITICAL(Frontend, "Failed to load ROM: No ROM specified");
return -1;
}
@@ -31,7 +67,7 @@ int main(int argc, char **argv) {
Config config;
log_filter.ParseFilterString(Settings::values.log_filter);
- std::string boot_filename = argv[1];
+
EmuWindow_GLFW* emu_window = new EmuWindow_GLFW;
VideoCore::g_hw_renderer_enabled = Settings::values.use_hw_renderer;
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index d4c0cecc..47aaeca2 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -12,6 +12,7 @@ set(SRCS
debugger/graphics_breakpoints.cpp
debugger/graphics_cmdlists.cpp
debugger/graphics_framebuffer.cpp
+ debugger/graphics_tracing.cpp
debugger/graphics_vertex_shader.cpp
debugger/profiler.cpp
debugger/ramview.cpp
@@ -35,6 +36,7 @@ set(HEADERS
debugger/graphics_breakpoints_p.h
debugger/graphics_cmdlists.h
debugger/graphics_framebuffer.h
+ debugger/graphics_tracing.h
debugger/graphics_vertex_shader.h
debugger/profiler.h
debugger/ramview.h
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index 9d36364d..fa7bce46 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -65,7 +65,7 @@ void EmuThread::run() {
was_active = false;
} else {
std::unique_lock<std::mutex> lock(running_mutex);
- running_cv.wait(lock, [this]{ return IsRunning() || stop_run; });
+ running_cv.wait(lock, [this]{ return IsRunning() || exec_step || stop_run; });
}
}
diff --git a/src/citra_qt/bootmanager.h b/src/citra_qt/bootmanager.h
index 47512431..1a1e0e6a 100644
--- a/src/citra_qt/bootmanager.h
+++ b/src/citra_qt/bootmanager.h
@@ -35,7 +35,10 @@ public:
* Steps the emulation thread by a single CPU instruction (if the CPU is not already running)
* @note This function is thread-safe
*/
- void ExecStep() { exec_step = true; }
+ void ExecStep() {
+ exec_step = true;
+ // Wake any thread blocked on running_cv; the wait predicate in EmuThread::run()
+ // checks exec_step, so a pending single step ends the wait.
+ running_cv.notify_all();
+ }
/**
* Sets whether the emulation thread is running or not
diff --git a/src/citra_qt/debugger/graphics_breakpoint_observer.h b/src/citra_qt/debugger/graphics_breakpoint_observer.h
index f0d3361f..02a0f4f4 100644
--- a/src/citra_qt/debugger/graphics_breakpoint_observer.h
+++ b/src/citra_qt/debugger/graphics_breakpoint_observer.h
@@ -13,7 +13,7 @@
* This is because the Pica breakpoint callbacks are called from a non-GUI thread, while
* the widget usually wants to perform reactions in the GUI thread.
*/
-class BreakPointObserverDock : public QDockWidget, private Pica::DebugContext::BreakPointObserver {
+class BreakPointObserverDock : public QDockWidget, protected Pica::DebugContext::BreakPointObserver {
Q_OBJECT
public:
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index 6bbe7572..39eefbf7 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -55,7 +55,9 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug
framebuffer_format_control->addItem(tr("RGBA4"));
framebuffer_format_control->addItem(tr("D16"));
framebuffer_format_control->addItem(tr("D24"));
- framebuffer_format_control->addItem(tr("D24S8"));
+ framebuffer_format_control->addItem(tr("D24X8"));
+ framebuffer_format_control->addItem(tr("X24S8"));
+ framebuffer_format_control->addItem(tr("(unknown)"));
// TODO: This QLabel should shrink the image to the available space rather than just expanding...
framebuffer_picture_label = new QLabel;
@@ -184,8 +186,32 @@ void GraphicsFramebufferWidget::OnUpdate()
framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();
framebuffer_width = framebuffer.GetWidth();
framebuffer_height = framebuffer.GetHeight();
- // TODO: It's unknown how this format is actually specified
- framebuffer_format = Format::RGBA8;
+
+ switch (framebuffer.color_format) {
+ case Pica::Regs::ColorFormat::RGBA8:
+ framebuffer_format = Format::RGBA8;
+ break;
+
+ case Pica::Regs::ColorFormat::RGB8:
+ framebuffer_format = Format::RGB8;
+ break;
+
+ case Pica::Regs::ColorFormat::RGB5A1:
+ framebuffer_format = Format::RGB5A1;
+ break;
+
+ case Pica::Regs::ColorFormat::RGB565:
+ framebuffer_format = Format::RGB565;
+ break;
+
+ case Pica::Regs::ColorFormat::RGBA4:
+ framebuffer_format = Format::RGBA4;
+ break;
+
+ default:
+ framebuffer_format = Format::Unknown;
+ break;
+ }
break;
}
@@ -197,7 +223,24 @@ void GraphicsFramebufferWidget::OnUpdate()
framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress();
framebuffer_width = framebuffer.GetWidth();
framebuffer_height = framebuffer.GetHeight();
- framebuffer_format = Format::D16;
+
+ switch (framebuffer.depth_format) {
+ case Pica::Regs::DepthFormat::D16:
+ framebuffer_format = Format::D16;
+ break;
+
+ case Pica::Regs::DepthFormat::D24:
+ framebuffer_format = Format::D24;
+ break;
+
+ case Pica::Regs::DepthFormat::D24S8:
+ framebuffer_format = Format::D24X8;
+ break;
+
+ default:
+ framebuffer_format = Format::Unknown;
+ break;
+ }
break;
}
@@ -258,7 +301,7 @@ void GraphicsFramebufferWidget::OnUpdate()
color.b() = (data >> 16) & 0xFF;
break;
}
- case Format::D24S8:
+ case Format::D24X8:
{
Math::Vec2<u32> data = Color::DecodeD24S8(pixel);
color.r() = data.x & 0xFF;
@@ -266,6 +309,12 @@ void GraphicsFramebufferWidget::OnUpdate()
color.b() = (data.x >> 16) & 0xFF;
break;
}
+ case Format::X24S8:
+ {
+ Math::Vec2<u32> data = Color::DecodeD24S8(pixel);
+ color.r() = color.g() = color.b() = data.y;
+ break;
+ }
default:
qDebug() << "Unknown fb color format " << static_cast<int>(framebuffer_format);
break;
@@ -286,7 +335,8 @@ void GraphicsFramebufferWidget::OnUpdate()
u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format format) {
switch (format) {
case Format::RGBA8:
- case Format::D24S8:
+ case Format::D24X8:
+ case Format::X24S8:
return 4;
case Format::RGB8:
case Format::D24:
diff --git a/src/citra_qt/debugger/graphics_framebuffer.h b/src/citra_qt/debugger/graphics_framebuffer.h
index 4cb396ff..e9eae679 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.h
+++ b/src/citra_qt/debugger/graphics_framebuffer.h
@@ -35,7 +35,9 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock {
RGBA4 = 4,
D16 = 5,
D24 = 6,
- D24S8 = 7
+ D24X8 = 7,
+ X24S8 = 8,
+ Unknown = 9
};
static u32 BytesPerPixel(Format format);
diff --git a/src/citra_qt/debugger/graphics_tracing.cpp b/src/citra_qt/debugger/graphics_tracing.cpp
new file mode 100644
index 00000000..3f20f149
--- /dev/null
+++ b/src/citra_qt/debugger/graphics_tracing.cpp
@@ -0,0 +1,170 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <memory>
+
+#include <QBoxLayout>
+#include <QComboBox>
+#include <QFileDialog>
+#include <QLabel>
+#include <QMessageBox>
+#include <QPushButton>
+#include <QSpinBox>
+
+#include <boost/range/algorithm/copy.hpp>
+
+#include "core/hw/gpu.h"
+#include "core/hw/lcd.h"
+
+#include "video_core/pica.h"
+
+#include "nihstro/float24.h"
+
+#include "graphics_tracing.h"
+
+// Builds the "CiTrace Recorder" dock: three buttons (start, stop-and-save, abort) in a
+// single horizontal row, wired to the corresponding slots of this widget.
+GraphicsTracingWidget::GraphicsTracingWidget(std::shared_ptr<Pica::DebugContext> debug_context,
+ QWidget* parent)
+ : BreakPointObserverDock(debug_context, tr("CiTrace Recorder"), parent) {
+
+ setObjectName("CiTracing");
+
+ QPushButton* start_recording = new QPushButton(tr("Start Recording"));
+ QPushButton* stop_recording = new QPushButton(QIcon::fromTheme("document-save"), tr("Stop and Save"));
+ QPushButton* abort_recording = new QPushButton(tr("Abort Recording"));
+
+ // Note: despite the "Enabled" in the signal names, they are connected to setVisible(),
+ // so emitting them shows or hides the buttons rather than enabling or disabling them.
+ connect(this, SIGNAL(SetStartTracingButtonEnabled(bool)), start_recording, SLOT(setVisible(bool)));
+ connect(this, SIGNAL(SetStopTracingButtonEnabled(bool)), stop_recording, SLOT(setVisible(bool)));
+ connect(this, SIGNAL(SetAbortTracingButtonEnabled(bool)), abort_recording, SLOT(setVisible(bool)));
+ connect(start_recording, SIGNAL(clicked()), this, SLOT(StartRecording()));
+ connect(stop_recording, SIGNAL(clicked()), this, SLOT(StopRecording()));
+ connect(abort_recording, SIGNAL(clicked()), this, SLOT(AbortRecording()));
+
+ // Initially only the "Start Recording" button is shown
+ stop_recording->setVisible(false);
+ abort_recording->setVisible(false);
+
+ auto main_widget = new QWidget;
+ auto main_layout = new QVBoxLayout;
+ {
+ auto sub_layout = new QHBoxLayout;
+ sub_layout->addWidget(start_recording);
+ sub_layout->addWidget(stop_recording);
+ sub_layout->addWidget(abort_recording);
+ main_layout->addLayout(sub_layout);
+ }
+ main_widget->setLayout(main_layout);
+ setWidget(main_widget);
+}
+
+// Slot: captures the current GPU, LCD and Pica register/shader state as the initial state
+// of a new CiTrace recorder and installs that recorder on the debug context.
+// Does nothing if the debug context is no longer alive.
+void GraphicsTracingWidget::StartRecording() {
+    auto context = context_weak.lock();
+    if (!context)
+        return;
+
+    auto shader_binary = Pica::g_state.vs.program_code;
+    auto swizzle_data = Pica::g_state.vs.swizzle_data;
+
+    // Encode floating point numbers to 24-bit values
+    // TODO: Drop this explicit conversion once we store float24 values bit-correctly internally.
+    // The arrays are value-initialized because only three of every four slots are written
+    // below; otherwise indeterminate values would be copied into the recorded state.
+    // NOTE(review): confirm whether the fourth component should be recorded as well.
+    std::array<uint32_t, 4 * 16> default_attributes{};
+    for (unsigned i = 0; i < 16; ++i) {
+        for (unsigned comp = 0; comp < 3; ++comp) {
+            default_attributes[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs.default_attributes[i][comp].ToFloat32());
+        }
+    }
+
+    std::array<uint32_t, 4 * 96> vs_float_uniforms{};
+    for (unsigned i = 0; i < 96; ++i)
+        for (unsigned comp = 0; comp < 3; ++comp)
+            vs_float_uniforms[4 * i + comp] = nihstro::to_float24(Pica::g_state.vs.uniforms.f[i][comp].ToFloat32());
+
+    // Register blocks are copied word by word (as u32) into the initial state
+    CiTrace::Recorder::InitialState state;
+    std::copy_n((u32*)&GPU::g_regs, sizeof(GPU::g_regs) / sizeof(u32), std::back_inserter(state.gpu_registers));
+    std::copy_n((u32*)&LCD::g_regs, sizeof(LCD::g_regs) / sizeof(u32), std::back_inserter(state.lcd_registers));
+    std::copy_n((u32*)&Pica::g_state.regs, sizeof(Pica::g_state.regs) / sizeof(u32), std::back_inserter(state.pica_registers));
+    boost::copy(default_attributes, std::back_inserter(state.default_attributes));
+    boost::copy(shader_binary, std::back_inserter(state.vs_program_binary));
+    boost::copy(swizzle_data, std::back_inserter(state.vs_swizzle_data));
+    boost::copy(vs_float_uniforms, std::back_inserter(state.vs_float_uniforms));
+    //boost::copy(TODO: Not implemented, std::back_inserter(state.gs_program_binary));
+    //boost::copy(TODO: Not implemented, std::back_inserter(state.gs_swizzle_data));
+    //boost::copy(TODO: Not implemented, std::back_inserter(state.gs_float_uniforms));
+
+    context->recorder = std::make_shared<CiTrace::Recorder>(state);
+
+    // Swap the visible buttons: hide "start", show "stop" and "abort"
+    emit SetStartTracingButtonEnabled(false);
+    emit SetStopTracingButtonEnabled(true);
+    emit SetAbortTracingButtonEnabled(true);
+}
+
+// Slot: asks the user for a target file, writes the recorded trace to it and removes the
+// recorder from the debug context. Does nothing if the debug context is no longer alive.
+void GraphicsTracingWidget::StopRecording() {
+ auto context = context_weak.lock();
+ if (!context)
+ return;
+
+ QString filename = QFileDialog::getSaveFileName(this, tr("Save CiTrace"), "citrace.ctf",
+ tr("CiTrace File (*.ctf)"));
+
+ if (filename.isEmpty()) {
+ // If the user canceled the dialog, keep recording
+ return;
+ }
+
+ // NOTE(review): assumes context->recorder is non-null here; it is not checked.
+ context->recorder->Finish(filename.toStdString());
+ context->recorder = nullptr;
+
+ // Swap the visible buttons back: hide "stop" and "abort", show "start"
+ emit SetStopTracingButtonEnabled(false);
+ emit SetAbortTracingButtonEnabled(false);
+ emit SetStartTracingButtonEnabled(true);
+}
+
+// Slot: drops the recorder from the debug context without saving anything and restores
+// the initial button state. Does nothing if the debug context is no longer alive.
+void GraphicsTracingWidget::AbortRecording() {
+ auto context = context_weak.lock();
+ if (!context)
+ return;
+
+ context->recorder = nullptr;
+
+ emit SetStopTracingButtonEnabled(false);
+ emit SetAbortTracingButtonEnabled(false);
+ emit SetStartTracingButtonEnabled(true);
+}
+
+// Enables the dock's contents when a breakpoint is hit; both arguments are unused.
+void GraphicsTracingWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) {
+ widget()->setEnabled(true);
+}
+
+// Disables the dock's contents again once execution resumes.
+void GraphicsTracingWidget::OnResumed() {
+ widget()->setEnabled(false);
+}
+
+// Slot for the main window's EmulationStarting signal; emu_thread is unused.
+void GraphicsTracingWidget::OnEmulationStarting(EmuThread* emu_thread) {
+ // Disable tracing starting/stopping until a GPU breakpoint is reached
+ widget()->setEnabled(false);
+}
+
+// Slot for the main window's EmulationStopping signal. If a trace is still being recorded,
+// asks the user whether to save or discard it, then re-enables the dock's contents.
+void GraphicsTracingWidget::OnEmulationStopping() {
+ // TODO: Is it safe to access the context here?
+
+ auto context = context_weak.lock();
+ if (!context)
+ return;
+
+
+ if (context->recorder) {
+ auto reply = QMessageBox::question(this, tr("CiTracing still active"),
+ tr("A CiTrace is still being recorded. Do you want to save it? If not, all recorded data will be discarded."),
+ QMessageBox::Yes | QMessageBox::No, QMessageBox::Yes);
+
+ if (reply == QMessageBox::Yes) {
+ // Note: StopRecording() returns without saving or removing the recorder
+ // if the user cancels its file dialog.
+ StopRecording();
+ } else {
+ AbortRecording();
+ }
+ }
+
+ // If the widget was disabled before, enable it now to allow starting
+ // tracing before starting the next emulation session
+ widget()->setEnabled(true);
+}
diff --git a/src/citra_qt/debugger/graphics_tracing.h b/src/citra_qt/debugger/graphics_tracing.h
new file mode 100644
index 00000000..2a0e4819
--- /dev/null
+++ b/src/citra_qt/debugger/graphics_tracing.h
@@ -0,0 +1,32 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include "graphics_breakpoint_observer.h"
+
+class EmuThread;
+
+/**
+ * Dock widget with buttons to start, stop (and save) or abort the recording of a CiTrace.
+ */
+class GraphicsTracingWidget : public BreakPointObserverDock {
+ Q_OBJECT
+
+public:
+ GraphicsTracingWidget(std::shared_ptr<Pica::DebugContext> debug_context, QWidget* parent = nullptr);
+
+private slots:
+ // Button handlers
+ void StartRecording();
+ void StopRecording();
+ void AbortRecording();
+
+ // Breakpoint observer callbacks
+ void OnBreakPointHit(Pica::DebugContext::Event event, void* data) override;
+ void OnResumed() override;
+
+ // Connected to the main window's EmulationStarting/EmulationStopping signals
+ void OnEmulationStarting(EmuThread* emu_thread);
+ void OnEmulationStopping();
+
+signals:
+ // Connected to the setVisible() slot of the respective button
+ void SetStartTracingButtonEnabled(bool enable);
+ void SetStopTracingButtonEnabled(bool enable);
+ void SetAbortTracingButtonEnabled(bool enable);
+};
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index d23bafaf..2746de77 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -32,6 +32,7 @@
#include "debugger/graphics_breakpoints.h"
#include "debugger/graphics_cmdlists.h"
#include "debugger/graphics_framebuffer.h"
+#include "debugger/graphics_tracing.h"
#include "debugger/graphics_vertex_shader.h"
#include "debugger/profiler.h"
@@ -94,6 +95,10 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
addDockWidget(Qt::RightDockWidgetArea, graphicsVertexShaderWidget);
graphicsVertexShaderWidget->hide();
+ auto graphicsTracingWidget = new GraphicsTracingWidget(Pica::g_debug_context, this);
+ addDockWidget(Qt::RightDockWidgetArea, graphicsTracingWidget);
+ graphicsTracingWidget->hide();
+
QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
debug_menu->addAction(profilerWidget->toggleViewAction());
debug_menu->addAction(disasmWidget->toggleViewAction());
@@ -104,6 +109,7 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
debug_menu->addAction(graphicsBreakpointsWidget->toggleViewAction());
debug_menu->addAction(graphicsFramebufferWidget->toggleViewAction());
debug_menu->addAction(graphicsVertexShaderWidget->toggleViewAction());
+ debug_menu->addAction(graphicsTracingWidget->toggleViewAction());
// Set default UI state
// geometry: 55% of the window contents are in the upper screen half, 45% in the lower half
@@ -148,6 +154,9 @@ GMainWindow::GMainWindow() : emu_thread(nullptr)
connect(this, SIGNAL(EmulationStopping()), registersWidget, SLOT(OnEmulationStopping()));
connect(this, SIGNAL(EmulationStarting(EmuThread*)), render_window, SLOT(OnEmulationStarting(EmuThread*)));
connect(this, SIGNAL(EmulationStopping()), render_window, SLOT(OnEmulationStopping()));
+ connect(this, SIGNAL(EmulationStarting(EmuThread*)), graphicsTracingWidget, SLOT(OnEmulationStarting(EmuThread*)));
+ connect(this, SIGNAL(EmulationStopping()), graphicsTracingWidget, SLOT(OnEmulationStopping()));
+
// Setup hotkeys
RegisterHotkey("Main Window", "Load File", QKeySequence::Open);
diff --git a/src/common/color.h b/src/common/color.h
index 422fdc8a..9dafdca0 100644
--- a/src/common/color.h
+++ b/src/common/color.h
@@ -208,7 +208,32 @@ inline void EncodeD24(u32 value, u8* bytes) {
* @param bytes Pointer where to store the encoded value
*/
inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
- *reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth;
+ bytes[0] = depth & 0xFF;
+ bytes[1] = (depth >> 8) & 0xFF;
+ bytes[2] = (depth >> 16) & 0xFF;
+ bytes[3] = stencil;
+}
+
+/**
+ * Encode a 24 bit depth value as D24X8 format (32 bits per pixel with 8 bits unused)
+ * @param depth 24 bit source depth value to encode
+ * @param bytes Pointer where to store the encoded value
+ * @note Only bytes[0..2] are written (least significant depth byte first); bytes[3] is
+ * left unmodified
+ */
+inline void EncodeD24X8(u32 depth, u8* bytes) {
+ bytes[0] = depth & 0xFF;
+ bytes[1] = (depth >> 8) & 0xFF;
+ bytes[2] = (depth >> 16) & 0xFF;
+}
+
+/**
+ * Encode an 8 bit stencil value as X24S8 format (32 bits per pixel with 24 bits unused)
+ * @param stencil 8 bit source stencil value to encode
+ * @param bytes Pointer where to store the encoded value
+ * @note Only bytes[3] is written; bytes[0..2] are left unmodified
+ */
+inline void EncodeX24S8(u8 stencil, u8* bytes) {
+ bytes[3] = stencil;
}
} // namespace
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index c4fb3d9c..59bd16db 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -69,8 +69,10 @@ inline u64 _rotr64(u64 x, unsigned int shift){
}
#else // _MSC_VER
- // Function Cross-Compatibility
- #define snprintf _snprintf
+ #if (_MSC_VER < 1900)
+ // Function Cross-Compatibility
+ #define snprintf _snprintf
+ #endif
// Locale Cross-Compatibility
#define locale_t _locale_t
diff --git a/src/common/file_util.h b/src/common/file_util.h
index 8fe772ae..9637d1b8 100644
--- a/src/common/file_util.h
+++ b/src/common/file_util.h
@@ -181,6 +181,10 @@ public:
template <typename T>
size_t WriteArray(const T* data, size_t length)
{
+ static_assert(std::is_standard_layout<T>::value, "Given array does not consist of standard layout objects");
+ // TODO: gcc 4.8 does not support is_trivially_copyable, but we really should check for it here.
+ //static_assert(std::is_trivially_copyable<T>::value, "Given array does not consist of trivially copyable objects");
+
if (!IsOpen()) {
m_good = false;
return -1;
@@ -203,6 +207,12 @@ public:
return WriteArray(reinterpret_cast<const char*>(data), length);
}
+ /**
+ * Writes a single object by forwarding it to WriteArray as an array of length 1.
+ * Pointer types are rejected at compile time.
+ * @param object Object to write
+ * @return The value returned by WriteArray
+ */
+ template<typename T>
+ size_t WriteObject(const T& object) {
+ static_assert(!std::is_pointer<T>::value, "Given object is a pointer");
+ return WriteArray(&object, 1);
+ }
+
bool IsOpen() { return nullptr != m_file; }
// m_good is set to false when a read, write or other function fails
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index 9b004440..8267ee58 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -115,6 +115,7 @@ set(SRCS
loader/elf.cpp
loader/loader.cpp
loader/ncch.cpp
+ tracer/recorder.cpp
mem_map.cpp
memory.cpp
settings.cpp
@@ -243,6 +244,8 @@ set(HEADERS
loader/elf.h
loader/loader.h
loader/ncch.h
+ tracer/recorder.h
+ tracer/citrace.h
mem_map.h
memory.h
memory_setup.h
diff --git a/src/core/hle/applets/applet.cpp b/src/core/hle/applets/applet.cpp
index 4dcce729..826f6cbb 100644
--- a/src/core/hle/applets/applet.cpp
+++ b/src/core/hle/applets/applet.cpp
@@ -2,12 +2,19 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cstddef>
+#include <memory>
+#include <type_traits>
+#include <unordered_map>
+
#include "common/assert.h"
-#include "common/logging/log.h"
+#include "common/common_types.h"
#include "core/core_timing.h"
#include "core/hle/applets/applet.h"
#include "core/hle/applets/swkbd.h"
+#include "core/hle/result.h"
+#include "core/hle/service/apt/apt.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/applets/applet.h b/src/core/hle/applets/applet.h
index fe537e70..b235d0b8 100644
--- a/src/core/hle/applets/applet.h
+++ b/src/core/hle/applets/applet.h
@@ -4,9 +4,9 @@
#pragma once
-#include "common/common_types.h"
-#include "core/hle/kernel/kernel.h"
-#include "core/hle/kernel/shared_memory.h"
+#include <memory>
+
+#include "core/hle/result.h"
#include "core/hle/service/apt/apt.h"
namespace HLE {
diff --git a/src/core/hle/applets/swkbd.cpp b/src/core/hle/applets/swkbd.cpp
index 7431ebcf..1db6b5a1 100644
--- a/src/core/hle/applets/swkbd.cpp
+++ b/src/core/hle/applets/swkbd.cpp
@@ -2,13 +2,21 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#include <cstring>
+#include <string>
+
#include "common/assert.h"
#include "common/logging/log.h"
#include "common/string_util.h"
#include "core/hle/applets/swkbd.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/shared_memory.h"
#include "core/hle/service/hid/hid.h"
#include "core/hle/service/gsp_gpu.h"
+#include "core/hle/result.h"
+#include "core/memory.h"
+
#include "video_core/video_core.h"
////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/src/core/hle/applets/swkbd.h b/src/core/hle/applets/swkbd.h
index 98e81c48..cb95b8d9 100644
--- a/src/core/hle/applets/swkbd.h
+++ b/src/core/hle/applets/swkbd.h
@@ -5,9 +5,12 @@
#pragma once
#include "common/common_types.h"
+#include "common/common_funcs.h"
+
#include "core/hle/applets/applet.h"
#include "core/hle/kernel/kernel.h"
#include "core/hle/kernel/shared_memory.h"
+#include "core/hle/result.h"
#include "core/hle/service/apt/apt.h"
namespace HLE {
diff --git a/src/core/hle/kernel/kernel.h b/src/core/hle/kernel/kernel.h
index 4c4486c1..4d4276f7 100644
--- a/src/core/hle/kernel/kernel.h
+++ b/src/core/hle/kernel/kernel.h
@@ -86,6 +86,7 @@ public:
case HandleType::Process:
case HandleType::AddressArbiter:
case HandleType::ResourceLimit:
+ case HandleType::CodeSet:
return false;
}
}
diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h
index 92fa0fa6..83d3acea 100644
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@@ -6,6 +6,7 @@
#include <bitset>
#include <cstddef>
+#include <memory>
#include <string>
#include <boost/container/static_vector.hpp>
diff --git a/src/core/hle/kernel/shared_memory.h b/src/core/hle/kernel/shared_memory.h
index 20426689..7a292277 100644
--- a/src/core/hle/kernel/shared_memory.h
+++ b/src/core/hle/kernel/shared_memory.h
@@ -4,9 +4,12 @@
#pragma once
+#include <string>
+
#include "common/common_types.h"
#include "core/hle/kernel/kernel.h"
+#include "core/hle/result.h"
namespace Kernel {
diff --git a/src/core/hle/service/apt/apt.h b/src/core/hle/service/apt/apt.h
index 9f080250..72972d05 100644
--- a/src/core/hle/service/apt/apt.h
+++ b/src/core/hle/service/apt/apt.h
@@ -4,11 +4,14 @@
#pragma once
-#include <array>
-#include "core/hle/result.h"
-#include "core/hle/service/service.h"
+#include "common/common_types.h"
+
+#include "core/hle/kernel/kernel.h"
namespace Service {
+
+class Interface;
+
namespace APT {
/// Holds information about the parameters used in Send/Glance/ReceiveParameter
diff --git a/src/core/hle/service/gsp_gpu.cpp b/src/core/hle/service/gsp_gpu.cpp
index f175085e..3910d022 100644
--- a/src/core/hle/service/gsp_gpu.cpp
+++ b/src/core/hle/service/gsp_gpu.cpp
@@ -349,7 +349,7 @@ void SignalInterrupt(InterruptId interrupt_id) {
/// Executes the next GSP command
static void ExecuteCommand(const Command& command, u32 thread_id) {
// Utility function to convert register ID to address
- auto WriteGPURegister = [](u32 id, u32 data) {
+ static auto WriteGPURegister = [](u32 id, u32 data) {
GPU::Write<u32>(0x1EF00000 + 4 * id, data);
};
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index a1789f9c..a3a7d128 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -21,12 +21,17 @@
#include "core/hw/hw.h"
#include "core/hw/gpu.h"
+#include "core/tracer/recorder.h"
+
#include "video_core/command_processor.h"
#include "video_core/hwrasterizer_base.h"
#include "video_core/renderer_base.h"
#include "video_core/utils.h"
#include "video_core/video_core.h"
+#include "video_core/debug_utils/debug_utils.h"
+
+
namespace GPU {
Regs g_regs;
@@ -101,39 +106,43 @@ inline void Write(u32 addr, const T data) {
const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].trigger));
auto& config = g_regs.memory_fill_config[is_second_filler];
- if (config.address_start && config.trigger) {
- u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
- u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
-
- if (config.fill_24bit) {
- // fill with 24-bit values
- for (u8* ptr = start; ptr < end; ptr += 3) {
- ptr[0] = config.value_24bit_r;
- ptr[1] = config.value_24bit_g;
- ptr[2] = config.value_24bit_b;
+ if (config.trigger) {
+ if (config.address_start) { // Some games pass invalid values here
+ u8* start = Memory::GetPhysicalPointer(config.GetStartAddress());
+ u8* end = Memory::GetPhysicalPointer(config.GetEndAddress());
+
+ if (config.fill_24bit) {
+ // fill with 24-bit values
+ for (u8* ptr = start; ptr < end; ptr += 3) {
+ ptr[0] = config.value_24bit_r;
+ ptr[1] = config.value_24bit_g;
+ ptr[2] = config.value_24bit_b;
+ }
+ } else if (config.fill_32bit) {
+ // fill with 32-bit values
+ for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
+ *ptr = config.value_32bit;
+ } else {
+ // fill with 16-bit values
+ for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
+ *ptr = config.value_16bit;
}
- } else if (config.fill_32bit) {
- // fill with 32-bit values
- for (u32* ptr = (u32*)start; ptr < (u32*)end; ++ptr)
- *ptr = config.value_32bit;
- } else {
- // fill with 16-bit values
- for (u16* ptr = (u16*)start; ptr < (u16*)end; ++ptr)
- *ptr = config.value_16bit;
- }
- LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
+ LOG_TRACE(HW_GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress());
- config.trigger = 0;
- config.finished = 1;
+ if (!is_second_filler) {
+ GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
+ } else {
+ GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
+ }
- if (!is_second_filler) {
- GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC0);
- } else {
- GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PSC1);
+ VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
}
- VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetStartAddress(), config.GetEndAddress() - config.GetStartAddress());
+ // Reset "trigger" flag and set the "finish" flag
+ // NOTE: This was confirmed to happen on hardware even if "address_start" is zero.
+ config.trigger = 0;
+ config.finished = 1;
}
break;
}
@@ -270,6 +279,7 @@ inline void Write(u32 addr, const T data) {
config.GetPhysicalOutputAddress(), output_width, output_height,
config.output_format.Value(), config.flags);
+ g_regs.display_transfer_config.trigger = 0;
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PPF);
VideoCore::g_renderer->hw_rasterizer->NotifyFlush(config.GetPhysicalOutputAddress(), output_size);
@@ -284,7 +294,14 @@ inline void Write(u32 addr, const T data) {
if (config.trigger & 1)
{
u32* buffer = (u32*)Memory::GetPhysicalPointer(config.GetPhysicalAddress());
+
+ if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
+ Pica::g_debug_context->recorder->MemoryAccessed((u8*)buffer, config.size * sizeof(u32), config.GetPhysicalAddress());
+ }
+
Pica::CommandProcessor::ProcessCommandList(buffer, config.size);
+
+ g_regs.command_processor_config.trigger = 0;
}
break;
}
@@ -292,6 +309,13 @@ inline void Write(u32 addr, const T data) {
default:
break;
}
+
+ // Notify tracer about the register write
+ // This is happening *after* handling the write to make sure we properly catch all memory reads.
+ if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
+ // addr + GPU VBase - IO VBase + IO PBase
+ Pica::g_debug_context->recorder->RegisterWritten<T>(addr + 0x1EF00000 - 0x1EC00000 + 0x10100000, data);
+ }
}
// Explicitly instantiate template functions because we aren't defining this in the header:
diff --git a/src/core/hw/hw.cpp b/src/core/hw/hw.cpp
index c7006a49..b5fdbf9c 100644
--- a/src/core/hw/hw.cpp
+++ b/src/core/hw/hw.cpp
@@ -15,6 +15,21 @@ template <typename T>
inline void Read(T &var, const u32 addr) {
switch (addr & 0xFFFFF000) {
case VADDR_GPU:
+ case VADDR_GPU + 0x1000:
+ case VADDR_GPU + 0x2000:
+ case VADDR_GPU + 0x3000:
+ case VADDR_GPU + 0x4000:
+ case VADDR_GPU + 0x5000:
+ case VADDR_GPU + 0x6000:
+ case VADDR_GPU + 0x7000:
+ case VADDR_GPU + 0x8000:
+ case VADDR_GPU + 0x9000:
+ case VADDR_GPU + 0xA000:
+ case VADDR_GPU + 0xB000:
+ case VADDR_GPU + 0xC000:
+ case VADDR_GPU + 0xD000:
+ case VADDR_GPU + 0xE000:
+ case VADDR_GPU + 0xF000:
GPU::Read(var, addr);
break;
case VADDR_LCD:
@@ -29,6 +44,21 @@ template <typename T>
inline void Write(u32 addr, const T data) {
switch (addr & 0xFFFFF000) {
case VADDR_GPU:
+ case VADDR_GPU + 0x1000:
+ case VADDR_GPU + 0x2000:
+ case VADDR_GPU + 0x3000:
+ case VADDR_GPU + 0x4000:
+ case VADDR_GPU + 0x5000:
+ case VADDR_GPU + 0x6000:
+ case VADDR_GPU + 0x7000:
+ case VADDR_GPU + 0x8000:
+ case VADDR_GPU + 0x9000:
+ case VADDR_GPU + 0xA000:
+ case VADDR_GPU + 0xB000:
+ case VADDR_GPU + 0xC000:
+ case VADDR_GPU + 0xD000:
+ case VADDR_GPU + 0xE000:
+ case VADDR_GPU + 0xF000:
GPU::Write(addr, data);
break;
case VADDR_LCD:
diff --git a/src/core/hw/lcd.cpp b/src/core/hw/lcd.cpp
index cdb757a1..6f93709e 100644
--- a/src/core/hw/lcd.cpp
+++ b/src/core/hw/lcd.cpp
@@ -10,6 +10,9 @@
#include "core/hw/hw.h"
#include "core/hw/lcd.h"
+#include "core/tracer/recorder.h"
+#include "video_core/debug_utils/debug_utils.h"
+
namespace LCD {
Regs g_regs;
@@ -40,6 +43,13 @@ inline void Write(u32 addr, const T data) {
}
g_regs[index] = static_cast<u32>(data);
+
+ // Notify tracer about the register write
+ // This is happening *after* handling the write to make sure we properly catch all memory reads.
+ if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
+ // addr + LCD VBase - IO VBase + IO PBase
+ Pica::g_debug_context->recorder->RegisterWritten<T>(addr + HW::VADDR_LCD - 0x1EC00000 + 0x10100000, data);
+ }
}
// Explicitly instantiate template functions because we aren't defining this in the header:
diff --git a/src/core/tracer/citrace.h b/src/core/tracer/citrace.h
new file mode 100644
index 00000000..5deb6ce9
--- /dev/null
+++ b/src/core/tracer/citrace.h
@@ -0,0 +1,101 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <cstdint>
+
+namespace CiTrace {
+
+// NOTE: Things are stored in little-endian
+
+#pragma pack(1)
+
+// Header written at the very start of a CiTrace file.
+// All offsets stored in here are byte offsets counted from the beginning of the file.
+struct CTHeader {
+ // Magic word expected in the "magic" field (4 characters, stored without a terminating NUL)
+ static const char* ExpectedMagicWord() {
+ return "CiTr";
+ }
+
+ // File format version expected in the "version" field
+ static uint32_t ExpectedVersion() {
+ return 1;
+ }
+
+ char magic[4];
+ uint32_t version;
+ uint32_t header_size; // Size of this header structure in bytes
+
+ // Location of each initial state array within the file: each entry is a byte offset
+ // followed by the number of 32-bit words stored at that offset.
+ struct {
+ // NOTE: Register range sizes are technically hardware-constants, but the actual limits
+ // aren't known. Hence we store the presumed limits alongside the offsets.
+ // Sizes are given in uint32_t units.
+ uint32_t gpu_registers;
+ uint32_t gpu_registers_size;
+ uint32_t lcd_registers;
+ uint32_t lcd_registers_size;
+ uint32_t pica_registers;
+ uint32_t pica_registers_size;
+ uint32_t default_attributes;
+ uint32_t default_attributes_size;
+ uint32_t vs_program_binary;
+ uint32_t vs_program_binary_size;
+ uint32_t vs_swizzle_data;
+ uint32_t vs_swizzle_data_size;
+ uint32_t vs_float_uniforms;
+ uint32_t vs_float_uniforms_size;
+ uint32_t gs_program_binary;
+ uint32_t gs_program_binary_size;
+ uint32_t gs_swizzle_data;
+ uint32_t gs_swizzle_data_size;
+ uint32_t gs_float_uniforms;
+ uint32_t gs_float_uniforms_size;
+
+ // Other things we might want to store here:
+ // - Initial framebuffer data, maybe even a full copy of FCRAM/VRAM
+ // - Lookup tables for fragment lighting
+ // - Lookup tables for procedural textures
+ } initial_state_offsets;
+
+ uint32_t stream_offset; // Byte offset of the first stream element
+ uint32_t stream_size; // Number of stream elements
+};
+
+// Tag identifying the kind of a stream element (stored as a 32-bit value)
+enum CTStreamElementType : uint32_t {
+ FrameMarker = 0xE1, // Marks the end of a frame
+ MemoryLoad = 0xE2, // Memory contents are provided, see CTMemoryLoad
+ RegisterWrite = 0xE3, // A register was written, see CTRegisterWrite
+};
+
+// Payload of a MemoryLoad stream element
+struct CTMemoryLoad {
+ uint32_t file_offset; // Byte offset in the file at which the memory contents are stored
+ uint32_t size; // Size of the memory contents in bytes
+ uint32_t physical_address; // Physical address the memory contents belong to
+ uint32_t pad; // NOTE(review): presumably padding to match the size of CTRegisterWrite - confirm
+};
+
+// Payload of a RegisterWrite stream element
+struct CTRegisterWrite {
+ uint32_t physical_address; // Physical address of the register that was written
+
+ // Width of the recorded write
+ enum : uint32_t {
+ SIZE_8 = 0xD1,
+ SIZE_16 = 0xD2,
+ SIZE_32 = 0xD3,
+ SIZE_64 = 0xD4
+ } size;
+
+ // TODO: Make it clearer which bits of this member are used for sizes other than 32 bits
+ uint64_t value;
+};
+
+// A single element of the recorded stream: a type tag followed by the type-specific payload
+struct CTStreamElement {
+ CTStreamElementType type;
+
+ // Which member is meaningful depends on "type"
+ union {
+ CTMemoryLoad memory_load;
+ CTRegisterWrite register_write;
+ };
+};
+
+#pragma pack()
+
+}
diff --git a/src/core/tracer/recorder.cpp b/src/core/tracer/recorder.cpp
new file mode 100644
index 00000000..656706c0
--- /dev/null
+++ b/src/core/tracer/recorder.cpp
@@ -0,0 +1,187 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include <cstring>
+
+#include "common/assert.h"
+#include "common/file_util.h"
+#include "common/logging/log.h"
+
+#include "recorder.h"
+
+namespace CiTrace {
+
+/// Creates a recorder which keeps its own copy of the given initial state.
+Recorder::Recorder(const InitialState& initial_state)
+    : initial_state(initial_state) {
+}
+
+/**
+ * Writes the recording to the given file.
+ * The file consists of the header, the initial state arrays, the "extra data" of all stream
+ * elements (i.e. the recorded memory contents) and finally the stream elements themselves.
+ * Failures are reported through the log; they are not propagated to the caller.
+ */
+void Recorder::Finish(const std::string& filename) {
+    // Setup CiTrace header
+    CTHeader header;
+    std::memcpy(header.magic, CTHeader::ExpectedMagicWord(), 4);
+    header.version = CTHeader::ExpectedVersion();
+    header.header_size = sizeof(CTHeader);
+
+    // Calculate file offsets
+    auto& initial = header.initial_state_offsets;
+
+    // Sizes are stored in units of u32
+    initial.gpu_registers_size      = initial_state.gpu_registers.size();
+    initial.lcd_registers_size      = initial_state.lcd_registers.size();
+    initial.pica_registers_size     = initial_state.pica_registers.size();
+    initial.default_attributes_size = initial_state.default_attributes.size();
+    initial.vs_program_binary_size  = initial_state.vs_program_binary.size();
+    initial.vs_swizzle_data_size    = initial_state.vs_swizzle_data.size();
+    initial.vs_float_uniforms_size  = initial_state.vs_float_uniforms.size();
+    initial.gs_program_binary_size  = initial_state.gs_program_binary.size();
+    initial.gs_swizzle_data_size    = initial_state.gs_swizzle_data.size();
+    initial.gs_float_uniforms_size  = initial_state.gs_float_uniforms.size();
+    header.stream_size              = stream.size();
+
+    // The initial state arrays directly follow the header, each one starting where the previous one ends
+    initial.gpu_registers      = sizeof(header);
+    initial.lcd_registers      = initial.gpu_registers      + initial.gpu_registers_size      * sizeof(u32);
+    initial.pica_registers     = initial.lcd_registers      + initial.lcd_registers_size      * sizeof(u32);
+    initial.default_attributes = initial.pica_registers     + initial.pica_registers_size     * sizeof(u32);
+    initial.vs_program_binary  = initial.default_attributes + initial.default_attributes_size * sizeof(u32);
+    initial.vs_swizzle_data    = initial.vs_program_binary  + initial.vs_program_binary_size  * sizeof(u32);
+    initial.vs_float_uniforms  = initial.vs_swizzle_data    + initial.vs_swizzle_data_size    * sizeof(u32);
+    initial.gs_program_binary  = initial.vs_float_uniforms  + initial.vs_float_uniforms_size  * sizeof(u32);
+    initial.gs_swizzle_data    = initial.gs_program_binary  + initial.gs_program_binary_size  * sizeof(u32);
+    initial.gs_float_uniforms  = initial.gs_swizzle_data    + initial.gs_swizzle_data_size    * sizeof(u32);
+    header.stream_offset       = initial.gs_float_uniforms  + initial.gs_float_uniforms_size  * sizeof(u32);
+
+    // Iterate through stream elements, update relevant stream element data.
+    // While iterating, header.stream_offset is the file position of the current element's extra
+    // data; once the loop is done it points past all extra data, i.e. at the first stream element.
+    for (auto& stream_element : stream) {
+        switch (stream_element.data.type) {
+        case MemoryLoad:
+        {
+            auto& file_offset = memory_regions[stream_element.hash];
+            if (!stream_element.uses_existing_data) {
+                file_offset = header.stream_offset;
+            }
+            stream_element.data.memory_load.file_offset = file_offset;
+            break;
+        }
+
+        default:
+            // Other commands don't use any extra data
+            DEBUG_ASSERT(stream_element.extra_data.size() == 0);
+            break;
+        }
+        header.stream_offset += stream_element.extra_data.size();
+    }
+
+    try {
+        // Open file and write header
+        FileUtil::IOFile file(filename, "wb");
+        size_t written = file.WriteObject(header);
+        if (written != 1 || file.Tell() != initial.gpu_registers)
+            throw "Failed to write header";
+
+        // Writes one initial state array and verifies that the file position afterwards matches
+        // the offset which was announced in the header.
+        const auto write_words = [&file](const std::vector<u32>& words, u64 expected_end, const char* error) {
+            const size_t words_written = file.WriteArray(words.data(), words.size());
+            if (words_written != words.size() || file.Tell() != expected_end)
+                throw error;
+        };
+
+        // Write initial state
+        write_words(initial_state.gpu_registers,      initial.lcd_registers,      "Failed to write GPU registers");
+        write_words(initial_state.lcd_registers,      initial.pica_registers,     "Failed to write LCD registers");
+        write_words(initial_state.pica_registers,     initial.default_attributes, "Failed to write Pica registers");
+        write_words(initial_state.default_attributes, initial.vs_program_binary,  "Failed to write default vertex attributes");
+        write_words(initial_state.vs_program_binary,  initial.vs_swizzle_data,    "Failed to write vertex shader program binary");
+        write_words(initial_state.vs_swizzle_data,    initial.vs_float_uniforms,  "Failed to write vertex shader swizzle data");
+        write_words(initial_state.vs_float_uniforms,  initial.gs_program_binary,  "Failed to write vertex shader float uniforms");
+        write_words(initial_state.gs_program_binary,  initial.gs_swizzle_data,    "Failed to write geometry shader program binary");
+        write_words(initial_state.gs_swizzle_data,    initial.gs_float_uniforms,  "Failed to write geometry shader swizzle data");
+        write_words(initial_state.gs_float_uniforms,
+                    initial.gs_float_uniforms + sizeof(u32) * initial.gs_float_uniforms_size,
+                    "Failed to write geometry shader float uniforms");
+
+        // Iterate through stream elements, write "extra data"
+        for (const auto& stream_element : stream) {
+            if (stream_element.extra_data.size() == 0)
+                continue;
+
+            written = file.WriteBytes(stream_element.extra_data.data(), stream_element.extra_data.size());
+            if (written != stream_element.extra_data.size())
+                throw "Failed to write extra data";
+        }
+
+        if (file.Tell() != header.stream_offset)
+            throw "Unexpected end of extra data";
+
+        // Write actual stream elements
+        for (const auto& stream_element : stream) {
+            if (1 != file.WriteObject(stream_element.data))
+                throw "Failed to write stream element";
+        }
+    } catch(const char* str) {
+        LOG_ERROR(HW_GPU, "Writing CiTrace file failed: %s", str);
+    }
+}
+
+/// Appends a frame marker to the recorded stream.
+void Recorder::FrameFinished() {
+    StreamElement marker = { FrameMarker };
+    stream.push_back(marker);
+}
+
+/**
+ * Appends a MemoryLoad element for the given memory range to the stream.
+ * The contents are only copied if no region with the same CRC32 has been recorded before;
+ * otherwise the element refers to the previously stored data.
+ * @param data Pointer to the memory contents
+ * @param size Size of the memory range in bytes
+ * @param physical_address Physical address of the memory range
+ */
+void Recorder::MemoryAccessed(const u8* data, u32 size, u32 physical_address) {
+    StreamElement element = { MemoryLoad };
+    element.data.memory_load.size = size;
+    element.data.memory_load.physical_address = physical_address;
+
+    // Compute hash over given memory region to check if the contents are already stored internally
+    boost::crc_32_type crc;
+    crc.process_bytes(data, size);
+    element.hash = crc.checksum();
+
+    // A single insert both checks for and registers the hash: it only succeeds if the hash is
+    // not known yet. The file offset will be initialized in Finish().
+    const bool is_new_region = memory_regions.insert({element.hash, 0}).second;
+    element.uses_existing_data = !is_new_region;
+
+    stream.push_back(element);
+
+    // Fill in the contents after pushing so that the buffer doesn't get copied a second time
+    if (is_new_region) {
+        stream.back().extra_data.assign(data, data + size);
+    }
+}
+
+/// Appends a RegisterWrite element, tagged with the width of T, to the stream.
+template<typename T>
+void Recorder::RegisterWritten(u32 physical_address, T value) {
+    StreamElement element = { RegisterWrite };
+
+    // Translate the width of the written type to the corresponding size tag
+    switch (sizeof(T)) {
+    case 1:
+        element.data.register_write.size = CTRegisterWrite::SIZE_8;
+        break;
+
+    case 2:
+        element.data.register_write.size = CTRegisterWrite::SIZE_16;
+        break;
+
+    case 4:
+        element.data.register_write.size = CTRegisterWrite::SIZE_32;
+        break;
+
+    default:
+        element.data.register_write.size = CTRegisterWrite::SIZE_64;
+        break;
+    }
+
+    element.data.register_write.physical_address = physical_address;
+    element.data.register_write.value = value;
+
+    stream.push_back(element);
+}
+
+// Explicitly instantiate RegisterWritten for all supported register widths, since the template
+// is defined in this source file rather than in the header.
+template void Recorder::RegisterWritten(u32,u8);
+template void Recorder::RegisterWritten(u32,u16);
+template void Recorder::RegisterWritten(u32,u32);
+template void Recorder::RegisterWritten(u32,u64);
+
+}
diff --git a/src/core/tracer/recorder.h b/src/core/tracer/recorder.h
new file mode 100644
index 00000000..6e4b7001
--- /dev/null
+++ b/src/core/tracer/recorder.h
@@ -0,0 +1,90 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <string>
+#include <unordered_map>
+#include <vector>
+
+#include <boost/crc.hpp>
+
+#include "common/common_types.h"
+
+#include "citrace.h"
+
+namespace CiTrace {
+
+/// Records frame ends, memory accesses and register writes, and saves them as a CiTrace file.
+class Recorder {
+public:
+    /// State captured at the start of the recording; every array is stored as 32-bit words.
+    struct InitialState {
+        std::vector<u32> gpu_registers;
+        std::vector<u32> lcd_registers;
+        std::vector<u32> pica_registers;
+        std::vector<u32> default_attributes;
+        std::vector<u32> vs_program_binary;
+        std::vector<u32> vs_swizzle_data;
+        std::vector<u32> vs_float_uniforms;
+        std::vector<u32> gs_program_binary;
+        std::vector<u32> gs_swizzle_data;
+        std::vector<u32> gs_float_uniforms;
+    };
+
+    /**
+     * Recorder constructor
+     * @param initial_state State at the start of the recording. A copy of it is kept and written
+     *                      to the output file by Finish().
+     * @note NOTE(review): default_attributes and vs_float_uniforms are presumably expected to
+     *       hold 32-bit-aligned 24-bit floating point values - confirm against the caller.
+     */
+    Recorder(const InitialState& initial_state);
+
+    /// Finish recording of this Citrace and save it using the given filename.
+    void Finish(const std::string& filename);
+
+    /// Mark end of a frame
+    void FrameFinished();
+
+    /**
+     * Store a copy of the given memory range in the recording.
+     * @param data Pointer to the contents of the memory range
+     * @param size Size of the memory range in bytes
+     * @param physical_address Physical address of the memory range
+     * @note Use this whenever the GPU is about to access a particular memory region.
+     * @note The implementation will make sure to minimize redundant memory updates.
+     */
+    void MemoryAccessed(const u8* data, u32 size, u32 physical_address);
+
+    /**
+     * Record a register write.
+     * @param physical_address Physical address of the register
+     * @param value Value which has been written
+     * @note Use this whenever a GPU-related MMIO register has been written to.
+     */
+    template<typename T>
+    void RegisterWritten(u32 physical_address, T value);
+
+private:
+    /// State at the time the recording was started
+    InitialState initial_state;
+
+    /// Element of the command stream, along with bookkeeping data needed for writing it to a file
+    struct StreamElement {
+        CTStreamElement data;
+
+        /**
+         * Extra data to store along "core" data.
+         * This is e.g. used for the memory contents of MemoryLoad elements.
+         */
+        std::vector<u8> extra_data;
+
+        /// Optional CRC hash (e.g. for hashing memory regions)
+        boost::crc_32_type::value_type hash;
+
+        /// If true, refer to data already written to the output file instead of extra_data
+        bool uses_existing_data;
+    };
+
+    /// Command stream, in order of recording
+    std::vector<StreamElement> stream;
+
+    /**
+     * Internal cache which maps hashes of memory contents to file offsets at which those memory
+     * contents are stored.
+     */
+    std::unordered_map<boost::crc_32_type::value_type /*hash*/, u32 /*file_offset*/> memory_regions;
+};
+
+} // namespace
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 110caec7..2a1c885a 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -123,12 +123,55 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
PrimitiveAssembler<VertexShader::OutputVertex> primitive_assembler(regs.triangle_topology.Value());
PrimitiveAssembler<DebugUtils::GeometryDumper::Vertex> dumping_primitive_assembler(regs.triangle_topology.Value());
+ if (g_debug_context) {
+ for (int i = 0; i < 3; ++i) {
+ const auto texture = regs.GetTextures()[i];
+ if (!texture.enabled)
+ continue;
+
+ u8* texture_data = Memory::GetPhysicalPointer(texture.config.GetPhysicalAddress());
+ if (g_debug_context && Pica::g_debug_context->recorder)
+ g_debug_context->recorder->MemoryAccessed(texture_data, Pica::Regs::NibblesPerPixel(texture.format) * texture.config.width / 2 * texture.config.height, texture.config.GetPhysicalAddress());
+ }
+ }
+
+            // Collects the memory ranges touched while loading vertex data
+            class {
+                /// Merge each range with all following ranges which overlap it or start close to its end
+                void SimplifyRanges() {
+                    auto current = ranges.begin();
+                    while (current != ranges.end()) {
+                        auto following = std::next(current);
+
+                        // NOTE: 32 is added to the end address so that "close" ranges get merged, too
+                        const bool mergeable = following != ranges.end() &&
+                                               current->first + current->second + 32 >= following->first;
+                        if (mergeable) {
+                            // Grow the current range to cover the following one, then drop the latter
+                            current->second = std::max(current->second, following->first + following->second - current->first);
+                            ranges.erase(following);
+                        } else {
+                            ++current;
+                        }
+                    }
+                }
+
+            public:
+                /// Register an access of the given size at the given physical address
+                void AddAccess(u32 paddr, u32 size) {
+                    // Either creates a new (empty) range or picks up the one starting at paddr
+                    u32& range_size = ranges[paddr];
+                    range_size = std::max(range_size, size);
+
+                    SimplifyRanges();
+                }
+
+                /// Accessed ranges, keyed by start address and mapping to the range size
+                std::map<u32, u32> ranges;
+            } memory_accesses;
+
for (unsigned int index = 0; index < regs.num_vertices; ++index)
{
unsigned int vertex = is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index]) : index;
if (is_indexed) {
// TODO: Implement some sort of vertex cache!
+ if (g_debug_context && Pica::g_debug_context->recorder) {
+ int size = index_u16 ? 2 : 1;
+ memory_accesses.AddAccess(base_address + index_info.offset + size * index, size);
+ }
}
// Initialize data for the current vertex
@@ -151,7 +194,14 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
// Load per-vertex data from the loader arrays
for (unsigned int comp = 0; comp < vertex_attribute_elements[i]; ++comp) {
- const u8* srcdata = Memory::GetPhysicalPointer(vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i]);
+ u32 source_addr = vertex_attribute_sources[i] + vertex_attribute_strides[i] * vertex + comp * vertex_attribute_element_size[i];
+ const u8* srcdata = Memory::GetPhysicalPointer(source_addr);
+
+ if (g_debug_context && Pica::g_debug_context->recorder) {
+ memory_accesses.AddAccess(source_addr,
+ (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::FLOAT) ? 4
+ : (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::SHORT) ? 2 : 1);
+ }
const float srcval = (vertex_attribute_formats[i] == Regs::VertexAttributeFormat::BYTE) ? *(s8*)srcdata :
(vertex_attribute_formats[i] == Regs::VertexAttributeFormat::UBYTE) ? *(u8*)srcdata :
@@ -213,14 +263,20 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
}
}
+ for (auto& range : memory_accesses.ranges) {
+ g_debug_context->recorder->MemoryAccessed(Memory::GetPhysicalPointer(range.first),
+ range.second, range.first);
+ }
+
if (Settings::values.use_hw_renderer) {
VideoCore::g_renderer->hw_rasterizer->DrawTriangles();
}
geometry_dumper.Dump();
- if (g_debug_context)
+ if (g_debug_context) {
g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
+ }
break;
}
diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h
index 7926d64e..2573292e 100644
--- a/src/video_core/debug_utils/debug_utils.h
+++ b/src/video_core/debug_utils/debug_utils.h
@@ -14,6 +14,8 @@
#include "common/vector_math.h"
+#include "core/tracer/recorder.h"
+
#include "video_core/pica.h"
namespace Pica {
@@ -129,6 +131,8 @@ public:
Event active_breakpoint;
bool at_breakpoint = false;
+ std::shared_ptr<CiTrace::Recorder> recorder = nullptr;
+
private:
/**
* Private default constructor to make sure people always construct this through Construct()
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index feb20214..46a7b21d 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -420,6 +420,11 @@ struct Regs {
GreaterThanOrEqual = 7,
};
+ enum class StencilAction : u32 {
+ Keep = 0,
+ Xor = 5,
+ };
+
struct {
union {
// If false, logic blending is used
@@ -454,15 +459,35 @@ struct Regs {
BitField< 8, 8, u32> ref;
} alpha_test;
- union {
- BitField< 0, 1, u32> stencil_test_enable;
- BitField< 4, 3, CompareFunc> stencil_test_func;
- BitField< 8, 8, u32> stencil_replacement_value;
- BitField<16, 8, u32> stencil_reference_value;
- BitField<24, 8, u32> stencil_mask;
- } stencil_test;
+ struct {
+ union {
+ // If true, enable stencil testing
+ BitField< 0, 1, u32> enable;
- INSERT_PADDING_WORDS(0x1);
+ // Comparison operation for stencil testing
+ BitField< 4, 3, CompareFunc> func;
+
+ // Value to calculate the new stencil value from
+ BitField< 8, 8, u32> replacement_value;
+
+ // Value to compare against for stencil testing
+ BitField<16, 8, u32> reference_value;
+
+ // Mask to apply on stencil test inputs
+ BitField<24, 8, u32> mask;
+ };
+
+ union {
+ // Action to perform when the stencil test fails
+ BitField< 0, 3, StencilAction> action_stencil_fail;
+
+ // Action to perform when stencil testing passed but depth testing fails
+ BitField< 4, 3, StencilAction> action_depth_fail;
+
+ // Action to perform when both stencil and depth testing pass
+ BitField< 8, 3, StencilAction> action_depth_pass;
+ };
+ } stencil_test;
union {
BitField< 0, 1, u32> depth_test_enable;
@@ -512,7 +537,7 @@ struct Regs {
struct {
INSERT_PADDING_WORDS(0x6);
- DepthFormat depth_format;
+ DepthFormat depth_format; // TODO: Should be a BitField!
BitField<16, 3, ColorFormat> color_format;
INSERT_PADDING_WORDS(0x4);
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 70b11574..c381c2bd 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -126,6 +126,30 @@ static u32 GetDepth(int x, int y) {
}
}
+/**
+ * Reads the stencil value of the given pixel from the depth buffer.
+ * Only the D24S8 format is handled; for any other depth format a warning is logged and 0 is
+ * returned.
+ */
+static u8 GetStencil(int x, int y) {
+    const auto& framebuffer = g_state.regs.framebuffer;
+    const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
+    u8* depth_buffer = Memory::GetPhysicalPointer(addr);
+
+    // The y coordinate is mirrored against the framebuffer height before addressing the buffer
+    y = framebuffer.height - y;
+
+    // Offset of the pixel: Morton offset plus the offset of the row of 8-pixel-high tiles containing y
+    const u32 coarse_y = y & ~7;
+    u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format);
+    u32 stride = framebuffer.width * bytes_per_pixel;
+
+    u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+    u8* src_pixel = depth_buffer + src_offset;
+
+    switch (framebuffer.depth_format) {
+    case Regs::DepthFormat::D24S8:
+        return Color::DecodeD24S8(src_pixel).y;
+
+    default:
+        LOG_WARNING(HW_GPU, "GetStencil called for format which doesn't have a stencil component (format %u)", framebuffer.depth_format);
+        return 0;
+    }
+}
+
static void SetDepth(int x, int y, u32 value) {
const auto& framebuffer = g_state.regs.framebuffer;
const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
@@ -144,13 +168,46 @@ static void SetDepth(int x, int y, u32 value) {
case Regs::DepthFormat::D16:
Color::EncodeD16(value, dst_pixel);
break;
+
case Regs::DepthFormat::D24:
Color::EncodeD24(value, dst_pixel);
break;
+
case Regs::DepthFormat::D24S8:
- // TODO(Subv): Implement the stencil buffer
- Color::EncodeD24S8(value, 0, dst_pixel);
+ Color::EncodeD24X8(value, dst_pixel);
break;
+
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
+ UNIMPLEMENTED();
+ break;
+ }
+}
+
+static void SetStencil(int x, int y, u8 value) {
+ const auto& framebuffer = g_state.regs.framebuffer;
+ const PAddr addr = framebuffer.GetDepthBufferPhysicalAddress();
+ u8* depth_buffer = Memory::GetPhysicalPointer(addr);
+
+ y = framebuffer.height - y;
+
+ const u32 coarse_y = y & ~7;
+ u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(framebuffer.depth_format);
+ u32 stride = framebuffer.width * bytes_per_pixel;
+
+ u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+ u8* dst_pixel = depth_buffer + dst_offset;
+
+ switch (framebuffer.depth_format) {
+ case Pica::Regs::DepthFormat::D16:
+ case Pica::Regs::DepthFormat::D24:
+ // Nothing to do
+ break;
+
+ case Pica::Regs::DepthFormat::D24S8:
+ Color::EncodeX24S8(value, dst_pixel);
+ break;
+
default:
LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", framebuffer.depth_format);
UNIMPLEMENTED();
@@ -158,6 +215,22 @@ static void SetDepth(int x, int y, u32 value) {
}
}
+// TODO: Should the stencil mask be applied to the "dest" or "ref" operands? Most likely not!
+/**
+ * Computes the new stencil value for the given stencil action.
+ * @param action Action to perform
+ * @param dest Stencil value currently stored in the buffer
+ * @param ref Value to combine with dest for actions which need a second operand
+ */
+static u8 PerformStencilAction(Regs::StencilAction action, u8 dest, u8 ref) {
+    if (action == Regs::StencilAction::Keep)
+        return dest;
+
+    if (action == Regs::StencilAction::Xor)
+        return dest ^ ref;
+
+    // Any other action is not handled
+    LOG_CRITICAL(HW_GPU, "Unknown stencil action %x", (int)action);
+    UNIMPLEMENTED();
+    return 0;
+}
+
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
struct Fix12P4 {
Fix12P4() {}
@@ -276,6 +349,9 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
auto textures = regs.GetTextures();
auto tev_stages = regs.GetTevStages();
+ bool stencil_action_enable = g_state.regs.output_merger.stencil_test.enable && g_state.regs.framebuffer.depth_format == Regs::DepthFormat::D24S8;
+ const auto stencil_test = g_state.regs.output_merger.stencil_test;
+
// Enter rasterization loop, starting at the center of the topleft bounding box corner.
// TODO: Not sure if looping through x first might be faster
for (u16 y = min_y + 8; y < max_y; y += 0x10) {
@@ -647,6 +723,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
}
const auto& output_merger = regs.output_merger;
+ // TODO: Does alpha testing happen before or after stencil?
if (output_merger.alpha_test.enable) {
bool pass = false;
@@ -688,6 +765,54 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
continue;
}
+ u8 old_stencil = 0;
+ if (stencil_action_enable) {
+ old_stencil = GetStencil(x >> 4, y >> 4);
+ u8 dest = old_stencil & stencil_test.mask;
+ u8 ref = stencil_test.reference_value & stencil_test.mask;
+
+ bool pass = false;
+ switch (stencil_test.func) {
+ case Regs::CompareFunc::Never:
+ pass = false;
+ break;
+
+ case Regs::CompareFunc::Always:
+ pass = true;
+ break;
+
+ case Regs::CompareFunc::Equal:
+ pass = (ref == dest);
+ break;
+
+ case Regs::CompareFunc::NotEqual:
+ pass = (ref != dest);
+ break;
+
+ case Regs::CompareFunc::LessThan:
+ pass = (ref < dest);
+ break;
+
+ case Regs::CompareFunc::LessThanOrEqual:
+ pass = (ref <= dest);
+ break;
+
+ case Regs::CompareFunc::GreaterThan:
+ pass = (ref > dest);
+ break;
+
+ case Regs::CompareFunc::GreaterThanOrEqual:
+ pass = (ref >= dest);
+ break;
+ }
+
+ if (!pass) {
+ u8 new_stencil = PerformStencilAction(stencil_test.action_stencil_fail, old_stencil, stencil_test.replacement_value);
+ SetStencil(x >> 4, y >> 4, new_stencil);
+ continue;
+ }
+ }
+
// TODO: Does depth indeed only get written even if depth testing is enabled?
if (output_merger.depth_test_enable) {
unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format);
@@ -732,11 +857,22 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
break;
}
- if (!pass)
+ if (!pass) {
+ if (stencil_action_enable) {
+ u8 new_stencil = PerformStencilAction(stencil_test.action_depth_fail, old_stencil, stencil_test.replacement_value);
+ SetStencil(x >> 4, y >> 4, new_stencil);
+ }
continue;
+ }
if (output_merger.depth_write_enable)
SetDepth(x >> 4, y >> 4, z);
+
+ if (stencil_action_enable) {
+ // TODO: What happens if stencil testing is enabled, but depth testing is not? Will stencil get updated anyway?
+ u8 new_stencil = PerformStencilAction(stencil_test.action_depth_pass, old_stencil, stencil_test.replacement_value);
+ SetStencil(x >> 4, y >> 4, new_stencil);
+ }
}
auto dest = GetPixel(x >> 4, y >> 4);
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 9799f74f..96e12839 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -22,6 +22,8 @@
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_shaders.h"
+#include "video_core/debug_utils/debug_utils.h"
+
/**
* Vertex structure that the drawn screen rectangles are composed of.
*/
@@ -129,6 +131,10 @@ void RendererOpenGL::SwapBuffers() {
hw_rasterizer->Reset();
}
}
+
+ if (Pica::g_debug_context && Pica::g_debug_context->recorder) {
+ Pica::g_debug_context->recorder->FrameFinished();
+ }
}
/**