aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/citra/citra.cpp3
-rw-r--r--src/citra/default_ini.h19
-rw-r--r--src/citra/emu_window/emu_window_glfw.cpp11
-rw-r--r--src/citra_qt/CMakeLists.txt9
-rw-r--r--src/citra_qt/bootmanager.cpp3
-rw-r--r--src/citra_qt/debugger/graphics_framebuffer.cpp65
-rw-r--r--src/citra_qt/debugger/graphics_framebuffer.h10
-rw-r--r--src/citra_qt/debugger/graphics_vertex_shader.cpp41
-rw-r--r--src/citra_qt/debugger/profiler.cpp138
-rw-r--r--src/citra_qt/debugger/profiler.h50
-rw-r--r--src/citra_qt/debugger/profiler.ui33
-rw-r--r--src/citra_qt/main.cpp9
-rw-r--r--src/citra_qt/main.h2
-rw-r--r--src/common/CMakeLists.txt4
-rw-r--r--src/common/assert.h31
-rw-r--r--src/common/common.h36
-rw-r--r--src/common/common_funcs.h77
-rw-r--r--src/common/emu_window.cpp50
-rw-r--r--src/common/emu_window.h32
-rw-r--r--src/common/logging/backend.cpp9
-rw-r--r--src/common/logging/backend.h3
-rw-r--r--src/common/logging/filter.h2
-rw-r--r--src/common/logging/text_formatter.cpp7
-rw-r--r--src/common/logging/text_formatter.h3
-rw-r--r--src/common/profiler.cpp182
-rw-r--r--src/common/profiler.h152
-rw-r--r--src/common/profiler_reporting.h108
-rw-r--r--src/common/swap.h111
-rw-r--r--src/common/synchronized_wrapper.h69
-rw-r--r--src/common/thread.h19
-rw-r--r--src/core/arm/dyncom/arm_dyncom_interpreter.cpp80
-rw-r--r--src/core/hle/hle.cpp6
-rw-r--r--src/core/hle/service/cfg/cfg.cpp15
-rw-r--r--src/core/hw/gpu.cpp4
-rw-r--r--src/core/hw/gpu.h8
-rw-r--r--src/video_core/color.h57
-rw-r--r--src/video_core/command_processor.cpp6
-rw-r--r--src/video_core/debug_utils/debug_utils.cpp2
-rw-r--r--src/video_core/pica.h36
-rw-r--r--src/video_core/primitive_assembly.cpp9
-rw-r--r--src/video_core/rasterizer.cpp54
-rw-r--r--src/video_core/renderer_opengl/renderer_opengl.cpp60
-rw-r--r--src/video_core/vertex_shader.cpp75
-rw-r--r--src/video_core/video_core.cpp3
-rw-r--r--src/video_core/video_core.h1
45 files changed, 1356 insertions, 348 deletions
diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp
index 69f0b35b..2c6ced92 100644
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -22,7 +22,8 @@
int __cdecl main(int argc, char **argv) {
std::shared_ptr<Log::Logger> logger = Log::InitGlobalLogger();
Log::Filter log_filter(Log::Level::Debug);
- std::thread logging_thread(Log::TextLoggingLoop, logger, &log_filter);
+ Log::SetFilter(&log_filter);
+ std::thread logging_thread(Log::TextLoggingLoop, logger);
SCOPE_EXIT({
logger->Close();
logging_thread.join();
diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h
index 798ff6e8..fc02a3ce 100644
--- a/src/citra/default_ini.h
+++ b/src/citra/default_ini.h
@@ -33,17 +33,28 @@ pad_cleft =
pad_cright =
[Core]
-gpu_refresh_rate = ## 30 (default)
-frame_skip = ## 0: No frameskip (default), 1 : 2x frameskip, 2 : 4x frameskip, etc.
+# The refresh rate for the GPU
+# Defaults to 30
+gpu_refresh_rate =
+
+# The applied frameskip amount. Must be a power of two.
+# 0 (default): No frameskip, 1: x2 frameskip, 2: x4 frameskip, 3: x8 frameskip, etc.
+frame_skip =
[Data Storage]
+# Whether to create a virtual SD card.
+# 1 (default): Yes, 0: No
use_virtual_sd =
[System Region]
-region_value = ## 0 : Japan, 1 : Usa (default), 2 : Europe, 3 : Australia, 4 : China, 5 : Korea, 6 : Taiwan.
+# The system region that Citra will use during emulation
+# 0: Japan, 1: USA (default), 2: Europe, 3: Australia, 4: China, 5: Korea, 6: Taiwan
+region_value =
[Miscellaneous]
-log_filter = *:Info ## Examples: *:Debug Kernel.SVC:Trace Service.*:Critical
+# A filter which removes logs below a certain logging level.
+# Examples: *:Debug Kernel.SVC:Trace Service.*:Critical
+log_filter = *:Info
)";
}
diff --git a/src/citra/emu_window/emu_window_glfw.cpp b/src/citra/emu_window/emu_window_glfw.cpp
index ec3e8cf3..81231e1e 100644
--- a/src/citra/emu_window/emu_window_glfw.cpp
+++ b/src/citra/emu_window/emu_window_glfw.cpp
@@ -36,20 +36,13 @@ const bool EmuWindow_GLFW::IsOpen() {
}
void EmuWindow_GLFW::OnFramebufferResizeEvent(GLFWwindow* win, int width, int height) {
- ASSERT(width > 0);
- ASSERT(height > 0);
-
- GetEmuWindow(win)->NotifyFramebufferSizeChanged(std::pair<unsigned,unsigned>(width, height));
+ GetEmuWindow(win)->NotifyFramebufferLayoutChanged(EmuWindow::FramebufferLayout::DefaultScreenLayout(width, height));
}
void EmuWindow_GLFW::OnClientAreaResizeEvent(GLFWwindow* win, int width, int height) {
- ASSERT(width > 0);
- ASSERT(height > 0);
-
// NOTE: GLFW provides no proper way to set a minimal window size.
// Hence, we just ignore the corresponding EmuWindow hint.
-
- GetEmuWindow(win)->NotifyClientAreaSizeChanged(std::pair<unsigned,unsigned>(width, height));
+ OnFramebufferResizeEvent(win, width, height);
}
/// EmuWindow_GLFW constructor
diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt
index 586bc84b..ff780cad 100644
--- a/src/citra_qt/CMakeLists.txt
+++ b/src/citra_qt/CMakeLists.txt
@@ -13,6 +13,7 @@ set(SRCS
debugger/graphics_cmdlists.cpp
debugger/graphics_framebuffer.cpp
debugger/graphics_vertex_shader.cpp
+ debugger/profiler.cpp
debugger/ramview.cpp
debugger/registers.cpp
util/spinbox.cpp
@@ -35,6 +36,7 @@ set(HEADERS
debugger/graphics_cmdlists.h
debugger/graphics_framebuffer.h
debugger/graphics_vertex_shader.h
+ debugger/profiler.h
debugger/ramview.h
debugger/registers.h
util/spinbox.h
@@ -48,6 +50,7 @@ set(UIS
config/controller_config.ui
debugger/callstack.ui
debugger/disassembler.ui
+ debugger/profiler.ui
debugger/registers.ui
hotkeys.ui
main.ui
@@ -61,7 +64,11 @@ else()
qt4_wrap_ui(UI_HDRS ${UIS})
endif()
-add_executable(citra-qt ${SRCS} ${HEADERS} ${UI_HDRS})
+if (APPLE)
+ add_executable(citra-qt MACOSX_BUNDLE ${SRCS} ${HEADERS} ${UI_HDRS})
+else()
+ add_executable(citra-qt ${SRCS} ${HEADERS} ${UI_HDRS})
+endif()
target_link_libraries(citra-qt core common video_core qhexedit)
target_link_libraries(citra-qt ${OPENGL_gl_LIBRARY} ${CITRA_QT_LIBS})
target_link_libraries(citra-qt ${PLATFORM_LIBRARIES})
diff --git a/src/citra_qt/bootmanager.cpp b/src/citra_qt/bootmanager.cpp
index 6514288a..a040e75c 100644
--- a/src/citra_qt/bootmanager.cpp
+++ b/src/citra_qt/bootmanager.cpp
@@ -155,6 +155,7 @@ GRenderWindow::GRenderWindow(QWidget* parent) : QWidget(parent), emu_thread(this
child = new GGLWidgetInternal(fmt, this);
QBoxLayout* layout = new QHBoxLayout(this);
+
resize(VideoCore::kScreenTopWidth, VideoCore::kScreenTopHeight + VideoCore::kScreenBottomHeight);
layout->addWidget(child);
layout->setMargin(0);
@@ -234,7 +235,7 @@ void GRenderWindow::OnFramebufferSizeChanged()
unsigned height = child->QPaintDevice::height();
#endif
- NotifyFramebufferSizeChanged(std::make_pair(width, height));
+ NotifyFramebufferLayoutChanged(EmuWindow::FramebufferLayout::DefaultScreenLayout(width, height));
}
void GRenderWindow::BackupGeometry()
diff --git a/src/citra_qt/debugger/graphics_framebuffer.cpp b/src/citra_qt/debugger/graphics_framebuffer.cpp
index 5bd6c023..d621d720 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.cpp
+++ b/src/citra_qt/debugger/graphics_framebuffer.cpp
@@ -27,6 +27,7 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug
framebuffer_source_list = new QComboBox;
framebuffer_source_list->addItem(tr("Active Render Target"));
+ framebuffer_source_list->addItem(tr("Active Depth Buffer"));
framebuffer_source_list->addItem(tr("Custom"));
framebuffer_source_list->setCurrentIndex(static_cast<int>(framebuffer_source));
@@ -49,6 +50,9 @@ GraphicsFramebufferWidget::GraphicsFramebufferWidget(std::shared_ptr<Pica::Debug
framebuffer_format_control->addItem(tr("RGB5A1"));
framebuffer_format_control->addItem(tr("RGB565"));
framebuffer_format_control->addItem(tr("RGBA4"));
+ framebuffer_format_control->addItem(tr("D16"));
+ framebuffer_format_control->addItem(tr("D24"));
+ framebuffer_format_control->addItem(tr("D24S8"));
// TODO: This QLabel should shrink the image to the available space rather than just expanding...
framebuffer_picture_label = new QLabel;
@@ -172,8 +176,7 @@ void GraphicsFramebufferWidget::OnUpdate()
{
// TODO: Store a reference to the registers in the debug context instead of accessing them directly...
- auto framebuffer = Pica::registers.framebuffer;
- using Framebuffer = decltype(framebuffer);
+ const auto& framebuffer = Pica::registers.framebuffer;
framebuffer_address = framebuffer.GetColorBufferPhysicalAddress();
framebuffer_width = framebuffer.GetWidth();
@@ -184,6 +187,18 @@ void GraphicsFramebufferWidget::OnUpdate()
break;
}
+ case Source::DepthBuffer:
+ {
+ const auto& framebuffer = Pica::registers.framebuffer;
+
+ framebuffer_address = framebuffer.GetDepthBufferPhysicalAddress();
+ framebuffer_width = framebuffer.GetWidth();
+ framebuffer_height = framebuffer.GetHeight();
+ framebuffer_format = Format::D16;
+
+ break;
+ }
+
case Source::Custom:
{
// Keep user-specified values
@@ -197,15 +212,16 @@ void GraphicsFramebufferWidget::OnUpdate()
// TODO: Implement a good way to visualize alpha components!
// TODO: Unify this decoding code with the texture decoder
- u32 bytes_per_pixel = GPU::Regs::BytesPerPixel(GPU::Regs::PixelFormat(framebuffer_format));
+ u32 bytes_per_pixel = GraphicsFramebufferWidget::BytesPerPixel(framebuffer_format);
QImage decoded_image(framebuffer_width, framebuffer_height, QImage::Format_ARGB32);
- u8* color_buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
+ u8* buffer = Memory::GetPointer(Pica::PAddrToVAddr(framebuffer_address));
+
for (unsigned int y = 0; y < framebuffer_height; ++y) {
for (unsigned int x = 0; x < framebuffer_width; ++x) {
const u32 coarse_y = y & ~7;
u32 offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * framebuffer_width * bytes_per_pixel;
- const u8* pixel = color_buffer + offset;
+ const u8* pixel = buffer + offset;
Math::Vec4<u8> color = { 0, 0, 0, 0 };
switch (framebuffer_format) {
@@ -224,6 +240,29 @@ void GraphicsFramebufferWidget::OnUpdate()
case Format::RGBA4:
color = Color::DecodeRGBA4(pixel);
break;
+ case Format::D16:
+ {
+ u32 data = Color::DecodeD16(pixel);
+ color.r() = data & 0xFF;
+ color.g() = (data >> 8) & 0xFF;
+ break;
+ }
+ case Format::D24:
+ {
+ u32 data = Color::DecodeD24(pixel);
+ color.r() = data & 0xFF;
+ color.g() = (data >> 8) & 0xFF;
+ color.b() = (data >> 16) & 0xFF;
+ break;
+ }
+ case Format::D24S8:
+ {
+ Math::Vec2<u32> data = Color::DecodeD24S8(pixel);
+ color.r() = data.x & 0xFF;
+ color.g() = (data.x >> 8) & 0xFF;
+ color.b() = (data.x >> 16) & 0xFF;
+ break;
+ }
default:
qDebug() << "Unknown fb color format " << static_cast<int>(framebuffer_format);
break;
@@ -240,3 +279,19 @@ void GraphicsFramebufferWidget::OnUpdate()
framebuffer_format_control->setCurrentIndex(static_cast<int>(framebuffer_format));
framebuffer_picture_label->setPixmap(pixmap);
}
+
+/// Returns the number of bytes occupied by a single pixel stored in the given format.
+/// Values that do not match any known Format enumerator yield 0.
+u32 GraphicsFramebufferWidget::BytesPerPixel(GraphicsFramebufferWidget::Format format) {
+    switch (format) {
+    case Format::RGBA8:
+    case Format::D24S8:
+        return 4;
+    case Format::RGB8:
+    case Format::D24:
+        return 3;
+    case Format::RGB5A1:
+    case Format::RGB565:
+    case Format::RGBA4:
+    case Format::D16:
+        return 2;
+    }
+
+    // Every enumerator is handled above, but an out-of-range value (e.g. produced by a cast)
+    // must not flow off the end of a non-void function, which would be undefined behaviour.
+    return 0;
+}
diff --git a/src/citra_qt/debugger/graphics_framebuffer.h b/src/citra_qt/debugger/graphics_framebuffer.h
index 15ebd1f7..4cb396ff 100644
--- a/src/citra_qt/debugger/graphics_framebuffer.h
+++ b/src/citra_qt/debugger/graphics_framebuffer.h
@@ -20,8 +20,9 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock {
using Event = Pica::DebugContext::Event;
enum class Source {
- PicaTarget = 0,
- Custom = 1,
+ PicaTarget = 0,
+ DepthBuffer = 1,
+ Custom = 2,
// TODO: Add GPU framebuffer sources!
};
@@ -32,8 +33,13 @@ class GraphicsFramebufferWidget : public BreakPointObserverDock {
RGB5A1 = 2,
RGB565 = 3,
RGBA4 = 4,
+ D16 = 5,
+ D24 = 6,
+ D24S8 = 7
};
+ static u32 BytesPerPixel(Format format);
+
public:
GraphicsFramebufferWidget(std::shared_ptr<Pica::DebugContext> debug_context, QWidget* parent = nullptr);
diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp
index 06eaf0bf..3b072d01 100644
--- a/src/citra_qt/debugger/graphics_vertex_shader.cpp
+++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp
@@ -12,6 +12,7 @@
#include "graphics_vertex_shader.h"
+using nihstro::OpCode;
using nihstro::Instruction;
using nihstro::SourceRegister;
using nihstro::SwizzlePattern;
@@ -78,7 +79,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
const SwizzlePattern& swizzle = info.swizzle_info[instr.common.operand_desc_id].pattern;
// longest known instruction name: "setemit "
- output << std::setw(8) << std::left << instr.opcode.GetInfo().name;
+ output << std::setw(8) << std::left << instr.opcode.Value().GetInfo().name;
// e.g. "-c92.xyzw"
static auto print_input = [](std::stringstream& output, const SourceRegister& input,
@@ -109,16 +110,16 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
print_input_indexed(output, input, negate, swizzle_mask, address_register_name);
};
- switch (instr.opcode.GetInfo().type) {
- case Instruction::OpCodeType::Trivial:
+ switch (instr.opcode.Value().GetInfo().type) {
+ case OpCode::Type::Trivial:
// Nothing to do here
break;
- case Instruction::OpCodeType::Arithmetic:
+ case OpCode::Type::Arithmetic:
{
// Use custom code for special instructions
- switch (instr.opcode.EffectiveOpCode()) {
- case Instruction::OpCode::CMP:
+ switch (instr.opcode.Value().EffectiveOpCode()) {
+ case OpCode::Id::CMP:
{
// NOTE: CMP always writes both cc components, so we do not consider the dest mask here.
output << std::setw(4) << std::right << "cc.";
@@ -142,13 +143,13 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
default:
{
- bool src_is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed);
+ bool src_is_inverted = 0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed);
- if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::Dest) {
+ if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Dest) {
// e.g. "r12.xy__"
- output << std::setw(4) << std::right << instr.common.dest.GetName() + ".";
+ output << std::setw(4) << std::right << instr.common.dest.Value().GetName() + ".";
output << swizzle.DestMaskToString();
- } else if (instr.opcode.GetInfo().subtype == Instruction::OpCodeInfo::MOVA) {
+ } else if (instr.opcode.Value().GetInfo().subtype == OpCode::Info::MOVA) {
output << std::setw(4) << std::right << "a0.";
output << swizzle.DestMaskToString();
} else {
@@ -156,7 +157,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
}
output << " ";
- if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::Src1) {
+ if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Src1) {
SourceRegister src1 = instr.common.GetSrc1(src_is_inverted);
print_input_indexed(output, src1, swizzle.negate_src1, swizzle.SelectorToString(false), instr.common.AddressRegisterName());
} else {
@@ -164,7 +165,7 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
}
// TODO: In some cases, the Address Register is used as an index for SRC2 instead of SRC1
- if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::Src2) {
+ if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::Src2) {
SourceRegister src2 = instr.common.GetSrc2(src_is_inverted);
print_input(output, src2, swizzle.negate_src2, swizzle.SelectorToString(false));
}
@@ -175,17 +176,17 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
break;
}
- case Instruction::OpCodeType::Conditional:
+ case OpCode::Type::Conditional:
{
- switch (instr.opcode.EffectiveOpCode()) {
- case Instruction::OpCode::LOOP:
+ switch (instr.opcode.Value().EffectiveOpCode()) {
+ case OpCode::Id::LOOP:
output << "(unknown instruction format)";
break;
default:
output << "if ";
- if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::HasCondition) {
+ if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::HasCondition) {
const char* ops[] = {
" || ", " && ", "", ""
};
@@ -198,22 +199,22 @@ QVariant GraphicsVertexShaderModel::data(const QModelIndex& index, int role) con
output << ((!instr.flow_control.refy) ? "!" : " ") << "cc.y";
output << " ";
- } else if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::HasUniformIndex) {
+ } else if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::HasUniformIndex) {
output << "b" << instr.flow_control.bool_uniform_id << " ";
}
u32 target_addr = instr.flow_control.dest_offset;
u32 target_addr_else = instr.flow_control.dest_offset;
- if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::HasAlternative) {
+ if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::HasAlternative) {
output << "else jump to 0x" << std::setw(4) << std::right << std::setfill('0') << 4 * instr.flow_control.dest_offset << " ";
- } else if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::HasExplicitDest) {
+ } else if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::HasExplicitDest) {
output << "jump to 0x" << std::setw(4) << std::right << std::setfill('0') << 4 * instr.flow_control.dest_offset << " ";
} else {
// TODO: Handle other cases
}
- if (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::HasFinishPoint) {
+ if (instr.opcode.Value().GetInfo().subtype & OpCode::Info::HasFinishPoint) {
output << "(return on " << std::setw(4) << std::right << std::setfill('0')
<< 4 * instr.flow_control.dest_offset + 4 * instr.flow_control.num_instructions << ")";
}
diff --git a/src/citra_qt/debugger/profiler.cpp b/src/citra_qt/debugger/profiler.cpp
new file mode 100644
index 00000000..ae0568b6
--- /dev/null
+++ b/src/citra_qt/debugger/profiler.cpp
@@ -0,0 +1,138 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "profiler.h"
+
+#include "common/profiler_reporting.h"
+
+using namespace Common::Profiling;
+
+/// Picks the statistic of `duration` that belongs to the given view column and returns it as a
+/// float number of milliseconds: column 1 is the average, 2 the minimum and 3 the maximum.
+/// Any other column yields an invalid QVariant.
+static QVariant GetDataForColumn(int col, const AggregatedDuration& duration)
+{
+    // Re-expresses a profiler duration as fractional milliseconds.
+    const auto to_milliseconds = [](Duration value) -> float {
+        using FloatMs = std::chrono::duration<float, std::chrono::milliseconds::period>;
+        return std::chrono::duration_cast<FloatMs>(value).count();
+    };
+
+    if (col == 1) {
+        return to_milliseconds(duration.avg);
+    }
+    if (col == 2) {
+        return to_milliseconds(duration.min);
+    }
+    if (col == 3) {
+        return to_milliseconds(duration.max);
+    }
+    return QVariant();
+}
+
+/// Looks up the timing category registered under index `id`.
+/// Returns a pointer to its info entry, or nullptr when `id` is negative or past the end of the
+/// category list.
+static const TimingCategoryInfo* GetCategoryInfo(int id)
+{
+    const auto& categories = GetProfilingManager().GetTimingCategoriesInfo();
+
+    // Check the sign explicitly rather than relying on the implicit signed-to-unsigned
+    // conversion that a direct comparison against size() would perform.
+    if (id < 0 || static_cast<unsigned int>(id) >= categories.size()) {
+        return nullptr;
+    }
+    return &categories[id];
+}
+
+/// Builds the model: fetches an initial snapshot of the aggregated results, then sizes the
+/// per-category list to the number of timing categories known to the profiling manager.
+ProfilerModel::ProfilerModel(QObject* parent) : QAbstractItemModel(parent)
+{
+    updateProfilingInfo();
+
+    // One slot per registered timing category.
+    results.time_per_category.resize(GetProfilingManager().GetTimingCategoriesInfo().size());
+}
+
+/// Supplies the caption of each column for the horizontal header.
+/// Every other orientation, role or section yields an invalid QVariant.
+QVariant ProfilerModel::headerData(int section, Qt::Orientation orientation, int role) const
+{
+    const bool wants_column_caption = (orientation == Qt::Horizontal) && (role == Qt::DisplayRole);
+    if (!wants_column_caption) {
+        return QVariant();
+    }
+
+    switch (section) {
+    case 0: return tr("Category");
+    case 1: return tr("Avg");
+    case 2: return tr("Min");
+    case 3: return tr("Max");
+    default: return QVariant();
+    }
+}
+
+/// Creates the index for the given cell. The parent argument is not consulted.
+QModelIndex ProfilerModel::index(int row, int column, const QModelIndex& /*parent*/) const
+{
+    return createIndex(row, column);
+}
+
+/// Always returns an invalid index: no item of this model reports a parent.
+QModelIndex ProfilerModel::parent(const QModelIndex& /*child*/) const
+{
+    return QModelIndex();
+}
+
+/// Number of columns shown by the view: the category name plus the avg/min/max timings.
+int ProfilerModel::columnCount(const QModelIndex& /*parent*/) const
+{
+    const int column_count = 4;
+    return column_count;
+}
+
+/// Number of rows below `parent`. A valid parent has no rows; the root holds the two frame-time
+/// rows followed by one row per entry of the per-category results.
+int ProfilerModel::rowCount(const QModelIndex& parent) const
+{
+    if (parent.isValid()) {
+        return 0;
+    }
+
+    return results.time_per_category.size() + 2;
+}
+
+/// Returns the text or value displayed in a cell.
+/// Row 0 holds the frame time, row 1 the frame time including swapping, and every following row
+/// one timing category. Column 0 carries the row's name, the remaining columns its timings.
+/// Roles other than Qt::DisplayRole yield an invalid QVariant.
+QVariant ProfilerModel::data(const QModelIndex& index, int role) const
+{
+    if (role != Qt::DisplayRole) {
+        return QVariant();
+    }
+
+    const int row = index.row();
+    const int column = index.column();
+    const bool is_name_column = (column == 0);
+
+    if (row == 0) {
+        if (is_name_column) {
+            return tr("Frame");
+        }
+        return GetDataForColumn(column, results.frame_time);
+    }
+
+    if (row == 1) {
+        if (is_name_column) {
+            return tr("Frame (with swapping)");
+        }
+        return GetDataForColumn(column, results.interframe_time);
+    }
+
+    // The category rows start after the two fixed frame-time rows.
+    if (is_name_column) {
+        const TimingCategoryInfo* info = GetCategoryInfo(row - 2);
+        return info != nullptr ? QString(info->name) : QVariant();
+    }
+
+    if (row - 2 < results.time_per_category.size()) {
+        return GetDataForColumn(column, results.time_per_category[row - 2]);
+    }
+    return QVariant();
+}
+
+/// Replaces the cached results with the aggregator's current values and notifies attached
+/// views that the timing columns (1 through 3) of every row have changed.
+void ProfilerModel::updateProfilingInfo()
+{
+    results = GetTimingResultsAggregator()->GetAggregatedResults();
+
+    const QModelIndex first_changed = createIndex(0, 1);
+    const QModelIndex last_changed = createIndex(rowCount() - 1, 3);
+    emit dataChanged(first_changed, last_changed);
+}
+
+/// Sets up the dock widget: builds the designer UI, attaches a ProfilerModel to the tree view
+/// and wires up the signals that drive the refreshes.
+ProfilerWidget::ProfilerWidget(QWidget* parent) : QDockWidget(parent)
+{
+    ui.setupUi(this);
+
+    model = new ProfilerModel(this);
+    ui.treeView->setModel(model);
+
+    // Each timer tick pulls fresh results into the model.
+    connect(&update_timer, SIGNAL(timeout()), model, SLOT(updateProfilingInfo()));
+    // Showing or hiding the dock switches the updates on or off.
+    connect(this, SIGNAL(visibilityChanged(bool)), this, SLOT(setProfilingInfoUpdateEnabled(bool)));
+}
+
+/// Starts or stops the refresh timer. When enabling, the timer is started with a 100 ms
+/// interval and the model is refreshed once right away.
+void ProfilerWidget::setProfilingInfoUpdateEnabled(bool enable)
+{
+    if (!enable) {
+        update_timer.stop();
+        return;
+    }
+
+    update_timer.start(100);
+    model->updateProfilingInfo();
+}
diff --git a/src/citra_qt/debugger/profiler.h b/src/citra_qt/debugger/profiler.h
new file mode 100644
index 00000000..a6d87aa0
--- /dev/null
+++ b/src/citra_qt/debugger/profiler.h
@@ -0,0 +1,50 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <QAbstractItemModel>
+#include <QDockWidget>
+#include <QTimer>
+#include "ui_profiler.h"
+
+#include "common/profiler_reporting.h"
+
+/// Item model that exposes the aggregated profiling results to a Qt view.
+class ProfilerModel : public QAbstractItemModel
+{
+    Q_OBJECT
+
+public:
+    explicit ProfilerModel(QObject* parent);
+
+    // QAbstractItemModel interface
+    QVariant headerData(int section, Qt::Orientation orientation, int role = Qt::DisplayRole) const override;
+    QModelIndex index(int row, int column, const QModelIndex& parent = QModelIndex()) const override;
+    QModelIndex parent(const QModelIndex& child) const override;
+    int columnCount(const QModelIndex& parent = QModelIndex()) const override;
+    int rowCount(const QModelIndex& parent = QModelIndex()) const override;
+    QVariant data(const QModelIndex& index, int role = Qt::DisplayRole) const override;
+
+public slots:
+    /// Fetches the latest aggregated results and notifies attached views.
+    void updateProfilingInfo();
+
+private:
+    /// Snapshot of the results taken by the last call to updateProfilingInfo().
+    Common::Profiling::AggregatedFrameResult results;
+};
+
+/// Dock widget that displays a ProfilerModel in a tree view and refreshes it on a timer.
+class ProfilerWidget : public QDockWidget
+{
+    Q_OBJECT
+
+public:
+    explicit ProfilerWidget(QWidget* parent = nullptr);
+
+private slots:
+    /// Starts (enable == true) or stops (enable == false) the periodic refresh.
+    void setProfilingInfoUpdateEnabled(bool enable);
+
+private:
+    Ui::Profiler ui;
+    ProfilerModel* model;
+
+    /// Triggers the periodic model refresh while updates are enabled.
+    QTimer update_timer;
+};
diff --git a/src/citra_qt/debugger/profiler.ui b/src/citra_qt/debugger/profiler.ui
new file mode 100644
index 00000000..d3c9a9a1
--- /dev/null
+++ b/src/citra_qt/debugger/profiler.ui
@@ -0,0 +1,33 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<ui version="4.0">
+ <class>Profiler</class>
+ <widget class="QDockWidget" name="Profiler">
+ <property name="geometry">
+ <rect>
+ <x>0</x>
+ <y>0</y>
+ <width>400</width>
+ <height>300</height>
+ </rect>
+ </property>
+ <property name="windowTitle">
+ <string>Profiler</string>
+ </property>
+ <widget class="QWidget" name="dockWidgetContents">
+ <layout class="QVBoxLayout" name="verticalLayout">
+ <item>
+ <widget class="QTreeView" name="treeView">
+ <property name="alternatingRowColors">
+ <bool>true</bool>
+ </property>
+ <property name="uniformRowHeights">
+ <bool>true</bool>
+ </property>
+ </widget>
+ </item>
+ </layout>
+ </widget>
+ </widget>
+ <resources/>
+ <connections/>
+</ui>
diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp
index 881c7d33..e5ca0412 100644
--- a/src/citra_qt/main.cpp
+++ b/src/citra_qt/main.cpp
@@ -35,6 +35,7 @@
#include "debugger/graphics_cmdlists.h"
#include "debugger/graphics_framebuffer.h"
#include "debugger/graphics_vertex_shader.h"
+#include "debugger/profiler.h"
#include "core/settings.h"
#include "core/system.h"
@@ -57,6 +58,10 @@ GMainWindow::GMainWindow()
render_window = new GRenderWindow;
render_window->hide();
+ profilerWidget = new ProfilerWidget(this);
+ addDockWidget(Qt::BottomDockWidgetArea, profilerWidget);
+ profilerWidget->hide();
+
disasmWidget = new DisassemblerWidget(this, render_window->GetEmuThread());
addDockWidget(Qt::BottomDockWidgetArea, disasmWidget);
disasmWidget->hide();
@@ -90,6 +95,7 @@ GMainWindow::GMainWindow()
graphicsVertexShaderWidget->hide();
QMenu* debug_menu = ui.menu_View->addMenu(tr("Debugging"));
+ debug_menu->addAction(profilerWidget->toggleViewAction());
debug_menu->addAction(disasmWidget->toggleViewAction());
debug_menu->addAction(registersWidget->toggleViewAction());
debug_menu->addAction(callstackWidget->toggleViewAction());
@@ -310,7 +316,8 @@ int __cdecl main(int argc, char* argv[])
{
std::shared_ptr<Log::Logger> logger = Log::InitGlobalLogger();
Log::Filter log_filter(Log::Level::Info);
- std::thread logging_thread(Log::TextLoggingLoop, logger, &log_filter);
+ Log::SetFilter(&log_filter);
+ std::thread logging_thread(Log::TextLoggingLoop, logger);
SCOPE_EXIT({
logger->Close();
logging_thread.join();
diff --git a/src/citra_qt/main.h b/src/citra_qt/main.h
index dd53489d..9b57c577 100644
--- a/src/citra_qt/main.h
+++ b/src/citra_qt/main.h
@@ -11,6 +11,7 @@
class GImageInfo;
class GRenderWindow;
+class ProfilerWidget;
class DisassemblerWidget;
class RegistersWidget;
class CallstackWidget;
@@ -54,6 +55,7 @@ private:
GRenderWindow* render_window;
+ ProfilerWidget* profilerWidget;
DisassemblerWidget* disasmWidget;
RegistersWidget* registersWidget;
CallstackWidget* callstackWidget;
diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt
index b05c3554..daa2d59d 100644
--- a/src/common/CMakeLists.txt
+++ b/src/common/CMakeLists.txt
@@ -14,6 +14,7 @@ set(SRCS
mem_arena.cpp
memory_util.cpp
misc.cpp
+ profiler.cpp
scm_rev.cpp
string_util.cpp
symbols.cpp
@@ -48,11 +49,14 @@ set(HEADERS
mem_arena.h
memory_util.h
platform.h
+ profiler.h
+ profiler_reporting.h
scm_rev.h
scope_exit.h
string_util.h
swap.h
symbols.h
+ synchronized_wrapper.h
thread.h
thread_queue_list.h
thunk.h
diff --git a/src/common/assert.h b/src/common/assert.h
index 3b2232a7..9ca7adb1 100644
--- a/src/common/assert.h
+++ b/src/common/assert.h
@@ -4,24 +4,43 @@
#pragma once
+#include <cstdlib>
+
#include "common/common_funcs.h"
+// For asserts we'd like to keep all the junk executed when an assert happens away from the
+// important code in the function. One way of doing this is to put all the relevant code inside a
+// lambda and force the compiler to not inline it. Unfortunately, MSVC seems to have no syntax to
+// specify __declspec on lambda functions, so what we do instead is define a noinline wrapper
+// template that calls the lambda. This seems to generate an extra instruction at the call-site
+// compared to the ideal implementation (which wouldn't support ASSERT_MSG parameters), but is good
+// enough for our purposes.
+/// Invokes `fn` (the failure-reporting code of an assertion), then crashes the program.
+/// Never returns; marked noinline so the reporting code stays out of the caller's body.
+template <typename Fn>
+#if defined(_MSC_VER)
+    __declspec(noinline, noreturn)
+#elif defined(__GNUC__)
+    __attribute__((noinline, noreturn, cold))
+#endif
+static void assert_noinline_call(const Fn& fn) {
+    fn();
+    Crash();
+    // <cstdlib> only guarantees the std-qualified name.
+    std::exit(1); // Keeps GCC's mouth shut about this actually returning
+}
+
// TODO (yuriks) allow synchronous logging so we don't need printf
#define ASSERT(_a_) \
- do if (!(_a_)) {\
+ do if (!(_a_)) { assert_noinline_call([] { \
fprintf(stderr, "Assertion Failed!\n\n Line: %d\n File: %s\n Time: %s\n", \
__LINE__, __FILE__, __TIME__); \
- Crash(); \
- } while (0)
+ }); } while (0)
#define ASSERT_MSG(_a_, ...) \
- do if (!(_a_)) {\
+ do if (!(_a_)) { assert_noinline_call([&] { \
fprintf(stderr, "Assertion Failed!\n\n Line: %d\n File: %s\n Time: %s\n", \
__LINE__, __FILE__, __TIME__); \
fprintf(stderr, __VA_ARGS__); \
fprintf(stderr, "\n"); \
- Crash(); \
- } while (0)
+ }); } while (0)
#define UNREACHABLE() ASSERT_MSG(false, "Unreachable code!")
diff --git a/src/common/common.h b/src/common/common.h
index 948dc536..f7d0f55c 100644
--- a/src/common/common.h
+++ b/src/common/common.h
@@ -117,40 +117,4 @@ enum EMUSTATE_CHANGE
EMUSTATE_CHANGE_STOP
};
-
-#ifdef _MSC_VER
-inline unsigned long long bswap64(unsigned long long x) { return _byteswap_uint64(x); }
-inline unsigned int bswap32(unsigned int x) { return _byteswap_ulong(x); }
-inline unsigned short bswap16(unsigned short x) { return _byteswap_ushort(x); }
-#else
-// TODO: speedup
-inline unsigned short bswap16(unsigned short x) { return (x << 8) | (x >> 8); }
-inline unsigned int bswap32(unsigned int x) { return (x >> 24) | ((x & 0xFF0000) >> 8) | ((x & 0xFF00) << 8) | (x << 24);}
-inline unsigned long long bswap64(unsigned long long x) {return ((unsigned long long)bswap32(x) << 32) | bswap32(x >> 32); }
-#endif
-
-inline float bswapf(float f) {
- union {
- float f;
- unsigned int u32;
- } dat1, dat2;
-
- dat1.f = f;
- dat2.u32 = bswap32(dat1.u32);
-
- return dat2.f;
-}
-
-inline double bswapd(double f) {
- union {
- double f;
- unsigned long long u64;
- } dat1, dat2;
-
- dat1.f = f;
- dat2.u64 = bswap64(dat1.u64);
-
- return dat2.f;
-}
-
#include "swap.h"
diff --git a/src/common/common_funcs.h b/src/common/common_funcs.h
index d56156e4..e76cb7d6 100644
--- a/src/common/common_funcs.h
+++ b/src/common/common_funcs.h
@@ -37,11 +37,6 @@
#ifndef _MSC_VER
#include <errno.h>
-#ifdef __linux__
-#include <byteswap.h>
-#elif defined __FreeBSD__
-#include <sys/endian.h>
-#endif
#if defined(__x86_64__) || defined(_M_X64)
#define Crash() __asm__ __volatile__("int $3")
@@ -145,75 +140,3 @@ inline u64 _rotr64(u64 x, unsigned int shift){
// This function might change the error code.
// Defined in Misc.cpp.
const char* GetLastErrorMsg();
-
-namespace Common
-{
-inline u8 swap8(u8 _data) {return _data;}
-inline u32 swap24(const u8* _data) {return (_data[0] << 16) | (_data[1] << 8) | _data[2];}
-
-#ifdef ANDROID
-#undef swap16
-#undef swap32
-#undef swap64
-#endif
-
-#ifdef _MSC_VER
-inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);}
-inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);}
-inline u64 swap64(u64 _data) {return _byteswap_uint64(_data);}
-#elif _M_ARM
-inline u16 swap16 (u16 _data) { u32 data = _data; __asm__ ("rev16 %0, %1\n" : "=l" (data) : "l" (data)); return (u16)data;}
-inline u32 swap32 (u32 _data) {__asm__ ("rev %0, %1\n" : "=l" (_data) : "l" (_data)); return _data;}
-inline u64 swap64(u64 _data) {return ((u64)swap32(_data) << 32) | swap32(_data >> 32);}
-#elif __linux__
-inline u16 swap16(u16 _data) {return bswap_16(_data);}
-inline u32 swap32(u32 _data) {return bswap_32(_data);}
-inline u64 swap64(u64 _data) {return bswap_64(_data);}
-#elif __APPLE__
-inline __attribute__((always_inline)) u16 swap16(u16 _data)
- {return (_data >> 8) | (_data << 8);}
-inline __attribute__((always_inline)) u32 swap32(u32 _data)
- {return __builtin_bswap32(_data);}
-inline __attribute__((always_inline)) u64 swap64(u64 _data)
- {return __builtin_bswap64(_data);}
-#elif __FreeBSD__
-inline u16 swap16(u16 _data) {return bswap16(_data);}
-inline u32 swap32(u32 _data) {return bswap32(_data);}
-inline u64 swap64(u64 _data) {return bswap64(_data);}
-#else
-// Slow generic implementation.
-inline u16 swap16(u16 data) {return (data >> 8) | (data << 8);}
-inline u32 swap32(u32 data) {return (swap16(data) << 16) | swap16(data >> 16);}
-inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 32);}
-#endif
-
-inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);}
-inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);}
-inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);}
-
-template <int count>
-void swap(u8*);
-
-template <>
-inline void swap<1>(u8* data)
-{}
-
-template <>
-inline void swap<2>(u8* data)
-{
- *reinterpret_cast<u16*>(data) = swap16(data);
-}
-
-template <>
-inline void swap<4>(u8* data)
-{
- *reinterpret_cast<u32*>(data) = swap32(data);
-}
-
-template <>
-inline void swap<8>(u8* data)
-{
- *reinterpret_cast<u64*>(data) = swap64(data);
-}
-
-} // Namespace Common
diff --git a/src/common/emu_window.cpp b/src/common/emu_window.cpp
index 48bb35db..6459d2f3 100644
--- a/src/common/emu_window.cpp
+++ b/src/common/emu_window.cpp
@@ -3,6 +3,7 @@
// Refer to the license.txt file included.
#include "emu_window.h"
+#include "video_core/video_core.h"
void EmuWindow::KeyPressed(KeyMap::HostDeviceKey key) {
Service::HID::PadState mapped_key = KeyMap::GetPadKey(key);
@@ -15,3 +16,52 @@ void EmuWindow::KeyReleased(KeyMap::HostDeviceKey key) {
Service::HID::PadButtonRelease(mapped_key);
}
+
+// Builds the default layout for a window framebuffer of the given size: the top screen is placed
+// directly above the bottom screen, and borders are added on one axis so that the emulated aspect
+// ratio is kept. Both width and height must be non-zero (asserted below).
+EmuWindow::FramebufferLayout EmuWindow::FramebufferLayout::DefaultScreenLayout(unsigned width, unsigned height) {
+ ASSERT(width > 0);
+ ASSERT(height > 0);
+
+ EmuWindow::FramebufferLayout res = { width, height, {}, {} };
+
+ // Aspect ratios are expressed as height / width here. The emulated content is treated as being
+ // twice the top screen height tall and one top screen width wide.
+ float window_aspect_ratio = static_cast<float>(height) / width;
+ float emulation_aspect_ratio = static_cast<float>(VideoCore::kScreenTopHeight * 2) /
+ VideoCore::kScreenTopWidth;
+
+ if (window_aspect_ratio > emulation_aspect_ratio) {
+ // Window is narrower than the emulation content => apply borders to the top and bottom
+ int viewport_height = static_cast<int>(std::round(emulation_aspect_ratio * width));
+
+ // The top screen spans the full window width and takes the upper half of the viewport.
+ res.top_screen.left = 0;
+ res.top_screen.right = res.top_screen.left + width;
+ res.top_screen.top = (height - viewport_height) / 2;
+ res.top_screen.bottom = res.top_screen.top + viewport_height / 2;
+
+ // Scale the bottom screen width by kScreenBottomWidth / kScreenTopWidth relative to the top
+ // screen, then split the leftover width evenly between the left and right borders.
+ int bottom_width = static_cast<int>((static_cast<float>(VideoCore::kScreenBottomWidth) /
+ VideoCore::kScreenTopWidth) * (res.top_screen.right - res.top_screen.left));
+ int bottom_border = ((res.top_screen.right - res.top_screen.left) - bottom_width) / 2;
+
+ // The bottom screen starts where the top screen ends and takes the other half of the viewport.
+ res.bottom_screen.left = bottom_border;
+ res.bottom_screen.right = res.bottom_screen.left + bottom_width;
+ res.bottom_screen.top = res.top_screen.bottom;
+ res.bottom_screen.bottom = res.bottom_screen.top + viewport_height / 2;
+ } else {
+ // Otherwise, apply borders to the left and right sides of the window.
+ int viewport_width = static_cast<int>(std::round(height / emulation_aspect_ratio));
+
+ // The top screen is centered horizontally and takes the upper half of the window height.
+ res.top_screen.left = (width - viewport_width) / 2;
+ res.top_screen.right = res.top_screen.left + viewport_width;
+ res.top_screen.top = 0;
+ res.top_screen.bottom = res.top_screen.top + height / 2;
+
+ // Same bottom screen scaling as in the branch above.
+ int bottom_width = static_cast<int>((static_cast<float>(VideoCore::kScreenBottomWidth) /
+ VideoCore::kScreenTopWidth) * (res.top_screen.right - res.top_screen.left));
+ int bottom_border = ((res.top_screen.right - res.top_screen.left) - bottom_width) / 2;
+
+ // The bottom screen is offset from the top screen's left edge and takes the lower half.
+ res.bottom_screen.left = res.top_screen.left + bottom_border;
+ res.bottom_screen.right = res.bottom_screen.left + bottom_width;
+ res.bottom_screen.top = res.top_screen.bottom;
+ res.bottom_screen.bottom = res.bottom_screen.top + height / 2;
+ }
+
+ return res;
+}
diff --git a/src/common/emu_window.h b/src/common/emu_window.h
index 1ad4b82a..f6099fdb 100644
--- a/src/common/emu_window.h
+++ b/src/common/emu_window.h
@@ -8,6 +8,7 @@
#include "common/scm_rev.h"
#include "common/string_util.h"
#include "common/key_map.h"
+#include "common/math_util.h"
/**
* Abstraction class used to provide an interface between emulation code and the frontend
@@ -38,6 +39,23 @@ public:
std::pair<unsigned,unsigned> min_client_area_size;
};
+ /// Describes the layout of the window framebuffer (size and top/bottom screen positions)
+ struct FramebufferLayout {
+
+ /**
+ * Factory method for constructing a default FramebufferLayout
+ * @param width Window framebuffer width in pixels
+ * @param height Window framebuffer height in pixels
+ * @return Newly created FramebufferLayout object with default screen regions initialized
+ */
+ static FramebufferLayout DefaultScreenLayout(unsigned width, unsigned height);
+
+ unsigned width;
+ unsigned height;
+ MathUtil::Rectangle<unsigned> top_screen;
+ MathUtil::Rectangle<unsigned> bottom_screen;
+ };
+
/// Swap buffers to display the next frame
virtual void SwapBuffers() = 0;
@@ -75,11 +93,11 @@ public:
}
/**
- * Gets the framebuffer size in pixels.
+ * Gets the framebuffer layout (width, height, and screen regions)
* @note This method is thread-safe
*/
- const std::pair<unsigned,unsigned> GetFramebufferSize() const {
- return framebuffer_size;
+ const FramebufferLayout& GetFramebufferLayout() const {
+ return framebuffer_layout;
}
/**
@@ -118,11 +136,11 @@ protected:
}
/**
- * Update internal framebuffer size with the given parameter.
+ * Update framebuffer layout with the given parameter.
* @note EmuWindow implementations will usually use this in window resize event handlers.
*/
- void NotifyFramebufferSizeChanged(const std::pair<unsigned,unsigned>& size) {
- framebuffer_size = size;
+ void NotifyFramebufferLayoutChanged(const FramebufferLayout& layout) {
+ framebuffer_layout = layout;
}
/**
@@ -143,7 +161,7 @@ private:
// By default, ignore this request and do nothing.
}
- std::pair<unsigned,unsigned> framebuffer_size;
+ FramebufferLayout framebuffer_layout; ///< Current framebuffer layout
unsigned client_area_width; ///< Current client width, should be set by window impl.
unsigned client_area_height; ///< Current client height, should be set by window impl.
diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp
index da287f69..649640e7 100644
--- a/src/common/logging/backend.cpp
+++ b/src/common/logging/backend.cpp
@@ -136,9 +136,18 @@ Entry CreateEntry(Class log_class, Level log_level,
return std::move(entry);
}
+static Filter* filter;
+
+// Sets the filter consulted by LogMessage. Only the pointer is stored; no copy is made here.
+// NOTE(review): LogMessage dereferences this pointer unconditionally, so presumably a valid filter
+// must be installed before the first message is logged - confirm against the callers.
+void SetFilter(Filter* new_filter) {
+ filter = new_filter;
+}
+
void LogMessage(Class log_class, Level log_level,
const char* filename, unsigned int line_nr, const char* function,
const char* format, ...) {
+ if (!filter->CheckMessage(log_class, log_level))
+ return;
+
va_list args;
va_start(args, format);
Entry entry = CreateEntry(log_class, log_level,
diff --git a/src/common/logging/backend.h b/src/common/logging/backend.h
index 1c44c929..3114f864 100644
--- a/src/common/logging/backend.h
+++ b/src/common/logging/backend.h
@@ -10,6 +10,7 @@
#include "common/concurrent_ring_buffer.h"
+#include "common/logging/filter.h"
#include "common/logging/log.h"
namespace Log {
@@ -131,4 +132,6 @@ Entry CreateEntry(Class log_class, Level log_level,
/// Initializes the default Logger.
std::shared_ptr<Logger> InitGlobalLogger();
+void SetFilter(Filter* filter);
+
}
diff --git a/src/common/logging/filter.h b/src/common/logging/filter.h
index c3da9989..b53e4e63 100644
--- a/src/common/logging/filter.h
+++ b/src/common/logging/filter.h
@@ -2,6 +2,8 @@
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
+#pragma once
+
#include <array>
#include <string>
diff --git a/src/common/logging/text_formatter.cpp b/src/common/logging/text_formatter.cpp
index ef5739d8..36c91c4f 100644
--- a/src/common/logging/text_formatter.cpp
+++ b/src/common/logging/text_formatter.cpp
@@ -11,7 +11,6 @@
#endif
#include "common/logging/backend.h"
-#include "common/logging/filter.h"
#include "common/logging/log.h"
#include "common/logging/text_formatter.h"
@@ -116,7 +115,7 @@ void PrintColoredMessage(const Entry& entry) {
#endif
}
-void TextLoggingLoop(std::shared_ptr<Logger> logger, const Filter* filter) {
+void TextLoggingLoop(std::shared_ptr<Logger> logger) {
std::array<Entry, 256> entry_buffer;
while (true) {
@@ -126,9 +125,7 @@ void TextLoggingLoop(std::shared_ptr<Logger> logger, const Filter* filter) {
}
for (size_t i = 0; i < num_entries; ++i) {
const Entry& entry = entry_buffer[i];
- if (filter->CheckMessage(entry.log_class, entry.log_level)) {
- PrintColoredMessage(entry);
- }
+ PrintColoredMessage(entry);
}
}
}
diff --git a/src/common/logging/text_formatter.h b/src/common/logging/text_formatter.h
index 2f05794f..8474a190 100644
--- a/src/common/logging/text_formatter.h
+++ b/src/common/logging/text_formatter.h
@@ -11,7 +11,6 @@ namespace Log {
class Logger;
struct Entry;
-class Filter;
/**
* Attempts to trim an arbitrary prefix from `path`, leaving only the part starting at `root`. It's
@@ -36,6 +35,6 @@ void PrintColoredMessage(const Entry& entry);
* Logging loop that repeatedly reads messages from the provided logger and prints them to the
* console. It is the baseline barebones log outputter.
*/
-void TextLoggingLoop(std::shared_ptr<Logger> logger, const Filter* filter);
+void TextLoggingLoop(std::shared_ptr<Logger> logger);
}
diff --git a/src/common/profiler.cpp b/src/common/profiler.cpp
new file mode 100644
index 00000000..65c3df16
--- /dev/null
+++ b/src/common/profiler.cpp
@@ -0,0 +1,182 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/profiler.h"
+#include "common/profiler_reporting.h"
+#include "common/assert.h"
+
+#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013.
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
+#include <Windows.h> // For QueryPerformanceCounter/Frequency
+#endif
+
+namespace Common {
+namespace Profiling {
+
+#if ENABLE_PROFILING
+thread_local Timer* Timer::current_timer = nullptr;
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
+// Returns the current QueryPerformanceCounter reading converted to a microsecond time_point.
+QPCClock::time_point QPCClock::now() {
+ static LARGE_INTEGER freq;
+ // Use this dummy local static to ensure this gets initialized once.
+ static BOOL dummy = QueryPerformanceFrequency(&freq);
+
+ LARGE_INTEGER ticks;
+ QueryPerformanceCounter(&ticks);
+
+ // This is prone to overflow when multiplying, which is why I'm using micro instead of nano. The
+ // correct way to approach this would be to just return ticks as a time_point and then subtract
+ // and do this conversion when creating a duration from two time_points, however, as far as I
+ // could tell the C++ requirements for these types are incompatible with this approach.
+ return time_point(duration(ticks.QuadPart * std::micro::den / freq.QuadPart));
+}
+#endif
+
+// Creates a category with a zeroed time counter and registers it with the global ProfilingManager.
+// The id returned by the registration is stored; if a parent is given, the parent link is recorded
+// in the manager using the parent's id.
+TimingCategory::TimingCategory(const char* name, TimingCategory* parent)
+ : accumulated_duration(0) {
+
+ ProfilingManager& manager = GetProfilingManager();
+ category_id = manager.RegisterTimingCategory(this, name);
+ if (parent != nullptr)
+ manager.SetTimingCategoryParent(category_id, parent->category_id);
+}
+
+// Seeds both frame timestamps with the current time, so the first FinishFrame() measures from
+// construction if BeginFrame() has not been called yet.
+ProfilingManager::ProfilingManager()
+ : last_frame_end(Clock::now()), this_frame_start(Clock::now()) {
+}
+
+// Appends a new category entry (initially without a parent) and returns its id. The id is the
+// index of the entry inside timing_categories.
+unsigned int ProfilingManager::RegisterTimingCategory(TimingCategory* category, const char* name) {
+ TimingCategoryInfo info;
+ info.category = category;
+ info.name = name;
+ info.parent = TimingCategoryInfo::NO_PARENT;
+
+ // The id is taken before the push_back, so it equals the index of the new entry.
+ unsigned int id = (unsigned int)timing_categories.size();
+ timing_categories.push_back(std::move(info));
+
+ return id;
+}
+
+// Records `parent` as the parent id of `category`. Both ids must refer to registered categories.
+void ProfilingManager::SetTimingCategoryParent(unsigned int category, unsigned int parent) {
+ ASSERT(category < timing_categories.size());
+ ASSERT(parent < timing_categories.size());
+
+ timing_categories[category].parent = parent;
+}
+
+// Marks the start of a frame; FinishFrame() measures frame_time relative to this timestamp.
+void ProfilingManager::BeginFrame() {
+ this_frame_start = Clock::now();
+}
+
+// Closes the current frame and stores its measurements in `results`.
+void ProfilingManager::FinishFrame() {
+ Clock::time_point now = Clock::now();
+
+ // interframe_time spans from the end of the previous frame; frame_time from the last BeginFrame().
+ results.interframe_time = now - last_frame_end;
+ results.frame_time = now - this_frame_start;
+
+ // Collect the per-category totals. GetAccumulatedTime() also resets each category's counter, so
+ // the next frame starts accumulating from zero.
+ results.time_per_category.resize(timing_categories.size());
+ for (size_t i = 0; i < timing_categories.size(); ++i) {
+ results.time_per_category[i] = timing_categories[i].category->GetAccumulatedTime();
+ }
+
+ last_frame_end = now;
+}
+
+// Creates an aggregator that keeps up to `window_size` frames of history. The sample count and
+// the write cursor both start at zero; `cursor` must be initialized here because AddFrame() uses
+// it as an index on its very first call.
+TimingResultsAggregator::TimingResultsAggregator(size_t window_size)
+    : max_window_size(window_size), window_size(0), cursor(0) {
+    // Inside the body `window_size` names the constructor parameter (it shadows the member), so
+    // the history buffers are sized to the maximum window.
+    interframe_times.resize(window_size, Duration::zero());
+    frame_times.resize(window_size, Duration::zero());
+}
+
+// Discards the recorded history by resetting the sample count and the write position. The
+// underlying buffers are left untouched.
+void TimingResultsAggregator::Clear() {
+ window_size = cursor = 0;
+}
+
+// Resizes the per-category history to hold `n` categories. Does nothing if the count is unchanged.
+void TimingResultsAggregator::SetNumberOfCategories(size_t n) {
+ size_t old_size = times_per_category.size();
+ if (n == old_size)
+ return;
+
+ times_per_category.resize(n);
+
+ // Give every newly added category a zero-filled history of max_window_size entries. When the
+ // count shrinks, this loop does not run.
+ for (size_t i = old_size; i < n; ++i) {
+ times_per_category[i].resize(max_window_size, Duration::zero());
+ }
+}
+
+// Stores one frame's measurements at the current cursor position and advances the cursor.
+void TimingResultsAggregator::AddFrame(const ProfilingFrameResult& frame_result) {
+ // Make sure there is a history buffer for every category reported by this frame.
+ SetNumberOfCategories(frame_result.time_per_category.size());
+
+ interframe_times[cursor] = frame_result.interframe_time;
+ frame_times[cursor] = frame_result.frame_time;
+ for (size_t i = 0; i < frame_result.time_per_category.size(); ++i) {
+ times_per_category[i][cursor] = frame_result.time_per_category[i];
+ }
+
+ // The cursor wraps around at max_window_size, so the oldest entry is overwritten next. The
+ // number of valid samples grows until it reaches max_window_size and then stays there.
+ ++cursor;
+ if (cursor == max_window_size)
+ cursor = 0;
+ if (window_size < max_window_size)
+ ++window_size;
+}
+
+// Computes the average, minimum and maximum of the first `len` entries of `v`.
+// All three fields are Duration::zero() when `len` is 0.
+static AggregatedDuration AggregateField(const std::vector<Duration>& v, size_t len) {
+    AggregatedDuration result;
+    result.avg = Duration::zero();
+
+    result.min = result.max = (len == 0 ? Duration::zero() : v[0]);
+
+    // Iterate from index 0: the sum used for the average starts at zero, so the first sample has
+    // to be added here as well, otherwise the average is computed from len - 1 samples but divided
+    // by len.
+    for (size_t i = 0; i < len; ++i) {
+        Duration value = v[i];
+        result.avg += value;
+        result.min = std::min(result.min, value);
+        result.max = std::max(result.max, value);
+    }
+    if (len != 0)
+        result.avg /= len;
+
+    return result;
+}
+
+// Converts a profiler Duration to a float number of milliseconds.
+static float tof(Common::Profiling::Duration dur) {
+ using FloatMs = std::chrono::duration<float, std::chrono::milliseconds::period>;
+ return std::chrono::duration_cast<FloatMs>(dur).count();
+}
+
+// Aggregates the currently recorded samples (the first window_size entries of each history
+// buffer) into avg/min/max values and derives an FPS figure from the average interframe time.
+AggregatedFrameResult TimingResultsAggregator::GetAggregatedResults() const {
+ AggregatedFrameResult result;
+
+ result.interframe_time = AggregateField(interframe_times, window_size);
+ result.frame_time = AggregateField(frame_times, window_size);
+
+ // tof() yields milliseconds, so 1000 / ms gives frames per second. A zero average is reported
+ // as 0 FPS instead of dividing by zero.
+ if (result.interframe_time.avg != Duration::zero()) {
+ result.fps = 1000.0f / tof(result.interframe_time.avg);
+ } else {
+ result.fps = 0.0f;
+ }
+
+ result.time_per_category.resize(times_per_category.size());
+ for (size_t i = 0; i < times_per_category.size(); ++i) {
+ result.time_per_category[i] = AggregateField(times_per_category[i], window_size);
+ }
+
+ return result;
+}
+
+// Returns the single ProfilingManager instance, which is created on the first call.
+ProfilingManager& GetProfilingManager() {
+ // Takes advantage of "magic" static initialization for race-free initialization.
+ static ProfilingManager manager;
+ return manager;
+}
+
+// Returns a locking reference to the shared TimingResultsAggregator, which is created on the first
+// call with a window of 30 frames. The wrapper's mutex stays locked for as long as the returned
+// SynchronizedRef is alive.
+SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator() {
+ static SynchronizedWrapper<TimingResultsAggregator> aggregator(30);
+ return SynchronizedRef<TimingResultsAggregator>(aggregator);
+}
+
+} // namespace Profiling
+} // namespace Common
diff --git a/src/common/profiler.h b/src/common/profiler.h
new file mode 100644
index 00000000..3e967b4b
--- /dev/null
+++ b/src/common/profiler.h
@@ -0,0 +1,152 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <atomic>
+#include <chrono>
+
+#include "common/assert.h"
+#include "common/thread.h"
+
+namespace Common {
+namespace Profiling {
+
+// If this is defined to 0, it turns all Timers into no-ops.
+#ifndef ENABLE_PROFILING
+#define ENABLE_PROFILING 1
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER <= 1800 // MSVC 2013
+// MSVC up to 2013 doesn't use QueryPerformanceCounter for high_resolution_clock, so it has bad
+// precision. We manually implement a clock based on QPC to get good results.
+
+struct QPCClock {
+ using duration = std::chrono::microseconds;
+ using time_point = std::chrono::time_point<QPCClock>;
+ using rep = duration::rep;
+ using period = duration::period;
+ static const bool is_steady = false;
+
+ static time_point now();
+};
+
+using Clock = QPCClock;
+#else
+using Clock = std::chrono::high_resolution_clock;
+#endif
+
+using Duration = Clock::duration;
+
+/**
+ * Represents a timing category that measured time can be accounted towards. Should be declared as a
+ * global variable and passed to Timers.
+ */
+class TimingCategory final {
+public:
+ TimingCategory(const char* name, TimingCategory* parent = nullptr);
+
+ unsigned int GetCategoryId() const {
+ return category_id;
+ }
+
+ /// Adds some time to this category. Can safely be called from multiple threads at the same time.
+ void AddTime(Duration amount) {
+ std::atomic_fetch_add_explicit(
+ &accumulated_duration, amount.count(),
+ std::memory_order_relaxed);
+ }
+
+ /**
+ * Atomically retrieves the accumulated measured time for this category and resets the counter
+ * to zero. Can be safely called concurrently with AddTime.
+ */
+ Duration GetAccumulatedTime() {
+ return Duration(std::atomic_exchange_explicit(
+ &accumulated_duration, (Duration::rep)0,
+ std::memory_order_relaxed));
+ }
+
+private:
+ unsigned int category_id;
+ std::atomic<Duration::rep> accumulated_duration;
+};
+
+/**
+ * Measures time elapsed between a call to Start and a call to Stop and attributes it to the given
+ * TimingCategory. Start/Stop can be called multiple times on the same timer, but each call must be
+ * appropriately paired.
+ *
+ * When a Timer is started, it automatically pauses a previously running timer on the same thread,
+ * which is resumed when it is stopped. As such, no special action needs to be taken to avoid
+ * double-accounting of time on two categories.
+ */
+class Timer {
+public:
+ Timer(TimingCategory& category) : category(category) {
+ }
+
+ void Start() {
+#if ENABLE_PROFILING
+ ASSERT(!running);
+ previous_timer = current_timer;
+ current_timer = this;
+ if (previous_timer != nullptr)
+ previous_timer->StopTiming();
+
+ StartTiming();
+#endif
+ }
+
+ void Stop() {
+#if ENABLE_PROFILING
+ ASSERT(running);
+ StopTiming();
+
+ if (previous_timer != nullptr)
+ previous_timer->StartTiming();
+ current_timer = previous_timer;
+#endif
+ }
+
+private:
+#if ENABLE_PROFILING
+ void StartTiming() {
+ start = Clock::now();
+ running = true;
+ }
+
+ void StopTiming() {
+ auto duration = Clock::now() - start;
+ running = false;
+ category.AddTime(std::chrono::duration_cast<Duration>(duration));
+ }
+
+ Clock::time_point start;
+ bool running = false;
+
+ Timer* previous_timer;
+ static thread_local Timer* current_timer;
+#endif
+
+ TimingCategory& category;
+};
+
+/**
+ * A Timer that automatically starts timing when created and stops at the end of the scope. Should
+ * be used in the majority of cases.
+ */
+class ScopeTimer : public Timer {
+public:
+ ScopeTimer(TimingCategory& category) : Timer(category) {
+ Start();
+ }
+
+ ~ScopeTimer() {
+ Stop();
+ }
+};
+
+} // namespace Profiling
+} // namespace Common
diff --git a/src/common/profiler_reporting.h b/src/common/profiler_reporting.h
new file mode 100644
index 00000000..3abb7331
--- /dev/null
+++ b/src/common/profiler_reporting.h
@@ -0,0 +1,108 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <array>
+#include <chrono>
+#include <mutex>
+#include <utility>
+#include <vector>
+
+#include "common/profiler.h"
+#include "common/synchronized_wrapper.h"
+
+namespace Common {
+namespace Profiling {
+
+struct TimingCategoryInfo {
+ static const unsigned int NO_PARENT = -1;
+
+ TimingCategory* category;
+ const char* name;
+ unsigned int parent;
+};
+
+struct ProfilingFrameResult {
+ /// Time since the last delivered frame
+ Duration interframe_time;
+
+ /// Time spent processing a frame, excluding VSync
+ Duration frame_time;
+
+ /// Total amount of time spent inside each category in this frame. Indexed by the category id
+ std::vector<Duration> time_per_category;
+};
+
+class ProfilingManager final {
+public:
+ ProfilingManager();
+
+ unsigned int RegisterTimingCategory(TimingCategory* category, const char* name);
+ void SetTimingCategoryParent(unsigned int category, unsigned int parent);
+
+ const std::vector<TimingCategoryInfo>& GetTimingCategoriesInfo() const {
+ return timing_categories;
+ }
+
+ /// This should be called after swapping screen buffers.
+ void BeginFrame();
+ /// This should be called before swapping screen buffers.
+ void FinishFrame();
+
+ /// Get the timing results from the previous frame. This is updated when you call FinishFrame().
+ const ProfilingFrameResult& GetPreviousFrameResults() const {
+ return results;
+ }
+
+private:
+ std::vector<TimingCategoryInfo> timing_categories;
+ Clock::time_point last_frame_end;
+ Clock::time_point this_frame_start;
+
+ ProfilingFrameResult results;
+};
+
+struct AggregatedDuration {
+ Duration avg, min, max;
+};
+
+struct AggregatedFrameResult {
+ /// Time since the last delivered frame
+ AggregatedDuration interframe_time;
+
+ /// Time spent processing a frame, excluding VSync
+ AggregatedDuration frame_time;
+
+ float fps;
+
+ /// Total amount of time spent inside each category in this frame. Indexed by the category id
+ std::vector<AggregatedDuration> time_per_category;
+};
+
+class TimingResultsAggregator final {
+public:
+ TimingResultsAggregator(size_t window_size);
+
+ void Clear();
+ void SetNumberOfCategories(size_t n);
+
+ void AddFrame(const ProfilingFrameResult& frame_result);
+
+ AggregatedFrameResult GetAggregatedResults() const;
+
+ size_t max_window_size;
+ size_t window_size;
+ size_t cursor;
+
+ std::vector<Duration> interframe_times;
+ std::vector<Duration> frame_times;
+ std::vector<std::vector<Duration>> times_per_category;
+};
+
+ProfilingManager& GetProfilingManager();
+SynchronizedRef<TimingResultsAggregator> GetTimingResultsAggregator();
+
+} // namespace Profiling
+} // namespace Common
diff --git a/src/common/swap.h b/src/common/swap.h
index e2d91836..7e37655b 100644
--- a/src/common/swap.h
+++ b/src/common/swap.h
@@ -17,18 +17,14 @@
#pragma once
-// Android
-#if defined(ANDROID)
+#if defined(__linux__)
+#include <byteswap.h>
+#elif defined(__FreeBSD__)
#include <sys/endian.h>
-
-#if _BYTE_ORDER == _LITTLE_ENDIAN && !defined(COMMON_LITTLE_ENDIAN)
-#define COMMON_LITTLE_ENDIAN 1
-#elif _BYTE_ORDER == _BIG_ENDIAN && !defined(COMMON_BIG_ENDIAN)
-#define COMMON_BIG_ENDIAN 1
#endif
// GCC 4.6+
-#elif __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
+#if __GNUC__ >= 5 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)
#if __BYTE_ORDER__ && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1
@@ -49,7 +45,6 @@
#elif defined(_MSC_VER) && !defined(COMMON_BIG_ENDIAN) && !defined(COMMON_LITTLE_ENDIAN)
#define COMMON_LITTLE_ENDIAN 1
-
#endif
// Worst case, default to little endian.
@@ -57,6 +52,93 @@
#define COMMON_LITTLE_ENDIAN 1
#endif
+namespace Common {
+
+inline u8 swap8(u8 _data) {return _data;}
+inline u32 swap24(const u8* _data) {return (_data[0] << 16) | (_data[1] << 8) | _data[2];}
+
+#ifdef _MSC_VER
+inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);}
+inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);}
+inline u64 swap64(u64 _data) {return _byteswap_uint64(_data);}
+#elif _M_ARM
+inline u16 swap16 (u16 _data) { u32 data = _data; __asm__ ("rev16 %0, %1\n" : "=l" (data) : "l" (data)); return (u16)data;}
+inline u32 swap32 (u32 _data) {__asm__ ("rev %0, %1\n" : "=l" (_data) : "l" (_data)); return _data;}
+inline u64 swap64(u64 _data) {return ((u64)swap32(_data) << 32) | swap32(_data >> 32);}
+#elif __linux__
+inline u16 swap16(u16 _data) {return bswap_16(_data);}
+inline u32 swap32(u32 _data) {return bswap_32(_data);}
+inline u64 swap64(u64 _data) {return bswap_64(_data);}
+#elif __APPLE__
+inline __attribute__((always_inline)) u16 swap16(u16 _data)
+{return (_data >> 8) | (_data << 8);}
+inline __attribute__((always_inline)) u32 swap32(u32 _data)
+{return __builtin_bswap32(_data);}
+inline __attribute__((always_inline)) u64 swap64(u64 _data)
+{return __builtin_bswap64(_data);}
+#elif __FreeBSD__
+inline u16 swap16(u16 _data) {return bswap16(_data);}
+inline u32 swap32(u32 _data) {return bswap32(_data);}
+inline u64 swap64(u64 _data) {return bswap64(_data);}
+#else
+// Slow generic implementation.
+inline u16 swap16(u16 data) {return (data >> 8) | (data << 8);}
+inline u32 swap32(u32 data) {return (swap16(data) << 16) | swap16(data >> 16);}
+inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 32);}
+#endif
+
+// Returns `f` with its byte order reversed. The float's bits are read through a union as an
+// unsigned int, swapped with swap32, and read back as a float through a second union.
+inline float swapf(float f) {
+ union {
+ float f;
+ unsigned int u32;
+ } dat1, dat2;
+
+ dat1.f = f;
+ dat2.u32 = swap32(dat1.u32);
+
+ return dat2.f;
+}
+
+// Returns `f` with its byte order reversed. The double's bits are read through a union as an
+// unsigned long long, swapped with swap64, and read back as a double through a second union.
+inline double swapd(double f) {
+ union {
+ double f;
+ unsigned long long u64;
+ } dat1, dat2;
+
+ dat1.f = f;
+ dat2.u64 = swap64(dat1.u64);
+
+ return dat2.f;
+}
+
+inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);}
+inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);}
+inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);}
+
+template <int count>
+void swap(u8*);
+
+template <>
+inline void swap<1>(u8* data) { }
+
+template <>
+inline void swap<2>(u8* data) {
+ *reinterpret_cast<u16*>(data) = swap16(data);
+}
+
+template <>
+inline void swap<4>(u8* data) {
+ *reinterpret_cast<u32*>(data) = swap32(data);
+}
+
+template <>
+inline void swap<8>(u8* data) {
+ *reinterpret_cast<u64*>(data) = swap64(data);
+}
+
+} // Namespace Common
+
+
template <typename T, typename F>
struct swap_struct_t {
typedef swap_struct_t<T, F> swapped_t;
@@ -448,35 +530,35 @@ bool operator==(const S &p, const swap_struct_t<T, F> v) {
template <typename T>
struct swap_64_t {
static T swap(T x) {
- return (T)bswap64(*(u64 *)&x);
+ return (T)Common::swap64(*(u64 *)&x);
}
};
template <typename T>
struct swap_32_t {
static T swap(T x) {
- return (T)bswap32(*(u32 *)&x);
+ return (T)Common::swap32(*(u32 *)&x);
}
};
template <typename T>
struct swap_16_t {
static T swap(T x) {
- return (T)bswap16(*(u16 *)&x);
+ return (T)Common::swap16(*(u16 *)&x);
}
};
template <typename T>
struct swap_float_t {
static T swap(T x) {
- return (T)bswapf(*(float *)&x);
+ return (T)Common::swapf(*(float *)&x);
}
};
template <typename T>
struct swap_double_t {
static T swap(T x) {
- return (T)bswapd(*(double *)&x);
+ return (T)Common::swapd(*(double *)&x);
}
};
@@ -527,4 +609,5 @@ typedef s64 s64_be;
typedef float float_be;
typedef double double_be;
+
#endif
diff --git a/src/common/synchronized_wrapper.h b/src/common/synchronized_wrapper.h
new file mode 100644
index 00000000..946252b8
--- /dev/null
+++ b/src/common/synchronized_wrapper.h
@@ -0,0 +1,69 @@
+// Copyright 2015 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <mutex>
+
+namespace Common {
+
+/**
+ * Wraps an object, only allowing access to it via a locking reference wrapper. Good to ensure no
+ * one forgets to lock a mutex before accessing an object. To access the wrapped object construct a
+ * SynchronizedRef on this wrapper. Inspired by Rust's Mutex type (http://doc.rust-lang.org/std/sync/struct.Mutex.html).
+ */
+template <typename T>
+class SynchronizedWrapper {
+public:
+ /// Constructs the wrapped object in place, forwarding all arguments to T's constructor.
+ template <typename... Args>
+ SynchronizedWrapper(Args&&... args) :
+ data(std::forward<Args>(args)...) {
+ }
+
+private:
+ // SynchronizedRef is the only way to reach the private mutex and data from outside.
+ template <typename U>
+ friend class SynchronizedRef;
+
+ std::mutex mutex; ///< Locked by SynchronizedRef while it refers to this wrapper
+ T data; ///< The wrapped object
+};
+
+/**
+ * Synchronized reference, that keeps a SynchronizedWrapper's mutex locked during its lifetime. This
+ * greatly reduces the chance that someone will access the wrapped resource without locking the
+ * mutex.
+ */
+template <typename T>
+class SynchronizedRef {
+public:
+    /// Locks the wrapper's mutex. It stays locked until this reference is destroyed, unless the
+    /// lock is handed over to another SynchronizedRef by a move.
+    SynchronizedRef(SynchronizedWrapper<T>& wrapper) : wrapper(&wrapper) {
+        wrapper.mutex.lock();
+    }
+
+    // Copying is disabled: two references would both unlock the same mutex.
+    SynchronizedRef(const SynchronizedRef&) = delete;
+
+    /// Takes over the lock held by `o`; `o` no longer refers to any wrapper afterwards.
+    SynchronizedRef(SynchronizedRef&& o) : wrapper(o.wrapper) {
+        o.wrapper = nullptr;
+    }
+
+    /// Unlocks the mutex, unless the lock was moved to another reference.
+    ~SynchronizedRef() {
+        if (wrapper)
+            wrapper->mutex.unlock();
+    }
+
+    SynchronizedRef& operator=(const SynchronizedRef&) = delete;
+
+    /// Exchanges the referenced wrappers. The lock previously held by this object is released when
+    /// `o` is destroyed.
+    SynchronizedRef& operator=(SynchronizedRef&& o) {
+        std::swap(wrapper, o.wrapper);
+        // A non-void function must return a value; flowing off the end is undefined behaviour.
+        return *this;
+    }
+
+    T& operator*() { return wrapper->data; }
+    const T& operator*() const { return wrapper->data; }
+
+    T* operator->() { return &wrapper->data; }
+    const T* operator->() const { return &wrapper->data; }
+
+private:
+    SynchronizedWrapper<T>* wrapper; ///< Wrapper whose mutex is held, or nullptr after a move
+};
+
+} // namespace Common
diff --git a/src/common/thread.h b/src/common/thread.h
index eaf1ba00..a45728e1 100644
--- a/src/common/thread.h
+++ b/src/common/thread.h
@@ -24,6 +24,25 @@
#include <unistd.h>
#endif
+// Support for C++11's thread_local keyword was surprisingly spotty in compilers until very
+// recently. Fortunately, thread local variables have been well supported for compilers for a while,
+// but with semantics supporting only POD types, so we can use a few defines to get some amount of
+// backwards compat support.
+// WARNING: This only works correctly with POD types.
+#if defined(__clang__)
+# if !__has_feature(cxx_thread_local)
+# define thread_local __thread
+# endif
+#elif defined(__GNUC__)
+# if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8)
+# define thread_local __thread
+# endif
+#elif defined(_MSC_VER)
+# if _MSC_VER < 1900
+# define thread_local __declspec(thread)
+# endif
+#endif
+
namespace Common
{
diff --git a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
index d8a708b9..d953adba 100644
--- a/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
+++ b/src/core/arm/dyncom/arm_dyncom_interpreter.cpp
@@ -9,6 +9,7 @@
#include <unordered_map>
#include "common/logging/log.h"
+#include "common/profiler.h"
#include "core/mem_map.h"
#include "core/hle/hle.h"
@@ -20,6 +21,9 @@
#include "core/arm/skyeye_common/armmmu.h"
#include "core/arm/skyeye_common/vfp/vfp.h"
+Common::Profiling::TimingCategory profile_execute("DynCom::Execute");
+Common::Profiling::TimingCategory profile_decode("DynCom::Decode");
+
enum {
COND = (1 << 0),
NON_BRANCH = (1 << 1),
@@ -3569,6 +3573,8 @@ typedef struct instruction_set_encoding_item ISEITEM;
extern const ISEITEM arm_instruction[];
static int InterpreterTranslate(ARMul_State* cpu, int& bb_start, addr_t addr) {
+ Common::Profiling::ScopeTimer timer_decode(profile_decode);
+
// Decode instruction, get index
// Allocate memory and init InsCream
// Go on next, until terminal instruction
@@ -3641,6 +3647,8 @@ static bool InAPrivilegedMode(ARMul_State* core) {
}
unsigned InterpreterMainLoop(ARMul_State* state) {
+ Common::Profiling::ScopeTimer timer_execute(profile_execute);
+
#undef RM
#undef RS
@@ -4354,6 +4362,8 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
cpu->Reg[14] = Memory::Read32(addr);
else
cpu->Reg_usr[1] = Memory::Read32(addr);
+
+ addr += 4;
}
} else if (!BIT(inst, 22)) {
for(int i = 0; i < 16; i++ ){
@@ -4478,10 +4488,6 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
unsigned int operand2 = ROTATE_RIGHT_32(RM, 8 * inst_cream->rotate) & 0xffff;
RD = RN + operand2;
- if (inst_cream->Rn == 15 || inst_cream->Rm == 15) {
- LOG_ERROR(Core_ARM11, "invalid operands for UXTAH");
- CITRA_IGNORE_EXIT(-1);
- }
}
cpu->Reg[15] += GET_INST_SIZE(cpu);
INC_PC(sizeof(uxtah_inst));
@@ -4812,10 +4818,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
uint64_t rm = RM;
uint64_t rs = RS;
uint64_t rn = RN;
- if (inst_cream->Rm == 15 || inst_cream->Rs == 15 || inst_cream->Rn == 15) {
- LOG_ERROR(Core_ARM11, "invalid operands for MLA");
- CITRA_IGNORE_EXIT(-1);
- }
+
RD = static_cast<uint32_t>((rm * rs + rn) & 0xffffffff);
if (inst_cream->S) {
UPDATE_NFLAG(RD);
@@ -5094,10 +5097,10 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
PLD_INST:
{
- // Instruction not implemented
- //LOG_CRITICAL(Core_ARM11, "unimplemented instruction");
+ // Not implemented. PLD is a hint instruction, so it's optional.
+
cpu->Reg[15] += GET_INST_SIZE(cpu);
- INC_PC(sizeof(stc_inst));
+ INC_PC(sizeof(pld_inst));
FETCH_INST;
GOTO_NEXT_INST;
}
@@ -5966,54 +5969,51 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
ldst_inst* inst_cream = (ldst_inst*)inst_base->component;
unsigned int inst = inst_cream->inst;
- int i;
unsigned int Rn = BITS(inst, 16, 19);
unsigned int old_RN = cpu->Reg[Rn];
inst_cream->get_addr(cpu, inst_cream->inst, addr, 0);
if (BIT(inst_cream->inst, 22) == 1) {
- for (i = 0; i < 13; i++) {
- if(BIT(inst_cream->inst, i)) {
+ for (int i = 0; i < 13; i++) {
+ if (BIT(inst_cream->inst, i)) {
Memory::Write32(addr, cpu->Reg[i]);
addr += 4;
}
}
if (BIT(inst_cream->inst, 13)) {
- if (cpu->Mode == USER32MODE) {
- Memory::Write32(addr, cpu->Reg[i]);
- addr += 4;
- } else {
+ if (cpu->Mode == USER32MODE)
+ Memory::Write32(addr, cpu->Reg[13]);
+ else
Memory::Write32(addr, cpu->Reg_usr[0]);
- addr += 4;
- }
+
+ addr += 4;
}
if (BIT(inst_cream->inst, 14)) {
- if (cpu->Mode == USER32MODE) {
- Memory::Write32(addr, cpu->Reg[i]);
- addr += 4;
- } else {
+ if (cpu->Mode == USER32MODE)
+ Memory::Write32(addr, cpu->Reg[14]);
+ else
Memory::Write32(addr, cpu->Reg_usr[1]);
- addr += 4;
- }
+
+ addr += 4;
}
if (BIT(inst_cream->inst, 15)) {
Memory::Write32(addr, cpu->Reg_usr[1] + 8);
}
} else {
- for( i = 0; i < 15; i++ ) {
- if(BIT(inst_cream->inst, i)) {
- if(i == Rn)
+ for (int i = 0; i < 15; i++) {
+ if (BIT(inst_cream->inst, i)) {
+ if (i == Rn)
Memory::Write32(addr, old_RN);
else
Memory::Write32(addr, cpu->Reg[i]);
+
addr += 4;
}
}
// Check PC reg
- if(BIT(inst_cream->inst, i)) {
+ if (BIT(inst_cream->inst, 15))
Memory::Write32(addr, cpu->Reg_usr[1] + 8);
- }
}
}
cpu->Reg[15] += GET_INST_SIZE(cpu);
@@ -6026,15 +6026,12 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
sxtb_inst* inst_cream = (sxtb_inst*)inst_base->component;
- if (inst_cream->Rm == 15) {
- LOG_ERROR(Core_ARM11, "invalid operand for SXTB");
- CITRA_IGNORE_EXIT(-1);
- }
unsigned int operand2 = ROTATE_RIGHT_32(RM, 8 * inst_cream->rotate);
if (BIT(operand2, 7)) {
operand2 |= 0xffffff00;
- } else
+ } else {
operand2 &= 0xff;
+ }
RD = operand2;
}
cpu->Reg[15] += GET_INST_SIZE(cpu);
@@ -6292,8 +6289,7 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
swp_inst* inst_cream = (swp_inst*)inst_base->component;
addr = RN;
- unsigned int value;
- value = Memory::Read32(addr);
+ unsigned int value = Memory::Read32(addr);
Memory::Write32(addr, RM);
RD = value;
@@ -6322,10 +6318,6 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
sxtab_inst* inst_cream = (sxtab_inst*)inst_base->component;
- // R15 should be check
- if(inst_cream->Rn == 15 || inst_cream->Rm == 15 || inst_cream->Rd ==15){
- CITRA_IGNORE_EXIT(-1);
- }
unsigned int operand2 = ROTATE_RIGHT_32(RM, 8 * inst_cream->rotate) & 0xff;
// Sign extend for byte
@@ -6376,10 +6368,6 @@ unsigned InterpreterMainLoop(ARMul_State* state) {
if (inst_base->cond == 0xE || CondPassed(cpu, inst_base->cond)) {
sxtah_inst* inst_cream = (sxtah_inst*)inst_base->component;
- // R15 should be check
- if(inst_cream->Rn == 15 || inst_cream->Rm == 15 || inst_cream->Rd ==15) {
- CITRA_IGNORE_EXIT(-1);
- }
unsigned int operand2 = ROTATE_RIGHT_32(RM, 8 * inst_cream->rotate) & 0xffff;
// Sign extend for half
operand2 = (0x8000 & operand2) ? (0xFFFF0000 | operand2) : operand2;
diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp
index c6764a52..1aaeaa9c 100644
--- a/src/core/hle/hle.cpp
+++ b/src/core/hle/hle.cpp
@@ -4,6 +4,8 @@
#include <vector>
+#include "common/profiler.h"
+
#include "core/arm/arm_interface.h"
#include "core/mem_map.h"
#include "core/hle/hle.h"
@@ -16,6 +18,8 @@
namespace HLE {
+Common::Profiling::TimingCategory profiler_svc("SVC Calls");
+
static std::vector<ModuleDef> g_module_db;
bool g_reschedule = false; ///< If true, immediately reschedules the CPU to a new thread
@@ -30,6 +34,8 @@ static const FunctionDef* GetSVCInfo(u32 opcode) {
}
void CallSVC(u32 opcode) {
+ Common::Profiling::ScopeTimer timer_svc(profiler_svc);
+
const FunctionDef *info = GetSVCInfo(opcode);
if (!info) {
diff --git a/src/core/hle/service/cfg/cfg.cpp b/src/core/hle/service/cfg/cfg.cpp
index 1eb2562d..6adadb22 100644
--- a/src/core/hle/service/cfg/cfg.cpp
+++ b/src/core/hle/service/cfg/cfg.cpp
@@ -48,13 +48,18 @@ ResultCode GetConfigInfoBlock(u32 block_id, u32 size, u32 flag, u8* output) {
SaveFileConfig* config = reinterpret_cast<SaveFileConfig*>(cfg_config_file_buffer.data());
auto itr = std::find_if(std::begin(config->block_entries), std::end(config->block_entries),
- [&](const SaveConfigBlockEntry& entry) {
- return entry.block_id == block_id && entry.size == size && (entry.flags & flag);
- });
+ [&](const SaveConfigBlockEntry& entry) {
+ return entry.block_id == block_id && (entry.flags & flag);
+ });
if (itr == std::end(config->block_entries)) {
- LOG_ERROR(Service_CFG, "Config block %u with size %u and flags %u not found", block_id, size, flag);
- return ResultCode(-1); // TODO(Subv): Find the correct error code
+ LOG_ERROR(Service_CFG, "Config block %u with flags %u was not found", block_id, flag);
+ return ResultCode(ErrorDescription::NotFound, ErrorModule::Config, ErrorSummary::WrongArgument, ErrorLevel::Permanent);
+ }
+
+ if (itr->size != size) {
+ LOG_ERROR(Service_CFG, "Invalid size %u for config block %u with flags %u", size, block_id, flag);
+ return ResultCode(ErrorDescription::InvalidSize, ErrorModule::Config, ErrorSummary::WrongArgument, ErrorLevel::Permanent);
}
// The data is located in the block header itself if the size is less than 4 bytes
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index 9942aab1..30318fc0 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -82,9 +82,9 @@ inline void Write(u32 addr, const T data) {
if (config.fill_24bit) {
// fill with 24-bit values
for (u8* ptr = start; ptr < end; ptr += 3) {
- ptr[0] = config.value_24bit_b;
+ ptr[0] = config.value_24bit_r;
ptr[1] = config.value_24bit_g;
- ptr[2] = config.value_24bit_r;
+ ptr[2] = config.value_24bit_b;
}
} else if (config.fill_32bit) {
// fill with 32-bit values
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 76f4d66f..5b7f0a4e 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -98,12 +98,12 @@ struct Regs {
BitField<0, 1, u32> trigger;
// Set to 1 upon completion.
- BitField<0, 1, u32> finished;
+ BitField<1, 1, u32> finished;
- // 0: fill with 16- or 32-bit wide values; 1: fill with 24-bit wide values
+ // If both of these bits are unset, then it will fill the memory with a 16 bit value
+ // 1: fill with 24-bit wide values
BitField<8, 1, u32> fill_24bit;
-
- // 0: fill with 16-bit wide values; 1: fill with 32-bit wide values
+ // 1: fill with 32-bit wide values
BitField<9, 1, u32> fill_32bit;
};
diff --git a/src/video_core/color.h b/src/video_core/color.h
index 35da901f..14ade74f 100644
--- a/src/video_core/color.h
+++ b/src/video_core/color.h
@@ -101,6 +101,33 @@ inline const Math::Vec4<u8> DecodeRGBA4(const u8* bytes) {
}
/**
+ * Decode a depth value stored in D16 format
+ * @param bytes Pointer to encoded source value
+ * @return Depth value as a u32
+ */
+inline u32 DecodeD16(const u8* bytes) {
+ return *reinterpret_cast<const u16_le*>(bytes);
+}
+
+/**
+ * Decode a depth value stored in D24 format
+ * @param bytes Pointer to encoded source value
+ * @return Depth value as a u32
+ */
+inline u32 DecodeD24(const u8* bytes) {
+ return (bytes[2] << 16) | (bytes[1] << 8) | bytes[0];
+}
+
+/**
+ * Decode a depth value and a stencil value stored in D24S8 format
+ * @param bytes Pointer to encoded source values
+ * @return Resulting values stored as a Math::Vec2 (depth first, then stencil)
+ */
+inline const Math::Vec2<u32> DecodeD24S8(const u8* bytes) {
+ return { (bytes[2] << 16) | (bytes[1] << 8) | bytes[0], bytes[3] };
+}
+
+/**
* Encode a color as RGBA8 format
* @param color Source color to encode
* @param bytes Destination pointer to store encoded color
@@ -153,4 +180,34 @@ inline void EncodeRGBA4(const Math::Vec4<u8>& color, u8* bytes) {
(Convert8To4(color.g()) << 8) | (Convert8To4(color.b()) << 4) | Convert8To4(color.a());
}
+/**
+ * Encode a 16 bit depth value as D16 format
+ * @param value 16 bit source depth value to encode
+ * @param bytes Pointer where to store the encoded value
+ */
+inline void EncodeD16(u32 value, u8* bytes) {
+ *reinterpret_cast<u16_le*>(bytes) = value & 0xFFFF;
+}
+
+/**
+ * Encode a 24 bit depth value as D24 format
+ * @param value 24 bit source depth value to encode
+ * @param bytes Pointer where to store the encoded value
+ */
+inline void EncodeD24(u32 value, u8* bytes) {
+ bytes[0] = value & 0xFF;
+ bytes[1] = (value >> 8) & 0xFF;
+ bytes[2] = (value >> 16) & 0xFF;
+}
+
+/**
+ * Encode a 24 bit depth value and an 8 bit stencil value as D24S8 format
+ * @param depth 24 bit source depth value to encode
+ * @param stencil 8 bit source stencil value to encode
+ * @param bytes Pointer where to store the encoded value
+ */
+inline void EncodeD24S8(u32 depth, u8 stencil, u8* bytes) {
+ *reinterpret_cast<u32_le*>(bytes) = (stencil << 24) | depth;
+}
+
} // namespace
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index 586ad62b..e031871e 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -4,6 +4,8 @@
#include <boost/range/algorithm/fill.hpp>
+#include "common/profiler.h"
+
#include "clipper.h"
#include "command_processor.h"
#include "math.h"
@@ -25,6 +27,8 @@ static int float_regs_counter = 0;
static u32 uniform_write_buffer[4];
+Common::Profiling::TimingCategory category_drawing("Drawing");
+
static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
if (id >= registers.NumIds())
@@ -53,6 +57,8 @@ static inline void WritePicaReg(u32 id, u32 value, u32 mask) {
case PICA_REG_INDEX(trigger_draw):
case PICA_REG_INDEX(trigger_draw_indexed):
{
+ Common::Profiling::ScopeTimer scope_timer(category_drawing);
+
DebugUtils::DumpTevStageConfig(registers.GetTevStages());
if (g_debug_context)
diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp
index a27d3828..745c4f4e 100644
--- a/src/video_core/debug_utils/debug_utils.cpp
+++ b/src/video_core/debug_utils/debug_utils.cpp
@@ -505,7 +505,7 @@ const Math::Vec4<u8> LookupTexture(const u8* source, int x, int y, const Texture
}
// Add modifier
- unsigned table_index = (x < 2) ? table_index_2.Value() : table_index_1.Value();
+ unsigned table_index = (x < 2) ? table_index_1.Value() : table_index_2.Value();
static const auto etc1_modifier_table = std::array<std::array<u8, 2>, 8>{{
{ 2, 8 }, { 5, 17 }, { 9, 29 }, { 13, 42 },
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index b14de927..fe20cd77 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -393,7 +393,15 @@ struct Regs {
BitField< 8, 8, u32> ref;
} alpha_test;
- INSERT_PADDING_WORDS(0x2);
+ union {
+ BitField< 0, 1, u32> stencil_test_enable;
+ BitField< 4, 3, CompareFunc> stencil_test_func;
+ BitField< 8, 8, u32> stencil_replacement_value;
+ BitField<16, 8, u32> stencil_reference_value;
+ BitField<24, 8, u32> stencil_mask;
+ } stencil_test;
+
+ INSERT_PADDING_WORDS(0x1);
union {
BitField< 0, 1, u32> depth_test_enable;
@@ -408,6 +416,30 @@ struct Regs {
INSERT_PADDING_WORDS(0x8);
} output_merger;
+ enum DepthFormat : u32 {
+ D16 = 0,
+
+ D24 = 2,
+ D24S8 = 3
+ };
+
+ /*
+ * Returns the number of bytes in the specified depth format
+ */
+ static u32 BytesPerDepthPixel(DepthFormat format) {
+ switch (format) {
+ case DepthFormat::D16:
+ return 2;
+ case DepthFormat::D24:
+ return 3;
+ case DepthFormat::D24S8:
+ return 4;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unknown depth format %u", format);
+ UNIMPLEMENTED();
+ }
+ }
+
struct {
// Components are laid out in reverse byte order, most significant bits first.
enum ColorFormat : u32 {
@@ -420,7 +452,7 @@ struct Regs {
INSERT_PADDING_WORDS(0x6);
- u32 depth_format;
+ DepthFormat depth_format;
BitField<16, 3, u32> color_format;
INSERT_PADDING_WORDS(0x4);
diff --git a/src/video_core/primitive_assembly.cpp b/src/video_core/primitive_assembly.cpp
index 1776a192..0120f289 100644
--- a/src/video_core/primitive_assembly.cpp
+++ b/src/video_core/primitive_assembly.cpp
@@ -33,12 +33,9 @@ void PrimitiveAssembler<VertexType>::SubmitVertex(VertexType& vtx, TriangleHandl
case Regs::TriangleTopology::Strip:
case Regs::TriangleTopology::Fan:
- if (strip_ready) {
- // TODO: Should be "buffer[0], buffer[1], vtx" instead!
- // Not quite sure why we need this order for things to show up properly.
- // Maybe a bug in the rasterizer?
- triangle_handler(buffer[1], buffer[0], vtx);
- }
+ if (strip_ready)
+ triangle_handler(buffer[0], buffer[1], vtx);
+
buffer[buffer_index] = vtx;
if (topology == Regs::TriangleTopology::Strip) {
diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp
index 5861c192..dd46f0ec 100644
--- a/src/video_core/rasterizer.cpp
+++ b/src/video_core/rasterizer.cpp
@@ -91,7 +91,7 @@ static const Math::Vec4<u8> GetPixel(int x, int y) {
}
return {};
- }
+}
static u32 GetDepth(int x, int y) {
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
@@ -100,23 +100,55 @@ static u32 GetDepth(int x, int y) {
y = (registers.framebuffer.height - y);
const u32 coarse_y = y & ~7;
- u32 stride = registers.framebuffer.width * 2;
-
- // Assuming 16-bit depth buffer format until actual format handling is implemented
- return *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride);
+ u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format);
+ u32 stride = registers.framebuffer.width * bytes_per_pixel;
+
+ u32 src_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+ u8* src_pixel = depth_buffer + src_offset;
+
+ switch (registers.framebuffer.depth_format) {
+ case Pica::Regs::DepthFormat::D16:
+ return Color::DecodeD16(src_pixel);
+ case Pica::Regs::DepthFormat::D24:
+ return Color::DecodeD24(src_pixel);
+ case Pica::Regs::DepthFormat::D24S8:
+ return Color::DecodeD24S8(src_pixel).x;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
+ UNIMPLEMENTED();
+ return 0;
+ }
}
-static void SetDepth(int x, int y, u16 value) {
+static void SetDepth(int x, int y, u32 value) {
const PAddr addr = registers.framebuffer.GetDepthBufferPhysicalAddress();
u8* depth_buffer = Memory::GetPointer(PAddrToVAddr(addr));
y = (registers.framebuffer.height - y);
const u32 coarse_y = y & ~7;
- u32 stride = registers.framebuffer.width * 2;
-
- // Assuming 16-bit depth buffer format until actual format handling is implemented
- *(u16*)(depth_buffer + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * stride) = value;
+ u32 bytes_per_pixel = Pica::Regs::BytesPerDepthPixel(registers.framebuffer.depth_format);
+ u32 stride = registers.framebuffer.width * bytes_per_pixel;
+
+ u32 dst_offset = VideoCore::GetMortonOffset(x, y, bytes_per_pixel) + coarse_y * stride;
+ u8* dst_pixel = depth_buffer + dst_offset;
+
+ switch (registers.framebuffer.depth_format) {
+ case Pica::Regs::DepthFormat::D16:
+ Color::EncodeD16(value, dst_pixel);
+ break;
+ case Pica::Regs::DepthFormat::D24:
+ Color::EncodeD24(value, dst_pixel);
+ break;
+ case Pica::Regs::DepthFormat::D24S8:
+ // TODO(Subv): Implement the stencil buffer
+ Color::EncodeD24S8(value, 0, dst_pixel);
+ break;
+ default:
+ LOG_CRITICAL(HW_GPU, "Unimplemented depth format %u", registers.framebuffer.depth_format);
+ UNIMPLEMENTED();
+ break;
+ }
}
// NOTE: Assuming that rasterizer coordinates are 12.4 fixed-point values
@@ -595,7 +627,7 @@ static void ProcessTriangleInternal(const VertexShader::OutputVertex& v0,
u16 z = (u16)((v0.screenpos[2].ToFloat32() * w0 +
v1.screenpos[2].ToFloat32() * w1 +
v2.screenpos[2].ToFloat32() * w2) * 65535.f / wsum);
- u16 ref_z = GetDepth(x >> 4, y >> 4);
+ u32 ref_z = GetDepth(x >> 4, y >> 4);
bool pass = false;
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 2fcbb0cc..4273a177 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -6,7 +6,10 @@
#include "core/hw/hw.h"
#include "core/hw/lcd.h"
#include "core/mem_map.h"
+
#include "common/emu_window.h"
+#include "common/profiler_reporting.h"
+
#include "video_core/video_core.h"
#include "video_core/renderer_opengl/renderer_opengl.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
@@ -94,9 +97,18 @@ void RendererOpenGL::SwapBuffers() {
DrawScreens();
+ auto& profiler = Common::Profiling::GetProfilingManager();
+ profiler.FinishFrame();
+ {
+ auto aggregator = Common::Profiling::GetTimingResultsAggregator();
+ aggregator->AddFrame(profiler.GetPreviousFrameResults());
+ }
+
// Swap buffers
render_window->PollEvents();
render_window->SwapBuffers();
+
+ profiler.BeginFrame();
}
/**
@@ -276,28 +288,26 @@ void RendererOpenGL::DrawSingleScreenRotated(const TextureInfo& texture, float x
* Draws the emulated screens to the emulator window.
*/
void RendererOpenGL::DrawScreens() {
- auto viewport_extent = GetViewportExtent();
- glViewport(viewport_extent.left, viewport_extent.top, viewport_extent.GetWidth(), viewport_extent.GetHeight()); // TODO: Or bottom?
+ auto layout = render_window->GetFramebufferLayout();
+
+ glViewport(0, 0, layout.width, layout.height);
glClear(GL_COLOR_BUFFER_BIT);
glUseProgram(program_id);
// Set projection matrix
- std::array<GLfloat, 3*2> ortho_matrix = MakeOrthographicMatrix((float)resolution_width, (float)resolution_height);
+ std::array<GLfloat, 3 * 2> ortho_matrix = MakeOrthographicMatrix((float)layout.width,
+ (float)layout.height);
glUniformMatrix3x2fv(uniform_modelview_matrix, 1, GL_FALSE, ortho_matrix.data());
// Bind texture in Texture Unit 0
glActiveTexture(GL_TEXTURE0);
glUniform1i(uniform_color_texture, 0);
- const float max_width = std::max((float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenBottomWidth);
- const float top_x = 0.5f * (max_width - VideoCore::kScreenTopWidth);
- const float bottom_x = 0.5f * (max_width - VideoCore::kScreenBottomWidth);
-
- DrawSingleScreenRotated(textures[0], top_x, 0,
- (float)VideoCore::kScreenTopWidth, (float)VideoCore::kScreenTopHeight);
- DrawSingleScreenRotated(textures[1], bottom_x, (float)VideoCore::kScreenTopHeight,
- (float)VideoCore::kScreenBottomWidth, (float)VideoCore::kScreenBottomHeight);
+ DrawSingleScreenRotated(textures[0], (float)layout.top_screen.left, (float)layout.top_screen.top,
+ (float)layout.top_screen.GetWidth(), (float)layout.top_screen.GetHeight());
+ DrawSingleScreenRotated(textures[1], (float)layout.bottom_screen.left,(float)layout.bottom_screen.top,
+ (float)layout.bottom_screen.GetWidth(), (float)layout.bottom_screen.GetHeight());
m_current_frame++;
}
@@ -314,34 +324,6 @@ void RendererOpenGL::SetWindow(EmuWindow* window) {
render_window = window;
}
-MathUtil::Rectangle<unsigned> RendererOpenGL::GetViewportExtent() {
- unsigned framebuffer_width;
- unsigned framebuffer_height;
- std::tie(framebuffer_width, framebuffer_height) = render_window->GetFramebufferSize();
-
- float window_aspect_ratio = static_cast<float>(framebuffer_height) / framebuffer_width;
- float emulation_aspect_ratio = static_cast<float>(resolution_height) / resolution_width;
-
- MathUtil::Rectangle<unsigned> viewport_extent;
- if (window_aspect_ratio > emulation_aspect_ratio) {
- // Window is narrower than the emulation content => apply borders to the top and bottom
- unsigned viewport_height = static_cast<unsigned>(std::round(emulation_aspect_ratio * framebuffer_width));
- viewport_extent.left = 0;
- viewport_extent.top = (framebuffer_height - viewport_height) / 2;
- viewport_extent.right = viewport_extent.left + framebuffer_width;
- viewport_extent.bottom = viewport_extent.top + viewport_height;
- } else {
- // Otherwise, apply borders to the left and right sides of the window.
- unsigned viewport_width = static_cast<unsigned>(std::round(framebuffer_height / emulation_aspect_ratio));
- viewport_extent.left = (framebuffer_width - viewport_width) / 2;
- viewport_extent.top = 0;
- viewport_extent.right = viewport_extent.left + viewport_width;
- viewport_extent.bottom = viewport_extent.top + framebuffer_height;
- }
-
- return viewport_extent;
-}
-
/// Initialize the renderer
void RendererOpenGL::Init() {
render_window->MakeCurrent();
diff --git a/src/video_core/vertex_shader.cpp b/src/video_core/vertex_shader.cpp
index bc8c0041..4eb3e743 100644
--- a/src/video_core/vertex_shader.cpp
+++ b/src/video_core/vertex_shader.cpp
@@ -17,6 +17,7 @@
#include "vertex_shader.h"
#include "debug_utils/debug_utils.h"
+using nihstro::OpCode;
using nihstro::Instruction;
using nihstro::RegisterType;
using nihstro::SourceRegister;
@@ -154,10 +155,10 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
};
- switch (instr.opcode.GetInfo().type) {
- case Instruction::OpCodeType::Arithmetic:
+ switch (instr.opcode.Value().GetInfo().type) {
+ case OpCode::Type::Arithmetic:
{
- bool is_inverted = 0 != (instr.opcode.GetInfo().subtype & Instruction::OpCodeInfo::SrcInversed);
+ bool is_inverted = 0 != (instr.opcode.Value().GetInfo().subtype & OpCode::Info::SrcInversed);
// TODO: We don't really support this properly: For instance, the address register
// offset needs to be applied to SRC2 instead, etc.
// For now, we just abort in this situation.
@@ -197,15 +198,15 @@ static void ProcessShaderCode(VertexShaderState& state) {
src2[3] = src2[3] * float24::FromFloat32(-1);
}
- float24* dest = (instr.common.dest < 0x08) ? state.output_register_table[4*instr.common.dest.GetIndex()]
- : (instr.common.dest < 0x10) ? dummy_vec4_float24
- : (instr.common.dest < 0x20) ? &state.temporary_registers[instr.common.dest.GetIndex()][0]
+ float24* dest = (instr.common.dest.Value() < 0x08) ? state.output_register_table[4*instr.common.dest.Value().GetIndex()]
+ : (instr.common.dest.Value() < 0x10) ? dummy_vec4_float24
+ : (instr.common.dest.Value() < 0x20) ? &state.temporary_registers[instr.common.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
state.debug.max_opdesc_id = std::max<u32>(state.debug.max_opdesc_id, 1+instr.common.operand_desc_id);
- switch (instr.opcode.EffectiveOpCode()) {
- case Instruction::OpCode::ADD:
+ switch (instr.opcode.Value().EffectiveOpCode()) {
+ case OpCode::Id::ADD:
{
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -217,7 +218,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCode::MUL:
+ case OpCode::Id::MUL:
{
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -229,7 +230,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCode::MAX:
+ case OpCode::Id::MAX:
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
continue;
@@ -238,11 +239,11 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
break;
- case Instruction::OpCode::DP3:
- case Instruction::OpCode::DP4:
+ case OpCode::Id::DP3:
+ case OpCode::Id::DP4:
{
float24 dot = float24::FromFloat32(0.f);
- int num_components = (instr.opcode == Instruction::OpCode::DP3) ? 3 : 4;
+ int num_components = (instr.opcode.Value() == OpCode::Id::DP3) ? 3 : 4;
for (int i = 0; i < num_components; ++i)
dot = dot + src1[i] * src2[i];
@@ -256,7 +257,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
// Reciprocal
- case Instruction::OpCode::RCP:
+ case OpCode::Id::RCP:
{
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -271,7 +272,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
// Reciprocal Square Root
- case Instruction::OpCode::RSQ:
+ case OpCode::Id::RSQ:
{
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -285,7 +286,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCode::MOVA:
+ case OpCode::Id::MOVA:
{
for (int i = 0; i < 2; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -298,7 +299,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCode::MOV:
+ case OpCode::Id::MOV:
{
for (int i = 0; i < 4; ++i) {
if (!swizzle.DestComponentEnabled(i))
@@ -309,7 +310,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCode::CMP:
+ case OpCode::Id::CMP:
for (int i = 0; i < 2; ++i) {
// TODO: Can you restrict to one compare via dest masking?
@@ -350,7 +351,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
default:
LOG_ERROR(HW_GPU, "Unhandled arithmetic instruction: 0x%02x (%s): 0x%08x",
- (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
+ (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
DEBUG_ASSERT(false);
break;
}
@@ -358,9 +359,9 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCodeType::MultiplyAdd:
+ case OpCode::Type::MultiplyAdd:
{
- if (instr.opcode.EffectiveOpCode() == Instruction::OpCode::MAD) {
+ if (instr.opcode.Value().EffectiveOpCode() == OpCode::Id::MAD) {
const SwizzlePattern& swizzle = *(SwizzlePattern*)&swizzle_data[instr.mad.operand_desc_id];
const float24* src1_ = LookupSourceRegister(instr.mad.src1);
@@ -408,9 +409,9 @@ static void ProcessShaderCode(VertexShaderState& state) {
src3[3] = src3[3] * float24::FromFloat32(-1);
}
- float24* dest = (instr.mad.dest < 0x08) ? state.output_register_table[4*instr.mad.dest.GetIndex()]
- : (instr.mad.dest < 0x10) ? dummy_vec4_float24
- : (instr.mad.dest < 0x20) ? &state.temporary_registers[instr.mad.dest.GetIndex()][0]
+ float24* dest = (instr.mad.dest.Value() < 0x08) ? state.output_register_table[4*instr.mad.dest.Value().GetIndex()]
+ : (instr.mad.dest.Value() < 0x10) ? dummy_vec4_float24
+ : (instr.mad.dest.Value() < 0x20) ? &state.temporary_registers[instr.mad.dest.Value().GetIndex()][0]
: dummy_vec4_float24;
for (int i = 0; i < 4; ++i) {
@@ -421,7 +422,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
} else {
LOG_ERROR(HW_GPU, "Unhandled multiply-add instruction: 0x%02x (%s): 0x%08x",
- (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
+ (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
}
break;
}
@@ -448,31 +449,31 @@ static void ProcessShaderCode(VertexShaderState& state) {
};
// Handle each instruction on its own
- switch (instr.opcode) {
- case Instruction::OpCode::END:
+ switch (instr.opcode.Value()) {
+ case OpCode::Id::END:
exit_loop = true;
break;
- case Instruction::OpCode::JMPC:
+ case OpCode::Id::JMPC:
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1;
}
break;
- case Instruction::OpCode::JMPU:
+ case OpCode::Id::JMPU:
if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) {
state.program_counter = &shader_memory[instr.flow_control.dest_offset] - 1;
}
break;
- case Instruction::OpCode::CALL:
+ case OpCode::Id::CALL:
call(state,
instr.flow_control.dest_offset,
instr.flow_control.num_instructions,
binary_offset + 1, 0, 0);
break;
- case Instruction::OpCode::CALLU:
+ case OpCode::Id::CALLU:
if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) {
call(state,
instr.flow_control.dest_offset,
@@ -481,7 +482,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
break;
- case Instruction::OpCode::CALLC:
+ case OpCode::Id::CALLC:
if (evaluate_condition(state, instr.flow_control.refx, instr.flow_control.refy, instr.flow_control)) {
call(state,
instr.flow_control.dest_offset,
@@ -490,10 +491,10 @@ static void ProcessShaderCode(VertexShaderState& state) {
}
break;
- case Instruction::OpCode::NOP:
+ case OpCode::Id::NOP:
break;
- case Instruction::OpCode::IFU:
+ case OpCode::Id::IFU:
if (shader_uniforms.b[instr.flow_control.bool_uniform_id]) {
call(state,
binary_offset + 1,
@@ -508,7 +509,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
- case Instruction::OpCode::IFC:
+ case OpCode::Id::IFC:
{
// TODO: Do we need to consider swizzlers here?
@@ -527,7 +528,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
break;
}
- case Instruction::OpCode::LOOP:
+ case OpCode::Id::LOOP:
{
state.address_registers[2] = shader_uniforms.i[instr.flow_control.int_uniform_id].y;
@@ -542,7 +543,7 @@ static void ProcessShaderCode(VertexShaderState& state) {
default:
LOG_ERROR(HW_GPU, "Unhandled instruction: 0x%02x (%s): 0x%08x",
- (int)instr.opcode.Value(), instr.opcode.GetInfo().name, instr.hex);
+ (int)instr.opcode.Value().EffectiveOpCode(), instr.opcode.Value().GetInfo().name, instr.hex);
break;
}
diff --git a/src/video_core/video_core.cpp b/src/video_core/video_core.cpp
index 0a236595..b9d4ede3 100644
--- a/src/video_core/video_core.cpp
+++ b/src/video_core/video_core.cpp
@@ -18,7 +18,6 @@ namespace VideoCore {
EmuWindow* g_emu_window = nullptr; ///< Frontend emulator window
RendererBase* g_renderer = nullptr; ///< Renderer plugin
-int g_current_frame = 0;
/// Initialize the video core
void Init(EmuWindow* emu_window) {
@@ -27,8 +26,6 @@ void Init(EmuWindow* emu_window) {
g_renderer->SetWindow(g_emu_window);
g_renderer->Init();
- g_current_frame = 0;
-
LOG_DEBUG(Render, "initialized OK");
}
diff --git a/src/video_core/video_core.h b/src/video_core/video_core.h
index b782f17b..1b51d39b 100644
--- a/src/video_core/video_core.h
+++ b/src/video_core/video_core.h
@@ -30,7 +30,6 @@ static const int kScreenBottomHeight = 240; ///< 3DS bottom screen height
// ---------------------
extern RendererBase* g_renderer; ///< Renderer plugin
-extern int g_current_frame; ///< Current frame
extern EmuWindow* g_emu_window; ///< Emu window
/// Start the video core