9 files changed, 377 insertions, 416 deletions
diff --git a/src/citra_qt/debugger/graphics_cmdlists.cpp b/src/citra_qt/debugger/graphics_cmdlists.cpp
index 195197ef..30b8b5da 100644
--- a/src/citra_qt/debugger/graphics_cmdlists.cpp
+++ b/src/citra_qt/debugger/graphics_cmdlists.cpp
@@ -83,7 +83,7 @@ QVariant GPUCommandListModel::data(const QModelIndex& index, int role) const
         if (role == Qt::DisplayRole) {
             QString content;
             if (index.column() == 0) {
-                content = Pica::command_names[header.cmd_id];
+                content = QString::fromLatin1(Pica::Regs::GetCommandName(header.cmd_id).c_str());
                 content.append(" ");
             } else if (index.column() == 1) {
                 for (int j = 0; j < cmd.size(); ++j)
diff --git a/src/common/common.vcxproj b/src/common/common.vcxproj
index 1f5c714c..341d3a81 100644
--- a/src/common/common.vcxproj
+++ b/src/common/common.vcxproj
@@ -182,7 +182,6 @@
     <ClInclude Include="mem_arena.h" />
     <ClInclude Include="msg_handler.h" />
     <ClInclude Include="platform.h" />
-    <ClInclude Include="register_set.h" />
     <ClInclude Include="scm_rev.h" />
     <ClInclude Include="std_condition_variable.h" />
     <ClInclude Include="std_mutex.h" />
diff --git a/src/common/common.vcxproj.filters b/src/common/common.vcxproj.filters
index e8c4ce36..59268ce5 100644
--- a/src/common/common.vcxproj.filters
+++ b/src/common/common.vcxproj.filters
@@ -29,7 +29,6 @@
     <ClInclude Include="memory_util.h" />
     <ClInclude Include="msg_handler.h" />
     <ClInclude Include="platform.h" />
-    <ClInclude Include="register_set.h" />
     <ClInclude Include="std_condition_variable.h" />
     <ClInclude Include="std_mutex.h" />
     <ClInclude Include="std_thread.h" />
diff --git a/src/common/register_set.h b/src/common/register_set.h
deleted file mode 100644
index ba19a261..00000000
--- a/src/common/register_set.h
+++ /dev/null
@@ -1,163 +0,0 @@
-// Copyright 2014 Citra Emulator Project
-// Licensed under GPLv2
-// Refer to the license.txt file included.
-
-#pragma once
-
-// Copyright 2014 Tony Wasserka
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-//     * Redistributions of source code must retain the above copyright
-//       notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above copyright
-//       notice, this list of conditions and the following disclaimer in the
-//       documentation and/or other materials provided with the distribution.
-//     * Neither the name of the owner nor the names of its contributors may
-//       be used to endorse or promote products derived from this software
-//       without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-/*
- * Standardized way to define a group of registers and corresponding data structures. To define
- * a new register set, first define struct containing an enumeration called "Id" containing
- * all register IDs and a template struct called "Struct". Specialize the Struct struct for any
- * register ID which needs to be accessed in a specialized way. You can then declare the object
- * containing all register values using the RegisterSet<BaseType, DefiningStruct> type, where
- * BaseType is the underlying type of each register (e.g. u32).
- * Of course, you'll usually want to implement the Struct template such that they are of the same
- * size as BaseType. However, it's also possible to make it larger, e.g. when you want to describe
- * multiple registers with the same structure.
- *
- * Example:
- *
- *     struct Regs {
- *         enum Id : u32 {
- *             Value1 = 0,
- *             Value2 = 1,
- *             Value3 = 2,
- *             NumIds = 3
- *         };
- *
- *         // declare register definition structures
- *         template<Id id>
- *         struct Struct;
- *     };
- *
- *     // Define register set object
- *     RegisterSet<u32, CommandIds> registers;
- *
- *     // define register definition structures
- *     template<>
- *     struct Regs::Struct<Regs::Value1> {
- *         union {
- *             BitField<0, 4, u32> some_field;
- *             BitField<4, 3, u32> some_other_field;
- *         };
- *     };
- *
- * Usage in external code (within SomeNamespace scope):
- *
- *     For a register which maps to a single index:
- *     registers.Get<Regs::Value1>().some_field = some_value;
- *
- *      For a register which maps to different indices, e.g. a group of similar registers
- *     registers.Get<Regs::Value1>(index).some_field = some_value;
- *
- *
- * @tparam BaseType Base type used for storing individual registers, e.g. u32
- * @tparam RegDefinition Class defining an enumeration called "Id" and a template<Id id> struct, as described above.
- * @note RegDefinition::Id needs to have an enum value called NumIds defining the number of registers to be allocated.
- */
-template<typename BaseType, typename RegDefinition>
-struct RegisterSet {
-    // Register IDs
-    using Id = typename RegDefinition::Id;
-
-    // type used for *this
-    using ThisType = RegisterSet<BaseType, RegDefinition>;
-
-    // Register definition structs, defined in RegDefinition
-    template<Id id>
-    using Struct = typename RegDefinition::template Struct<id>;
-
-
-    /*
-     * Lookup register with the given id and return it as the corresponding structure type.
-     * @note This just forwards the arguments to Get(Id).
-     */
-    template<Id id>
-    const Struct<id>& Get() const {
-        return Get<id>(id);
-    }
-
-    /*
-     * Lookup register with the given id and return it as the corresponding structure type.
-     * @note This just forwards the arguments to Get(Id).
-     */
-    template<Id id>
-    Struct<id>& Get() {
-        return Get<id>(id);
-    }
-
-    /*
-     * Lookup register with the given index and return it as the corresponding structure type.
-     * @todo Is this portable with regards to structures larger than BaseType?
-     * @note if index==id, you don't need to specify the function parameter.
-     */
-    template<Id id>
-    const Struct<id>& Get(const Id& index) const {
-        const int idx = static_cast<size_t>(index);
-        return *reinterpret_cast<const Struct<id>*>(&raw[idx]);
-    }
-
-    /*
-     * Lookup register with the given index and return it as the corresponding structure type.
-     * @note This just forwards the arguments to the const version of Get(Id).
-     * @note if index==id, you don't need to specify the function parameter.
-     */
-    template<Id id>
-    Struct<id>& Get(const Id& index) {
-        return const_cast<Struct<id>&>(GetThis().Get<id>(index));
-    }
-
-    /*
-     * Plain array access.
-     * @note If you want to have this casted to a register defininition struct, use Get() instead.
-     */
-    const BaseType& operator[] (const Id& id) const {
-        return raw[static_cast<size_t>(id)];
-    }
-
-    /*
-     * Plain array access.
-     * @note If you want to have this casted to a register defininition struct, use Get() instead.
-     * @note This operator just forwards its argument to the const version.
-     */
-    BaseType& operator[] (const Id& id) {
-        return const_cast<BaseType&>(GetThis()[id]);
-    }
-
-private:
-    /*
-     * Returns a const reference to "this".
-     */
-    const ThisType& GetThis() const {
-        return static_cast<const ThisType&>(*this);
-    }
-
-    BaseType raw[Id::NumIds];
-};
diff --git a/src/core/hle/service/gsp.cpp b/src/core/hle/service/gsp.cpp
index e241b31c..08e65612 100644
--- a/src/core/hle/service/gsp.cpp
+++ b/src/core/hle/service/gsp.cpp
@@ -173,11 +173,11 @@ void ExecuteCommand(const Command& command) {
     case CommandId::SET_COMMAND_LIST_LAST:
     {
         auto& params = command.set_command_list_last;
-        WriteGPURegister(GPU::Regs::CommandProcessor + 2, params.address >> 3);
-        WriteGPURegister(GPU::Regs::CommandProcessor, params.size >> 3);
+        WriteGPURegister(GPU_REG_INDEX(command_processor_config.address), params.address >> 3);
+        WriteGPURegister(GPU_REG_INDEX(command_processor_config.size), params.size >> 3);
 
         // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though
-        WriteGPURegister(GPU::Regs::CommandProcessor + 4, 1);
+        WriteGPURegister(GPU_REG_INDEX(command_processor_config.trigger), 1);
 
         // TODO: Move this to GPU
         // TODO: Not sure what units the size is measured in
@@ -193,15 +193,15 @@ void ExecuteCommand(const Command& command) {
     case CommandId::SET_MEMORY_FILL:
     {
         auto& params = command.memory_fill;
-        WriteGPURegister(GPU::Regs::MemoryFill, params.start1 >> 3);
-        WriteGPURegister(GPU::Regs::MemoryFill + 1, params.end1 >> 3);
-        WriteGPURegister(GPU::Regs::MemoryFill + 2, params.end1 - params.start1);
-        WriteGPURegister(GPU::Regs::MemoryFill + 3, params.value1);
-
-        WriteGPURegister(GPU::Regs::MemoryFill + 4, params.start2 >> 3);
-        WriteGPURegister(GPU::Regs::MemoryFill + 5, params.end2 >> 3);
-        WriteGPURegister(GPU::Regs::MemoryFill + 6, params.end2 - params.start2);
-        WriteGPURegister(GPU::Regs::MemoryFill + 7, params.value2);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_start), params.start1 >> 3);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].address_end), params.end1 >> 3);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].size), params.end1 - params.start1);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[0].value), params.value1);
+
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_start), params.start2 >> 3);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].address_end), params.end2 >> 3);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].size), params.end2 - params.start2);
+        WriteGPURegister(GPU_REG_INDEX(memory_fill_config[1].value), params.value2);
         break;
     }
 
@@ -220,15 +220,15 @@ void ExecuteCommand(const Command& command) {
     case CommandId::SET_TEXTURE_COPY:
     {
         auto& params = command.image_copy;
-        WriteGPURegister(GPU::Regs::DisplayTransfer, params.in_buffer_address >> 3);
-        WriteGPURegister(GPU::Regs::DisplayTransfer + 1, params.out_buffer_address >> 3);
-        WriteGPURegister(GPU::Regs::DisplayTransfer + 3, params.in_buffer_size);
-        WriteGPURegister(GPU::Regs::DisplayTransfer + 2, params.out_buffer_size);
-        WriteGPURegister(GPU::Regs::DisplayTransfer + 4, params.flags);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_address), params.in_buffer_address >> 3);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_address), params.out_buffer_address >> 3);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.input_size), params.in_buffer_size);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.output_size), params.out_buffer_size);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.flags), params.flags);
 
         // TODO: Should this only be ORed with 1 for texture copies?
         // trigger transfer
-        WriteGPURegister(GPU::Regs::DisplayTransfer + 6, 1);
+        WriteGPURegister(GPU_REG_INDEX(display_transfer_config.trigger), 1);
         break;
     }
 
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index d94c2329..fd40f8ac 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -19,7 +19,7 @@
 
 namespace GPU {
 
-RegisterSet<u32, Regs> g_regs;
+Regs g_regs;
 
 u32 g_cur_line = 0;         ///< Current vertical screen line
 u64 g_last_line_ticks = 0;  ///< CPU tick count from last vertical screen line
@@ -32,8 +32,8 @@ void SetFramebufferLocation(const FramebufferLocation mode) {
     switch (mode) {
     case FRAMEBUFFER_LOCATION_FCRAM:
     {
-        auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
-        auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
+        auto& framebuffer_top = g_regs.framebuffer_config[0];
+        auto& framebuffer_sub = g_regs.framebuffer_config[1];
 
         framebuffer_top.address_left1  = PADDR_TOP_LEFT_FRAME1;
         framebuffer_top.address_left2  = PADDR_TOP_LEFT_FRAME2;
@@ -48,8 +48,8 @@ void SetFramebufferLocation(const FramebufferLocation mode) {
 
     case FRAMEBUFFER_LOCATION_VRAM:
     {
-        auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
-        auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
+        auto& framebuffer_top = g_regs.framebuffer_config[0];
+        auto& framebuffer_sub = g_regs.framebuffer_config[1];
 
         framebuffer_top.address_left1  = PADDR_VRAM_TOP_LEFT_FRAME1;
         framebuffer_top.address_left2  = PADDR_VRAM_TOP_LEFT_FRAME2;
@@ -107,13 +107,12 @@ inline void Read(T &var, const u32 raw_addr) {
     int index = addr / 4;
 
     // Reads other than u32 are untested, so I'd rather have them abort than silently fail
-    if (index >= Regs::NumIds || !std::is_same<T,u32>::value)
-    {
+    if (index >= Regs::NumIds() || !std::is_same<T,u32>::value) {
         ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr);
         return;
     }
 
-    var = g_regs[static_cast<Regs::Id>(addr / 4)];
+    var = g_regs[addr / 4];
 }
 
 template <typename T>
@@ -122,22 +121,22 @@ inline void Write(u32 addr, const T data) {
     int index = addr / 4;
 
     // Writes other than u32 are untested, so I'd rather have them abort than silently fail
-    if (index >= Regs::NumIds || !std::is_same<T,u32>::value)
-    {
+    if (index >= Regs::NumIds() || !std::is_same<T,u32>::value) {
         ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr);
         return;
     }
 
-    g_regs[static_cast<Regs::Id>(index)] = data;
+    g_regs[index] = data;
 
-    switch (static_cast<Regs::Id>(index)) {
+    switch (index) {
 
     // Memory fills are triggered once the fill value is written.
     // NOTE: This is not verified.
-    case Regs::MemoryFill + 3:
-    case Regs::MemoryFill + 7:
+    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].value, 0x00004 + 0x3):
+    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].value, 0x00008 + 0x3):
     {
-        const auto& config = g_regs.Get<Regs::MemoryFill>(static_cast<Regs::Id>(index - 3));
+        const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].value));
+        const auto& config = g_regs.memory_fill_config[is_second_filler];
 
         // TODO: Not sure if this check should be done at GSP level instead
         if (config.address_start) {
@@ -152,9 +151,9 @@ inline void Write(u32 addr, const T data) {
         break;
     }
 
-    case Regs::DisplayTransfer + 6:
+    case GPU_REG_INDEX(display_transfer_config.trigger):
     {
-        const auto& config = g_regs.Get<Regs::DisplayTransfer>();
+        const auto& config = g_regs.display_transfer_config;
         if (config.trigger & 1) {
             u8* source_pointer = Memory::GetPointer(config.GetPhysicalInputAddress());
             u8* dest_pointer = Memory::GetPointer(config.GetPhysicalOutputAddress());
@@ -221,13 +220,13 @@ inline void Write(u32 addr, const T data) {
         break;
     }
 
-    case Regs::CommandProcessor + 4:
+    case GPU_REG_INDEX(command_processor_config.trigger):
     {
-        const auto& config = g_regs.Get<Regs::CommandProcessor>();
+        const auto& config = g_regs.command_processor_config;
         if (config.trigger & 1)
         {
-            // u32* buffer = (u32*)Memory::GetPointer(config.address << 3);
-            ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.address << 3);
+            // u32* buffer = (u32*)Memory::GetPointer(config.GetPhysicalAddress());
+            ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.GetPhysicalAddress());
             // TODO: Process command list!
         }
         break;
@@ -252,7 +251,7 @@ template void Write<u8>(u32 addr, const u8 data);
 
 /// Update hardware
 void Update() {
-    auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
+    auto& framebuffer_top = g_regs.framebuffer_config[0];
     u64 current_ticks = Core::g_app_core->GetTicks();
 
     // Synchronize line...
@@ -280,8 +279,8 @@ void Init() {
 //    SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM);
     SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM);
 
-    auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
-    auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
+    auto& framebuffer_top = g_regs.framebuffer_config[0];
+    auto& framebuffer_sub = g_regs.framebuffer_config[1];
     // TODO: Width should be 240 instead?
     framebuffer_top.width = 480;
     framebuffer_top.height = 400;
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 42f18a0e..3065da89 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -4,32 +4,57 @@
 
 #pragma once
 
+#include <cstddef>
+
 #include "common/common_types.h"
 #include "common/bit_field.h"
-#include "common/register_set.h"
 
 namespace GPU {
 
 static const u32 kFrameCycles   = 268123480 / 60;   ///< 268MHz / 60 frames per second
 static const u32 kFrameTicks    = kFrameCycles / 3; ///< Approximate number of instructions/frame
 
+// Returns index corresponding to the Regs member labeled by field_name
+// TODO: Due to Visual Studio bug 209229, offsetof does not return constant expressions
+//       when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])).
+//       For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
+//       Hopefully, this will be fixed sometime in the future.
+//       For lack of better alternatives, we currently hardcode the offsets when constant
+//       expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
+//       will then make sure the offsets indeed match the automatically calculated ones).
+#define GPU_REG_INDEX(field_name) (offsetof(GPU::Regs, field_name) / sizeof(u32))
+#if defined(_MSC_VER)
+#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index)
+#else
+// NOTE: This emulates a static_assert inside an expression, which is only needed to work around
+//       the MSVC limitation described above. The macro forwards its first argument to
+//       GPU_REG_INDEX and performs a (no-op) cast to size_t iff the second argument matches the
+//       computed field index. Otherwise std::enable_if has no "type" member and compilation fails.
+#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
+    ((typename std::enable_if<(backup_workaround_index) == GPU_REG_INDEX(field_name), size_t>::type)GPU_REG_INDEX(field_name))
+#endif
+
 // MMIO region 0x1EFxxxxx
 struct Regs {
-    enum Id : u32 {
-        MemoryFill                = 0x00004, // + 5,6,7; second block at 8-11
-
-        FramebufferTop            = 0x00117, // + 11a,11b,11c,11d(?),11e...126
-        FramebufferBottom         = 0x00157, // + 15a,15b,15c,15d(?),15e...166
-
-        DisplayTransfer           = 0x00300, // + 301,302,303,304,305,306
-
-        CommandProcessor          = 0x00638, // + 63a,63c
 
-        NumIds                    = 0x01000
-    };
-
-    template<Id id>
-    struct Struct;
+// Helper macro to properly align structure members. INSERT_PADDING_WORDS(n); declares a u32[n]
+// member with a name like "pad121"; the suffix is the current source line, which keeps the
+// member names unique. The macro has no trailing semicolon of its own: each use supplies it.
+#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y
+#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y)
+#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)]
+
+// Helper macro to verify a member's size at compile time; terminate each use with a semicolon.
+#if defined(_MSC_VER)
+// TODO: MSVC does not support using sizeof() on non-static data members even though this
+//       is technically allowed since C++11. This macro should be enabled once MSVC adds
+//       support for that.
+#define ASSERT_MEMBER_SIZE(name, size_in_bytes)
+#else
+#define ASSERT_MEMBER_SIZE(name, size_in_bytes)    \
+    static_assert(sizeof(name) == (size_in_bytes), \
+                  "Structure size and register block length don't match")
+#endif
 
     enum class FramebufferFormat : u32 {
         RGBA8  = 0,
@@ -38,135 +63,191 @@ struct Regs {
         RGB5A1 = 3,
         RGBA4  = 4,
     };
-};
 
-template<>
-struct Regs::Struct<Regs::MemoryFill> {
-    u32 address_start;
-    u32 address_end; // ?
-    u32 size;
-    u32 value; // ?
+    INSERT_PADDING_WORDS(0x4);
 
-    inline u32 GetStartAddress() const {
-        return address_start * 8;
-    }
+    struct {
+        u32 address_start;
+        u32 address_end; // ?
+        u32 size;
+        u32 value; // ?
 
-    inline u32 GetEndAddress() const {
-        return address_end * 8;
-    }
-};
-static_assert(sizeof(Regs::Struct<Regs::MemoryFill>) == 0x10, "Structure size and register block length don't match");
+        inline u32 GetStartAddress() const {
+            return DecodeAddressRegister(address_start);
+        }
 
-template<>
-struct Regs::Struct<Regs::FramebufferTop> {
-    using Format = Regs::FramebufferFormat;
+        inline u32 GetEndAddress() const {
+            return DecodeAddressRegister(address_end);
+        }
+    } memory_fill_config[2];
+    ASSERT_MEMBER_SIZE(memory_fill_config[0], 0x10);
 
-    union {
-        u32 size;
+    INSERT_PADDING_WORDS(0x10b);
 
-        BitField< 0, 16, u32> width;
-        BitField<16, 16, u32> height;
-    };
+    struct {
+        using Format = Regs::FramebufferFormat;
 
-    u32 pad0[2];
+        union {
+            u32 size;
 
-    u32 address_left1;
-    u32 address_left2;
+            BitField< 0, 16, u32> width;
+            BitField<16, 16, u32> height;
+        };
 
-    union {
-        u32 format;
+        INSERT_PADDING_WORDS(0x2);
 
-        BitField< 0, 3, Format> color_format;
-    };
+        u32 address_left1;
+        u32 address_left2;
 
-    u32 pad1;
+        union {
+            u32 format;
 
-    union {
-        u32 active_fb;
+            BitField< 0, 3, Format> color_format;
+        };
 
-        // 0: Use parameters ending with "1"
-        // 1: Use parameters ending with "2"
-        BitField<0, 1, u32> second_fb_active;
-    };
+        INSERT_PADDING_WORDS(0x1);
 
-    u32 pad2[5];
+        union {
+            u32 active_fb;
 
-    // Distance between two pixel rows, in bytes
-    u32 stride;
+            // 0: Use parameters ending with "1"
+            // 1: Use parameters ending with "2"
+            BitField<0, 1, u32> second_fb_active;
+        };
 
-    u32 address_right1;
-    u32 address_right2;
-};
+        INSERT_PADDING_WORDS(0x5);
 
-template<>
-struct Regs::Struct<Regs::FramebufferBottom> : public Regs::Struct<Regs::FramebufferTop> {
-};
-static_assert(sizeof(Regs::Struct<Regs::FramebufferTop>) == 0x40, "Structure size and register block length don't match");
+        // Distance between two pixel rows, in bytes
+        u32 stride;
 
-template<>
-struct Regs::Struct<Regs::DisplayTransfer> {
-    using Format = Regs::FramebufferFormat;
+        u32 address_right1;
+        u32 address_right2;
 
-    u32 input_address;
-    u32 output_address;
+        INSERT_PADDING_WORDS(0x30);
+    } framebuffer_config[2];
+    ASSERT_MEMBER_SIZE(framebuffer_config[0], 0x100);
 
-    inline u32 GetPhysicalInputAddress() const {
-        return input_address * 8;
-    }
+    INSERT_PADDING_WORDS(0x169);
 
-    inline u32 GetPhysicalOutputAddress() const {
-        return output_address * 8;
-    }
+    struct {
+        using Format = Regs::FramebufferFormat;
 
-    union {
-        u32 output_size;
+        u32 input_address;
+        u32 output_address;
 
-        BitField< 0, 16, u32> output_width;
-        BitField<16, 16, u32> output_height;
-    };
+        inline u32 GetPhysicalInputAddress() const {
+            return DecodeAddressRegister(input_address);
+        }
 
-    union {
-        u32 input_size;
+        inline u32 GetPhysicalOutputAddress() const {
+            return DecodeAddressRegister(output_address);
+        }
 
-        BitField< 0, 16, u32> input_width;
-        BitField<16, 16, u32> input_height;
-    };
+        union {
+            u32 output_size;
 
-    union {
-        u32 flags;
+            BitField< 0, 16, u32> output_width;
+            BitField<16, 16, u32> output_height;
+        };
 
-        BitField< 0, 1, u32> flip_data;        // flips input data horizontally (TODO) if true
-        BitField< 8, 3, Format> input_format;
-        BitField<12, 3, Format> output_format;
-        BitField<16, 1, u32> output_tiled;     // stores output in a tiled format
-    };
+        union {
+            u32 input_size;
 
-    u32 unknown;
+            BitField< 0, 16, u32> input_width;
+            BitField<16, 16, u32> input_height;
+        };
 
-    // it seems that writing to this field triggers the display transfer
-    u32 trigger;
-};
-static_assert(sizeof(Regs::Struct<Regs::DisplayTransfer>) == 0x1C, "Structure size and register block length don't match");
+        union {
+            u32 flags;
 
-template<>
-struct Regs::Struct<Regs::CommandProcessor> {
-    // command list size
-    u32 size;
+            BitField< 0, 1, u32> flip_data;        // flips input data horizontally (TODO) if true
+            BitField< 8, 3, Format> input_format;
+            BitField<12, 3, Format> output_format;
+            BitField<16, 1, u32> output_tiled;     // stores output in a tiled format
+        };
 
-    u32 pad0;
+        INSERT_PADDING_WORDS(0x1);
 
-    // command list address
-    u32 address;
+        // it seems that writing to this field triggers the display transfer
+        u32 trigger;
+    } display_transfer_config;
+    ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c);
 
-    u32 pad1;
+    INSERT_PADDING_WORDS(0x331);
 
-    // it seems that writing to this field triggers command list processing
-    u32 trigger;
-};
-static_assert(sizeof(Regs::Struct<Regs::CommandProcessor>) == 0x14, "Structure size and register block length don't match");
+    struct {
+        // command list size
+        u32 size;
+
+        INSERT_PADDING_WORDS(0x1);
+
+        // command list address
+        u32 address;
+
+        INSERT_PADDING_WORDS(0x1);
+
+        // it seems that writing to this field triggers command list processing
+        u32 trigger;
 
+        inline u32 GetPhysicalAddress() const {
+            return DecodeAddressRegister(address);
+        }
+    } command_processor_config;
+    ASSERT_MEMBER_SIZE(command_processor_config, 0x14);
 
-extern RegisterSet<u32, Regs> g_regs;
+    INSERT_PADDING_WORDS(0x9c3);
+
+#undef INSERT_PADDING_WORDS_HELPER1
+#undef INSERT_PADDING_WORDS_HELPER2
+#undef INSERT_PADDING_WORDS
+
+    static inline int NumIds() { // number of u32 words covered by this register set
+        return static_cast<int>(sizeof(Regs) / sizeof(u32));
+    }
+
+    const u32& operator [] (int index) const { // read-only view of the raw register word at index
+        const u32* content = reinterpret_cast<const u32*>(this);
+        return content[index];
+    }
+
+    u32& operator [] (int index) { // writable access to the raw register word at index
+        u32* const words = reinterpret_cast<u32*>(this);
+        return words[index];
+    }
+
+private:
+    /*
+     * Converts a raw address register value into the physical address it refers to. Most physical
+     * addresses which GPU registers refer to are 8-byte aligned, so the raw value is scaled by 8.
+     */
+    static inline u32 DecodeAddressRegister(u32 register_value) {
+        return register_value << 3;
+    }
+};
+static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout");
+
+// TODO: MSVC does not support using offsetof() on non-static data members even though this
+//       is technically allowed since C++11. This macro should be enabled once MSVC adds
+//       support for that.
+#ifndef _MSC_VER
+#define ASSERT_REG_POSITION(field_name, position)               \
+    static_assert(offsetof(Regs, field_name) == (position) * 4, \
+                  "Field " #field_name " has invalid position")
+
+ASSERT_REG_POSITION(memory_fill_config[0],    0x00004);
+ASSERT_REG_POSITION(memory_fill_config[1],    0x00008);
+ASSERT_REG_POSITION(framebuffer_config[0],    0x00117);
+ASSERT_REG_POSITION(framebuffer_config[1],    0x00157);
+ASSERT_REG_POSITION(display_transfer_config,  0x00300);
+ASSERT_REG_POSITION(command_processor_config, 0x00638);
+
+#undef ASSERT_REG_POSITION
+#endif // !defined(_MSC_VER)
+
+// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
+static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set");
+
+extern Regs g_regs;
 
 enum {
     TOP_ASPECT_X        = 0x5,
diff --git a/src/video_core/pica.h b/src/video_core/pica.h
index f0fa3aba..d64559d7 100644
--- a/src/video_core/pica.h
+++ b/src/video_core/pica.h
@@ -4,127 +4,173 @@
 
 #pragma once
 
+#include <cstddef>
 #include <initializer_list>
 #include <map>
 
 #include "common/bit_field.h"
 #include "common/common_types.h"
-#include "common/register_set.h"
 
 namespace Pica {
 
+// Returns the index (in units of u32 words) of the Regs member labeled by field_name.
+// TODO: Due to Visual Studio bug 209229, offsetof does not return constant expressions
+//       when used with array elements (e.g. PICA_REG_INDEX(vs_uniform_setup.set_value[1])).
+//       For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
+//       Hopefully, this will be fixed sometime in the future.
+//       For lack of better alternatives, we currently hardcode the offsets when constant
+//       expressions are needed via PICA_REG_INDEX_WORKAROUND (on sane compilers, a compile-time
+//       check will then make sure the offsets indeed match the automatically calculated ones).
+#define PICA_REG_INDEX(field_name) (offsetof(Pica::Regs, field_name) / sizeof(u32))
+#if defined(_MSC_VER)
+#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index)
+#else
+// NOTE: Yeah, hacking in a compile-time check here just to work around the lacking MSVC compiler
+//       really is this annoying. This macro just forwards its first argument to PICA_REG_INDEX
+//       and then performs a (no-op) cast to size_t iff the second argument matches the expected
+//       field offset. Otherwise, std::enable_if has no ::type and the code fails to compile.
+#define PICA_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
+    ((typename std::enable_if<backup_workaround_index == PICA_REG_INDEX(field_name), size_t>::type)PICA_REG_INDEX(field_name))
+#endif // _MSC_VER
+
 struct Regs {
-    enum Id : u32 {
-        ViewportSizeX              =  0x41,
-        ViewportInvSizeX           =  0x42,
-        ViewportSizeY              =  0x43,
-        ViewportInvSizeY           =  0x44,
-        ViewportCorner             =  0x68,
-        DepthBufferFormat          = 0x116,
-        ColorBufferFormat          = 0x117,
-        DepthBufferAddress         = 0x11C,
-        ColorBufferAddress         = 0x11D,
-        ColorBufferSize            = 0x11E,
-
-        VertexArrayBaseAddr        = 0x200,
-        VertexDescriptor           = 0x201, // 0x202
-        VertexAttributeOffset      = 0x203, // 0x206,0x209,0x20C,0x20F,0x212,0x215,0x218,0x21B,0x21E,0x221,0x224
-        VertexAttributeInfo0       = 0x204, // 0x207,0x20A,0x20D,0x210,0x213,0x216,0x219,0x21C,0x21F,0x222,0x225
-        VertexAttributeInfo1       = 0x205, // 0x208,0x20B,0x20E,0x211,0x214,0x217,0x21A,0x21D,0x220,0x223,0x226
-
-        NumIds                     = 0x300,
-    };
-
-    template<Id id>
-    union Struct;
+
+// Helper macro to properly align structure members.
+// Calling INSERT_PADDING_WORDS(n) adds a new u32[n] member variable with a name like "pad121",
+// depending on the current source line to make sure variable names are unique.
+#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y
+#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y)
+#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)];
+
+    INSERT_PADDING_WORDS(0x41); // skips register indices 0x00..0x40
+
+    BitField<0, 24, u32> viewport_size_x; // register index 0x41
+    INSERT_PADDING_WORDS(1);
+    BitField<0, 24, u32> viewport_size_y; // register index 0x43
+
+    INSERT_PADDING_WORDS(0x1bc); // pads up to register index 0x200
+
+    union { // bit positions run up to 63, i.e. this member spans two u32 register words
+        enum class Format : u64 {
+            BYTE = 0,
+            UBYTE = 1,
+            SHORT = 2,
+            FLOAT = 3,
+        };
+
+        BitField< 0,  2, Format> format0; // attribute N: format in bits [4N, 4N+2), size in bits [4N+2, 4N+4)
+        BitField< 2,  2, u64> size0;      // number of elements minus 1
+        BitField< 4,  2, Format> format1;
+        BitField< 6,  2, u64> size1;
+        BitField< 8,  2, Format> format2;
+        BitField<10,  2, u64> size2;
+        BitField<12,  2, Format> format3;
+        BitField<14,  2, u64> size3;
+        BitField<16,  2, Format> format4;
+        BitField<18,  2, u64> size4;
+        BitField<20,  2, Format> format5;
+        BitField<22,  2, u64> size5;
+        BitField<24,  2, Format> format6;
+        BitField<26,  2, u64> size6;
+        BitField<28,  2, Format> format7;
+        BitField<30,  2, u64> size7;
+        BitField<32,  2, Format> format8;
+        BitField<34,  2, u64> size8;
+        BitField<36,  2, Format> format9;
+        BitField<38,  2, u64> size9;
+        BitField<40,  2, Format> format10;
+        BitField<42,  2, u64> size10;
+        BitField<44,  2, Format> format11;
+        BitField<46,  2, u64> size11;
+
+        BitField<48, 12, u64> attribute_mask;
+        BitField<60,  4, u64> num_attributes; // number of total attributes minus 1
+    } vertex_descriptor;
+
+    INSERT_PADDING_WORDS(0xfe);
+
+#undef INSERT_PADDING_WORDS_HELPER1
+#undef INSERT_PADDING_WORDS_HELPER2
+#undef INSERT_PADDING_WORDS
+
+    // Maps a register index to a human-readable name; unknown indices yield an empty string.
+    // Used for debugging purposes, so performance is not an issue here (the table is rebuilt per call).
+    static std::string GetCommandName(int index) {
+        std::map<u32, std::string> map; // NOTE(review): <string> is not among this header's visible includes - confirm it arrives transitively
+        Regs regs; // only referenced inside sizeof() by ADD_FIELD
+        // ADD_FIELD registers "name" for a field's first word and "name+k" for each further word k.
+        // TODO: MSVC does not support using offsetof() on non-static data members even though this
+        //       is technically allowed since C++11. Hence, this functionality is disabled until
+        //       MSVC properly supports it.
+        #ifndef _MSC_VER
+        #define ADD_FIELD(name)                                                                               \
+            do {                                                                                              \
+                map.insert({PICA_REG_INDEX(name), #name});                                                    \
+                for (u32 i = PICA_REG_INDEX(name) + 1; i < PICA_REG_INDEX(name) + sizeof(regs.name) / 4; ++i) \
+                    map.insert({i, #name + std::string("+") + std::to_string(i-PICA_REG_INDEX(name))});       \
+            } while(false)
+
+        ADD_FIELD(viewport_size_x);
+        ADD_FIELD(viewport_size_y);
+        ADD_FIELD(vertex_descriptor);
+
+        #undef ADD_FIELD
+        #endif // _MSC_VER
+
+        // operator[] default-inserts an empty string if no match is found, which is then returned
+        return map[index];
+    }
+
+    static inline int NumIds() { // number of u32-sized words spanned by the whole Regs struct
+        return sizeof(Regs) / sizeof(u32);
+    }
+
+    u32& operator [] (int index) const { // raw word access: views the whole struct as a u32 array
+        u32* content = (u32*)this; // NOTE(review): the C-style cast also drops const, so the result is writable
+        return content[index]; // no bounds check is performed on index
+    }
+
+    u32& operator [] (int index) { // raw word access: views the whole struct as a u32 array
+        u32* content = (u32*)this;
+        return content[index]; // no bounds check is performed on index
+    }
+
+private:
+    /*
+     * Most physical addresses which Pica registers refer to are 8-byte aligned.
+     * This function should be used to get the address from a raw register value (raw value * 8).
+     */
+    static inline u32 DecodeAddressRegister(u32 register_value) {
+        return register_value * 8;
+    }
 };
 
-static inline Regs::Id VertexAttributeOffset(int n)
-{
-    return static_cast<Regs::Id>(0x203 + 3*n);
-}
+// Compile-time checks that Regs fields sit at their expected register positions.
+// TODO: MSVC does not support using offsetof() on non-static data members even though this is
+//       technically allowed since C++11. Enable these checks for MSVC once it adds support for that.
+#ifndef _MSC_VER
+#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(Regs, field_name) == position * 4, "Field "#field_name" has invalid position")
+// Positions are given as indices of 32-bit words; the macro converts them to byte offsets.
+ASSERT_REG_POSITION(viewport_size_x, 0x41);
+ASSERT_REG_POSITION(viewport_size_y, 0x43);
+ASSERT_REG_POSITION(vertex_descriptor, 0x200);
 
-static inline Regs::Id VertexAttributeInfo0(int n)
-{
-    return static_cast<Regs::Id>(0x204 + 3*n);
-}
+#undef ASSERT_REG_POSITION
+#endif // !defined(_MSC_VER)
 
-static inline Regs::Id VertexAttributeInfo1(int n)
-{
-    return static_cast<Regs::Id>(0x205 + 3*n);
-}
+// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
+static_assert(sizeof(Regs) == 0x300 * sizeof(u32), "Invalid total size of register set"); // 0x300 words = sum of the fields and padding declared in Regs
 
 union CommandHeader {
     CommandHeader(u32 h) : hex(h) {}
 
     u32 hex;
 
-    BitField< 0, 16, Regs::Id> cmd_id;
+    BitField< 0, 16, u32> cmd_id;
     BitField<16,  4, u32> parameter_mask;
     BitField<20, 11, u32> extra_data_length;
     BitField<31,  1, u32> group_commands;
 };
 
-static std::map<Regs::Id, const char*> command_names = {
-    {Regs::ViewportSizeX, "ViewportSizeX" },
-    {Regs::ViewportInvSizeX, "ViewportInvSizeX" },
-    {Regs::ViewportSizeY, "ViewportSizeY" },
-    {Regs::ViewportInvSizeY, "ViewportInvSizeY" },
-    {Regs::ViewportCorner, "ViewportCorner" },
-    {Regs::DepthBufferFormat, "DepthBufferFormat" },
-    {Regs::ColorBufferFormat, "ColorBufferFormat" },
-    {Regs::DepthBufferAddress, "DepthBufferAddress" },
-    {Regs::ColorBufferAddress, "ColorBufferAddress" },
-    {Regs::ColorBufferSize, "ColorBufferSize" },
-};
-
-template<>
-union Regs::Struct<Regs::ViewportSizeX> {
-    BitField<0, 24, u32> value;
-};
-
-template<>
-union Regs::Struct<Regs::ViewportSizeY> {
-    BitField<0, 24, u32> value;
-};
-
-template<>
-union Regs::Struct<Regs::VertexDescriptor> {
-    enum class Format : u64 {
-        BYTE = 0,
-        UBYTE = 1,
-        SHORT = 2,
-        FLOAT = 3,
-    };
-
-    BitField< 0,  2, Format> format0;
-    BitField< 2,  2, u64> size0;      // number of elements minus 1
-    BitField< 4,  2, Format> format1;
-    BitField< 6,  2, u64> size1;
-    BitField< 8,  2, Format> format2;
-    BitField<10,  2, u64> size2;
-    BitField<12,  2, Format> format3;
-    BitField<14,  2, u64> size3;
-    BitField<16,  2, Format> format4;
-    BitField<18,  2, u64> size4;
-    BitField<20,  2, Format> format5;
-    BitField<22,  2, u64> size5;
-    BitField<24,  2, Format> format6;
-    BitField<26,  2, u64> size6;
-    BitField<28,  2, Format> format7;
-    BitField<30,  2, u64> size7;
-    BitField<32,  2, Format> format8;
-    BitField<34,  2, u64> size8;
-    BitField<36,  2, Format> format9;
-    BitField<38,  2, u64> size9;
-    BitField<40,  2, Format> format10;
-    BitField<42,  2, u64> size10;
-    BitField<44,  2, Format> format11;
-    BitField<46,  2, u64> size11;
-
-    BitField<48, 12, u64> attribute_mask;
-    BitField<60,  4, u64> num_attributes; // number of total attributes minus 1
-};
-
 
 } // namespace
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index d0a8ec1d..02b17456 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -78,8 +78,8 @@ void RendererOpenGL::FlipFramebuffer(const u8* in, u8* out) {
  */
 void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) {
 
-    const auto& framebuffer_top = GPU::g_regs.Get<GPU::Regs::FramebufferTop>();
-    const auto& framebuffer_sub = GPU::g_regs.Get<GPU::Regs::FramebufferBottom>();
+    const auto& framebuffer_top = GPU::g_regs.framebuffer_config[0];
+    const auto& framebuffer_sub = GPU::g_regs.framebuffer_config[1];
     const u32 active_fb_top = (framebuffer_top.active_fb == 1)
                                 ? framebuffer_top.address_left2
                                 : framebuffer_top.address_left1;