Remove the fancy RegisterSet class introduced in 4c2bff61e.

While it was a nice and fancy use of templates, it ultimately had many practical issues: expressions involving it became very long under regular usage, and common code completion tools were not able to handle the structures. Instead, we now use a more conventional approach, which is a lot cleaner to use.
author: Tony Wasserka <NeoBrainX@gmail.com> 2014-08-03 16:00:52 +0200
committer: Tony Wasserka <NeoBrainX@gmail.com> 2014-08-12 02:17:21 +0200
commit: 9c781a6c7646a3f30c23adae75e1879b7fc47d0f (patch)
tree: 76f32e7dc447e661f9f34303609cf4aed23d7a5b /src/core/hw
parent: 29365e67d621dc732997c5b7a5269fa2dfda09ab (diff)
2 files changed, 212 insertions, 132 deletions
diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp
index d94c2329..fd40f8ac 100644
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -19,7 +19,7 @@
 
 namespace GPU {
 
-RegisterSet<u32, Regs> g_regs;
+Regs g_regs;
 
 u32 g_cur_line = 0;         ///< Current vertical screen line
 u64 g_last_line_ticks = 0;  ///< CPU tick count from last vertical screen line
@@ -32,8 +32,8 @@ void SetFramebufferLocation(const FramebufferLocation mode) {
     switch (mode) {
     case FRAMEBUFFER_LOCATION_FCRAM:
     {
-        auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
-        auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
+        auto& framebuffer_top = g_regs.framebuffer_config[0];
+        auto& framebuffer_sub = g_regs.framebuffer_config[1];
 
         framebuffer_top.address_left1  = PADDR_TOP_LEFT_FRAME1;
         framebuffer_top.address_left2  = PADDR_TOP_LEFT_FRAME2;
@@ -48,8 +48,8 @@ void SetFramebufferLocation(const FramebufferLocation mode) {
 
     case FRAMEBUFFER_LOCATION_VRAM:
     {
-        auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
-        auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
+        auto& framebuffer_top = g_regs.framebuffer_config[0];
+        auto& framebuffer_sub = g_regs.framebuffer_config[1];
 
         framebuffer_top.address_left1  = PADDR_VRAM_TOP_LEFT_FRAME1;
         framebuffer_top.address_left2  = PADDR_VRAM_TOP_LEFT_FRAME2;
@@ -107,13 +107,12 @@ inline void Read(T &var, const u32 raw_addr) {
     int index = addr / 4;
 
     // Reads other than u32 are untested, so I'd rather have them abort than silently fail
-    if (index >= Regs::NumIds || !std::is_same<T,u32>::value)
-    {
+    if (index >= Regs::NumIds() || !std::is_same<T,u32>::value) {
         ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr);
         return;
     }
 
-    var = g_regs[static_cast<Regs::Id>(addr / 4)];
+    var = g_regs[addr / 4];
 }
 
 template <typename T>
@@ -122,22 +121,22 @@ inline void Write(u32 addr, const T data) {
     int index = addr / 4;
 
     // Writes other than u32 are untested, so I'd rather have them abort than silently fail
-    if (index >= Regs::NumIds || !std::is_same<T,u32>::value)
-    {
+    if (index >= Regs::NumIds() || !std::is_same<T,u32>::value) {
         ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr);
         return;
     }
 
-    g_regs[static_cast<Regs::Id>(index)] = data;
+    g_regs[index] = data;
 
-    switch (static_cast<Regs::Id>(index)) {
+    switch (index) {
 
     // Memory fills are triggered once the fill value is written.
     // NOTE: This is not verified.
-    case Regs::MemoryFill + 3:
-    case Regs::MemoryFill + 7:
+    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[0].value, 0x00004 + 0x3):
+    case GPU_REG_INDEX_WORKAROUND(memory_fill_config[1].value, 0x00008 + 0x3):
     {
-        const auto& config = g_regs.Get<Regs::MemoryFill>(static_cast<Regs::Id>(index - 3));
+        const bool is_second_filler = (index != GPU_REG_INDEX(memory_fill_config[0].value));
+        const auto& config = g_regs.memory_fill_config[is_second_filler];
 
         // TODO: Not sure if this check should be done at GSP level instead
         if (config.address_start) {
@@ -152,9 +151,9 @@ inline void Write(u32 addr, const T data) {
         break;
     }
 
-    case Regs::DisplayTransfer + 6:
+    case GPU_REG_INDEX(display_transfer_config.trigger):
     {
-        const auto& config = g_regs.Get<Regs::DisplayTransfer>();
+        const auto& config = g_regs.display_transfer_config;
         if (config.trigger & 1) {
             u8* source_pointer = Memory::GetPointer(config.GetPhysicalInputAddress());
             u8* dest_pointer = Memory::GetPointer(config.GetPhysicalOutputAddress());
@@ -221,13 +220,13 @@ inline void Write(u32 addr, const T data) {
         break;
     }
 
-    case Regs::CommandProcessor + 4:
+    case GPU_REG_INDEX(command_processor_config.trigger):
     {
-        const auto& config = g_regs.Get<Regs::CommandProcessor>();
+        const auto& config = g_regs.command_processor_config;
         if (config.trigger & 1)
         {
-            // u32* buffer = (u32*)Memory::GetPointer(config.address << 3);
-            ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.address << 3);
+            // u32* buffer = (u32*)Memory::GetPointer(config.GetPhysicalAddress());
+            ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.GetPhysicalAddress());
             // TODO: Process command list!
         }
         break;
@@ -252,7 +251,7 @@ template void Write<u8>(u32 addr, const u8 data);
 
 /// Update hardware
 void Update() {
-    auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
+    auto& framebuffer_top = g_regs.framebuffer_config[0];
     u64 current_ticks = Core::g_app_core->GetTicks();
 
     // Synchronize line...
@@ -280,8 +279,8 @@ void Init() {
 //    SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM);
     SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM);
 
-    auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>();
-    auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>();
+    auto& framebuffer_top = g_regs.framebuffer_config[0];
+    auto& framebuffer_sub = g_regs.framebuffer_config[1];
     // TODO: Width should be 240 instead?
     framebuffer_top.width = 480;
     framebuffer_top.height = 400;
diff --git a/src/core/hw/gpu.h b/src/core/hw/gpu.h
index 42f18a0e..3065da89 100644
--- a/src/core/hw/gpu.h
+++ b/src/core/hw/gpu.h
@@ -4,32 +4,57 @@
 
 #pragma once
 
+#include <cstddef>
+
 #include "common/common_types.h"
 #include "common/bit_field.h"
-#include "common/register_set.h"
 
 namespace GPU {
 
 static const u32 kFrameCycles   = 268123480 / 60;   ///< 268MHz / 60 frames per second
 static const u32 kFrameTicks    = kFrameCycles / 3; ///< Approximate number of instructions/frame
 
+// Returns index corresponding to the Regs member labeled by field_name
+// TODO: Due to Visual studio bug 209229, offsetof does not return constant expressions
+//       when used with array elements (e.g. GPU_REG_INDEX(memory_fill_config[0])).
+//       For details cf. https://connect.microsoft.com/VisualStudio/feedback/details/209229/offsetof-does-not-produce-a-constant-expression-for-array-members
+//       Hopefully, this will be fixed sometime in the future.
+//       For lack of better alternatives, we currently hardcode the offsets when constant
+//       expressions are needed via GPU_REG_INDEX_WORKAROUND (on sane compilers, static_asserts
+//       will then make sure the offsets indeed match the automatically calculated ones).
+#define GPU_REG_INDEX(field_name) (offsetof(GPU::Regs, field_name) / sizeof(u32))
+#if defined(_MSC_VER)
+#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) (backup_workaround_index)
+#else
+// NOTE: Yeah, hacking in a static_assert here just to workaround the lacking MSVC compiler
+//       really is this annoying. This macro just forwards its first argument to GPU_REG_INDEX
+//       and then performs a (no-op) cast to size_t iff the second argument matches the expected
+//       field offset. Otherwise, the compiler will fail to compile this code.
+#define GPU_REG_INDEX_WORKAROUND(field_name, backup_workaround_index) \
+    ((typename std::enable_if<backup_workaround_index == GPU_REG_INDEX(field_name), size_t>::type)GPU_REG_INDEX(field_name))
+#endif
+
 // MMIO region 0x1EFxxxxx
 struct Regs {
-    enum Id : u32 {
-        MemoryFill                = 0x00004, // + 5,6,7; second block at 8-11
-
-        FramebufferTop            = 0x00117, // + 11a,11b,11c,11d(?),11e...126
-        FramebufferBottom         = 0x00157, // + 15a,15b,15c,15d(?),15e...166
-
-        DisplayTransfer           = 0x00300, // + 301,302,303,304,305,306
-
-        CommandProcessor          = 0x00638, // + 63a,63c
 
-        NumIds                    = 0x01000
-    };
-
-    template<Id id>
-    struct Struct;
+// helper macro to properly align structure members.
+// Calling INSERT_PADDING_WORDS will add a new member variable with a name like "pad121",
+// depending on the current source line to make sure variable names are unique.
+#define INSERT_PADDING_WORDS_HELPER1(x, y) x ## y
+#define INSERT_PADDING_WORDS_HELPER2(x, y) INSERT_PADDING_WORDS_HELPER1(x, y)
+#define INSERT_PADDING_WORDS(num_words) u32 INSERT_PADDING_WORDS_HELPER2(pad, __LINE__)[(num_words)];
+
+// helper macro to make sure the defined structures are of the expected size.
+#if defined(_MSC_VER)
+// TODO: MSVC does not support using sizeof() on non-static data members even though this
+//       is technically allowed since C++11. This macro should be enabled once MSVC adds
+//       support for that.
+#define ASSERT_MEMBER_SIZE(name, size_in_bytes)
+#else
+#define ASSERT_MEMBER_SIZE(name, size_in_bytes)  \
+    static_assert(sizeof(name) == size_in_bytes, \
+                  "Structure size and register block length don't match");
+#endif
 
     enum class FramebufferFormat : u32 {
         RGBA8  = 0,
@@ -38,135 +63,191 @@ struct Regs {
         RGB5A1 = 3,
         RGBA4  = 4,
     };
-};
 
-template<>
-struct Regs::Struct<Regs::MemoryFill> {
-    u32 address_start;
-    u32 address_end; // ?
-    u32 size;
-    u32 value; // ?
+    INSERT_PADDING_WORDS(0x4);
 
-    inline u32 GetStartAddress() const {
-        return address_start * 8;
-    }
+    struct {
+        u32 address_start;
+        u32 address_end; // ?
+        u32 size;
+        u32 value; // ?
 
-    inline u32 GetEndAddress() const {
-        return address_end * 8;
-    }
-};
-static_assert(sizeof(Regs::Struct<Regs::MemoryFill>) == 0x10, "Structure size and register block length don't match");
+        inline u32 GetStartAddress() const {
+            return DecodeAddressRegister(address_start);
+        }
 
-template<>
-struct Regs::Struct<Regs::FramebufferTop> {
-    using Format = Regs::FramebufferFormat;
+        inline u32 GetEndAddress() const {
+            return DecodeAddressRegister(address_end);
+        }
+    } memory_fill_config[2];
+    ASSERT_MEMBER_SIZE(memory_fill_config[0], 0x10);
 
-    union {
-        u32 size;
+    INSERT_PADDING_WORDS(0x10b);
 
-        BitField< 0, 16, u32> width;
-        BitField<16, 16, u32> height;
-    };
+    struct {
+        using Format = Regs::FramebufferFormat;
 
-    u32 pad0[2];
+        union {
+            u32 size;
 
-    u32 address_left1;
-    u32 address_left2;
+            BitField< 0, 16, u32> width;
+            BitField<16, 16, u32> height;
+        };
 
-    union {
-        u32 format;
+        INSERT_PADDING_WORDS(0x2);
 
-        BitField< 0, 3, Format> color_format;
-    };
+        u32 address_left1;
+        u32 address_left2;
 
-    u32 pad1;
+        union {
+            u32 format;
 
-    union {
-        u32 active_fb;
+            BitField< 0, 3, Format> color_format;
+        };
 
-        // 0: Use parameters ending with "1"
-        // 1: Use parameters ending with "2"
-        BitField<0, 1, u32> second_fb_active;
-    };
+        INSERT_PADDING_WORDS(0x1);
 
-    u32 pad2[5];
+        union {
+            u32 active_fb;
 
-    // Distance between two pixel rows, in bytes
-    u32 stride;
+            // 0: Use parameters ending with "1"
+            // 1: Use parameters ending with "2"
+            BitField<0, 1, u32> second_fb_active;
+        };
 
-    u32 address_right1;
-    u32 address_right2;
-};
+        INSERT_PADDING_WORDS(0x5);
 
-template<>
-struct Regs::Struct<Regs::FramebufferBottom> : public Regs::Struct<Regs::FramebufferTop> {
-};
-static_assert(sizeof(Regs::Struct<Regs::FramebufferTop>) == 0x40, "Structure size and register block length don't match");
+        // Distance between two pixel rows, in bytes
+        u32 stride;
 
-template<>
-struct Regs::Struct<Regs::DisplayTransfer> {
-    using Format = Regs::FramebufferFormat;
+        u32 address_right1;
+        u32 address_right2;
 
-    u32 input_address;
-    u32 output_address;
+        INSERT_PADDING_WORDS(0x30);
+    } framebuffer_config[2];
+    ASSERT_MEMBER_SIZE(framebuffer_config[0], 0x100);
 
-    inline u32 GetPhysicalInputAddress() const {
-        return input_address * 8;
-    }
+    INSERT_PADDING_WORDS(0x169);
 
-    inline u32 GetPhysicalOutputAddress() const {
-        return output_address * 8;
-    }
+    struct {
+        using Format = Regs::FramebufferFormat;
 
-    union {
-        u32 output_size;
+        u32 input_address;
+        u32 output_address;
 
-        BitField< 0, 16, u32> output_width;
-        BitField<16, 16, u32> output_height;
-    };
+        inline u32 GetPhysicalInputAddress() const {
+            return DecodeAddressRegister(input_address);
+        }
 
-    union {
-        u32 input_size;
+        inline u32 GetPhysicalOutputAddress() const {
+            return DecodeAddressRegister(output_address);
+        }
 
-        BitField< 0, 16, u32> input_width;
-        BitField<16, 16, u32> input_height;
-    };
+        union {
+            u32 output_size;
 
-    union {
-        u32 flags;
+            BitField< 0, 16, u32> output_width;
+            BitField<16, 16, u32> output_height;
+        };
 
-        BitField< 0, 1, u32> flip_data;        // flips input data horizontally (TODO) if true
-        BitField< 8, 3, Format> input_format;
-        BitField<12, 3, Format> output_format;
-        BitField<16, 1, u32> output_tiled;     // stores output in a tiled format
-    };
+        union {
+            u32 input_size;
 
-    u32 unknown;
+            BitField< 0, 16, u32> input_width;
+            BitField<16, 16, u32> input_height;
+        };
 
-    // it seems that writing to this field triggers the display transfer
-    u32 trigger;
-};
-static_assert(sizeof(Regs::Struct<Regs::DisplayTransfer>) == 0x1C, "Structure size and register block length don't match");
+        union {
+            u32 flags;
 
-template<>
-struct Regs::Struct<Regs::CommandProcessor> {
-    // command list size
-    u32 size;
+            BitField< 0, 1, u32> flip_data;        // flips input data horizontally (TODO) if true
+            BitField< 8, 3, Format> input_format;
+            BitField<12, 3, Format> output_format;
+            BitField<16, 1, u32> output_tiled;     // stores output in a tiled format
+        };
 
-    u32 pad0;
+        INSERT_PADDING_WORDS(0x1);
 
-    // command list address
-    u32 address;
+        // it seems that writing to this field triggers the display transfer
+        u32 trigger;
+    } display_transfer_config;
+    ASSERT_MEMBER_SIZE(display_transfer_config, 0x1c);
 
-    u32 pad1;
+    INSERT_PADDING_WORDS(0x331);
 
-    // it seems that writing to this field triggers command list processing
-    u32 trigger;
-};
-static_assert(sizeof(Regs::Struct<Regs::CommandProcessor>) == 0x14, "Structure size and register block length don't match");
+    struct {
+        // command list size
+        u32 size;
+
+        INSERT_PADDING_WORDS(0x1);
+
+        // command list address
+        u32 address;
+
+        INSERT_PADDING_WORDS(0x1);
+
+        // it seems that writing to this field triggers command list processing
+        u32 trigger;
 
+        inline u32 GetPhysicalAddress() const {
+            return DecodeAddressRegister(address);
+        }
+    } command_processor_config;
+    ASSERT_MEMBER_SIZE(command_processor_config, 0x14);
 
-extern RegisterSet<u32, Regs> g_regs;
+    INSERT_PADDING_WORDS(0x9c3);
+
+#undef INSERT_PADDING_WORDS_HELPER1
+#undef INSERT_PADDING_WORDS_HELPER2
+#undef INSERT_PADDING_WORDS
+
+    static inline int NumIds() {
+        return sizeof(Regs) / sizeof(u32);
+    }
+
+    u32& operator [] (int index) const {
+        u32* content = (u32*)this;
+        return content[index];
+    }
+
+    u32& operator [] (int index) {
+        u32* content = (u32*)this;
+        return content[index];
+    }
+
+private:
+    /*
+     * Most physical addresses which GPU registers refer to are 8-byte aligned.
+     * This function should be used to get the address from a raw register value.
+     */
+    static inline u32 DecodeAddressRegister(u32 register_value) {
+        return register_value * 8;
+    }
+};
+static_assert(std::is_standard_layout<Regs>::value, "Structure does not use standard layout");
+
+// TODO: MSVC does not support using offsetof() on non-static data members even though this
+//       is technically allowed since C++11. This macro should be enabled once MSVC adds
+//       support for that.
+#ifndef _MSC_VER
+#define ASSERT_REG_POSITION(field_name, position)             \
+    static_assert(offsetof(Regs, field_name) == position * 4, \
+                  "Field "#field_name" has invalid position")
+
+ASSERT_REG_POSITION(memory_fill_config[0],    0x00004);
+ASSERT_REG_POSITION(memory_fill_config[1],    0x00008);
+ASSERT_REG_POSITION(framebuffer_config[0],    0x00117);
+ASSERT_REG_POSITION(framebuffer_config[1],    0x00157);
+ASSERT_REG_POSITION(display_transfer_config,  0x00300);
+ASSERT_REG_POSITION(command_processor_config, 0x00638);
+
+#undef ASSERT_REG_POSITION
+#endif // !defined(_MSC_VER)
+
+// The total number of registers is chosen arbitrarily, but let's make sure it's not some odd value anyway.
+static_assert(sizeof(Regs) == 0x1000 * sizeof(u32), "Invalid total size of register set");
+
+extern Regs g_regs;
 
 enum {
     TOP_ASPECT_X        = 0x5,
author	Tony Wasserka <NeoBrainX@gmail.com>	2014-08-03 16:00:52 +0200
committer	Tony Wasserka <NeoBrainX@gmail.com>	2014-08-12 02:17:21 +0200
commit	9c781a6c7646a3f30c23adae75e1879b7fc47d0f (patch)
tree	76f32e7dc447e661f9f34303609cf4aed23d7a5b /src/core/hw
parent	29365e67d621dc732997c5b7a5269fa2dfda09ab (diff)