aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorGravatar Mike Klein <mtklein@chromium.org>2017-01-11 13:41:30 -0500
committerGravatar Skia Commit-Bot <skia-commit-bot@chromium.org>2017-01-11 19:20:37 +0000
commit09326e7a92c7db55665e2ee2d5ac7cb1ce258fff (patch)
treef335f25491f5b97aa3c2cb8b9d2327e6283ffbf8 /src
parent58d2a72afad28a991f5dead5e5275b2fdca40607 (diff)
SkSplicer: start on Windows support
should just be missing copy_to_executable_mem() / cleanup_executable_mem()

Change-Id: I47d34d4b57a40c07120cca7dc03f6ae22ecbe910
Reviewed-on: https://skia-review.googlesource.com/6854
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
Diffstat (limited to 'src')
-rw-r--r--src/splicer/SkSplicer.cpp122
1 files changed, 104 insertions, 18 deletions
diff --git a/src/splicer/SkSplicer.cpp b/src/splicer/SkSplicer.cpp
index e68adabd3f..b2745538d1 100644
--- a/src/splicer/SkSplicer.cpp
+++ b/src/splicer/SkSplicer.cpp
@@ -9,7 +9,11 @@
#include "SkOpts.h"
#include "SkRasterPipeline.h"
#include "SkStream.h"
-#include <sys/mman.h>
+#if defined(_MSC_VER)
+ #include <windows.h>
+#else
+ #include <sys/mman.h>
+#endif
#include "SkSplicer_generated.h"
#include "SkSplicer_shared.h"
@@ -86,44 +90,126 @@ namespace {
}
#endif
-#if !defined(__aarch64__) && defined(DUMP)
+#if defined(_MSC_VER)
+ // Adapt from MS ABI to System V ABI used by stages.  Emits a fixed prologue ahead of the loop; after_loop() emits the matching epilogue.
+ static void before_loop(SkWStream* buf) {
+ static const uint8_t ms_to_system_v[] = {
+ 0x56, // push %rsi   (popped again by after_loop())
+ 0x57, // push %rdi   (popped again by after_loop())
+ 0x48,0x81,0xec,0xa8,0x00,0x00,0x00, // sub $0xa8,%rsp   (0xa0 bytes hold xmm6-15 below; NOTE(review): the extra 8 presumably keeps %rsp 16-byte aligned for the aligned stores -- confirm)
+ 0xc5,0x78,0x29,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps %xmm15,0x90(%rsp)
+ 0xc5,0x78,0x29,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps %xmm14,0x80(%rsp)
+ 0xc5,0x78,0x29,0x6c,0x24,0x70, // vmovaps %xmm13,0x70(%rsp)
+ 0xc5,0x78,0x29,0x64,0x24,0x60, // vmovaps %xmm12,0x60(%rsp)
+ 0xc5,0x78,0x29,0x5c,0x24,0x50, // vmovaps %xmm11,0x50(%rsp)
+ 0xc5,0x78,0x29,0x54,0x24,0x40, // vmovaps %xmm10,0x40(%rsp)
+ 0xc5,0x78,0x29,0x4c,0x24,0x30, // vmovaps %xmm9,0x30(%rsp)
+ 0xc5,0x78,0x29,0x44,0x24,0x20, // vmovaps %xmm8,0x20(%rsp)
+ 0xc5,0xf8,0x29,0x7c,0x24,0x10, // vmovaps %xmm7,0x10(%rsp)
+ 0xc5,0xf8,0x29,0x34,0x24, // vmovaps %xmm6,(%rsp)   (xmm6-15 are reloaded from these same slots by after_loop())
+ 0x48,0x89,0xcf, // mov %rcx,%rdi   (MS 1st integer arg register -> System V 1st; %rcx is read here before the last mov overwrites it)
+ 0x48,0x89,0xd6, // mov %rdx,%rsi   (2nd arg; %rdx is read here before the next mov overwrites it)
+ 0x4c,0x89,0xc2, // mov %r8,%rdx   (3rd arg)
+ 0x4c,0x89,0xc9, // mov %r9,%rcx   (4th arg)
+ };
+ splice(buf, ms_to_system_v); // hand the whole prologue to splice() for emission into buf
+ }
+ static void after_loop(SkWStream* buf) { // Mirror image of before_loop(): reload xmm6-15, release the 0xa8-byte frame, then pop %rdi and %rsi.
+ static const uint8_t system_v_to_ms[] = {
+ 0xc5,0xf8,0x28,0x34,0x24, // vmovaps (%rsp),%xmm6   (same stack slots before_loop() stored to)
+ 0xc5,0xf8,0x28,0x7c,0x24,0x10, // vmovaps 0x10(%rsp),%xmm7
+ 0xc5,0x78,0x28,0x44,0x24,0x20, // vmovaps 0x20(%rsp),%xmm8
+ 0xc5,0x78,0x28,0x4c,0x24,0x30, // vmovaps 0x30(%rsp),%xmm9
+ 0xc5,0x78,0x28,0x54,0x24,0x40, // vmovaps 0x40(%rsp),%xmm10
+ 0xc5,0x78,0x28,0x5c,0x24,0x50, // vmovaps 0x50(%rsp),%xmm11
+ 0xc5,0x78,0x28,0x64,0x24,0x60, // vmovaps 0x60(%rsp),%xmm12
+ 0xc5,0x78,0x28,0x6c,0x24,0x70, // vmovaps 0x70(%rsp),%xmm13
+ 0xc5,0x78,0x28,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps 0x80(%rsp),%xmm14
+ 0xc5,0x78,0x28,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps 0x90(%rsp),%xmm15
+ 0x48,0x81,0xc4,0xa8,0x00,0x00,0x00, // add $0xa8,%rsp   (undoes the sub in before_loop())
+ 0x5f, // pop %rdi   (pushed last by before_loop(), so popped first)
+ 0x5e, // pop %rsi
+ };
+ splice(buf, system_v_to_ms); // hand the whole epilogue to splice() for emission into buf
+ }
+#elif !defined(__aarch64__) && defined(DUMP)
+ // IACA start and end markers.
static const uint8_t ud2[] = { 0x0f, 0x0b }; // undefined... crashes when run
static const uint8_t nop3[] = { 0x64, 0x67, 0x90 }; // 3 byte no-op
static const uint8_t movl_ebx[] = { 0xbb }; // move next 4 bytes into ebx
- static void iaca_start(SkWStream* buf) {
+ static void before_loop(SkWStream* buf) { // Emits the IACA start marker ahead of the loop.
splice(buf, ud2); // ud2 first: the marker traps if this code is ever actually run
splice(buf, movl_ebx); // opcode byte for "move next 4 bytes into ebx"
splice(buf, 111); // presumably emitted as that move's 4-byte immediate -- TODO confirm against splice()
splice(buf, nop3); // 3-byte no-op closes the start marker
}
- static void iaca_end(SkWStream* buf) {
+ static void after_loop(SkWStream* buf) { // Emits the IACA end marker after the loop.
splice(buf, movl_ebx); // opcode byte for "move next 4 bytes into ebx"
splice(buf, 222); // presumably emitted as that move's 4-byte immediate -- TODO confirm against splice()
splice(buf, nop3); // 3-byte no-op
splice(buf, ud2); // ud2 last: the marker traps if this code is ever actually run
}
#else
- static void iaca_start(SkWStream*) {}
- static void iaca_end (SkWStream*) {}
+ static void before_loop(SkWStream*) {} // Fallback for the remaining configurations: nothing is emitted before the loop.
+ static void after_loop (SkWStream*) {} // Likewise nothing is emitted after the loop.
#endif
+ // We can only mprotect / VirtualProtect at 4K page granularity.
+ // Returns the smallest multiple of 4096 that is >= len, so 0 stays 0 and an
+ // already page-aligned len is returned unchanged.  Computed in constant time
+ // rather than by stepping one page at a time.
+ // A len within one page of SIZE_MAX has no representable answer; the result
+ // wraps to 0 in that case.
+ static size_t round_up_to_full_pages(size_t len) {
+ const size_t kPageSize = 4096;
+ const size_t partial = len % kPageSize; // bytes spilling into a final, partly used page
+ return partial == 0 ? len : len + (kPageSize - partial);
+ }
+
+#if defined(_MSC_VER)
// Copy len bytes from src to memory that's executable. cleanup with cleanup_executable_mem().
- static void* copy_to_executable_mem(const void* src, size_t len) {
- if (src && len) {
- // TODO: w^x
- auto fn = mmap(nullptr, len, PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0);
- memcpy(fn, src, len);
- __builtin___clear_cache((char*)fn, (char*)fn + len);
- return fn;
+ static void* copy_to_executable_mem(const void* src, size_t* len) {
+ if (!src || !*len) {
+ return nullptr;
}
- return nullptr;
+
+ // On entry *len is the number of bytes to copy from src.  On success the
+ // return value points at an executable copy and *len is updated to the
+ // page-rounded allocation size.  Returns nullptr and leaves *len untouched
+ // if there is nothing to copy or if the allocation / protection change fails.
+ size_t alloc = round_up_to_full_pages(*len);
+
+ // Fill the pages while they are read/write, then flip them to read/execute,
+ // so the memory is never writable and executable at the same time.
+ auto fn = VirtualAlloc(nullptr, alloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
+ if (!fn) {
+ return nullptr; // VirtualAlloc reports failure with NULL; don't memcpy into it.
+ }
+ memcpy(fn, src, *len);
+
+ DWORD dont_care;
+ if (!VirtualProtect(fn, alloc, PAGE_EXECUTE_READ, &dont_care)) {
+ // Never hand back memory that can't be executed; give the pages back instead.
+ VirtualFree(fn, 0, MEM_RELEASE);
+ return nullptr;
+ }
+
+ *len = alloc;
+ return fn;
+ }
+ static void cleanup_executable_mem(void* fn, size_t len) {
+ // Releases memory handed out by copy_to_executable_mem(); a null fn is ignored.
+ // len is not consulted here: VirtualFree is called with a size of 0 and MEM_RELEASE.
+ if (!fn) {
+ return;
+ }
+ VirtualFree(fn, 0, MEM_RELEASE);
+ }
+#else
+ static void* copy_to_executable_mem(const void* src, size_t* len) {
+ // On entry *len is the number of bytes to copy from src.  On success the
+ // return value points at an executable copy and *len is updated to the
+ // page-rounded mapping size (the size to pass to munmap).  Returns nullptr
+ // and leaves *len untouched if there is nothing to copy or if mapping /
+ // protecting the pages fails.
+ if (!src || !*len) {
+ return nullptr;
+ }
+
+ size_t alloc = round_up_to_full_pages(*len);
+
+ // Fill the pages while they are read/write, then flip them to read/execute,
+ // so the memory is never writable and executable at the same time.
+ auto fn = mmap(nullptr, alloc, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0);
+ if (fn == MAP_FAILED) {
+ return nullptr; // mmap signals failure with MAP_FAILED, not a null pointer.
+ }
+ memcpy(fn, src, *len);
+
+ if (mprotect(fn, alloc, PROT_READ|PROT_EXEC) != 0) {
+ // Never hand back memory that can't be executed; unmap it instead.
+ munmap(fn, alloc);
+ return nullptr;
+ }
+ __builtin___clear_cache((char*)fn, (char*)fn + *len); // Essential on ARM; no-op on x86.
+
+ *len = alloc;
+ return fn;
+ }
static void cleanup_executable_mem(void* fn, size_t len) {
// Unmaps len bytes starting at fn; a null fn is ignored.
if (!fn) {
return;
}
munmap(fn, len);
}
+#endif
struct Spliced {
@@ -150,7 +236,7 @@ namespace {
// ... run spliced stages...
// x += kStride;
// } while(x < limit);
- iaca_start(&buf);
+ before_loop(&buf);
auto loop_start = buf.bytesWritten(); // Think of this like a label, loop_start:
for (int i = 0; i < nstages; i++) {
@@ -189,12 +275,12 @@ namespace {
}
loop(&buf, loop_start); // Loop back to handle more pixels if not done.
- iaca_end(&buf);
+ after_loop(&buf);
ret(&buf); // We're done.
auto data = buf.detachAsData();
fSplicedLen = data->size();
- fSpliced = copy_to_executable_mem(data->data(), fSplicedLen);
+ fSpliced = copy_to_executable_mem(data->data(), &fSplicedLen);
#if defined(DUMP)
SkFILEWStream(DUMP).write(data->data(), data->size());
@@ -204,7 +290,7 @@ namespace {
// Spliced is stored in a std::function, so it needs to be copyable.
Spliced(const Spliced& o) : fBackup (o.fBackup)
, fSplicedLen(o.fSplicedLen)
- , fSpliced (copy_to_executable_mem(o.fSpliced, fSplicedLen)) {}
+ , fSpliced (copy_to_executable_mem(o.fSpliced, &fSplicedLen)) {}
~Spliced() {
cleanup_executable_mem(fSpliced, fSplicedLen);