diff options
author | 2017-01-11 13:41:30 -0500 | |
---|---|---|
committer | 2017-01-11 19:20:37 +0000 | |
commit | 09326e7a92c7db55665e2ee2d5ac7cb1ce258fff (patch) | |
tree | f335f25491f5b97aa3c2cb8b9d2327e6283ffbf8 /src | |
parent | 58d2a72afad28a991f5dead5e5275b2fdca40607 (diff) |
SkSplicer: start on Windows support
should just be missing copy_to_executable_mem() / cleanup_executable_mem()
Change-Id: I47d34d4b57a40c07120cca7dc03f6ae22ecbe910
Reviewed-on: https://skia-review.googlesource.com/6854
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/splicer/SkSplicer.cpp | 122 |
1 file changed, 104 insertions, 18 deletions
diff --git a/src/splicer/SkSplicer.cpp b/src/splicer/SkSplicer.cpp index e68adabd3f..b2745538d1 100644 --- a/src/splicer/SkSplicer.cpp +++ b/src/splicer/SkSplicer.cpp @@ -9,7 +9,11 @@ #include "SkOpts.h" #include "SkRasterPipeline.h" #include "SkStream.h" -#include <sys/mman.h> +#if defined(_MSC_VER) + #include <windows.h> +#else + #include <sys/mman.h> +#endif #include "SkSplicer_generated.h" #include "SkSplicer_shared.h" @@ -86,44 +90,126 @@ namespace { } #endif -#if !defined(__aarch64__) && defined(DUMP) +#if defined(_MSC_VER) + // Adapt from MS ABI to System V ABI used by stages. + static void before_loop(SkWStream* buf) { + static const uint8_t ms_to_system_v[] = { + 0x56, // push %rsi + 0x57, // push %rdi + 0x48,0x81,0xec,0xa8,0x00,0x00,0x00, // sub $0xa8,%rsp + 0xc5,0x78,0x29,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps %xmm15,0x90(%rsp) + 0xc5,0x78,0x29,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps %xmm14,0x80(%rsp) + 0xc5,0x78,0x29,0x6c,0x24,0x70, // vmovaps %xmm13,0x70(%rsp) + 0xc5,0x78,0x29,0x64,0x24,0x60, // vmovaps %xmm12,0x60(%rsp) + 0xc5,0x78,0x29,0x5c,0x24,0x50, // vmovaps %xmm11,0x50(%rsp) + 0xc5,0x78,0x29,0x54,0x24,0x40, // vmovaps %xmm10,0x40(%rsp) + 0xc5,0x78,0x29,0x4c,0x24,0x30, // vmovaps %xmm9,0x30(%rsp) + 0xc5,0x78,0x29,0x44,0x24,0x20, // vmovaps %xmm8,0x20(%rsp) + 0xc5,0xf8,0x29,0x7c,0x24,0x10, // vmovaps %xmm7,0x10(%rsp) + 0xc5,0xf8,0x29,0x34,0x24, // vmovaps %xmm6,(%rsp) + 0x48,0x89,0xcf, // mov %rcx,%rdi + 0x48,0x89,0xd6, // mov %rdx,%rsi + 0x4c,0x89,0xc2, // mov %r8,%rdx + 0x4c,0x89,0xc9, // mov %r9,%rcx + }; + splice(buf, ms_to_system_v); + } + static void after_loop(SkWStream* buf) { + static const uint8_t system_v_to_ms[] = { + 0xc5,0xf8,0x28,0x34,0x24, // vmovaps (%rsp),%xmm6 + 0xc5,0xf8,0x28,0x7c,0x24,0x10, // vmovaps 0x10(%rsp),%xmm7 + 0xc5,0x78,0x28,0x44,0x24,0x20, // vmovaps 0x20(%rsp),%xmm8 + 0xc5,0x78,0x28,0x4c,0x24,0x30, // vmovaps 0x30(%rsp),%xmm9 + 0xc5,0x78,0x28,0x54,0x24,0x40, // vmovaps 0x40(%rsp),%xmm10 + 
0xc5,0x78,0x28,0x5c,0x24,0x50, // vmovaps 0x50(%rsp),%xmm11 + 0xc5,0x78,0x28,0x64,0x24,0x60, // vmovaps 0x60(%rsp),%xmm12 + 0xc5,0x78,0x28,0x6c,0x24,0x70, // vmovaps 0x70(%rsp),%xmm13 + 0xc5,0x78,0x28,0xb4,0x24,0x80,0x00,0x00,0x00, // vmovaps 0x80(%rsp),%xmm14 + 0xc5,0x78,0x28,0xbc,0x24,0x90,0x00,0x00,0x00, // vmovaps 0x90(%rsp),%xmm15 + 0x48,0x81,0xc4,0xa8,0x00,0x00,0x00, // add $0xa8,%rsp + 0x5f, // pop %rdi + 0x5e, // pop %rsi + }; + splice(buf, system_v_to_ms); + } +#elif !defined(__aarch64__) && defined(DUMP) + // IACA start and end markers. static const uint8_t ud2[] = { 0x0f, 0x0b }; // undefined... crashes when run static const uint8_t nop3[] = { 0x64, 0x67, 0x90 }; // 3 byte no-op static const uint8_t movl_ebx[] = { 0xbb }; // move next 4 bytes into ebx - static void iaca_start(SkWStream* buf) { + static void before_loop(SkWStream* buf) { splice(buf, ud2); splice(buf, movl_ebx); splice(buf, 111); splice(buf, nop3); } - static void iaca_end(SkWStream* buf) { + static void after_loop(SkWStream* buf) { splice(buf, movl_ebx); splice(buf, 222); splice(buf, nop3); splice(buf, ud2); } #else - static void iaca_start(SkWStream*) {} - static void iaca_end (SkWStream*) {} + static void before_loop(SkWStream*) {} + static void after_loop (SkWStream*) {} #endif + // We can only mprotect / VirtualProtect at 4K page granularity. + static size_t round_up_to_full_pages(size_t len) { + size_t size = 0; + while (size < len) { + size += 4096; + } + return size; + } + +#if defined(_MSC_VER) // Copy len bytes from src to memory that's executable. cleanup with cleanup_executable_mem(). 
- static void* copy_to_executable_mem(const void* src, size_t len) { - if (src && len) { - // TODO: w^x - auto fn = mmap(nullptr, len, PROT_WRITE|PROT_EXEC, MAP_ANON|MAP_PRIVATE, -1, 0); - memcpy(fn, src, len); - __builtin___clear_cache((char*)fn, (char*)fn + len); - return fn; + static void* copy_to_executable_mem(const void* src, size_t* len) { + if (!src || !*len) { + return nullptr; } - return nullptr; + + size_t alloc = round_up_to_full_pages(*len); + + auto fn = VirtualAlloc(nullptr, alloc, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); + memcpy(fn, src, *len); + + DWORD dont_care; + VirtualProtect(fn, alloc, PAGE_EXECUTE_READ, &dont_care); + + *len = alloc; + return fn; + } + static void cleanup_executable_mem(void* fn, size_t len) { + if (fn) { + VirtualFree(fn, 0, MEM_RELEASE); + } + } +#else + static void* copy_to_executable_mem(const void* src, size_t* len) { + if (!src || !*len) { + return nullptr; + } + + size_t alloc = round_up_to_full_pages(*len); + + auto fn = mmap(nullptr, alloc, PROT_READ|PROT_WRITE, MAP_ANON|MAP_PRIVATE, -1, 0); + memcpy(fn, src, *len); + + mprotect(fn, alloc, PROT_READ|PROT_EXEC); + __builtin___clear_cache((char*)fn, (char*)fn + *len); // Essential on ARM; no-op on x86. + + *len = alloc; + return fn; } static void cleanup_executable_mem(void* fn, size_t len) { if (fn) { munmap(fn, len); } } +#endif struct Spliced { @@ -150,7 +236,7 @@ namespace { // ... run spliced stages... // x += kStride; // } while(x < limit); - iaca_start(&buf); + before_loop(&buf); auto loop_start = buf.bytesWritten(); // Think of this like a label, loop_start: for (int i = 0; i < nstages; i++) { @@ -189,12 +275,12 @@ namespace { } loop(&buf, loop_start); // Loop back to handle more pixels if not done. - iaca_end(&buf); + after_loop(&buf); ret(&buf); // We're done. 
auto data = buf.detachAsData(); fSplicedLen = data->size(); - fSpliced = copy_to_executable_mem(data->data(), fSplicedLen); + fSpliced = copy_to_executable_mem(data->data(), &fSplicedLen); #if defined(DUMP) SkFILEWStream(DUMP).write(data->data(), data->size()); @@ -204,7 +290,7 @@ namespace { // Spliced is stored in a std::function, so it needs to be copyable. Spliced(const Spliced& o) : fBackup (o.fBackup) , fSplicedLen(o.fSplicedLen) - , fSpliced (copy_to_executable_mem(o.fSpliced, fSplicedLen)) {} + , fSpliced (copy_to_executable_mem(o.fSpliced, &fSplicedLen)) {} ~Spliced() { cleanup_executable_mem(fSpliced, fSplicedLen); |