about | summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author: Mike Klein <mtklein@chromium.org> 2017-04-26 09:52:09 -0400
committer: Skia Commit-Bot <skia-commit-bot@chromium.org> 2017-04-26 19:14:19 +0000
commit: 67e617149df4336b77a3ccdf5ea7d557c7a33166 (patch)
tree: 30bb3cb33e75c8c55a16f24fcc601b80448caed6 /src
parent: 459c9679a221bbe66a735080728afb9599fa5ed7 (diff)
prep for more constants
- Add -z to print zero bytes instead of ...
- avx+hsw will create 32-byte constants in .const, so we should disassemble those too, and align to 32 bytes.
- The default _text section on Windows is 16-byte aligned, so we make a new one that's 32-byte aligned.

Change-Id: Icb2a962baa4c3735e98a992f2285eaf5cb1680fd
Reviewed-on: https://skia-review.googlesource.com/14364
Commit-Queue: Mike Klein <mtklein@chromium.org>
Reviewed-by: Herb Derby <herb@google.com>
Diffstat (limited to 'src')
-rw-r--r-- src/jumper/SkJumper_generated.S 10
-rw-r--r-- src/jumper/SkJumper_generated_win.S 10
-rwxr-xr-x src/jumper/build_stages.py 26
3 files changed, 27 insertions, 19 deletions
diff --git a/src/jumper/SkJumper_generated.S b/src/jumper/SkJumper_generated.S
index 4cc2405675..ed23be731f 100644
--- a/src/jumper/SkJumper_generated.S
+++ b/src/jumper/SkJumper_generated.S
@@ -11,12 +11,14 @@
#define FUNCTION(name)
#define BALIGN4 .align 2
#define BALIGN16 .align 4
+ #define BALIGN32 .align 5
#else
.section .note.GNU-stack,"",%progbits
#define HIDDEN .hidden
#define FUNCTION(name) .type name,%function
#define BALIGN4 .balign 4
#define BALIGN16 .balign 16
+ #define BALIGN32 .balign 32
#endif
.text
#if defined(__aarch64__)
@@ -7268,7 +7270,7 @@ _sk_callback_vfp4:
.long 0xe8bd48f0 // pop {r4, r5, r6, r7, fp, lr}
.long 0xe12fff13 // bx r3
#elif defined(__x86_64__)
-BALIGN16
+BALIGN32
HIDDEN _sk_start_pipeline_hsw
.globl _sk_start_pipeline_hsw
@@ -11715,7 +11717,7 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 128 // .byte 0x80
.byte 63 // (bad)
-BALIGN16
+BALIGN32
HIDDEN _sk_start_pipeline_avx
.globl _sk_start_pipeline_avx
@@ -17590,7 +17592,7 @@ BALIGN4
.byte 0,0 // add %al,(%rax)
.byte 128 // .byte 0x80
.byte 63 // (bad)
-BALIGN16
+BALIGN32
HIDDEN _sk_start_pipeline_sse41
.globl _sk_start_pipeline_sse41
@@ -22106,7 +22108,7 @@ BALIGN16
.byte 0,0 // add %al,(%rax)
.byte 128 // .byte 0x80
.byte 63 // (bad)
-BALIGN16
+BALIGN32
HIDDEN _sk_start_pipeline_sse2
.globl _sk_start_pipeline_sse2
diff --git a/src/jumper/SkJumper_generated_win.S b/src/jumper/SkJumper_generated_win.S
index 2ba8a0a456..12fc90ee38 100644
--- a/src/jumper/SkJumper_generated_win.S
+++ b/src/jumper/SkJumper_generated_win.S
@@ -7,8 +7,8 @@
; $ src/jumper/build_stages.py
IFDEF RAX
-_text SEGMENT
-ALIGN 16
+_text32 SEGMENT ALIGN(32) 'CODE'
+ALIGN 32
PUBLIC _sk_start_pipeline_hsw
_sk_start_pipeline_hsw LABEL PROC
@@ -4256,7 +4256,7 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 128 ; .byte 0x80
DB 63 ; (bad)
-ALIGN 16
+ALIGN 32
PUBLIC _sk_start_pipeline_avx
_sk_start_pipeline_avx LABEL PROC
@@ -9934,7 +9934,7 @@ ALIGN 4
DB 0,0 ; add %al,(%rax)
DB 128 ; .byte 0x80
DB 63 ; (bad)
-ALIGN 16
+ALIGN 32
PUBLIC _sk_start_pipeline_sse41
_sk_start_pipeline_sse41 LABEL PROC
@@ -14253,7 +14253,7 @@ ALIGN 16
DB 0,0 ; add %al,(%rax)
DB 128 ; .byte 0x80
DB 63 ; (bad)
-ALIGN 16
+ALIGN 32
PUBLIC _sk_start_pipeline_sse2
_sk_start_pipeline_sse2 LABEL PROC
diff --git a/src/jumper/build_stages.py b/src/jumper/build_stages.py
index baa0b99e32..959913bd95 100755
--- a/src/jumper/build_stages.py
+++ b/src/jumper/build_stages.py
@@ -78,7 +78,7 @@ def parse_object_file(dot_o, directive, target=None):
# Look for sections we know we can't handle.
section_headers = subprocess.check_output(cmd + ['-h', dot_o])
- for snippet in ['.const', '.rodata']:
+ for snippet in ['.rodata']:
if snippet in section_headers:
print >>sys.stderr, 'Found %s in section.' % snippet
assert snippet not in section_headers
@@ -90,10 +90,12 @@ def parse_object_file(dot_o, directive, target=None):
# x86-64... as long as we're using %rip-relative addressing,
# literal sections should be fine to just dump in with .text.
disassemble = ['-d', # DO NOT USE -D.
+ '-z', # Print zero bytes instead of ...
'--insn-width=10',
'-j', '.text',
'-j', '.literal4',
'-j', '.literal16',
+ '-j', '.const',
dot_o]
dehex = lambda h: str(int(h,16))
@@ -111,6 +113,8 @@ def parse_object_file(dot_o, directive, target=None):
sym = m.group(1)
if sym.startswith('.literal'): # .literal4, .literal16, etc
print sym.replace('.literal', align)
+ elif sym.startswith('.const'): # 32-byte constants
+ print align + '32'
else: # a stage function
if hidden:
print hidden + ' _' + sym
@@ -150,12 +154,14 @@ print ' #define HIDDEN .private_extern'
print ' #define FUNCTION(name)'
print ' #define BALIGN4 .align 2'
print ' #define BALIGN16 .align 4'
+print ' #define BALIGN32 .align 5'
print '#else'
print ' .section .note.GNU-stack,"",%progbits'
print ' #define HIDDEN .hidden'
print ' #define FUNCTION(name) .type name,%function'
print ' #define BALIGN4 .balign 4'
print ' #define BALIGN16 .balign 16'
+print ' #define BALIGN32 .balign 32'
print '#endif'
print '.text'
@@ -168,13 +174,13 @@ print 'BALIGN4'
parse_object_file('vfp4.o', '.long', target='elf32-littlearm')
print '#elif defined(__x86_64__)'
-print 'BALIGN16'
+print 'BALIGN32'
parse_object_file('hsw.o', '.byte')
-print 'BALIGN16'
+print 'BALIGN32'
parse_object_file('avx.o', '.byte')
-print 'BALIGN16'
+print 'BALIGN32'
parse_object_file('sse41.o', '.byte')
-print 'BALIGN16'
+print 'BALIGN32'
parse_object_file('sse2.o', '.byte')
print '#endif'
@@ -190,14 +196,14 @@ print '''; Copyright 2017 Google Inc.
'''
print 'IFDEF RAX'
-print '_text SEGMENT'
-print 'ALIGN 16'
+print "_text32 SEGMENT ALIGN(32) 'CODE'"
+print 'ALIGN 32'
parse_object_file('win_hsw.o', 'DB')
-print 'ALIGN 16'
+print 'ALIGN 32'
parse_object_file('win_avx.o', 'DB')
-print 'ALIGN 16'
+print 'ALIGN 32'
parse_object_file('win_sse41.o', 'DB')
-print 'ALIGN 16'
+print 'ALIGN 32'
parse_object_file('win_sse2.o', 'DB')
print 'ENDIF'
print 'END'