#!/usr/bin/env python2.7
#
# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Compile the SkJumper stages offline and dump them as assembly sources.

Usage:
    build_stages.py [clang [objdump [ccache [stages [stages_lowp
                    [generated [generated_win]]]]]]]

Every positional argument is optional and overrides the default of the same
name defined below.  The script compiles both stage sources once per
instruction-set / platform combination, merges the resulting object files
with ``ld -r``, disassembles the merged objects with objdump and writes the
machine code as data directives into `generated` (GNU-style assembler
syntax) and `generated_win` (MASM-style syntax).
"""

import re
import subprocess
import sys


def _arg(position, default):
    """Return sys.argv[position] when it was supplied, otherwise `default`."""
    return sys.argv[position] if len(sys.argv) > position else default


clang = _arg(1, 'clang-5.0')
objdump = _arg(2, 'gobjdump')
ccache = _arg(3, 'ccache')
stages = _arg(4, 'src/jumper/SkJumper_stages.cpp')
stages_lowp = _arg(5, 'src/jumper/SkJumper_stages_lowp.cpp')
generated = _arg(6, 'src/jumper/SkJumper_generated.S')
generated_win = _arg(7, 'src/jumper/SkJumper_generated_win.S')

# From here on `clang` is the argv prefix shared by every compile command.
clang = [ccache, clang, '-x', 'c++']

cflags = ['-std=c++11', '-Os', '-DJUMPER_IS_OFFLINE',
          '-momit-leaf-frame-pointer', '-ffp-contract=fast',
          '-fno-exceptions', '-fno-rtti', '-fno-unwind-tables']

x86 = ['-m32']
win = ['-DWIN', '-mno-red-zone']

sse2 = ['-msse2', '-mno-sse3', '-mno-ssse3', '-mno-sse4.1']
sse41 = ['-msse4.1']
avx = ['-mavx']
hsw = ['-mavx2', '-mfma', '-mf16c']
skx = ['-march=skylake-avx512']

# E.g. 00000000000003a4 <_load_f16>:
SYMBOL_RE = re.compile(r'[0-9a-f]+ <_?(.*)>:')

GENERATED_HEADER = '''# Copyright 2017 Google Inc.
#
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# This file is generated semi-automatically with this command:
# $ src/jumper/build_stages.py
'''

GENERATED_WIN_HEADER = '''; Copyright 2017 Google Inc.
;
; Use of this source code is governed by a BSD-style license that can be
; found in the LICENSE file.
; This file is generated semi-automatically with this command:
; $ src/jumper/build_stages.py
'''


def compile_commands():
    """Return the clang command lines, one per object file, in build order.

    Each command is `clang + cflags + <ISA flags> + <platform flags>`
    followed by `-c <source> -o <object>`.
    """
    builds = [
        # (ISA flags, platform flags, source, object file)
        (sse2, [], stages, 'sse2.o'),
        (sse2, win, stages, 'win_sse2.o'),
        (sse2, x86, stages, 'x86_sse2.o'),
        (sse2, win + x86, stages, 'win_x86_sse2.o'),
        (sse2, [], stages_lowp, 'lowp_sse2.o'),
        (sse2, win, stages_lowp, 'win_lowp_sse2.o'),
        (sse2, x86, stages_lowp, 'x86_lowp_sse2.o'),
        (sse2, win + x86, stages_lowp, 'win_x86_lowp_sse2.o'),
        (sse41, [], stages, 'sse41.o'),
        (sse41, win, stages, 'win_sse41.o'),
        (sse41, [], stages_lowp, 'lowp_sse41.o'),
        (sse41, win, stages_lowp, 'win_lowp_sse41.o'),
        (avx, [], stages, 'avx.o'),
        (avx, win, stages, 'win_avx.o'),
        (hsw, [], stages, 'hsw.o'),
        (hsw, win, stages, 'win_hsw.o'),
        (hsw, [], stages_lowp, 'lowp_hsw.o'),
        (hsw, win, stages_lowp, 'win_lowp_hsw.o'),
        (skx, [], stages, 'skx.o'),
    ]
    return [clang + cflags + isa + platform + ['-c', src, '-o', obj]
            for isa, platform, src, obj in builds]


def merge_commands():
    """Return the `ld -r` command lines that merge the object files.

    Merge x86-64 object files to deduplicate constants.
    (No other platform has more than one specialization.)
    """
    merges = [
        ('merged.o', ['skx.o', 'hsw.o', 'avx.o', 'sse41.o', 'sse2.o',
                      'lowp_hsw.o', 'lowp_sse41.o', 'lowp_sse2.o']),
        ('win_merged.o', ['win_hsw.o', 'win_avx.o', 'win_sse41.o',
                          'win_sse2.o', 'win_lowp_hsw.o', 'win_lowp_sse41.o',
                          'win_lowp_sse2.o']),
        ('x86_merged.o', ['x86_sse2.o', 'x86_lowp_sse2.o']),
        ('win_x86_merged.o', ['win_x86_sse2.o', 'win_x86_lowp_sse2.o']),
    ]
    return [['ld', '-r', '-o', merged] + inputs for merged, inputs in merges]


def _fail(message):
    """Report `message` on stderr and abort with an AssertionError.

    Raised explicitly (rather than via `assert`) so the check still fires
    when Python runs with -O.
    """
    sys.stderr.write(message + '\n')
    raise AssertionError(message)


def translate_disassembly(lines, dot_o, directive):
    """Yield assembly-source lines for the objdump listing in `lines`.

    Args:
        lines: iterable of text lines as printed by `objdump -d`.
        dot_o: name of the disassembled object file.  Rows starting with it
            are skipped, and a name containing 'win' selects the MASM-style
            spellings (PUBLIC / LABEL PROC / ';' comments / ALIGN).
        directive: data directive put in front of the code bytes.  '.long'
            keeps each word as '0x'-prefixed hex; anything else converts
            every byte to decimal.

    Yields:
        Output lines without a trailing newline.

    Raises:
        AssertionError: on a symbol that is neither a literal/const section
            nor a stage function named 'sk_...'.
    """
    is_win = 'win' in dot_o
    if is_win:
        globl, hidden, label, comment, align = \
            'PUBLIC', '', ' LABEL PROC', '; ', 'ALIGN '
    else:
        globl, hidden, label, comment, align = \
            '.globl', 'HIDDEN', ':', '// ', 'BALIGN'

    if directive == '.long':
        def dehex(h):
            return '0x' + h
    else:
        def dehex(h):
            return str(int(h, 16))

    for line in lines:
        line = line.strip()
        if (not line or line.startswith(dot_o)
                or line.startswith('Disassembly')):
            continue

        m = SYMBOL_RE.match(line)
        if m:
            yield ''
            sym = m.group(1)
            if sym.startswith('.literal'):  # .literal4, .literal16, etc
                yield sym.replace('.literal', align)
            elif sym.startswith('.const'):  # 32-byte constants
                yield align + '32'
            elif not sym.startswith('sk_'):
                _fail("build_stages.py can't handle '%s' (yet?)." % sym)
            else:  # a stage function
                if hidden:
                    yield hidden + ' _' + sym
                yield globl + ' _' + sym
                if not is_win:
                    yield 'FUNCTION(_' + sym + ')'
                yield '_' + sym + label
            continue

        # A row is "address:<TAB>bytes[<TAB>mnemonic and operands[<TAB>...]]".
        columns = line.split('\t')
        code = columns[1]
        has_text = len(columns) >= 3
        if len(columns) >= 4:
            inst, args = columns[2], columns[3]
        elif has_text:
            inst, _, args = columns[2].partition(' ')
        else:
            # Bytes-only row.  Presumably objdump's continuation of an
            # instruction wider than --insn-width; the bytes still belong to
            # the code, so emit them without a disassembly comment.
            inst, args = '', ''
        code, inst, args = code.strip(), inst.strip(), args.strip()

        hexed = ','.join(dehex(x) for x in code.split(' '))
        text = ' ' + directive + ' ' + hexed
        if has_text:
            text += ' ' * (36 - len(hexed)) + comment + inst
            if args:
                text += ' ' * (14 - len(inst)) + args
        yield text


def parse_object_file(dot_o, directive, target=None, out=None):
    """Disassemble `dot_o` with objdump and write it out as assembly source.

    Args:
        dot_o: object file to disassemble (see translate_disassembly for how
            its name selects the output dialect).
        directive: data directive for the code bytes, e.g. '.byte' or 'DB'.
        target: optional value passed to objdump as `--target`.
        out: writable text stream; defaults to the current sys.stdout.

    Raises:
        AssertionError: if the object has a '.rodata' section, or contains a
            symbol translate_disassembly cannot handle.
        subprocess.CalledProcessError: if objdump exits with an error.
    """
    if out is None:
        out = sys.stdout

    cmd = [objdump]
    if target:
        cmd += ['--target', target]

    # universal_newlines=True makes check_output return text on Python 3 too.
    # Look for sections we know we can't handle.
    section_headers = subprocess.check_output(cmd + ['-h', dot_o],
                                              universal_newlines=True)
    for snippet in ['.rodata']:
        if snippet in section_headers:
            _fail('Found %s in section.' % snippet)

    if directive == '.long':
        disassemble = ['-d', dot_o]
    else:
        # x86-64... as long as we're using %rip-relative addressing,
        # literal sections should be fine to just dump in with .text.
        disassemble = ['-d',  # DO NOT USE -D.
                       '-z',  # Print zero bytes instead of ...
                       '--insn-width=11',
                       '-j', '.text',
                       '-j', '.literal4',
                       '-j', '.literal8',
                       '-j', '.literal16',
                       '-j', '.const',
                       dot_o]

    # Ok.  Let's disassemble.
    listing = subprocess.check_output(cmd + disassemble,
                                      universal_newlines=True)
    for text in translate_disassembly(listing.split('\n'), dot_o, directive):
        out.write(text + '\n')


def _write_lines(out, lines):
    """Write each entry of `lines` to `out`, newline-terminated."""
    for text in lines:
        out.write(text + '\n')


def write_generated(out):
    """Write the GNU-assembler flavoured output file to stream `out`."""
    _write_lines(out, [
        GENERATED_HEADER,
        '#if defined(__MACH__)',
        ' #define HIDDEN .private_extern',
        ' #define FUNCTION(name)',
        ' #define BALIGN4 .align 2',
        ' #define BALIGN8 .align 3',
        ' #define BALIGN16 .align 4',
        ' #define BALIGN32 .align 5',
        '#else',
        ' .section .note.GNU-stack,"",%progbits',
        ' #define HIDDEN .hidden',
        ' #define FUNCTION(name) .type name,%function',
        ' #define BALIGN4 .balign 4',
        ' #define BALIGN8 .balign 8',
        ' #define BALIGN16 .balign 16',
        ' #define BALIGN32 .balign 32',
        '#endif',
        '.text',
        '#if defined(__x86_64__)',
        'BALIGN32',
    ])
    parse_object_file('merged.o', '.byte', out=out)
    _write_lines(out, [
        '#elif defined(__i386__)',
        'BALIGN32',
    ])
    parse_object_file('x86_merged.o', '.byte', out=out)
    _write_lines(out, ['#endif'])


def write_generated_win(out):
    """Write the MASM flavoured output file to stream `out`."""
    _write_lines(out, [
        GENERATED_WIN_HEADER,
        'IFDEF RAX',
        "_text32 SEGMENT ALIGN(32) 'CODE'",
        'ALIGN 32',
    ])
    parse_object_file('win_merged.o', 'DB', out=out)
    _write_lines(out, [
        'ELSE',
        '.MODEL FLAT,C',
        "_text32 SEGMENT ALIGN(32) 'CODE'",
        'ALIGN 32',
    ])
    parse_object_file('win_x86_merged.o', 'DB', out=out)
    _write_lines(out, [
        'ENDIF',
        'END',
    ])


def main():
    """Compile, merge, disassemble and write both generated files."""
    for command in compile_commands() + merge_commands():
        subprocess.check_call(command)

    # `with` guarantees each file is flushed and closed, even on failure.
    with open(generated, 'w') as out:
        write_generated(out)
    with open(generated_win, 'w') as out:
        write_generated_win(out)


if __name__ == '__main__':
    main()