From b5b2ca49eaa7f2723ebf077e8069f947779d8d08 Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Sun, 6 Aug 2017 17:07:13 -0400 Subject: Add initial stab at C-compilation by optimization --- etc/compile-by-zinc/compile-to-zinc.py | 170 +++++++++++++++++++++++++++++++++ etc/compile-by-zinc/femulDisplay.log | 78 +++++++++++++++ 2 files changed, 248 insertions(+) create mode 100755 etc/compile-by-zinc/compile-to-zinc.py create mode 100644 etc/compile-by-zinc/femulDisplay.log (limited to 'etc') diff --git a/etc/compile-by-zinc/compile-to-zinc.py b/etc/compile-by-zinc/compile-to-zinc.py new file mode 100755 index 000000000..9d8fde45e --- /dev/null +++ b/etc/compile-by-zinc/compile-to-zinc.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +from __future__ import with_statement +import codecs, re +import subprocess + +LAMBDA = u'\u03bb' + +CORES = ('ADD_MUL0', 'ADD_MUL1', 'MUL0', 'LEA_BW0', 'LEA_BW1') + +MAX_INSTRUCTION_WINDOW = 4 + +INSTRUCTIONS_PER_CYCLE = 4 + +BITWISE_CORES = tuple({ + 'core' : { 'name' : core_name , 'latency' : 1 }, + 'latency' : 1 + } for core_name in ('LEA_BW0', 'LEA_BW1')) + +MODEL = { + '*': tuple({ + 'core' : { 'name' : core_name , 'latency' : 1 }, + 'latency' : 3 + } + for core_name in ('ADD_MUL0', 'ADD_MUL1', 'MUL0')), + '+': tuple({ + 'core' : { 'name' : core_name , 'latency' : 1 }, + 'latency' : 1 + } + for core_name in ('ADD_MUL0', 'ADD_MUL1', 'LEA_BW0', 'LEA_BW1')), + '>>': BITWISE_CORES, + '<<': BITWISE_CORES, + '|': BITWISE_CORES, + '&': BITWISE_CORES + } + +def get_lines(filename): + with codecs.open(filename, 'r', encoding='utf8') as f: + lines = f.read().replace('\r\n', '\n') + return [line.strip() for line in re.findall("%s '.*?[Rr]eturn [^\r\n]*" % LAMBDA, lines, flags=re.DOTALL)[0].split('\n')] + +def strip_casts(text): + return re.sub(r'\(u?int[0-9]*_t\)\s*\(?([^\)]*)\)?', r'\1', text) + +def parse_lines(lines): + lines = list(map(strip_casts, lines)) + assert lines[0][:len(LAMBDA + ' ')] == LAMBDA + ' ' + assert lines[0][-1] == ',' + ret = {} + ret['vars'] = lines[0][len(LAMBDA + ' '):-1] + assert lines[-1][-1] == ')' + ret['return'] = lines[-1][:-1].replace('return ', '').replace('Return ', '') + ret['lines'] = [] + for line in lines[1:-1]: + datatype, varname, arg1, op, arg2 = re.findall('^(u?int[0-9]*_t) ([^ ]*) = ([^ ]*) ([^ ]*) ([^ ]*);$', line)[0] + ret['lines'].append({'type':datatype, 'out':varname, 'op':op, 'args':(arg1, arg2), 'source':line}) + print('Compiling %d of %d lines...' % (MAX_INSTRUCTION_WINDOW, len(ret['lines']))) + ret['lines'] = ret['lines'][:MAX_INSTRUCTION_WINDOW] + return ret + +def get_var_names(input_data): + return tuple(line['out'] for line in input_data['lines']) + +def get_input_var_names(input_data): + return tuple(i for i in data['vars'].replace('%core', '').replace(',', ' ').replace('(', ' ').replace(')', ' ').replace("'", ' ').split(' ') + if i != '') + + +def make_decls(input_data): + var_names = get_var_names(input_data) + ret = '' + ret += 'set of int: CORE = 1..%d;\n' % len(CORES) + for i, core in enumerate(CORES): + ret += 'CORE: %s = %d; ' % (core, i+1) + ret += '\n' + for var in var_names: + ret += 'var int: %s_loc;\n' % var + ret += 'var int: %s_latency;\n' % var + ret += 'var CORE: %s_core;\n' % var + ret += 'var int: RET_loc;\n' + return ret + +def make_disjoint(input_data): + var_names = get_var_names(input_data) + ret = '' + for var in var_names: + ret += 'constraint %s_loc >= 0;\n' % var + ret += 'constraint %s_latency >= 0;\n' % var + ret += 'constraint %s_loc + %s_latency <= RET_loc;\n' % (var, var) + # TODO: Figure out if this next constraint actually helps things.... + MAX_NUMBER_OF_NOPS_PER_INSTRUCTION = 3 + APPROXIMATE_MAX_LATENCY = 6 * INSTRUCTIONS_PER_CYCLE + max_ret_loc = ('constraint RET_loc <= %d;\n' + % (len(var_names) + * MAX_NUMBER_OF_NOPS_PER_INSTRUCTION + + APPROXIMATE_MAX_LATENCY)) + #ret += max_ret_loc + ret += '\n' + for var_i in range(len(var_names)): + for var_j in range(var_i+1, len(var_names)): + ret += 'constraint %s_loc != %s_loc;\n' % (var_names[var_i], var_names[var_j]) + ret += '\n' + return ret + +def make_dependencies(input_data): + var_names = get_var_names(input_data) + ret = '' + for line in input_data['lines']: + for arg in line['args']: + if arg in var_names and arg[0] not in '0123456789': + ret += ('constraint %s_loc + %s_latency <= %s_loc;\n' + % (arg, arg, line['out'])) + ret += '\n' + return ret + +def make_cores(input_data): + ret = '' + for line in input_data['lines']: + ret += 'constraint ' + possible_cores = [] + cores = MODEL[line['op']] + for core in cores: + possible_cores.append('(%s_latency == %d /\ %s_core == %s)' + % (line['out'], core['latency'] * INSTRUCTIONS_PER_CYCLE, + line['out'], core['core']['name'])) + ret += ' \/ '.join(possible_cores) + ret += ';\n' + return ret + +def make_cores_disjoint(input_data): + var_names = get_var_names(input_data) + ret = '' + for core in CORES: + for var_i in range(len(var_names)): + for var_j in range(var_i+1, len(var_names)): + op_i = input_data['lines'][var_i]['op'] + op_j = input_data['lines'][var_j]['op'] + latencies_i = [val['core']['latency'] for val in MODEL[op_i] if val['core']['name'] == core] + latencies_j = [val['core']['latency'] for val in MODEL[op_j] if val['core']['name'] == core] + if len(latencies_i) == 0 or len(latencies_j) == 0: continue + assert len(latencies_i) == 1 + assert len(latencies_j) == 1 + ret += ('constraint (%(vari)s_core != %(core)s \\/ %(varj)s_core != %(core)s) \\/ (%(vari)s_loc + %(latencyi)d <= %(varj)s_loc \\/ %(varj)s_loc + %(latencyj)d <= %(vari)s_loc);\n' + % { 'vari':var_names[var_i] , 'varj':var_names[var_j] , 'core':core , + 'latencyi':latencies_i[0] * INSTRUCTIONS_PER_CYCLE, + 'latencyj':latencies_j[0] * INSTRUCTIONS_PER_CYCLE}) + ret += '\n' + return ret + +def make_output(input_data): + ret = 'solve minimize RET_loc;\n\n' + ret += 'output [ "{\\n" ++\n' + for line in input_data['lines']: + ret += (' " \'%(var)s_loc\': " ++ show(%(var)s_loc) ++ ", \'%(var)s_latency\': " ++ show(%(var)s_latency) ++ ", \'%(var)s_core\': " ++ show(%(var)s_core) ++ ",\\n" ++\n %% %(source)s\n' + % { 'var':line['out'] , 'source':line['source'] }) + ret += ' " \'RET_loc\': " ++ show(RET_loc) ++ "\\n" ++\n' + ret += ' "}" ]\n' + return ret + + +data = parse_lines(get_lines('femulDisplay.log')) +with open('femulDisplay.mzn', 'w') as f: + f.write(make_decls(data)) + f.write(make_disjoint(data)) + f.write(make_dependencies(data)) + f.write(make_cores(data)) + f.write(make_cores_disjoint(data)) + f.write(make_output(data)) + +p = subprocess.Popen(['/usr/bin/time', '-f', 'real: %e, user: %U, sys: %S, mem: %M ko', 'minizinc', 'femulDisplay.mzn']) +p.communicate() diff --git a/etc/compile-by-zinc/femulDisplay.log b/etc/compile-by-zinc/femulDisplay.log new file mode 100644 index 000000000..87da411a0 --- /dev/null +++ b/etc/compile-by-zinc/femulDisplay.log @@ -0,0 +1,78 @@ +λ x x0 : word64 * word64 * word64 * word64 * word64, +Interp-η +(λ var : Syntax.base_type → Type, + λ '(x10, x11, x9, x7, x5, (x18, x19, x17, x15, x13))%core, + uint128_t x20 = (uint128_t) x5 * x13; + uint128_t x21 = (uint128_t) x5 * x15; + uint128_t x22 = (uint128_t) x7 * x13; + uint128_t x23 = x21 + x22; + uint128_t x24 = (uint128_t) x5 * x17; + uint128_t x25 = (uint128_t) x9 * x13; + uint128_t x26 = x24 + x25; + uint128_t x27 = (uint128_t) x7 * x15; + uint128_t x28 = x26 + x27; + uint128_t x29 = (uint128_t) x5 * x19; + uint128_t x30 = (uint128_t) x11 * x13; + uint128_t x31 = x29 + x30; + uint128_t x32 = (uint128_t) x7 * x17; + uint128_t x33 = x31 + x32; + uint128_t x34 = (uint128_t) x9 * x15; + uint128_t x35 = x33 + x34; + uint128_t x36 = (uint128_t) x5 * x18; + uint128_t x37 = (uint128_t) x10 * x13; + uint128_t x38 = x36 + x37; + uint128_t x39 = (uint128_t) x11 * x15; + uint128_t x40 = x38 + x39; + uint128_t x41 = (uint128_t) x7 * x19; + uint128_t x42 = x40 + x41; + uint128_t x43 = (uint128_t) x9 * x17; + uint128_t x44 = x42 + x43; + uint64_t x45 = x10 * 0x13; + uint64_t x46 = x7 * 0x13; + uint64_t x47 = x9 * 0x13; + uint64_t x48 = x11 * 0x13; + uint128_t x49 = (uint128_t) x45 * x15; + uint128_t x50 = x20 + x49; + uint128_t x51 = (uint128_t) x46 * x18; + uint128_t x52 = x50 + x51; + uint128_t x53 = (uint128_t) x47 * x19; + uint128_t x54 = x52 + x53; + uint128_t x55 = (uint128_t) x48 * x17; + uint128_t x56 = x54 + x55; + uint128_t x57 = (uint128_t) x45 * x17; + uint128_t x58 = x23 + x57; + uint128_t x59 = (uint128_t) x47 * x18; + uint128_t x60 = x58 + x59; + uint128_t x61 = (uint128_t) x48 * x19; + uint128_t x62 = x60 + x61; + uint128_t x63 = (uint128_t) x45 * x19; + uint128_t x64 = x28 + x63; + uint128_t x65 = (uint128_t) x48 * x18; + uint128_t x66 = x64 + x65; + uint128_t x67 = (uint128_t) x45 * x18; + uint128_t x68 = x35 + x67; + uint64_t x69 = (uint64_t) (x56 >> 0x33); + uint64_t x70 = (uint64_t) x56 & 0x7ffffffffffff; + uint128_t x71 = x69 + x62; + uint64_t x72 = (uint64_t) (x71 >> 0x33); + uint64_t x73 = (uint64_t) x71 & 0x7ffffffffffff; + uint128_t x74 = x72 + x66; + uint64_t x75 = (uint64_t) (x74 >> 0x33); + uint64_t x76 = (uint64_t) x74 & 0x7ffffffffffff; + uint128_t x77 = x75 + x68; + uint64_t x78 = (uint64_t) (x77 >> 0x33); + uint64_t x79 = (uint64_t) x77 & 0x7ffffffffffff; + uint128_t x80 = x78 + x44; + uint64_t x81 = (uint64_t) (x80 >> 0x33); + uint64_t x82 = (uint64_t) x80 & 0x7ffffffffffff; + uint64_t x83 = 0x13 * x81; + uint64_t x84 = x70 + x83; + uint64_t x85 = x84 >> 0x33; + uint64_t x86 = x84 & 0x7ffffffffffff; + uint64_t x87 = x85 + x73; + uint64_t x88 = x87 >> 0x33; + uint64_t x89 = x87 & 0x7ffffffffffff; + uint64_t x90 = x88 + x76; + return (x82, x79, x90, x89, x86)) +(x, x0)%core + : word64 * word64 * word64 * word64 * word64 → word64 * word64 * word64 * word64 * word64 → ReturnType (uint64_t * uint64_t * uint64_t * uint64_t * uint64_t) -- cgit v1.2.3