From 57018f66cd5358ca3350597ef8cdf94fdc60244a Mon Sep 17 00:00:00 2001
From: Jason Gross
Date: Sun, 13 Aug 2017 21:18:43 -0400
Subject: Use a more realistic processor model

---
 etc/compile-by-zinc/femulScheduled.log  |  74 +++++++++
 etc/compile-by-zinc/heuristic-search.py | 286 ++++++++++++++------------------
 2 files changed, 198 insertions(+), 162 deletions(-)
 create mode 100644 etc/compile-by-zinc/femulScheduled.log

diff --git a/etc/compile-by-zinc/femulScheduled.log b/etc/compile-by-zinc/femulScheduled.log
new file mode 100644
index 000000000..6c9281748
--- /dev/null
+++ b/etc/compile-by-zinc/femulScheduled.log
@@ -0,0 +1,74 @@
+INPUT: (x10, x11, x9, x7, x5, x18, x19, x17, x15, x13)
+uint128_t x20 = (uint128_t) x5 * x13; // MULX r64,r64,r64, start: 0, end: 4
+uint128_t x21 = (uint128_t) x5 * x15; // MULX r64,r64,r64, start: 0, end: 4
+uint128_t x22 = (uint128_t) x7 * x13; // MULX r64,r64,r64, start: 1, end: 5
+uint128_t x24 = (uint128_t) x5 * x17; // MULX r64,r64,r64, start: 2, end: 6
+uint128_t x25 = (uint128_t) x9 * x13; // MULX r64,r64,r64, start: 3, end: 7
+uint128_t x27 = (uint128_t) x7 * x15; // MULX r64,r64,r64, start: 4, end: 8
+uint128_t x23 = x21 + x22; // ADD; ADC(X), start: 5, end: 7
+uint128_t x29 = (uint128_t) x5 * x19; // MULX r64,r64,r64, start: 6, end: 10
+uint128_t x30 = (uint128_t) x11 * x13; // MULX r64,r64,r64, start: 6, end: 10
+uint128_t x26 = x24 + x25; // ADD; ADC(X), start: 7, end: 9
+uint128_t x32 = (uint128_t) x7 * x17; // MULX r64,r64,r64, start: 8, end: 12
+uint128_t x34 = (uint128_t) x9 * x15; // MULX r64,r64,r64, start: 8, end: 12
+uint128_t x28 = x26 + x27; // ADD; ADC(X), start: 9, end: 11
+uint128_t x36 = (uint128_t) x5 * x18; // MULX r64,r64,r64, start: 10, end: 14
+uint128_t x31 = x29 + x30; // ADD; ADC(X), start: 10, end: 12
+uint128_t x37 = (uint128_t) x10 * x13; // MULX r64,r64,r64, start: 11, end: 15
+uint128_t x39 = (uint128_t) x11 * x15; // MULX r64,r64,r64, start: 11, end: 15
+uint128_t x33 = x31 + x32; // ADD; ADC(X), start: 12, end: 14
+uint128_t x41 = (uint128_t) x7 * x19; // MULX r64,r64,r64, start: 13, end: 17
+uint128_t x43 = (uint128_t) x9 * x17; // MULX r64,r64,r64, start: 13, end: 17
+uint128_t x35 = x33 + x34; // ADD; ADC(X), start: 14, end: 16
+uint128_t x38 = x36 + x37; // ADD; ADC(X), start: 15, end: 17
+uint64_t x45 = x10 * 0x13; // IMUL r64,r64,i, start: 15, end: 18
+uint64_t x46 = x7 * 0x13; // IMUL r64,r64,i, start: 15, end: 18
+uint128_t x40 = x38 + x39; // ADD; ADC(X), start: 16, end: 18
+uint64_t x47 = x9 * 0x13; // IMUL r64,r64,i, start: 17, end: 20
+uint64_t x48 = x11 * 0x13; // IMUL r64,r64,i, start: 17, end: 20
+uint128_t x42 = x40 + x41; // ADD; ADC(X), start: 18, end: 20
+uint128_t x49 = (uint128_t) x45 * x15; // MULX r64,r64,r64, start: 19, end: 23
+uint128_t x51 = (uint128_t) x46 * x18; // MULX r64,r64,r64, start: 19, end: 23
+uint128_t x44 = x42 + x43; // ADD; ADC(X), start: 20, end: 22
+uint128_t x53 = (uint128_t) x47 * x19; // MULX r64,r64,r64, start: 21, end: 25
+uint128_t x55 = (uint128_t) x48 * x17; // MULX r64,r64,r64, start: 21, end: 25
+uint128_t x50 = x20 + x49; // ADD; ADC(X), start: 22, end: 24
+uint128_t x57 = (uint128_t) x45 * x17; // MULX r64,r64,r64, start: 23, end: 27
+uint128_t x59 = (uint128_t) x47 * x18; // MULX r64,r64,r64, start: 23, end: 27
+uint128_t x52 = x50 + x51; // ADD; ADC(X), start: 24, end: 26
+uint128_t x61 = (uint128_t) x48 * x19; // MULX r64,r64,r64, start: 25, end: 29
+uint128_t x63 = (uint128_t) x45 * x19; // MULX r64,r64,r64, start: 25, end: 29
+uint128_t x54 = x52 + x53; // ADD; ADC(X), start: 26, end: 28
+uint128_t x58 = x23 + x57; // ADD; ADC(X), start: 27, end: 29
+uint128_t x65 = (uint128_t) x48 * x18; // MULX r64,r64,r64, start: 27, end: 31
+uint128_t x67 = (uint128_t) x45 * x18; // MULX r64,r64,r64, start: 27, end: 31
+uint128_t x56 = x54 + x55; // ADD; ADC(X), start: 28, end: 30
+uint128_t x60 = x58 + x59; // ADD; ADC(X), start: 29, end: 31
+uint128_t x62 = x60 + x61; // ADD; ADC(X), start: 29, end: 31
+uint128_t x64 = x28 + x63; // ADD; ADC(X), start: 31, end: 33
+uint64_t x69 = (uint64_t) (x56 >> 0x33); // SHRD r,r,i, start: 31, end: 34
+uint64_t x70 = (uint64_t) x56 & 0x7ffffffffffff; // AND, start: 31, end: 32
+uint128_t x66 = x64 + x65; // ADD; ADC(X), start: 31, end: 33
+uint128_t x68 = x35 + x67; // ADD; ADC(X), start: 33, end: 35
+uint128_t x71 = x69 + x62; // ADD; ADC(X), start: 33, end: 35
+uint64_t x72 = (uint64_t) (x71 >> 0x33); // SHRD r,r,i, start: 35, end: 38
+uint64_t x73 = (uint64_t) x71 & 0x7ffffffffffff; // AND, start: 37, end: 38
+uint128_t x74 = x72 + x66; // ADD; ADC(X), start: 37, end: 39
+uint64_t x75 = (uint64_t) (x74 >> 0x33); // SHRD r,r,i, start: 40, end: 43
+uint64_t x76 = (uint64_t) x74 & 0x7ffffffffffff; // AND, start: 42, end: 43
+uint128_t x77 = x75 + x68; // ADD; ADC(X), start: 42, end: 44
+uint64_t x78 = (uint64_t) (x77 >> 0x33); // SHRD r,r,i, start: 45, end: 48
+uint64_t x79 = (uint64_t) x77 & 0x7ffffffffffff; // AND, start: 47, end: 48
+uint128_t x80 = x78 + x44; // ADD; ADC(X), start: 47, end: 49
+uint64_t x81 = (uint64_t) (x80 >> 0x33); // SHRD r,r,i, start: 50, end: 53
+uint64_t x82 = (uint64_t) x80 & 0x7ffffffffffff; // AND, start: 52, end: 53
+uint64_t x83 = 0x13 * x81; // IMUL r64,r64,i, start: 52, end: 55
+uint64_t x84 = x70 + x83; // ADD, start: 55, end: 56
+uint64_t x85 = x84 >> 0x33; // SHR r,i, start: 58, end: 59
+uint64_t x86 = x84 & 0x7ffffffffffff; // AND, start: 59, end: 60
+uint64_t x87 = x85 + x73; // ADD, start: 59, end: 60
+uint64_t x88 = x87 >> 0x33; // SHR r,i, start: 60, end: 61
+uint64_t x89 = x87 & 0x7ffffffffffff; // AND, start: 61, end: 62
+uint64_t x90 = x88 + x76; // ADD, start: 61, end: 62
+Return (x82, x79, x90, x89, x86)
+// end: 63
diff --git a/etc/compile-by-zinc/heuristic-search.py b/etc/compile-by-zinc/heuristic-search.py
index b7586061e..ea528a957 100755
--- a/etc/compile-by-zinc/heuristic-search.py
+++ b/etc/compile-by-zinc/heuristic-search.py
@@ -2,6 +2,7 @@
 from __future__ import with_statement
 from memoize import memoize
 import codecs, re, sys
+import random
 
 LAMBDA = u'\u03bb'
 
@@ -14,39 +15,97 @@ INSTRUCTIONS_PER_CYCLE = 4
 REGISTERS = tuple(['RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP', 'RSP']
                   + ['r%d' % i for i in range(8, 16)])
 
-CORE_DATA = (('ADD_MUL', 2), ('MUL_CORE', 1), ('LEA_BW', 2))
+CORE_DATA = tuple(('p%d' % i, 1) for i in range(8))
 CORES = tuple(name for name, count in CORE_DATA)
 CORE_COUNT = dict(CORE_DATA)
 
-BITWISE_CORES = tuple({
-    'core' : { 'name' : core_name , 'latency' : 1 },
-    'latency' : 1
-    } for core_name in ('LEA_BW',))
-
-MODEL = {
-    '*': tuple({
-        'core' : { 'name' : core_name , 'latency' : 1 },
-        'latency' : 3
-        }
-        for core_name in ('ADD_MUL', 'MUL_CORE')),
-    '+': tuple({
-        'core' : { 'name' : core_name , 'latency' : 1 },
-        'latency' : 1
-        }
-        for core_name in ('ADD_MUL', 'LEA_BW')),
-    '>>': BITWISE_CORES,
-    '<<': BITWISE_CORES,
-    '|': BITWISE_CORES,
-    '&': BITWISE_CORES,
-    'LOAD': tuple({
-        'core' : { 'name' : core_name , 'latency' : 1 },
-        'latency' : 1
-        } for core_name in REGISTERS),
-    'STORE': tuple({
-        'core' : { 'name' : core_name , 'latency' : 1 },
-        'latency' : 1
-        } for core_name in REGISTERS)
-    }
+def possible_cores_for_line(line, var_types):
+    # from page 233 of http://agner.org/optimize/instruction_tables.pdf
+    if line['op'] == '*':
+        if line['type'] == 'uint64_t' and '0x13' in line['args']: # * 19 can be either imul r64/r64/i, or two lea; we skip the second case because jgross can't figure out what cost to use for it
+            return ({
+                'core': ({ 'name' : 'p1' , 'latency' : 1 },),
+                'latency' : 3,
+                'instruction' : 'IMUL r64,r64,i'
+            },)
+        elif line['type'] == 'uint128_t' and all(var_types[var] == 'uint64_t' for var in line['args']): # mulx
+            return ({
+                'core': tuple({ 'name' : core_name , 'latency' : 1 } for core_name in ('p1', 'p5')),
+                'latency' : 4,
+                'instruction' : 'MULX r64,r64,r64'
+            },)
+        else:
+            assert False
+    elif line['op'] == '+':
+        if line['type'] == 'uint128_t':
+            return tuple({
+                'core' : ({ 'name' : core_name , 'latency' : 1+1 },),
+                'latency' : 1+1,
+                'instruction' : 'ADD; ADC(X)'
+            } for core_name in ('p0', 'p6'))
+        elif line['type'] == 'uint64_t':
+            return tuple({
+                'core' : ({ 'name' : core_name , 'latency' : 1 },),
+                'latency' : 1,
+                'instruction' : 'ADD'
+            } for core_name in ('p0', 'p1', 'p5', 'p6'))
+        else:
+            assert False
+    elif line['op'] in ('>>', '<<'):
+        if var_types[line['args'][0]] == 'uint128_t' and line['type'] == 'uint64_t' and line['args'][1][:2] == '0x':
+            return tuple({
+                'core' : ({ 'name' : core_name , 'latency' : 1 },),
+                'latency' : 3,
+                'instruction' : ('SHLD' if line['op'] == '<<' else 'SHRD') + ' r,r,i'
+            } for core_name in ('p1',))
+        elif var_types[line['args'][0]] == 'uint64_t' and line['type'] == 'uint64_t' and line['args'][1][:2] == '0x':
+            return tuple({
+                'core' : ({ 'name' : core_name , 'latency' : 1 },),
+                'latency' : 1,
+                'instruction' : ('SHL' if line['op'] == '<<' else 'SHR') + ' r,i'
+            } for core_name in ('p0', 'p6'))
+        else:
+            assert False
+    elif line['op'] in ('&', '|', '^'):
+        return tuple({
+            'core' : ({ 'name' : core_name , 'latency' : 1 },),
+            'latency' : 1,
+            'instruction' : {'&':'AND', '|':'OR', '^':'XOR'}[line['op']]
+        } for core_name in ('p0', 'p1', 'p5', 'p6'))
+    elif line['op'] in ('LOAD',):
+        if line['type'] == 'uint128_t': # issue 2 MOV, same port, block on p4
+            return tuple({
+                'core' : ({ 'name' : core_name , 'latency' : 2 }, { 'name' : 'p4' , 'latency' : 2 }),
+                'latency' : 2,
+                'instruction' : 'MOV m,r; MOV m,r'
+            } for core_name in ('p2', 'p3', 'p7'))
+        elif line['type'] == 'uint64_t':
+            return tuple({
+                'core' : ({ 'name' : core_name , 'latency' : 1 }, { 'name' : 'p4' , 'latency' : 1 }),
+                'latency' : 1,
+                'instruction' : 'MOV m,r'
+            } for core_name in ('p2', 'p3', 'p7'))
+        else:
+            assert False
+    elif line['op'] in ('STORE',):
+        if line['type'] == 'uint128_t': # issue 2 MOV, different ports
+            return ({
+                'core' : tuple({ 'name' : core_name , 'latency' : 1 } for core_name in ('p2', 'p3')),
+                'latency' : 1,
+                'instruction' : 'MOV r64,m; MOV r64,m'
+            },)
+        elif line['type'] == 'uint64_t':
+            return tuple({
+                'core' : ({ 'name' : core_name , 'latency' : 1 },),
+                'latency' : 1,
+                'instruction' : 'MOV r64,m'
+            } for core_name in ('p2', 'p3'))
+        else:
+            assert False
+    else:
+        assert False
+
+
 
 if len(sys.argv) > 1:
     MAX_INSTRUCTION_WINDOW = int(sys.argv[1])
@@ -59,8 +118,13 @@ def get_lines(filename):
 def strip_casts(text):
     return re.sub(r'\(u?int[0-9]*_t\)\s*\(?([^\)]*)\)?', r'\1', text)
 
+def get_input_var_names(input_data):
+    return tuple(i for i in input_data['vars'].replace('%core', '').replace(',', ' ').replace('(', ' ').replace(')', ' ').replace("'", ' ').split(' ')
+                 if i != '')
+
 def parse_lines(lines):
-    lines = list(map(strip_casts, lines))
+    orig_lines = list(lines)
+    lines = list(map(strip_casts, orig_lines))
     assert lines[0][:len(LAMBDA + ' ')] == LAMBDA + ' '
     assert lines[0][-1] == ','
     ret = {}
@@ -68,9 +132,14 @@ def parse_lines(lines):
     assert lines[-1][-1] == ')'
     ret['return'] = lines[-1][:-1].replace('return ', '').replace('Return ', '')
     ret['lines'] = []
-    for line in lines[1:-1]:
+    var_types = dict((var, 'uint64_t') for var in get_input_var_names(ret))
+    for line, orig_line in zip(lines, orig_lines)[1:-1]:
         datatype, varname, arg1, op, arg2 = re.findall('^(u?int[0-9]*_t) ([^ ]*) = ([^ ]*) ([^ ]*) ([^ ]*);$', line)[0]
-        ret['lines'].append({'type':datatype, 'out':varname, 'op':op, 'args':(arg1, arg2), 'source':line})
+        var_types[varname] = datatype
+        cur_line = {'type':datatype, 'out':varname, 'op':op, 'args':(arg1, arg2), 'source':orig_line}
+        possible_cores = possible_cores_for_line(cur_line, var_types)
+        cur_line['cores'] = possible_cores
+        ret['lines'].append(cur_line)
     print('Compiling %d lines in groups of %d...' % (len(ret['lines']), min(MAX_INSTRUCTION_WINDOW, len(ret['lines']))))
     ret['lines'] = tuple(ret['lines'])
     split_ret = []
@@ -83,10 +152,6 @@ def parse_lines(lines):
 def get_var_names(input_data):
     return tuple(line['out'] for line in input_data['lines'])
 
-def get_input_var_names(input_data):
-    return tuple(i for i in data['vars'].replace('%core', '').replace(',', ' ').replace('(', ' ').replace(')', ' ').replace("'", ' ').split(' ')
-                 if i != '')
-
 def get_output_var_names(input_data):
     return tuple(i for i in data['return'].replace(',', ' ').replace('(', ' ').replace(')', ' ').split(' ')
                  if i != '')
@@ -101,7 +166,7 @@ def create_set(name, items):
     ret += '\n'
     return ret
 
-def schedule(data, basepoint):
+def schedule(data, basepoint, do_print):
     def make_data_dependencies(input_data):
         input_var_names = get_input_var_names(input_data)
         dependencies = dict((var, tuple()) for var in input_var_names)
@@ -200,7 +265,7 @@ def schedule(data, basepoint):
                         core_remaining_cycle_count[c][i] = max(0, core_remaining_cycle_count[c][i] - 1)
                 vars_remaining_cycles = dict((var, c - 1) for var, c in vars_remaining_cycles.items()
                                              if c > 1)
-            cycles_passed = max([min(core_remaining_cycle_count[core['core']['name']])] +
+            cycles_passed = max([min(core_remaining_cycle_count[port['name']]) for port in core['core']] +
                                 [vars_remaining_cycles[v] for v in dependencies[var] if v in vars_remaining_cycles.keys()])
             if cycles_passed != 0:
                 cost += cycles_passed
@@ -213,9 +278,10 @@ def schedule(data, basepoint):
             else:
                 cur_instructions_in_cycle += 1
             vars_remaining_cycles[var] = core['latency']
-            assert core_remaining_cycle_count[core['core']['name']][0] == 0
-            core_remaining_cycle_count[core['core']['name']][0] = core['core']['latency']
-            core_remaining_cycle_count[core['core']['name']] = sorted(core_remaining_cycle_count[core['core']['name']])
+            for port in core['core']:
+                assert core_remaining_cycle_count[port['name']][0] == 0
+                core_remaining_cycle_count[port['name']][0] = port['latency']
+                core_remaining_cycle_count[port['name']] = sorted(core_remaining_cycle_count[port['name']])
             return (cost, freeze((vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle)))
 
     @memoize
@@ -238,40 +304,15 @@ def schedule(data, basepoint):
         return sorted(get_wait_times(next_var_cores, core_state))
 
     def add_cycle_info(schedule):
-        vars_remaining_cycles = {}
-        core_remaining_cycle_count = dict([(core_name, [0] * core_count) for core_name, core_count in CORE_DATA]
-                                          + [(core_name, [0]) for core_name in REGISTERS])
+        core_state = freeze(make_initial_core_state())
         schedule_with_cycle_info = []
         cur_cycle = 0
-        cur_instructions_in_cycle = 0
         for var, core in schedule:
-            if cur_instructions_in_cycle >= INSTRUCTIONS_PER_CYCLE:
-                cur_cycle += 1
-                cur_instructions_in_cycle = 0
-                for c in core_remaining_cycle_count.keys():
-                    for i in range(len(core_remaining_cycle_count[c])):
-                        core_remaining_cycle_count[c][i] = max(0, core_remaining_cycle_count[c][i] - 1)
-                vars_remaining_cycles = dict((var, c - 1) for var, c in vars_remaining_cycles.items()
-                                             if c > 1)
-            cycles_passed = max([min(core_remaining_cycle_count[core['core']['name']])] +
-                                [vars_remaining_cycles[v] for v in dependencies[var] if v in vars_remaining_cycles.keys()])
-            if cycles_passed != 0:
-                cur_cycle += cycles_passed
-                cur_instructions_in_cycle = 1
-                for c in core_remaining_cycle_count.keys():
-                    for i in range(len(core_remaining_cycle_count[c])):
-                        core_remaining_cycle_count[c][i] = max(0, core_remaining_cycle_count[c][i] - cycles_passed)
-                vars_remaining_cycles = dict((var, c - cycles_passed) for var, c in vars_remaining_cycles.items()
-                                             if c > cycles_passed)
-            else:
-                cur_instructions_in_cycle += 1
-            vars_remaining_cycles[var] = core['latency']
-            assert core_remaining_cycle_count[core['core']['name']][0] == 0
-            core_remaining_cycle_count[core['core']['name']][0] = core['core']['latency']
-            core_remaining_cycle_count[core['core']['name']] = sorted(core_remaining_cycle_count[core['core']['name']])
+            cost, core_state = update_core_state(var, freeze(core), core_state)
             schedule_with_cycle_info.append((var,
-                                             {'start':cur_cycle, 'finish':cur_cycle + core['core']['latency']},
+                                             {'start':cur_cycle, 'finish':cur_cycle + core['latency']},
                                              core))
+            cur_cycle += cost
         return schedule_with_cycle_info
 
     def evaluate_cost(schedule_with_cycle_info):
@@ -291,7 +332,7 @@ def schedule(data, basepoint):
         min_cost, min_schedule = None, None
         var_cores = [(var, core)
                      for var in next_statements
-                     for core in MODEL[(lines[var]['op'] if var in lines.keys() else 'LOAD')]]
+                     for core in (lines[var]['cores'] if var in lines.keys() else possible_cores_for_line({'op':'LOAD', 'type':'uint64_t'}))]
         sorted_subset_next_statements = sorted_next_statements = get_sorted_next_statements(var_cores, core_state)
         if len(sorted_next_statements) > 0:
             pre_min_cost = sorted_next_statements[0][0]
@@ -299,8 +340,8 @@ def schedule(data, basepoint):
             sorted_subset_next_statements \
                 = tuple((cost, var, core, new_core_state) for cost, var, core, new_core_state in sorted_next_statements
                         if pre_min_cost == cost)
-            sorted_subset_next_statements = sorted_subset_next_statements[:2]
-            if pre_min_cost == 0: sorted_subset_next_statements = sorted_subset_next_statements[:2]
+            sorted_subset_next_statements = sorted_subset_next_statements[:1]
+            if pre_min_cost == 0: sorted_subset_next_statements = sorted_subset_next_statements[:1]
         for cost, var, core, new_core_state in sorted_subset_next_statements:
            cost, schedule = make_schedule(var, core)
            if min_cost is None or cost < min_cost:
@@ -315,100 +356,21 @@ def schedule(data, basepoint):
     schedule_with_cycle_info = add_cycle_info(schedule)
     for var, cycles, core in schedule_with_cycle_info:
         if var in lines.keys():
-            print('%s // %s, start: %s, end: %s' % (lines[var]['source'], core['core']['name'], basepoint + cycles['start'], basepoint + cycles['finish']))
+            do_print('%s // %s, start: %s, end: %s' % (lines[var]['source'], core['instruction'], basepoint + cycles['start'], basepoint + cycles['finish']))
         else:
-            print('LOAD %s; // %s, start: %s, end: %s' % (var, core['core']['name'], basepoint + cycles['start'], basepoint + cycles['finish']))
+            do_print('%s = %s; // %s, start: %s, end: %s' % (var, core['instruction'], basepoint + cycles['start'], basepoint + cycles['finish']))
     return basepoint + cost
 
-    def make_decls(input_data):
-        var_names = get_var_names(input_data)
-        ret = ''
-        ret += 'include "alldifferent.mzn";\n'
-        ret += 'include "cumulative.mzn";\n'
-        for line in input_data['lines']:
-            ret += '%%%s\n' % line['source']
-        ret += create_set('CORE', CORES)
-        ret += create_set('INSTRUCTIONS', list(var_names))
-        ret += create_set('OPS', list(OP_NAMES.values()))
-        MAX_NUMBER_OF_NOOPS_PER_INSTRUCTION = 3
-        APPROXIMATE_MAX_LATENCY = 6 * INSTRUCTIONS_PER_CYCLE
-        max_loc = len(var_names) * MAX_NUMBER_OF_NOOPS_PER_INSTRUCTION + APPROXIMATE_MAX_LATENCY
-        ret += 'int: MAX_LOC = %d;\n\n' % max_loc
-        ret += 'set of int: LOCATIONS = 1..MAX_LOC;\n'
-        ret += 'array[INSTRUCTIONS] of var LOCATIONS: output_locations;\n'
-        ret += 'array[INSTRUCTIONS] of var int: output_data_latency;\n'
-        ret += 'array[INSTRUCTIONS] of var int: output_core_latency;\n'
-        ret += 'array[INSTRUCTIONS] of var CORE: output_cores;\n'
-        ret += 'array[INSTRUCTIONS] of OPS: input_ops = [%s];\n' % ', '.join(OP_NAMES[line['op']] for line in input_data['lines'])
-        for core in CORES:
-            ret += 'array[INSTRUCTIONS] of var int: output_%s_core_latency;\n' % core
-            ret += 'array[INSTRUCTIONS] of var 0..1: output_%s_core_use;\n' % core
-            ret += 'constraint forall (i in INSTRUCTIONS) (0 <= output_%s_core_latency[i]);\n' % core
-            ret += 'constraint forall (i in INSTRUCTIONS) (output_%s_core_use[i] == 1 -> output_core_latency[i] == output_%s_core_latency[i]);\n' % (core, core)
-        ret += 'var LOCATIONS: RET_loc;\n'
-        ret += '\n'
-        return ret
-
-    def make_cores(input_data):
-        ret = ''
-        for opc, cores in MODEL.items():
-            possible_cores = []
-            for core in cores:
-                conjuncts = (['output_cores[i] == %s' % core['core']['name'],
-                              'output_%s_core_use[i] == 1' % core['core']['name'],
-                              'output_%s_core_latency[i] == %d' % (core['core']['name'], core['core']['latency'] * INSTRUCTIONS_PER_CYCLE),
-                              'output_data_latency[i] == %d' % (core['latency'] * INSTRUCTIONS_PER_CYCLE)] +
-                             ['output_%s_core_use[i] == 0 /\ output_%s_core_latency[i] == 0' % (other_core, other_core)
-                              for other_core in CORES if other_core != core['core']['name']])
-                possible_cores.append('(%s)' % (r' /\ '.join(conjuncts)))
-            ret += ('constraint forall (i in INSTRUCTIONS) (input_ops[i] == %s -> (%s));\n'
-                    % (OP_NAMES[opc], r' \/ '.join(possible_cores)))
-        ret += '\n'
-        for core in CORES:
-            ret += ('constraint cumulative(output_locations, output_%s_core_latency, output_%s_core_use, %d);\n'
-                    % (core, core, CORE_COUNT[core]))
-        return ret
-
-    def make_disjoint(input_data):
-        var_names = get_var_names(input_data)
-        ret = ''
-        ret += 'constraint alldifferent(output_locations);\n'
-        return ret
-
-    def make_dependencies(input_data):
-        var_names = get_var_names(input_data)
-        ret = ''
-        for line in input_data['lines']:
-            for arg in line['args']:
-                if arg in var_names and arg[0] not in '0123456789':
-                    ret += ('constraint output_locations[%s] + output_data_latency[%s] <= output_locations[%s];\n'
-                            % (arg, arg, line['out']))
-        ret += '\n'
-        ret += 'constraint max([ output_locations[i] + output_data_latency[i] | i in INSTRUCTIONS ]) <= RET_loc;\n'
-        ret += '\n'
-        return ret
-
-
-
-    def make_output(input_data):
-        ret = 'solve minimize RET_loc;\n\n'
-        ret += 'output [ "(" ++ show(INSTRUCTIONS_NAMES[i]) ++ ", " ++ show(CORE_NAMES[fix(output_cores[i])]) ++ ", " ++ show(output_locations[i]) ++ ", " ++ show(output_data_latency[i]) ++ ", " ++ show(output_core_latency[i]) ++ ") ,\\n"\n'
-        ret += '         | i in INSTRUCTIONS ];\n'
-        ret += 'output [ "RET_loc: " ++ show(RET_loc) ];\n'
-        return ret
-
-#    return '\n'.join([
-#        make_decls(data),
-#        make_disjoint(data),
-#        make_dependencies(data),
-#        make_cores(data),
-#        make_output(data)
-#        ])
 
 data_list = parse_lines(get_lines('femulDisplay.log'))
 basepoint = 0
 for i, data in enumerate(data_list):
-    basepoint = schedule(data, basepoint)
+    with open('femulScheduled.log', 'w') as f:
+        def do_print(v):
+            print(v)
+            f.write(v + '\n')
+        f.write('INPUT: (%s)\n' % ', '.join(get_input_var_names(data)))
+        basepoint = schedule(data, basepoint, do_print)
+        f.write('Return (%s)\n// end: %d\n' % (', '.join(get_output_var_names(data)), basepoint))
 print(basepoint)
 sys.exit(0)
-- 
cgit v1.2.3