aboutsummaryrefslogtreecommitdiff
path: root/etc/compile-by-zinc/heuristic-search.py
diff options
context:
space:
mode:
authorGravatar Jason Gross <jgross@mit.edu>2017-08-14 12:21:51 -0400
committerGravatar Jason Gross <jgross@mit.edu>2017-08-14 12:21:51 -0400
commit014062cd1e034badfebe86951bb055f3fc224f3c (patch)
tree1cfa9ae8cc0bf19847665c8574c60f3ef0d7cecc /etc/compile-by-zinc/heuristic-search.py
parent57018f66cd5358ca3350597ef8cdf94fdc60244a (diff)
Update scheduler to know about implicit mulx arg
Now it prefers putting together mulx with the same implicit arg (approximated as the same variable with the lower number).
Diffstat (limited to 'etc/compile-by-zinc/heuristic-search.py')
-rwxr-xr-xetc/compile-by-zinc/heuristic-search.py66
1 files changed, 45 insertions, 21 deletions
diff --git a/etc/compile-by-zinc/heuristic-search.py b/etc/compile-by-zinc/heuristic-search.py
index ea528a957..ba979f5dd 100755
--- a/etc/compile-by-zinc/heuristic-search.py
+++ b/etc/compile-by-zinc/heuristic-search.py
@@ -222,7 +222,8 @@ def schedule(data, basepoint, do_print):
core_remaining_cycle_count = dict([(core_name, [0] * core_count) for core_name, core_count in CORE_DATA]
+ [(core_name, [0]) for core_name in REGISTERS])
cur_instructions_in_cycle = 0
- return vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle
+ register_vals = dict((var, None) for var in REGISTERS)
+ return vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle, register_vals
def freeze_gen(v, rec=(lambda x:x)):
if isinstance(v, list):
@@ -252,10 +253,19 @@ def schedule(data, basepoint, do_print):
def unfreeze(v):
return unfreeze_gen(v, unfreeze)
+ def update_register_vals_with_core_args(core, args, register_vals):
+ new_rdx = register_vals['RDX']
+ if 'MULX' in core['instruction']:
+ new_rdx = sorted(args, key=(lambda x: int(x.lstrip('0x'))))[0]
+ changed = (register_vals['RDX'] != new_rdx)
+ register_vals['RDX'] = new_rdx
+ return changed, register_vals
+
@memoize
- def update_core_state(var, core, core_state):
+ def update_core_state(var, core, args, core_state):
core = unfreeze(core)
- (vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle) = unfreeze(core_state)
+ (vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle, register_vals) = unfreeze(core_state)
+ changed, register_vals = update_register_vals_with_core_args(core, args, register_vals)
cost = 0
if cur_instructions_in_cycle >= INSTRUCTIONS_PER_CYCLE:
cost += 1
@@ -282,37 +292,50 @@ def schedule(data, basepoint, do_print):
assert core_remaining_cycle_count[port['name']][0] == 0
core_remaining_cycle_count[port['name']][0] = port['latency']
core_remaining_cycle_count[port['name']] = sorted(core_remaining_cycle_count[port['name']])
- return (cost, freeze((vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle)))
+ return (cost, freeze((vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle, register_vals)))
@memoize
def evaluate_cost_memo(arg):
schedule, core_state = unfreeze_gen(arg)
schedule = unfreeze(schedule)
- (vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle) = unfreeze(core_state)
+ (vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle, register_vals) = unfreeze(core_state)
if len(schedule) == 0: return max([0] + list(vars_remaining_cycles.values()))
- (var, core), schedule = schedule[0], schedule[1:]
- cost, core_state = update_core_state(var, freeze(core), core_state)
+ (var, core, args), schedule = schedule[0], schedule[1:]
+ cost, core_state = update_core_state(var, freeze(core), args, core_state)
return cost + evaluate_cost_memo(freeze_gen((freeze(schedule), core_state)))
def get_wait_times(var_cores, core_state):
- for var, core in var_cores:
- cost, new_core_state = update_core_state(var, freeze(core), core_state)
- yield (cost, var, core, new_core_state)
+ for var, core, args in var_cores:
+ (vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle, register_vals) = unfreeze(core_state)
+ changed, register_vals = update_register_vals_with_core_args(core, args, register_vals)
+ cost, new_core_state = update_core_state(var, freeze(core), args, core_state)
+ yield (cost, -len(reverse_dependencies.get(var, [])), changed, -core['latency'], var, core, args, new_core_state)
+
+ def cmp_wait_time(v1, v2):
+ (cost1, neg_len_deps1, changed1, neg_latency1, var1, core1, args1, new_core_state1) = v1
+ (cost2, neg_len_deps2, changed2, neg_latency2, var2, core2, args2, new_core_state2) = v2
+ if cost1 != cost2: return cmp(cost1, cost2)
+ if core1['instruction'] == core2['instruction']:
+ if changed1 != changed2: return cmp(changed1, changed2)
+ if neg_len_deps1 != neg_len_deps2: return cmp(neg_len_deps1, neg_len_deps2)
+ if neg_latency1 != neg_latency2: return cmp(neg_latency1, neg_latency2)
+ if var1 != var2: return cmp(var1, var2)
+ return cmp(v1, v2)
def get_sorted_next_statements(next_var_cores, core_state):
- return sorted(get_wait_times(next_var_cores, core_state))
+ return sorted(get_wait_times(next_var_cores, core_state), cmp=cmp_wait_time)
def add_cycle_info(schedule):
core_state = freeze(make_initial_core_state())
schedule_with_cycle_info = []
cur_cycle = 0
- for var, core in schedule:
- cost, core_state = update_core_state(var, freeze(core), core_state)
+ for var, core, args in schedule:
+ cost, core_state = update_core_state(var, freeze(core), args, core_state)
+ cur_cycle += cost
schedule_with_cycle_info.append((var,
{'start':cur_cycle, 'finish':cur_cycle + core['latency']},
core))
- cur_cycle += cost
return schedule_with_cycle_info
def evaluate_cost(schedule_with_cycle_info):
@@ -324,26 +347,27 @@ def schedule(data, basepoint, do_print):
@memoize
def schedule_remaining(remaining_indices, core_state):
- def make_schedule(var, core):
- cost, new_core_state = update_core_state(var, freeze(core), core_state)
+ def make_schedule(var, core, args):
+ cost, new_core_state = update_core_state(var, freeze(core), args, core_state)
extra_cost, schedule = schedule_remaining(tuple(i for i in remaining_indices if i != var), new_core_state)
- return cost + extra_cost, ([(var, core)] + schedule)
+ return cost + extra_cost, ([(var, core, args)] + schedule)
next_statements = get_possible_next_statements(remaining_indices, dependencies)
min_cost, min_schedule = None, None
- var_cores = [(var, core)
+ var_cores = [(var, core, (lines[var]['args'] if var in lines.keys() else (var,)))
for var in next_statements
for core in (lines[var]['cores'] if var in lines.keys() else possible_cores_for_line({'op':'LOAD', 'type':'uint64_t'}))]
+
sorted_subset_next_statements = sorted_next_statements = get_sorted_next_statements(var_cores, core_state)
if len(sorted_next_statements) > 0:
pre_min_cost = sorted_next_statements[0][0]
# print((pre_min_cost, tuple(var for cost2, var, core, new_core_state in sorted_next_statements if pre_min_cost == cost2)))
sorted_subset_next_statements \
- = tuple((cost, var, core, new_core_state) for cost, var, core, new_core_state in sorted_next_statements
+ = tuple((cost, var, core, args, new_core_state) for cost, reverse_dep_count, changed, neg_latency, var, core, args, new_core_state in sorted_next_statements
if pre_min_cost == cost)
sorted_subset_next_statements = sorted_subset_next_statements[:1]
if pre_min_cost == 0: sorted_subset_next_statements = sorted_subset_next_statements[:1]
- for cost, var, core, new_core_state in sorted_subset_next_statements:
- cost, schedule = make_schedule(var, core)
+ for cost, var, core, args, new_core_state in sorted_subset_next_statements:
+ cost, schedule = make_schedule(var, core, args)
if min_cost is None or cost < min_cost:
min_cost, min_schedule = cost, schedule
# return min_cost, min_schedule