aboutsummaryrefslogtreecommitdiff
path: root/etc/compile-by-zinc/heuristic-search.py
diff options
context:
space:
mode:
Diffstat (limited to 'etc/compile-by-zinc/heuristic-search.py')
-rwxr-xr-xetc/compile-by-zinc/heuristic-search.py66
1 files changed, 45 insertions, 21 deletions
diff --git a/etc/compile-by-zinc/heuristic-search.py b/etc/compile-by-zinc/heuristic-search.py
index ea528a957..ba979f5dd 100755
--- a/etc/compile-by-zinc/heuristic-search.py
+++ b/etc/compile-by-zinc/heuristic-search.py
@@ -222,7 +222,8 @@ def schedule(data, basepoint, do_print):
core_remaining_cycle_count = dict([(core_name, [0] * core_count) for core_name, core_count in CORE_DATA]
+ [(core_name, [0]) for core_name in REGISTERS])
cur_instructions_in_cycle = 0
- return vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle
+ register_vals = dict((var, None) for var in REGISTERS)
+ return vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle, register_vals
def freeze_gen(v, rec=(lambda x:x)):
if isinstance(v, list):
@@ -252,10 +253,19 @@ def schedule(data, basepoint, do_print):
def unfreeze(v):
return unfreeze_gen(v, unfreeze)
+ def update_register_vals_with_core_args(core, args, register_vals):
+ new_rdx = register_vals['RDX']
+ if 'MULX' in core['instruction']:
+ new_rdx = sorted(args, key=(lambda x: int(x.lstrip('0x'))))[0]
+ changed = (register_vals['RDX'] != new_rdx)
+ register_vals['RDX'] = new_rdx
+ return changed, register_vals
+
@memoize
- def update_core_state(var, core, core_state):
+ def update_core_state(var, core, args, core_state):
core = unfreeze(core)
- (vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle) = unfreeze(core_state)
+ (vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle, register_vals) = unfreeze(core_state)
+ changed, register_vals = update_register_vals_with_core_args(core, args, register_vals)
cost = 0
if cur_instructions_in_cycle >= INSTRUCTIONS_PER_CYCLE:
cost += 1
@@ -282,37 +292,50 @@ def schedule(data, basepoint, do_print):
assert core_remaining_cycle_count[port['name']][0] == 0
core_remaining_cycle_count[port['name']][0] = port['latency']
core_remaining_cycle_count[port['name']] = sorted(core_remaining_cycle_count[port['name']])
- return (cost, freeze((vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle)))
+ return (cost, freeze((vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle, register_vals)))
@memoize
def evaluate_cost_memo(arg):
schedule, core_state = unfreeze_gen(arg)
schedule = unfreeze(schedule)
- (vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle) = unfreeze(core_state)
+ (vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle, register_vals) = unfreeze(core_state)
if len(schedule) == 0: return max([0] + list(vars_remaining_cycles.values()))
- (var, core), schedule = schedule[0], schedule[1:]
- cost, core_state = update_core_state(var, freeze(core), core_state)
+ (var, core, args), schedule = schedule[0], schedule[1:]
+ cost, core_state = update_core_state(var, freeze(core), args, core_state)
return cost + evaluate_cost_memo(freeze_gen((freeze(schedule), core_state)))
def get_wait_times(var_cores, core_state):
- for var, core in var_cores:
- cost, new_core_state = update_core_state(var, freeze(core), core_state)
- yield (cost, var, core, new_core_state)
+ for var, core, args in var_cores:
+ (vars_remaining_cycles, core_remaining_cycle_count, cur_instructions_in_cycle, register_vals) = unfreeze(core_state)
+ changed, register_vals = update_register_vals_with_core_args(core, args, register_vals)
+ cost, new_core_state = update_core_state(var, freeze(core), args, core_state)
+ yield (cost, -len(reverse_dependencies.get(var, [])), changed, -core['latency'], var, core, args, new_core_state)
+
+ def cmp_wait_time(v1, v2):
+ (cost1, neg_len_deps1, changed1, neg_latency1, var1, core1, args1, new_core_state1) = v1
+ (cost2, neg_len_deps2, changed2, neg_latency2, var2, core2, args2, new_core_state2) = v2
+ if cost1 != cost2: return cmp(cost1, cost2)
+ if core1['instruction'] == core2['instruction']:
+ if changed1 != changed2: return cmp(changed1, changed2)
+ if neg_len_deps1 != neg_len_deps2: return cmp(neg_len_deps1, neg_len_deps2)
+ if neg_latency1 != neg_latency2: return cmp(neg_latency1, neg_latency2)
+ if var1 != var2: return cmp(var1, var2)
+ return cmp(v1, v2)
def get_sorted_next_statements(next_var_cores, core_state):
- return sorted(get_wait_times(next_var_cores, core_state))
+ return sorted(get_wait_times(next_var_cores, core_state), cmp=cmp_wait_time)
def add_cycle_info(schedule):
core_state = freeze(make_initial_core_state())
schedule_with_cycle_info = []
cur_cycle = 0
- for var, core in schedule:
- cost, core_state = update_core_state(var, freeze(core), core_state)
+ for var, core, args in schedule:
+ cost, core_state = update_core_state(var, freeze(core), args, core_state)
+ cur_cycle += cost
schedule_with_cycle_info.append((var,
{'start':cur_cycle, 'finish':cur_cycle + core['latency']},
core))
- cur_cycle += cost
return schedule_with_cycle_info
def evaluate_cost(schedule_with_cycle_info):
@@ -324,26 +347,27 @@ def schedule(data, basepoint, do_print):
@memoize
def schedule_remaining(remaining_indices, core_state):
- def make_schedule(var, core):
- cost, new_core_state = update_core_state(var, freeze(core), core_state)
+ def make_schedule(var, core, args):
+ cost, new_core_state = update_core_state(var, freeze(core), args, core_state)
extra_cost, schedule = schedule_remaining(tuple(i for i in remaining_indices if i != var), new_core_state)
- return cost + extra_cost, ([(var, core)] + schedule)
+ return cost + extra_cost, ([(var, core, args)] + schedule)
next_statements = get_possible_next_statements(remaining_indices, dependencies)
min_cost, min_schedule = None, None
- var_cores = [(var, core)
+ var_cores = [(var, core, (lines[var]['args'] if var in lines.keys() else (var,)))
for var in next_statements
for core in (lines[var]['cores'] if var in lines.keys() else possible_cores_for_line({'op':'LOAD', 'type':'uint64_t'}))]
+
sorted_subset_next_statements = sorted_next_statements = get_sorted_next_statements(var_cores, core_state)
if len(sorted_next_statements) > 0:
pre_min_cost = sorted_next_statements[0][0]
# print((pre_min_cost, tuple(var for cost2, var, core, new_core_state in sorted_next_statements if pre_min_cost == cost2)))
sorted_subset_next_statements \
- = tuple((cost, var, core, new_core_state) for cost, var, core, new_core_state in sorted_next_statements
+ = tuple((cost, var, core, args, new_core_state) for cost, reverse_dep_count, changed, neg_latency, var, core, args, new_core_state in sorted_next_statements
if pre_min_cost == cost)
sorted_subset_next_statements = sorted_subset_next_statements[:1]
if pre_min_cost == 0: sorted_subset_next_statements = sorted_subset_next_statements[:1]
- for cost, var, core, new_core_state in sorted_subset_next_statements:
- cost, schedule = make_schedule(var, core)
+ for cost, var, core, args, new_core_state in sorted_subset_next_statements:
+ cost, schedule = make_schedule(var, core, args)
if min_cost is None or cost < min_cost:
min_cost, min_schedule = cost, schedule
# return min_cost, min_schedule