about | summary | refs | log | tree | commit | diff
path: root/etc
diff options
context:
space:
mode:
author Jason Gross <jgross@mit.edu> 2017-09-05 02:02:15 -0400
committer Jason Gross <jgross@mit.edu> 2017-09-05 02:02:15 -0400
commit e951f6622a943c101aa4a34e504f01dd94e1f8d5 (patch)
tree 721ba4ecb9d35318df6a718f2cc2a545549760ec /etc
parent c2c87235121f071eeca2b137b7a5a9a3dc0a970c (diff)
Fix schedule to be in correct order
Diffstat (limited to 'etc')
-rw-r--r-- etc/compile-by-zinc/femulDisplayScheduled0.log 72
-rwxr-xr-x etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py 31
2 files changed, 64 insertions, 39 deletions
diff --git a/etc/compile-by-zinc/femulDisplayScheduled0.log b/etc/compile-by-zinc/femulDisplayScheduled0.log
index efe70652a..bb5558197 100644
--- a/etc/compile-by-zinc/femulDisplayScheduled0.log
+++ b/etc/compile-by-zinc/femulDisplayScheduled0.log
@@ -4,71 +4,40 @@ r14 <- LOAD x13;
r13:r12 <- MULX r15, r14; // x37_tmp = x10 * x13
r14 <- MOV r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high
r15 <- MOV r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low
-r14 <- ADCX (cx80), r14, 0x0; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high
-r14 <- ADX r14, c0; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high
-r14 <- SHR r15:r14, 0x33; // x81 = x80_low:x80_high >> 0x33
-r15 <- AND r15, 0x7ffffffffffff; // x82 = x80_low & 0x7ffffffffffff
r15 <- MULX r15, 0x13; // x45 = x10 * 0x13
r11 <- LOAD x15;
r12:r13 <- MULX r15, r11; // x49_tmp = x45 * x15
r11 <- MOV r13; // bucket: x50_high + x52_high + x54_high + x56_high
r15 <- MOV r12; // bucket: x50_low + x52_low + x54_low + x56_low
-r11 <- ADCX (cx56), r11, 0x0; // bucket: x50_high + x52_high + x54_high + x56_high
-r11 <- ADX r11, c0; // bucket: x50_high + x52_high + x54_high + x56_high
-r11 <- SHR r15:r11, 0x33; // x69 = x56_low:x56_high >> 0x33
-r9 <- MOV r11; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low
-r15 <- AND r15, 0x7ffffffffffff; // x70 = x56_low & 0x7ffffffffffff
r10 <- LOAD x17;
r13:r12 <- MULX r15, r10; // x57_tmp = x45 * x17
r10 <- MOV r12; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high
-r9, (cx71) <- ADD r9, r13; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low
-r10 <- ADCX (cx71), r10, 0x0; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high
-r10 <- ADX r10, c0; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high
-r10 <- SHR r9:r10, 0x33; // x72 = x71_low:x71_high >> 0x33
-RBP <- MOV r10; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low
-r9 <- AND r9, 0x7ffffffffffff; // x73 = x71_low & 0x7ffffffffffff
-r15 <- ADX r15, r14; // x84 = x70 + x83
-r15 <- SHR r15, 0x33; // x85 = x70 >> 0x33
-r14 <- AND r15, 0x7ffffffffffff; // x86 = x84 & 0x7ffffffffffff
-r15 <- ADX r15, r9; // x87 = x85 + x73
-r15 <- SHR r15, 0x33; // x88 = x85 >> 0x33
-r9 <- AND r15, 0x7ffffffffffff; // x89 = x87 & 0x7ffffffffffff
+r9 <- MOV r13; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low
r8 <- LOAD x19;
r12:r13 <- MULX r15, r8; // x63_tmp = x45 * x19
r8 <- MOV r13; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high
-RBP, (cx74) <- ADD RBP, r12; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low
-r8 <- ADCX (cx74), r8, 0x0; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high
-r8 <- ADX r8, c0; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high
-r8 <- SHR RBP:r8, 0x33; // x75 = x74_low:x74_high >> 0x33
-RSI <- MOV r8; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low
-RBP <- AND RBP, 0x7ffffffffffff; // x76 = x74_low & 0x7ffffffffffff
-r15 <- ADX r15, RBP; // x90 = x88 + x76
+RBP <- MOV r12; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low
RDI <- LOAD x18;
r13:r12 <- MULX r15, RDI; // x67_tmp = x45 * x18
RDI <- MOV r12; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high
-RSI, (cx77) <- ADD RSI, r13; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low
-RDI <- ADCX (cx77), RDI, 0x0; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high
-RDI <- ADX RDI, c0; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high
-RDI <- SHR RSI:RDI, 0x33; // x78 = x77_low:x77_high >> 0x33
-r15, (cx80) <- ADD r15, RDI; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low
-RSI <- AND RSI, 0x7ffffffffffff; // x79 = x77_low & 0x7ffffffffffff
+RSI <- MOV r13; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low
RDX <- LOAD x11;
r12:r13 <- MULX RDX, r14; // x30_tmp = x11 * x13
RDI <- ADX RDI, r13; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high
-RSI, (cx77) <- ADC (cx77), RSI, r12; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low
+RSI, (cx77) <- ADD RSI, r12; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low
r13:r12 <- MULX RDX, r11; // x39_tmp = x11 * x15
r14 <- ADX r14, r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high
-r15, (cx80) <- ADC (cx80), r15, r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low
+r15, (cx80) <- ADD r15, r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low
RDX <- MULX RDX, 0x13; // x48 = x11 * 0x13
r12:r13 <- MULX RDX, r10; // x55_tmp = x48 * x17
r11 <- ADX r11, r13; // bucket: x50_high + x52_high + x54_high + x56_high
r15, (cx56) <- ADD r15, r12; // bucket: x50_low + x52_low + x54_low + x56_low
r13:r12 <- MULX RDX, r8; // x61_tmp = x48 * x19
r10 <- ADX r10, r12; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high
-r9, (cx71) <- ADC (cx71), r9, r13; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low
+r9, (cx71) <- ADD r9, r13; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low
r12:r13 <- MULX RDX, RDI; // x65_tmp = x48 * x18
r8 <- ADX r8, r13; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high
-RBP, (cx74) <- ADC (cx74), RBP, r12; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low
+RBP, (cx74) <- ADD RBP, r12; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low
RDX <- LOAD x9;
r13:r12 <- MULX RDX, r14; // x25_tmp = x9 * x13
r8 <- ADX r8, r12; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high
@@ -107,15 +76,42 @@ RDX <- LOAD x5;
r13:r12 <- MULX RDX, r14; // x20_tmp = x5 * x13
r11 <- ADX r11, r12; // bucket: x50_high + x52_high + x54_high + x56_high
r15, (cx56) <- ADC (cx56), r15, r13; // bucket: x50_low + x52_low + x54_low + x56_low
+r11 <- ADCX (cx56), r11, 0x0; // bucket: x50_high + x52_high + x54_high + x56_high
+r15 <- AND r15, 0x7ffffffffffff; // x70 = x56_low & 0x7ffffffffffff
r12:r13 <- MULX RDX, r11; // x21_tmp = x5 * x15
+r11 <- SHR r15:r11, 0x33; // x69 = x56_low:x56_high >> 0x33
+r9, (cx71) <- ADC (cx71), r9, r11; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low
r10 <- ADX r10, r13; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high
r9, (cx71) <- ADC (cx71), r9, r12; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low
+r10 <- ADCX (cx71), r10, 0x0; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high
+r9 <- AND r9, 0x7ffffffffffff; // x73 = x71_low & 0x7ffffffffffff
r13:r12 <- MULX RDX, r10; // x24_tmp = x5 * x17
+r10 <- SHR r9:r10, 0x33; // x72 = x71_low:x71_high >> 0x33
+RBP, (cx74) <- ADC (cx74), RBP, r10; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low
r8 <- ADX r8, r12; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high
RBP, (cx74) <- ADC (cx74), RBP, r13; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low
+r8 <- ADCX (cx74), r8, 0x0; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high
+RBP <- AND RBP, 0x7ffffffffffff; // x76 = x74_low & 0x7ffffffffffff
r12:r13 <- MULX RDX, r8; // x29_tmp = x5 * x19
+r8 <- SHR RBP:r8, 0x33; // x75 = x74_low:x74_high >> 0x33
+RSI, (cx77) <- ADC (cx77), RSI, r8; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low
RDI <- ADX RDI, r13; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high
RSI, (cx77) <- ADC (cx77), RSI, r12; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low
+RDI <- ADCX (cx77), RDI, 0x0; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high
+RSI <- AND RSI, 0x7ffffffffffff; // x79 = x77_low & 0x7ffffffffffff
r13:r12 <- MULX RDX, RDI; // x36_tmp = x5 * x18
+RDI <- SHR RSI:RDI, 0x33; // x78 = x77_low:x77_high >> 0x33
+r15, (cx80) <- ADC (cx80), r15, RDI; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low
r14 <- ADX r14, r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high
r15, (cx80) <- ADC (cx80), r15, r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low
+r14 <- ADCX (cx80), r14, 0x0; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high
+r15 <- AND r15, 0x7ffffffffffff; // x82 = x80_low & 0x7ffffffffffff
+r14 <- SHR r15:r14, 0x33; // x81 = x80_low:x80_high >> 0x33
+r14 <- MULX r14, 0x13; // x83 = x81 * 0x13
+r15 <- ADX r15, r14; // x84 = x70 + x83
+r15 <- SHR r15, 0x33; // x85 = x70 >> 0x33
+r14 <- AND r15, 0x7ffffffffffff; // x86 = x84 & 0x7ffffffffffff
+r15 <- ADX r15, r9; // x87 = x85 + x73
+r15 <- SHR r15, 0x33; // x88 = x85 >> 0x33
+r9 <- AND r15, 0x7ffffffffffff; // x89 = x87 & 0x7ffffffffffff
+r15 <- ADX r15, RBP; // x90 = x88 + x76
diff --git a/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py b/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py
index 6396cb6ed..6a7ddda1d 100755
--- a/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py
+++ b/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py
@@ -408,6 +408,7 @@ def push_allocate(existing, nodes, *args, **kwargs):
elif node['out'] in full_map.keys() and len(node['rev_deps']) == 1 and all(d['out'] not in full_map.keys() for d in node['rev_deps']) and len(node['rev_deps'][0]['deps']) == 1 and node['type'] == node['rev_deps'][0]['type']:
next_node = node['rev_deps'][0]
cur_map[next_node['out']] = full_map[node['out']]
+ emit_vars.append(next_node)
fill_node(next_node)
full_map.update(cur_map)
elif node['out'] not in full_map.keys() and len(node['rev_deps']) == 2 and len(node['deps']) == 2 and all(d['out'] not in full_map.keys() for d in node['rev_deps']) and all(d['out'] in full_map.keys() for d in node['deps']) and node['type'] == 'uint64_t' and all(d['type'] == 'uint64_t' for d in node['rev_deps']) and all(d['type'] == 'uint64_t' for d in node['deps']):
@@ -479,10 +480,38 @@ def print_graph(graph, allocs):
body += ''.join(' %s -> out ;\n' % node['out'] for node in graph['out'].values())
return ('digraph G {\n' + body + '}\n')
+def fix_emit_vars(emit_vars):
+ ret = []
+ waiting = []
+ seen = set()
+ for node in emit_vars:
+ waiting.append(node)
+ new_waiting = []
+ for wnode in waiting:
+ if all(dep['out'] in seen for dep in wnode['deps']):
+ ret.append(wnode)
+ seen.add(wnode['out'])
+ else:
+ new_waiting.append(wnode)
+ waiting = new_waiting
+ while len(waiting) > 0:
+ print('Waiting on...')
+ print(list(sorted(node['out'] for node in waiting)))
+ new_waiting = []
+ for wnode in waiting:
+ if all(dep['out'] in seen for dep in wnode['deps']):
+ ret.append(wnode)
+ seen.add(wnode['out'])
+ else:
+ new_waiting.append(wnode)
+ waiting = new_waiting
+ return tuple(ret)
+
def schedule(input_data, existing, emit_vars):
ret = ''
buckets_seen = set()
buckets_carried = set()
+ emit_vars = fix_emit_vars(emit_vars)
ret += ('// Convention is low_reg:high_reg\n')
for node in emit_vars:
if node['op'] == 'INPUT':
@@ -585,7 +614,7 @@ def schedule(input_data, existing, emit_vars):
pass
else:
raw_input((node['out'], node['op']))
- if node['op'] not in ('GET_HIGH', 'GET_LOW', 'COMBINE'):
+ if node['op'] not in ('GET_HIGH', 'GET_LOW', 'COMBINE', 'GET_CARRY'):
for rdep in node['rev_deps']:
if len(rdep['extra_out']) > 0 and rdep['op'] == '+':
if rdep['out'] not in buckets_seen: