From c77ca6f20ce3082135744383aac5f02c84689958 Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Tue, 5 Sep 2017 02:27:09 -0400 Subject: Fix (hopefully) overlap in reg --- etc/compile-by-zinc/femulData0.dot | 232 +-- etc/compile-by-zinc/femulData0.png | Bin 2063288 -> 2076561 bytes etc/compile-by-zinc/femulData0.svg | 1572 ++++++++++---------- etc/compile-by-zinc/femulDisplayScheduled0.log | 226 +-- .../make-graph-with-reg-by-ac-buckets.py | 19 +- 5 files changed, 1026 insertions(+), 1023 deletions(-) (limited to 'etc') diff --git a/etc/compile-by-zinc/femulData0.dot b/etc/compile-by-zinc/femulData0.dot index 630f3a2cd..db07f732c 100644 --- a/etc/compile-by-zinc/femulData0.dot +++ b/etc/compile-by-zinc/femulData0.dot @@ -1,131 +1,131 @@ digraph G { - x5 [label="x5 (RDX)" , style="filled", fillcolor="red"]; - x7 [label="x7 (RDX)" , style="filled", fillcolor="red"]; - x9 [label="x9 (RDX)" , style="filled", fillcolor="red"]; - x10 [label="x10 (r15)" , style="filled", fillcolor="red"]; - x11 [label="x11 (RDX)" , style="filled", fillcolor="red"]; - x13 [label="x13 (r14)" , style="filled", fillcolor="red"]; - x15 [label="x15 (r11)" , style="filled", fillcolor="red"]; - x17 [label="x17 (r10)" , style="filled", fillcolor="red"]; - x18 [label="x18 (RDI)" , style="filled", fillcolor="red"]; - x19 [label="x19 (r8)" , style="filled", fillcolor="red"]; - x20_128_high [label="x20_128_high (r12)" , style="filled", fillcolor="red"]; - x20_128_low [label="x20_128_low (r13)" , style="filled", fillcolor="red"]; - x20_128_tmp [label="x20_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; - x21_128_high [label="x21_128_high (r13)" , style="filled", fillcolor="red"]; - x21_128_low [label="x21_128_low (r12)" , style="filled", fillcolor="red"]; - x21_128_tmp [label="x21_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; - x22_128_high [label="x22_128_high (r13)" , style="filled", fillcolor="red"]; - x22_128_low [label="x22_128_low (r12)" , style="filled", fillcolor="red"]; - x22_128_tmp [label="x22_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; - x24_128_high [label="x24_128_high (r12)" , style="filled", fillcolor="red"]; - x24_128_low [label="x24_128_low (r13)" , style="filled", fillcolor="red"]; - x24_128_tmp [label="x24_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; - x25_128_high [label="x25_128_high (r12)" , style="filled", fillcolor="red"]; - x25_128_low [label="x25_128_low (r13)" , style="filled", fillcolor="red"]; - x25_128_tmp [label="x25_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; - x27_128_high [label="x27_128_high (r12)" , style="filled", fillcolor="red"]; - x27_128_low [label="x27_128_low (r13)" , style="filled", fillcolor="red"]; - x27_128_tmp [label="x27_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; - x29_128_high [label="x29_128_high (r13)" , style="filled", fillcolor="red"]; - x29_128_low [label="x29_128_low (r12)" , style="filled", fillcolor="red"]; - x29_128_tmp [label="x29_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; - x30_128_high [label="x30_128_high (r13)" , style="filled", fillcolor="red"]; - x30_128_low [label="x30_128_low (r12)" , style="filled", fillcolor="red"]; - x30_128_tmp [label="x30_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; - x32_128_high [label="x32_128_high (r13)" , style="filled", fillcolor="red"]; - x32_128_low [label="x32_128_low (r12)" , style="filled", fillcolor="red"]; - x32_128_tmp [label="x32_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; - x34_128_high [label="x34_128_high (r13)" , style="filled", fillcolor="red"]; - x34_128_low [label="x34_128_low (r12)" , style="filled", fillcolor="red"]; - x34_128_tmp [label="x34_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; - x36_128_high [label="x36_128_high (r12)" , style="filled", fillcolor="red"]; - x36_128_low [label="x36_128_low (r13)" , style="filled", fillcolor="red"]; - x36_128_tmp [label="x36_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; - x37_128_high [label="x37_128_high (r12)" , style="filled", fillcolor="red"]; - x37_128_low [label="x37_128_low (r13)" , style="filled", fillcolor="red"]; - x37_128_tmp [label="x37_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; - x39_128_high [label="x39_128_high (r12)" , style="filled", fillcolor="red"]; - x39_128_low [label="x39_128_low (r13)" , style="filled", fillcolor="red"]; - x39_128_tmp [label="x39_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; - x41_128_high [label="x41_128_high (r12)" , style="filled", fillcolor="red"]; - x41_128_low [label="x41_128_low (r13)" , style="filled", fillcolor="red"]; - x41_128_tmp [label="x41_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; - x43_128_high [label="x43_128_high (r12)" , style="filled", fillcolor="red"]; - x43_128_low [label="x43_128_low (r13)" , style="filled", fillcolor="red"]; - x43_128_tmp [label="x43_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; - x45 [label="x45 (r15)" , style="filled", fillcolor="red"]; - x46 [label="x46 (RDX)" , style="filled", fillcolor="red"]; - x47 [label="x47 (RDX)" , style="filled", fillcolor="red"]; - x48 [label="x48 (RDX)" , style="filled", fillcolor="red"]; - x49_128_high [label="x49_128_high (r13)" , style="filled", fillcolor="red"]; - x49_128_low [label="x49_128_low (r12)" , style="filled", fillcolor="red"]; - x49_128_tmp [label="x49_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; - x51_128_high [label="x51_128_high (r13)" , style="filled", fillcolor="red"]; - x51_128_low [label="x51_128_low (r12)" , style="filled", fillcolor="red"]; - x51_128_tmp [label="x51_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; - x53_128_high [label="x53_128_high (r13)" , style="filled", fillcolor="red"]; - x53_128_low [label="x53_128_low (r12)" , style="filled", fillcolor="red"]; - x53_128_tmp [label="x53_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; - x55_128_high [label="x55_128_high (r13)" , style="filled", fillcolor="red"]; - x55_128_low [label="x55_128_low (r12)" , style="filled", fillcolor="red"]; - x55_128_tmp [label="x55_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + x5 [label="x5 (RAX)" , style="filled", fillcolor="red"]; + x7 [label="x7 (RAX)" , style="filled", fillcolor="red"]; + x9 [label="x9 (RAX)" , style="filled", fillcolor="red"]; + x10 [label="x10 (r18)" , style="filled", fillcolor="red"]; + x11 [label="x11 (RAX)" , style="filled", fillcolor="red"]; + x13 [label="x13 (r17)" , style="filled", fillcolor="red"]; + x15 [label="x15 (r12)" , style="filled", fillcolor="red"]; + x17 [label="x17 (r9)" , style="filled", fillcolor="red"]; + x18 [label="x18 (RCX)" , style="filled", fillcolor="red"]; + x19 [label="x19 (RDI)" , style="filled", fillcolor="red"]; + x20_128_high [label="x20_128_high (r15)" , style="filled", fillcolor="red"]; + x20_128_low [label="x20_128_low (r16)" , style="filled", fillcolor="red"]; + x20_128_tmp [label="x20_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; + x21_128_high [label="x21_128_high (r16)" , style="filled", fillcolor="red"]; + x21_128_low [label="x21_128_low (r15)" , style="filled", fillcolor="red"]; + x21_128_tmp [label="x21_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; + x22_128_high [label="x22_128_high (r16)" , style="filled", fillcolor="red"]; + x22_128_low [label="x22_128_low (r15)" , style="filled", fillcolor="red"]; + x22_128_tmp [label="x22_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; + x24_128_high [label="x24_128_high (r15)" , style="filled", fillcolor="red"]; + x24_128_low [label="x24_128_low (r16)" , style="filled", fillcolor="red"]; + x24_128_tmp [label="x24_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; + x25_128_high [label="x25_128_high (r15)" , style="filled", fillcolor="red"]; + x25_128_low [label="x25_128_low (r16)" , style="filled", fillcolor="red"]; + x25_128_tmp [label="x25_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; + x27_128_high [label="x27_128_high (r15)" , style="filled", fillcolor="red"]; + x27_128_low [label="x27_128_low (r16)" , style="filled", fillcolor="red"]; + x27_128_tmp [label="x27_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; + x29_128_high [label="x29_128_high (r16)" , style="filled", fillcolor="red"]; + x29_128_low [label="x29_128_low (r15)" , style="filled", fillcolor="red"]; + x29_128_tmp [label="x29_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; + x30_128_high [label="x30_128_high (r16)" , style="filled", fillcolor="red"]; + x30_128_low [label="x30_128_low (r15)" , style="filled", fillcolor="red"]; + x30_128_tmp [label="x30_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; + x32_128_high [label="x32_128_high (r16)" , style="filled", fillcolor="red"]; + x32_128_low [label="x32_128_low (r15)" , style="filled", fillcolor="red"]; + x32_128_tmp [label="x32_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; + x34_128_high [label="x34_128_high (r16)" , style="filled", fillcolor="red"]; + x34_128_low [label="x34_128_low (r15)" , style="filled", fillcolor="red"]; + x34_128_tmp [label="x34_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; + x36_128_high [label="x36_128_high (r15)" , style="filled", fillcolor="red"]; + x36_128_low [label="x36_128_low (r16)" , style="filled", fillcolor="red"]; + x36_128_tmp [label="x36_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; + x37_128_high [label="x37_128_high (r15)" , style="filled", fillcolor="red"]; + x37_128_low [label="x37_128_low (r16)" , style="filled", fillcolor="red"]; + x37_128_tmp [label="x37_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; + x39_128_high [label="x39_128_high (r15)" , style="filled", fillcolor="red"]; + x39_128_low [label="x39_128_low (r16)" , style="filled", fillcolor="red"]; + x39_128_tmp [label="x39_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; + x41_128_high [label="x41_128_high (r15)" , style="filled", fillcolor="red"]; + x41_128_low [label="x41_128_low (r16)" , style="filled", fillcolor="red"]; + x41_128_tmp [label="x41_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; + x43_128_high [label="x43_128_high (r15)" , style="filled", fillcolor="red"]; + x43_128_low [label="x43_128_low (r16)" , style="filled", fillcolor="red"]; + x43_128_tmp [label="x43_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; + x45 [label="x45 (r18)" , style="filled", fillcolor="red"]; + x46 [label="x46 (RAX)" , style="filled", fillcolor="red"]; + x47 [label="x47 (RAX)" , style="filled", fillcolor="red"]; + x48 [label="x48 (RAX)" , style="filled", fillcolor="red"]; + x49_128_high [label="x49_128_high (r16)" , style="filled", fillcolor="red"]; + x49_128_low [label="x49_128_low (r15)" , style="filled", fillcolor="red"]; + x49_128_tmp [label="x49_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; + x51_128_high [label="x51_128_high (r16)" , style="filled", fillcolor="red"]; + x51_128_low [label="x51_128_low (r15)" , style="filled", fillcolor="red"]; + x51_128_tmp [label="x51_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; + x53_128_high [label="x53_128_high (r16)" , style="filled", fillcolor="red"]; + x53_128_low [label="x53_128_low (r15)" , style="filled", fillcolor="red"]; + x53_128_tmp [label="x53_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; + x55_128_high [label="x55_128_high (r16)" , style="filled", fillcolor="red"]; + x55_128_low [label="x55_128_low (r15)" , style="filled", fillcolor="red"]; + x55_128_tmp [label="x55_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; cx56_128 [label="cx56_128 (c0)" , style="filled", fillcolor="red"]; - x56_128 [label="x50_128 + x52_128 + x54_128 + x56_128 (r15:r11)" , style="filled", fillcolor="red"]; + x56_128 [label="x50_128 + x52_128 + x54_128 + x56_128 (r10:r11)" , style="filled", fillcolor="red"]; x56_128_high [label="x50_128_high + x52_128_high + x54_128_high + x56_128_high (r11)" , style="filled", fillcolor="red"]; - x56_128_low [label="x50_128_low + x52_128_low + x54_128_low + x56_128_low (r15)" , style="filled", fillcolor="red"]; - x57_128_high [label="x57_128_high (r12)" , style="filled", fillcolor="red"]; - x57_128_low [label="x57_128_low (r13)" , style="filled", fillcolor="red"]; - x57_128_tmp [label="x57_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; - x59_128_high [label="x59_128_high (r12)" , style="filled", fillcolor="red"]; - x59_128_low [label="x59_128_low (r13)" , style="filled", fillcolor="red"]; - x59_128_tmp [label="x59_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; - x61_128_high [label="x61_128_high (r12)" , style="filled", fillcolor="red"]; - x61_128_low [label="x61_128_low (r13)" , style="filled", fillcolor="red"]; - x61_128_tmp [label="x61_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; - x63_128_high [label="x63_128_high (r13)" , style="filled", fillcolor="red"]; - x63_128_low [label="x63_128_low (r12)" , style="filled", fillcolor="red"]; - x63_128_tmp [label="x63_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; - x65_128_high [label="x65_128_high (r13)" , style="filled", fillcolor="red"]; - x65_128_low [label="x65_128_low (r12)" , style="filled", fillcolor="red"]; - x65_128_tmp [label="x65_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; - x67_128_high [label="x67_128_high (r12)" , style="filled", fillcolor="red"]; - x67_128_low [label="x67_128_low (r13)" , style="filled", fillcolor="red"]; - x67_128_tmp [label="x67_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x56_128_low [label="x50_128_low + x52_128_low + x54_128_low + x56_128_low (r10)" , style="filled", fillcolor="red"]; + x57_128_high [label="x57_128_high (r15)" , style="filled", fillcolor="red"]; + x57_128_low [label="x57_128_low (r16)" , style="filled", fillcolor="red"]; + x57_128_tmp [label="x57_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; + x59_128_high [label="x59_128_high (r15)" , style="filled", fillcolor="red"]; + x59_128_low [label="x59_128_low (r16)" , style="filled", fillcolor="red"]; + x59_128_tmp [label="x59_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; + x61_128_high [label="x61_128_high (r15)" , style="filled", fillcolor="red"]; + x61_128_low [label="x61_128_low (r16)" , style="filled", fillcolor="red"]; + x61_128_tmp [label="x61_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; + x63_128_high [label="x63_128_high (r16)" , style="filled", fillcolor="red"]; + x63_128_low [label="x63_128_low (r15)" , style="filled", fillcolor="red"]; + x63_128_tmp [label="x63_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; + x65_128_high [label="x65_128_high (r16)" , style="filled", fillcolor="red"]; + x65_128_low [label="x65_128_low (r15)" , style="filled", fillcolor="red"]; + x65_128_tmp [label="x65_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; + x67_128_high [label="x67_128_high (r15)" , style="filled", fillcolor="red"]; + x67_128_low [label="x67_128_low (r16)" , style="filled", fillcolor="red"]; + x67_128_tmp [label="x67_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; x69 [label="x69 (r11)" , style="filled", fillcolor="red"]; - x70 [label="x70 (r15)" , style="filled", fillcolor="red"]; + x70 [label="x70 (r10)" , style="filled", fillcolor="red"]; cx71_128 [label="cx71_128 (c0)" , style="filled", fillcolor="red"]; - x71_128 [label="x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (r9:r10)" , style="filled", fillcolor="red"]; - x71_128_high [label="x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (r10)" , style="filled", fillcolor="red"]; - x71_128_low [label="x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (r9)" , style="filled", fillcolor="red"]; - x72 [label="x72 (r10)" , style="filled", fillcolor="red"]; - x73 [label="x73 (r9)" , style="filled", fillcolor="red"]; + x71_128 [label="x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (RBP:r8)" , style="filled", fillcolor="red"]; + x71_128_high [label="x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (r8)" , style="filled", fillcolor="red"]; + x71_128_low [label="x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (RBP)" , style="filled", fillcolor="red"]; + x72 [label="x72 (r8)" , style="filled", fillcolor="red"]; + x73 [label="x73 (RBP)" , style="filled", fillcolor="red"]; cx74_128 [label="cx74_128 (c0)" , style="filled", fillcolor="red"]; - x74_128 [label="x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (RBP:r8)" , style="filled", fillcolor="red"]; - x74_128_high [label="x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (r8)" , style="filled", fillcolor="red"]; - x74_128_low [label="x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (RBP)" , style="filled", fillcolor="red"]; - x75 [label="x75 (r8)" , style="filled", fillcolor="red"]; - x76 [label="x76 (RBP)" , style="filled", fillcolor="red"]; + x74_128 [label="x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (RDX:RSI)" , style="filled", fillcolor="red"]; + x74_128_high [label="x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (RSI)" , style="filled", fillcolor="red"]; + x74_128_low [label="x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (RDX)" , style="filled", fillcolor="red"]; + x75 [label="x75 (RSI)" , style="filled", fillcolor="red"]; + x76 [label="x76 (RDX)" , style="filled", fillcolor="red"]; cx77_128 [label="cx77_128 (c0)" , style="filled", fillcolor="red"]; - x77_128 [label="x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (RSI:RDI)" , style="filled", fillcolor="red"]; - x77_128_high [label="x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (RDI)" , style="filled", fillcolor="red"]; - x77_128_low [label="x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (RSI)" , style="filled", fillcolor="red"]; - x78 [label="x78 (RDI)" , style="filled", fillcolor="red"]; - x79 [label="x79 (RSI)" , style="filled", fillcolor="red"]; + x77_128 [label="x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (RBX:r18)" , style="filled", fillcolor="red"]; + x77_128_high [label="x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (r18)" , style="filled", fillcolor="red"]; + x77_128_low [label="x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (RBX)" , style="filled", fillcolor="red"]; + x78 [label="x78 (r18)" , style="filled", fillcolor="red"]; + x79 [label="x79 (RBX)" , style="filled", fillcolor="red"]; cx80_128 [label="cx80_128 (c0)" , style="filled", fillcolor="red"]; - x80_128 [label="x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (r15:r14)" , style="filled", fillcolor="red"]; + x80_128 [label="x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (r13:r14)" , style="filled", fillcolor="red"]; x80_128_high [label="x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (r14)" , style="filled", fillcolor="red"]; - x80_128_low [label="x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (r15)" , style="filled", fillcolor="red"]; + x80_128_low [label="x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (r13)" , style="filled", fillcolor="red"]; x81 [label="x81 (r14)" , style="filled", fillcolor="red"]; - x82 [label="x82 (r15)" , style="filled", fillcolor="red"]; + x82 [label="x82 (r13)" , style="filled", fillcolor="red"]; x83 [label="x83 (r14)" , style="filled", fillcolor="red"]; - x84 [label="x84 (r15)" , style="filled", fillcolor="red"]; - x85 [label="x85 (r15)" , style="filled", fillcolor="red"]; + x84 [label="x84 (r10)" , style="filled", fillcolor="red"]; + x85 [label="x85 (r10)" , style="filled", fillcolor="red"]; x86 [label="x86 (r14)" , style="filled", fillcolor="red"]; - x87 [label="x87 (r15)" , style="filled", fillcolor="red"]; - x88 [label="x88 (r15)" , style="filled", fillcolor="red"]; - x89 [label="x89 (r9)" , style="filled", fillcolor="red"]; - x90 [label="x90 (r15)" , style="filled", fillcolor="red"]; + x87 [label="x87 (r10)" , style="filled", fillcolor="red"]; + x88 [label="x88 (r10)" , style="filled", fillcolor="red"]; + x89 [label="x89 (RBP)" , style="filled", fillcolor="red"]; + x90 [label="x90 (r10)" , style="filled", fillcolor="red"]; x56_128_low -> cx56_128 [ label="GET_CARRY" ] ; x71_128_low -> cx71_128 [ label="GET_CARRY" ] ; x74_128_low -> cx74_128 [ label="GET_CARRY" ] ; diff --git a/etc/compile-by-zinc/femulData0.png b/etc/compile-by-zinc/femulData0.png index fb080c265..1878e5b8d 100644 Binary files a/etc/compile-by-zinc/femulData0.png and b/etc/compile-by-zinc/femulData0.png differ diff --git a/etc/compile-by-zinc/femulData0.svg b/etc/compile-by-zinc/femulData0.svg index 51da9ca53..18f75f14d 100644 --- a/etc/compile-by-zinc/femulData0.svg +++ b/etc/compile-by-zinc/femulData0.svg @@ -11,499 +11,499 @@ x5 - -x5 (RDX) + +x5 (RAX) x20_128_tmp -x20_128_tmp (r13:r12) +x20_128_tmp (r16:r15) x5->x20_128_tmp - - -* + + +* x21_128_tmp -x21_128_tmp (r12:r13) +x21_128_tmp (r15:r16) x5->x21_128_tmp - - -* + + +* x24_128_tmp -x24_128_tmp (r13:r12) +x24_128_tmp (r16:r15) x5->x24_128_tmp - - -* + + +* x29_128_tmp -x29_128_tmp (r12:r13) +x29_128_tmp (r15:r16) x5->x29_128_tmp - - -* + + +* x36_128_tmp -x36_128_tmp (r13:r12) +x36_128_tmp (r16:r15) x5->x36_128_tmp - - -* + + +* x7 - -x7 (RDX) + +x7 (RAX) x22_128_tmp -x22_128_tmp (r12:r13) +x22_128_tmp (r15:r16) x7->x22_128_tmp - - -* + + +* x27_128_tmp -x27_128_tmp (r13:r12) +x27_128_tmp (r16:r15) x7->x27_128_tmp - - -* + + +* x32_128_tmp -x32_128_tmp (r12:r13) +x32_128_tmp (r15:r16) x7->x32_128_tmp - - -* + + +* x41_128_tmp -x41_128_tmp (r13:r12) +x41_128_tmp (r16:r15) x7->x41_128_tmp - - -* + + +* x46 - -x46 (RDX) + +x46 (RAX) x7->x46 - - -* + + +* x9 - -x9 (RDX) + +x9 (RAX) x25_128_tmp -x25_128_tmp (r13:r12) +x25_128_tmp (r16:r15) x9->x25_128_tmp - - -* + + +* x34_128_tmp -x34_128_tmp (r12:r13) +x34_128_tmp (r15:r16) x9->x34_128_tmp - - -* + + +* x43_128_tmp -x43_128_tmp (r13:r12) +x43_128_tmp (r16:r15) x9->x43_128_tmp - - -* + + +* x47 - -x47 (RDX) + +x47 (RAX) x9->x47 - - -* + + +* x10 - -x10 (r15) + +x10 (r18) x37_128_tmp -x37_128_tmp (r13:r12) +x37_128_tmp (r16:r15) x10->x37_128_tmp - - -* + + +* x45 - -x45 (r15) + +x45 (r18) x10->x45 - - -* + + +* x11 - -x11 (RDX) + +x11 (RAX) x30_128_tmp -x30_128_tmp (r12:r13) +x30_128_tmp (r15:r16) x11->x30_128_tmp - - -* + + +* x39_128_tmp -x39_128_tmp (r13:r12) +x39_128_tmp (r16:r15) x11->x39_128_tmp - - -* + + +* x48 - -x48 (RDX) + +x48 (RAX) x11->x48 - - -* + + +* x13 - -x13 (r14) + +x13 (r17) x13->x20_128_tmp - - -* + + +* x13->x22_128_tmp - - -* + + +* x13->x25_128_tmp - - -* + + +* x13->x30_128_tmp - - -* + + +* x13->x37_128_tmp - + -* +* x15 - -x15 (r11) + +x15 (r12) x15->x21_128_tmp - - -* + + +* x15->x27_128_tmp - - -* + + +* x15->x34_128_tmp - - -* + + +* x15->x39_128_tmp - + -* +* x49_128_tmp -x49_128_tmp (r12:r13) +x49_128_tmp (r15:r16) x15->x49_128_tmp - - -* + + +* x17 - -x17 (r10) + +x17 (r9) x17->x24_128_tmp - - -* + + +* x17->x32_128_tmp - - -* + + +* x17->x43_128_tmp - - -* + + +* x55_128_tmp -x55_128_tmp (r12:r13) +x55_128_tmp (r15:r16) x17->x55_128_tmp - - -* + + +* x57_128_tmp -x57_128_tmp (r13:r12) +x57_128_tmp (r16:r15) x17->x57_128_tmp - - -* + + +* x18 - -x18 (RDI) + +x18 (RCX) x18->x36_128_tmp - - -* + + +* x51_128_tmp -x51_128_tmp (r12:r13) +x51_128_tmp (r15:r16) x18->x51_128_tmp - - -* + + +* x59_128_tmp -x59_128_tmp (r13:r12) +x59_128_tmp (r16:r15) x18->x59_128_tmp - - -* + + +* x65_128_tmp -x65_128_tmp (r12:r13) +x65_128_tmp (r15:r16) x18->x65_128_tmp - - -* + + +* x67_128_tmp -x67_128_tmp (r13:r12) +x67_128_tmp (r16:r15) x18->x67_128_tmp - - -* + + +* x19 - -x19 (r8) + +x19 (RDI) x19->x29_128_tmp - - -* + + +* x19->x41_128_tmp - - -* + + +* x53_128_tmp -x53_128_tmp (r12:r13) +x53_128_tmp (r15:r16) x19->x53_128_tmp - - -* + + +* x61_128_tmp -x61_128_tmp (r13:r12) +x61_128_tmp (r16:r15) x19->x61_128_tmp - - -* + + +* x63_128_tmp -x63_128_tmp (r12:r13) +x63_128_tmp (r15:r16) x19->x63_128_tmp - - -* + + +* x20_128_high - -x20_128_high (r12) + +x20_128_high (r15) x56_128_high - -x50_128_high + x52_128_high + x54_128_high + x56_128_high (r11) + +x50_128_high + x52_128_high + x54_128_high + x56_128_high (r11) x20_128_high->x56_128_high - - -+ + + ++ x20_128_low -x20_128_low (r13) +x20_128_low (r16) x56_128_low - -x50_128_low + x52_128_low + x54_128_low + x56_128_low (r15) + +x50_128_low + x52_128_low + x54_128_low + x56_128_low (r10) x20_128_low->x56_128_low - - + + + x20_128_tmp->x20_128_high - - + + GET_HIGH @@ -514,230 +514,230 @@ x21_128_high - -x21_128_high (r13) + +x21_128_high (r16) x71_128_high - -x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (r10) + +x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (r8) x21_128_high->x71_128_high - - -+ + + ++ x21_128_low - -x21_128_low (r12) + +x21_128_low (r15) x71_128_low - -x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (r9) + +x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (RBP) x21_128_low->x71_128_low - - -+ + + ++ x21_128_tmp->x21_128_high - - -GET_HIGH + + +GET_HIGH x21_128_tmp->x21_128_low - - -GET_LOW + + +GET_LOW x22_128_high - -x22_128_high (r13) + +x22_128_high (r16) x22_128_high->x71_128_high - - -+ + + ++ x22_128_low - -x22_128_low (r12) + +x22_128_low (r15) x22_128_low->x71_128_low - - -+ + + ++ x22_128_tmp->x22_128_high - - -GET_HIGH + + +GET_HIGH x22_128_tmp->x22_128_low - - -GET_LOW + + +GET_LOW x24_128_high - -x24_128_high (r12) + +x24_128_high (r15) x74_128_high - -x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (r8) + +x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (RSI) x24_128_high->x74_128_high - - -+ + + ++ x24_128_low - -x24_128_low (r13) + +x24_128_low (r16) x74_128_low - -x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (RBP) + +x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (RDX) x24_128_low->x74_128_low - - -+ + + ++ x24_128_tmp->x24_128_high - - -GET_HIGH + + +GET_HIGH x24_128_tmp->x24_128_low - - -GET_LOW + + +GET_LOW x25_128_high - -x25_128_high (r12) + +x25_128_high (r15) x25_128_high->x74_128_high - - -+ + + ++ x25_128_low - -x25_128_low (r13) + +x25_128_low (r16) x25_128_low->x74_128_low - - -+ + + ++ x25_128_tmp->x25_128_high - - -GET_HIGH + + +GET_HIGH x25_128_tmp->x25_128_low - - -GET_LOW + + +GET_LOW x27_128_high - -x27_128_high (r12) + +x27_128_high (r15) x27_128_high->x74_128_high - - -+ + + ++ x27_128_low - -x27_128_low (r13) + +x27_128_low (r16) x27_128_low->x74_128_low - - -+ + + ++ x27_128_tmp->x27_128_high - - -GET_HIGH + + +GET_HIGH x27_128_tmp->x27_128_low - - -GET_LOW + + +GET_LOW x29_128_high - -x29_128_high (r13) + +x29_128_high (r16) x77_128_high - -x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (RDI) + +x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (r18) x29_128_high->x77_128_high - - -+ + + ++ x29_128_low -x29_128_low (r12) +x29_128_low (r15) x77_128_low - -x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (RSI) + +x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (RBX) x29_128_low->x77_128_low - - -+ + + ++ x29_128_tmp->x29_128_high - - + + GET_HIGH @@ -748,279 +748,279 @@ x30_128_high - -x30_128_high (r13) + +x30_128_high (r16) x30_128_high->x77_128_high - - -+ + + ++ x30_128_low - -x30_128_low (r12) + +x30_128_low (r15) x30_128_low->x77_128_low - - -+ + + ++ x30_128_tmp->x30_128_high - - -GET_HIGH + + +GET_HIGH x30_128_tmp->x30_128_low - - -GET_LOW + + +GET_LOW x32_128_high - -x32_128_high (r13) + +x32_128_high (r16) x32_128_high->x77_128_high - - -+ + + ++ x32_128_low - -x32_128_low (r12) + +x32_128_low (r15) x32_128_low->x77_128_low - - -+ + + ++ x32_128_tmp->x32_128_high - - -GET_HIGH + + +GET_HIGH x32_128_tmp->x32_128_low - - -GET_LOW + + +GET_LOW x34_128_high - -x34_128_high (r13) + +x34_128_high (r16) x34_128_high->x77_128_high - - -+ + + ++ x34_128_low - -x34_128_low (r12) + +x34_128_low (r15) x34_128_low->x77_128_low - - -+ + + ++ x34_128_tmp->x34_128_high - - -GET_HIGH + + +GET_HIGH x34_128_tmp->x34_128_low - - -GET_LOW + + +GET_LOW x36_128_high - -x36_128_high (r12) + +x36_128_high (r15) x80_128_high - -x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (r14) + +x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (r14) x36_128_high->x80_128_high - - -+ + + ++ x36_128_low - -x36_128_low (r13) + +x36_128_low (r16) x80_128_low - -x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (r15) + +x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (r13) x36_128_low->x80_128_low - - -+ + + ++ x36_128_tmp->x36_128_high - - -GET_HIGH + + +GET_HIGH x36_128_tmp->x36_128_low - - -GET_LOW + + +GET_LOW x37_128_high -x37_128_high (r12) +x37_128_high (r15) x37_128_high->x80_128_high - - + + + x37_128_low - -x37_128_low (r13) + +x37_128_low (r16) x37_128_low->x80_128_low - - -+ + + ++ x37_128_tmp->x37_128_high - + GET_HIGH x37_128_tmp->x37_128_low - - -GET_LOW + + +GET_LOW x39_128_high - -x39_128_high (r12) + +x39_128_high (r15) x39_128_high->x80_128_high - - + + + x39_128_low - -x39_128_low (r13) + +x39_128_low (r16) x39_128_low->x80_128_low - - -+ + + ++ x39_128_tmp->x39_128_high - - -GET_HIGH + + +GET_HIGH x39_128_tmp->x39_128_low - - -GET_LOW + + +GET_LOW x41_128_high -x41_128_high (r12) +x41_128_high (r15) x41_128_high->x80_128_high - - + + + x41_128_low -x41_128_low (r13) +x41_128_low (r16) x41_128_low->x80_128_low - - -+ + + ++ x41_128_tmp->x41_128_high - - -GET_HIGH + + +GET_HIGH x41_128_tmp->x41_128_low - + -GET_LOW +GET_LOW x43_128_high - -x43_128_high (r12) + +x43_128_high (r15) x43_128_high->x80_128_high - - -+ + + ++ x43_128_low -x43_128_low (r13) +x43_128_low (r16) x43_128_low->x80_128_low - - + + + x43_128_tmp->x43_128_high - - -GET_HIGH + + +GET_HIGH x43_128_tmp->x43_128_low @@ -1030,90 +1030,90 @@ x45->x49_128_tmp - - -* + + +* x45->x57_128_tmp - - -* + + +* x45->x63_128_tmp - - -* + + +* x45->x67_128_tmp - - -* + + +* x46->x51_128_tmp - - -* + + +* x47->x53_128_tmp - - -* + + +* x47->x59_128_tmp - - -* + + +* x48->x55_128_tmp - - -* + + +* x48->x61_128_tmp - - -* + + +* x48->x65_128_tmp - - -* + + +* x49_128_high - -x49_128_high (r13) + +x49_128_high (r16) x49_128_high->x56_128_high - - -+ + + ++ x49_128_low -x49_128_low (r12) +x49_128_low (r15) x49_128_low->x56_128_low - - + + + x49_128_tmp->x49_128_high - - + + GET_HIGH @@ -1124,31 +1124,31 @@ x51_128_high - -x51_128_high (r13) + +x51_128_high (r16) x51_128_high->x56_128_high - - -+ + + ++ x51_128_low -x51_128_low (r12) +x51_128_low (r15) x51_128_low->x56_128_low - - + + + x51_128_tmp->x51_128_high - - -GET_HIGH + + +GET_HIGH x51_128_tmp->x51_128_low @@ -1158,30 +1158,30 @@ x53_128_high - -x53_128_high (r13) + +x53_128_high (r16) x53_128_high->x56_128_high - - -+ + + ++ x53_128_low -x53_128_low (r12) +x53_128_low (r15) x53_128_low->x56_128_low - - + + + x53_128_tmp->x53_128_high - - + + GET_HIGH @@ -1193,23 +1193,23 @@ x55_128_high -x55_128_high (r13) +x55_128_high (r16) x55_128_high->x56_128_high - - -+ + + ++ x55_128_low -x55_128_low (r12) +x55_128_low (r15) x55_128_low->x56_128_low - - + + + @@ -1226,81 +1226,81 @@ cx56_128 - -cx56_128 (c0) + +cx56_128 (c0) cx56_128->x56_128_high - - -+ + + ++ x56_128 - -x50_128 + x52_128 + x54_128 + x56_128 (r15:r11) + +x50_128 + x52_128 + x54_128 + x56_128 (r10:r11) x69 - -x69 (r11) + +x69 (r11) x56_128->x69 - - ->> + + +>> x56_128_high->x56_128 - - -COMBINE + + +COMBINE x56_128_low->cx56_128 - - -GET_CARRY + + +GET_CARRY x56_128_low->x56_128 - - -COMBINE + + +COMBINE x70 - -x70 (r15) + +x70 (r10) x56_128_low->x70 - - -& + + +& x57_128_high -x57_128_high (r12) +x57_128_high (r15) x57_128_high->x71_128_high - - -+ + + ++ x57_128_low -x57_128_low (r13) +x57_128_low (r16) x57_128_low->x71_128_low - - -+ + + ++ x57_128_tmp->x57_128_high @@ -1310,98 +1310,98 @@ x57_128_tmp->x57_128_low - - + + GET_LOW x59_128_high - -x59_128_high (r12) + +x59_128_high (r15) x59_128_high->x71_128_high - - -+ + + ++ x59_128_low - -x59_128_low (r13) + +x59_128_low (r16) x59_128_low->x71_128_low - - -+ + + ++ x59_128_tmp->x59_128_high - - -GET_HIGH + + +GET_HIGH x59_128_tmp->x59_128_low - - -GET_LOW + + +GET_LOW x61_128_high - -x61_128_high (r12) + +x61_128_high (r15) x61_128_high->x71_128_high - - -+ + + ++ x61_128_low - -x61_128_low (r13) + +x61_128_low (r16) x61_128_low->x71_128_low - - -+ + + ++ x61_128_tmp->x61_128_high - - -GET_HIGH + + +GET_HIGH x61_128_tmp->x61_128_low - - -GET_LOW + + +GET_LOW x63_128_high -x63_128_high (r13) +x63_128_high (r16) x63_128_high->x74_128_high - - + + + x63_128_low -x63_128_low (r12) +x63_128_low (r15) x63_128_low->x74_128_low - - + + + @@ -1412,30 +1412,30 @@ x63_128_tmp->x63_128_low - - -GET_LOW + + +GET_LOW x65_128_high -x65_128_high (r13) +x65_128_high (r16) x65_128_high->x74_128_high - - + + + x65_128_low - -x65_128_low (r12) + +x65_128_low (r15) x65_128_low->x74_128_low - - + + + @@ -1446,482 +1446,482 @@ x65_128_tmp->x65_128_low - - -GET_LOW + + +GET_LOW x67_128_high - -x67_128_high (r12) + +x67_128_high (r15) x67_128_high->x77_128_high - - -+ + + ++ x67_128_low -x67_128_low (r13) +x67_128_low (r16) x67_128_low->x77_128_low - - -+ + + ++ x67_128_tmp->x67_128_high - - -GET_HIGH + + +GET_HIGH x67_128_tmp->x67_128_low - + -GET_LOW +GET_LOW x69->x71_128_low - - -+ + + ++ x84 - -x84 (r15) + +x84 (r10) x70->x84 - - -+ + + ++ cx71_128 - -cx71_128 (c0) + +cx71_128 (c0) cx71_128->x71_128_high - - -+ + + ++ x71_128 - -x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (r9:r10) + +x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (RBP:r8) x72 - -x72 (r10) + +x72 (r8) x71_128->x72 - - ->> + + +>> x71_128_high->x71_128 - - -COMBINE + + +COMBINE x71_128_low->cx71_128 - - -GET_CARRY + + +GET_CARRY x71_128_low->x71_128 - - -COMBINE + + +COMBINE x73 - -x73 (r9) + +x73 (RBP) x71_128_low->x73 - - -& + + +& x72->x74_128_low - - -+ + + ++ x87 - -x87 (r15) + +x87 (r10) x73->x87 - - -+ + + ++ cx74_128 - -cx74_128 (c0) + +cx74_128 (c0) cx74_128->x74_128_high - - -+ + + ++ x74_128 - -x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (RBP:r8) + +x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (RDX:RSI) x75 - -x75 (r8) + +x75 (RSI) x74_128->x75 - - ->> + + +>> x74_128_high->x74_128 - - -COMBINE + + +COMBINE x74_128_low->cx74_128 - - -GET_CARRY + + +GET_CARRY x74_128_low->x74_128 - - -COMBINE + + +COMBINE x76 - -x76 (RBP) + +x76 (RDX) x74_128_low->x76 - - -& + + +& x75->x77_128_low - - -+ + + ++ x90 - -x90 (r15) + +x90 (r10) x76->x90 - - -+ + + ++ cx77_128 - -cx77_128 (c0) + +cx77_128 (c0) cx77_128->x77_128_high - - -+ + + ++ x77_128 - -x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (RSI:RDI) + +x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (RBX:r18) x78 - -x78 (RDI) + +x78 (r18) x77_128->x78 - - ->> + + +>> x77_128_high->x77_128 - - -COMBINE + + +COMBINE x77_128_low->cx77_128 - - -GET_CARRY + + +GET_CARRY x77_128_low->x77_128 - - -COMBINE + + +COMBINE x79 - -x79 (RSI) + +x79 (RBX) x77_128_low->x79 - - -& + + +& x78->x80_128_low - - -+ + + ++ out - -out + +out x79->out - - + + cx80_128 - -cx80_128 (c0) + +cx80_128 (c0) cx80_128->x80_128_high - - -+ + + ++ x80_128 - -x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (r15:r14) + +x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (r13:r14) x81 - -x81 (r14) + +x81 (r14) x80_128->x81 - - ->> + + +>> x80_128_high->x80_128 - - -COMBINE + + +COMBINE x80_128_low->cx80_128 - - -GET_CARRY + + +GET_CARRY x80_128_low->x80_128 - - -COMBINE + + +COMBINE x82 - -x82 (r15) + +x82 (r13) x80_128_low->x82 - - -& + + +& x83 - -x83 (r14) + +x83 (r14) x81->x83 - - -* + + +* x82->out - - + + x83->x84 - - -+ + + ++ x85 - -x85 (r15) + +x85 (r10) x84->x85 - - ->> + + +>> x86 - -x86 (r14) + +x86 (r14) x84->x86 - - -& + + +& x85->x87 - - -+ + + ++ x86->out - - + + x88 - -x88 (r15) + +x88 (r10) x87->x88 - - ->> + + +>> x89 - -x89 (r9) + +x89 (RBP) x87->x89 - - -& + + +& x88->x90 - - -+ + + ++ x89->out - - + + x90->out - - + + in - -in + +in in->x5 - - + + in->x7 - - + + in->x9 - - + + in->x10 - - + + in->x11 - - + + in->x13 - - + + in->x15 - - + + in->x17 - - + + in->x18 - - + + in->x19 - - + + diff --git a/etc/compile-by-zinc/femulDisplayScheduled0.log b/etc/compile-by-zinc/femulDisplayScheduled0.log index bb5558197..82557e31e 100644 --- a/etc/compile-by-zinc/femulDisplayScheduled0.log +++ b/etc/compile-by-zinc/femulDisplayScheduled0.log @@ -1,117 +1,117 @@ // Convention is low_reg:high_reg -r15 <- LOAD x10; -r14 <- LOAD x13; -r13:r12 <- MULX r15, r14; // x37_tmp = x10 * x13 -r14 <- MOV r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -r15 <- MOV r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -r15 <- MULX r15, 0x13; // x45 = x10 * 0x13 -r11 <- LOAD x15; -r12:r13 <- MULX r15, r11; // x49_tmp = x45 * x15 -r11 <- MOV r13; // bucket: x50_high + x52_high + x54_high + x56_high -r15 <- MOV r12; // bucket: x50_low + x52_low + x54_low + x56_low -r10 <- LOAD x17; -r13:r12 <- MULX r15, r10; // x57_tmp = x45 * x17 -r10 <- MOV r12; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -r9 <- MOV r13; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -r8 <- LOAD x19; -r12:r13 <- MULX r15, r8; // x63_tmp = x45 * x19 -r8 <- MOV r13; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -RBP <- MOV r12; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -RDI <- LOAD x18; -r13:r12 <- MULX r15, RDI; // x67_tmp = x45 * x18 -RDI <- MOV r12; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -RSI <- MOV r13; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -RDX <- LOAD x11; -r12:r13 <- MULX RDX, r14; // x30_tmp = x11 * x13 -RDI <- ADX RDI, r13; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -RSI, (cx77) <- ADD RSI, r12; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -r13:r12 <- MULX RDX, r11; // x39_tmp = x11 * x15 -r14 <- ADX r14, r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -r15, (cx80) <- ADD r15, r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -RDX <- MULX RDX, 0x13; // x48 = x11 * 0x13 -r12:r13 <- MULX RDX, r10; // x55_tmp = x48 * x17 -r11 <- ADX r11, r13; // bucket: x50_high + x52_high + x54_high + x56_high -r15, (cx56) <- ADD r15, r12; // bucket: x50_low + x52_low + x54_low + x56_low -r13:r12 <- MULX RDX, r8; // x61_tmp = x48 * x19 -r10 <- ADX r10, r12; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -r9, (cx71) <- ADD r9, r13; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -r12:r13 <- MULX RDX, RDI; // x65_tmp = x48 * x18 -r8 <- ADX r8, r13; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -RBP, (cx74) <- ADD RBP, r12; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -RDX <- LOAD x9; -r13:r12 <- MULX RDX, r14; // x25_tmp = x9 * x13 -r8 <- ADX r8, r12; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -RBP, (cx74) <- ADC (cx74), RBP, r13; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -r12:r13 <- MULX RDX, r11; // x34_tmp = x9 * x15 -RDI <- ADX RDI, r13; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -RSI, (cx77) <- ADC (cx77), RSI, r12; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -r13:r12 <- MULX RDX, r10; // x43_tmp = x9 * x17 -r14 <- ADX r14, r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -r15, (cx80) <- ADC (cx80), r15, r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -RDX <- MULX RDX, 0x13; // x47 = x9 * 0x13 -r12:r13 <- MULX RDX, r8; // x53_tmp = x47 * x19 -r11 <- ADX r11, r13; // bucket: x50_high + x52_high + x54_high + x56_high -r15, (cx56) <- ADC (cx56), r15, r12; // bucket: x50_low + x52_low + x54_low + x56_low -r13:r12 <- MULX RDX, RDI; // x59_tmp = x47 * x18 -r10 <- ADX r10, r12; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -r9, (cx71) <- ADC (cx71), r9, r13; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -RDX <- LOAD x7; -r12:r13 <- MULX RDX, r14; // x22_tmp = x7 * x13 -r10 <- ADX r10, r13; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -r9, (cx71) <- ADC (cx71), r9, r12; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -r13:r12 <- MULX RDX, r11; // x27_tmp = x7 * x15 -r8 <- ADX r8, r12; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -RBP, (cx74) <- ADC (cx74), RBP, r13; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -r12:r13 <- MULX RDX, r10; // x32_tmp = x7 * x17 -RDI <- ADX RDI, r13; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -RSI, (cx77) <- ADC (cx77), RSI, r12; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -r13:r12 <- MULX RDX, r8; // x41_tmp = x7 * x19 -r14 <- ADX r14, r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -r15, (cx80) <- ADC (cx80), r15, r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -RDX <- MULX RDX, 0x13; // x46 = x7 * 0x13 -r12:r13 <- MULX RDX, RDI; // x51_tmp = x46 * x18 -r11 <- ADX r11, r13; // bucket: x50_high + x52_high + x54_high + x56_high -r15, (cx56) <- ADC (cx56), r15, r12; // bucket: x50_low + x52_low + x54_low + x56_low -RDX <- LOAD x5; -r13:r12 <- MULX RDX, r14; // x20_tmp = x5 * x13 -r11 <- ADX r11, r12; // bucket: x50_high + x52_high + x54_high + x56_high -r15, (cx56) <- ADC (cx56), r15, r13; // bucket: x50_low + x52_low + x54_low + x56_low +r18 <- LOAD x10; +r17 <- LOAD x13; +r16:r15 <- MULX r18, r17; // x37_tmp = x10 * x13 +r14 <- MOV r15; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +r13 <- MOV r16; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +r18 <- MULX r18, 0x13; // x45 = x10 * 0x13 +r12 <- LOAD x15; +r15:r16 <- MULX r18, r12; // x49_tmp = x45 * x15 +r11 <- MOV r16; // bucket: x50_high + x52_high + x54_high + x56_high +r10 <- MOV r15; // bucket: x50_low + x52_low + x54_low + x56_low +r9 <- LOAD x17; +r16:r15 <- MULX r18, r9; // x57_tmp = x45 * x17 +r8 <- MOV r15; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +RBP <- MOV r16; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +RDI <- LOAD x19; +r15:r16 <- MULX r18, RDI; // x63_tmp = x45 * x19 +RSI <- MOV r16; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +RDX <- MOV r15; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +RCX <- LOAD x18; +r16:r15 <- MULX r18, RCX; // x67_tmp = x45 * x18 +r18 <- MOV r15; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RBX <- MOV r16; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +RAX <- LOAD x11; +r15:r16 <- MULX RAX, r17; // x30_tmp = x11 * x13 +r18 <- ADX r18, r16; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RBX, (cx77) <- ADD RBX, r15; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +r16:r15 <- MULX RAX, r12; // x39_tmp = x11 * x15 +r14 <- ADX r14, r15; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +r13, (cx80) <- ADD r13, r16; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +RAX <- MULX RAX, 0x13; // x48 = x11 * 0x13 +r15:r16 <- MULX RAX, r9; // x55_tmp = x48 * x17 +r11 <- ADX r11, r16; // bucket: x50_high + x52_high + x54_high + x56_high +r10, (cx56) <- ADD r10, r15; // bucket: x50_low + x52_low + x54_low + x56_low +r16:r15 <- MULX RAX, RDI; // x61_tmp = x48 * x19 +r8 <- ADX r8, r15; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +RBP, (cx71) <- ADD RBP, r16; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +r15:r16 <- MULX RAX, RCX; // x65_tmp = x48 * x18 +RSI <- ADX RSI, r16; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +RDX, (cx74) <- ADD RDX, r15; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +RAX <- LOAD x9; +r16:r15 <- MULX RAX, r17; // x25_tmp = x9 * x13 +RSI <- ADX RSI, r15; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +RDX, (cx74) <- ADC (cx74), RDX, r16; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +r15:r16 <- MULX RAX, r12; // x34_tmp = x9 * x15 +r18 <- ADX r18, r16; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RBX, (cx77) <- ADC (cx77), RBX, r15; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +r16:r15 <- MULX RAX, r9; // x43_tmp = x9 * x17 +r14 <- ADX r14, r15; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +r13, (cx80) <- ADC (cx80), r13, r16; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +RAX <- MULX RAX, 0x13; // x47 = x9 * 0x13 +r15:r16 <- MULX RAX, RDI; // x53_tmp = x47 * x19 +r11 <- ADX r11, r16; // bucket: x50_high + x52_high + x54_high + x56_high +r10, (cx56) <- ADC (cx56), r10, r15; // bucket: x50_low + x52_low + x54_low + x56_low +r16:r15 <- MULX RAX, RCX; // x59_tmp = x47 * x18 +r8 <- ADX r8, r15; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +RBP, (cx71) <- ADC (cx71), RBP, r16; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +RAX <- LOAD x7; +r15:r16 <- MULX RAX, r17; // x22_tmp = x7 * x13 +r8 <- ADX r8, r16; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +RBP, (cx71) <- ADC (cx71), RBP, r15; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +r16:r15 <- MULX RAX, r12; // x27_tmp = x7 * x15 +RSI <- ADX RSI, r15; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +RDX, (cx74) <- ADC (cx74), RDX, r16; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +r15:r16 <- MULX RAX, r9; // x32_tmp = x7 * x17 +r18 <- ADX r18, r16; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RBX, (cx77) <- ADC (cx77), RBX, r15; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +r16:r15 <- MULX RAX, RDI; // x41_tmp = x7 * x19 +r14 <- ADX r14, r15; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +r13, (cx80) <- ADC (cx80), r13, r16; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +RAX <- MULX RAX, 0x13; // x46 = x7 * 0x13 +r15:r16 <- MULX RAX, RCX; // x51_tmp = x46 * x18 +r11 <- ADX r11, r16; // bucket: x50_high + x52_high + x54_high + x56_high +r10, (cx56) <- ADC (cx56), r10, r15; // bucket: x50_low + x52_low + x54_low + x56_low +RAX <- LOAD x5; +r16:r15 <- MULX RAX, r17; // x20_tmp = x5 * x13 +r11 <- ADX r11, r15; // bucket: x50_high + x52_high + x54_high + x56_high +r10, (cx56) <- ADC (cx56), r10, r16; // bucket: x50_low + x52_low + x54_low + x56_low r11 <- ADCX (cx56), r11, 0x0; // bucket: x50_high + x52_high + x54_high + x56_high -r15 <- AND r15, 0x7ffffffffffff; // x70 = x56_low & 0x7ffffffffffff -r12:r13 <- MULX RDX, r11; // x21_tmp = x5 * x15 -r11 <- SHR r15:r11, 0x33; // x69 = x56_low:x56_high >> 0x33 -r9, (cx71) <- ADC (cx71), r9, r11; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -r10 <- ADX r10, r13; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -r9, (cx71) <- ADC (cx71), r9, r12; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -r10 <- ADCX (cx71), r10, 0x0; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -r9 <- AND r9, 0x7ffffffffffff; // x73 = x71_low & 0x7ffffffffffff -r13:r12 <- MULX RDX, r10; // x24_tmp = x5 * x17 -r10 <- SHR r9:r10, 0x33; // x72 = x71_low:x71_high >> 0x33 -RBP, (cx74) <- ADC (cx74), RBP, r10; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -r8 <- ADX r8, r12; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -RBP, (cx74) <- ADC (cx74), RBP, r13; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -r8 <- ADCX (cx74), r8, 0x0; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -RBP <- AND RBP, 0x7ffffffffffff; // x76 = x74_low & 0x7ffffffffffff -r12:r13 <- MULX RDX, r8; // x29_tmp = x5 * x19 -r8 <- SHR RBP:r8, 0x33; // x75 = x74_low:x74_high >> 0x33 -RSI, (cx77) <- ADC (cx77), RSI, r8; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -RDI <- ADX RDI, r13; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -RSI, (cx77) <- ADC (cx77), RSI, r12; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -RDI <- ADCX (cx77), RDI, 0x0; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -RSI <- AND RSI, 0x7ffffffffffff; // x79 = x77_low & 0x7ffffffffffff -r13:r12 <- MULX RDX, RDI; // x36_tmp = x5 * x18 -RDI <- SHR RSI:RDI, 0x33; // x78 = x77_low:x77_high >> 0x33 -r15, (cx80) <- ADC (cx80), r15, RDI; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -r14 <- ADX r14, r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -r15, (cx80) <- ADC (cx80), r15, r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +r10 <- AND r10, 0x7ffffffffffff; // x70 = x56_low & 0x7ffffffffffff +r15:r16 <- MULX RAX, r12; // x21_tmp = x5 * x15 +r11 <- SHR r10:r11, 0x33; // x69 = x56_low:x56_high >> 0x33 +RBP, (cx71) <- ADC (cx71), RBP, r11; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +r8 <- ADX r8, r16; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +RBP, (cx71) <- ADC (cx71), RBP, r15; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +r8 <- ADCX (cx71), r8, 0x0; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +RBP <- AND RBP, 0x7ffffffffffff; // x73 = x71_low & 0x7ffffffffffff +r16:r15 <- MULX RAX, r9; // x24_tmp = x5 * x17 +r8 <- SHR RBP:r8, 0x33; // x72 = x71_low:x71_high >> 0x33 +RDX, (cx74) <- ADC (cx74), RDX, r8; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +RSI <- ADX RSI, r15; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +RDX, (cx74) <- ADC (cx74), RDX, r16; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +RSI <- ADCX (cx74), RSI, 0x0; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +RDX <- AND RDX, 0x7ffffffffffff; // x76 = x74_low & 0x7ffffffffffff +r15:r16 <- MULX RAX, RDI; // x29_tmp = x5 * x19 +RSI <- SHR RDX:RSI, 0x33; // x75 = x74_low:x74_high >> 0x33 +RBX, (cx77) <- ADC (cx77), RBX, RSI; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +r18 <- ADX r18, r16; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RBX, (cx77) <- ADC (cx77), RBX, r15; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +r18 <- ADCX (cx77), r18, 0x0; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RBX <- AND RBX, 0x7ffffffffffff; // x79 = x77_low & 0x7ffffffffffff +r16:r15 <- MULX RAX, RCX; // x36_tmp = x5 * x18 +r18 <- SHR RBX:r18, 0x33; // x78 = x77_low:x77_high >> 0x33 +r13, (cx80) <- ADC (cx80), r13, r18; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +r14 <- ADX r14, r15; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +r13, (cx80) <- ADC (cx80), r13, r16; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low r14 <- ADCX (cx80), r14, 0x0; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -r15 <- AND r15, 0x7ffffffffffff; // x82 = x80_low & 0x7ffffffffffff -r14 <- SHR r15:r14, 0x33; // x81 = x80_low:x80_high >> 0x33 +r13 <- AND r13, 0x7ffffffffffff; // x82 = x80_low & 0x7ffffffffffff +r14 <- SHR r13:r14, 0x33; // x81 = x80_low:x80_high >> 0x33 r14 <- MULX r14, 0x13; // x83 = x81 * 0x13 -r15 <- ADX r15, r14; // x84 = x70 + x83 -r15 <- SHR r15, 0x33; // x85 = x70 >> 0x33 -r14 <- AND r15, 0x7ffffffffffff; // x86 = x84 & 0x7ffffffffffff -r15 <- ADX r15, r9; // x87 = x85 + x73 -r15 <- SHR r15, 0x33; // x88 = x85 >> 0x33 -r9 <- AND r15, 0x7ffffffffffff; // x89 = x87 & 0x7ffffffffffff -r15 <- ADX r15, RBP; // x90 = x88 + x76 +r10 <- ADX r10, r14; // x84 = x70 + x83 +r10 <- SHR r10, 0x33; // x85 = x70 >> 0x33 +r14 <- AND r10, 0x7ffffffffffff; // x86 = x84 & 0x7ffffffffffff +r10 <- ADX r10, RBP; // x87 = x85 + x73 +r10 <- SHR r10, 0x33; // x88 = x85 >> 0x33 +RBP <- AND r10, 0x7ffffffffffff; // x89 = x87 & 0x7ffffffffffff +r10 <- ADX r10, RDX; // x90 = x88 + x76 diff --git a/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py b/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py index 6a7ddda1d..4ad14e50f 100755 --- a/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py +++ b/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py @@ -9,7 +9,7 @@ LAMBDA = u'\u03bb' OP_NAMES = {'*':'MUL', '+':'ADD', '>>':'SHL', '<<':'SHR', '|':'OR', '&':'AND'} REGISTERS = tuple(['RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP'] #, 'RSP'] # RSP is stack pointer? - + ['r%d' % i for i in range(8, 16)]) + + ['r%d' % i for i in range(8, 19)]) REGISTER_COLORS = ['color="black"', 'color="white",fillcolor="black"', 'color="maroon"', 'color="green"', 'fillcolor="olive"', 'color="navy"', 'color="purple"', 'fillcolor="teal"', 'fillcolor="silver"', 'fillcolor="gray"', 'fillcolor="red"', 'fillcolor="lime"', 'fillcolor="yellow"', 'fillcolor="blue"', 'fillcolor="fuschia"', 'fillcolor="aqua"'] @@ -262,17 +262,15 @@ def allocate_node(existing, node, *args): free_temps.append(reg) else: if reg not in free_list: + print('freeing %s from %s' % (reg, var)) free_list.append(reg) def do_free_deps(node): full_map.update(cur_map) - if deps_allocated(full_map, node): + if node['out'] in full_map.keys(): for dep in node['deps']: - if dep['out'] not in freed: - do_free(dep['out']) - freed.append(dep['out']) - elif node['out'] in full_map.keys(): - for dep in node['deps']: - if dep['out'] not in freed and dep['out'] in full_map.keys() and all(reg in all_temps for reg in full_map[dep['out']].split(':')): + if dep['out'] in freed or dep['out'] not in full_map.keys(): continue + if (all(deps_allocated(full_map, rdep) for rdep in dep['rev_deps']) or + all(reg in all_temps for reg in full_map[dep['out']].split(':'))): do_free(dep['out']) freed.append(dep['out']) if node['out'] in full_map.keys(): @@ -319,6 +317,7 @@ def allocate_node(existing, node, *args): if all(rdep is node or (rdep['out'] in full_map.keys() and full_map[rdep['out']] != full_map[dep['out']]) for rdep in dep['rev_deps']): cur_map[node['out']] = full_map[dep['out']] + freed += [dep['out']] else: cur_map[node['out']] = free_list.pop() emit_vars.append(node) @@ -405,12 +404,14 @@ def push_allocate(existing, nodes, *args, **kwargs): fill_node(carry_node) fill_node(shr_node) fill_node(and_node) + freed += [node['out'], carry_node['out'], high_node['out'], combine_node['out']] elif node['out'] in full_map.keys() and len(node['rev_deps']) == 1 and all(d['out'] not in full_map.keys() for d in node['rev_deps']) and len(node['rev_deps'][0]['deps']) == 1 and node['type'] == node['rev_deps'][0]['type']: next_node = node['rev_deps'][0] cur_map[next_node['out']] = full_map[node['out']] emit_vars.append(next_node) fill_node(next_node) full_map.update(cur_map) + freed += [node['out']] elif node['out'] not in full_map.keys() and len(node['rev_deps']) == 2 and len(node['deps']) == 2 and all(d['out'] not in full_map.keys() for d in node['rev_deps']) and all(d['out'] in full_map.keys() for d in node['deps']) and node['type'] == 'uint64_t' and all(d['type'] == 'uint64_t' for d in node['rev_deps']) and all(d['type'] == 'uint64_t' for d in node['deps']): from1, from2 = node['deps'] to1, to2 = node['rev_deps'] @@ -425,6 +426,7 @@ def push_allocate(existing, nodes, *args, **kwargs): fill_node(to1) fill_node(to2) full_map.update(cur_map) + freed += [node['out'], from1['out'], from2['out']] elif node['out'] not in full_map.keys() and len(node['rev_deps']) == 0 and len(node['deps']) == 2 and all(d['out'] not in full_map.keys() for d in node['rev_deps']) and all(d['out'] in full_map.keys() for d in node['deps']) and node['type'] == 'uint64_t' and all(d['type'] == 'uint64_t' for d in node['rev_deps']) and all(d['type'] == 'uint64_t' for d in node['deps']): from1, from2 = node['deps'] assert(full_map[from1['out']] != full_map[from2['out']]) @@ -432,6 +434,7 @@ def push_allocate(existing, nodes, *args, **kwargs): emit_vars.append(node) fill_node(node) full_map.update(cur_map) + freed += [from1['out'], from2['out']] full_map.update(cur_map) args = (cur_map, tuple(free_temps), tuple(free_list), tuple(all_temps), tuple(freed), tuple(new_buckets), tuple(emit_vars)) kwargs['seen'].add(node['out']) -- cgit v1.2.3