From c75ed478fa1d3d2fc8336818cdefe0b1c0ba72ef Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Mon, 4 Sep 2017 23:37:39 -0400 Subject: WIP on reg alloc --- etc/compile-by-zinc/femulData0.dot | 514 ++-- etc/compile-by-zinc/femulData0.png | Bin 648863 -> 1730398 bytes etc/compile-by-zinc/femulData0.svg | 3092 ++++++++++++-------- .../make-graph-with-reg-by-ac-buckets.py | 443 +++ etc/compile-by-zinc/make-graph-with-reg.py | 38 +- 5 files changed, 2619 insertions(+), 1468 deletions(-) create mode 100755 etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py (limited to 'etc') diff --git a/etc/compile-by-zinc/femulData0.dot b/etc/compile-by-zinc/femulData0.dot index e4caf9720..df4261daa 100644 --- a/etc/compile-by-zinc/femulData0.dot +++ b/etc/compile-by-zinc/femulData0.dot @@ -1,189 +1,343 @@ digraph G { - x90 [label="x90 (r13)",fillcolor="gold"]; - x75 [label="x75 (r8)",fillcolor="brown"]; - x58_128 [label="x58_128 (RSP:r13)",fillcolor="blueviolet"]; - x69 [label="x69 (RBP)",fillcolor="blue"]; - x68_128 [label="x68_128 (r9:r14)",fillcolor="cadetblue"]; - x44_128 [label="x44_128 (r12:r15)",fillcolor="red"]; - x42_128 [label="x42_128 (r12:r15)",fillcolor="red"]; - x40_128 [label="x40_128 (r12:r15)",fillcolor="red"]; - x60_128 [label="x60_128 (RSP:r13)",fillcolor="blueviolet"]; - x62_128 [label="x62_128 (RSP:r13)",fillcolor="blueviolet"]; - x64_128 [label="x64_128 (r8:r10)",fillcolor="brown"]; - x66_128 [label="x66_128 (r8:r10)",fillcolor="brown"]; - x29_128 [label="x29_128 (r9:r14)",fillcolor="cadetblue"]; - x28_128 [label="x28_128 (r8:r10)",fillcolor="brown"]; - x24_128 [label="x24_128 (r8:r10)",fillcolor="brown"]; - x26_128 [label="x26_128 (r8:r10)",fillcolor="brown"]; - x21_128 [label="x21_128 (RSP:r13)",fillcolor="blueviolet"]; - x20_128 [label="x20_128 (RBP:r11)",fillcolor="blue"]; - x23_128 [label="x23_128 (RSP:r13)",fillcolor="blueviolet"]; - x83 [label="x83 (r12)",fillcolor="red"]; - x82 [label="x82 (r15)",fillcolor="darkorange"]; - x81 [label="x81 (r12)",fillcolor="red"]; - x80_128 [label="x80_128 (r12:r15)",fillcolor="red"]; - x87 [label="x87 (r12)",fillcolor="red"]; - x86 [label="x86 (r11)",fillcolor="cyan"]; - x85 [label="x85 (r12)",fillcolor="red"]; - x84 [label="x84 (r11)",fillcolor="cyan"]; - x89 [label="x89 (r12)",fillcolor="red"]; - x88 [label="x88 (r13)",fillcolor="gold"]; - x50_128 [label="x50_128 (RBP:r11)",fillcolor="blue"]; - x5 [label="x5 (r12)",fillcolor="red"]; - x54_128 [label="x54_128 (RBP:r11)",fillcolor="blue"]; - x33_128 [label="x33_128 (r9:r14)",fillcolor="cadetblue"]; - x56_128 [label="x56_128 (RBP:r11)",fillcolor="blue"]; - x31_128 [label="x31_128 (r9:r14)",fillcolor="cadetblue"]; - x36_128 [label="x36_128 (r12:r15)",fillcolor="red"]; - x52_128 [label="x52_128 (RBP:r11)",fillcolor="blue"]; - x78 [label="x78 (r9)",fillcolor="cadetblue"]; - x79 [label="x79 (r14)",fillcolor="deeppink"]; - x76 [label="x76 (r10)",fillcolor="chartreuse"]; - x77_128 [label="x77_128 (r9:r14)",fillcolor="cadetblue"]; - x38_128 [label="x38_128 (r12:r15)",fillcolor="red"]; - x35_128 [label="x35_128 (r9:r14)",fillcolor="cadetblue"]; - x72 [label="x72 (RSP)",fillcolor="blueviolet"]; - x73 [label="x73 (r13)",fillcolor="gold"]; - x70 [label="x70 (r11)",fillcolor="cyan"]; - x71_128 [label="x71_128 (RSP:r13)",fillcolor="blueviolet"]; - x18 [label="x18 (r15)",fillcolor="darkorange"]; - x74_128 [label="x74_128 (r8:r10)",fillcolor="brown"]; + x5 [label="x5 (r9)" , style="filled", fillcolor="red"]; + x7 [label="x7 (r15)" , style="filled", fillcolor="red"]; + x9 [label="x9 (RDX)" , style="filled", fillcolor="red"]; + x10 [label="x10" ]; + x11 [label="x11" ]; + x13 [label="x13 (r8)" , style="filled", fillcolor="red"]; + x15 [label="x15 (RSI)" , style="filled", fillcolor="red"]; + x17 [label="x17" ]; + x18 [label="x18 (r13)" , style="filled", fillcolor="red"]; + x19 [label="x19 (RBX)" , style="filled", fillcolor="red"]; + x20_128_high [label="x20_128_high (r11)" , style="filled", fillcolor="red"]; + x20_128_low [label="x20_128_low (r12)" , style="filled", fillcolor="red"]; + x20_128_tmp [label="x20_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; + x21_128_high [label="x21_128_high (r11)" , style="filled", fillcolor="red"]; + x21_128_low [label="x21_128_low (r12)" , style="filled", fillcolor="red"]; + x21_128_tmp [label="x21_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; + x22_128_high [label="x22_128_high (r11)" , style="filled", fillcolor="red"]; + x22_128_low [label="x22_128_low (r12)" , style="filled", fillcolor="red"]; + x22_128_tmp [label="x22_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; + x24_128_high [label="x24_128_high" ]; + x24_128_low [label="x24_128_low" ]; + x24_128_tmp [label="x24_128_tmp" ]; + x25_128_high [label="x25_128_high (r11)" , style="filled", fillcolor="red"]; + x25_128_low [label="x25_128_low (r12)" , style="filled", fillcolor="red"]; + x25_128_tmp [label="x25_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; + x27_128_high [label="x27_128_high (r11)" , style="filled", fillcolor="red"]; + x27_128_low [label="x27_128_low (r12)" , style="filled", fillcolor="red"]; + x27_128_tmp [label="x27_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; + x29_128_high [label="x29_128_high" ]; + x29_128_low [label="x29_128_low" ]; + x29_128_tmp [label="x29_128_tmp" ]; + x30_128_high [label="x30_128_high" ]; + x30_128_low [label="x30_128_low" ]; + x30_128_tmp [label="x30_128_tmp" ]; + x32_128_high [label="x32_128_high" ]; + x32_128_low [label="x32_128_low" ]; + x32_128_tmp [label="x32_128_tmp" ]; + x34_128_high [label="x34_128_high" ]; + x34_128_low [label="x34_128_low" ]; + x34_128_tmp [label="x34_128_tmp" ]; + x36_128_high [label="x36_128_high" ]; + x36_128_low [label="x36_128_low" ]; + x36_128_tmp [label="x36_128_tmp" ]; + x37_128_high [label="x37_128_high" ]; + x37_128_low [label="x37_128_low" ]; + x37_128_tmp [label="x37_128_tmp" ]; + x39_128_high [label="x39_128_high" ]; + x39_128_low [label="x39_128_low" ]; + x39_128_tmp [label="x39_128_tmp" ]; + x41_128_high [label="x41_128_high" ]; + x41_128_low [label="x41_128_low" ]; + x41_128_tmp [label="x41_128_tmp" ]; + x43_128_high [label="x43_128_high" ]; + x43_128_low [label="x43_128_low" ]; + x43_128_tmp [label="x43_128_tmp" ]; + x45 [label="x45" ]; + x46 [label="x46 (r14)" , style="filled", fillcolor="red"]; + x47 [label="x47 (RCX)" , style="filled", fillcolor="red"]; + x48 [label="x48" ]; + x49_128_high [label="x49_128_high" ]; + x49_128_low [label="x49_128_low" ]; + x49_128_tmp [label="x49_128_tmp" ]; + x51_128_high [label="x51_128_high (r11)" , style="filled", fillcolor="red"]; + x51_128_low [label="x51_128_low (r12)" , style="filled", fillcolor="red"]; + x51_128_tmp [label="x51_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; + x53_128_high [label="x53_128_high (r11)" , style="filled", fillcolor="red"]; + x53_128_low [label="x53_128_low (r12)" , style="filled", fillcolor="red"]; + x53_128_tmp [label="x53_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; + x55_128_high [label="x55_128_high" ]; + x55_128_low [label="x55_128_low" ]; + x55_128_tmp [label="x55_128_tmp" ]; + cx56_128 [label="cx56_128" ]; + x56_128 [label="x50_128 + x52_128 + x54_128 + x56_128" ]; + x56_128_high [label="x50_128_high + x52_128_high + x54_128_high + x56_128_high (r10)" , style="filled", fillcolor="red"]; + x56_128_low [label="x50_128_low + x52_128_low + x54_128_low + x56_128_low (r14)" , style="filled", fillcolor="red"]; + x57_128_high [label="x57_128_high" ]; + x57_128_low [label="x57_128_low" ]; + x57_128_tmp [label="x57_128_tmp" ]; + x59_128_high [label="x59_128_high (r11)" , style="filled", fillcolor="red"]; + x59_128_low [label="x59_128_low (r12)" , style="filled", fillcolor="red"]; + x59_128_tmp [label="x59_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; + x61_128_high [label="x61_128_high" ]; + x61_128_low [label="x61_128_low" ]; + x61_128_tmp [label="x61_128_tmp" ]; + x63_128_high [label="x63_128_high" ]; + x63_128_low [label="x63_128_low" ]; + x63_128_tmp [label="x63_128_tmp" ]; + x65_128_high [label="x65_128_high" ]; + x65_128_low [label="x65_128_low" ]; + x65_128_tmp [label="x65_128_tmp" ]; + x67_128_high [label="x67_128_high" ]; + x67_128_low [label="x67_128_low" ]; + x67_128_tmp [label="x67_128_tmp" ]; + x69 [label="x69" ]; + x70 [label="x70" ]; + cx71_128 [label="cx71_128" ]; + x71_128 [label="x23_128 + x58_128 + x60_128 + x62_128 + x71_128" ]; + x71_128_high [label="x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (RDI)" , style="filled", fillcolor="red"]; + x71_128_low [label="x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (RBP)" , style="filled", fillcolor="red"]; + x72 [label="x72" ]; + x73 [label="x73" ]; + cx74_128 [label="cx74_128" ]; + x74_128 [label="x26_128 + x28_128 + x64_128 + x66_128 + x74_128" ]; + x74_128_high [label="x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (RAX)" , style="filled", fillcolor="red"]; + x74_128_low [label="x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (RCX)" , style="filled", fillcolor="red"]; + x75 [label="x75" ]; + x76 [label="x76" ]; + cx77_128 [label="cx77_128" ]; + x77_128 [label="x31_128 + x33_128 + x35_128 + x68_128 + x77_128" ]; + x77_128_high [label="x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high" ]; + x77_128_low [label="x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low" ]; + x78 [label="x78" ]; + x79 [label="x79" ]; + cx80_128 [label="cx80_128" ]; + x80_128 [label="x38_128 + x40_128 + x42_128 + x44_128 + x80_128" ]; + x80_128_high [label="x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high" ]; + x80_128_low [label="x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low" ]; + x81 [label="x81" ]; + x82 [label="x82" ]; + x83 [label="x83" ]; + x84 [label="x84" ]; + x85 [label="x85" ]; + x86 [label="x86" ]; + x87 [label="x87" ]; + x88 [label="x88" ]; + x89 [label="x89" ]; + x90 [label="x90" ]; + x56_128_low -> cx56_128 [ label="GET_CARRY" ] ; + x71_128_low -> cx71_128 [ label="GET_CARRY" ] ; + x74_128_low -> cx74_128 [ label="GET_CARRY" ] ; + x77_128_low -> cx77_128 [ label="GET_CARRY" ] ; + x80_128_low -> cx80_128 [ label="GET_CARRY" ] ; + x20_128_tmp -> x20_128_high [ label="GET_HIGH" ] ; + x20_128_tmp -> x20_128_low [ label="GET_LOW" ] ; + x5 -> x20_128_tmp [ label="*" ] ; + x13 -> x20_128_tmp [ label="*" ] ; + x21_128_tmp -> x21_128_high [ label="GET_HIGH" ] ; + x21_128_tmp -> x21_128_low [ label="GET_LOW" ] ; + x5 -> x21_128_tmp [ label="*" ] ; + x15 -> x21_128_tmp [ label="*" ] ; + x22_128_tmp -> x22_128_high [ label="GET_HIGH" ] ; + x22_128_tmp -> x22_128_low [ label="GET_LOW" ] ; + x7 -> x22_128_tmp [ label="*" ] ; + x13 -> x22_128_tmp [ label="*" ] ; + x24_128_tmp -> x24_128_high [ label="GET_HIGH" ] ; + x24_128_tmp -> x24_128_low [ label="GET_LOW" ] ; + x5 -> x24_128_tmp [ label="*" ] ; + x17 -> x24_128_tmp [ label="*" ] ; + x25_128_tmp -> x25_128_high [ label="GET_HIGH" ] ; + x25_128_tmp -> x25_128_low [ label="GET_LOW" ] ; + x9 -> x25_128_tmp [ label="*" ] ; + x13 -> x25_128_tmp [ label="*" ] ; + x27_128_tmp -> x27_128_high [ label="GET_HIGH" ] ; + x27_128_tmp -> x27_128_low [ label="GET_LOW" ] ; + x7 -> x27_128_tmp [ label="*" ] ; + x15 -> x27_128_tmp [ label="*" ] ; + x29_128_tmp -> x29_128_high [ label="GET_HIGH" ] ; + x29_128_tmp -> x29_128_low [ label="GET_LOW" ] ; + x5 -> x29_128_tmp [ label="*" ] ; + x19 -> x29_128_tmp [ label="*" ] ; + x30_128_tmp -> x30_128_high [ label="GET_HIGH" ] ; + x30_128_tmp -> x30_128_low [ label="GET_LOW" ] ; + x11 -> x30_128_tmp [ label="*" ] ; + x13 -> x30_128_tmp [ label="*" ] ; + x32_128_tmp -> x32_128_high [ label="GET_HIGH" ] ; + x32_128_tmp -> x32_128_low [ label="GET_LOW" ] ; + x7 -> x32_128_tmp [ label="*" ] ; + x17 -> x32_128_tmp [ label="*" ] ; + x34_128_tmp -> x34_128_high [ label="GET_HIGH" ] ; + x34_128_tmp -> x34_128_low [ label="GET_LOW" ] ; + x9 -> x34_128_tmp [ label="*" ] ; + x15 -> x34_128_tmp [ label="*" ] ; + x36_128_tmp -> x36_128_high [ label="GET_HIGH" ] ; + x36_128_tmp -> x36_128_low [ label="GET_LOW" ] ; + x5 -> x36_128_tmp [ label="*" ] ; + x18 -> x36_128_tmp [ label="*" ] ; + x37_128_tmp -> x37_128_high [ label="GET_HIGH" ] ; + x37_128_tmp -> x37_128_low [ label="GET_LOW" ] ; + x10 -> x37_128_tmp [ label="*" ] ; + x13 -> x37_128_tmp [ label="*" ] ; + x39_128_tmp -> x39_128_high [ label="GET_HIGH" ] ; + x39_128_tmp -> x39_128_low [ label="GET_LOW" ] ; + x11 -> x39_128_tmp [ label="*" ] ; + x15 -> x39_128_tmp [ label="*" ] ; + x41_128_tmp -> x41_128_high [ label="GET_HIGH" ] ; + x41_128_tmp -> x41_128_low [ label="GET_LOW" ] ; + x7 -> x41_128_tmp [ label="*" ] ; + x19 -> x41_128_tmp [ label="*" ] ; + x43_128_tmp -> x43_128_high [ label="GET_HIGH" ] ; + x43_128_tmp -> x43_128_low [ label="GET_LOW" ] ; + x9 -> x43_128_tmp [ label="*" ] ; + x17 -> x43_128_tmp [ label="*" ] ; + x10 -> x45 [ label="*" ] ; + x7 -> x46 [ label="*" ] ; + x9 -> x47 [ label="*" ] ; + x11 -> x48 [ label="*" ] ; + x49_128_tmp -> x49_128_high [ label="GET_HIGH" ] ; + x49_128_tmp -> x49_128_low [ label="GET_LOW" ] ; + x45 -> x49_128_tmp [ label="*" ] ; + x15 -> x49_128_tmp [ label="*" ] ; + x51_128_tmp -> x51_128_high [ label="GET_HIGH" ] ; + x51_128_tmp -> x51_128_low [ label="GET_LOW" ] ; + x46 -> x51_128_tmp [ label="*" ] ; + x18 -> x51_128_tmp [ label="*" ] ; + x53_128_tmp -> x53_128_high [ label="GET_HIGH" ] ; + x53_128_tmp -> x53_128_low [ label="GET_LOW" ] ; + x47 -> x53_128_tmp [ label="*" ] ; + x19 -> x53_128_tmp [ label="*" ] ; + x55_128_tmp -> x55_128_high [ label="GET_HIGH" ] ; + x55_128_tmp -> x55_128_low [ label="GET_LOW" ] ; + x48 -> x55_128_tmp [ label="*" ] ; + x17 -> x55_128_tmp [ label="*" ] ; + x56_128_low -> x56_128 [ label="COMBINE" ] ; + x56_128_high -> x56_128 [ label="COMBINE" ] ; + cx56_128 -> x56_128_high [ label="+" ] ; + x55_128_high -> x56_128_high [ label="+" ] ; + x53_128_high -> x56_128_high [ label="+" ] ; + x51_128_high -> x56_128_high [ label="+" ] ; + x20_128_high -> x56_128_high [ label="+" ] ; + x49_128_high -> x56_128_high [ label="+" ] ; + x55_128_low -> x56_128_low [ label="+" ] ; + x53_128_low -> x56_128_low [ label="+" ] ; + x51_128_low -> x56_128_low [ label="+" ] ; + x20_128_low -> x56_128_low [ label="+" ] ; + x49_128_low -> x56_128_low [ label="+" ] ; + x57_128_tmp -> x57_128_high [ label="GET_HIGH" ] ; + x57_128_tmp -> x57_128_low [ label="GET_LOW" ] ; + x45 -> x57_128_tmp [ label="*" ] ; + x17 -> x57_128_tmp [ label="*" ] ; + x59_128_tmp -> x59_128_high [ label="GET_HIGH" ] ; + x59_128_tmp -> x59_128_low [ label="GET_LOW" ] ; + x47 -> x59_128_tmp [ label="*" ] ; + x18 -> x59_128_tmp [ label="*" ] ; + x61_128_tmp -> x61_128_high [ label="GET_HIGH" ] ; + x61_128_tmp -> x61_128_low [ label="GET_LOW" ] ; + x48 -> x61_128_tmp [ label="*" ] ; + x19 -> x61_128_tmp [ label="*" ] ; + x63_128_tmp -> x63_128_high [ label="GET_HIGH" ] ; + x63_128_tmp -> x63_128_low [ label="GET_LOW" ] ; + x45 -> x63_128_tmp [ label="*" ] ; + x19 -> x63_128_tmp [ label="*" ] ; + x65_128_tmp -> x65_128_high [ label="GET_HIGH" ] ; + x65_128_tmp -> x65_128_low [ label="GET_LOW" ] ; + x48 -> x65_128_tmp [ label="*" ] ; + x18 -> x65_128_tmp [ label="*" ] ; + x67_128_tmp -> x67_128_high [ label="GET_HIGH" ] ; + x67_128_tmp -> x67_128_low [ label="GET_LOW" ] ; + x45 -> x67_128_tmp [ label="*" ] ; + x18 -> x67_128_tmp [ label="*" ] ; + x56_128 -> x69 [ label=">>" ] ; + x56_128_low -> x70 [ label="&" ] ; + x71_128_low -> x71_128 [ label="COMBINE" ] ; + x71_128_high -> x71_128 [ label="COMBINE" ] ; + cx71_128 -> x71_128_high [ label="+" ] ; + x61_128_high -> x71_128_high [ label="+" ] ; + x59_128_high -> x71_128_high [ label="+" ] ; + x57_128_high -> x71_128_high [ label="+" ] ; + x21_128_high -> x71_128_high [ label="+" ] ; + x22_128_high -> x71_128_high [ label="+" ] ; + x69 -> x71_128_low [ label="+" ] ; + x61_128_low -> x71_128_low [ label="+" ] ; + x59_128_low -> x71_128_low [ label="+" ] ; + x57_128_low -> x71_128_low [ label="+" ] ; + x21_128_low -> x71_128_low [ label="+" ] ; + x22_128_low -> x71_128_low [ label="+" ] ; + x71_128 -> x72 [ label=">>" ] ; + x71_128_low -> x73 [ label="&" ] ; + x74_128_low -> x74_128 [ label="COMBINE" ] ; + x74_128_high -> x74_128 [ label="COMBINE" ] ; + cx74_128 -> x74_128_high [ label="+" ] ; + x65_128_high -> x74_128_high [ label="+" ] ; + x63_128_high -> x74_128_high [ label="+" ] ; + x27_128_high -> x74_128_high [ label="+" ] ; + x24_128_high -> x74_128_high [ label="+" ] ; + x25_128_high -> x74_128_high [ label="+" ] ; + x72 -> x74_128_low [ label="+" ] ; + x65_128_low -> x74_128_low [ label="+" ] ; + x63_128_low -> x74_128_low [ label="+" ] ; + x27_128_low -> x74_128_low [ label="+" ] ; + x24_128_low -> x74_128_low [ label="+" ] ; + x25_128_low -> x74_128_low [ label="+" ] ; + x74_128 -> x75 [ label=">>" ] ; + x74_128_low -> x76 [ label="&" ] ; + x77_128_low -> x77_128 [ label="COMBINE" ] ; + x77_128_high -> x77_128 [ label="COMBINE" ] ; + cx77_128 -> x77_128_high [ label="+" ] ; + x67_128_high -> x77_128_high [ label="+" ] ; + x34_128_high -> x77_128_high [ label="+" ] ; + x32_128_high -> x77_128_high [ label="+" ] ; + x29_128_high -> x77_128_high [ label="+" ] ; + x30_128_high -> x77_128_high [ label="+" ] ; + x75 -> x77_128_low [ label="+" ] ; + x67_128_low -> x77_128_low [ label="+" ] ; + x34_128_low -> x77_128_low [ label="+" ] ; + x32_128_low -> x77_128_low [ label="+" ] ; + x29_128_low -> x77_128_low [ label="+" ] ; + x30_128_low -> x77_128_low [ label="+" ] ; + x77_128 -> x78 [ label=">>" ] ; + x77_128_low -> x79 [ label="&" ] ; + x80_128_low -> x80_128 [ label="COMBINE" ] ; + x80_128_high -> x80_128 [ label="COMBINE" ] ; + cx80_128 -> x80_128_high [ label="+" ] ; + x43_128_high -> x80_128_high [ label="+" ] ; + x41_128_high -> x80_128_high [ label="+" ] ; + x39_128_high -> x80_128_high [ label="+" ] ; + x36_128_high -> x80_128_high [ label="+" ] ; + x37_128_high -> x80_128_high [ label="+" ] ; + x78 -> x80_128_low [ label="+" ] ; + x43_128_low -> x80_128_low [ label="+" ] ; + x41_128_low -> x80_128_low [ label="+" ] ; + x39_128_low -> x80_128_low [ label="+" ] ; + x36_128_low -> x80_128_low [ label="+" ] ; + x37_128_low -> x80_128_low [ label="+" ] ; + x80_128 -> x81 [ label=">>" ] ; + x80_128_low -> x82 [ label="&" ] ; + x81 -> x83 [ label="*" ] ; + x70 -> x84 [ label="+" ] ; + x83 -> x84 [ label="+" ] ; + x84 -> x85 [ label=">>" ] ; + x84 -> x86 [ label="&" ] ; + x85 -> x87 [ label="+" ] ; + x73 -> x87 [ label="+" ] ; + x87 -> x88 [ label=">>" ] ; + x87 -> x89 [ label="&" ] ; + x88 -> x90 [ label="+" ] ; + x76 -> x90 [ label="+" ] ; + in -> x13 ; + in -> x15 ; + in -> x18 ; + in -> x19 ; + in -> x9 ; in -> x10 ; in -> x11 ; - in -> x9 ; + in -> x17 ; in -> x7 ; in -> x5 ; - in -> x18 ; - in -> x19 ; - in -> x17 ; - in -> x15 ; - in -> x13 ; - x82 -> out ; - x79 -> out ; x90 -> out ; + x82 -> out ; x89 -> out ; x86 -> out ; - x13 -> x20_128 ; - x5 -> x20_128 ; - x15 -> x21_128 ; - x5 -> x21_128 ; - x13 -> x22_128 ; - x7 -> x22_128 ; - x21_128 -> x23_128 ; - x22_128 -> x23_128 ; - x17 -> x24_128 ; - x5 -> x24_128 ; - x13 -> x25_128 ; - x9 -> x25_128 ; - x24_128 -> x26_128 ; - x25_128 -> x26_128 ; - x15 -> x27_128 ; - x7 -> x27_128 ; - x26_128 -> x28_128 ; - x27_128 -> x28_128 ; - x19 -> x29_128 ; - x5 -> x29_128 ; - x11 -> x30_128 ; - x13 -> x30_128 ; - x29_128 -> x31_128 ; - x30_128 -> x31_128 ; - x17 -> x32_128 ; - x7 -> x32_128 ; - x31_128 -> x33_128 ; - x32_128 -> x33_128 ; - x15 -> x34_128 ; - x9 -> x34_128 ; - x33_128 -> x35_128 ; - x34_128 -> x35_128 ; - x18 -> x36_128 ; - x5 -> x36_128 ; - x10 -> x37_128 ; - x13 -> x37_128 ; - x36_128 -> x38_128 ; - x37_128 -> x38_128 ; - x11 -> x39_128 ; - x15 -> x39_128 ; - x38_128 -> x40_128 ; - x39_128 -> x40_128 ; - x19 -> x41_128 ; - x7 -> x41_128 ; - x40_128 -> x42_128 ; - x41_128 -> x42_128 ; - x17 -> x43_128 ; - x9 -> x43_128 ; - x42_128 -> x44_128 ; - x43_128 -> x44_128 ; - x10 -> x45 ; - x7 -> x46 ; - x9 -> x47 ; - x11 -> x48 ; - x15 -> x49_128 ; - x45 -> x49_128 ; - x20_128 -> x50_128 ; - x49_128 -> x50_128 ; - x18 -> x51_128 ; - x46 -> x51_128 ; - x50_128 -> x52_128 ; - x51_128 -> x52_128 ; - x19 -> x53_128 ; - x47 -> x53_128 ; - x52_128 -> x54_128 ; - x53_128 -> x54_128 ; - x17 -> x55_128 ; - x48 -> x55_128 ; - x54_128 -> x56_128 ; - x55_128 -> x56_128 ; - x17 -> x57_128 ; - x45 -> x57_128 ; - x23_128 -> x58_128 ; - x57_128 -> x58_128 ; - x18 -> x59_128 ; - x47 -> x59_128 ; - x58_128 -> x60_128 ; - x59_128 -> x60_128 ; - x19 -> x61_128 ; - x48 -> x61_128 ; - x60_128 -> x62_128 ; - x61_128 -> x62_128 ; - x19 -> x63_128 ; - x45 -> x63_128 ; - x28_128 -> x64_128 ; - x63_128 -> x64_128 ; - x18 -> x65_128 ; - x48 -> x65_128 ; - x64_128 -> x66_128 ; - x65_128 -> x66_128 ; - x18 -> x67_128 ; - x45 -> x67_128 ; - x35_128 -> x68_128 ; - x67_128 -> x68_128 ; - x56_128 -> x69 ; - x56_128 -> x70 ; - x62_128 -> x71_128 ; - x69 -> x71_128 ; - x71_128 -> x72 ; - x71_128 -> x73 ; - x66_128 -> x74_128 ; - x72 -> x74_128 ; - x74_128 -> x75 ; - x74_128 -> x76 ; - x68_128 -> x77_128 ; - x75 -> x77_128 ; - x77_128 -> x78 ; - x77_128 -> x79 ; - x44_128 -> x80_128 ; - x78 -> x80_128 ; - x80_128 -> x81 ; - x80_128 -> x82 ; - x81 -> x83 ; - x70 -> x84 ; - x83 -> x84 ; - x84 -> x85 ; - x84 -> x86 ; - x73 -> x87 ; - x85 -> x87 ; - x87 -> x88 ; - x87 -> x89 ; - x76 -> x90 ; - x88 -> x90 ; + x79 -> out ; } diff --git a/etc/compile-by-zinc/femulData0.png b/etc/compile-by-zinc/femulData0.png index 0cd951607..52d41b310 100644 Binary files a/etc/compile-by-zinc/femulData0.png and b/etc/compile-by-zinc/femulData0.png differ diff --git a/etc/compile-by-zinc/femulData0.svg b/etc/compile-by-zinc/femulData0.svg index 827da12e7..361ceb3d1 100644 --- a/etc/compile-by-zinc/femulData0.svg +++ b/etc/compile-by-zinc/femulData0.svg @@ -1,1339 +1,1927 @@ - - - + + G - - - -x90 - -x90 (r13) + + +x5 + +x5 (r9) + + +x20_128_tmp + +x20_128_tmp (r12:r11) + + +x5->x20_128_tmp + + +* + + +x21_128_tmp + +x21_128_tmp (r12:r11) + + +x5->x21_128_tmp + + +* + + +x24_128_tmp + +x24_128_tmp + + +x5->x24_128_tmp + + +* + + +x29_128_tmp + +x29_128_tmp + + +x5->x29_128_tmp + + +* + + +x36_128_tmp + +x36_128_tmp + + +x5->x36_128_tmp + + +* - - -out - -out + +x7 + +x7 (r15) + + +x22_128_tmp + +x22_128_tmp (r12:r11) + + +x7->x22_128_tmp + + +* + + +x27_128_tmp + +x27_128_tmp (r12:r11) + + +x7->x27_128_tmp + + +* + + +x32_128_tmp + +x32_128_tmp + + +x7->x32_128_tmp + + +* + + +x41_128_tmp + +x41_128_tmp + + +x7->x41_128_tmp + + +* - - -x90->out - - + +x46 + +x46 (r14) - - -x75 - -x75 (r8) + +x7->x46 + + +* - - -x77_128 - -x77_128 (r9:r14) - - - -x75->x77_128 - - - - - -x58_128 - -x58_128 (RSP:r13) - - - -x60_128 - -x60_128 (RSP:r13) - - - -x58_128->x60_128 - - + +x9 + +x9 (RDX) + + +x25_128_tmp + +x25_128_tmp (r12:r11) + + +x9->x25_128_tmp + + +* + + +x34_128_tmp + +x34_128_tmp + + +x9->x34_128_tmp + + +* + + +x43_128_tmp + +x43_128_tmp + + +x9->x43_128_tmp + + +* - - -x69 - -x69 (RBP) + +x47 + +x47 (RCX) - - -x71_128 - -x71_128 (RSP:r13) - - - -x69->x71_128 - - - - - -x68_128 - -x68_128 (r9:r14) - - - -x68_128->x77_128 - - - - - -x44_128 - -x44_128 (r12:r15) + +x9->x47 + + +* - - -x80_128 - -x80_128 (r12:r15) - - - -x44_128->x80_128 - - - - - -x42_128 - -x42_128 (r12:r15) - - - -x42_128->x44_128 - - - - - -x40_128 - -x40_128 (r12:r15) - - - -x40_128->x42_128 - - - - - -x62_128 - -x62_128 (RSP:r13) - - - -x60_128->x62_128 - - - - - -x62_128->x71_128 - - - - - -x64_128 - -x64_128 (r8:r10) - - - -x66_128 - -x66_128 (r8:r10) - - - -x64_128->x66_128 - - + +x10 + +x10 - - -x74_128 - -x74_128 (r8:r10) - - - -x66_128->x74_128 - - - - - -x29_128 - -x29_128 (r9:r14) - - - -x31_128 - -x31_128 (r9:r14) - - - -x29_128->x31_128 - - - - - -x28_128 - -x28_128 (r8:r10) - - - -x28_128->x64_128 - - - - - -x24_128 - -x24_128 (r8:r10) - - - -x26_128 - -x26_128 (r8:r10) - - - -x24_128->x26_128 - - - - - -x26_128->x28_128 - - - - - -x21_128 - -x21_128 (RSP:r13) - - - -x23_128 - -x23_128 (RSP:r13) - - - -x21_128->x23_128 - - - - - -x20_128 - -x20_128 (RBP:r11) - - - -x50_128 - -x50_128 (RBP:r11) - - - -x20_128->x50_128 - - - - - -x23_128->x58_128 - - + +x37_128_tmp + +x37_128_tmp - - -x83 - -x83 (r12) + +x10->x37_128_tmp + + +* - - -x84 - -x84 (r11) - - - -x83->x84 - - + +x45 + +x45 - - -x82 - -x82 (r15) + +x10->x45 + + +* - - -x82->out - - + +x11 + +x11 + + +x30_128_tmp + +x30_128_tmp + + +x11->x30_128_tmp + + +* + + +x39_128_tmp + +x39_128_tmp + + +x11->x39_128_tmp + + +* - - -x81 - -x81 (r12) + +x48 + +x48 - - -x81->x83 - - + +x11->x48 + + +* - - -x80_128->x82 - - + +x13 + +x13 (r8) + + +x13->x20_128_tmp + + +* + + +x13->x22_128_tmp + + +* + + +x13->x25_128_tmp + + +* + + +x13->x30_128_tmp + + +* + + +x13->x37_128_tmp + + +* - - -x80_128->x81 - - + +x15 + +x15 (RSI) + + +x15->x21_128_tmp + + +* + + +x15->x27_128_tmp + + +* + + +x15->x34_128_tmp + + +* + + +x15->x39_128_tmp + + +* + + +x49_128_tmp + +x49_128_tmp + + +x15->x49_128_tmp + + +* - - -x87 - -x87 (r12) + +x17 + +x17 + + +x17->x24_128_tmp + + +* + + +x17->x32_128_tmp + + +* + + +x17->x43_128_tmp + + +* + + +x55_128_tmp + +x55_128_tmp + + +x17->x55_128_tmp + + +* + + +x57_128_tmp + +x57_128_tmp + + +x17->x57_128_tmp + + +* - - -x89 - -x89 (r12) + +x18 + +x18 (r13) + + +x18->x36_128_tmp + + +* + + +x51_128_tmp + +x51_128_tmp (r12:r11) + + +x18->x51_128_tmp + + +* + + +x59_128_tmp + +x59_128_tmp (r12:r11) + + +x18->x59_128_tmp + + +* + + +x65_128_tmp + +x65_128_tmp + + +x18->x65_128_tmp + + +* + + +x67_128_tmp + +x67_128_tmp + + +x18->x67_128_tmp + + +* - - -x87->x89 - - + +x19 + +x19 (RBX) + + +x19->x29_128_tmp + + +* + + +x19->x41_128_tmp + + +* + + +x53_128_tmp + +x53_128_tmp (r12:r11) + + +x19->x53_128_tmp + + +* + + +x61_128_tmp + +x61_128_tmp + + +x19->x61_128_tmp + + +* + + +x63_128_tmp + +x63_128_tmp + + +x19->x63_128_tmp + + +* + + +x20_128_high + +x20_128_high (r11) + + +x56_128_high + +x50_128_high + x52_128_high + x54_128_high + x56_128_high (r10) + + +x20_128_high->x56_128_high + + ++ + + +x20_128_low + +x20_128_low (r12) + + +x56_128_low + +x50_128_low + x52_128_low + x54_128_low + x56_128_low (r14) + + +x20_128_low->x56_128_low + + ++ + + +x20_128_tmp->x20_128_high + + +GET_HIGH + + +x20_128_tmp->x20_128_low + + +GET_LOW + + +x21_128_high + +x21_128_high (r11) + + +x71_128_high + +x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (RDI) + + +x21_128_high->x71_128_high + + ++ + + +x21_128_low + +x21_128_low (r12) + + +x71_128_low + +x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (RBP) + + +x21_128_low->x71_128_low + + ++ + + +x21_128_tmp->x21_128_high + + +GET_HIGH + + +x21_128_tmp->x21_128_low + + +GET_LOW + + +x22_128_high + +x22_128_high (r11) + + +x22_128_high->x71_128_high + + ++ + + +x22_128_low + +x22_128_low (r12) + + +x22_128_low->x71_128_low + + ++ + + +x22_128_tmp->x22_128_high + + +GET_HIGH + + +x22_128_tmp->x22_128_low + + +GET_LOW + + +x24_128_high + +x24_128_high + + +x74_128_high + +x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (RAX) + + +x24_128_high->x74_128_high + + ++ + + +x24_128_low + +x24_128_low + + +x74_128_low + +x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (RCX) + + +x24_128_low->x74_128_low + + ++ + + +x24_128_tmp->x24_128_high + + +GET_HIGH + + +x24_128_tmp->x24_128_low + + +GET_LOW + + +x25_128_high + +x25_128_high (r11) + + +x25_128_high->x74_128_high + + ++ + + +x25_128_low + +x25_128_low (r12) + + +x25_128_low->x74_128_low + + ++ + + +x25_128_tmp->x25_128_high + + +GET_HIGH + + +x25_128_tmp->x25_128_low + + +GET_LOW + + +x27_128_high + +x27_128_high (r11) + + +x27_128_high->x74_128_high + + ++ + + +x27_128_low + +x27_128_low (r12) + + +x27_128_low->x74_128_low + + ++ + + +x27_128_tmp->x27_128_high + + +GET_HIGH + + +x27_128_tmp->x27_128_low + + +GET_LOW + + +x29_128_high + +x29_128_high + + +x77_128_high + +x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high + + +x29_128_high->x77_128_high + + ++ + + +x29_128_low + +x29_128_low + + +x77_128_low + +x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low + + +x29_128_low->x77_128_low + + ++ + + +x29_128_tmp->x29_128_high + + +GET_HIGH + + +x29_128_tmp->x29_128_low + + +GET_LOW + + +x30_128_high + +x30_128_high + + +x30_128_high->x77_128_high + + ++ + + +x30_128_low + +x30_128_low + + +x30_128_low->x77_128_low + + ++ + + +x30_128_tmp->x30_128_high + + +GET_HIGH + + +x30_128_tmp->x30_128_low + + +GET_LOW + + +x32_128_high + +x32_128_high + + +x32_128_high->x77_128_high + + ++ + + +x32_128_low + +x32_128_low + + +x32_128_low->x77_128_low + + ++ + + +x32_128_tmp->x32_128_high + + +GET_HIGH + + +x32_128_tmp->x32_128_low + + +GET_LOW + + +x34_128_high + +x34_128_high + + +x34_128_high->x77_128_high + + ++ + + +x34_128_low + +x34_128_low + + +x34_128_low->x77_128_low + + ++ + + +x34_128_tmp->x34_128_high + + +GET_HIGH + + +x34_128_tmp->x34_128_low + + +GET_LOW + + +x36_128_high + +x36_128_high + + +x80_128_high + +x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high + + +x36_128_high->x80_128_high + + ++ + + +x36_128_low + +x36_128_low + + +x80_128_low + +x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low + + +x36_128_low->x80_128_low + + ++ + + +x36_128_tmp->x36_128_high + + +GET_HIGH + + +x36_128_tmp->x36_128_low + + +GET_LOW + + +x37_128_high + +x37_128_high + + +x37_128_high->x80_128_high + + ++ + + +x37_128_low + +x37_128_low + + +x37_128_low->x80_128_low + + ++ + + +x37_128_tmp->x37_128_high + + +GET_HIGH + + +x37_128_tmp->x37_128_low + + +GET_LOW + + +x39_128_high + +x39_128_high + + +x39_128_high->x80_128_high + + ++ + + +x39_128_low + +x39_128_low + + +x39_128_low->x80_128_low + + ++ + + +x39_128_tmp->x39_128_high + + +GET_HIGH + + +x39_128_tmp->x39_128_low + + +GET_LOW + + +x41_128_high + +x41_128_high + + +x41_128_high->x80_128_high + + ++ + + +x41_128_low + +x41_128_low + + +x41_128_low->x80_128_low + + ++ + + +x41_128_tmp->x41_128_high + + +GET_HIGH + + +x41_128_tmp->x41_128_low + + +GET_LOW + + +x43_128_high + +x43_128_high + + +x43_128_high->x80_128_high + + ++ + + +x43_128_low + +x43_128_low + + +x43_128_low->x80_128_low + + ++ + + +x43_128_tmp->x43_128_high + + +GET_HIGH + + +x43_128_tmp->x43_128_low + + +GET_LOW + + +x45->x49_128_tmp + + +* + + +x45->x57_128_tmp + + +* + + +x45->x63_128_tmp + + +* + + +x45->x67_128_tmp + + +* + + +x46->x51_128_tmp + + +* + + +x47->x53_128_tmp + + +* + + +x47->x59_128_tmp + + +* + + +x48->x55_128_tmp + + +* + + +x48->x61_128_tmp + + +* + + +x48->x65_128_tmp + + +* + + +x49_128_high + +x49_128_high + + +x49_128_high->x56_128_high + + ++ + + +x49_128_low + +x49_128_low + + +x49_128_low->x56_128_low + + ++ + + +x49_128_tmp->x49_128_high + + +GET_HIGH + + +x49_128_tmp->x49_128_low + + +GET_LOW + + +x51_128_high + +x51_128_high (r11) + + +x51_128_high->x56_128_high + + ++ + + +x51_128_low + +x51_128_low (r12) + + +x51_128_low->x56_128_low + + ++ + + +x51_128_tmp->x51_128_high + + +GET_HIGH + + +x51_128_tmp->x51_128_low + + +GET_LOW + + +x53_128_high + +x53_128_high (r11) + + +x53_128_high->x56_128_high + + ++ + + +x53_128_low + +x53_128_low (r12) + + +x53_128_low->x56_128_low + + ++ + + +x53_128_tmp->x53_128_high + + +GET_HIGH + + +x53_128_tmp->x53_128_low + + +GET_LOW + + +x55_128_high + +x55_128_high + + +x55_128_high->x56_128_high + + ++ + + +x55_128_low + +x55_128_low + + +x55_128_low->x56_128_low + + ++ + + +x55_128_tmp->x55_128_high + + +GET_HIGH + + +x55_128_tmp->x55_128_low + + +GET_LOW + + +cx56_128 + +cx56_128 + + +cx56_128->x56_128_high + + ++ - - -x88 - -x88 (r13) + +x56_128 + +x50_128 + x52_128 + x54_128 + x56_128 - - -x87->x88 - - + +x69 + +x69 - - -x86 - -x86 (r11) + +x56_128->x69 + + +>> + + +x56_128_high->x56_128 + + +COMBINE + + +x56_128_low->cx56_128 + + +GET_CARRY + + +x56_128_low->x56_128 + + +COMBINE - - -x86->out - - + +x70 + +x70 + + +x56_128_low->x70 + + +& + + +x57_128_high + +x57_128_high + + +x57_128_high->x71_128_high + + ++ + + +x57_128_low + +x57_128_low + + +x57_128_low->x71_128_low + + ++ + + +x57_128_tmp->x57_128_high + + +GET_HIGH + + +x57_128_tmp->x57_128_low + + +GET_LOW + + +x59_128_high + +x59_128_high (r11) + + +x59_128_high->x71_128_high + + ++ + + +x59_128_low + +x59_128_low (r12) + + +x59_128_low->x71_128_low + + ++ + + +x59_128_tmp->x59_128_high + + +GET_HIGH + + +x59_128_tmp->x59_128_low + + +GET_LOW + + +x61_128_high + +x61_128_high + + +x61_128_high->x71_128_high + + ++ + + +x61_128_low + +x61_128_low + + +x61_128_low->x71_128_low + + ++ + + +x61_128_tmp->x61_128_high + + +GET_HIGH + + +x61_128_tmp->x61_128_low + + +GET_LOW + + +x63_128_high + +x63_128_high + + +x63_128_high->x74_128_high + + ++ + + +x63_128_low + +x63_128_low + + +x63_128_low->x74_128_low + + ++ + + +x63_128_tmp->x63_128_high + + +GET_HIGH + + +x63_128_tmp->x63_128_low + + +GET_LOW + + +x65_128_high + +x65_128_high + + +x65_128_high->x74_128_high + + ++ + + +x65_128_low + +x65_128_low + + +x65_128_low->x74_128_low + + ++ + + +x65_128_tmp->x65_128_high + + +GET_HIGH + + +x65_128_tmp->x65_128_low + + +GET_LOW + + +x67_128_high + +x67_128_high + + +x67_128_high->x77_128_high + + ++ + + +x67_128_low + +x67_128_low + + +x67_128_low->x77_128_low + + ++ + + +x67_128_tmp->x67_128_high + + +GET_HIGH + + +x67_128_tmp->x67_128_low + + +GET_LOW + + +x69->x71_128_low + + ++ - - -x85 - -x85 (r12) + +x84 + +x84 - - -x85->x87 - - + +x70->x84 + + ++ + + +cx71_128 + +cx71_128 + + +cx71_128->x71_128_high + + ++ - - -x84->x86 - - + +x71_128 + +x23_128 + x58_128 + x60_128 + x62_128 + x71_128 - - -x84->x85 - - + +x72 + +x72 - - -x89->out - - + +x71_128->x72 + + +>> + + +x71_128_high->x71_128 + + +COMBINE + + +x71_128_low->cx71_128 + + +GET_CARRY + + +x71_128_low->x71_128 + + +COMBINE - - -x88->x90 - - - - - -x52_128 - -x52_128 (RBP:r11) - - - -x50_128->x52_128 - - + +x73 + +x73 + + +x71_128_low->x73 + + +& + + +x72->x74_128_low + + ++ - - -x5 - -x5 (r12) - - - -x5->x29_128 - - - - - -x5->x24_128 - - - - - -x5->x21_128 - - - - - -x5->x20_128 - - - - - -x36_128 - -x36_128 (r12:r15) - - - -x5->x36_128 - - - - - -x54_128 - -x54_128 (RBP:r11) + +x87 + +x87 - - -x56_128 - -x56_128 (RBP:r11) - - - -x54_128->x56_128 - - - - - -x33_128 - -x33_128 (r9:r14) - - - -x35_128 - -x35_128 (r9:r14) - - - -x33_128->x35_128 - - + +x73->x87 + + ++ + + +cx74_128 + +cx74_128 + + +cx74_128->x74_128_high + + ++ - - -x56_128->x69 - - + +x74_128 + +x26_128 + x28_128 + x64_128 + x66_128 + x74_128 - - -x70 - -x70 (r11) - - - -x56_128->x70 - - - - - -x31_128->x33_128 - - - - - -x38_128 - -x38_128 (r12:r15) - - - -x36_128->x38_128 - - - - - -x52_128->x54_128 - - + +x75 + +x75 - - -x78 - -x78 (r9) + +x74_128->x75 + + +>> + + +x74_128_high->x74_128 + + +COMBINE + + +x74_128_low->cx74_128 + + +GET_CARRY + + +x74_128_low->x74_128 + + +COMBINE - - -x78->x80_128 - - + +x76 + +x76 + + +x74_128_low->x76 + + +& + + +x75->x77_128_low + + ++ - - -x79 - -x79 (r14) + +x90 + +x90 - - -x79->out - - + +x76->x90 + + ++ + + +cx77_128 + +cx77_128 + + +cx77_128->x77_128_high + + ++ - - -x76 - -x76 (r10) + +x77_128 + +x31_128 + x33_128 + x35_128 + x68_128 + x77_128 - - -x76->x90 - - + +x78 + +x78 - -x77_128->x78 - - - - - -x77_128->x79 - - - - - -x38_128->x40_128 - - - - - -x35_128->x68_128 - - +x77_128->x78 + + +>> + + +x77_128_high->x77_128 + + +COMBINE + + +x77_128_low->cx77_128 + + +GET_CARRY + + +x77_128_low->x77_128 + + +COMBINE - - -x72 - -x72 (RSP) + +x79 + +x79 + + +x77_128_low->x79 + + +& + + +x78->x80_128_low + + ++ - - -x72->x74_128 - - + +out + +out - - -x73 - -x73 (r13) + +x79->out + + - - -x73->x87 - - + +cx80_128 + +cx80_128 - - -x70->x84 - - + +cx80_128->x80_128_high + + ++ - - -x71_128->x72 - - + +x80_128 + +x38_128 + x40_128 + x42_128 + x44_128 + x80_128 - - -x71_128->x73 - - + +x81 + +x81 - - -x18 - -x18 (r15) - - - -x18->x36_128 - - - - - -x51_128 - -x51_128 - - - -x18->x51_128 - - - - - -x59_128 - -x59_128 - - - -x18->x59_128 - - - - - -x65_128 - -x65_128 - - - -x18->x65_128 - - - - - -x67_128 - -x67_128 - - - -x18->x67_128 - - + +x80_128->x81 + + +>> + + +x80_128_high->x80_128 + + +COMBINE + + +x80_128_low->cx80_128 + + +GET_CARRY + + +x80_128_low->x80_128 + + +COMBINE - - -x74_128->x75 - - + +x82 + +x82 - - -x74_128->x76 - - + +x80_128_low->x82 + + +& - - -in - -in + +x83 + +x83 - - -in->x5 - - + +x81->x83 + + +* - - -in->x18 - - + +x82->out + + - - -x10 - -x10 + +x83->x84 + + ++ - - -in->x10 - - + +x85 + +x85 - - -x11 - -x11 + +x84->x85 + + +>> - - -in->x11 - - + +x86 + +x86 - - -x9 - -x9 + +x84->x86 + + +& - - -in->x9 - - + +x85->x87 + + ++ - - -x7 - -x7 + +x86->out + + - - -in->x7 - - + +x88 + +x88 - - -x19 - -x19 + +x87->x88 + + +>> - - -in->x19 - - + +x89 + +x89 - - -x17 - -x17 + +x87->x89 + + +& - - -in->x17 - - + +x88->x90 + + ++ - - -x15 - -x15 + +x89->out + + - - -in->x15 - - + +x90->out + + - - -x13 - -x13 + +in + +in - - -in->x13 - - - - - -x37_128 - -x37_128 - - - -x10->x37_128 - - + +in->x5 + + - - -x45 - -x45 + +in->x7 + + - - -x10->x45 - - - - - -x30_128 - -x30_128 - - - -x11->x30_128 - - - - - -x39_128 - -x39_128 - - - -x11->x39_128 - - + +in->x9 + + - - -x48 - -x48 + +in->x10 + + - - -x11->x48 - - - - - -x25_128 - -x25_128 - - - -x9->x25_128 - - - - - -x34_128 - -x34_128 - - - -x9->x34_128 - - - - - -x43_128 - -x43_128 - - - -x9->x43_128 - - + +in->x11 + + - - -x47 - -x47 + +in->x13 + + - - -x9->x47 - - - - - -x22_128 - -x22_128 - - - -x7->x22_128 - - - - - -x27_128 - -x27_128 - - - -x7->x27_128 - - - - - -x32_128 - -x32_128 - - - -x7->x32_128 - - - - - -x41_128 - -x41_128 - - - -x7->x41_128 - - + +in->x15 + + - - -x46 - -x46 + +in->x17 + + - - -x7->x46 - - - - - -x19->x29_128 - - - - - -x19->x41_128 - - - - - -x53_128 - -x53_128 - - - -x19->x53_128 - - - - - -x61_128 - -x61_128 - - - -x19->x61_128 - - - - - -x63_128 - -x63_128 - - - -x19->x63_128 - - - - - -x17->x24_128 - - - - - -x17->x32_128 - - - - - -x17->x43_128 - - - - - -x55_128 - -x55_128 - - - -x17->x55_128 - - - - - -x57_128 - -x57_128 - - - -x17->x57_128 - - - - - -x15->x21_128 - - - - - -x15->x27_128 - - - - - -x15->x34_128 - - - - - -x15->x39_128 - - - - - -x49_128 - -x49_128 - - - -x15->x49_128 - - - - - -x13->x20_128 - - - - - -x13->x22_128 - - - - - -x13->x25_128 - - - - - -x13->x30_128 - - - - - -x13->x37_128 - - - - - -x22_128->x23_128 - - - - - -x25_128->x26_128 - - - - - -x27_128->x28_128 - - - - - -x30_128->x31_128 - - - - - -x32_128->x33_128 - - - - - -x34_128->x35_128 - - - - - -x37_128->x38_128 - - - - - -x39_128->x40_128 - - - - - -x41_128->x42_128 - - - - - -x43_128->x44_128 - - - - - -x45->x49_128 - - - - - -x45->x57_128 - - - - - -x45->x63_128 - - - - - -x45->x67_128 - - - - - -x46->x51_128 - - - - - -x47->x53_128 - - - - - -x47->x59_128 - - - - - -x48->x55_128 - - - - - -x48->x61_128 - - - - - -x48->x65_128 - - - - - -x49_128->x50_128 - - - - - -x51_128->x52_128 - - - - - -x53_128->x54_128 - - - - - -x55_128->x56_128 - - - - - -x57_128->x58_128 - - - - - -x59_128->x60_128 - - - - - -x61_128->x62_128 - - - - - -x63_128->x64_128 - - - - - -x65_128->x66_128 - - - - - -x67_128->x68_128 - - + +in->x18 + + + + +in->x19 + + diff --git a/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py b/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py new file mode 100755 index 000000000..53088457e --- /dev/null +++ b/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py @@ -0,0 +1,443 @@ +#!/usr/bin/env python +from __future__ import with_statement +from memoize import memoize +import codecs, re, sys, os +import subprocess + +LAMBDA = u'\u03bb' + +OP_NAMES = {'*':'MUL', '+':'ADD', '>>':'SHL', '<<':'SHR', '|':'OR', '&':'AND'} + +REGISTERS = tuple(['RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP'] #, 'RSP'] # RSP is stack pointer? + + ['r%d' % i for i in range(8, 16)]) +REGISTER_COLORS = ['color="black"', 'color="white",fillcolor="black"', 'color="maroon"', 'color="green"', 'fillcolor="olive"', + 'color="navy"', 'color="purple"', 'fillcolor="teal"', 'fillcolor="silver"', 'fillcolor="gray"', 'fillcolor="red"', + 'fillcolor="lime"', 'fillcolor="yellow"', 'fillcolor="blue"', 'fillcolor="fuschia"', 'fillcolor="aqua"'] +REGISTER_COLORS = ['fillcolor="%s"' % c for c in 'aliceblue antiquewhite aquamarine azure beige bisque blue blueviolet brown cadetblue chartreuse cyan red gold deeppink darkorange'.split(' ')] +COLOR_FOR_REGISTER = dict(zip(REGISTERS, REGISTER_COLORS)) + +MAX_INSTRUCTION_WINDOW = 10000 + +def get_lines(filename): + with codecs.open(filename, 'r', encoding='utf8') as f: + lines = f.read().replace('\r\n', '\n') + return [line.strip() for line in re.findall("%s '.*?[Rr]eturn [^\r\n]*" % LAMBDA, lines, flags=re.DOTALL)[0].split('\n')] + +def strip_casts(text): + return re.sub(r'\(u?int[0-9]*_t\)\s*\(?([^\)]*)\)?', r'\1', text) + +def parse_lines(lines): + lines = list(map(strip_casts, lines)) + assert lines[0][:len(LAMBDA + ' ')] == LAMBDA + ' ' + assert lines[0][-1] == ',' + ret = {} + ret['vars'] = lines[0][len(LAMBDA + ' '):-1] + assert lines[-1][-1] == ')' + ret['return'] = lines[-1][:-1].replace('return ', '').replace('Return ', '') + ret['lines'] = [] + for line in lines[1:-1]: + datatype, varname, arg1, op, arg2 = re.findall('^(u?int[0-9]*_t) ([^ ]*) = ([^ ]*) ([^ ]*) ([^ ]*);$', line)[0] + ret['lines'].append({'type':datatype, 'out':varname, 'op':op, 'args':(arg1, arg2), 'source':line}) + print('Compiling %d lines in groups of %d...' % (len(ret['lines']), min(MAX_INSTRUCTION_WINDOW, len(ret['lines'])))) + ret['lines'] = tuple(ret['lines']) + split_ret = [] + for start in range(0, len(ret['lines']), MAX_INSTRUCTION_WINDOW): + cur_ret = dict(ret) + cur_ret['lines'] = ret['lines'][start:][:MAX_INSTRUCTION_WINDOW] + split_ret.append(cur_ret) + return tuple(split_ret) + +def get_var_names(input_data): + return tuple(line['out'] for line in input_data['lines']) + +def get_input_var_names(input_data): + return tuple(i for i in data['vars'].replace('%core', '').replace(',', ' ').replace('(', ' ').replace(')', ' ').replace("'", ' ').split(' ') + if i != '') + +def get_output_var_names(input_data): + return tuple(i for i in data['return'].replace(',', ' ').replace('(', ' ').replace(')', ' ').split(' ') + if i != '') + +def line_of_var(input_data, var): + retv = [line for line in input_data['lines'] if line['out'] == var] + if len(retv) > 0: return retv[0] + return {'out': var, 'args':tuple(), 'op': 'INPUT', 'type':'uint64_t'} + +def make_data_dependencies(input_data): + input_var_names = get_input_var_names(input_data) + dependencies = dict((var, tuple()) for var in input_var_names) + for line in input_data['lines']: + dependencies[line['out']] = tuple(arg for arg in line['args'] + if arg[0] not in '0123456789') + return dependencies +def make_reverse_data_dependencies(dependencies): + reverse_dependencies = dict((k, []) for k in dependencies.keys()) + for k, v in dependencies.items(): + for arg in v: + reverse_dependencies[arg].append(k) + return reverse_dependencies + +def get_low_or_high(obj, low_or_high): + assert(low_or_high in ('low', 'high')) + if obj['op'] == 'COMBINE': + if low_or_high == 'low': return obj['deps'][0] + if low_or_high == 'high': return obj['deps'][1] + else: + return {'out':obj['out'] + '_' + low_or_high, 'style':'', 'deps':(obj,), 'op':'GET_' + low_or_high.upper(), 'type':'uint64_t', 'extra_out':tuple(o + '_' + low_or_high for o in obj['extra_out']), 'rev_deps':tuple()} + +def add_combine_low_high(objs): + for obj in objs: + if obj['type'] == 'uint128_t': + obj_low = get_low_or_high(obj, 'low') + obj_high = get_low_or_high(obj, 'high') + obj_new = {'out':obj['out'], 'style':'', 'deps':(obj_low, obj_high), 'op':'COMBINE', 'type':'uint128_t', 'extra_out':obj['extra_out'], 'rev_deps':obj['rev_deps']} + obj['out'] += '_tmp' + obj['rev_deps'] = (obj_low, obj_high) + obj_high['rev_deps'] = (obj_new,) + obj_low['rev_deps'] = (obj_new,) + for rdep in obj_new['rev_deps']: + rdep['deps'] = tuple(d if d is not obj else obj_new + for d in rdep['deps']) + + +def split_graph(objs): + for obj in objs: + if obj['op'] == '&' and obj['type'] == 'uint64_t' and len(obj['deps']) == 1 and obj['deps'][0]['type'] == 'uint128_t' and obj['deps'][0]['op'] == 'COMBINE': + combine_node = obj['deps'][0] + low = combine_node['deps'][0] + obj['deps'] = (low,) + low['rev_deps'] = tuple(list(low['rev_deps']) + [obj]) + if obj['op'] == '+' and obj['type'] == 'uint128_t' and len(obj['rev_deps']) == 2 and obj['rev_deps'][0]['op'] == 'GET_LOW' and obj['rev_deps'][1]['op'] == 'GET_HIGH': + for tmp in ('_tmp', '_temp'): + if obj['out'][-len(tmp):] == tmp: + obj['out'] = obj['out'][:-len(tmp)] + obj_low, obj_high = obj['rev_deps'] + obj_carry = {'out':'c' + obj['out'], 'style':'', 'deps':(obj_low,), 'op':'GET_CARRY', 'type':'CARRY', 'extra_out':tuple(), 'rev_deps':(obj_high,)} + assert(len(obj_low['deps']) == 1) + assert(len(obj_high['deps']) == 1) + assert(obj_low['type'] == 'uint64_t') + assert(obj_high['type'] == 'uint64_t') + obj_low['deps'], obj_high['deps'] = [], [obj_carry] + obj_low['op'] = '+' + obj_high['op'] = '+' + for dep in obj['deps']: + if dep['type'] == 'uint64_t': + obj_low['deps'].append(dep) + dep['rev_deps'] = tuple(d if d is not obj else obj_low + for d in dep['rev_deps']) + elif dep['type'] == 'uint128_t': + dep_low, dep_high = get_low_or_high(dep, 'low'), get_low_or_high(dep, 'high') + obj_low['deps'].append(dep_low) + obj_high['deps'].append(dep_high) + dep_low['rev_deps'] = tuple(list(dep_low['rev_deps']) + [obj_low]) + dep_high['rev_deps'] = tuple(list(dep_high['rev_deps']) + [obj_high]) + else: + assert(False) + obj_low['deps'], obj_high['deps'] = tuple(obj_low['deps']), tuple(obj_high['deps']) + obj['deps'] = tuple() + obj['rev_deps'] = tuple() + +def collect_ac_buckets(graph): + to_process = list(graph['out'].values()) + while len(to_process) > 0: + line, to_process = to_process[0], to_process[1:] + if line['op'] == '+': + args = list(line['deps']) + new_args = [] + while len(args) > 0: + arg, args = args[0], args[1:] + if arg['op'] == '+' and len(arg['rev_deps']) == 1 and line['type'] == 'uint128_t': + line['extra_out'] = tuple(sorted(list(line['extra_out']) + [arg['out']] + list(arg['extra_out']))) + for arg_arg in arg['deps']: + arg_arg['rev_deps'] = (line,) + args.append(arg_arg) + else: + new_args.append(arg) + line['deps'] = tuple(new_args) + to_process += list(line['deps']) + +def prune(out_vars, objs, seen=None): + if seen is None: seen = set() + for obj in objs: + if obj['out'] in seen: continue + prune(out_vars, obj['rev_deps'], seen=seen) + if any(len(rdep['deps']) == 0 + or (len(rdep['rev_deps']) == 0 and rdep['out'] not in out_vars) + for rdep in obj['rev_deps']): + #print('pruning %s' % obj['out']) + obj['rev_deps'] = tuple(rdep for rdep in obj['rev_deps'] + if len(rdep['deps']) > 0 + and (rdep['out'] in out_vars or len(rdep['rev_deps']) > 0)) + seen.add(obj['out']) + +def to_graph(input_data): + objs = dict((var, {'out':var, 'style':''}) for var in list(get_input_var_names(input_data)) + list(get_var_names(input_data))) + for var in get_input_var_names(input_data): + objs[var]['deps'] = tuple() + objs[var]['op'] = 'INPUT' + objs[var]['type'] = 'uint64_t' + objs[var]['extra_out'] = tuple() + for line in input_data['lines']: + var = line['out'] + objs[var]['extra_out'] = tuple() + objs[var]['op'] = line['op'] + objs[var]['type'] = line['type'] + objs[var]['deps'] = tuple(objs[arg] for arg in line['args'] if arg in objs.keys()) + for var in objs.keys(): + objs[var]['rev_deps'] = tuple(objs[arg] for arg in sorted(objs.keys()) + if any(node['out'] == var for node in objs[arg]['deps'])) + graph = {'out':dict((var, objs[var]) for var in get_output_var_names(input_data)), + 'in':dict((var, objs[var]) for var in get_input_var_names(input_data)) } + collect_ac_buckets(graph) + add_combine_low_high(objs.values()) + split_graph(objs.values()) + prune(set(graph['out'].keys()), objs.values()) + #split_graph(objs) + return graph + + +def print_dependencies(input_data, dependencies): + in_vars = get_input_var_names(input_data) + out_vars = get_output_var_names(input_data) + registers = assign_registers(input_data, dependencies) + body = ( + ''.join(' %s [label="%s (%s)",%s];\n' % (var, var, reg, COLOR_FOR_REGISTER[reg.split(':')[0]]) for var, reg in registers.items()) + + ''.join(' in -> %s ;\n' % var for var in in_vars) + + ''.join(' %s -> out ;\n' % var for var in out_vars) + + ''.join(''.join(' %s -> %s ;\n' % (out_var, in_var) for out_var in sorted(dependencies[in_var])) + for in_var in sorted(dependencies.keys())) + ) + return ('digraph G {\n' + body + '}\n') +def adjust_bits(input_data, graph): + for line in input_data['lines']: + if line['type'] == 'uint128_t': + graph = graph.replace(line['out'], line['out'] + '_128') + return graph + +def fill_node(node, color='red'): + node['style'] = ', style="filled", fillcolor="%s"' % color + +def fill_deps(node, color='red'): + fill_node(node) + for dep in node['deps']: + fill_deps(dep, color=color) + +def fill_subgraph(in_node, color='red'): + #print((in_node['out'], in_node['op'], [d['out'] for d in in_node['rev_deps']])) + fill_node(in_node, color=color) + if in_node['op'] != '+': + fill_deps(in_node, color=color) + for rdep in in_node['rev_deps']: + fill_subgraph(rdep, color=color) + +def is_temp(node): + for tmp in ('_tmp', '_temp'): + if node['out'][-len(tmp):] == tmp: + return True + return False + +# returns {cur_map with new_name->reg}, still_free_temps, still_free_list, all_temps +def allocate_node(existing, node, *args): + cur_map, free_temps, free_list, all_temps, freed, new_buckets = args + free_temps = list(free_temps) + free_list = list(free_list) + all_temps = list(all_temps) + full_map = dict(existing) + cur_map = dict(cur_map) + freed = list(freed) + new_buckets = list(new_buckets) + full_map.update(cur_map) + def do_ret(): + return cur_map, tuple(free_temps), tuple(free_list), tuple(all_temps), tuple(freed), tuple(new_buckets) + def do_free(var): + for reg in full_map[var].split(':'): + if reg in all_temps: + if reg not in free_temps: + free_temps.append(reg) + else: + if reg not in free_list: + free_list.append(reg) + def do_free_deps(node): + for dep in node['deps']: + if dep['out'] in full_map.keys() and all(n['out'] in full_map.keys() or n['out'] in cur_map.keys() for n in dep['rev_deps']): + if dep['out'] not in freed: + do_free(dep['out']) + freed.append(dep['out']) + if node['out'] in full_map.keys(): + do_free_deps(node) + return do_ret() + #print('alloc: %s (of %d)' % (node['out'], len(free_list))) + if node['op'] in ('GET_HIGH', 'GET_LOW') and len(node['deps']) == 1 and len(node['deps'][0]['rev_deps']) <= 2 and all(n['op'] in ('GET_HIGH', 'GET_LOW') for n in node['deps'][0]['rev_deps']) and node['deps'][0]['out'] in full_map.keys(): + reg_idx = {'GET_LOW':0, 'GET_HIGH':1}[node['op']] + cur_map[node['out']] = full_map[node['deps'][0]['out']].split(':')[reg_idx] + return do_ret() + if len(node['deps']) == 1 and len(node['deps'][0]['rev_deps']) == 1 and node['deps'][0]['out'] in full_map.keys() and node['type'] == node['deps'][0]['type']: + cur_map[node['out']] = full_map[node['deps'][0]['out']] + return do_ret() + if len(node['deps']) == 0 and node['op'] == 'INPUT': + assert(node['type'] == 'uint64_t') + cur_map[node['out']] = free_list.pop() + return do_ret() + if is_temp(node): + num_reg = {'uint64_t':1, 'uint128_t':2}[node['type']] + # TODO: make this more efficient by allowing re-use of + # dependnecies which are no longer needed (which are currently + # only reaped after this node is assigned) + while len(free_temps) < num_reg: + reg = free_list.pop() + free_temps.append(reg) + all_temps.append(reg) + cur_map[node['out']] = ':'.join(free_temps[:num_reg]) + free_temps = free_temps[num_reg:] + do_free_deps(node) + return do_ret() + if node['op'] == '+' and node['type'] == 'uint64_t' and len(node['extra_out']) > 0: + cur_map[node['out']] = free_list.pop() + new_buckets.append(node) + do_free_deps(node) + return do_ret() + if node['op'] == '*' and node['type'] == 'uint64_t' and len(node['deps']) == 1: + dep = node['deps'][0] + assert(dep['out'] in full_map.keys()) + if all(rdep is node or (rdep['out'] in full_map.keys() and full_map[rdep['out']] != full_map[dep['out']]) + for rdep in dep['rev_deps']): + cur_map[node['out']] = full_map[dep['out']] + else: + cur_map[node['out']] = free_list.pop() + return do_ret() + raw_input([node['out'], node['op'], node['type'], len(node['deps'])]) + return do_ret() + +def allocate_deps(existing, node, *args): + for dep in node['deps']: + args = allocate_deps(existing, dep, *args) + return allocate_node(existing, node, *args) + +def allocate_subgraph(existing, node, *args): + if node['op'] != '+': + args = allocate_deps(existing, node, *args) + else: + args = allocate_node(existing, node, *args) + if node['op'] != '+': + for rdep in node['rev_deps']: + args = allocate_subgraph(existing, rdep, *args) + return args + +def annotate_with_alloc(objs, mapping): + for obj in objs: + if obj['out'] in mapping.keys(): + obj['reg'] = ' (' + mapping[obj['out']] + ')' + else: + obj['reg'] = '' + +def get_plus_deps(nodes, ops=('+',), types=('uint128_t',), seen=None): + if seen is None: seen = set() + for node in nodes: + for dep in node['deps']: + if dep['out'] in seen: continue + seen.add(dep['out']) + if dep['op'] in ops and dep['type'] in types: + yield dep + for dep in get_plus_deps([dep], ops=ops, types=types, seen=seen): + yield dep + +def get_objects(start, ret=None): + if ret is None: ret = {} + for node in start: + if node['out'] in ret.keys(): continue + ret[node['out']] = node + get_objects(node['deps'], ret=ret) + return ret + +def print_nodes(objs): + for var in sorted(objs.keys(), key=(lambda s:(int(s.strip('cx_lowhightmp')), s))): + yield ' %s [label="%s%s" %s];\n' % (objs[var]['out'], ' + '.join(sorted([objs[var]['out']] + list(objs[var]['extra_out']))), objs[var]['reg'], objs[var]['style']) +def print_deps(objs): + for var in sorted(objs.keys()): + for dep in objs[var]['deps']: + yield ' %s -> %s [ label="%s" ] ;\n' % (dep['out'], objs[var]['out'], objs[var]['op']) + +def allocate_one_subtree(possible_nodes, existing, *args): + cur_map, free_temps, free_list, all_temps, freed, new_buckets = args + existing, cur_map, free_temps, free_list, all_temps, freed, new_buckets \ + = dict(existing), dict(cur_map), list(free_temps), list(free_list), list(all_temps), tuple(freed), tuple(new_buckets) + args = (cur_map, free_temps, free_list, all_temps, freed, new_buckets) + sorted_nodes = [] + for node in possible_nodes: + try: + lens = [len([rd for rd in d['rev_deps'] if rd['out'] not in existing.keys()]) for d in node['deps']] + temp_cur_map, temp_free_temps, temp_free_list, temp_all_temps, temp_freed, temp_new_buckets = allocate_subgraph(existing, node, *args) + if set(temp_free_temps) != set(temp_all_temps): + print(('BAD', node['out'], temp_cur_map, temp_free_temps, temp_free_list, temp_all_temps)) + sorted_nodes.append(((len(temp_free_list), + -min(lens), + -max(lens), + -len(temp_new_buckets), + len(temp_free_temps), + -int(node['out'].strip('x_lowhightemp'))), + node)) + except IndexError: + print('Too many reg: %s' % node['out']) + sorted_nodes = tuple(reversed(sorted(sorted_nodes, key=(lambda v: v[0])))) + print([(n[0], n[1]['out']) for n in sorted_nodes]) + node = sorted_nodes[0][1] + possible_nodes = [n for n in possible_nodes if n is not node] + print('Allocating for %s' % node['out']) + args = allocate_subgraph(existing, node, *args) + fill_subgraph(node) + cur_map, free_temps, free_list, all_temps, freed, new_buckets = args + return possible_nodes, cur_map, free_temps, free_list, all_temps, freed, new_buckets + + +def print_graph(graph, allocs): + objs = get_objects(graph['out'].values()) + annotate_with_alloc(objs.values(), allocs) + body = ''.join(print_nodes(objs)) + body += ''.join(print_deps(objs)) + body += ''.join(' in -> %s ;\n' % node['out'] for node in graph['in'].values()) + body += ''.join(' %s -> out ;\n' % node['out'] for node in graph['out'].values()) + return ('digraph G {\n' + body + '}\n') + +data_list = parse_lines(get_lines('femulDisplay.log')) +for i, data in enumerate(data_list): + graph = to_graph(data) + #buckets = tuple(sorted(get_plus_deps(graph['out'].values()), + # key=(lambda n:len(list(get_plus_deps([n])))))) + possible_nodes = dict((n['out'], n) + for in_obj in graph['in'].values() + for n in in_obj['rev_deps'] + if n['op'] == '*') + for var, node in list(possible_nodes.items()): + possible_nodes.update(dict((n['out'], n) + for n in node['rev_deps'] + if n['op'] == '*')) + possible_nodes = list(sorted(possible_nodes.items())) + possible_nodes = [n for v, n in possible_nodes] + existing, cur_map, free_temps, free_list, all_temps, freed, new_buckets = {}, {}, tuple(), tuple(REGISTERS), tuple(), tuple(), tuple() + for var in tuple(): #('x20_tmp', 'x49_tmp', 'x51_tmp', 'x55_tmp', 'x53_tmp'): + print(var) + cur_possible_nodes = [n for n in possible_nodes if n['out'] == var] + cur_possible_nodes, cur_map, free_temps, free_list, all_temps, freed, new_buckets \ + = allocate_one_subtree(cur_possible_nodes, existing, cur_map, free_temps, free_list, all_temps, freed, new_buckets) + existing.update(cur_map) + cur_map = {} + for count in range(10): + print(count) + possible_nodes, cur_map, free_temps, free_list, all_temps, freed, new_buckets \ + = allocate_one_subtree(possible_nodes, existing, cur_map, free_temps, free_list, all_temps, freed, new_buckets) + existing.update(cur_map) + cur_map = {} + #my_node = [n for n in possible_nodes if n['out'] == 'x36_tmp'][0] + #fill_subgraph(my_node) + #possible_nodes, cur_map, free_temps, free_list, all_temps \ + # = allocate_one_subtree([my_node], existing, cur_map, free_temps, free_list, all_temps) + #mul_node = possible_nodes[0] + #print([n['out'] for n in mul_node['deps']]) + #cur_map, free_temps, free_list, all_temps = allocate_subgraph(existing, mul_node, cur_map, free_temps, free_list, all_temps) + print((existing, free_temps, free_list, all_temps)) + #fill_deps(buckets[0]) + deps = adjust_bits(data, print_graph(graph, existing)) + with codecs.open('femulData%d.dot' % i, 'w', encoding='utf8') as f: + f.write(deps) + for fmt in ('png', 'svg'): + subprocess.call(['dot', '-T%s' % fmt, 'femulData%d.dot' % i, '-o', 'femulData%d.%s' % (i, fmt)]) diff --git a/etc/compile-by-zinc/make-graph-with-reg.py b/etc/compile-by-zinc/make-graph-with-reg.py index 275f45e25..12357bf1b 100755 --- a/etc/compile-by-zinc/make-graph-with-reg.py +++ b/etc/compile-by-zinc/make-graph-with-reg.py @@ -8,7 +8,7 @@ LAMBDA = u'\u03bb' OP_NAMES = {'*':'MUL', '+':'ADD', '>>':'SHL', '<<':'SHR', '|':'OR', '&':'AND'} -REGISTERS = tuple(['RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP', 'RSP'] +REGISTERS = tuple(['RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP'] #, 'RSP'] # RSP is stack pointer? + ['r%d' % i for i in range(8, 16)]) REGISTER_COLORS = ['color="black"', 'color="white",fillcolor="black"', 'color="maroon"', 'color="green"', 'fillcolor="olive"', 'color="navy"', 'color="purple"', 'fillcolor="teal"', 'fillcolor="silver"', 'fillcolor="gray"', 'fillcolor="red"', @@ -18,40 +18,6 @@ COLOR_FOR_REGISTER = dict(zip(REGISTERS, REGISTER_COLORS)) MAX_INSTRUCTION_WINDOW = 10000 -CORE_DATA = (('ADD_MUL', 2), ('MUL_CORE', 1), ('LEA_BW', 2)) -CORES = tuple(name for name, count in CORE_DATA) -CORE_COUNT = dict(CORE_DATA) - -BITWISE_CORES = tuple({ - 'core' : { 'name' : core_name , 'latency' : 1 }, - 'latency' : 1 - } for core_name in ('LEA_BW',)) - -MODEL = { - '*': tuple({ - 'core' : { 'name' : core_name , 'latency' : 1 }, - 'latency' : 3 - } - for core_name in ('ADD_MUL', 'MUL_CORE')), - '+': tuple({ - 'core' : { 'name' : core_name , 'latency' : 1 }, - 'latency' : 1 - } - for core_name in ('ADD_MUL', 'LEA_BW')), - '>>': BITWISE_CORES, - '<<': BITWISE_CORES, - '|': BITWISE_CORES, - '&': BITWISE_CORES, - 'LOAD': tuple({ - 'core' : { 'name' : core_name , 'latency' : 1 }, - 'latency' : 1 - } for core_name in REGISTERS), - 'STORE': tuple({ - 'core' : { 'name' : core_name , 'latency' : 1 }, - 'latency' : 1 - } for core_name in REGISTERS) - } - def get_lines(filename): with codecs.open(filename, 'r', encoding='utf8') as f: lines = f.read().replace('\r\n', '\n') @@ -281,7 +247,7 @@ def adjust_bits(input_data, graph): if line['type'] == 'uint128_t': graph = graph.replace(line['out'], line['out'] + '_128') return graph - + data_list = parse_lines(get_lines('femulDisplay.log')) for i, data in enumerate(data_list): -- cgit v1.2.3