From 7703626d4f78590489c99688ca3f427518089544 Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Tue, 5 Sep 2017 01:49:00 -0400 Subject: Fix schedule --- etc/compile-by-zinc/femulData0.dot | 244 +-- etc/compile-by-zinc/femulData0.png | Bin 1730398 -> 2063288 bytes etc/compile-by-zinc/femulData0.svg | 1776 ++++++++++---------- etc/compile-by-zinc/femulDisplayScheduled0.log | 111 ++ .../make-graph-with-reg-by-ac-buckets.py | 324 +++- 5 files changed, 1401 insertions(+), 1054 deletions(-) create mode 100644 etc/compile-by-zinc/femulDisplayScheduled0.log (limited to 'etc') diff --git a/etc/compile-by-zinc/femulData0.dot b/etc/compile-by-zinc/femulData0.dot index df4261daa..630f3a2cd 100644 --- a/etc/compile-by-zinc/femulData0.dot +++ b/etc/compile-by-zinc/femulData0.dot @@ -1,131 +1,131 @@ digraph G { - x5 [label="x5 (r9)" , style="filled", fillcolor="red"]; - x7 [label="x7 (r15)" , style="filled", fillcolor="red"]; + x5 [label="x5 (RDX)" , style="filled", fillcolor="red"]; + x7 [label="x7 (RDX)" , style="filled", fillcolor="red"]; x9 [label="x9 (RDX)" , style="filled", fillcolor="red"]; - x10 [label="x10" ]; - x11 [label="x11" ]; - x13 [label="x13 (r8)" , style="filled", fillcolor="red"]; - x15 [label="x15 (RSI)" , style="filled", fillcolor="red"]; - x17 [label="x17" ]; - x18 [label="x18 (r13)" , style="filled", fillcolor="red"]; - x19 [label="x19 (RBX)" , style="filled", fillcolor="red"]; - x20_128_high [label="x20_128_high (r11)" , style="filled", fillcolor="red"]; - x20_128_low [label="x20_128_low (r12)" , style="filled", fillcolor="red"]; - x20_128_tmp [label="x20_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; - x21_128_high [label="x21_128_high (r11)" , style="filled", fillcolor="red"]; + x10 [label="x10 (r15)" , style="filled", fillcolor="red"]; + x11 [label="x11 (RDX)" , style="filled", fillcolor="red"]; + x13 [label="x13 (r14)" , style="filled", fillcolor="red"]; + x15 [label="x15 (r11)" , style="filled", fillcolor="red"]; + x17 [label="x17 (r10)" , style="filled", fillcolor="red"]; + x18 [label="x18 (RDI)" , style="filled", fillcolor="red"]; + x19 [label="x19 (r8)" , style="filled", fillcolor="red"]; + x20_128_high [label="x20_128_high (r12)" , style="filled", fillcolor="red"]; + x20_128_low [label="x20_128_low (r13)" , style="filled", fillcolor="red"]; + x20_128_tmp [label="x20_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x21_128_high [label="x21_128_high (r13)" , style="filled", fillcolor="red"]; x21_128_low [label="x21_128_low (r12)" , style="filled", fillcolor="red"]; - x21_128_tmp [label="x21_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; - x22_128_high [label="x22_128_high (r11)" , style="filled", fillcolor="red"]; + x21_128_tmp [label="x21_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + x22_128_high [label="x22_128_high (r13)" , style="filled", fillcolor="red"]; x22_128_low [label="x22_128_low (r12)" , style="filled", fillcolor="red"]; - x22_128_tmp [label="x22_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; - x24_128_high [label="x24_128_high" ]; - x24_128_low [label="x24_128_low" ]; - x24_128_tmp [label="x24_128_tmp" ]; - x25_128_high [label="x25_128_high (r11)" , style="filled", fillcolor="red"]; - x25_128_low [label="x25_128_low (r12)" , style="filled", fillcolor="red"]; - x25_128_tmp [label="x25_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; - x27_128_high [label="x27_128_high (r11)" , style="filled", fillcolor="red"]; - x27_128_low [label="x27_128_low (r12)" , style="filled", fillcolor="red"]; - x27_128_tmp [label="x27_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; - x29_128_high [label="x29_128_high" ]; - x29_128_low [label="x29_128_low" ]; - x29_128_tmp [label="x29_128_tmp" ]; - x30_128_high [label="x30_128_high" ]; - x30_128_low [label="x30_128_low" ]; - x30_128_tmp [label="x30_128_tmp" ]; - x32_128_high [label="x32_128_high" ]; - x32_128_low [label="x32_128_low" ]; - x32_128_tmp [label="x32_128_tmp" ]; - x34_128_high [label="x34_128_high" ]; - x34_128_low [label="x34_128_low" ]; - x34_128_tmp [label="x34_128_tmp" ]; - x36_128_high [label="x36_128_high" ]; - x36_128_low [label="x36_128_low" ]; - x36_128_tmp [label="x36_128_tmp" ]; - x37_128_high [label="x37_128_high" ]; - x37_128_low [label="x37_128_low" ]; - x37_128_tmp [label="x37_128_tmp" ]; - x39_128_high [label="x39_128_high" ]; - x39_128_low [label="x39_128_low" ]; - x39_128_tmp [label="x39_128_tmp" ]; - x41_128_high [label="x41_128_high" ]; - x41_128_low [label="x41_128_low" ]; - x41_128_tmp [label="x41_128_tmp" ]; - x43_128_high [label="x43_128_high" ]; - x43_128_low [label="x43_128_low" ]; - x43_128_tmp [label="x43_128_tmp" ]; - x45 [label="x45" ]; - x46 [label="x46 (r14)" , style="filled", fillcolor="red"]; - x47 [label="x47 (RCX)" , style="filled", fillcolor="red"]; - x48 [label="x48" ]; - x49_128_high [label="x49_128_high" ]; - x49_128_low [label="x49_128_low" ]; - x49_128_tmp [label="x49_128_tmp" ]; - x51_128_high [label="x51_128_high (r11)" , style="filled", fillcolor="red"]; + x22_128_tmp [label="x22_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + x24_128_high [label="x24_128_high (r12)" , style="filled", fillcolor="red"]; + x24_128_low [label="x24_128_low (r13)" , style="filled", fillcolor="red"]; + x24_128_tmp [label="x24_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x25_128_high [label="x25_128_high (r12)" , style="filled", fillcolor="red"]; + x25_128_low [label="x25_128_low (r13)" , style="filled", fillcolor="red"]; + x25_128_tmp [label="x25_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x27_128_high [label="x27_128_high (r12)" , style="filled", fillcolor="red"]; + x27_128_low [label="x27_128_low (r13)" , style="filled", fillcolor="red"]; + x27_128_tmp [label="x27_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x29_128_high [label="x29_128_high (r13)" , style="filled", fillcolor="red"]; + x29_128_low [label="x29_128_low (r12)" , style="filled", fillcolor="red"]; + x29_128_tmp [label="x29_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + x30_128_high [label="x30_128_high (r13)" , style="filled", fillcolor="red"]; + x30_128_low [label="x30_128_low (r12)" , style="filled", fillcolor="red"]; + x30_128_tmp [label="x30_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + x32_128_high [label="x32_128_high (r13)" , style="filled", fillcolor="red"]; + x32_128_low [label="x32_128_low (r12)" , style="filled", fillcolor="red"]; + x32_128_tmp [label="x32_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + x34_128_high [label="x34_128_high (r13)" , style="filled", fillcolor="red"]; + x34_128_low [label="x34_128_low (r12)" , style="filled", fillcolor="red"]; + x34_128_tmp [label="x34_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + x36_128_high [label="x36_128_high (r12)" , style="filled", fillcolor="red"]; + x36_128_low [label="x36_128_low (r13)" , style="filled", fillcolor="red"]; + x36_128_tmp [label="x36_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x37_128_high [label="x37_128_high (r12)" , style="filled", fillcolor="red"]; + x37_128_low [label="x37_128_low (r13)" , style="filled", fillcolor="red"]; + x37_128_tmp [label="x37_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x39_128_high [label="x39_128_high (r12)" , style="filled", fillcolor="red"]; + x39_128_low [label="x39_128_low (r13)" , style="filled", fillcolor="red"]; + x39_128_tmp [label="x39_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x41_128_high [label="x41_128_high (r12)" , style="filled", fillcolor="red"]; + x41_128_low [label="x41_128_low (r13)" , style="filled", fillcolor="red"]; + x41_128_tmp [label="x41_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x43_128_high [label="x43_128_high (r12)" , style="filled", fillcolor="red"]; + x43_128_low [label="x43_128_low (r13)" , style="filled", fillcolor="red"]; + x43_128_tmp [label="x43_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x45 [label="x45 (r15)" , style="filled", fillcolor="red"]; + x46 [label="x46 (RDX)" , style="filled", fillcolor="red"]; + x47 [label="x47 (RDX)" , style="filled", fillcolor="red"]; + x48 [label="x48 (RDX)" , style="filled", fillcolor="red"]; + x49_128_high [label="x49_128_high (r13)" , style="filled", fillcolor="red"]; + x49_128_low [label="x49_128_low (r12)" , style="filled", fillcolor="red"]; + x49_128_tmp [label="x49_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + x51_128_high [label="x51_128_high (r13)" , style="filled", fillcolor="red"]; x51_128_low [label="x51_128_low (r12)" , style="filled", fillcolor="red"]; - x51_128_tmp [label="x51_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; - x53_128_high [label="x53_128_high (r11)" , style="filled", fillcolor="red"]; + x51_128_tmp [label="x51_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + x53_128_high [label="x53_128_high (r13)" , style="filled", fillcolor="red"]; x53_128_low [label="x53_128_low (r12)" , style="filled", fillcolor="red"]; - x53_128_tmp [label="x53_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; - x55_128_high [label="x55_128_high" ]; - x55_128_low [label="x55_128_low" ]; - x55_128_tmp [label="x55_128_tmp" ]; - cx56_128 [label="cx56_128" ]; - x56_128 [label="x50_128 + x52_128 + x54_128 + x56_128" ]; - x56_128_high [label="x50_128_high + x52_128_high + x54_128_high + x56_128_high (r10)" , style="filled", fillcolor="red"]; - x56_128_low [label="x50_128_low + x52_128_low + x54_128_low + x56_128_low (r14)" , style="filled", fillcolor="red"]; - x57_128_high [label="x57_128_high" ]; - x57_128_low [label="x57_128_low" ]; - x57_128_tmp [label="x57_128_tmp" ]; - x59_128_high [label="x59_128_high (r11)" , style="filled", fillcolor="red"]; - x59_128_low [label="x59_128_low (r12)" , style="filled", fillcolor="red"]; - x59_128_tmp [label="x59_128_tmp (r12:r11)" , style="filled", fillcolor="red"]; - x61_128_high [label="x61_128_high" ]; - x61_128_low [label="x61_128_low" ]; - x61_128_tmp [label="x61_128_tmp" ]; - x63_128_high [label="x63_128_high" ]; - x63_128_low [label="x63_128_low" ]; - x63_128_tmp [label="x63_128_tmp" ]; - x65_128_high [label="x65_128_high" ]; - x65_128_low [label="x65_128_low" ]; - x65_128_tmp [label="x65_128_tmp" ]; - x67_128_high [label="x67_128_high" ]; - x67_128_low [label="x67_128_low" ]; - x67_128_tmp [label="x67_128_tmp" ]; - x69 [label="x69" ]; - x70 [label="x70" ]; - cx71_128 [label="cx71_128" ]; - x71_128 [label="x23_128 + x58_128 + x60_128 + x62_128 + x71_128" ]; - x71_128_high [label="x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (RDI)" , style="filled", fillcolor="red"]; - x71_128_low [label="x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (RBP)" , style="filled", fillcolor="red"]; - x72 [label="x72" ]; - x73 [label="x73" ]; - cx74_128 [label="cx74_128" ]; - x74_128 [label="x26_128 + x28_128 + x64_128 + x66_128 + x74_128" ]; - x74_128_high [label="x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (RAX)" , style="filled", fillcolor="red"]; - x74_128_low [label="x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (RCX)" , style="filled", fillcolor="red"]; - x75 [label="x75" ]; - x76 [label="x76" ]; - cx77_128 [label="cx77_128" ]; - x77_128 [label="x31_128 + x33_128 + x35_128 + x68_128 + x77_128" ]; - x77_128_high [label="x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high" ]; - x77_128_low [label="x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low" ]; - x78 [label="x78" ]; - x79 [label="x79" ]; - cx80_128 [label="cx80_128" ]; - x80_128 [label="x38_128 + x40_128 + x42_128 + x44_128 + x80_128" ]; - x80_128_high [label="x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high" ]; - x80_128_low [label="x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low" ]; - x81 [label="x81" ]; - x82 [label="x82" ]; - x83 [label="x83" ]; - x84 [label="x84" ]; - x85 [label="x85" ]; - x86 [label="x86" ]; - x87 [label="x87" ]; - x88 [label="x88" ]; - x89 [label="x89" ]; - x90 [label="x90" ]; + x53_128_tmp [label="x53_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + x55_128_high [label="x55_128_high (r13)" , style="filled", fillcolor="red"]; + x55_128_low [label="x55_128_low (r12)" , style="filled", fillcolor="red"]; + x55_128_tmp [label="x55_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + cx56_128 [label="cx56_128 (c0)" , style="filled", fillcolor="red"]; + x56_128 [label="x50_128 + x52_128 + x54_128 + x56_128 (r15:r11)" , style="filled", fillcolor="red"]; + x56_128_high [label="x50_128_high + x52_128_high + x54_128_high + x56_128_high (r11)" , style="filled", fillcolor="red"]; + x56_128_low [label="x50_128_low + x52_128_low + x54_128_low + x56_128_low (r15)" , style="filled", fillcolor="red"]; + x57_128_high [label="x57_128_high (r12)" , style="filled", fillcolor="red"]; + x57_128_low [label="x57_128_low (r13)" , style="filled", fillcolor="red"]; + x57_128_tmp [label="x57_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x59_128_high [label="x59_128_high (r12)" , style="filled", fillcolor="red"]; + x59_128_low [label="x59_128_low (r13)" , style="filled", fillcolor="red"]; + x59_128_tmp [label="x59_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x61_128_high [label="x61_128_high (r12)" , style="filled", fillcolor="red"]; + x61_128_low [label="x61_128_low (r13)" , style="filled", fillcolor="red"]; + x61_128_tmp [label="x61_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x63_128_high [label="x63_128_high (r13)" , style="filled", fillcolor="red"]; + x63_128_low [label="x63_128_low (r12)" , style="filled", fillcolor="red"]; + x63_128_tmp [label="x63_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + x65_128_high [label="x65_128_high (r13)" , style="filled", fillcolor="red"]; + x65_128_low [label="x65_128_low (r12)" , style="filled", fillcolor="red"]; + x65_128_tmp [label="x65_128_tmp (r12:r13)" , style="filled", fillcolor="red"]; + x67_128_high [label="x67_128_high (r12)" , style="filled", fillcolor="red"]; + x67_128_low [label="x67_128_low (r13)" , style="filled", fillcolor="red"]; + x67_128_tmp [label="x67_128_tmp (r13:r12)" , style="filled", fillcolor="red"]; + x69 [label="x69 (r11)" , style="filled", fillcolor="red"]; + x70 [label="x70 (r15)" , style="filled", fillcolor="red"]; + cx71_128 [label="cx71_128 (c0)" , style="filled", fillcolor="red"]; + x71_128 [label="x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (r9:r10)" , style="filled", fillcolor="red"]; + x71_128_high [label="x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (r10)" , style="filled", fillcolor="red"]; + x71_128_low [label="x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (r9)" , style="filled", fillcolor="red"]; + x72 [label="x72 (r10)" , style="filled", fillcolor="red"]; + x73 [label="x73 (r9)" , style="filled", fillcolor="red"]; + cx74_128 [label="cx74_128 (c0)" , style="filled", fillcolor="red"]; + x74_128 [label="x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (RBP:r8)" , style="filled", fillcolor="red"]; + x74_128_high [label="x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (r8)" , style="filled", fillcolor="red"]; + x74_128_low [label="x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (RBP)" , style="filled", fillcolor="red"]; + x75 [label="x75 (r8)" , style="filled", fillcolor="red"]; + x76 [label="x76 (RBP)" , style="filled", fillcolor="red"]; + cx77_128 [label="cx77_128 (c0)" , style="filled", fillcolor="red"]; + x77_128 [label="x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (RSI:RDI)" , style="filled", fillcolor="red"]; + x77_128_high [label="x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (RDI)" , style="filled", fillcolor="red"]; + x77_128_low [label="x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (RSI)" , style="filled", fillcolor="red"]; + x78 [label="x78 (RDI)" , style="filled", fillcolor="red"]; + x79 [label="x79 (RSI)" , style="filled", fillcolor="red"]; + cx80_128 [label="cx80_128 (c0)" , style="filled", fillcolor="red"]; + x80_128 [label="x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (r15:r14)" , style="filled", fillcolor="red"]; + x80_128_high [label="x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (r14)" , style="filled", fillcolor="red"]; + x80_128_low [label="x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (r15)" , style="filled", fillcolor="red"]; + x81 [label="x81 (r14)" , style="filled", fillcolor="red"]; + x82 [label="x82 (r15)" , style="filled", fillcolor="red"]; + x83 [label="x83 (r14)" , style="filled", fillcolor="red"]; + x84 [label="x84 (r15)" , style="filled", fillcolor="red"]; + x85 [label="x85 (r15)" , style="filled", fillcolor="red"]; + x86 [label="x86 (r14)" , style="filled", fillcolor="red"]; + x87 [label="x87 (r15)" , style="filled", fillcolor="red"]; + x88 [label="x88 (r15)" , style="filled", fillcolor="red"]; + x89 [label="x89 (r9)" , style="filled", fillcolor="red"]; + x90 [label="x90 (r15)" , style="filled", fillcolor="red"]; x56_128_low -> cx56_128 [ label="GET_CARRY" ] ; x71_128_low -> cx71_128 [ label="GET_CARRY" ] ; x74_128_low -> cx74_128 [ label="GET_CARRY" ] ; diff --git a/etc/compile-by-zinc/femulData0.png b/etc/compile-by-zinc/femulData0.png index 52d41b310..fb080c265 100644 Binary files a/etc/compile-by-zinc/femulData0.png and b/etc/compile-by-zinc/femulData0.png differ diff --git a/etc/compile-by-zinc/femulData0.svg b/etc/compile-by-zinc/femulData0.svg index 361ceb3d1..51da9ca53 100644 --- a/etc/compile-by-zinc/femulData0.svg +++ b/etc/compile-by-zinc/femulData0.svg @@ -4,1924 +4,1924 @@ - + G - + x5 - -x5 (r9) + +x5 (RDX) x20_128_tmp - -x20_128_tmp (r12:r11) + +x20_128_tmp (r13:r12) x5->x20_128_tmp - - -* + + +* x21_128_tmp - -x21_128_tmp (r12:r11) + +x21_128_tmp (r12:r13) x5->x21_128_tmp - - -* + + +* x24_128_tmp - -x24_128_tmp + +x24_128_tmp (r13:r12) x5->x24_128_tmp - - -* + + +* x29_128_tmp - -x29_128_tmp + +x29_128_tmp (r12:r13) x5->x29_128_tmp - - -* + + +* x36_128_tmp - -x36_128_tmp + +x36_128_tmp (r13:r12) x5->x36_128_tmp - - -* + + +* x7 - -x7 (r15) + +x7 (RDX) x22_128_tmp - -x22_128_tmp (r12:r11) + +x22_128_tmp (r12:r13) x7->x22_128_tmp - - -* + + +* x27_128_tmp - -x27_128_tmp (r12:r11) + +x27_128_tmp (r13:r12) x7->x27_128_tmp - - -* + + +* x32_128_tmp - -x32_128_tmp + +x32_128_tmp (r12:r13) x7->x32_128_tmp - - -* + + +* x41_128_tmp - -x41_128_tmp + +x41_128_tmp (r13:r12) x7->x41_128_tmp - - -* + + +* x46 - -x46 (r14) + +x46 (RDX) x7->x46 - - -* + + +* x9 - -x9 (RDX) + +x9 (RDX) x25_128_tmp - -x25_128_tmp (r12:r11) + +x25_128_tmp (r13:r12) x9->x25_128_tmp - - -* + + +* x34_128_tmp - -x34_128_tmp + +x34_128_tmp (r12:r13) x9->x34_128_tmp - - -* + + +* x43_128_tmp - -x43_128_tmp + +x43_128_tmp (r13:r12) x9->x43_128_tmp - - -* + + +* x47 - -x47 (RCX) + +x47 (RDX) x9->x47 - - -* + + +* x10 - -x10 + +x10 (r15) x37_128_tmp - -x37_128_tmp + +x37_128_tmp (r13:r12) x10->x37_128_tmp - - -* + + +* x45 - -x45 + +x45 (r15) x10->x45 - - -* + + +* x11 - -x11 + +x11 (RDX) x30_128_tmp - -x30_128_tmp + +x30_128_tmp (r12:r13) x11->x30_128_tmp - - -* + + +* x39_128_tmp - -x39_128_tmp + +x39_128_tmp (r13:r12) x11->x39_128_tmp - - -* + + +* x48 - -x48 + +x48 (RDX) x11->x48 - - -* + + +* x13 - -x13 (r8) + +x13 (r14) x13->x20_128_tmp - - -* + + +* x13->x22_128_tmp - - -* + + +* x13->x25_128_tmp - - -* + + +* x13->x30_128_tmp - - -* + + +* x13->x37_128_tmp - - -* + + +* x15 - -x15 (RSI) + +x15 (r11) x15->x21_128_tmp - - -* + + +* x15->x27_128_tmp - - -* + + +* x15->x34_128_tmp - - -* + + +* x15->x39_128_tmp - - -* + + +* x49_128_tmp - -x49_128_tmp + +x49_128_tmp (r12:r13) x15->x49_128_tmp - - -* + + +* x17 - -x17 + +x17 (r10) x17->x24_128_tmp - - -* + + +* x17->x32_128_tmp - - -* + + +* x17->x43_128_tmp - - -* + + +* x55_128_tmp - -x55_128_tmp + +x55_128_tmp (r12:r13) x17->x55_128_tmp - - -* + + +* x57_128_tmp - -x57_128_tmp + +x57_128_tmp (r13:r12) x17->x57_128_tmp - - -* + + +* x18 - -x18 (r13) + +x18 (RDI) x18->x36_128_tmp - - -* + + +* x51_128_tmp - -x51_128_tmp (r12:r11) + +x51_128_tmp (r12:r13) x18->x51_128_tmp - - -* + + +* x59_128_tmp - -x59_128_tmp (r12:r11) + +x59_128_tmp (r13:r12) x18->x59_128_tmp - - -* + + +* x65_128_tmp - -x65_128_tmp + +x65_128_tmp (r12:r13) x18->x65_128_tmp - - -* + + +* x67_128_tmp - -x67_128_tmp + +x67_128_tmp (r13:r12) x18->x67_128_tmp - - -* + + +* x19 - -x19 (RBX) + +x19 (r8) x19->x29_128_tmp - - -* + + +* x19->x41_128_tmp - - -* + + +* x53_128_tmp - -x53_128_tmp (r12:r11) + +x53_128_tmp (r12:r13) x19->x53_128_tmp - - -* + + +* x61_128_tmp - -x61_128_tmp + +x61_128_tmp (r13:r12) x19->x61_128_tmp - - -* + + +* x63_128_tmp - -x63_128_tmp + +x63_128_tmp (r12:r13) x19->x63_128_tmp - - -* + + +* x20_128_high - -x20_128_high (r11) + +x20_128_high (r12) x56_128_high - -x50_128_high + x52_128_high + x54_128_high + x56_128_high (r10) + +x50_128_high + x52_128_high + x54_128_high + x56_128_high (r11) x20_128_high->x56_128_high - - -+ + + ++ x20_128_low - -x20_128_low (r12) + +x20_128_low (r13) x56_128_low - -x50_128_low + x52_128_low + x54_128_low + x56_128_low (r14) + +x50_128_low + x52_128_low + x54_128_low + x56_128_low (r15) x20_128_low->x56_128_low - - -+ + + ++ x20_128_tmp->x20_128_high - - -GET_HIGH + + +GET_HIGH x20_128_tmp->x20_128_low - - -GET_LOW + + +GET_LOW x21_128_high - -x21_128_high (r11) + +x21_128_high (r13) x71_128_high - -x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (RDI) + +x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (r10) x21_128_high->x71_128_high - - -+ + + ++ x21_128_low - -x21_128_low (r12) + +x21_128_low (r12) x71_128_low - -x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (RBP) + +x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (r9) x21_128_low->x71_128_low - - -+ + + ++ x21_128_tmp->x21_128_high - - -GET_HIGH + + +GET_HIGH x21_128_tmp->x21_128_low - - -GET_LOW + + +GET_LOW x22_128_high - -x22_128_high (r11) + +x22_128_high (r13) x22_128_high->x71_128_high - - -+ + + ++ x22_128_low - -x22_128_low (r12) + +x22_128_low (r12) x22_128_low->x71_128_low - - -+ + + ++ x22_128_tmp->x22_128_high - - -GET_HIGH + + +GET_HIGH x22_128_tmp->x22_128_low - - -GET_LOW + + +GET_LOW x24_128_high - -x24_128_high + +x24_128_high (r12) x74_128_high - -x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (RAX) + +x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (r8) x24_128_high->x74_128_high - - -+ + + ++ x24_128_low - -x24_128_low + +x24_128_low (r13) x74_128_low - -x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (RCX) + +x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (RBP) x24_128_low->x74_128_low - - -+ + + ++ x24_128_tmp->x24_128_high - - -GET_HIGH + + +GET_HIGH x24_128_tmp->x24_128_low - - -GET_LOW + + +GET_LOW x25_128_high - -x25_128_high (r11) + +x25_128_high (r12) x25_128_high->x74_128_high - - -+ + + ++ x25_128_low - -x25_128_low (r12) + +x25_128_low (r13) x25_128_low->x74_128_low - - -+ + + ++ x25_128_tmp->x25_128_high - - -GET_HIGH + + +GET_HIGH x25_128_tmp->x25_128_low - - -GET_LOW + + +GET_LOW x27_128_high - -x27_128_high (r11) + +x27_128_high (r12) x27_128_high->x74_128_high - - -+ + + ++ x27_128_low - -x27_128_low (r12) + +x27_128_low (r13) x27_128_low->x74_128_low - - -+ + + ++ x27_128_tmp->x27_128_high - - -GET_HIGH + + +GET_HIGH x27_128_tmp->x27_128_low - - -GET_LOW + + +GET_LOW x29_128_high - -x29_128_high + +x29_128_high (r13) x77_128_high - -x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high + +x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (RDI) x29_128_high->x77_128_high - - -+ + + ++ x29_128_low - -x29_128_low + +x29_128_low (r12) x77_128_low - -x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low + +x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (RSI) x29_128_low->x77_128_low - - -+ + + ++ x29_128_tmp->x29_128_high - - -GET_HIGH + + +GET_HIGH x29_128_tmp->x29_128_low - - -GET_LOW + + +GET_LOW x30_128_high - -x30_128_high + +x30_128_high (r13) x30_128_high->x77_128_high - - -+ + + ++ x30_128_low - -x30_128_low + +x30_128_low (r12) x30_128_low->x77_128_low - - -+ + + ++ x30_128_tmp->x30_128_high - - -GET_HIGH + + +GET_HIGH x30_128_tmp->x30_128_low - - -GET_LOW + + +GET_LOW x32_128_high - -x32_128_high + +x32_128_high (r13) x32_128_high->x77_128_high - - -+ + + ++ x32_128_low - -x32_128_low + +x32_128_low (r12) x32_128_low->x77_128_low - - -+ + + ++ x32_128_tmp->x32_128_high - - -GET_HIGH + + +GET_HIGH x32_128_tmp->x32_128_low - - -GET_LOW + + +GET_LOW x34_128_high - -x34_128_high + +x34_128_high (r13) x34_128_high->x77_128_high - - -+ + + ++ x34_128_low - -x34_128_low + +x34_128_low (r12) x34_128_low->x77_128_low - - -+ + + ++ x34_128_tmp->x34_128_high - - -GET_HIGH + + +GET_HIGH x34_128_tmp->x34_128_low - - -GET_LOW + + +GET_LOW x36_128_high - -x36_128_high + +x36_128_high (r12) x80_128_high - -x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high + +x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (r14) x36_128_high->x80_128_high - - -+ + + ++ x36_128_low - -x36_128_low + +x36_128_low (r13) x80_128_low - -x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low + +x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (r15) x36_128_low->x80_128_low - - -+ + + ++ x36_128_tmp->x36_128_high - - -GET_HIGH + + +GET_HIGH x36_128_tmp->x36_128_low - - -GET_LOW + + +GET_LOW x37_128_high - -x37_128_high + +x37_128_high (r12) x37_128_high->x80_128_high - - -+ + + ++ x37_128_low - -x37_128_low + +x37_128_low (r13) x37_128_low->x80_128_low - - -+ + + ++ x37_128_tmp->x37_128_high - - -GET_HIGH + + +GET_HIGH x37_128_tmp->x37_128_low - - -GET_LOW + + +GET_LOW x39_128_high - -x39_128_high + +x39_128_high (r12) x39_128_high->x80_128_high - - -+ + + ++ x39_128_low - -x39_128_low + +x39_128_low (r13) x39_128_low->x80_128_low - - -+ + + ++ x39_128_tmp->x39_128_high - - -GET_HIGH + + +GET_HIGH x39_128_tmp->x39_128_low - - -GET_LOW + + +GET_LOW x41_128_high - -x41_128_high + +x41_128_high (r12) x41_128_high->x80_128_high - - -+ + + ++ x41_128_low - -x41_128_low + +x41_128_low (r13) x41_128_low->x80_128_low - - -+ + + ++ x41_128_tmp->x41_128_high - - -GET_HIGH + + +GET_HIGH x41_128_tmp->x41_128_low - - -GET_LOW + + +GET_LOW x43_128_high - -x43_128_high + +x43_128_high (r12) x43_128_high->x80_128_high - - -+ + + ++ x43_128_low - -x43_128_low + +x43_128_low (r13) x43_128_low->x80_128_low - - -+ + + ++ x43_128_tmp->x43_128_high - - -GET_HIGH + + +GET_HIGH x43_128_tmp->x43_128_low - - -GET_LOW + + +GET_LOW x45->x49_128_tmp - - -* + + +* x45->x57_128_tmp - - -* + + +* x45->x63_128_tmp - - -* + + +* x45->x67_128_tmp - - -* + + +* x46->x51_128_tmp - - -* + + +* x47->x53_128_tmp - - -* + + +* x47->x59_128_tmp - - -* + + +* x48->x55_128_tmp - - -* + + +* x48->x61_128_tmp - - -* + + +* x48->x65_128_tmp - - -* + + +* x49_128_high - -x49_128_high + +x49_128_high (r13) x49_128_high->x56_128_high - - -+ + + ++ x49_128_low - -x49_128_low + +x49_128_low (r12) x49_128_low->x56_128_low - - -+ + + ++ x49_128_tmp->x49_128_high - - -GET_HIGH + + +GET_HIGH x49_128_tmp->x49_128_low - - -GET_LOW + + +GET_LOW x51_128_high - -x51_128_high (r11) + +x51_128_high (r13) x51_128_high->x56_128_high - - -+ + + ++ x51_128_low - -x51_128_low (r12) + +x51_128_low (r12) x51_128_low->x56_128_low - - -+ + + ++ x51_128_tmp->x51_128_high - - -GET_HIGH + + +GET_HIGH x51_128_tmp->x51_128_low - - -GET_LOW + + +GET_LOW x53_128_high - -x53_128_high (r11) + +x53_128_high (r13) x53_128_high->x56_128_high - - -+ + + ++ x53_128_low - -x53_128_low (r12) + +x53_128_low (r12) x53_128_low->x56_128_low - - -+ + + ++ x53_128_tmp->x53_128_high - - -GET_HIGH + + +GET_HIGH x53_128_tmp->x53_128_low - - -GET_LOW + + +GET_LOW x55_128_high - -x55_128_high + +x55_128_high (r13) x55_128_high->x56_128_high - - -+ + + ++ x55_128_low - -x55_128_low + +x55_128_low (r12) x55_128_low->x56_128_low - - -+ + + ++ x55_128_tmp->x55_128_high - - -GET_HIGH + + +GET_HIGH x55_128_tmp->x55_128_low - - -GET_LOW + + +GET_LOW cx56_128 - -cx56_128 + +cx56_128 (c0) cx56_128->x56_128_high - - -+ + + ++ x56_128 - -x50_128 + x52_128 + x54_128 + x56_128 + +x50_128 + x52_128 + x54_128 + x56_128 (r15:r11) x69 - -x69 + +x69 (r11) x56_128->x69 - - ->> + + +>> x56_128_high->x56_128 - - -COMBINE + + +COMBINE x56_128_low->cx56_128 - - -GET_CARRY + + +GET_CARRY x56_128_low->x56_128 - - -COMBINE + + +COMBINE x70 - -x70 + +x70 (r15) x56_128_low->x70 - - -& + + +& x57_128_high - -x57_128_high + +x57_128_high (r12) x57_128_high->x71_128_high - - -+ + + ++ x57_128_low - -x57_128_low + +x57_128_low (r13) x57_128_low->x71_128_low - - -+ + + ++ x57_128_tmp->x57_128_high - - -GET_HIGH + + +GET_HIGH x57_128_tmp->x57_128_low - - -GET_LOW + + +GET_LOW x59_128_high - -x59_128_high (r11) + +x59_128_high (r12) x59_128_high->x71_128_high - - -+ + + ++ x59_128_low - -x59_128_low (r12) + +x59_128_low (r13) x59_128_low->x71_128_low - - -+ + + ++ x59_128_tmp->x59_128_high - - -GET_HIGH + + +GET_HIGH x59_128_tmp->x59_128_low - - -GET_LOW + + +GET_LOW x61_128_high - -x61_128_high + +x61_128_high (r12) x61_128_high->x71_128_high - - -+ + + ++ x61_128_low - -x61_128_low + +x61_128_low (r13) x61_128_low->x71_128_low - - -+ + + ++ x61_128_tmp->x61_128_high - - -GET_HIGH + + +GET_HIGH x61_128_tmp->x61_128_low - - -GET_LOW + + +GET_LOW x63_128_high - -x63_128_high + +x63_128_high (r13) x63_128_high->x74_128_high - - -+ + + ++ x63_128_low - -x63_128_low + +x63_128_low (r12) x63_128_low->x74_128_low - - -+ + + ++ x63_128_tmp->x63_128_high - - -GET_HIGH + + +GET_HIGH x63_128_tmp->x63_128_low - - -GET_LOW + + +GET_LOW x65_128_high - -x65_128_high + +x65_128_high (r13) x65_128_high->x74_128_high - - -+ + + ++ x65_128_low - -x65_128_low + +x65_128_low (r12) x65_128_low->x74_128_low - - -+ + + ++ x65_128_tmp->x65_128_high - - -GET_HIGH + + +GET_HIGH x65_128_tmp->x65_128_low - - -GET_LOW + + +GET_LOW x67_128_high - -x67_128_high + +x67_128_high (r12) x67_128_high->x77_128_high - - -+ + + ++ x67_128_low - -x67_128_low + +x67_128_low (r13) x67_128_low->x77_128_low - - -+ + + ++ x67_128_tmp->x67_128_high - - -GET_HIGH + + +GET_HIGH x67_128_tmp->x67_128_low - - -GET_LOW + + +GET_LOW x69->x71_128_low - - -+ + + ++ x84 - -x84 + +x84 (r15) x70->x84 - - -+ + + ++ cx71_128 - -cx71_128 + +cx71_128 (c0) cx71_128->x71_128_high - - -+ + + ++ x71_128 - -x23_128 + x58_128 + x60_128 + x62_128 + x71_128 + +x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (r9:r10) x72 - -x72 + +x72 (r10) x71_128->x72 - - ->> + + +>> x71_128_high->x71_128 - - -COMBINE + + +COMBINE x71_128_low->cx71_128 - - -GET_CARRY + + +GET_CARRY x71_128_low->x71_128 - - -COMBINE + + +COMBINE x73 - -x73 + +x73 (r9) x71_128_low->x73 - - -& + + +& x72->x74_128_low - - -+ + + ++ x87 - -x87 + +x87 (r15) x73->x87 - - -+ + + ++ cx74_128 - -cx74_128 + +cx74_128 (c0) cx74_128->x74_128_high - - -+ + + ++ x74_128 - -x26_128 + x28_128 + x64_128 + x66_128 + x74_128 + +x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (RBP:r8) x75 - -x75 + +x75 (r8) x74_128->x75 - - ->> + + +>> x74_128_high->x74_128 - - -COMBINE + + +COMBINE x74_128_low->cx74_128 - - -GET_CARRY + + +GET_CARRY x74_128_low->x74_128 - - -COMBINE + + +COMBINE x76 - -x76 + +x76 (RBP) x74_128_low->x76 - - -& + + +& x75->x77_128_low - - -+ + + ++ x90 - -x90 + +x90 (r15) x76->x90 - - -+ + + ++ cx77_128 - -cx77_128 + +cx77_128 (c0) cx77_128->x77_128_high - - -+ + + ++ x77_128 - -x31_128 + x33_128 + x35_128 + x68_128 + x77_128 + +x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (RSI:RDI) x78 - -x78 + +x78 (RDI) x77_128->x78 - - ->> + + +>> x77_128_high->x77_128 - - -COMBINE + + +COMBINE x77_128_low->cx77_128 - - -GET_CARRY + + +GET_CARRY x77_128_low->x77_128 - - -COMBINE + + +COMBINE x79 - -x79 + +x79 (RSI) x77_128_low->x79 - - -& + + +& x78->x80_128_low - - -+ + + ++ out - -out + +out x79->out - - + + cx80_128 - -cx80_128 + +cx80_128 (c0) cx80_128->x80_128_high - - -+ + + ++ x80_128 - -x38_128 + x40_128 + x42_128 + x44_128 + x80_128 + +x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (r15:r14) x81 - -x81 + +x81 (r14) x80_128->x81 - - ->> + + +>> x80_128_high->x80_128 - - -COMBINE + + +COMBINE x80_128_low->cx80_128 - - -GET_CARRY + + +GET_CARRY x80_128_low->x80_128 - - -COMBINE + + +COMBINE x82 - -x82 + +x82 (r15) x80_128_low->x82 - - -& + + +& x83 - -x83 + +x83 (r14) x81->x83 - - -* + + +* x82->out - - + + x83->x84 - - -+ + + ++ x85 - -x85 + +x85 (r15) x84->x85 - - ->> + + +>> x86 - -x86 + +x86 (r14) x84->x86 - - -& + + +& x85->x87 - - -+ + + ++ x86->out - - + + x88 - -x88 + +x88 (r15) x87->x88 - - ->> + + +>> x89 - -x89 + +x89 (r9) x87->x89 - - -& + + +& x88->x90 - - -+ + + ++ x89->out - - + + x90->out - - + + in - -in + +in in->x5 - - + + in->x7 - - + + in->x9 - - + + in->x10 - - + + in->x11 - - + + in->x13 - - + + in->x15 - - + + in->x17 - - + + in->x18 - - + + in->x19 - - + + diff --git a/etc/compile-by-zinc/femulDisplayScheduled0.log b/etc/compile-by-zinc/femulDisplayScheduled0.log new file mode 100644 index 000000000..2d6bcff17 --- /dev/null +++ b/etc/compile-by-zinc/femulDisplayScheduled0.log @@ -0,0 +1,111 @@ +// Convention is low_reg:high_reg +r15 <- LOAD x10;r14 <- LOAD x13;r13:r12 <- MULX r15, r14; // x37_tmp = x10 * x13 +r14 <- MOV r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +r15 <- MOV r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +r14 <- ADCX (cx80), r14, 0x0; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +r14 <- ADX r14, c0; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +r14 <- SHR r15:r14, 0x33; // x81 = x80_low:x80_high >> 0x33 +r15 <- AND r15, 0x7ffffffffffff; // x82 = x80_low & 0x7ffffffffffff +r15 <- MULX r15, 0x13; // x45 = x10 * 0x13 +r11 <- LOAD x15;r12:r13 <- MULX r15, r11; // x49_tmp = x45 * x15 +r11 <- MOV r13; // bucket: x50_high + x52_high + x54_high + x56_high +r15 <- MOV r12; // bucket: x50_low + x52_low + x54_low + x56_low +r11 <- ADCX (cx56), r11, 0x0; // bucket: x50_high + x52_high + x54_high + x56_high +r11 <- ADX r11, c0; // bucket: x50_high + x52_high + x54_high + x56_high +r11 <- SHR r15:r11, 0x33; // x69 = x56_low:x56_high >> 0x33 +r9 <- MOV r11; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +r15 <- AND r15, 0x7ffffffffffff; // x70 = x56_low & 0x7ffffffffffff +r10 <- LOAD x17;r13:r12 <- MULX r15, r10; // x57_tmp = x45 * x17 +r10 <- MOV r12; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +r9, (cx71) <- ADD r9, r13; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +r10 <- ADCX (cx71), r10, 0x0; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +r10 <- ADX r10, c0; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +r10 <- SHR r9:r10, 0x33; // x72 = x71_low:x71_high >> 0x33 +RBP <- MOV r10; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +r9 <- AND r9, 0x7ffffffffffff; // x73 = x71_low & 0x7ffffffffffff +r15 <- ADX r15, r14; // x84 = x70 + x83 +r15 <- SHR r15, 0x33; // x85 = x70 >> 0x33 +r14 <- AND r15, 0x7ffffffffffff; // x86 = x84 & 0x7ffffffffffff +r15 <- ADX r15, r9; // x87 = x85 + x73 +r15 <- SHR r15, 0x33; // x88 = x85 >> 0x33 +r9 <- AND r15, 0x7ffffffffffff; // x89 = x87 & 0x7ffffffffffff +r8 <- LOAD x19;r12:r13 <- MULX r15, r8; // x63_tmp = x45 * x19 +r8 <- MOV r13; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +RBP, (cx74) <- ADD RBP, r12; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +r8 <- ADCX (cx74), r8, 0x0; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +r8 <- ADX r8, c0; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +r8 <- SHR RBP:r8, 0x33; // x75 = x74_low:x74_high >> 0x33 +RSI <- MOV r8; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +RBP <- AND RBP, 0x7ffffffffffff; // x76 = x74_low & 0x7ffffffffffff +r15 <- ADX r15, RBP; // x90 = x88 + x76 +RDI <- LOAD x18;r13:r12 <- MULX r15, RDI; // x67_tmp = x45 * x18 +RDI <- MOV r12; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RSI, (cx77) <- ADD RSI, r13; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +RDI <- ADCX (cx77), RDI, 0x0; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RDI <- ADX RDI, c0; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RDI <- SHR RSI:RDI, 0x33; // x78 = x77_low:x77_high >> 0x33 +r15, (cx80) <- ADD r15, RDI; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +RSI <- AND RSI, 0x7ffffffffffff; // x79 = x77_low & 0x7ffffffffffff +RDX <- LOAD x11;r12:r13 <- MULX RDX, r14; // x30_tmp = x11 * x13 +RDI <- ADX RDI, r13; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RSI, (cx77) <- ADC (cx77), RSI, r12; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +r13:r12 <- MULX RDX, r11; // x39_tmp = x11 * x15 +r14 <- ADX r14, r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +r15, (cx80) <- ADC (cx80), r15, r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +RDX <- MULX RDX, 0x13; // x48 = x11 * 0x13 +r12:r13 <- MULX RDX, r10; // x55_tmp = x48 * x17 +r11 <- ADX r11, r13; // bucket: x50_high + x52_high + x54_high + x56_high +r15, (cx56) <- ADD r15, r12; // bucket: x50_low + x52_low + x54_low + x56_low +r13:r12 <- MULX RDX, r8; // x61_tmp = x48 * x19 +r10 <- ADX r10, r12; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +r9, (cx71) <- ADC (cx71), r9, r13; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +r12:r13 <- MULX RDX, RDI; // x65_tmp = x48 * x18 +r8 <- ADX r8, r13; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +RBP, (cx74) <- ADC (cx74), RBP, r12; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +RDX <- LOAD x9;r13:r12 <- MULX RDX, r14; // x25_tmp = x9 * x13 +r8 <- ADX r8, r12; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +RBP, (cx74) <- ADC (cx74), RBP, r13; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +r12:r13 <- MULX RDX, r11; // x34_tmp = x9 * x15 +RDI <- ADX RDI, r13; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RSI, (cx77) <- ADC (cx77), RSI, r12; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +r13:r12 <- MULX RDX, r10; // x43_tmp = x9 * x17 +r14 <- ADX r14, r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +r15, (cx80) <- ADC (cx80), r15, r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +RDX <- MULX RDX, 0x13; // x47 = x9 * 0x13 +r12:r13 <- MULX RDX, r8; // x53_tmp = x47 * x19 +r11 <- ADX r11, r13; // bucket: x50_high + x52_high + x54_high + x56_high +r15, (cx56) <- ADC (cx56), r15, r12; // bucket: x50_low + x52_low + x54_low + x56_low +r13:r12 <- MULX RDX, RDI; // x59_tmp = x47 * x18 +r10 <- ADX r10, r12; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +r9, (cx71) <- ADC (cx71), r9, r13; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +RDX <- LOAD x7;r12:r13 <- MULX RDX, r14; // x22_tmp = x7 * x13 +r10 <- ADX r10, r13; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +r9, (cx71) <- ADC (cx71), r9, r12; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +r13:r12 <- MULX RDX, r11; // x27_tmp = x7 * x15 +r8 <- ADX r8, r12; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +RBP, (cx74) <- ADC (cx74), RBP, r13; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +r12:r13 <- MULX RDX, r10; // x32_tmp = x7 * x17 +RDI <- ADX RDI, r13; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RSI, (cx77) <- ADC (cx77), RSI, r12; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +r13:r12 <- MULX RDX, r8; // x41_tmp = x7 * x19 +r14 <- ADX r14, r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +r15, (cx80) <- ADC (cx80), r15, r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +RDX <- MULX RDX, 0x13; // x46 = x7 * 0x13 +r12:r13 <- MULX RDX, RDI; // x51_tmp = x46 * x18 +r11 <- ADX r11, r13; // bucket: x50_high + x52_high + x54_high + x56_high +r15, (cx56) <- ADC (cx56), r15, r12; // bucket: x50_low + x52_low + x54_low + x56_low +RDX <- LOAD x5;r13:r12 <- MULX RDX, r14; // x20_tmp = x5 * x13 +r11 <- ADX r11, r12; // bucket: x50_high + x52_high + x54_high + x56_high +r15, (cx56) <- ADC (cx56), r15, r13; // bucket: x50_low + x52_low + x54_low + x56_low +r12:r13 <- MULX RDX, r11; // x21_tmp = x5 * x15 +r10 <- ADX r10, r13; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +r9, (cx71) <- ADC (cx71), r9, r12; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +r13:r12 <- MULX RDX, r10; // x24_tmp = x5 * x17 +r8 <- ADX r8, r12; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +RBP, (cx74) <- ADC (cx74), RBP, r13; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +r12:r13 <- MULX RDX, r8; // x29_tmp = x5 * x19 +RDI <- ADX RDI, r13; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +RSI, (cx77) <- ADC (cx77), RSI, r12; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +r13:r12 <- MULX RDX, RDI; // x36_tmp = x5 * x18 +r14 <- ADX r14, r12; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +r15, (cx80) <- ADC (cx80), r15, r13; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low diff --git a/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py b/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py index 53088457e..5f13b28a5 100755 --- a/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py +++ b/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py @@ -134,6 +134,7 @@ def split_graph(objs): else: assert(False) obj_low['deps'], obj_high['deps'] = tuple(obj_low['deps']), tuple(obj_high['deps']) + obj_low['rev_deps'] = list(obj_low['rev_deps']) + [obj_carry] obj['deps'] = tuple() obj['rev_deps'] = tuple() @@ -156,19 +157,19 @@ def collect_ac_buckets(graph): line['deps'] = tuple(new_args) to_process += list(line['deps']) -def prune(out_vars, objs, seen=None): - if seen is None: seen = set() - for obj in objs: - if obj['out'] in seen: continue - prune(out_vars, obj['rev_deps'], seen=seen) - if any(len(rdep['deps']) == 0 - or (len(rdep['rev_deps']) == 0 and rdep['out'] not in out_vars) - for rdep in obj['rev_deps']): - #print('pruning %s' % obj['out']) - obj['rev_deps'] = tuple(rdep for rdep in obj['rev_deps'] - if len(rdep['deps']) > 0 - and (rdep['out'] in out_vars or len(rdep['rev_deps']) > 0)) - seen.add(obj['out']) +def get_objects(start, ret=None): + if ret is None: ret = {} + for node in start: + if node['out'] in ret.keys(): continue + ret[node['out']] = node + get_objects(node['deps'], ret=ret) + return ret + +def prune(start): + objs = get_objects(start) + for var in objs.keys(): + objs[var]['rev_deps'] = tuple(objs[arg] for arg in sorted(objs.keys()) + if any(node['out'] == var for node in objs[arg]['deps'])) def to_graph(input_data): objs = dict((var, {'out':var, 'style':''}) for var in list(get_input_var_names(input_data)) + list(get_var_names(input_data))) @@ -191,7 +192,7 @@ def to_graph(input_data): collect_ac_buckets(graph) add_combine_low_high(objs.values()) split_graph(objs.values()) - prune(set(graph['out'].keys()), objs.values()) + prune(tuple(graph['out'].values())) #split_graph(objs) return graph @@ -236,9 +237,13 @@ def is_temp(node): return True return False -# returns {cur_map with new_name->reg}, still_free_temps, still_free_list, all_temps +def deps_allocated(full_map, node): + if node['out'] not in full_map.keys(): return False + return all(deps_allocated(full_map, dep) for dep in node['deps']) + +# returns {cur_map with new_name->reg}, still_free_temps, still_free_list, all_temps, freed, new_buckets, emit_vars def allocate_node(existing, node, *args): - cur_map, free_temps, free_list, all_temps, freed, new_buckets = args + cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars = args free_temps = list(free_temps) free_list = list(free_list) all_temps = list(all_temps) @@ -246,9 +251,10 @@ def allocate_node(existing, node, *args): cur_map = dict(cur_map) freed = list(freed) new_buckets = list(new_buckets) + emit_vars = list(emit_vars) full_map.update(cur_map) def do_ret(): - return cur_map, tuple(free_temps), tuple(free_list), tuple(all_temps), tuple(freed), tuple(new_buckets) + return cur_map, tuple(free_temps), tuple(free_list), tuple(all_temps), tuple(freed), tuple(new_buckets), tuple(emit_vars) def do_free(var): for reg in full_map[var].split(':'): if reg in all_temps: @@ -258,11 +264,17 @@ def allocate_node(existing, node, *args): if reg not in free_list: free_list.append(reg) def do_free_deps(node): - for dep in node['deps']: - if dep['out'] in full_map.keys() and all(n['out'] in full_map.keys() or n['out'] in cur_map.keys() for n in dep['rev_deps']): + full_map.update(cur_map) + if deps_allocated(full_map, node): + for dep in node['deps']: if dep['out'] not in freed: do_free(dep['out']) freed.append(dep['out']) + elif node['out'] in full_map.keys(): + for dep in node['deps']: + if dep['out'] not in freed and dep['out'] in full_map.keys() and all(reg in all_temps for reg in full_map[dep['out']].split(':')): + do_free(dep['out']) + freed.append(dep['out']) if node['out'] in full_map.keys(): do_free_deps(node) return do_ret() @@ -270,13 +282,16 @@ def allocate_node(existing, node, *args): if node['op'] in ('GET_HIGH', 'GET_LOW') and len(node['deps']) == 1 and len(node['deps'][0]['rev_deps']) <= 2 and all(n['op'] in ('GET_HIGH', 'GET_LOW') for n in node['deps'][0]['rev_deps']) and node['deps'][0]['out'] in full_map.keys(): reg_idx = {'GET_LOW':0, 'GET_HIGH':1}[node['op']] cur_map[node['out']] = full_map[node['deps'][0]['out']].split(':')[reg_idx] + emit_vars.append(node) return do_ret() if len(node['deps']) == 1 and len(node['deps'][0]['rev_deps']) == 1 and node['deps'][0]['out'] in full_map.keys() and node['type'] == node['deps'][0]['type']: cur_map[node['out']] = full_map[node['deps'][0]['out']] + emit_vars.append(node) return do_ret() if len(node['deps']) == 0 and node['op'] == 'INPUT': assert(node['type'] == 'uint64_t') cur_map[node['out']] = free_list.pop() + emit_vars.append(node) return do_ret() if is_temp(node): num_reg = {'uint64_t':1, 'uint128_t':2}[node['type']] @@ -289,10 +304,12 @@ def allocate_node(existing, node, *args): all_temps.append(reg) cur_map[node['out']] = ':'.join(free_temps[:num_reg]) free_temps = free_temps[num_reg:] + emit_vars.append(node) do_free_deps(node) return do_ret() if node['op'] == '+' and node['type'] == 'uint64_t' and len(node['extra_out']) > 0: cur_map[node['out']] = free_list.pop() + emit_vars.append(node) new_buckets.append(node) do_free_deps(node) return do_ret() @@ -304,6 +321,7 @@ def allocate_node(existing, node, *args): cur_map[node['out']] = full_map[dep['out']] else: cur_map[node['out']] = free_list.pop() + emit_vars.append(node) return do_ret() raw_input([node['out'], node['op'], node['type'], len(node['deps'])]) return do_ret() @@ -341,14 +359,6 @@ def get_plus_deps(nodes, ops=('+',), types=('uint128_t',), seen=None): for dep in get_plus_deps([dep], ops=ops, types=types, seen=seen): yield dep -def get_objects(start, ret=None): - if ret is None: ret = {} - for node in start: - if node['out'] in ret.keys(): continue - ret[node['out']] = node - get_objects(node['deps'], ret=ret) - return ret - def print_nodes(objs): for var in sorted(objs.keys(), key=(lambda s:(int(s.strip('cx_lowhightmp')), s))): yield ' %s [label="%s%s" %s];\n' % (objs[var]['out'], ' + '.join(sorted([objs[var]['out']] + list(objs[var]['extra_out']))), objs[var]['reg'], objs[var]['style']) @@ -357,18 +367,88 @@ def print_deps(objs): for dep in objs[var]['deps']: yield ' %s -> %s [ label="%s" ] ;\n' % (dep['out'], objs[var]['out'], objs[var]['op']) -def allocate_one_subtree(possible_nodes, existing, *args): - cur_map, free_temps, free_list, all_temps, freed, new_buckets = args - existing, cur_map, free_temps, free_list, all_temps, freed, new_buckets \ - = dict(existing), dict(cur_map), list(free_temps), list(free_list), list(all_temps), tuple(freed), tuple(new_buckets) - args = (cur_map, free_temps, free_list, all_temps, freed, new_buckets) +def push_allocate(existing, nodes, *args, **kwargs): + if 'seen' not in kwargs.keys(): kwargs['seen'] = set() + full_map = dict(existing) + for node in nodes: + if node['out'] in kwargs['seen']: continue + cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars = args + free_temps = list(free_temps) + free_list = list(free_list) + all_temps = list(all_temps) + cur_map = dict(cur_map) + freed = list(freed) + new_buckets = list(new_buckets) + emit_vars = list(emit_vars) + full_map.update(cur_map) + if node['out'] in full_map.keys() and node['op'] == '+' and all(d['out'] not in full_map.keys() for d in node['rev_deps']) and set(d['op'] for d in node['rev_deps']) == set(('&', 'COMBINE', 'GET_CARRY')): + and_node = [d for d in node['rev_deps'] if d['op'] == '&'][0] + carry_node = [d for d in node['rev_deps'] if d['op'] == 'GET_CARRY'][0] + combine_node = [d for d in node['rev_deps'] if d['op'] == 'COMBINE'][0] + high_node = [d for d in combine_node['deps'] if d is not node][0] + assert(len(combine_node['rev_deps']) == 1) + shr_node = combine_node['rev_deps'][0] + assert(shr_node['op'] == '>>') + assert(shr_node['out'] not in full_map.keys()) + assert(len(combine_node['deps']) == 2) + assert(all(d['out'] in full_map.keys() for d in combine_node['deps'])) + cur_map[carry_node['out']] = 'c0' + emit_vars.append(carry_node) + cur_map[combine_node['out']] = ':'.join(full_map[d['out']] for d in combine_node['deps']) + emit_vars.append(combine_node) + assert(high_node['out'] in full_map.keys()) + cur_map[shr_node['out']] = full_map[high_node['out']] + emit_vars.append(shr_node) + cur_map[and_node['out']] = full_map[node['out']] + emit_vars.append(and_node) + fill_node(combine_node) + fill_node(carry_node) + fill_node(shr_node) + fill_node(and_node) + elif node['out'] in full_map.keys() and len(node['rev_deps']) == 1 and all(d['out'] not in full_map.keys() for d in node['rev_deps']) and len(node['rev_deps'][0]['deps']) == 1 and node['type'] == node['rev_deps'][0]['type']: + next_node = node['rev_deps'][0] + cur_map[next_node['out']] = full_map[node['out']] + fill_node(next_node) + full_map.update(cur_map) + elif node['out'] not in full_map.keys() and len(node['rev_deps']) == 2 and len(node['deps']) == 2 and all(d['out'] not in full_map.keys() for d in node['rev_deps']) and all(d['out'] in full_map.keys() for d in node['deps']) and node['type'] == 'uint64_t' and all(d['type'] == 'uint64_t' for d in node['rev_deps']) and all(d['type'] == 'uint64_t' for d in node['deps']): + from1, from2 = node['deps'] + to1, to2 = node['rev_deps'] + assert(full_map[from1['out']] != full_map[from2['out']]) + cur_map[node['out']] = full_map[from1['out']] + emit_vars.append(node) + cur_map[to1['out']] = full_map[from1['out']] + emit_vars.append(to1) + cur_map[to2['out']] = full_map[from2['out']] + emit_vars.append(to2) + fill_node(node) + fill_node(to1) + fill_node(to2) + full_map.update(cur_map) + elif node['out'] not in full_map.keys() and len(node['rev_deps']) == 0 and len(node['deps']) == 2 and all(d['out'] not in full_map.keys() for d in node['rev_deps']) and all(d['out'] in full_map.keys() for d in node['deps']) and node['type'] == 'uint64_t' and all(d['type'] == 'uint64_t' for d in node['rev_deps']) and all(d['type'] == 'uint64_t' for d in node['deps']): + from1, from2 = node['deps'] + assert(full_map[from1['out']] != full_map[from2['out']]) + cur_map[node['out']] = full_map[from1['out']] + emit_vars.append(node) + fill_node(node) + full_map.update(cur_map) + full_map.update(cur_map) + args = (cur_map, tuple(free_temps), tuple(free_list), tuple(all_temps), tuple(freed), tuple(new_buckets), tuple(emit_vars)) + kwargs['seen'].add(node['out']) + args = push_allocate(existing, node['rev_deps'], *args, **kwargs) + return args + +def allocate_one_subtree(in_nodes, possible_nodes, existing, *args): + cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars = args + existing, cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars \ + = dict(existing), dict(cur_map), list(free_temps), list(free_list), list(all_temps), tuple(freed), tuple(new_buckets), tuple(emit_vars) + args = (cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars) sorted_nodes = [] for node in possible_nodes: try: lens = [len([rd for rd in d['rev_deps'] if rd['out'] not in existing.keys()]) for d in node['deps']] - temp_cur_map, temp_free_temps, temp_free_list, temp_all_temps, temp_freed, temp_new_buckets = allocate_subgraph(existing, node, *args) + temp_cur_map, temp_free_temps, temp_free_list, temp_all_temps, temp_freed, temp_new_buckets, temp_emit_vars = allocate_subgraph(existing, node, *args) if set(temp_free_temps) != set(temp_all_temps): - print(('BAD', node['out'], temp_cur_map, temp_free_temps, temp_free_list, temp_all_temps)) + print(('BAD', node['out'], temp_cur_map, temp_free_temps, temp_free_list, temp_all_temps, temp_freed)) sorted_nodes.append(((len(temp_free_list), -min(lens), -max(lens), @@ -385,8 +465,9 @@ def allocate_one_subtree(possible_nodes, existing, *args): print('Allocating for %s' % node['out']) args = allocate_subgraph(existing, node, *args) fill_subgraph(node) - cur_map, free_temps, free_list, all_temps, freed, new_buckets = args - return possible_nodes, cur_map, free_temps, free_list, all_temps, freed, new_buckets + args = push_allocate(existing, in_nodes, *args) + cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars = args + return possible_nodes, cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars def print_graph(graph, allocs): @@ -398,6 +479,149 @@ def print_graph(graph, allocs): body += ''.join(' %s -> out ;\n' % node['out'] for node in graph['out'].values()) return ('digraph G {\n' + body + '}\n') +def schedule(input_data, existing, emit_vars): + ret = '' + buckets_seen = set() + buckets_carried = set() + ret += ('// Convention is low_reg:high_reg\n') + for node in emit_vars: + if node['op'] == 'INPUT': + ret += ('%s <- LOAD %s;' % (existing[node['out']], node['out'])) + elif node['op'] == '*' and len(node['deps']) == 2: + ret += ('%s <- MULX %s, %s; // %s = %s * %s\n' + % (existing[node['out']], + existing[node['deps'][0]['out']], + existing[node['deps'][1]['out']], + node['out'], + node['deps'][0]['out'], + node['deps'][1]['out'])) + elif node['op'] == '*' and len(node['deps']) == 1: + extra_arg = [arg for arg in line_of_var(data, node['out'])['args'] if arg[:2] == '0x'][0] + ret += ('%s <- MULX %s, %s; // %s = %s * %s\n' + % (existing[node['out']], + existing[node['deps'][0]['out']], + extra_arg, + node['out'], + node['deps'][0]['out'], + extra_arg)) + elif node['op'] == '&' and len(node['deps']) == 1: + extra_arg = [arg for arg in line_of_var(data, node['out'])['args'] if arg[:2] == '0x'][0] + ret += ('%s <- AND %s, %s; // %s = %s & %s\n' + % (existing[node['out']], + existing[node['deps'][0]['out']], + extra_arg, + node['out'], + node['deps'][0]['out'], + extra_arg)) + elif node['op'] == '>>' and len(node['deps']) == 1 and node['deps'][0]['op'] == 'COMBINE': + extra_arg = [arg for arg in line_of_var(data, node['out'])['args'] if arg[:2] == '0x'][0] + ret += ('%s <- SHR %s:%s, %s; // %s = %s:%s >> %s\n' + % (existing[node['out']], + existing[node['deps'][0]['deps'][0]['out']], + existing[node['deps'][0]['deps'][1]['out']], + extra_arg, + node['out'], + node['deps'][0]['deps'][0]['out'], + node['deps'][0]['deps'][1]['out'], + extra_arg)) + elif node['op'] == '>>' and len(node['deps']) == 1 and node['deps'][0]['type'] == 'uint64_t': + extra_arg = [arg for arg in line_of_var(data, node['out'])['args'] if arg[:2] == '0x'][0] + ret += ('%s <- SHR %s, %s; // %s = %s >> %s\n' + % (existing[node['out']], + existing[node['deps'][0]['deps'][0]['out']], + extra_arg, + node['out'], + node['deps'][0]['deps'][0]['out'], + extra_arg)) + elif node['op'] in ('GET_HIGH', 'GET_LOW'): + if node['rev_deps'][0]['out'] not in buckets_seen: + ret += ('%s <- MOV %s; // bucket: %s\n' + % (existing[node['rev_deps'][0]['out']], + existing[node['out']], + ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out']))))) + buckets_seen.add(node['rev_deps'][0]['out']) + elif node['op'] == 'GET_HIGH': + ret += ('%s <- ADX %s, %s; // bucket: %s\n' + % (existing[node['rev_deps'][0]['out']], + existing[node['rev_deps'][0]['out']], + existing[node['out']], + ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out']))))) + elif node['op'] == 'GET_LOW': + carry = 'c' + node['rev_deps'][0]['out'][:-len('_low')] + if node['rev_deps'][0]['out'] not in buckets_carried: + ret += ('%s, (%s) <- ADD %s, %s; // bucket: %s\n' + % (existing[node['rev_deps'][0]['out']], + carry, + existing[node['rev_deps'][0]['out']], + existing[node['out']], + ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out']))))) + buckets_carried.add(node['rev_deps'][0]['out']) + else: + ret += ('%s, (%s) <- ADC (%s), %s, %s; // bucket: %s\n' + % (existing[node['rev_deps'][0]['out']], + carry, + carry, + existing[node['rev_deps'][0]['out']], + existing[node['out']], + ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out']))))) + elif node['op'] in ('GET_CARRY',): + carry = 'c' + node['rev_deps'][0]['out'][:-len('_high')] + ret += ('%s <- ADCX (%s), %s, 0x0; // bucket: %s\n' + % (existing[node['rev_deps'][0]['out']], + carry, + existing[node['rev_deps'][0]['out']], + ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out']))))) + elif node['op'] == '+' and len(node['extra_out']) > 0: + pass + elif node['op'] == '+' and len(node['deps']) == 2 and node['type'] == 'uint64_t': + ret += ('%s <- ADX %s, %s; // %s = %s + %s\n' + % (existing[node['out']], + existing[node['deps'][0]['out']], + existing[node['deps'][1]['out']], + node['out'], + node['deps'][0]['out'], + node['deps'][1]['out'])) + elif node['op'] in ('COMBINE',): + pass + else: + raw_input((node['out'], node['op'])) + if node['op'] not in ('GET_HIGH', 'GET_LOW', 'COMBINE'): + for rdep in node['rev_deps']: + if len(rdep['extra_out']) > 0 and rdep['op'] == '+': + if rdep['out'] not in buckets_seen: + ret += ('%s <- MOV %s; // bucket: %s\n' + % (existing[rdep['out']], + existing[node['out']], + ' + '.join(sorted([rdep['out']] + list(rdep['extra_out']))))) + buckets_seen.add(rdep['out']) + elif 'high' in rdep['out']: + ret += ('%s <- ADX %s, %s; // bucket: %s\n' + % (existing[rdep['out']], + existing[rdep['out']], + existing[node['out']], + ' + '.join(sorted([rdep['out']] + list(rdep['extra_out']))))) + elif 'low' in rdep['out']: + carry = 'c' + rdep['out'][:-len('_low')] + if rdep['out'] not in buckets_carried: + ret += ('%s, (%s) <- ADD %s, %s; // bucket: %s\n' + % (existing[rdep['out']], + carry, + existing[rdep['out']], + existing[node['out']], + ' + '.join(sorted([rdep['out']] + list(rdep['extra_out']))))) + buckets_carried.add(rdep['out']) + else: + ret += ('%s, (%s) <- ADC (%s), %s, %s; // bucket: %s\n' + % (existing[rdep['out']], + carry, + carry, + existing[rdep['out']], + existing[node['out']], + ' + '.join(sorted([rdep['out']] + list(rdep['extra_out']))))) + else: + assert(False) + return ret + data_list = parse_lines(get_lines('femulDisplay.log')) for i, data in enumerate(data_list): graph = to_graph(data) @@ -413,18 +637,28 @@ for i, data in enumerate(data_list): if n['op'] == '*')) possible_nodes = list(sorted(possible_nodes.items())) possible_nodes = [n for v, n in possible_nodes] - existing, cur_map, free_temps, free_list, all_temps, freed, new_buckets = {}, {}, tuple(), tuple(REGISTERS), tuple(), tuple(), tuple() - for var in tuple(): #('x20_tmp', 'x49_tmp', 'x51_tmp', 'x55_tmp', 'x53_tmp'): + in_nodes = tuple(graph['in'].values()) + existing, cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars = {}, {}, tuple(), tuple(REGISTERS), tuple(), tuple(), tuple(), tuple() + objs = get_objects(graph['out'].values()) + def vars_for(var, rec=True): + pre_ret = [n['out'] for n in objs[var]['rev_deps']] + ret = [v for v in pre_ret if 'tmp' in v] + if rec: + for v in pre_ret: + if 'tmp' not in v: + ret += list(vars_for(v, rec=False)) + return tuple(ret) + for var in list(vars_for('x10')) + list(vars_for('x11')) + list(vars_for('x9')) + list(vars_for('x7')) + list(vars_for('x5')): # tuple(): #('x20_tmp', 'x49_tmp', 'x51_tmp', 'x55_tmp', 'x53_tmp'): print(var) cur_possible_nodes = [n for n in possible_nodes if n['out'] == var] - cur_possible_nodes, cur_map, free_temps, free_list, all_temps, freed, new_buckets \ - = allocate_one_subtree(cur_possible_nodes, existing, cur_map, free_temps, free_list, all_temps, freed, new_buckets) + cur_possible_nodes, cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars \ + = allocate_one_subtree(in_nodes, cur_possible_nodes, existing, cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars) existing.update(cur_map) cur_map = {} - for count in range(10): + for count in range(0 * 16): print(count) - possible_nodes, cur_map, free_temps, free_list, all_temps, freed, new_buckets \ - = allocate_one_subtree(possible_nodes, existing, cur_map, free_temps, free_list, all_temps, freed, new_buckets) + possible_nodes, cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars \ + = allocate_one_subtree(in_nodes, possible_nodes, existing, cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars) existing.update(cur_map) cur_map = {} #my_node = [n for n in possible_nodes if n['out'] == 'x36_tmp'][0] @@ -434,10 +668,12 @@ for i, data in enumerate(data_list): #mul_node = possible_nodes[0] #print([n['out'] for n in mul_node['deps']]) #cur_map, free_temps, free_list, all_temps = allocate_subgraph(existing, mul_node, cur_map, free_temps, free_list, all_temps) - print((existing, free_temps, free_list, all_temps)) + sched = schedule(data, existing, emit_vars) #fill_deps(buckets[0]) deps = adjust_bits(data, print_graph(graph, existing)) with codecs.open('femulData%d.dot' % i, 'w', encoding='utf8') as f: f.write(deps) + with codecs.open('femulDisplayScheduled%d.log' % i, 'w', encoding='utf8') as f: + f.write(sched) for fmt in ('png', 'svg'): subprocess.call(['dot', '-T%s' % fmt, 'femulData%d.dot' % i, '-o', 'femulData%d.%s' % (i, fmt)]) -- cgit v1.2.3