From 127994985895060a84afabba2d48fd59cfe9f2c8 Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Mon, 11 Sep 2017 14:52:53 -0400 Subject: WIP on reg alloc for asm output --- etc/compile-by-zinc/femulData0.dot | 244 +-- etc/compile-by-zinc/femulData0.png | Bin 2076561 -> 2043015 bytes etc/compile-by-zinc/femulData0.svg | 1664 ++++++++++---------- etc/compile-by-zinc/femulData0_1.png | Bin 452548 -> 0 bytes etc/compile-by-zinc/femulData0_1.svg | 1118 ------------- etc/compile-by-zinc/femulDisplayScheduled0.log | 328 ++-- .../make-graph-with-reg-by-ac-buckets.py | 309 ++-- 7 files changed, 1374 insertions(+), 2289 deletions(-) delete mode 100644 etc/compile-by-zinc/femulData0_1.png delete mode 100644 etc/compile-by-zinc/femulData0_1.svg (limited to 'etc') diff --git a/etc/compile-by-zinc/femulData0.dot b/etc/compile-by-zinc/femulData0.dot index db07f732c..2bc6416f7 100644 --- a/etc/compile-by-zinc/femulData0.dot +++ b/etc/compile-by-zinc/femulData0.dot @@ -1,131 +1,131 @@ digraph G { - x5 [label="x5 (RAX)" , style="filled", fillcolor="red"]; - x7 [label="x7 (RAX)" , style="filled", fillcolor="red"]; - x9 [label="x9 (RAX)" , style="filled", fillcolor="red"]; - x10 [label="x10 (r18)" , style="filled", fillcolor="red"]; - x11 [label="x11 (RAX)" , style="filled", fillcolor="red"]; - x13 [label="x13 (r17)" , style="filled", fillcolor="red"]; - x15 [label="x15 (r12)" , style="filled", fillcolor="red"]; - x17 [label="x17 (r9)" , style="filled", fillcolor="red"]; - x18 [label="x18 (RCX)" , style="filled", fillcolor="red"]; - x19 [label="x19 (RDI)" , style="filled", fillcolor="red"]; - x20_128_high [label="x20_128_high (r15)" , style="filled", fillcolor="red"]; - x20_128_low [label="x20_128_low (r16)" , style="filled", fillcolor="red"]; - x20_128_tmp [label="x20_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x21_128_high [label="x21_128_high (r16)" , style="filled", fillcolor="red"]; - x21_128_low [label="x21_128_low (r15)" , style="filled", fillcolor="red"]; - x21_128_tmp [label="x21_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; - x22_128_high [label="x22_128_high (r16)" , style="filled", fillcolor="red"]; - x22_128_low [label="x22_128_low (r15)" , style="filled", fillcolor="red"]; - x22_128_tmp [label="x22_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; - x24_128_high [label="x24_128_high (r15)" , style="filled", fillcolor="red"]; - x24_128_low [label="x24_128_low (r16)" , style="filled", fillcolor="red"]; - x24_128_tmp [label="x24_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x25_128_high [label="x25_128_high (r15)" , style="filled", fillcolor="red"]; - x25_128_low [label="x25_128_low (r16)" , style="filled", fillcolor="red"]; - x25_128_tmp [label="x25_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x27_128_high [label="x27_128_high (r15)" , style="filled", fillcolor="red"]; - x27_128_low [label="x27_128_low (r16)" , style="filled", fillcolor="red"]; - x27_128_tmp [label="x27_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x29_128_high [label="x29_128_high (r16)" , style="filled", fillcolor="red"]; - x29_128_low [label="x29_128_low (r15)" , style="filled", fillcolor="red"]; - x29_128_tmp [label="x29_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; - x30_128_high [label="x30_128_high (r16)" , style="filled", fillcolor="red"]; - x30_128_low [label="x30_128_low (r15)" , style="filled", fillcolor="red"]; - x30_128_tmp [label="x30_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; - x32_128_high [label="x32_128_high (r16)" , style="filled", fillcolor="red"]; - x32_128_low [label="x32_128_low (r15)" , style="filled", fillcolor="red"]; - x32_128_tmp [label="x32_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; - x34_128_high [label="x34_128_high (r16)" , style="filled", fillcolor="red"]; - x34_128_low [label="x34_128_low (r15)" , style="filled", fillcolor="red"]; - x34_128_tmp [label="x34_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; - x36_128_high [label="x36_128_high (r15)" , style="filled", fillcolor="red"]; - x36_128_low [label="x36_128_low (r16)" , style="filled", fillcolor="red"]; - x36_128_tmp [label="x36_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x37_128_high [label="x37_128_high (r15)" , style="filled", fillcolor="red"]; - x37_128_low [label="x37_128_low (r16)" , style="filled", fillcolor="red"]; - x37_128_tmp [label="x37_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x39_128_high [label="x39_128_high (r15)" , style="filled", fillcolor="red"]; - x39_128_low [label="x39_128_low (r16)" , style="filled", fillcolor="red"]; - x39_128_tmp [label="x39_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x41_128_high [label="x41_128_high (r15)" , style="filled", fillcolor="red"]; - x41_128_low [label="x41_128_low (r16)" , style="filled", fillcolor="red"]; - x41_128_tmp [label="x41_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x43_128_high [label="x43_128_high (r15)" , style="filled", fillcolor="red"]; - x43_128_low [label="x43_128_low (r16)" , style="filled", fillcolor="red"]; - x43_128_tmp [label="x43_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x45 [label="x45 (r18)" , style="filled", fillcolor="red"]; - x46 [label="x46 (RAX)" , style="filled", fillcolor="red"]; - x47 [label="x47 (RAX)" , style="filled", fillcolor="red"]; - x48 [label="x48 (RAX)" , style="filled", fillcolor="red"]; - x49_128_high [label="x49_128_high (r16)" , style="filled", fillcolor="red"]; - x49_128_low [label="x49_128_low (r15)" , style="filled", fillcolor="red"]; - x49_128_tmp [label="x49_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; - x51_128_high [label="x51_128_high (r16)" , style="filled", fillcolor="red"]; - x51_128_low [label="x51_128_low (r15)" , style="filled", fillcolor="red"]; - x51_128_tmp [label="x51_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; - x53_128_high [label="x53_128_high (r16)" , style="filled", fillcolor="red"]; - x53_128_low [label="x53_128_low (r15)" , style="filled", fillcolor="red"]; - x53_128_tmp [label="x53_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; - x55_128_high [label="x55_128_high (r16)" , style="filled", fillcolor="red"]; - x55_128_low [label="x55_128_low (r15)" , style="filled", fillcolor="red"]; - x55_128_tmp [label="x55_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; + x5 [label="x5 (rx5)" , style="filled", fillcolor="red"]; + x7 [label="x7 (rx7)" , style="filled", fillcolor="red"]; + x9 [label="x9 (rx9)" , style="filled", fillcolor="red"]; + x10 [label="x10 (rx10)" , style="filled", fillcolor="red"]; + x11 [label="x11 (rx11)" , style="filled", fillcolor="red"]; + x13 [label="x13 (rx13)" , style="filled", fillcolor="red"]; + x15 [label="x15 (rx15)" , style="filled", fillcolor="red"]; + x17 [label="x17 (rx17)" , style="filled", fillcolor="red"]; + x18 [label="x18 (rx18)" , style="filled", fillcolor="red"]; + x19 [label="x19 (rx19)" , style="filled", fillcolor="red"]; + x20_128_high [label="x20_128_high (r11)" , style="filled", fillcolor="red"]; + x20_128_low [label="x20_128_low (r10)" , style="filled", fillcolor="red"]; + x20_128_tmp [label="x20_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; + x21_128_high [label="x21_128_high (r10)" , style="filled", fillcolor="red"]; + x21_128_low [label="x21_128_low (r11)" , style="filled", fillcolor="red"]; + x21_128_tmp [label="x21_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x22_128_high [label="x22_128_high (r11)" , style="filled", fillcolor="red"]; + x22_128_low [label="x22_128_low (r10)" , style="filled", fillcolor="red"]; + x22_128_tmp [label="x22_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; + x24_128_high [label="x24_128_high (r11)" , style="filled", fillcolor="red"]; + x24_128_low [label="x24_128_low (r10)" , style="filled", fillcolor="red"]; + x24_128_tmp [label="x24_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; + x25_128_high [label="x25_128_high (r10)" , style="filled", fillcolor="red"]; + x25_128_low [label="x25_128_low (r11)" , style="filled", fillcolor="red"]; + x25_128_tmp [label="x25_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x27_128_high [label="x27_128_high (r10)" , style="filled", fillcolor="red"]; + x27_128_low [label="x27_128_low (r11)" , style="filled", fillcolor="red"]; + x27_128_tmp [label="x27_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x29_128_high [label="x29_128_high (r10)" , style="filled", fillcolor="red"]; + x29_128_low [label="x29_128_low (r11)" , style="filled", fillcolor="red"]; + x29_128_tmp [label="x29_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x30_128_high [label="x30_128_high (r11)" , style="filled", fillcolor="red"]; + x30_128_low [label="x30_128_low (r10)" , style="filled", fillcolor="red"]; + x30_128_tmp [label="x30_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; + x32_128_high [label="x32_128_high (r11)" , style="filled", fillcolor="red"]; + x32_128_low [label="x32_128_low (r10)" , style="filled", fillcolor="red"]; + x32_128_tmp [label="x32_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; + x34_128_high [label="x34_128_high (r10)" , style="filled", fillcolor="red"]; + x34_128_low [label="x34_128_low (r11)" , style="filled", fillcolor="red"]; + x34_128_tmp [label="x34_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x36_128_high [label="x36_128_high (r11)" , style="filled", fillcolor="red"]; + x36_128_low [label="x36_128_low (r10)" , style="filled", fillcolor="red"]; + x36_128_tmp [label="x36_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; + x37_128_high [label="x37_128_high (r10)" , style="filled", fillcolor="red"]; + x37_128_low [label="x37_128_low (r11)" , style="filled", fillcolor="red"]; + x37_128_tmp [label="x37_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x39_128_high [label="x39_128_high (r10)" , style="filled", fillcolor="red"]; + x39_128_low [label="x39_128_low (r11)" , style="filled", fillcolor="red"]; + x39_128_tmp [label="x39_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x41_128_high [label="x41_128_high (r11)" , style="filled", fillcolor="red"]; + x41_128_low [label="x41_128_low (r10)" , style="filled", fillcolor="red"]; + x41_128_tmp [label="x41_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; + x43_128_high [label="x43_128_high (r10)" , style="filled", fillcolor="red"]; + x43_128_low [label="x43_128_low (r11)" , style="filled", fillcolor="red"]; + x43_128_tmp [label="x43_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x45 [label="x45 (r6)" , style="filled", fillcolor="red"]; + x46 [label="x46 (r6)" , style="filled", fillcolor="red"]; + x47 [label="x47 (r7)" , style="filled", fillcolor="red"]; + x48 [label="x48 (r12)" , style="filled", fillcolor="red"]; + x49_128_high [label="x49_128_high (r10)" , style="filled", fillcolor="red"]; + x49_128_low [label="x49_128_low (r11)" , style="filled", fillcolor="red"]; + x49_128_tmp [label="x49_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x51_128_high [label="x51_128_high (r10)" , style="filled", fillcolor="red"]; + x51_128_low [label="x51_128_low (r11)" , style="filled", fillcolor="red"]; + x51_128_tmp [label="x51_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x53_128_high [label="x53_128_high (r11)" , style="filled", fillcolor="red"]; + x53_128_low [label="x53_128_low (r10)" , style="filled", fillcolor="red"]; + x53_128_tmp [label="x53_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; + x55_128_high [label="x55_128_high (r10)" , style="filled", fillcolor="red"]; + x55_128_low [label="x55_128_low (r11)" , style="filled", fillcolor="red"]; + x55_128_tmp [label="x55_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; cx56_128 [label="cx56_128 (c0)" , style="filled", fillcolor="red"]; - x56_128 [label="x50_128 + x52_128 + x54_128 + x56_128 (r10:r11)" , style="filled", fillcolor="red"]; - x56_128_high [label="x50_128_high + x52_128_high + x54_128_high + x56_128_high (r11)" , style="filled", fillcolor="red"]; - x56_128_low [label="x50_128_low + x52_128_low + x54_128_low + x56_128_low (r10)" , style="filled", fillcolor="red"]; - x57_128_high [label="x57_128_high (r15)" , style="filled", fillcolor="red"]; - x57_128_low [label="x57_128_low (r16)" , style="filled", fillcolor="red"]; - x57_128_tmp [label="x57_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x59_128_high [label="x59_128_high (r15)" , style="filled", fillcolor="red"]; - x59_128_low [label="x59_128_low (r16)" , style="filled", fillcolor="red"]; - x59_128_tmp [label="x59_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x61_128_high [label="x61_128_high (r15)" , style="filled", fillcolor="red"]; - x61_128_low [label="x61_128_low (r16)" , style="filled", fillcolor="red"]; - x61_128_tmp [label="x61_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x63_128_high [label="x63_128_high (r16)" , style="filled", fillcolor="red"]; - x63_128_low [label="x63_128_low (r15)" , style="filled", fillcolor="red"]; - x63_128_tmp [label="x63_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; - x65_128_high [label="x65_128_high (r16)" , style="filled", fillcolor="red"]; - x65_128_low [label="x65_128_low (r15)" , style="filled", fillcolor="red"]; - x65_128_tmp [label="x65_128_tmp (r15:r16)" , style="filled", fillcolor="red"]; - x67_128_high [label="x67_128_high (r15)" , style="filled", fillcolor="red"]; - x67_128_low [label="x67_128_low (r16)" , style="filled", fillcolor="red"]; - x67_128_tmp [label="x67_128_tmp (r16:r15)" , style="filled", fillcolor="red"]; - x69 [label="x69 (r11)" , style="filled", fillcolor="red"]; - x70 [label="x70 (r10)" , style="filled", fillcolor="red"]; + x56_128 [label="x50_128 + x52_128 + x54_128 + x56_128 (r8:r9)" , style="filled", fillcolor="red"]; + x56_128_high [label="x50_128_high + x52_128_high + x54_128_high + x56_128_high (r9)" , style="filled", fillcolor="red"]; + x56_128_low [label="x50_128_low + x52_128_low + x54_128_low + x56_128_low (r8)" , style="filled", fillcolor="red"]; + x57_128_high [label="x57_128_high (r11)" , style="filled", fillcolor="red"]; + x57_128_low [label="x57_128_low (r10)" , style="filled", fillcolor="red"]; + x57_128_tmp [label="x57_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; + x59_128_high [label="x59_128_high (r10)" , style="filled", fillcolor="red"]; + x59_128_low [label="x59_128_low (r11)" , style="filled", fillcolor="red"]; + x59_128_tmp [label="x59_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x61_128_high [label="x61_128_high (r11)" , style="filled", fillcolor="red"]; + x61_128_low [label="x61_128_low (r10)" , style="filled", fillcolor="red"]; + x61_128_tmp [label="x61_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; + x63_128_high [label="x63_128_high (r11)" , style="filled", fillcolor="red"]; + x63_128_low [label="x63_128_low (r10)" , style="filled", fillcolor="red"]; + x63_128_tmp [label="x63_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; + x65_128_high [label="x65_128_high (r10)" , style="filled", fillcolor="red"]; + x65_128_low [label="x65_128_low (r11)" , style="filled", fillcolor="red"]; + x65_128_tmp [label="x65_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x67_128_high [label="x67_128_high (r11)" , style="filled", fillcolor="red"]; + x67_128_low [label="x67_128_low (r10)" , style="filled", fillcolor="red"]; + x67_128_tmp [label="x67_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; + x69 [label="x69 (r9)" , style="filled", fillcolor="red"]; + x70 [label="x70 (r8)" , style="filled", fillcolor="red"]; cx71_128 [label="cx71_128 (c0)" , style="filled", fillcolor="red"]; - x71_128 [label="x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (RBP:r8)" , style="filled", fillcolor="red"]; - x71_128_high [label="x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (r8)" , style="filled", fillcolor="red"]; - x71_128_low [label="x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (RBP)" , style="filled", fillcolor="red"]; - x72 [label="x72 (r8)" , style="filled", fillcolor="red"]; - x73 [label="x73 (RBP)" , style="filled", fillcolor="red"]; + x71_128 [label="x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (r4:r5)" , style="filled", fillcolor="red"]; + x71_128_high [label="x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (r5)" , style="filled", fillcolor="red"]; + x71_128_low [label="x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (r4)" , style="filled", fillcolor="red"]; + x72 [label="x72 (r5)" , style="filled", fillcolor="red"]; + x73 [label="x73 (r4)" , style="filled", fillcolor="red"]; cx74_128 [label="cx74_128 (c0)" , style="filled", fillcolor="red"]; - x74_128 [label="x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (RDX:RSI)" , style="filled", fillcolor="red"]; - x74_128_high [label="x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (RSI)" , style="filled", fillcolor="red"]; - x74_128_low [label="x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (RDX)" , style="filled", fillcolor="red"]; - x75 [label="x75 (RSI)" , style="filled", fillcolor="red"]; - x76 [label="x76 (RDX)" , style="filled", fillcolor="red"]; + x74_128 [label="x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (r9:r12)" , style="filled", fillcolor="red"]; + x74_128_high [label="x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (r12)" , style="filled", fillcolor="red"]; + x74_128_low [label="x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (r9)" , style="filled", fillcolor="red"]; + x75 [label="x75 (r12)" , style="filled", fillcolor="red"]; + x76 [label="x76 (r9)" , style="filled", fillcolor="red"]; cx77_128 [label="cx77_128 (c0)" , style="filled", fillcolor="red"]; - x77_128 [label="x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (RBX:r18)" , style="filled", fillcolor="red"]; - x77_128_high [label="x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (r18)" , style="filled", fillcolor="red"]; - x77_128_low [label="x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (RBX)" , style="filled", fillcolor="red"]; - x78 [label="x78 (r18)" , style="filled", fillcolor="red"]; - x79 [label="x79 (RBX)" , style="filled", fillcolor="red"]; + x77_128 [label="x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (r5:r6)" , style="filled", fillcolor="red"]; + x77_128_high [label="x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (r6)" , style="filled", fillcolor="red"]; + x77_128_low [label="x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (r5)" , style="filled", fillcolor="red"]; + x78 [label="x78 (r6)" , style="filled", fillcolor="red"]; + x79 [label="x79 (r5)" , style="filled", fillcolor="red"]; cx80_128 [label="cx80_128 (c0)" , style="filled", fillcolor="red"]; - x80_128 [label="x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (r13:r14)" , style="filled", fillcolor="red"]; - x80_128_high [label="x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (r14)" , style="filled", fillcolor="red"]; - x80_128_low [label="x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (r13)" , style="filled", fillcolor="red"]; - x81 [label="x81 (r14)" , style="filled", fillcolor="red"]; - x82 [label="x82 (r13)" , style="filled", fillcolor="red"]; - x83 [label="x83 (r14)" , style="filled", fillcolor="red"]; - x84 [label="x84 (r10)" , style="filled", fillcolor="red"]; - x85 [label="x85 (r10)" , style="filled", fillcolor="red"]; - x86 [label="x86 (r14)" , style="filled", fillcolor="red"]; - x87 [label="x87 (r10)" , style="filled", fillcolor="red"]; - x88 [label="x88 (r10)" , style="filled", fillcolor="red"]; - x89 [label="x89 (RBP)" , style="filled", fillcolor="red"]; - x90 [label="x90 (r10)" , style="filled", fillcolor="red"]; + x80_128 [label="x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (r7:r12)" , style="filled", fillcolor="red"]; + x80_128_high [label="x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (r12)" , style="filled", fillcolor="red"]; + x80_128_low [label="x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (r7)" , style="filled", fillcolor="red"]; + x81 [label="x81 (r12)" , style="filled", fillcolor="red"]; + x82 [label="x82 (r7)" , style="filled", fillcolor="red"]; + x83 [label="x83 (r12)" , style="filled", fillcolor="red"]; + x84 [label="x84 (r8)" , style="filled", fillcolor="red"]; + x85 [label="x85 (r8)" , style="filled", fillcolor="red"]; + x86 [label="x86 (r12)" , style="filled", fillcolor="red"]; + x87 [label="x87 (r8)" , style="filled", fillcolor="red"]; + x88 [label="x88 (r8)" , style="filled", fillcolor="red"]; + x89 [label="x89 (r4)" , style="filled", fillcolor="red"]; + x90 [label="x90 (r8)" , style="filled", fillcolor="red"]; x56_128_low -> cx56_128 [ label="GET_CARRY" ] ; x71_128_low -> cx71_128 [ label="GET_CARRY" ] ; x74_128_low -> cx74_128 [ label="GET_CARRY" ] ; diff --git a/etc/compile-by-zinc/femulData0.png b/etc/compile-by-zinc/femulData0.png index 1878e5b8d..6d6ccc4a7 100644 Binary files a/etc/compile-by-zinc/femulData0.png and b/etc/compile-by-zinc/femulData0.png differ diff --git a/etc/compile-by-zinc/femulData0.svg b/etc/compile-by-zinc/femulData0.svg index 18f75f14d..49a784ced 100644 --- a/etc/compile-by-zinc/femulData0.svg +++ b/etc/compile-by-zinc/femulData0.svg @@ -11,942 +11,942 @@ x5 - -x5 (RAX) + +x5 (rx5) x20_128_tmp -x20_128_tmp (r16:r15) +x20_128_tmp (r10:r11) x5->x20_128_tmp - - -* + + +* x21_128_tmp -x21_128_tmp (r15:r16) +x21_128_tmp (r11:r10) x5->x21_128_tmp - - -* + + +* x24_128_tmp -x24_128_tmp (r16:r15) +x24_128_tmp (r10:r11) x5->x24_128_tmp - - -* + + +* x29_128_tmp -x29_128_tmp (r15:r16) +x29_128_tmp (r11:r10) x5->x29_128_tmp - - -* + + +* x36_128_tmp -x36_128_tmp (r16:r15) +x36_128_tmp (r10:r11) x5->x36_128_tmp - - -* + + +* x7 - -x7 (RAX) + +x7 (rx7) x22_128_tmp -x22_128_tmp (r15:r16) +x22_128_tmp (r10:r11) x7->x22_128_tmp - - -* + + +* x27_128_tmp -x27_128_tmp (r16:r15) +x27_128_tmp (r11:r10) x7->x27_128_tmp - - -* + + +* x32_128_tmp -x32_128_tmp (r15:r16) +x32_128_tmp (r10:r11) x7->x32_128_tmp - - -* + + +* x41_128_tmp -x41_128_tmp (r16:r15) +x41_128_tmp (r10:r11) x7->x41_128_tmp - - -* + + +* x46 - -x46 (RAX) + +x46 (r6) x7->x46 - - -* + + +* x9 - -x9 (RAX) + +x9 (rx9) x25_128_tmp -x25_128_tmp (r16:r15) +x25_128_tmp (r11:r10) x9->x25_128_tmp - - -* + + +* x34_128_tmp -x34_128_tmp (r15:r16) +x34_128_tmp (r11:r10) x9->x34_128_tmp - - -* + + +* x43_128_tmp -x43_128_tmp (r16:r15) +x43_128_tmp (r11:r10) x9->x43_128_tmp - - -* + + +* x47 - -x47 (RAX) + +x47 (r7) x9->x47 - - -* + + +* x10 - -x10 (r18) + +x10 (rx10) x37_128_tmp -x37_128_tmp (r16:r15) +x37_128_tmp (r11:r10) x10->x37_128_tmp - - -* + + +* x45 - -x45 (r18) + +x45 (r6) x10->x45 - - -* + + +* x11 - -x11 (RAX) + +x11 (rx11) x30_128_tmp -x30_128_tmp (r15:r16) +x30_128_tmp (r10:r11) x11->x30_128_tmp - - -* + + +* x39_128_tmp -x39_128_tmp (r16:r15) +x39_128_tmp (r11:r10) x11->x39_128_tmp - - -* + + +* x48 - -x48 (RAX) + +x48 (r12) x11->x48 - - -* + + +* x13 - -x13 (r17) + +x13 (rx13) x13->x20_128_tmp - - -* + + +* x13->x22_128_tmp - - -* + + +* x13->x25_128_tmp - - -* + + +* x13->x30_128_tmp - - -* + + +* x13->x37_128_tmp - + -* +* x15 - -x15 (r12) + +x15 (rx15) x15->x21_128_tmp - - -* + + +* x15->x27_128_tmp - - -* + + +* x15->x34_128_tmp - - -* + + +* x15->x39_128_tmp - + -* +* x49_128_tmp -x49_128_tmp (r15:r16) +x49_128_tmp (r11:r10) x15->x49_128_tmp - - -* + + +* x17 - -x17 (r9) + +x17 (rx17) x17->x24_128_tmp - - -* + + +* x17->x32_128_tmp - - -* + + +* x17->x43_128_tmp - - -* + + +* x55_128_tmp -x55_128_tmp (r15:r16) +x55_128_tmp (r11:r10) x17->x55_128_tmp - - -* + + +* x57_128_tmp -x57_128_tmp (r16:r15) +x57_128_tmp (r10:r11) x17->x57_128_tmp - - -* + + +* x18 - -x18 (RCX) + +x18 (rx18) x18->x36_128_tmp - - -* + + +* x51_128_tmp -x51_128_tmp (r15:r16) +x51_128_tmp (r11:r10) x18->x51_128_tmp - - -* + + +* x59_128_tmp -x59_128_tmp (r16:r15) +x59_128_tmp (r11:r10) x18->x59_128_tmp - - -* + + +* x65_128_tmp -x65_128_tmp (r15:r16) +x65_128_tmp (r11:r10) x18->x65_128_tmp - - -* + + +* x67_128_tmp -x67_128_tmp (r16:r15) +x67_128_tmp (r10:r11) x18->x67_128_tmp - - -* + + +* x19 - -x19 (RDI) + +x19 (rx19) x19->x29_128_tmp - - -* + + +* x19->x41_128_tmp - - -* + + +* x53_128_tmp -x53_128_tmp (r15:r16) +x53_128_tmp (r10:r11) x19->x53_128_tmp - - -* + + +* x61_128_tmp -x61_128_tmp (r16:r15) +x61_128_tmp (r10:r11) x19->x61_128_tmp - - -* + + +* x63_128_tmp -x63_128_tmp (r15:r16) +x63_128_tmp (r10:r11) x19->x63_128_tmp - - -* + + +* x20_128_high - -x20_128_high (r15) + +x20_128_high (r11) x56_128_high - -x50_128_high + x52_128_high + x54_128_high + x56_128_high (r11) + +x50_128_high + x52_128_high + x54_128_high + x56_128_high (r9) x20_128_high->x56_128_high - - -+ + + ++ x20_128_low - -x20_128_low (r16) + +x20_128_low (r10) x56_128_low - -x50_128_low + x52_128_low + x54_128_low + x56_128_low (r10) + +x50_128_low + x52_128_low + x54_128_low + x56_128_low (r8) x20_128_low->x56_128_low - - -+ + + ++ x20_128_tmp->x20_128_high - - -GET_HIGH + + +GET_HIGH x20_128_tmp->x20_128_low - - + + GET_LOW x21_128_high - -x21_128_high (r16) + +x21_128_high (r10) x71_128_high - -x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (r8) + +x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (r5) x21_128_high->x71_128_high - - -+ + + ++ x21_128_low - -x21_128_low (r15) + +x21_128_low (r11) x71_128_low - -x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (RBP) + +x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (r4) x21_128_low->x71_128_low - - -+ + + ++ x21_128_tmp->x21_128_high - - + + GET_HIGH x21_128_tmp->x21_128_low - - -GET_LOW + + +GET_LOW x22_128_high - -x22_128_high (r16) + +x22_128_high (r11) x22_128_high->x71_128_high - - -+ + + ++ x22_128_low - -x22_128_low (r15) + +x22_128_low (r10) x22_128_low->x71_128_low - - -+ + + ++ x22_128_tmp->x22_128_high - - -GET_HIGH + + +GET_HIGH x22_128_tmp->x22_128_low - - -GET_LOW + + +GET_LOW x24_128_high - -x24_128_high (r15) + +x24_128_high (r11) x74_128_high - -x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (RSI) + +x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (r12) x24_128_high->x74_128_high - - -+ + + ++ x24_128_low -x24_128_low (r16) +x24_128_low (r10) x74_128_low - -x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (RDX) + +x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (r9) x24_128_low->x74_128_low - - -+ + + ++ x24_128_tmp->x24_128_high - - -GET_HIGH + + +GET_HIGH x24_128_tmp->x24_128_low - - -GET_LOW + + +GET_LOW x25_128_high - -x25_128_high (r15) + +x25_128_high (r10) x25_128_high->x74_128_high - - -+ + + ++ x25_128_low - -x25_128_low (r16) + +x25_128_low (r11) x25_128_low->x74_128_low - - -+ + + ++ x25_128_tmp->x25_128_high - - -GET_HIGH + + +GET_HIGH x25_128_tmp->x25_128_low - - -GET_LOW + + +GET_LOW x27_128_high - -x27_128_high (r15) + +x27_128_high (r10) x27_128_high->x74_128_high - - -+ + + ++ x27_128_low - -x27_128_low (r16) + +x27_128_low (r11) x27_128_low->x74_128_low - - -+ + + ++ x27_128_tmp->x27_128_high - - -GET_HIGH + + +GET_HIGH x27_128_tmp->x27_128_low - - -GET_LOW + + +GET_LOW x29_128_high - -x29_128_high (r16) + +x29_128_high (r10) x77_128_high - -x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (r18) + +x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (r6) x29_128_high->x77_128_high - - + + + x29_128_low - -x29_128_low (r15) + +x29_128_low (r11) x77_128_low - -x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (RBX) + +x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (r5) x29_128_low->x77_128_low - - -+ + + ++ x29_128_tmp->x29_128_high - - -GET_HIGH + + +GET_HIGH x29_128_tmp->x29_128_low - - -GET_LOW + + +GET_LOW x30_128_high - -x30_128_high (r16) + +x30_128_high (r11) x30_128_high->x77_128_high - - -+ + + ++ x30_128_low - -x30_128_low (r15) + +x30_128_low (r10) x30_128_low->x77_128_low - - -+ + + ++ x30_128_tmp->x30_128_high - - -GET_HIGH + + +GET_HIGH x30_128_tmp->x30_128_low - - -GET_LOW + + +GET_LOW x32_128_high - -x32_128_high (r16) + +x32_128_high (r11) x32_128_high->x77_128_high - - -+ + + ++ x32_128_low - -x32_128_low (r15) + +x32_128_low (r10) x32_128_low->x77_128_low - - -+ + + ++ x32_128_tmp->x32_128_high - - -GET_HIGH + + +GET_HIGH x32_128_tmp->x32_128_low - - -GET_LOW + + +GET_LOW x34_128_high - -x34_128_high (r16) + +x34_128_high (r10) x34_128_high->x77_128_high - - -+ + + ++ x34_128_low - -x34_128_low (r15) + +x34_128_low (r11) x34_128_low->x77_128_low - - -+ + + ++ x34_128_tmp->x34_128_high - - -GET_HIGH + + +GET_HIGH x34_128_tmp->x34_128_low - - -GET_LOW + + +GET_LOW x36_128_high - -x36_128_high (r15) + +x36_128_high (r11) x80_128_high - -x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (r14) + +x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (r12) x36_128_high->x80_128_high - - -+ + + ++ x36_128_low - -x36_128_low (r16) + +x36_128_low (r10) x80_128_low - -x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (r13) + +x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (r7) x36_128_low->x80_128_low - - -+ + + ++ x36_128_tmp->x36_128_high - - -GET_HIGH + + +GET_HIGH x36_128_tmp->x36_128_low - - -GET_LOW + + +GET_LOW x37_128_high - -x37_128_high (r15) + +x37_128_high (r10) x37_128_high->x80_128_high - - -+ + + ++ x37_128_low - -x37_128_low (r16) + +x37_128_low (r11) x37_128_low->x80_128_low - - -+ + + ++ x37_128_tmp->x37_128_high - - -GET_HIGH + + +GET_HIGH x37_128_tmp->x37_128_low - - + + GET_LOW x39_128_high -x39_128_high (r15) +x39_128_high (r10) x39_128_high->x80_128_high - - + + + x39_128_low - -x39_128_low (r16) + +x39_128_low (r11) x39_128_low->x80_128_low - - -+ + + ++ x39_128_tmp->x39_128_high @@ -956,71 +956,71 @@ x39_128_tmp->x39_128_low - - -GET_LOW + + +GET_LOW x41_128_high -x41_128_high (r15) +x41_128_high (r11) x41_128_high->x80_128_high - - + + + x41_128_low -x41_128_low (r16) +x41_128_low (r10) x41_128_low->x80_128_low - - -+ + + ++ x41_128_tmp->x41_128_high - - -GET_HIGH + + +GET_HIGH x41_128_tmp->x41_128_low - + -GET_LOW +GET_LOW x43_128_high - -x43_128_high (r15) + +x43_128_high (r10) x43_128_high->x80_128_high - - -+ + + ++ x43_128_low -x43_128_low (r16) +x43_128_low (r11) x43_128_low->x80_128_low - - + + + x43_128_tmp->x43_128_high - - -GET_HIGH + + +GET_HIGH x43_128_tmp->x43_128_low @@ -1030,898 +1030,898 @@ x45->x49_128_tmp - - -* + + +* x45->x57_128_tmp - - -* + + +* x45->x63_128_tmp - - -* + + +* x45->x67_128_tmp - - -* + + +* x46->x51_128_tmp - - -* + + +* x47->x53_128_tmp - - -* + + +* x47->x59_128_tmp - - -* + + +* x48->x55_128_tmp - - -* + + +* x48->x61_128_tmp - - -* + + +* x48->x65_128_tmp - - -* + + +* x49_128_high - -x49_128_high (r16) + +x49_128_high (r10) x49_128_high->x56_128_high - - + + + x49_128_low - -x49_128_low (r15) + +x49_128_low (r11) x49_128_low->x56_128_low - - -+ + + ++ x49_128_tmp->x49_128_high - - -GET_HIGH + + +GET_HIGH x49_128_tmp->x49_128_low - - -GET_LOW + + +GET_LOW x51_128_high - -x51_128_high (r16) + +x51_128_high (r10) x51_128_high->x56_128_high - - + + + x51_128_low - -x51_128_low (r15) + +x51_128_low (r11) x51_128_low->x56_128_low - - -+ + + ++ x51_128_tmp->x51_128_high - - + + GET_HIGH x51_128_tmp->x51_128_low - - + + GET_LOW x53_128_high - -x53_128_high (r16) + +x53_128_high (r11) x53_128_high->x56_128_high - - -+ + + ++ x53_128_low - -x53_128_low (r15) + +x53_128_low (r10) x53_128_low->x56_128_low - - -+ + + ++ x53_128_tmp->x53_128_high - - -GET_HIGH + + +GET_HIGH x53_128_tmp->x53_128_low - - -GET_LOW + + +GET_LOW x55_128_high - -x55_128_high (r16) + +x55_128_high (r10) x55_128_high->x56_128_high - - -+ + + ++ x55_128_low - -x55_128_low (r15) + +x55_128_low (r11) x55_128_low->x56_128_low - - -+ + + ++ x55_128_tmp->x55_128_high - - -GET_HIGH + + +GET_HIGH x55_128_tmp->x55_128_low - - + + GET_LOW cx56_128 - -cx56_128 (c0) + +cx56_128 (c0) cx56_128->x56_128_high - - -+ + + ++ x56_128 - -x50_128 + x52_128 + x54_128 + x56_128 (r10:r11) + +x50_128 + x52_128 + x54_128 + x56_128 (r8:r9) x69 - -x69 (r11) + +x69 (r9) x56_128->x69 - - ->> + + +>> x56_128_high->x56_128 - - -COMBINE + + +COMBINE x56_128_low->cx56_128 - - -GET_CARRY + + +GET_CARRY x56_128_low->x56_128 - - -COMBINE + + +COMBINE x70 - -x70 (r10) + +x70 (r8) x56_128_low->x70 - - -& + + +& x57_128_high - -x57_128_high (r15) + +x57_128_high (r11) x57_128_high->x71_128_high - - -+ + + ++ x57_128_low - -x57_128_low (r16) + +x57_128_low (r10) x57_128_low->x71_128_low - - + + + x57_128_tmp->x57_128_high - - -GET_HIGH + + +GET_HIGH x57_128_tmp->x57_128_low - - + + GET_LOW x59_128_high - -x59_128_high (r15) + +x59_128_high (r10) x59_128_high->x71_128_high - - -+ + + ++ x59_128_low - -x59_128_low (r16) + +x59_128_low (r11) x59_128_low->x71_128_low - - -+ + + ++ x59_128_tmp->x59_128_high - - -GET_HIGH + + +GET_HIGH x59_128_tmp->x59_128_low - - -GET_LOW + + +GET_LOW x61_128_high - -x61_128_high (r15) + +x61_128_high (r11) x61_128_high->x71_128_high - - -+ + + ++ x61_128_low - -x61_128_low (r16) + +x61_128_low (r10) x61_128_low->x71_128_low - - -+ + + ++ x61_128_tmp->x61_128_high - - -GET_HIGH + + +GET_HIGH x61_128_tmp->x61_128_low - - -GET_LOW + + +GET_LOW x63_128_high - -x63_128_high (r16) + +x63_128_high (r11) x63_128_high->x74_128_high - - -+ + + ++ x63_128_low - -x63_128_low (r15) + +x63_128_low (r10) x63_128_low->x74_128_low - - -+ + + ++ x63_128_tmp->x63_128_high - - -GET_HIGH + + +GET_HIGH x63_128_tmp->x63_128_low - - -GET_LOW + + +GET_LOW x65_128_high - -x65_128_high (r16) + +x65_128_high (r10) x65_128_high->x74_128_high - - -+ + + ++ x65_128_low - -x65_128_low (r15) + +x65_128_low (r11) x65_128_low->x74_128_low - - -+ + + ++ x65_128_tmp->x65_128_high - - -GET_HIGH + + +GET_HIGH x65_128_tmp->x65_128_low - - -GET_LOW + + +GET_LOW x67_128_high - -x67_128_high (r15) + +x67_128_high (r11) x67_128_high->x77_128_high - - -+ + + ++ x67_128_low - -x67_128_low (r16) + +x67_128_low (r10) x67_128_low->x77_128_low - - -+ + + ++ x67_128_tmp->x67_128_high - - -GET_HIGH + + +GET_HIGH x67_128_tmp->x67_128_low - - -GET_LOW + + +GET_LOW x69->x71_128_low - - -+ + + ++ x84 - -x84 (r10) + +x84 (r8) x70->x84 - - -+ + + ++ cx71_128 - -cx71_128 (c0) + +cx71_128 (c0) cx71_128->x71_128_high - - -+ + + ++ x71_128 - -x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (RBP:r8) + +x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (r4:r5) x72 - -x72 (r8) + +x72 (r5) x71_128->x72 - - ->> + + +>> x71_128_high->x71_128 - - -COMBINE + + +COMBINE x71_128_low->cx71_128 - - -GET_CARRY + + +GET_CARRY x71_128_low->x71_128 - - -COMBINE + + +COMBINE x73 - -x73 (RBP) + +x73 (r4) x71_128_low->x73 - - -& + + +& x72->x74_128_low - - -+ + + ++ x87 - -x87 (r10) + +x87 (r8) x73->x87 - - -+ + + ++ cx74_128 - -cx74_128 (c0) + +cx74_128 (c0) cx74_128->x74_128_high - - -+ + + ++ x74_128 - -x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (RDX:RSI) + +x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (r9:r12) x75 - -x75 (RSI) + +x75 (r12) x74_128->x75 - - ->> + + +>> x74_128_high->x74_128 - - -COMBINE + + +COMBINE x74_128_low->cx74_128 - - -GET_CARRY + + +GET_CARRY x74_128_low->x74_128 - - -COMBINE + + +COMBINE x76 - -x76 (RDX) + +x76 (r9) x74_128_low->x76 - - -& + + +& x75->x77_128_low - - -+ + + ++ x90 - -x90 (r10) + +x90 (r8) x76->x90 - - -+ + + ++ cx77_128 - -cx77_128 (c0) + +cx77_128 (c0) cx77_128->x77_128_high - - -+ + + ++ x77_128 - -x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (RBX:r18) + +x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (r5:r6) x78 - -x78 (r18) + +x78 (r6) x77_128->x78 - - ->> + + +>> x77_128_high->x77_128 - - -COMBINE + + +COMBINE x77_128_low->cx77_128 - - -GET_CARRY + + +GET_CARRY x77_128_low->x77_128 - - -COMBINE + + +COMBINE x79 - -x79 (RBX) + +x79 (r5) x77_128_low->x79 - - -& + + +& x78->x80_128_low - - -+ + + ++ out - -out + +out x79->out - - + + cx80_128 - -cx80_128 (c0) + +cx80_128 (c0) cx80_128->x80_128_high - - -+ + + ++ x80_128 - -x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (r13:r14) + +x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (r7:r12) x81 - -x81 (r14) + +x81 (r12) x80_128->x81 - - ->> + + +>> x80_128_high->x80_128 - - -COMBINE + + +COMBINE x80_128_low->cx80_128 - - -GET_CARRY + + +GET_CARRY x80_128_low->x80_128 - - -COMBINE + + +COMBINE x82 - -x82 (r13) + +x82 (r7) x80_128_low->x82 - - -& + + +& x83 - -x83 (r14) + +x83 (r12) x81->x83 - - -* + + +* x82->out - - + + x83->x84 - - -+ + + ++ x85 - -x85 (r10) + +x85 (r8) x84->x85 - - ->> + + +>> x86 - -x86 (r14) + +x86 (r12) x84->x86 - - -& + + +& x85->x87 - - -+ + + ++ x86->out - - + + x88 - -x88 (r10) + +x88 (r8) x87->x88 - - ->> + + +>> x89 - -x89 (RBP) + +x89 (r4) x87->x89 - - -& + + +& x88->x90 - - -+ + + ++ x89->out - - + + x90->out - - + + in - -in + +in in->x5 - - + + in->x7 - - + + in->x9 - - + + in->x10 - - + + in->x11 - - + + in->x13 - - + + in->x15 - - + + in->x17 - - + + in->x18 - - + + in->x19 - - + + diff --git a/etc/compile-by-zinc/femulData0_1.png b/etc/compile-by-zinc/femulData0_1.png deleted file mode 100644 index 2012013ee..000000000 Binary files a/etc/compile-by-zinc/femulData0_1.png and /dev/null differ diff --git a/etc/compile-by-zinc/femulData0_1.svg b/etc/compile-by-zinc/femulData0_1.svg deleted file mode 100644 index 469b4eab8..000000000 --- a/etc/compile-by-zinc/femulData0_1.svg +++ /dev/null @@ -1,1118 +0,0 @@ - - - - - - -G - - -in - -in - - -x10 - -x10 - - -in->x10 - - - - -x11 - -x11 - - -in->x11 - - - - -x9 - -x9 - - -in->x9 - - - - -x7 - -x7 - - -in->x7 - - - - -x5 - -x5 - - -in->x5 - - - - -x18 - -x18 - - -in->x18 - - - - -x19 - -x19 - - -in->x19 - - - - -x17 - -x17 - - -in->x17 - - - - -x15 - -x15 - - -in->x15 - - - - -x13 - -x13 - - -in->x13 - - - - -x37 - -x37 - - -x10->x37 - - - - -x45 - -x45 - - -x10->x45 - - - - -x30 - -x30 - - -x11->x30 - - - - -x39 - -x39 - - -x11->x39 - - - - -x48 - -x48 - - -x11->x48 - - - - -x25 - -x25 - - -x9->x25 - - - - -x34 - -x34 - - -x9->x34 - - - - -x43 - -x43 - - -x9->x43 - - - - -x47 - -x47 - - -x9->x47 - - - - -x22 - -x22 - - -x7->x22 - - - - -x27 - -x27 - - -x7->x27 - - - - -x32 - -x32 - - -x7->x32 - - - - -x41 - -x41 - - -x7->x41 - - - - -x46 - -x46 - - -x7->x46 - - - - -x20 - -x20 - - -x5->x20 - - - - -x21 - -x21 - - -x5->x21 - - - - -x24 - -x24 - - -x5->x24 - - - - -x29 - -x29 - - -x5->x29 - - - - -x36 - -x36 - - -x5->x36 - - - - -x18->x36 - - - - -x51 - -x51 - - -x18->x51 - - - - -x59 - -x59 - - -x18->x59 - - - - -x65 - -x65 - - -x18->x65 - - - - -x67 - -x67 - - -x18->x67 - - - - -x19->x29 - - - - -x19->x41 - - - - -x53 - -x53 - - -x19->x53 - - - - -x61 - -x61 - - -x19->x61 - - - - -x63 - -x63 - - -x19->x63 - - - - -x17->x24 - - - - -x17->x32 - - - - -x17->x43 - - - - -x55 - -x55 - - -x17->x55 - - - - -x57 - -x57 - - -x17->x57 - - - - -x15->x21 - - - - -x15->x27 - - - - -x15->x34 - - - - -x15->x39 - - - - -x49 - -x49 - - -x15->x49 - - - - -x13->x20 - - - - -x13->x22 - - - - -x13->x25 - - - - -x13->x30 - - - - -x13->x37 - - - - -x82 - -x82 (r12) - - -out - -out - - -x82->out - - - - -x79 - -x79 - - -x79->out - - - - -x90 - -x90 (r15) - - -x90->out - - - - -x89 - -x89 (r14) - - -x89->out - - - - -x86 - -x86 (r13) - - -x86->out - - - - -x50 - -x50 - - -x20->x50 - - - - -x23 - -x23 - - -x21->x23 - - - - -x22->x23 - - - - -x58 - -x58 - - -x23->x58 - - - - -x26 - -x26 - - -x24->x26 - - - - -x25->x26 - - - - -x28 - -x28 - - -x26->x28 - - - - -x27->x28 - - - - -x64 - -x64 - - -x28->x64 - - - - -x31 - -x31 - - -x29->x31 - - - - -x30->x31 - - - - -x33 - -x33 - - -x31->x33 - - - - -x32->x33 - - - - -x35 - -x35 - - -x33->x35 - - - - -x34->x35 - - - - -x68 - -x68 - - -x35->x68 - - - - -x38 - -x38 - - -x36->x38 - - - - -x37->x38 - - - - -x40 - -x40 - - -x38->x40 - - - - -x39->x40 - - - - -x42 - -x42 - - -x40->x42 - - - - -x41->x42 - - - - -x44 - -x44 - - -x42->x44 - - - - -x43->x44 - - - - -x80 - -x80 - - -x44->x80 - - - - -x45->x49 - - - - -x45->x57 - - - - -x45->x63 - - - - -x45->x67 - - - - -x46->x51 - - - - -x47->x53 - - - - -x47->x59 - - - - -x48->x55 - - - - -x48->x61 - - - - -x48->x65 - - - - -x49->x50 - - - - -x52 - -x52 - - -x50->x52 - - - - -x51->x52 - - - - -x54 - -x54 - - -x52->x54 - - - - -x53->x54 - - - - -x56 - -x56 - - -x54->x56 - - - - -x55->x56 - - - - -x69 - -x69 - - -x56->x69 - - - - -x70 - -x70 - - -x56->x70 - - - - -x57->x58 - - - - -x60 - -x60 - - -x58->x60 - - - - -x59->x60 - - - - -x62 - -x62 - - -x60->x62 - - - - -x61->x62 - - - - -x71 - -x71 - - -x62->x71 - - - - -x63->x64 - - - - -x66 - -x66 - - -x64->x66 - - - - -x65->x66 - - - - -x74 - -x74 - - -x66->x74 - - - - -x67->x68 - - - - -x77 - -x77 - - -x68->x77 - - - - -x69->x71 - - - - -x84 - -x84 - - -x70->x84 - - - - -x72 - -x72 - - -x71->x72 - - - - -x73 - -x73 - - -x71->x73 - - - - -x72->x74 - - - - -x87 - -x87 (r14) - - -x73->x87 - - - - -x75 - -x75 - - -x74->x75 - - - - -x76 - -x76 (r11) - - -x74->x76 - - - - -x75->x77 - - - - -x76->x90 - - - - -x77->x79 - - - - -x78 - -x78 - - -x77->x78 - - - - -x78->x80 - - - - -x80->x82 - - - - -x81 - -x81 - - -x80->x81 - - - - -x83 - -x83 - - -x81->x83 - - - - -x83->x84 - - - - -x84->x86 - - - - -x85 - -x85 - - -x84->x85 - - - - -x85->x87 - - - - -x87->x89 - - - - -x88 - -x88 (r15) - - -x87->x88 - - - - -x88->x90 - - - - - diff --git a/etc/compile-by-zinc/femulDisplayScheduled0.log b/etc/compile-by-zinc/femulDisplayScheduled0.log index 82557e31e..4bcf8b04c 100644 --- a/etc/compile-by-zinc/femulDisplayScheduled0.log +++ b/etc/compile-by-zinc/femulDisplayScheduled0.log @@ -1,117 +1,213 @@ // Convention is low_reg:high_reg -r18 <- LOAD x10; -r17 <- LOAD x13; -r16:r15 <- MULX r18, r17; // x37_tmp = x10 * x13 -r14 <- MOV r15; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -r13 <- MOV r16; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -r18 <- MULX r18, 0x13; // x45 = x10 * 0x13 -r12 <- LOAD x15; -r15:r16 <- MULX r18, r12; // x49_tmp = x45 * x15 -r11 <- MOV r16; // bucket: x50_high + x52_high + x54_high + x56_high -r10 <- MOV r15; // bucket: x50_low + x52_low + x54_low + x56_low -r9 <- LOAD x17; -r16:r15 <- MULX r18, r9; // x57_tmp = x45 * x17 -r8 <- MOV r15; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -RBP <- MOV r16; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -RDI <- LOAD x19; -r15:r16 <- MULX r18, RDI; // x63_tmp = x45 * x19 -RSI <- MOV r16; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -RDX <- MOV r15; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -RCX <- LOAD x18; -r16:r15 <- MULX r18, RCX; // x67_tmp = x45 * x18 -r18 <- MOV r15; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -RBX <- MOV r16; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -RAX <- LOAD x11; -r15:r16 <- MULX RAX, r17; // x30_tmp = x11 * x13 -r18 <- ADX r18, r16; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -RBX, (cx77) <- ADD RBX, r15; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -r16:r15 <- MULX RAX, r12; // x39_tmp = x11 * x15 -r14 <- ADX r14, r15; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -r13, (cx80) <- ADD r13, r16; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -RAX <- MULX RAX, 0x13; // x48 = x11 * 0x13 -r15:r16 <- MULX RAX, r9; // x55_tmp = x48 * x17 -r11 <- ADX r11, r16; // bucket: x50_high + x52_high + x54_high + x56_high -r10, (cx56) <- ADD r10, r15; // bucket: x50_low + x52_low + x54_low + x56_low -r16:r15 <- MULX RAX, RDI; // x61_tmp = x48 * x19 -r8 <- ADX r8, r15; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -RBP, (cx71) <- ADD RBP, r16; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -r15:r16 <- MULX RAX, RCX; // x65_tmp = x48 * x18 -RSI <- ADX RSI, r16; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -RDX, (cx74) <- ADD RDX, r15; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -RAX <- LOAD x9; -r16:r15 <- MULX RAX, r17; // x25_tmp = x9 * x13 -RSI <- ADX RSI, r15; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -RDX, (cx74) <- ADC (cx74), RDX, r16; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -r15:r16 <- MULX RAX, r12; // x34_tmp = x9 * x15 -r18 <- ADX r18, r16; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -RBX, (cx77) <- ADC (cx77), RBX, r15; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -r16:r15 <- MULX RAX, r9; // x43_tmp = x9 * x17 -r14 <- ADX r14, r15; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -r13, (cx80) <- ADC (cx80), r13, r16; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -RAX <- MULX RAX, 0x13; // x47 = x9 * 0x13 -r15:r16 <- MULX RAX, RDI; // x53_tmp = x47 * x19 -r11 <- ADX r11, r16; // bucket: x50_high + x52_high + x54_high + x56_high -r10, (cx56) <- ADC (cx56), r10, r15; // bucket: x50_low + x52_low + x54_low + x56_low -r16:r15 <- MULX RAX, RCX; // x59_tmp = x47 * x18 -r8 <- ADX r8, r15; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -RBP, (cx71) <- ADC (cx71), RBP, r16; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -RAX <- LOAD x7; -r15:r16 <- MULX RAX, r17; // x22_tmp = x7 * x13 -r8 <- ADX r8, r16; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -RBP, (cx71) <- ADC (cx71), RBP, r15; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -r16:r15 <- MULX RAX, r12; // x27_tmp = x7 * x15 -RSI <- ADX RSI, r15; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -RDX, (cx74) <- ADC (cx74), RDX, r16; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -r15:r16 <- MULX RAX, r9; // x32_tmp = x7 * x17 -r18 <- ADX r18, r16; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -RBX, (cx77) <- ADC (cx77), RBX, r15; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -r16:r15 <- MULX RAX, RDI; // x41_tmp = x7 * x19 -r14 <- ADX r14, r15; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -r13, (cx80) <- ADC (cx80), r13, r16; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -RAX <- MULX RAX, 0x13; // x46 = x7 * 0x13 -r15:r16 <- MULX RAX, RCX; // x51_tmp = x46 * x18 -r11 <- ADX r11, r16; // bucket: x50_high + x52_high + x54_high + x56_high -r10, (cx56) <- ADC (cx56), r10, r15; // bucket: x50_low + x52_low + x54_low + x56_low -RAX <- LOAD x5; -r16:r15 <- MULX RAX, r17; // x20_tmp = x5 * x13 -r11 <- ADX r11, r15; // bucket: x50_high + x52_high + x54_high + x56_high -r10, (cx56) <- ADC (cx56), r10, r16; // bucket: x50_low + x52_low + x54_low + x56_low -r11 <- ADCX (cx56), r11, 0x0; // bucket: x50_high + x52_high + x54_high + x56_high -r10 <- AND r10, 0x7ffffffffffff; // x70 = x56_low & 0x7ffffffffffff -r15:r16 <- MULX RAX, r12; // x21_tmp = x5 * x15 -r11 <- SHR r10:r11, 0x33; // x69 = x56_low:x56_high >> 0x33 -RBP, (cx71) <- ADC (cx71), RBP, r11; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -r8 <- ADX r8, r16; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -RBP, (cx71) <- ADC (cx71), RBP, r15; // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -r8 <- ADCX (cx71), r8, 0x0; // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -RBP <- AND RBP, 0x7ffffffffffff; // x73 = x71_low & 0x7ffffffffffff -r16:r15 <- MULX RAX, r9; // x24_tmp = x5 * x17 -r8 <- SHR RBP:r8, 0x33; // x72 = x71_low:x71_high >> 0x33 -RDX, (cx74) <- ADC (cx74), RDX, r8; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -RSI <- ADX RSI, r15; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -RDX, (cx74) <- ADC (cx74), RDX, r16; // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -RSI <- ADCX (cx74), RSI, 0x0; // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -RDX <- AND RDX, 0x7ffffffffffff; // x76 = x74_low & 0x7ffffffffffff -r15:r16 <- MULX RAX, RDI; // x29_tmp = x5 * x19 -RSI <- SHR RDX:RSI, 0x33; // x75 = x74_low:x74_high >> 0x33 -RBX, (cx77) <- ADC (cx77), RBX, RSI; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -r18 <- ADX r18, r16; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -RBX, (cx77) <- ADC (cx77), RBX, r15; // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -r18 <- ADCX (cx77), r18, 0x0; // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -RBX <- AND RBX, 0x7ffffffffffff; // x79 = x77_low & 0x7ffffffffffff -r16:r15 <- MULX RAX, RCX; // x36_tmp = x5 * x18 -r18 <- SHR RBX:r18, 0x33; // x78 = x77_low:x77_high >> 0x33 -r13, (cx80) <- ADC (cx80), r13, r18; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -r14 <- ADX r14, r15; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -r13, (cx80) <- ADC (cx80), r13, r16; // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -r14 <- ADCX (cx80), r14, 0x0; // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -r13 <- AND r13, 0x7ffffffffffff; // x82 = x80_low & 0x7ffffffffffff -r14 <- SHR r13:r14, 0x33; // x81 = x80_low:x80_high >> 0x33 -r14 <- MULX r14, 0x13; // x83 = x81 * 0x13 -r10 <- ADX r10, r14; // x84 = x70 + x83 -r10 <- SHR r10, 0x33; // x85 = x70 >> 0x33 -r14 <- AND r10, 0x7ffffffffffff; // x86 = x84 & 0x7ffffffffffff -r10 <- ADX r10, RBP; // x87 = x85 + x73 -r10 <- SHR r10, 0x33; // x88 = x85 >> 0x33 -RBP <- AND r10, 0x7ffffffffffff; // x89 = x87 & 0x7ffffffffffff -r10 <- ADX r10, RDX; // x90 = x88 + x76 +"mov %[rx11], %[arg0]\t\n" +FIXME: lea for x48 = x11 * 0x13 +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[r12], %rdx\t\n" +"mov %[rx17], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x55_tmp = x48 * x17 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[r10], %[r9]\t\n" // bucket: x50_high + x52_high + x54_high + x56_high +"mov %[r11], %[r8]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low +"mov %[rx9], %[arg0]\t\n" +FIXME: lea for x47 = x9 * 0x13 +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[r7], %rdx\t\n" +"mov %[rx19], %[arg0]\t\n" +"mulx %[arg0], %[r11], %[r10]\t\n" // x53_tmp = x47 * x19 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r11], %[r9]\t\n" // bucket: x50_high + x52_high + x54_high + x56_high +"add %[r10], %[r8]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low +"mov %[rx7], %[arg0]\t\n" +FIXME: lea for x46 = x7 * 0x13 +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[r6], %rdx\t\n" +"mov %[rx18], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x51_tmp = x46 * x18 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r10], %[r9]\t\n" // bucket: x50_high + x52_high + x54_high + x56_high +"adc %[r11], %[r8]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx5], %rdx\t\n" +"mov %[rx13], %[arg0]\t\n" +"mulx %[arg0], %[r11], %[r10]\t\n" // x20_tmp = x5 * x13 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r11], %[r9]\t\n" // bucket: x50_high + x52_high + x54_high + x56_high +"adc %[r10], %[r8]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low +"mov %[rx10], %[arg0]\t\n" +FIXME: lea for x45 = x10 * 0x13 +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[r6], %rdx\t\n" +"mov %[rx15], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x49_tmp = x45 * x15 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r10], %[r9]\t\n" // bucket: x50_high + x52_high + x54_high + x56_high +"adc %[r11], %[r8]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low +"adcx $0, %[r9]\t\n" // bucket: x50_high + x52_high + x54_high + x56_high +"and $0x7ffffffffffff, %[r8]\t\n" // x70 = x56_low & 0x7ffffffffffff +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[r12], %rdx\t\n" +"mov %[rx19], %[arg0]\t\n" +"mulx %[arg0], %[r11], %[r10]\t\n" // x61_tmp = x48 * x19 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +r9 <- SHR r8:r9, 0x33; // x69 = x56_low:x56_high >> 0x33 +"mov %[r9], %[r4]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +"mov %[r11], %[r5]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +"add %[r10], %[r4]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[r7], %rdx\t\n" +"mov %[rx18], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x59_tmp = x47 * x18 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r10], %[r5]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +"adc %[r11], %[r4]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[r6], %rdx\t\n" +"mov %[rx17], %[arg0]\t\n" +"mulx %[arg0], %[r11], %[r10]\t\n" // x57_tmp = x45 * x17 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r11], %[r5]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +"adc %[r10], %[r4]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx5], %rdx\t\n" +"mov %[rx15], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x21_tmp = x5 * x15 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r10], %[r5]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +"adc %[r11], %[r4]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx7], %rdx\t\n" +"mov %[rx13], %[arg0]\t\n" +"mulx %[arg0], %[r11], %[r10]\t\n" // x22_tmp = x7 * x13 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r11], %[r5]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +"adc %[r10], %[r4]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +"adcx $0, %[r5]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +"and $0x7ffffffffffff, %[r4]\t\n" // x73 = x71_low & 0x7ffffffffffff +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[r12], %rdx\t\n" +"mov %[rx18], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x65_tmp = x48 * x18 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +r5 <- SHR r4:r5, 0x33; // x72 = x71_low:x71_high >> 0x33 +"mov %[r5], %[r9]\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +"mov %[r10], %[r12]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +"add %[r11], %[r9]\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[r6], %rdx\t\n" +"mov %[rx19], %[arg0]\t\n" +"mulx %[arg0], %[r11], %[r10]\t\n" // x63_tmp = x45 * x19 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r11], %[r12]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +"adc %[r10], %[r9]\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx7], %rdx\t\n" +"mov %[rx15], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x27_tmp = x7 * x15 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r10], %[r12]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +"adc %[r11], %[r9]\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx5], %rdx\t\n" +"mov %[rx17], %[arg0]\t\n" +"mulx %[arg0], %[r11], %[r10]\t\n" // x24_tmp = x5 * x17 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r11], %[r12]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +"adc %[r10], %[r9]\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx9], %rdx\t\n" +"mov %[rx13], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x25_tmp = x9 * x13 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r10], %[r12]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +"adc %[r11], %[r9]\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +"adcx $0, %[r12]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +"and $0x7ffffffffffff, %[r9]\t\n" // x76 = x74_low & 0x7ffffffffffff +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[r6], %rdx\t\n" +"mov %[rx18], %[arg0]\t\n" +"mulx %[arg0], %[r11], %[r10]\t\n" // x67_tmp = x45 * x18 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +r12 <- SHR r9:r12, 0x33; // x75 = x74_low:x74_high >> 0x33 +"mov %[r12], %[r5]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +"mov %[r11], %[r6]\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +"add %[r10], %[r5]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx9], %rdx\t\n" +"mov %[rx15], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x34_tmp = x9 * x15 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r10], %[r6]\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +"adc %[r11], %[r5]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx7], %rdx\t\n" +"mov %[rx17], %[arg0]\t\n" +"mulx %[arg0], %[r11], %[r10]\t\n" // x32_tmp = x7 * x17 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r11], %[r6]\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +"adc %[r10], %[r5]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx5], %rdx\t\n" +"mov %[rx19], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x29_tmp = x5 * x19 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r10], %[r6]\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +"adc %[r11], %[r5]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx11], %rdx\t\n" +"mov %[rx13], %[arg0]\t\n" +"mulx %[arg0], %[r11], %[r10]\t\n" // x30_tmp = x11 * x13 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r11], %[r6]\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +"adc %[r10], %[r5]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +"adcx $0, %[r6]\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +"and $0x7ffffffffffff, %[r5]\t\n" // x79 = x77_low & 0x7ffffffffffff +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx9], %rdx\t\n" +"mov %[rx17], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x43_tmp = x9 * x17 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +r6 <- SHR r5:r6, 0x33; // x78 = x77_low:x77_high >> 0x33 +"mov %[r6], %[r7]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +"mov %[r10], %[r12]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +"add %[r11], %[r7]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx7], %rdx\t\n" +"mov %[rx19], %[arg0]\t\n" +"mulx %[arg0], %[r11], %[r10]\t\n" // x41_tmp = x7 * x19 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r11], %[r12]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +"adc %[r10], %[r7]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx11], %rdx\t\n" +"mov %[rx15], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x39_tmp = x11 * x15 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r10], %[r12]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +"adc %[r11], %[r7]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx5], %rdx\t\n" +"mov %[rx18], %[arg0]\t\n" +"mulx %[arg0], %[r11], %[r10]\t\n" // x36_tmp = x5 * x18 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r11], %[r12]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +"adc %[r10], %[r7]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? +"mov %[rx10], %rdx\t\n" +"mov %[rx13], %[arg0]\t\n" +"mulx %[arg0], %[r10], %[r11]\t\n" // x37_tmp = x10 * x13 +"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? +"adx %[r10], %[r12]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +"adc %[r11], %[r7]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +"adcx $0, %[r12]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +"and $0x7ffffffffffff, %[r7]\t\n" // x82 = x80_low & 0x7ffffffffffff +r12 <- SHR r7:r12, 0x33; // x81 = x80_low:x80_high >> 0x33 +FIXME: lea for x83 = x81 * 0x13 +"adx %[r12], %[r8]\t\n" // bucket: x84 = x70 + x83 +r8 <- SHR r8, 0x33; // x85 = x70 >> 0x33 +"mov %[r8], %[r12]\t\n" +"and $0x7ffffffffffff, %[r12]\t\n" // x86 = x84 & 0x7ffffffffffff +"adx %[r4], %[r8]\t\n" // bucket: x87 = x85 + x73 +r8 <- SHR r8, 0x33; // x88 = x85 >> 0x33 +"mov %[r8], %[r4]\t\n" +"and $0x7ffffffffffff, %[r4]\t\n" // x89 = x87 & 0x7ffffffffffff +"adx %[r9], %[r8]\t\n" // bucket: x90 = x88 + x76 diff --git a/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py b/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py index 4ad14e50f..f37bc3ff1 100755 --- a/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py +++ b/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py @@ -8,8 +8,8 @@ LAMBDA = u'\u03bb' OP_NAMES = {'*':'MUL', '+':'ADD', '>>':'SHL', '<<':'SHR', '|':'OR', '&':'AND'} -REGISTERS = tuple(['RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP'] #, 'RSP'] # RSP is stack pointer? - + ['r%d' % i for i in range(8, 19)]) +REGISTERS = tuple(#['RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP'] + #, 'RSP'] # RSP is stack pointer? + ['r%d' % i for i in range(13)]) REGISTER_COLORS = ['color="black"', 'color="white",fillcolor="black"', 'color="maroon"', 'color="green"', 'fillcolor="olive"', 'color="navy"', 'color="purple"', 'fillcolor="teal"', 'fillcolor="silver"', 'fillcolor="gray"', 'fillcolor="red"', 'fillcolor="lime"', 'fillcolor="yellow"', 'fillcolor="blue"', 'fillcolor="fuschia"', 'fillcolor="aqua"'] @@ -197,18 +197,6 @@ def to_graph(input_data): return graph -def print_dependencies(input_data, dependencies): - in_vars = get_input_var_names(input_data) - out_vars = get_output_var_names(input_data) - registers = assign_registers(input_data, dependencies) - body = ( - ''.join(' %s [label="%s (%s)",%s];\n' % (var, var, reg, COLOR_FOR_REGISTER[reg.split(':')[0]]) for var, reg in registers.items()) + - ''.join(' in -> %s ;\n' % var for var in in_vars) + - ''.join(' %s -> out ;\n' % var for var in out_vars) + - ''.join(''.join(' %s -> %s ;\n' % (out_var, in_var) for out_var in sorted(dependencies[in_var])) - for in_var in sorted(dependencies.keys())) - ) - return ('digraph G {\n' + body + '}\n') def adjust_bits(input_data, graph): for line in input_data['lines']: if line['type'] == 'uint128_t': @@ -237,7 +225,11 @@ def is_temp(node): return True return False +def is_allocated_to_reg(full_map, node): + return node['out'] in full_map.keys() and all(reg in REGISTERS for reg in full_map[node['out']].split(':')) + def deps_allocated(full_map, node): + if node['op'] == 'INPUT': return True if node['out'] not in full_map.keys(): return False return all(deps_allocated(full_map, dep) for dep in node['deps']) @@ -260,7 +252,7 @@ def allocate_node(existing, node, *args): if reg in all_temps: if reg not in free_temps: free_temps.append(reg) - else: + elif reg in REGISTERS: if reg not in free_list: print('freeing %s from %s' % (reg, var)) free_list.append(reg) @@ -269,6 +261,7 @@ def allocate_node(existing, node, *args): if node['out'] in full_map.keys(): for dep in node['deps']: if dep['out'] in freed or dep['out'] not in full_map.keys(): continue + if not is_allocated_to_reg(full_map, dep): continue if (all(deps_allocated(full_map, rdep) for rdep in dep['rev_deps']) or all(reg in all_temps for reg in full_map[dep['out']].split(':'))): do_free(dep['out']) @@ -277,18 +270,18 @@ def allocate_node(existing, node, *args): do_free_deps(node) return do_ret() #print('alloc: %s (of %d)' % (node['out'], len(free_list))) - if node['op'] in ('GET_HIGH', 'GET_LOW') and len(node['deps']) == 1 and len(node['deps'][0]['rev_deps']) <= 2 and all(n['op'] in ('GET_HIGH', 'GET_LOW') for n in node['deps'][0]['rev_deps']) and node['deps'][0]['out'] in full_map.keys(): + if node['op'] in ('GET_HIGH', 'GET_LOW') and len(node['deps']) == 1 and len(node['deps'][0]['rev_deps']) <= 2 and all(n['op'] in ('GET_HIGH', 'GET_LOW') for n in node['deps'][0]['rev_deps']) and is_allocated_to_reg(full_map, node['deps'][0]): reg_idx = {'GET_LOW':0, 'GET_HIGH':1}[node['op']] cur_map[node['out']] = full_map[node['deps'][0]['out']].split(':')[reg_idx] emit_vars.append(node) return do_ret() - if len(node['deps']) == 1 and len(node['deps'][0]['rev_deps']) == 1 and node['deps'][0]['out'] in full_map.keys() and node['type'] == node['deps'][0]['type']: + if len(node['deps']) == 1 and len(node['deps'][0]['rev_deps']) == 1 and is_allocated_to_reg(full_map, node['deps'][0]) and node['type'] == node['deps'][0]['type']: cur_map[node['out']] = full_map[node['deps'][0]['out']] emit_vars.append(node) return do_ret() if len(node['deps']) == 0 and node['op'] == 'INPUT': assert(node['type'] == 'uint64_t') - cur_map[node['out']] = free_list.pop() + cur_map[node['out']] = 'r' + node['out'] # free_list.pop() emit_vars.append(node) return do_ret() if is_temp(node): @@ -314,7 +307,8 @@ def allocate_node(existing, node, *args): if node['op'] == '*' and node['type'] == 'uint64_t' and len(node['deps']) == 1: dep = node['deps'][0] assert(dep['out'] in full_map.keys()) - if all(rdep is node or (rdep['out'] in full_map.keys() and full_map[rdep['out']] != full_map[dep['out']]) + if is_allocated_to_reg(full_map, dep) and \ + all(rdep is node or (is_allocated_to_reg(full_map, rdep) and full_map[rdep['out']] != full_map[dep['out']]) for rdep in dep['rev_deps']): cur_map[node['out']] = full_map[dep['out']] freed += [dep['out']] @@ -322,7 +316,7 @@ def allocate_node(existing, node, *args): cur_map[node['out']] = free_list.pop() emit_vars.append(node) return do_ret() - raw_input([node['out'], node['op'], node['type'], len(node['deps'])]) + raw_input([node['out'], node['op'], node['type'], [(dep['out'], full_map.get(dep['out'])) for dep in node['deps']]]) return do_ret() def allocate_deps(existing, node, *args): @@ -491,7 +485,9 @@ def fix_emit_vars(emit_vars): waiting.append(node) new_waiting = [] for wnode in waiting: - if all(dep['out'] in seen for dep in wnode['deps']): + if wnode['out'] in seen: + continue + elif all(dep['out'] in seen for dep in wnode['deps']): ret.append(wnode) seen.add(wnode['out']) else: @@ -502,7 +498,9 @@ def fix_emit_vars(emit_vars): print(list(sorted(node['out'] for node in waiting))) new_waiting = [] for wnode in waiting: - if all(dep['out'] in seen for dep in wnode['deps']): + if wnode['out'] in seen: + continue + elif all(dep['out'] in seen for dep in wnode['deps']): ret.append(wnode) seen.add(wnode['out']) else: @@ -510,6 +508,114 @@ def fix_emit_vars(emit_vars): waiting = new_waiting return tuple(ret) +def print_input(reg_out, mem_in): + #return '%s <- LOAD %s;\n' % (reg_out, mem_in) + #return '"mov %%[%s], %%[%s]\\n\\t"\n' % (mem_in, reg_out) + return "" + +def print_load_specific_reg(reg, specific_reg='rdx'): + ret = '' + ret += '"mov %%%s, %%[%s_backup]\\t\\n" // XXX: How do I specify that a particular register should be %s?\n' % (specific_reg, specific_reg, specific_reg) + ret += '"mov %%[%s], %%%s\\t\\n"\n' % (reg, specific_reg) + return ret, (specific_reg,) +def print_unload_specific_reg(specific_reg='rdx'): + ret = '' + ret += '"mov %%[%s_backup], %%%s\\t\\n" // XXX: How do I specify that a particular register should be %s?\n' % (specific_reg, specific_reg, specific_reg) + return ret +def print_load(*regs): + TEMP_REG = ['arg%d' % d for d in reversed(range(15))] + ret, out_reg = '', [] + for reg in regs: + if reg in REGISTERS: + out_reg.append(reg) + continue + else: + cur_reg = TEMP_REG.pop() + ret += '"mov %%[%s], %%[%s]\\t\\n"\n' % (reg, cur_reg) + out_reg.append(cur_reg) + if len(out_reg) == 1: return ret, out_reg[0] + return ret, tuple(out_reg) + +def print_mulx(reg_out_low, reg_out_high, rx1, rx2, src): + #return '%s:%s <- MULX %s, %s; // %s\n' % (reg_out_low, reg_out_high, rx1, rx2, src) + ret = '' + ret2, actual_rx1 = print_load_specific_reg(rx1, 'rdx') + ret3, actual_rx2 = print_load(rx2) + ret += ret2 + ret3 + ('"mulx %%[%s], %%[%s], %%[%s]\\t\\n" // %s\n' % (actual_rx2, reg_out_high, reg_out_low, src)) + ret += print_unload_specific_reg('rdx') + return ret + +def print_mov_bucket(reg_out, reg_in, bucket): + #return '%s <- MOV %s; // bucket: %s\n' % (reg_out, reg_in, bucket) + ret, reg_in = print_load(reg_in) + return ret + ('"mov %%[%s], %%[%s]\\t\\n" // bucket: %s\n' % (reg_in, reg_out, bucket)) + +def print_mov(reg_out, reg_in): + #return '%s <- MOV %s;\n' % (reg_out, reg_in) + ret, reg_in = print_load(reg_in) + return ret + ('"mov %%[%s], %%[%s]\\t\\n"\n' % (reg_in, reg_out)) + +LAST_CARRY = None + +def print_mul_by_constant(reg_out, reg_in, constant, src): + #return '%s <- MULX %s, %s; // %s\n' % (ret_out, reg_in, constant, src) + #assert(LAST_CARRY is None) + global LAST_CARRY + ret, reg_in = print_load(reg_in) + if constant == '0x13': + return ret + ('FIXME: lea for %s\n' % src) + else: + LAST_CARRY = None + return ret + ('"imul %%[%s], $%s, %%[%s]\\t\\n" // %s\n' % (reg_in, constant, reg_out, src)) + +def print_adx(reg_out, rx1, rx2, bucket): + #return '%s <- ADX %s, %s; // bucket: %s\n' % (reg_out, rx1, rx2, bucket) + assert(rx1 == reg_out) + ret, rx2 = print_load(rx2) + return ret + ('"adx %%[%s], %%[%s]\\t\\n" // bucket: %s\n' % (rx2, reg_out, bucket)) + +def print_add(reg_out, cf, rx1, rx2, bucket): + #return '%s, (%s) <- ADD %s, %s; // bucket: %s\n' % (reg_out, cf, rx1, rx2, bucket) + global LAST_CARRY + assert(reg_out == rx1) + #assert(LAST_CARRY is None or LAST_CARRY == cf) + LAST_CARRY = cf + ret, rx2 = print_load(rx2) + return ret + ('"add %%[%s], %%[%s]\\t\\n" // bucket: %s\n' % (rx2, reg_out, bucket)) + +def print_adc(reg_out, cf, rx1, rx2, bucket): + #return '%s, (%s) <- ADC (%s), %s, %s; // bucket: %s\n' % (reg_out, cf, cf, rx1, rx2, bucket) + assert(reg_out == rx1) + ret = '' + global LAST_CARRY + if LAST_CARRY != cf: + ret += 'ERRRRRRROR: %s != %s\n' % (LAST_CARRY, cf) + LAST_CARRY = cf + ret2, rx2 = print_load(rx2) + ret += ret2 + return ret + ('"adc %%[%s], %%[%s]\\t\\n" // bucket: %s\n' % (rx2, reg_out, bucket)) + +def print_adcx(reg_out, cf, bucket): + #return '%s <- ADCX (%s), %s, 0x0; // bucket: %s\n' % (reg_out, cf, reg_out, bucket) + assert(LAST_CARRY == cf) + return ('"adcx $0, %%[%s]\\t\\n" // bucket: %s\n' % (reg_out, bucket)) + +def print_and(reg_out, rx1, rx2, src): + #return '%s <- AND %s, %s; // %s\n' % (reg_out, rx1, rx2, src) + global LAST_CARRY + LAST_CARRY = None + if reg_out != rx1: + return print_mov(reg_out, rx1) + print_and(reg_out, reg_out, rx2, src) + else: + if rx2[:2] == '0x': + return ('"and $%s, %%[%s]\\t\\n" // %s\n' % (rx2, reg_out, src)) + else: + ret, rx2 = print_load(rx2) + return ret + ('"and %%[%s], %%[%s]\\t\\n" // %s\n' % (rx2, reg_out, src)) + +#def print_shr(reg_out, rx1, imm, src): + #return '%s <- SHR %s, %s;\n' % + def schedule(input_data, existing, emit_vars): ret = '' buckets_seen = set() @@ -518,33 +624,35 @@ def schedule(input_data, existing, emit_vars): ret += ('// Convention is low_reg:high_reg\n') for node in emit_vars: if node['op'] == 'INPUT': - ret += ('%s <- LOAD %s;\n' % (existing[node['out']], node['out'])) + ret += print_input(existing[node['out']], node['out']) elif node['op'] == '*' and len(node['deps']) == 2: - ret += ('%s <- MULX %s, %s; // %s = %s * %s\n' - % (existing[node['out']], - existing[node['deps'][0]['out']], - existing[node['deps'][1]['out']], - node['out'], - node['deps'][0]['out'], - node['deps'][1]['out'])) + assert(len(existing[node['out']].split(':')) == 2) + out_low, out_high = existing[node['out']].split(':') + ret += print_mulx(out_low, out_high, + existing[node['deps'][0]['out']], + existing[node['deps'][1]['out']], + '%s = %s * %s' + % (node['out'], + node['deps'][0]['out'], + node['deps'][1]['out'])) elif node['op'] == '*' and len(node['deps']) == 1: extra_arg = [arg for arg in line_of_var(data, node['out'])['args'] if arg[:2] == '0x'][0] - ret += ('%s <- MULX %s, %s; // %s = %s * %s\n' - % (existing[node['out']], - existing[node['deps'][0]['out']], - extra_arg, - node['out'], - node['deps'][0]['out'], - extra_arg)) + ret += print_mul_by_constant(existing[node['out']], + existing[node['deps'][0]['out']], + extra_arg, + '%s = %s * %s' + % (node['out'], + node['deps'][0]['out'], + extra_arg)) elif node['op'] == '&' and len(node['deps']) == 1: extra_arg = [arg for arg in line_of_var(data, node['out'])['args'] if arg[:2] == '0x'][0] - ret += ('%s <- AND %s, %s; // %s = %s & %s\n' - % (existing[node['out']], - existing[node['deps'][0]['out']], - extra_arg, - node['out'], - node['deps'][0]['out'], - extra_arg)) + ret += print_and(existing[node['out']], + existing[node['deps'][0]['out']], + extra_arg, + '%s = %s & %s' + % (node['out'], + node['deps'][0]['out'], + extra_arg)) elif node['op'] == '>>' and len(node['deps']) == 1 and node['deps'][0]['op'] == 'COMBINE': extra_arg = [arg for arg in line_of_var(data, node['out'])['args'] if arg[:2] == '0x'][0] ret += ('%s <- SHR %s:%s, %s; // %s = %s:%s >> %s\n' @@ -567,52 +675,45 @@ def schedule(input_data, existing, emit_vars): extra_arg)) elif node['op'] in ('GET_HIGH', 'GET_LOW'): if node['rev_deps'][0]['out'] not in buckets_seen: - ret += ('%s <- MOV %s; // bucket: %s\n' - % (existing[node['rev_deps'][0]['out']], - existing[node['out']], - ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out']))))) + ret += print_mov_bucket(existing[node['rev_deps'][0]['out']], + existing[node['out']], + ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out'])))) buckets_seen.add(node['rev_deps'][0]['out']) elif node['op'] == 'GET_HIGH': - ret += ('%s <- ADX %s, %s; // bucket: %s\n' - % (existing[node['rev_deps'][0]['out']], - existing[node['rev_deps'][0]['out']], - existing[node['out']], - ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out']))))) + ret += print_adx(existing[node['rev_deps'][0]['out']], + existing[node['rev_deps'][0]['out']], + existing[node['out']], + ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out'])))) elif node['op'] == 'GET_LOW': carry = 'c' + node['rev_deps'][0]['out'][:-len('_low')] if node['rev_deps'][0]['out'] not in buckets_carried: - ret += ('%s, (%s) <- ADD %s, %s; // bucket: %s\n' - % (existing[node['rev_deps'][0]['out']], - carry, - existing[node['rev_deps'][0]['out']], - existing[node['out']], - ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out']))))) + ret += print_add(existing[node['rev_deps'][0]['out']], + carry, + existing[node['rev_deps'][0]['out']], + existing[node['out']], + ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out'])))) buckets_carried.add(node['rev_deps'][0]['out']) else: - ret += ('%s, (%s) <- ADC (%s), %s, %s; // bucket: %s\n' - % (existing[node['rev_deps'][0]['out']], - carry, - carry, - existing[node['rev_deps'][0]['out']], - existing[node['out']], - ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out']))))) + ret += print_adc(existing[node['rev_deps'][0]['out']], + carry, + existing[node['rev_deps'][0]['out']], + existing[node['out']], + ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out'])))) elif node['op'] in ('GET_CARRY',): carry = 'c' + node['rev_deps'][0]['out'][:-len('_high')] - ret += ('%s <- ADCX (%s), %s, 0x0; // bucket: %s\n' - % (existing[node['rev_deps'][0]['out']], - carry, - existing[node['rev_deps'][0]['out']], - ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out']))))) + ret += print_adcx(existing[node['rev_deps'][0]['out']], + carry, + ' + '.join(sorted([node['rev_deps'][0]['out']] + list(node['rev_deps'][0]['extra_out'])))) elif node['op'] == '+' and len(node['extra_out']) > 0: pass elif node['op'] == '+' and len(node['deps']) == 2 and node['type'] == 'uint64_t': - ret += ('%s <- ADX %s, %s; // %s = %s + %s\n' - % (existing[node['out']], - existing[node['deps'][0]['out']], - existing[node['deps'][1]['out']], - node['out'], - node['deps'][0]['out'], - node['deps'][1]['out'])) + ret += print_adx(existing[node['out']], + existing[node['deps'][0]['out']], + existing[node['deps'][1]['out']], + '%s = %s + %s' + % (node['out'], + node['deps'][0]['out'], + node['deps'][1]['out'])) elif node['op'] in ('COMBINE',): pass else: @@ -621,35 +722,30 @@ def schedule(input_data, existing, emit_vars): for rdep in node['rev_deps']: if len(rdep['extra_out']) > 0 and rdep['op'] == '+': if rdep['out'] not in buckets_seen: - ret += ('%s <- MOV %s; // bucket: %s\n' - % (existing[rdep['out']], - existing[node['out']], - ' + '.join(sorted([rdep['out']] + list(rdep['extra_out']))))) + ret += print_mov_bucket(existing[rdep['out']], + existing[node['out']], + ' + '.join(sorted([rdep['out']] + list(rdep['extra_out'])))) buckets_seen.add(rdep['out']) elif 'high' in rdep['out']: - ret += ('%s <- ADX %s, %s; // bucket: %s\n' - % (existing[rdep['out']], - existing[rdep['out']], - existing[node['out']], - ' + '.join(sorted([rdep['out']] + list(rdep['extra_out']))))) + ret += print_adx(existing[rdep['out']], + existing[rdep['out']], + existing[node['out']], + ' + '.join(sorted([rdep['out']] + list(rdep['extra_out'])))) elif 'low' in rdep['out']: carry = 'c' + rdep['out'][:-len('_low')] if rdep['out'] not in buckets_carried: - ret += ('%s, (%s) <- ADD %s, %s; // bucket: %s\n' - % (existing[rdep['out']], - carry, - existing[rdep['out']], - existing[node['out']], - ' + '.join(sorted([rdep['out']] + list(rdep['extra_out']))))) + ret += print_add(existing[rdep['out']], + carry, + existing[rdep['out']], + existing[node['out']], + ' + '.join(sorted([rdep['out']] + list(rdep['extra_out'])))) buckets_carried.add(rdep['out']) else: - ret += ('%s, (%s) <- ADC (%s), %s, %s; // bucket: %s\n' - % (existing[rdep['out']], - carry, - carry, - existing[rdep['out']], - existing[node['out']], - ' + '.join(sorted([rdep['out']] + list(rdep['extra_out']))))) + ret += print_adc(existing[rdep['out']], + carry, + existing[rdep['out']], + existing[node['out']], + ' + '.join(sorted([rdep['out']] + list(rdep['extra_out'])))) else: assert(False) return ret @@ -680,7 +776,18 @@ for i, data in enumerate(data_list): if 'tmp' not in v: ret += list(vars_for(v, rec=False)) return tuple(ret) - for var in list(vars_for('x10')) + list(vars_for('x11')) + list(vars_for('x9')) + list(vars_for('x7')) + list(vars_for('x5')): # tuple(): #('x20_tmp', 'x49_tmp', 'x51_tmp', 'x55_tmp', 'x53_tmp'): + def vars_for_bucket(var): + if '_' not in var: + return tuple(list(vars_for_bucket(var + '_low')) + list(vars_for_bucket(var + '_high'))) + ret = [] + for dep in objs[var]['deps']: + if dep['op'] in ('GET_HIGH', 'GET_LOW'): + assert(len(dep['deps']) == 1) + assert('tmp' in dep['deps'][0]['out']) + ret.append(dep['deps'][0]['out']) + return tuple(ret) +# for var in list(vars_for('x10')) + list(vars_for('x11')) + list(vars_for('x9')) + list(vars_for('x7')) + list(vars_for('x5')): # tuple(): #('x20_tmp', 'x49_tmp', 'x51_tmp', 'x55_tmp', 'x53_tmp'): + for var in list(vars_for_bucket('x56')) + list(vars_for_bucket('x71')) + list(vars_for_bucket('x74')) + list(vars_for_bucket('x77')) + list(vars_for_bucket('x80')): # + list(vars_for('x11')) + list(vars_for('x9')) + list(vars_for('x7')) + list(vars_for('x5')): # tuple(): #('x20_tmp', 'x49_tmp', 'x51_tmp', 'x55_tmp', 'x53_tmp'): print(var) cur_possible_nodes = [n for n in possible_nodes if n['out'] == var] cur_possible_nodes, cur_map, free_temps, free_list, all_temps, freed, new_buckets, emit_vars \ -- cgit v1.2.3