From bd4714ebbb3552b2d85222f37fc6274052e3176e Mon Sep 17 00:00:00 2001 From: Jason Gross Date: Tue, 12 Sep 2017 13:15:31 -0400 Subject: Fix assembly --- etc/compile-by-zinc/femulData0.dot | 244 +-- etc/compile-by-zinc/femulData0.png | Bin 2043015 -> 1941404 bytes etc/compile-by-zinc/femulData0.svg | 1776 ++++++++++---------- etc/compile-by-zinc/femulDisplayScheduled0.log | 404 +++-- .../make-graph-with-reg-by-ac-buckets.py | 180 +- 5 files changed, 1325 insertions(+), 1279 deletions(-) (limited to 'etc') diff --git a/etc/compile-by-zinc/femulData0.dot b/etc/compile-by-zinc/femulData0.dot index 2bc6416f7..4691509f1 100644 --- a/etc/compile-by-zinc/femulData0.dot +++ b/etc/compile-by-zinc/femulData0.dot @@ -1,131 +1,131 @@ digraph G { - x5 [label="x5 (rx5)" , style="filled", fillcolor="red"]; - x7 [label="x7 (rx7)" , style="filled", fillcolor="red"]; - x9 [label="x9 (rx9)" , style="filled", fillcolor="red"]; - x10 [label="x10 (rx10)" , style="filled", fillcolor="red"]; - x11 [label="x11 (rx11)" , style="filled", fillcolor="red"]; - x13 [label="x13 (rx13)" , style="filled", fillcolor="red"]; - x15 [label="x15 (rx15)" , style="filled", fillcolor="red"]; - x17 [label="x17 (rx17)" , style="filled", fillcolor="red"]; - x18 [label="x18 (rx18)" , style="filled", fillcolor="red"]; - x19 [label="x19 (rx19)" , style="filled", fillcolor="red"]; - x20_128_high [label="x20_128_high (r11)" , style="filled", fillcolor="red"]; - x20_128_low [label="x20_128_low (r10)" , style="filled", fillcolor="red"]; - x20_128_tmp [label="x20_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; - x21_128_high [label="x21_128_high (r10)" , style="filled", fillcolor="red"]; - x21_128_low [label="x21_128_low (r11)" , style="filled", fillcolor="red"]; - x21_128_tmp [label="x21_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; - x22_128_high [label="x22_128_high (r11)" , style="filled", fillcolor="red"]; - x22_128_low [label="x22_128_low (r10)" , style="filled", fillcolor="red"]; - x22_128_tmp [label="x22_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; - x24_128_high [label="x24_128_high (r11)" , style="filled", fillcolor="red"]; - x24_128_low [label="x24_128_low (r10)" , style="filled", fillcolor="red"]; - x24_128_tmp [label="x24_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; - x25_128_high [label="x25_128_high (r10)" , style="filled", fillcolor="red"]; - x25_128_low [label="x25_128_low (r11)" , style="filled", fillcolor="red"]; - x25_128_tmp [label="x25_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; - x27_128_high [label="x27_128_high (r10)" , style="filled", fillcolor="red"]; - x27_128_low [label="x27_128_low (r11)" , style="filled", fillcolor="red"]; - x27_128_tmp [label="x27_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; - x29_128_high [label="x29_128_high (r10)" , style="filled", fillcolor="red"]; - x29_128_low [label="x29_128_low (r11)" , style="filled", fillcolor="red"]; - x29_128_tmp [label="x29_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; - x30_128_high [label="x30_128_high (r11)" , style="filled", fillcolor="red"]; - x30_128_low [label="x30_128_low (r10)" , style="filled", fillcolor="red"]; - x30_128_tmp [label="x30_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; - x32_128_high [label="x32_128_high (r11)" , style="filled", fillcolor="red"]; - x32_128_low [label="x32_128_low (r10)" , style="filled", fillcolor="red"]; - x32_128_tmp [label="x32_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; - x34_128_high [label="x34_128_high (r10)" , style="filled", fillcolor="red"]; - x34_128_low [label="x34_128_low (r11)" , style="filled", fillcolor="red"]; - x34_128_tmp [label="x34_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; - x36_128_high [label="x36_128_high (r11)" , style="filled", fillcolor="red"]; - x36_128_low [label="x36_128_low (r10)" , style="filled", fillcolor="red"]; - x36_128_tmp [label="x36_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; - x37_128_high [label="x37_128_high (r10)" , style="filled", fillcolor="red"]; - x37_128_low [label="x37_128_low (r11)" , style="filled", fillcolor="red"]; - x37_128_tmp [label="x37_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; - x39_128_high [label="x39_128_high (r10)" , style="filled", fillcolor="red"]; - x39_128_low [label="x39_128_low (r11)" , style="filled", fillcolor="red"]; - x39_128_tmp [label="x39_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; - x41_128_high [label="x41_128_high (r11)" , style="filled", fillcolor="red"]; - x41_128_low [label="x41_128_low (r10)" , style="filled", fillcolor="red"]; - x41_128_tmp [label="x41_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; - x43_128_high [label="x43_128_high (r10)" , style="filled", fillcolor="red"]; - x43_128_low [label="x43_128_low (r11)" , style="filled", fillcolor="red"]; - x43_128_tmp [label="x43_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; - x45 [label="x45 (r6)" , style="filled", fillcolor="red"]; - x46 [label="x46 (r6)" , style="filled", fillcolor="red"]; - x47 [label="x47 (r7)" , style="filled", fillcolor="red"]; - x48 [label="x48 (r12)" , style="filled", fillcolor="red"]; - x49_128_high [label="x49_128_high (r10)" , style="filled", fillcolor="red"]; - x49_128_low [label="x49_128_low (r11)" , style="filled", fillcolor="red"]; - x49_128_tmp [label="x49_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; - x51_128_high [label="x51_128_high (r10)" , style="filled", fillcolor="red"]; - x51_128_low [label="x51_128_low (r11)" , style="filled", fillcolor="red"]; - x51_128_tmp [label="x51_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; - x53_128_high [label="x53_128_high (r11)" , style="filled", fillcolor="red"]; - x53_128_low [label="x53_128_low (r10)" , style="filled", fillcolor="red"]; - x53_128_tmp [label="x53_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; - x55_128_high [label="x55_128_high (r10)" , style="filled", fillcolor="red"]; - x55_128_low [label="x55_128_low (r11)" , style="filled", fillcolor="red"]; - x55_128_tmp [label="x55_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; + x5 [label="x5 (mx5)" , style="filled", fillcolor="red"]; + x7 [label="x7 (mx7)" , style="filled", fillcolor="red"]; + x9 [label="x9 (mx9)" , style="filled", fillcolor="red"]; + x10 [label="x10 (mx10)" , style="filled", fillcolor="red"]; + x11 [label="x11 (mx11)" , style="filled", fillcolor="red"]; + x13 [label="x13 (mx13)" , style="filled", fillcolor="red"]; + x15 [label="x15 (mx15)" , style="filled", fillcolor="red"]; + x17 [label="x17 (mx17)" , style="filled", fillcolor="red"]; + x18 [label="x18 (mx18)" , style="filled", fillcolor="red"]; + x19 [label="x19 (mx19)" , style="filled", fillcolor="red"]; + x20_128_high [label="x20_128_high (reg11)" , style="filled", fillcolor="red"]; + x20_128_low [label="x20_128_low (reg10)" , style="filled", fillcolor="red"]; + x20_128_tmp [label="x20_128_tmp (reg10:reg11)" , style="filled", fillcolor="red"]; + x21_128_high [label="x21_128_high (reg10)" , style="filled", fillcolor="red"]; + x21_128_low [label="x21_128_low (reg11)" , style="filled", fillcolor="red"]; + x21_128_tmp [label="x21_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; + x22_128_high [label="x22_128_high (reg11)" , style="filled", fillcolor="red"]; + x22_128_low [label="x22_128_low (reg10)" , style="filled", fillcolor="red"]; + x22_128_tmp [label="x22_128_tmp (reg10:reg11)" , style="filled", fillcolor="red"]; + x24_128_high [label="x24_128_high (reg11)" , style="filled", fillcolor="red"]; + x24_128_low [label="x24_128_low (reg10)" , style="filled", fillcolor="red"]; + x24_128_tmp [label="x24_128_tmp (reg10:reg11)" , style="filled", fillcolor="red"]; + x25_128_high [label="x25_128_high (reg10)" , style="filled", fillcolor="red"]; + x25_128_low [label="x25_128_low (reg11)" , style="filled", fillcolor="red"]; + x25_128_tmp [label="x25_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; + x27_128_high [label="x27_128_high (reg10)" , style="filled", fillcolor="red"]; + x27_128_low [label="x27_128_low (reg11)" , style="filled", fillcolor="red"]; + x27_128_tmp [label="x27_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; + x29_128_high [label="x29_128_high (reg10)" , style="filled", fillcolor="red"]; + x29_128_low [label="x29_128_low (reg11)" , style="filled", fillcolor="red"]; + x29_128_tmp [label="x29_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; + x30_128_high [label="x30_128_high (reg11)" , style="filled", fillcolor="red"]; + x30_128_low [label="x30_128_low (reg10)" , style="filled", fillcolor="red"]; + x30_128_tmp [label="x30_128_tmp (reg10:reg11)" , style="filled", fillcolor="red"]; + x32_128_high [label="x32_128_high (reg11)" , style="filled", fillcolor="red"]; + x32_128_low [label="x32_128_low (reg10)" , style="filled", fillcolor="red"]; + x32_128_tmp [label="x32_128_tmp (reg10:reg11)" , style="filled", fillcolor="red"]; + x34_128_high [label="x34_128_high (reg10)" , style="filled", fillcolor="red"]; + x34_128_low [label="x34_128_low (reg11)" , style="filled", fillcolor="red"]; + x34_128_tmp [label="x34_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; + x36_128_high [label="x36_128_high (reg11)" , style="filled", fillcolor="red"]; + x36_128_low [label="x36_128_low (reg10)" , style="filled", fillcolor="red"]; + x36_128_tmp [label="x36_128_tmp (reg10:reg11)" , style="filled", fillcolor="red"]; + x37_128_high [label="x37_128_high (reg10)" , style="filled", fillcolor="red"]; + x37_128_low [label="x37_128_low (reg11)" , style="filled", fillcolor="red"]; + x37_128_tmp [label="x37_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; + x39_128_high [label="x39_128_high (reg10)" , style="filled", fillcolor="red"]; + x39_128_low [label="x39_128_low (reg11)" , style="filled", fillcolor="red"]; + x39_128_tmp [label="x39_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; + x41_128_high [label="x41_128_high (reg11)" , style="filled", fillcolor="red"]; + x41_128_low [label="x41_128_low (reg10)" , style="filled", fillcolor="red"]; + x41_128_tmp [label="x41_128_tmp (reg10:reg11)" , style="filled", fillcolor="red"]; + x43_128_high [label="x43_128_high (reg10)" , style="filled", fillcolor="red"]; + x43_128_low [label="x43_128_low (reg11)" , style="filled", fillcolor="red"]; + x43_128_tmp [label="x43_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; + x45 [label="x45 (reg6)" , style="filled", fillcolor="red"]; + x46 [label="x46 (reg6)" , style="filled", fillcolor="red"]; + x47 [label="x47 (reg7)" , style="filled", fillcolor="red"]; + x48 [label="x48 (reg12)" , style="filled", fillcolor="red"]; + x49_128_high [label="x49_128_high (reg10)" , style="filled", fillcolor="red"]; + x49_128_low [label="x49_128_low (reg11)" , style="filled", fillcolor="red"]; + x49_128_tmp [label="x49_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; + x51_128_high [label="x51_128_high (reg10)" , style="filled", fillcolor="red"]; + x51_128_low [label="x51_128_low (reg11)" , style="filled", fillcolor="red"]; + x51_128_tmp [label="x51_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; + x53_128_high [label="x53_128_high (reg11)" , style="filled", fillcolor="red"]; + x53_128_low [label="x53_128_low (reg10)" , style="filled", fillcolor="red"]; + x53_128_tmp [label="x53_128_tmp (reg10:reg11)" , style="filled", fillcolor="red"]; + x55_128_high [label="x55_128_high (reg10)" , style="filled", fillcolor="red"]; + x55_128_low [label="x55_128_low (reg11)" , style="filled", fillcolor="red"]; + x55_128_tmp [label="x55_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; cx56_128 [label="cx56_128 (c0)" , style="filled", fillcolor="red"]; - x56_128 [label="x50_128 + x52_128 + x54_128 + x56_128 (r8:r9)" , style="filled", fillcolor="red"]; - x56_128_high [label="x50_128_high + x52_128_high + x54_128_high + x56_128_high (r9)" , style="filled", fillcolor="red"]; - x56_128_low [label="x50_128_low + x52_128_low + x54_128_low + x56_128_low (r8)" , style="filled", fillcolor="red"]; - x57_128_high [label="x57_128_high (r11)" , style="filled", fillcolor="red"]; - x57_128_low [label="x57_128_low (r10)" , style="filled", fillcolor="red"]; - x57_128_tmp [label="x57_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; - x59_128_high [label="x59_128_high (r10)" , style="filled", fillcolor="red"]; - x59_128_low [label="x59_128_low (r11)" , style="filled", fillcolor="red"]; - x59_128_tmp [label="x59_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; - x61_128_high [label="x61_128_high (r11)" , style="filled", fillcolor="red"]; - x61_128_low [label="x61_128_low (r10)" , style="filled", fillcolor="red"]; - x61_128_tmp [label="x61_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; - x63_128_high [label="x63_128_high (r11)" , style="filled", fillcolor="red"]; - x63_128_low [label="x63_128_low (r10)" , style="filled", fillcolor="red"]; - x63_128_tmp [label="x63_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; - x65_128_high [label="x65_128_high (r10)" , style="filled", fillcolor="red"]; - x65_128_low [label="x65_128_low (r11)" , style="filled", fillcolor="red"]; - x65_128_tmp [label="x65_128_tmp (r11:r10)" , style="filled", fillcolor="red"]; - x67_128_high [label="x67_128_high (r11)" , style="filled", fillcolor="red"]; - x67_128_low [label="x67_128_low (r10)" , style="filled", fillcolor="red"]; - x67_128_tmp [label="x67_128_tmp (r10:r11)" , style="filled", fillcolor="red"]; - x69 [label="x69 (r9)" , style="filled", fillcolor="red"]; - x70 [label="x70 (r8)" , style="filled", fillcolor="red"]; + x56_128 [label="x50_128 + x52_128 + x54_128 + x56_128 (reg8:reg9)" , style="filled", fillcolor="red"]; + x56_128_high [label="x50_128_high + x52_128_high + x54_128_high + x56_128_high (reg9)" , style="filled", fillcolor="red"]; + x56_128_low [label="x50_128_low + x52_128_low + x54_128_low + x56_128_low (reg8)" , style="filled", fillcolor="red"]; + x57_128_high [label="x57_128_high (reg11)" , style="filled", fillcolor="red"]; + x57_128_low [label="x57_128_low (reg10)" , style="filled", fillcolor="red"]; + x57_128_tmp [label="x57_128_tmp (reg10:reg11)" , style="filled", fillcolor="red"]; + x59_128_high [label="x59_128_high (reg10)" , style="filled", fillcolor="red"]; + x59_128_low [label="x59_128_low (reg11)" , style="filled", fillcolor="red"]; + x59_128_tmp [label="x59_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; + x61_128_high [label="x61_128_high (reg11)" , style="filled", fillcolor="red"]; + x61_128_low [label="x61_128_low (reg10)" , style="filled", fillcolor="red"]; + x61_128_tmp [label="x61_128_tmp (reg10:reg11)" , style="filled", fillcolor="red"]; + x63_128_high [label="x63_128_high (reg11)" , style="filled", fillcolor="red"]; + x63_128_low [label="x63_128_low (reg10)" , style="filled", fillcolor="red"]; + x63_128_tmp [label="x63_128_tmp (reg10:reg11)" , style="filled", fillcolor="red"]; + x65_128_high [label="x65_128_high (reg10)" , style="filled", fillcolor="red"]; + x65_128_low [label="x65_128_low (reg11)" , style="filled", fillcolor="red"]; + x65_128_tmp [label="x65_128_tmp (reg11:reg10)" , style="filled", fillcolor="red"]; + x67_128_high [label="x67_128_high (reg11)" , style="filled", fillcolor="red"]; + x67_128_low [label="x67_128_low (reg10)" , style="filled", fillcolor="red"]; + x67_128_tmp [label="x67_128_tmp (reg10:reg11)" , style="filled", fillcolor="red"]; + x69 [label="x69 (reg9)" , style="filled", fillcolor="red"]; + x70 [label="x70 (reg8)" , style="filled", fillcolor="red"]; cx71_128 [label="cx71_128 (c0)" , style="filled", fillcolor="red"]; - x71_128 [label="x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (r4:r5)" , style="filled", fillcolor="red"]; - x71_128_high [label="x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (r5)" , style="filled", fillcolor="red"]; - x71_128_low [label="x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (r4)" , style="filled", fillcolor="red"]; - x72 [label="x72 (r5)" , style="filled", fillcolor="red"]; - x73 [label="x73 (r4)" , style="filled", fillcolor="red"]; + x71_128 [label="x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (reg4:reg5)" , style="filled", fillcolor="red"]; + x71_128_high [label="x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (reg5)" , style="filled", fillcolor="red"]; + x71_128_low [label="x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (reg4)" , style="filled", fillcolor="red"]; + x72 [label="x72 (reg5)" , style="filled", fillcolor="red"]; + x73 [label="x73 (reg4)" , style="filled", fillcolor="red"]; cx74_128 [label="cx74_128 (c0)" , style="filled", fillcolor="red"]; - x74_128 [label="x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (r9:r12)" , style="filled", fillcolor="red"]; - x74_128_high [label="x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (r12)" , style="filled", fillcolor="red"]; - x74_128_low [label="x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (r9)" , style="filled", fillcolor="red"]; - x75 [label="x75 (r12)" , style="filled", fillcolor="red"]; - x76 [label="x76 (r9)" , style="filled", fillcolor="red"]; + x74_128 [label="x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (reg9:reg12)" , style="filled", fillcolor="red"]; + x74_128_high [label="x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (reg12)" , style="filled", fillcolor="red"]; + x74_128_low [label="x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (reg9)" , style="filled", fillcolor="red"]; + x75 [label="x75 (reg12)" , style="filled", fillcolor="red"]; + x76 [label="x76 (reg9)" , style="filled", fillcolor="red"]; cx77_128 [label="cx77_128 (c0)" , style="filled", fillcolor="red"]; - x77_128 [label="x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (r5:r6)" , style="filled", fillcolor="red"]; - x77_128_high [label="x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (r6)" , style="filled", fillcolor="red"]; - x77_128_low [label="x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (r5)" , style="filled", fillcolor="red"]; - x78 [label="x78 (r6)" , style="filled", fillcolor="red"]; - x79 [label="x79 (r5)" , style="filled", fillcolor="red"]; + x77_128 [label="x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (reg5:reg6)" , style="filled", fillcolor="red"]; + x77_128_high [label="x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (reg6)" , style="filled", fillcolor="red"]; + x77_128_low [label="x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (reg5)" , style="filled", fillcolor="red"]; + x78 [label="x78 (reg6)" , style="filled", fillcolor="red"]; + x79 [label="x79 (reg5)" , style="filled", fillcolor="red"]; cx80_128 [label="cx80_128 (c0)" , style="filled", fillcolor="red"]; - x80_128 [label="x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (r7:r12)" , style="filled", fillcolor="red"]; - x80_128_high [label="x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (r12)" , style="filled", fillcolor="red"]; - x80_128_low [label="x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (r7)" , style="filled", fillcolor="red"]; - x81 [label="x81 (r12)" , style="filled", fillcolor="red"]; - x82 [label="x82 (r7)" , style="filled", fillcolor="red"]; - x83 [label="x83 (r12)" , style="filled", fillcolor="red"]; - x84 [label="x84 (r8)" , style="filled", fillcolor="red"]; - x85 [label="x85 (r8)" , style="filled", fillcolor="red"]; - x86 [label="x86 (r12)" , style="filled", fillcolor="red"]; - x87 [label="x87 (r8)" , style="filled", fillcolor="red"]; - x88 [label="x88 (r8)" , style="filled", fillcolor="red"]; - x89 [label="x89 (r4)" , style="filled", fillcolor="red"]; - x90 [label="x90 (r8)" , style="filled", fillcolor="red"]; + x80_128 [label="x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (reg7:reg12)" , style="filled", fillcolor="red"]; + x80_128_high [label="x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (reg12)" , style="filled", fillcolor="red"]; + x80_128_low [label="x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (reg7)" , style="filled", fillcolor="red"]; + x81 [label="x81 (reg12)" , style="filled", fillcolor="red"]; + x82 [label="x82 (reg7)" , style="filled", fillcolor="red"]; + x83 [label="x83 (reg12)" , style="filled", fillcolor="red"]; + x84 [label="x84 (reg8)" , style="filled", fillcolor="red"]; + x85 [label="x85 (reg8)" , style="filled", fillcolor="red"]; + x86 [label="x86 (reg12)" , style="filled", fillcolor="red"]; + x87 [label="x87 (reg8)" , style="filled", fillcolor="red"]; + x88 [label="x88 (reg8)" , style="filled", fillcolor="red"]; + x89 [label="x89 (reg4)" , style="filled", fillcolor="red"]; + x90 [label="x90 (reg8)" , style="filled", fillcolor="red"]; x56_128_low -> cx56_128 [ label="GET_CARRY" ] ; x71_128_low -> cx71_128 [ label="GET_CARRY" ] ; x74_128_low -> cx74_128 [ label="GET_CARRY" ] ; diff --git a/etc/compile-by-zinc/femulData0.png b/etc/compile-by-zinc/femulData0.png index 6d6ccc4a7..e3d0ad397 100644 Binary files a/etc/compile-by-zinc/femulData0.png and b/etc/compile-by-zinc/femulData0.png differ diff --git a/etc/compile-by-zinc/femulData0.svg b/etc/compile-by-zinc/femulData0.svg index 49a784ced..1346043a3 100644 --- a/etc/compile-by-zinc/femulData0.svg +++ b/etc/compile-by-zinc/femulData0.svg @@ -4,1924 +4,1924 @@ - + G - + x5 - -x5 (rx5) + +x5 (mx5) x20_128_tmp - -x20_128_tmp (r10:r11) + +x20_128_tmp (reg10:reg11) x5->x20_128_tmp - - -* + + +* x21_128_tmp - -x21_128_tmp (r11:r10) + +x21_128_tmp (reg11:reg10) x5->x21_128_tmp - - -* + + +* x24_128_tmp - -x24_128_tmp (r10:r11) + +x24_128_tmp (reg10:reg11) x5->x24_128_tmp - - -* + + +* x29_128_tmp - -x29_128_tmp (r11:r10) + +x29_128_tmp (reg11:reg10) x5->x29_128_tmp - - -* + + +* x36_128_tmp - -x36_128_tmp (r10:r11) + +x36_128_tmp (reg10:reg11) x5->x36_128_tmp - - -* + + +* x7 - -x7 (rx7) + +x7 (mx7) x22_128_tmp - -x22_128_tmp (r10:r11) + +x22_128_tmp (reg10:reg11) x7->x22_128_tmp - - -* + + +* x27_128_tmp - -x27_128_tmp (r11:r10) + +x27_128_tmp (reg11:reg10) x7->x27_128_tmp - - -* + + +* x32_128_tmp - -x32_128_tmp (r10:r11) + +x32_128_tmp (reg10:reg11) x7->x32_128_tmp - - -* + + +* x41_128_tmp - -x41_128_tmp (r10:r11) + +x41_128_tmp (reg10:reg11) x7->x41_128_tmp - - -* + + +* x46 - -x46 (r6) + +x46 (reg6) x7->x46 - - -* + + +* x9 - -x9 (rx9) + +x9 (mx9) x25_128_tmp - -x25_128_tmp (r11:r10) + +x25_128_tmp (reg11:reg10) x9->x25_128_tmp - - -* + + +* x34_128_tmp - -x34_128_tmp (r11:r10) + +x34_128_tmp (reg11:reg10) x9->x34_128_tmp - - -* + + +* x43_128_tmp - -x43_128_tmp (r11:r10) + +x43_128_tmp (reg11:reg10) x9->x43_128_tmp - - -* + + +* x47 - -x47 (r7) + +x47 (reg7) x9->x47 - - -* + + +* x10 - -x10 (rx10) + +x10 (mx10) x37_128_tmp - -x37_128_tmp (r11:r10) + +x37_128_tmp (reg11:reg10) x10->x37_128_tmp - - -* + + +* x45 - -x45 (r6) + +x45 (reg6) x10->x45 - - -* + + +* x11 - -x11 (rx11) + +x11 (mx11) x30_128_tmp - -x30_128_tmp (r10:r11) + +x30_128_tmp (reg10:reg11) x11->x30_128_tmp - - -* + + +* x39_128_tmp - -x39_128_tmp (r11:r10) + +x39_128_tmp (reg11:reg10) x11->x39_128_tmp - - -* + + +* x48 - -x48 (r12) + +x48 (reg12) x11->x48 - - -* + + +* x13 - -x13 (rx13) + +x13 (mx13) x13->x20_128_tmp - - -* + + +* x13->x22_128_tmp - - -* + + +* x13->x25_128_tmp - - -* + + +* x13->x30_128_tmp - - -* + + +* x13->x37_128_tmp - - -* + + +* x15 - -x15 (rx15) + +x15 (mx15) x15->x21_128_tmp - - -* + + +* x15->x27_128_tmp - - -* + + +* x15->x34_128_tmp - - -* + + +* x15->x39_128_tmp - - -* + + +* x49_128_tmp - -x49_128_tmp (r11:r10) + +x49_128_tmp (reg11:reg10) x15->x49_128_tmp - - -* + + +* x17 - -x17 (rx17) + +x17 (mx17) x17->x24_128_tmp - - -* + + +* x17->x32_128_tmp - - -* + + +* x17->x43_128_tmp - - -* + + +* x55_128_tmp - -x55_128_tmp (r11:r10) + +x55_128_tmp (reg11:reg10) x17->x55_128_tmp - - -* + + +* x57_128_tmp - -x57_128_tmp (r10:r11) + +x57_128_tmp (reg10:reg11) x17->x57_128_tmp - - -* + + +* x18 - -x18 (rx18) + +x18 (mx18) x18->x36_128_tmp - - -* + + +* x51_128_tmp - -x51_128_tmp (r11:r10) + +x51_128_tmp (reg11:reg10) x18->x51_128_tmp - - -* + + +* x59_128_tmp - -x59_128_tmp (r11:r10) + +x59_128_tmp (reg11:reg10) x18->x59_128_tmp - - -* + + +* x65_128_tmp - -x65_128_tmp (r11:r10) + +x65_128_tmp (reg11:reg10) x18->x65_128_tmp - - -* + + +* x67_128_tmp - -x67_128_tmp (r10:r11) + +x67_128_tmp (reg10:reg11) x18->x67_128_tmp - - -* + + +* x19 - -x19 (rx19) + +x19 (mx19) x19->x29_128_tmp - - -* + + +* x19->x41_128_tmp - - -* + + +* x53_128_tmp - -x53_128_tmp (r10:r11) + +x53_128_tmp (reg10:reg11) x19->x53_128_tmp - - -* + + +* x61_128_tmp - -x61_128_tmp (r10:r11) + +x61_128_tmp (reg10:reg11) x19->x61_128_tmp - - -* + + +* x63_128_tmp - -x63_128_tmp (r10:r11) + +x63_128_tmp (reg10:reg11) x19->x63_128_tmp - - -* + + +* x20_128_high - -x20_128_high (r11) + +x20_128_high (reg11) x56_128_high - -x50_128_high + x52_128_high + x54_128_high + x56_128_high (r9) + +x50_128_high + x52_128_high + x54_128_high + x56_128_high (reg9) x20_128_high->x56_128_high - - -+ + + ++ x20_128_low - -x20_128_low (r10) + +x20_128_low (reg10) x56_128_low - -x50_128_low + x52_128_low + x54_128_low + x56_128_low (r8) + +x50_128_low + x52_128_low + x54_128_low + x56_128_low (reg8) x20_128_low->x56_128_low - - -+ + + ++ x20_128_tmp->x20_128_high - - -GET_HIGH + + +GET_HIGH x20_128_tmp->x20_128_low - - -GET_LOW + + +GET_LOW x21_128_high - -x21_128_high (r10) + +x21_128_high (reg10) x71_128_high - -x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (r5) + +x23_128_high + x58_128_high + x60_128_high + x62_128_high + x71_128_high (reg5) x21_128_high->x71_128_high - - -+ + + ++ x21_128_low - -x21_128_low (r11) + +x21_128_low (reg11) x71_128_low - -x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (r4) + +x23_128_low + x58_128_low + x60_128_low + x62_128_low + x71_128_low (reg4) x21_128_low->x71_128_low - - -+ + + ++ x21_128_tmp->x21_128_high - - -GET_HIGH + + +GET_HIGH x21_128_tmp->x21_128_low - - -GET_LOW + + +GET_LOW x22_128_high - -x22_128_high (r11) + +x22_128_high (reg11) x22_128_high->x71_128_high - - -+ + + ++ x22_128_low - -x22_128_low (r10) + +x22_128_low (reg10) x22_128_low->x71_128_low - - -+ + + ++ x22_128_tmp->x22_128_high - - -GET_HIGH + + +GET_HIGH x22_128_tmp->x22_128_low - - -GET_LOW + + +GET_LOW x24_128_high - -x24_128_high (r11) + +x24_128_high (reg11) x74_128_high - -x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (r12) + +x26_128_high + x28_128_high + x64_128_high + x66_128_high + x74_128_high (reg12) x24_128_high->x74_128_high - - -+ + + ++ x24_128_low - -x24_128_low (r10) + +x24_128_low (reg10) x74_128_low - -x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (r9) + +x26_128_low + x28_128_low + x64_128_low + x66_128_low + x74_128_low (reg9) x24_128_low->x74_128_low - - -+ + + ++ x24_128_tmp->x24_128_high - - -GET_HIGH + + +GET_HIGH x24_128_tmp->x24_128_low - - -GET_LOW + + +GET_LOW x25_128_high - -x25_128_high (r10) + +x25_128_high (reg10) x25_128_high->x74_128_high - - -+ + + ++ x25_128_low - -x25_128_low (r11) + +x25_128_low (reg11) x25_128_low->x74_128_low - - -+ + + ++ x25_128_tmp->x25_128_high - - -GET_HIGH + + +GET_HIGH x25_128_tmp->x25_128_low - - -GET_LOW + + +GET_LOW x27_128_high - -x27_128_high (r10) + +x27_128_high (reg10) x27_128_high->x74_128_high - - -+ + + ++ x27_128_low - -x27_128_low (r11) + +x27_128_low (reg11) x27_128_low->x74_128_low - - -+ + + ++ x27_128_tmp->x27_128_high - - -GET_HIGH + + +GET_HIGH x27_128_tmp->x27_128_low - - -GET_LOW + + +GET_LOW x29_128_high - -x29_128_high (r10) + +x29_128_high (reg10) x77_128_high - -x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (r6) + +x31_128_high + x33_128_high + x35_128_high + x68_128_high + x77_128_high (reg6) x29_128_high->x77_128_high - - -+ + + ++ x29_128_low - -x29_128_low (r11) + +x29_128_low (reg11) x77_128_low - -x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (r5) + +x31_128_low + x33_128_low + x35_128_low + x68_128_low + x77_128_low (reg5) x29_128_low->x77_128_low - - -+ + + ++ x29_128_tmp->x29_128_high - - -GET_HIGH + + +GET_HIGH x29_128_tmp->x29_128_low - - -GET_LOW + + +GET_LOW x30_128_high - -x30_128_high (r11) + +x30_128_high (reg11) x30_128_high->x77_128_high - - -+ + + ++ x30_128_low - -x30_128_low (r10) + +x30_128_low (reg10) x30_128_low->x77_128_low - - -+ + + ++ x30_128_tmp->x30_128_high - - -GET_HIGH + + +GET_HIGH x30_128_tmp->x30_128_low - - -GET_LOW + + +GET_LOW x32_128_high - -x32_128_high (r11) + +x32_128_high (reg11) x32_128_high->x77_128_high - - -+ + + ++ x32_128_low - -x32_128_low (r10) + +x32_128_low (reg10) x32_128_low->x77_128_low - - -+ + + ++ x32_128_tmp->x32_128_high - - -GET_HIGH + + +GET_HIGH x32_128_tmp->x32_128_low - - -GET_LOW + + +GET_LOW x34_128_high - -x34_128_high (r10) + +x34_128_high (reg10) x34_128_high->x77_128_high - - -+ + + ++ x34_128_low - -x34_128_low (r11) + +x34_128_low (reg11) x34_128_low->x77_128_low - - -+ + + ++ x34_128_tmp->x34_128_high - - -GET_HIGH + + +GET_HIGH x34_128_tmp->x34_128_low - - -GET_LOW + + +GET_LOW x36_128_high - -x36_128_high (r11) + +x36_128_high (reg11) x80_128_high - -x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (r12) + +x38_128_high + x40_128_high + x42_128_high + x44_128_high + x80_128_high (reg12) x36_128_high->x80_128_high - - -+ + + ++ x36_128_low - -x36_128_low (r10) + +x36_128_low (reg10) x80_128_low - -x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (r7) + +x38_128_low + x40_128_low + x42_128_low + x44_128_low + x80_128_low (reg7) x36_128_low->x80_128_low - - -+ + + ++ x36_128_tmp->x36_128_high - - -GET_HIGH + + +GET_HIGH x36_128_tmp->x36_128_low - - -GET_LOW + + +GET_LOW x37_128_high - -x37_128_high (r10) + +x37_128_high (reg10) x37_128_high->x80_128_high - - -+ + + ++ x37_128_low - -x37_128_low (r11) + +x37_128_low (reg11) x37_128_low->x80_128_low - - -+ + + ++ x37_128_tmp->x37_128_high - - -GET_HIGH + + +GET_HIGH x37_128_tmp->x37_128_low - - -GET_LOW + + +GET_LOW x39_128_high - -x39_128_high (r10) + +x39_128_high (reg10) x39_128_high->x80_128_high - - -+ + + ++ x39_128_low - -x39_128_low (r11) + +x39_128_low (reg11) x39_128_low->x80_128_low - - -+ + + ++ x39_128_tmp->x39_128_high - - -GET_HIGH + + +GET_HIGH x39_128_tmp->x39_128_low - - -GET_LOW + + +GET_LOW x41_128_high - -x41_128_high (r11) + +x41_128_high (reg11) x41_128_high->x80_128_high - - -+ + + ++ x41_128_low - -x41_128_low (r10) + +x41_128_low (reg10) x41_128_low->x80_128_low - - -+ + + ++ x41_128_tmp->x41_128_high - - -GET_HIGH + + +GET_HIGH x41_128_tmp->x41_128_low - - -GET_LOW + + +GET_LOW x43_128_high - -x43_128_high (r10) + +x43_128_high (reg10) x43_128_high->x80_128_high - - -+ + + ++ x43_128_low - -x43_128_low (r11) + +x43_128_low (reg11) x43_128_low->x80_128_low - - -+ + + ++ x43_128_tmp->x43_128_high - - -GET_HIGH + + +GET_HIGH x43_128_tmp->x43_128_low - - -GET_LOW + + +GET_LOW x45->x49_128_tmp - - -* + + +* x45->x57_128_tmp - - -* + + +* x45->x63_128_tmp - - -* + + +* x45->x67_128_tmp - - -* + + +* x46->x51_128_tmp - - -* + + +* x47->x53_128_tmp - - -* + + +* x47->x59_128_tmp - - -* + + +* x48->x55_128_tmp - - -* + + +* x48->x61_128_tmp - - -* + + +* x48->x65_128_tmp - - -* + + +* x49_128_high - -x49_128_high (r10) + +x49_128_high (reg10) x49_128_high->x56_128_high - - -+ + + ++ x49_128_low - -x49_128_low (r11) + +x49_128_low (reg11) x49_128_low->x56_128_low - - -+ + + ++ x49_128_tmp->x49_128_high - - -GET_HIGH + + +GET_HIGH x49_128_tmp->x49_128_low - - -GET_LOW + + +GET_LOW x51_128_high - -x51_128_high (r10) + +x51_128_high (reg10) x51_128_high->x56_128_high - - -+ + + ++ x51_128_low - -x51_128_low (r11) + +x51_128_low (reg11) x51_128_low->x56_128_low - - -+ + + ++ x51_128_tmp->x51_128_high - - -GET_HIGH + + +GET_HIGH x51_128_tmp->x51_128_low - - -GET_LOW + + +GET_LOW x53_128_high - -x53_128_high (r11) + +x53_128_high (reg11) x53_128_high->x56_128_high - - -+ + + ++ x53_128_low - -x53_128_low (r10) + +x53_128_low (reg10) x53_128_low->x56_128_low - - -+ + + ++ x53_128_tmp->x53_128_high - - -GET_HIGH + + +GET_HIGH x53_128_tmp->x53_128_low - - -GET_LOW + + +GET_LOW x55_128_high - -x55_128_high (r10) + +x55_128_high (reg10) x55_128_high->x56_128_high - - -+ + + ++ x55_128_low - -x55_128_low (r11) + +x55_128_low (reg11) x55_128_low->x56_128_low - - -+ + + ++ x55_128_tmp->x55_128_high - - -GET_HIGH + + +GET_HIGH x55_128_tmp->x55_128_low - - -GET_LOW + + +GET_LOW cx56_128 - -cx56_128 (c0) + +cx56_128 (c0) cx56_128->x56_128_high - - -+ + + ++ x56_128 - -x50_128 + x52_128 + x54_128 + x56_128 (r8:r9) + +x50_128 + x52_128 + x54_128 + x56_128 (reg8:reg9) x69 - -x69 (r9) + +x69 (reg9) x56_128->x69 - - ->> + + +>> x56_128_high->x56_128 - - -COMBINE + + +COMBINE x56_128_low->cx56_128 - - -GET_CARRY + + +GET_CARRY x56_128_low->x56_128 - - -COMBINE + + +COMBINE x70 - -x70 (r8) + +x70 (reg8) x56_128_low->x70 - - -& + + +& x57_128_high - -x57_128_high (r11) + +x57_128_high (reg11) x57_128_high->x71_128_high - - -+ + + ++ x57_128_low - -x57_128_low (r10) + +x57_128_low (reg10) x57_128_low->x71_128_low - - -+ + + ++ x57_128_tmp->x57_128_high - - -GET_HIGH + + +GET_HIGH x57_128_tmp->x57_128_low - - -GET_LOW + + +GET_LOW x59_128_high - -x59_128_high (r10) + +x59_128_high (reg10) x59_128_high->x71_128_high - - -+ + + ++ x59_128_low - -x59_128_low (r11) + +x59_128_low (reg11) x59_128_low->x71_128_low - - -+ + + ++ x59_128_tmp->x59_128_high - - -GET_HIGH + + +GET_HIGH x59_128_tmp->x59_128_low - - -GET_LOW + + +GET_LOW x61_128_high - -x61_128_high (r11) + +x61_128_high (reg11) x61_128_high->x71_128_high - - -+ + + ++ x61_128_low - -x61_128_low (r10) + +x61_128_low (reg10) x61_128_low->x71_128_low - - -+ + + ++ x61_128_tmp->x61_128_high - - -GET_HIGH + + +GET_HIGH x61_128_tmp->x61_128_low - - -GET_LOW + + +GET_LOW x63_128_high - -x63_128_high (r11) + +x63_128_high (reg11) x63_128_high->x74_128_high - - -+ + + ++ x63_128_low - -x63_128_low (r10) + +x63_128_low (reg10) x63_128_low->x74_128_low - - -+ + + ++ x63_128_tmp->x63_128_high - - -GET_HIGH + + +GET_HIGH x63_128_tmp->x63_128_low - - -GET_LOW + + +GET_LOW x65_128_high - -x65_128_high (r10) + +x65_128_high (reg10) x65_128_high->x74_128_high - - -+ + + ++ x65_128_low - -x65_128_low (r11) + +x65_128_low (reg11) x65_128_low->x74_128_low - - -+ + + ++ x65_128_tmp->x65_128_high - - -GET_HIGH + + +GET_HIGH x65_128_tmp->x65_128_low - - -GET_LOW + + +GET_LOW x67_128_high - -x67_128_high (r11) + +x67_128_high (reg11) x67_128_high->x77_128_high - - -+ + + ++ x67_128_low - -x67_128_low (r10) + +x67_128_low (reg10) x67_128_low->x77_128_low - - -+ + + ++ x67_128_tmp->x67_128_high - - -GET_HIGH + + +GET_HIGH x67_128_tmp->x67_128_low - - -GET_LOW + + +GET_LOW x69->x71_128_low - - -+ + + ++ x84 - -x84 (r8) + +x84 (reg8) x70->x84 - - -+ + + ++ cx71_128 - -cx71_128 (c0) + +cx71_128 (c0) cx71_128->x71_128_high - - -+ + + ++ x71_128 - -x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (r4:r5) + +x23_128 + x58_128 + x60_128 + x62_128 + x71_128 (reg4:reg5) x72 - -x72 (r5) + +x72 (reg5) x71_128->x72 - - ->> + + +>> x71_128_high->x71_128 - - -COMBINE + + +COMBINE x71_128_low->cx71_128 - - -GET_CARRY + + +GET_CARRY x71_128_low->x71_128 - - -COMBINE + + +COMBINE x73 - -x73 (r4) + +x73 (reg4) x71_128_low->x73 - - -& + + +& x72->x74_128_low - - -+ + + ++ x87 - -x87 (r8) + +x87 (reg8) x73->x87 - - -+ + + ++ cx74_128 - -cx74_128 (c0) + +cx74_128 (c0) cx74_128->x74_128_high - - -+ + + ++ x74_128 - -x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (r9:r12) + +x26_128 + x28_128 + x64_128 + x66_128 + x74_128 (reg9:reg12) x75 - -x75 (r12) + +x75 (reg12) x74_128->x75 - - ->> + + +>> x74_128_high->x74_128 - - -COMBINE + + +COMBINE x74_128_low->cx74_128 - - -GET_CARRY + + +GET_CARRY x74_128_low->x74_128 - - -COMBINE + + +COMBINE x76 - -x76 (r9) + +x76 (reg9) x74_128_low->x76 - - -& + + +& x75->x77_128_low - - -+ + + ++ x90 - -x90 (r8) + +x90 (reg8) x76->x90 - - -+ + + ++ cx77_128 - -cx77_128 (c0) + +cx77_128 (c0) cx77_128->x77_128_high - - -+ + + ++ x77_128 - -x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (r5:r6) + +x31_128 + x33_128 + x35_128 + x68_128 + x77_128 (reg5:reg6) x78 - -x78 (r6) + +x78 (reg6) x77_128->x78 - - ->> + + +>> x77_128_high->x77_128 - - -COMBINE + + +COMBINE x77_128_low->cx77_128 - - -GET_CARRY + + +GET_CARRY x77_128_low->x77_128 - - -COMBINE + + +COMBINE x79 - -x79 (r5) + +x79 (reg5) x77_128_low->x79 - - -& + + +& x78->x80_128_low - - -+ + + ++ out - -out + +out x79->out - - + + cx80_128 - -cx80_128 (c0) + +cx80_128 (c0) cx80_128->x80_128_high - - -+ + + ++ x80_128 - -x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (r7:r12) + +x38_128 + x40_128 + x42_128 + x44_128 + x80_128 (reg7:reg12) x81 - -x81 (r12) + +x81 (reg12) x80_128->x81 - - ->> + + +>> x80_128_high->x80_128 - - -COMBINE + + +COMBINE x80_128_low->cx80_128 - - -GET_CARRY + + +GET_CARRY x80_128_low->x80_128 - - -COMBINE + + +COMBINE x82 - -x82 (r7) + +x82 (reg7) x80_128_low->x82 - - -& + + +& x83 - -x83 (r12) + +x83 (reg12) x81->x83 - - -* + + +* x82->out - - + + x83->x84 - - -+ + + ++ x85 - -x85 (r8) + +x85 (reg8) x84->x85 - - ->> + + +>> x86 - -x86 (r12) + +x86 (reg12) x84->x86 - - -& + + +& x85->x87 - - -+ + + ++ x86->out - - + + x88 - -x88 (r8) + +x88 (reg8) x87->x88 - - ->> + + +>> x89 - -x89 (r4) + +x89 (reg4) x87->x89 - - -& + + +& x88->x90 - - -+ + + ++ x89->out - - + + x90->out - - + + in - -in + +in in->x5 - - + + in->x7 - - + + in->x9 - - + + in->x10 - - + + in->x11 - - + + in->x13 - - + + in->x15 - - + + in->x17 - - + + in->x18 - - + + in->x19 - - + + diff --git a/etc/compile-by-zinc/femulDisplayScheduled0.log b/etc/compile-by-zinc/femulDisplayScheduled0.log index 4bcf8b04c..a5e399038 100644 --- a/etc/compile-by-zinc/femulDisplayScheduled0.log +++ b/etc/compile-by-zinc/femulDisplayScheduled0.log @@ -1,213 +1,193 @@ +asm ( // Convention is low_reg:high_reg -"mov %[rx11], %[arg0]\t\n" -FIXME: lea for x48 = x11 * 0x13 -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[r12], %rdx\t\n" -"mov %[rx17], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x55_tmp = x48 * x17 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[r10], %[r9]\t\n" // bucket: x50_high + x52_high + x54_high + x56_high -"mov %[r11], %[r8]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low -"mov %[rx9], %[arg0]\t\n" -FIXME: lea for x47 = x9 * 0x13 -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[r7], %rdx\t\n" -"mov %[rx19], %[arg0]\t\n" -"mulx %[arg0], %[r11], %[r10]\t\n" // x53_tmp = x47 * x19 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r11], %[r9]\t\n" // bucket: x50_high + x52_high + x54_high + x56_high -"add %[r10], %[r8]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low -"mov %[rx7], %[arg0]\t\n" -FIXME: lea for x46 = x7 * 0x13 -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[r6], %rdx\t\n" -"mov %[rx18], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x51_tmp = x46 * x18 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r10], %[r9]\t\n" // bucket: x50_high + x52_high + x54_high + x56_high -"adc %[r11], %[r8]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx5], %rdx\t\n" -"mov %[rx13], %[arg0]\t\n" -"mulx %[arg0], %[r11], %[r10]\t\n" // x20_tmp = x5 * x13 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r11], %[r9]\t\n" // bucket: x50_high + x52_high + x54_high + x56_high -"adc %[r10], %[r8]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low -"mov %[rx10], %[arg0]\t\n" -FIXME: lea for x45 = x10 * 0x13 -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[r6], %rdx\t\n" -"mov %[rx15], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x49_tmp = x45 * x15 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r10], %[r9]\t\n" // bucket: x50_high + x52_high + x54_high + x56_high -"adc %[r11], %[r8]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low -"adcx $0, %[r9]\t\n" // bucket: x50_high + x52_high + x54_high + x56_high -"and $0x7ffffffffffff, %[r8]\t\n" // x70 = x56_low & 0x7ffffffffffff -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[r12], %rdx\t\n" -"mov %[rx19], %[arg0]\t\n" -"mulx %[arg0], %[r11], %[r10]\t\n" // x61_tmp = x48 * x19 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -r9 <- SHR r8:r9, 0x33; // x69 = x56_low:x56_high >> 0x33 -"mov %[r9], %[r4]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -"mov %[r11], %[r5]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -"add %[r10], %[r4]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[r7], %rdx\t\n" -"mov %[rx18], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x59_tmp = x47 * x18 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r10], %[r5]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -"adc %[r11], %[r4]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[r6], %rdx\t\n" -"mov %[rx17], %[arg0]\t\n" -"mulx %[arg0], %[r11], %[r10]\t\n" // x57_tmp = x45 * x17 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r11], %[r5]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -"adc %[r10], %[r4]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx5], %rdx\t\n" -"mov %[rx15], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x21_tmp = x5 * x15 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r10], %[r5]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -"adc %[r11], %[r4]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx7], %rdx\t\n" -"mov %[rx13], %[arg0]\t\n" -"mulx %[arg0], %[r11], %[r10]\t\n" // x22_tmp = x7 * x13 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r11], %[r5]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -"adc %[r10], %[r4]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low -"adcx $0, %[r5]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high -"and $0x7ffffffffffff, %[r4]\t\n" // x73 = x71_low & 0x7ffffffffffff -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[r12], %rdx\t\n" -"mov %[rx18], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x65_tmp = x48 * x18 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -r5 <- SHR r4:r5, 0x33; // x72 = x71_low:x71_high >> 0x33 -"mov %[r5], %[r9]\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -"mov %[r10], %[r12]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -"add %[r11], %[r9]\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[r6], %rdx\t\n" -"mov %[rx19], %[arg0]\t\n" -"mulx %[arg0], %[r11], %[r10]\t\n" // x63_tmp = x45 * x19 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r11], %[r12]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -"adc %[r10], %[r9]\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx7], %rdx\t\n" -"mov %[rx15], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x27_tmp = x7 * x15 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r10], %[r12]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -"adc %[r11], %[r9]\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx5], %rdx\t\n" -"mov %[rx17], %[arg0]\t\n" -"mulx %[arg0], %[r11], %[r10]\t\n" // x24_tmp = x5 * x17 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r11], %[r12]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -"adc %[r10], %[r9]\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx9], %rdx\t\n" -"mov %[rx13], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x25_tmp = x9 * x13 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r10], %[r12]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -"adc %[r11], %[r9]\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low -"adcx $0, %[r12]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high -"and $0x7ffffffffffff, %[r9]\t\n" // x76 = x74_low & 0x7ffffffffffff -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[r6], %rdx\t\n" -"mov %[rx18], %[arg0]\t\n" -"mulx %[arg0], %[r11], %[r10]\t\n" // x67_tmp = x45 * x18 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -r12 <- SHR r9:r12, 0x33; // x75 = x74_low:x74_high >> 0x33 -"mov %[r12], %[r5]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -"mov %[r11], %[r6]\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -"add %[r10], %[r5]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx9], %rdx\t\n" -"mov %[rx15], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x34_tmp = x9 * x15 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r10], %[r6]\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -"adc %[r11], %[r5]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx7], %rdx\t\n" -"mov %[rx17], %[arg0]\t\n" -"mulx %[arg0], %[r11], %[r10]\t\n" // x32_tmp = x7 * x17 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r11], %[r6]\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -"adc %[r10], %[r5]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx5], %rdx\t\n" -"mov %[rx19], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x29_tmp = x5 * x19 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r10], %[r6]\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -"adc %[r11], %[r5]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx11], %rdx\t\n" -"mov %[rx13], %[arg0]\t\n" -"mulx %[arg0], %[r11], %[r10]\t\n" // x30_tmp = x11 * x13 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r11], %[r6]\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -"adc %[r10], %[r5]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low -"adcx $0, %[r6]\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high -"and $0x7ffffffffffff, %[r5]\t\n" // x79 = x77_low & 0x7ffffffffffff -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx9], %rdx\t\n" -"mov %[rx17], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x43_tmp = x9 * x17 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -r6 <- SHR r5:r6, 0x33; // x78 = x77_low:x77_high >> 0x33 -"mov %[r6], %[r7]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -"mov %[r10], %[r12]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -"add %[r11], %[r7]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx7], %rdx\t\n" -"mov %[rx19], %[arg0]\t\n" -"mulx %[arg0], %[r11], %[r10]\t\n" // x41_tmp = x7 * x19 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r11], %[r12]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -"adc %[r10], %[r7]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx11], %rdx\t\n" -"mov %[rx15], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x39_tmp = x11 * x15 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r10], %[r12]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -"adc %[r11], %[r7]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx5], %rdx\t\n" -"mov %[rx18], %[arg0]\t\n" -"mulx %[arg0], %[r11], %[r10]\t\n" // x36_tmp = x5 * x18 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r11], %[r12]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -"adc %[r10], %[r7]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -"mov %rdx, %[rdx_backup]\t\n" // XXX: How do I specify that a particular register should be rdx? -"mov %[rx10], %rdx\t\n" -"mov %[rx13], %[arg0]\t\n" -"mulx %[arg0], %[r10], %[r11]\t\n" // x37_tmp = x10 * x13 -"mov %[rdx_backup], %rdx\t\n" // XXX: How do I specify that a particular register should be rdx? -"adx %[r10], %[r12]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -"adc %[r11], %[r7]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low -"adcx $0, %[r12]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high -"and $0x7ffffffffffff, %[r7]\t\n" // x82 = x80_low & 0x7ffffffffffff -r12 <- SHR r7:r12, 0x33; // x81 = x80_low:x80_high >> 0x33 -FIXME: lea for x83 = x81 * 0x13 -"adx %[r12], %[r8]\t\n" // bucket: x84 = x70 + x83 -r8 <- SHR r8, 0x33; // x85 = x70 >> 0x33 -"mov %[r8], %[r12]\t\n" -"and $0x7ffffffffffff, %[r12]\t\n" // x86 = x84 & 0x7ffffffffffff -"adx %[r4], %[r8]\t\n" // bucket: x87 = x85 + x73 -r8 <- SHR r8, 0x33; // x88 = x85 >> 0x33 -"mov %[r8], %[r4]\t\n" -"and $0x7ffffffffffff, %[r4]\t\n" // x89 = x87 & 0x7ffffffffffff -"adx %[r9], %[r8]\t\n" // bucket: x90 = x88 + x76 +// FIXME: lea for x48 = x11 * 0x13 +"mov $0x13, %rdx\t\n" +"mov %[mx11], %[rx86]\t\n" +"mulx %[rx86], %rdx, %[rx86]\t\n" // x48 = x11 * 0x13 +"mov %[rx86], %rdx\t\n" +"mov %[mx17], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x55_tmp = x48 * x17 +"mov %r13, %r12\t\n" // bucket: x50_high + x52_high + x54_high + x56_high +"mov %r14, %[rx90]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low +// FIXME: lea for x47 = x9 * 0x13 +"mov $0x13, %rdx\t\n" +"mov %[mx9], %[rx82]\t\n" +"mulx %[rx82], %rdx, %[rx82]\t\n" // x47 = x9 * 0x13 +"mov %[rx82], %rdx\t\n" +"mov %[mx19], %r14\t\n" +"mulx %r14, %r14, %r13\t\n" // x53_tmp = x47 * x19 +"adx %r14, %r12\t\n" // bucket: x50_high + x52_high + x54_high + x56_high +"add %r13, %[rx90]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low +// FIXME: lea for x46 = x7 * 0x13 +"mov $0x13, %rdx\t\n" +"mov %[mx7], %r9\t\n" +"mulx %r9, %rdx, %r9\t\n" // x46 = x7 * 0x13 +"mov %r9, %rdx\t\n" +"mov %[mx18], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x51_tmp = x46 * x18 +"adx %r13, %r12\t\n" // bucket: x50_high + x52_high + x54_high + x56_high +"adc %r14, %[rx90]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low +"mov %[mx5], %rdx\t\n" +"mov %[mx13], %r14\t\n" +"mulx %r14, %r14, %r13\t\n" // x20_tmp = x5 * x13 +"adx %r14, %r12\t\n" // bucket: x50_high + x52_high + x54_high + x56_high +"adc %r13, %[rx90]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low +// FIXME: lea for x45 = x10 * 0x13 +"mov $0x13, %rdx\t\n" +"mov %[mx10], %r9\t\n" +"mulx %r9, %rdx, %r9\t\n" // x45 = x10 * 0x13 +"mov %r9, %rdx\t\n" +"mov %[mx15], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x49_tmp = x45 * x15 +"adx %r13, %r12\t\n" // bucket: x50_high + x52_high + x54_high + x56_high +"adc %r14, %[rx90]\t\n" // bucket: x50_low + x52_low + x54_low + x56_low +"adcx $0, %r12\t\n" // bucket: x50_high + x52_high + x54_high + x56_high +"and $0x7ffffffffffff, %[rx90]\t\n" // x70 = x56_low & 0x7ffffffffffff +"mov %[rx86], %rdx\t\n" +"mov %[mx19], %r14\t\n" +"mulx %r14, %r14, %r13\t\n" // x61_tmp = x48 * x19 +"mov %[rx90], %rdx\t\n" +"mov %[rx90], %r12\t\n" +"mov %rdx, %[rx90]\t\n" +"shrd $0x33, %r12, %[rx90]\t\n" // x69 = x56_low:x56_high >> 0x33 +"mov %r12, %[rx89]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +"mov %r14, %[rx79]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +"add %r13, %[rx89]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +"mov %[rx82], %rdx\t\n" +"mov %[mx18], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x59_tmp = x47 * x18 +"adx %r13, %[rx79]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +"adc %r14, %[rx89]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +"mov %r9, %rdx\t\n" +"mov %[mx17], %r14\t\n" +"mulx %r14, %r14, %r13\t\n" // x57_tmp = x45 * x17 +"adx %r14, %[rx79]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +"adc %r13, %[rx89]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +"mov %[mx5], %rdx\t\n" +"mov %[mx15], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x21_tmp = x5 * x15 +"adx %r13, %[rx79]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +"adc %r14, %[rx89]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +"mov %[mx7], %rdx\t\n" +"mov %[mx13], %r14\t\n" +"mulx %r14, %r14, %r13\t\n" // x22_tmp = x7 * x13 +"adx %r14, %[rx79]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +"adc %r13, %[rx89]\t\n" // bucket: x23_low + x58_low + x60_low + x62_low + x71_low +"adcx $0, %[rx79]\t\n" // bucket: x23_high + x58_high + x60_high + x62_high + x71_high +"and $0x7ffffffffffff, %[rx89]\t\n" // x73 = x71_low & 0x7ffffffffffff +"mov %[rx86], %rdx\t\n" +"mov %[mx18], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x65_tmp = x48 * x18 +"mov %[rx89], %rdx\t\n" +"mov %[rx89], %[rx79]\t\n" +"mov %rdx, %[rx89]\t\n" +"shrd $0x33, %[rx79], %[rx89]\t\n" // x72 = x71_low:x71_high >> 0x33 +"mov %[rx79], %r12\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +"mov %r13, %[rx86]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +"add %r14, %r12\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +"mov %r9, %rdx\t\n" +"mov %[mx19], %r14\t\n" +"mulx %r14, %r14, %r13\t\n" // x63_tmp = x45 * x19 +"adx %r14, %[rx86]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +"adc %r13, %r12\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +"mov %[mx7], %rdx\t\n" +"mov %[mx15], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x27_tmp = x7 * x15 +"adx %r13, %[rx86]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +"adc %r14, %r12\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +"mov %[mx5], %rdx\t\n" +"mov %[mx17], %r14\t\n" +"mulx %r14, %r14, %r13\t\n" // x24_tmp = x5 * x17 +"adx %r14, %[rx86]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +"adc %r13, %r12\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +"mov %[mx9], %rdx\t\n" +"mov %[mx13], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x25_tmp = x9 * x13 +"adx %r13, %[rx86]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +"adc %r14, %r12\t\n" // bucket: x26_low + x28_low + x64_low + x66_low + x74_low +"adcx $0, %[rx86]\t\n" // bucket: x26_high + x28_high + x64_high + x66_high + x74_high +"and $0x7ffffffffffff, %r12\t\n" // x76 = x74_low & 0x7ffffffffffff +"mov %r9, %rdx\t\n" +"mov %[mx18], %r14\t\n" +"mulx %r14, %r14, %r13\t\n" // x67_tmp = x45 * x18 +"mov %r12, %rdx\t\n" +"mov %r12, %[rx86]\t\n" +"mov %rdx, %r12\t\n" +"shrd $0x33, %[rx86], %r12\t\n" // x75 = x74_low:x74_high >> 0x33 +"mov %[rx86], %[rx79]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +"mov %r14, %r9\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +"add %r13, %[rx79]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +"mov %[mx9], %rdx\t\n" +"mov %[mx15], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x34_tmp = x9 * x15 +"adx %r13, %r9\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +"adc %r14, %[rx79]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +"mov %[mx7], %rdx\t\n" +"mov %[mx17], %r14\t\n" +"mulx %r14, %r14, %r13\t\n" // x32_tmp = x7 * x17 +"adx %r14, %r9\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +"adc %r13, %[rx79]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +"mov %[mx5], %rdx\t\n" +"mov %[mx19], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x29_tmp = x5 * x19 +"adx %r13, %r9\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +"adc %r14, %[rx79]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +"mov %[mx11], %rdx\t\n" +"mov %[mx13], %r14\t\n" +"mulx %r14, %r14, %r13\t\n" // x30_tmp = x11 * x13 +"adx %r14, %r9\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +"adc %r13, %[rx79]\t\n" // bucket: x31_low + x33_low + x35_low + x68_low + x77_low +"adcx $0, %r9\t\n" // bucket: x31_high + x33_high + x35_high + x68_high + x77_high +"and $0x7ffffffffffff, %[rx79]\t\n" // x79 = x77_low & 0x7ffffffffffff +"mov %[mx9], %rdx\t\n" +"mov %[mx17], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x43_tmp = x9 * x17 +"mov %[rx79], %rdx\t\n" +"mov %[rx79], %r9\t\n" +"mov %rdx, %[rx79]\t\n" +"shrd $0x33, %r9, %[rx79]\t\n" // x78 = x77_low:x77_high >> 0x33 +"mov %r9, %[rx82]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +"mov %r13, %[rx86]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +"add %r14, %[rx82]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +"mov %[mx7], %rdx\t\n" +"mov %[mx19], %r14\t\n" +"mulx %r14, %r14, %r13\t\n" // x41_tmp = x7 * x19 +"adx %r14, %[rx86]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +"adc %r13, %[rx82]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +"mov %[mx11], %rdx\t\n" +"mov %[mx15], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x39_tmp = x11 * x15 +"adx %r13, %[rx86]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +"adc %r14, %[rx82]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +"mov %[mx5], %rdx\t\n" +"mov %[mx18], %r14\t\n" +"mulx %r14, %r14, %r13\t\n" // x36_tmp = x5 * x18 +"adx %r14, %[rx86]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +"adc %r13, %[rx82]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +"mov %[mx10], %rdx\t\n" +"mov %[mx13], %r13\t\n" +"mulx %r13, %r13, %r14\t\n" // x37_tmp = x10 * x13 +"adx %r13, %[rx86]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +"adc %r14, %[rx82]\t\n" // bucket: x38_low + x40_low + x42_low + x44_low + x80_low +"adcx $0, %[rx86]\t\n" // bucket: x38_high + x40_high + x42_high + x44_high + x80_high +"and $0x7ffffffffffff, %[rx82]\t\n" // x82 = x80_low & 0x7ffffffffffff +"mov %[rx82], %rdx\t\n" +"mov %[rx82], %[rx86]\t\n" +"mov %rdx, %[rx82]\t\n" +"shrd $0x33, %[rx86], %[rx82]\t\n" // x81 = x80_low:x80_high >> 0x33 +// FIXME: lea for x83 = x81 * 0x13 +"mov $0x13, %rdx\t\n" +"mulx %[rx86], %rdx, %[rx86]\t\n" // x83 = x81 * 0x13 +"adx %[rx86], %[rx90]\t\n" // bucket: x84 = x70 + x83 +"shr $0x33, %[rx90]\t\n" // x85 = x70 >> 0x33 +"mov %[rx90], %[rx86]\t\n" +"and $0x7ffffffffffff, %[rx86]\t\n" // x86 = x84 & 0x7ffffffffffff +"adx %[rx89], %[rx90]\t\n" // bucket: x87 = x85 + x73 +"shr $0x33, %[rx90]\t\n" // x88 = x85 >> 0x33 +"mov %[rx90], %[rx89]\t\n" +"and $0x7ffffffffffff, %[rx89]\t\n" // x89 = x87 & 0x7ffffffffffff +"adx %r12, %[rx90]\t\n" // bucket: x90 = x88 + x76 +: [rx89] "=&r" (x89), [rx79] "=&r" (x79), [rx82] "=&r" (x82), [rx90] "=&r" (x90), [rx86] "=&r" (x86) +: [mx9] "m" (x9), [mx7] "m" (x7), [mx5] "m" (x5), [mx10] "m" (x10), [mx11] "m" (x11), [mx13] "m" (x13), [mx15] "m" (x15), [mx17] "m" (x17), [mx18] "m" (x18), [mx19] "m" (x19) +: "cc", "rdx", "r7", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15" +); diff --git a/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py b/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py index f37bc3ff1..1083846d8 100755 --- a/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py +++ b/etc/compile-by-zinc/make-graph-with-reg-by-ac-buckets.py @@ -6,10 +6,12 @@ import subprocess LAMBDA = u'\u03bb' -OP_NAMES = {'*':'MUL', '+':'ADD', '>>':'SHL', '<<':'SHR', '|':'OR', '&':'AND'} - +NAMED_REGISTERS = ('RAX', 'RCX', 'RDX', 'RBX', 'RSP', 'RSI', 'RDI') +NAMED_REGISTER_MAPPING = dict(('r%d' % i, reg) for i, reg in enumerate(NAMED_REGISTERS)) REGISTERS = tuple(#['RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP'] + #, 'RSP'] # RSP is stack pointer? - ['r%d' % i for i in range(13)]) + ['reg%d' % i for i in range(13)]) +#REAL_REGISTERS = tuple(['RAX', 'RBX', 'RCX', 'RDX', 'RSI', 'RDI', 'RBP'] + #, 'RSP'] # RSP is stack pointer? +# ['reg%d' % i for i in range(13)]) REGISTER_COLORS = ['color="black"', 'color="white",fillcolor="black"', 'color="maroon"', 'color="green"', 'fillcolor="olive"', 'color="navy"', 'color="purple"', 'fillcolor="teal"', 'fillcolor="silver"', 'fillcolor="gray"', 'fillcolor="red"', 'fillcolor="lime"', 'fillcolor="yellow"', 'fillcolor="blue"', 'fillcolor="fuschia"', 'fillcolor="aqua"'] @@ -281,7 +283,7 @@ def allocate_node(existing, node, *args): return do_ret() if len(node['deps']) == 0 and node['op'] == 'INPUT': assert(node['type'] == 'uint64_t') - cur_map[node['out']] = 'r' + node['out'] # free_list.pop() + cur_map[node['out']] = 'm' + node['out'] # free_list.pop() emit_vars.append(node) return do_ret() if is_temp(node): @@ -513,65 +515,76 @@ def print_input(reg_out, mem_in): #return '"mov %%[%s], %%[%s]\\n\\t"\n' % (mem_in, reg_out) return "" +def print_val(reg): + if reg.upper() in NAMED_REGISTERS: + return '%%%s' % reg + if reg[:2] == '0x': + return '$%s' % reg + return '%%[%s]' % reg + def print_load_specific_reg(reg, specific_reg='rdx'): ret = '' - ret += '"mov %%%s, %%[%s_backup]\\t\\n" // XXX: How do I specify that a particular register should be %s?\n' % (specific_reg, specific_reg, specific_reg) - ret += '"mov %%[%s], %%%s\\t\\n"\n' % (reg, specific_reg) - return ret, (specific_reg,) + #ret += '"mov %%%s, %%[%s_backup]\\t\\n" // XXX: How do I specify that a particular register should be %s?\n' % (specific_reg, specific_reg, specific_reg) + if reg != specific_reg: + ret += '"mov %s, %s\\t\\n"\n' % (print_val(reg), print_val(specific_reg)) + return ret, specific_reg def print_unload_specific_reg(specific_reg='rdx'): ret = '' - ret += '"mov %%[%s_backup], %%%s\\t\\n" // XXX: How do I specify that a particular register should be %s?\n' % (specific_reg, specific_reg, specific_reg) + #ret += '"mov %%[%s_backup], %%%s\\t\\n" // XXX: How do I specify that a particular register should be %s?\n' % (specific_reg, specific_reg, specific_reg) return ret -def print_load(*regs): - TEMP_REG = ['arg%d' % d for d in reversed(range(15))] - ret, out_reg = '', [] - for reg in regs: - if reg in REGISTERS: - out_reg.append(reg) - continue - else: - cur_reg = TEMP_REG.pop() - ret += '"mov %%[%s], %%[%s]\\t\\n"\n' % (reg, cur_reg) - out_reg.append(cur_reg) - if len(out_reg) == 1: return ret, out_reg[0] - return ret, tuple(out_reg) +#def get_arg_reg(d): +# return 'arg%d' % d +def print_load(reg, can_clobber=tuple(), dont_clobber=tuple()): + assert(not isinstance(can_clobber, str)) + assert(not isinstance(dont_clobber, str)) + can_clobber = [i for i in reversed(can_clobber) if i not in dont_clobber] + if reg in REGISTERS: + return ('', reg) + else: + cur_reg = can_clobber.pop() + ret = '"mov %s, %s\\t\\n"\n' % (print_val(reg), print_val(cur_reg)) + return (ret, cur_reg) def print_mulx(reg_out_low, reg_out_high, rx1, rx2, src): #return '%s:%s <- MULX %s, %s; // %s\n' % (reg_out_low, reg_out_high, rx1, rx2, src) ret = '' ret2, actual_rx1 = print_load_specific_reg(rx1, 'rdx') - ret3, actual_rx2 = print_load(rx2) - ret += ret2 + ret3 + ('"mulx %%[%s], %%[%s], %%[%s]\\t\\n" // %s\n' % (actual_rx2, reg_out_high, reg_out_low, src)) + assert(rx2 != actual_rx1) + ret3, actual_rx2 = print_load(rx2, can_clobber=[reg_out_high, reg_out_low], dont_clobber=[actual_rx1]) + ret += ret2 + ret3 + ('"mulx %s, %s, %s\\t\\n" // %s\n' % (print_val(actual_rx2), print_val(reg_out_high), print_val(reg_out_low), src)) ret += print_unload_specific_reg('rdx') return ret def print_mov_bucket(reg_out, reg_in, bucket): #return '%s <- MOV %s; // bucket: %s\n' % (reg_out, reg_in, bucket) - ret, reg_in = print_load(reg_in) - return ret + ('"mov %%[%s], %%[%s]\\t\\n" // bucket: %s\n' % (reg_in, reg_out, bucket)) + #ret, reg_in = print_load(reg_in, can_clobber=[reg_out]) + return ('"mov %s, %s\\t\\n" // bucket: %s\n' % (print_val(reg_in), print_val(reg_out), bucket)) def print_mov(reg_out, reg_in): #return '%s <- MOV %s;\n' % (reg_out, reg_in) - ret, reg_in = print_load(reg_in) - return ret + ('"mov %%[%s], %%[%s]\\t\\n"\n' % (reg_in, reg_out)) + #ret, reg_in = print_load(reg_in) + return ('"mov %s, %s\\t\\n"\n' % (print_val(reg_in), print_val(reg_out))) + +def print_load_constant(reg_out, imm): + assert(imm[:2] == '0x') + return ('"mov $%s, %s\\t\\n"\n' % (imm, print_val(reg_out))) LAST_CARRY = None def print_mul_by_constant(reg_out, reg_in, constant, src): #return '%s <- MULX %s, %s; // %s\n' % (ret_out, reg_in, constant, src) - #assert(LAST_CARRY is None) - global LAST_CARRY - ret, reg_in = print_load(reg_in) + ret = '' if constant == '0x13': - return ret + ('FIXME: lea for %s\n' % src) - else: - LAST_CARRY = None - return ret + ('"imul %%[%s], $%s, %%[%s]\\t\\n" // %s\n' % (reg_in, constant, reg_out, src)) + ret += ('// FIXME: lea for %s\n' % src) + assert(constant[:2] == '0x') + return ret + \ + print_load_constant('rdx', constant) + \ + print_mulx(reg_out, 'rdx', 'rdx', reg_in, src) def print_adx(reg_out, rx1, rx2, bucket): #return '%s <- ADX %s, %s; // bucket: %s\n' % (reg_out, rx1, rx2, bucket) assert(rx1 == reg_out) - ret, rx2 = print_load(rx2) + ret, rx2 = print_load(rx2, dont_clobber=[rx1]) return ret + ('"adx %%[%s], %%[%s]\\t\\n" // bucket: %s\n' % (rx2, reg_out, bucket)) def print_add(reg_out, cf, rx1, rx2, bucket): @@ -580,7 +593,7 @@ def print_add(reg_out, cf, rx1, rx2, bucket): assert(reg_out == rx1) #assert(LAST_CARRY is None or LAST_CARRY == cf) LAST_CARRY = cf - ret, rx2 = print_load(rx2) + ret, rx2 = print_load(rx2, dont_clobber=[rx1]) return ret + ('"add %%[%s], %%[%s]\\t\\n" // bucket: %s\n' % (rx2, reg_out, bucket)) def print_adc(reg_out, cf, rx1, rx2, bucket): @@ -591,7 +604,7 @@ def print_adc(reg_out, cf, rx1, rx2, bucket): if LAST_CARRY != cf: ret += 'ERRRRRRROR: %s != %s\n' % (LAST_CARRY, cf) LAST_CARRY = cf - ret2, rx2 = print_load(rx2) + ret2, rx2 = print_load(rx2, dont_clobber=[rx1]) ret += ret2 return ret + ('"adc %%[%s], %%[%s]\\t\\n" // bucket: %s\n' % (rx2, reg_out, bucket)) @@ -610,11 +623,30 @@ def print_and(reg_out, rx1, rx2, src): if rx2[:2] == '0x': return ('"and $%s, %%[%s]\\t\\n" // %s\n' % (rx2, reg_out, src)) else: - ret, rx2 = print_load(rx2) + ret, rx2 = print_load(rx2, can_clobber=[reg_out], dont_clobber=[rx1]) return ret + ('"and %%[%s], %%[%s]\\t\\n" // %s\n' % (rx2, reg_out, src)) -#def print_shr(reg_out, rx1, imm, src): - #return '%s <- SHR %s, %s;\n' % +def print_shr(reg_out, rx1, imm, src): + #return '%s <- SHR %s, %s; // %s\n' % (reg_out, rx1, imm, src) + global LAST_CARRY + LAST_CARRY = None + assert(rx1 == reg_out) + assert(imm[:2] == '0x') + return ('"shr $%s, %%[%s]\\t\\n" // %s\n' % (imm, reg_out, src)) + +def print_shrd(reg_out, rx_low, rx_high, imm, src): + #return '%s <- SHR %s, %s; // %s\n' % (reg_out, rx1, imm, src) + global LAST_CARRY + LAST_CARRY = None + if rx_low != reg_out and rx_high == reg_out: + return print_mov('rdx', rx_low) + \ + print_mov(rx_high, rx_low) + \ + print_mov(rx_low, 'rdx') + \ + print_shrd(reg_out, rx_high, rx_low, imm, src) + assert(rx_low == reg_out) + assert(imm[:2] == '0x') + return ('"shrd $%s, %%[%s], %%[%s]\\t\\n" // %s\n' % (imm, rx_low, rx_high, src)) + def schedule(input_data, existing, emit_vars): ret = '' @@ -655,24 +687,24 @@ def schedule(input_data, existing, emit_vars): extra_arg)) elif node['op'] == '>>' and len(node['deps']) == 1 and node['deps'][0]['op'] == 'COMBINE': extra_arg = [arg for arg in line_of_var(data, node['out'])['args'] if arg[:2] == '0x'][0] - ret += ('%s <- SHR %s:%s, %s; // %s = %s:%s >> %s\n' - % (existing[node['out']], - existing[node['deps'][0]['deps'][0]['out']], - existing[node['deps'][0]['deps'][1]['out']], - extra_arg, - node['out'], - node['deps'][0]['deps'][0]['out'], - node['deps'][0]['deps'][1]['out'], - extra_arg)) + ret += print_shrd(existing[node['out']], + existing[node['deps'][0]['deps'][0]['out']], + existing[node['deps'][0]['deps'][1]['out']], + extra_arg, + '%s = %s:%s >> %s' + % (node['out'], + node['deps'][0]['deps'][0]['out'], + node['deps'][0]['deps'][1]['out'], + extra_arg)) elif node['op'] == '>>' and len(node['deps']) == 1 and node['deps'][0]['type'] == 'uint64_t': extra_arg = [arg for arg in line_of_var(data, node['out'])['args'] if arg[:2] == '0x'][0] - ret += ('%s <- SHR %s, %s; // %s = %s >> %s\n' - % (existing[node['out']], - existing[node['deps'][0]['deps'][0]['out']], - extra_arg, - node['out'], - node['deps'][0]['deps'][0]['out'], - extra_arg)) + ret += print_shr(existing[node['out']], + existing[node['deps'][0]['deps'][0]['out']], + extra_arg, + '%s = %s >> %s' + % (node['out'], + node['deps'][0]['deps'][0]['out'], + extra_arg)) elif node['op'] in ('GET_HIGH', 'GET_LOW'): if node['rev_deps'][0]['out'] not in buckets_seen: ret += print_mov_bucket(existing[node['rev_deps'][0]['out']], @@ -750,6 +782,38 @@ def schedule(input_data, existing, emit_vars): assert(False) return ret +def inline_schedule(sched, input_vars, output_vars): + KNOWN_CONSTRAINTS = dict(('r%sx' % l, l) for l in 'abcd') + def int_or_zero_key(v): + orig = v + v = v.strip('abcdefghijklmnopqrstuvwxyz') + if v.isdigit(): return (int(v), orig) + return (0, orig) + variables = list(sorted(set(list(re.findall('%\[([a-zA-Z0-9_]*)\]', sched)) + + list(re.findall('%([a-zA-Z0-9_]+)', sched))), + key=int_or_zero_key)) + mems, variables = [i for i in variables if i[:2] == 'mx'], [i for i in variables if i[:2] != 'mx'] + special_reg, variables = [i for i in variables if i.upper() in NAMED_REGISTERS], [i for i in variables if i.upper() not in NAMED_REGISTERS] + transient_regs, output_regs = [i for i in variables if i not in output_vars.values()], [i for i in variables if i in output_vars.keys()] + available_registers = ['r%d' % i for i in range(16) + if ('r%d' % i) not in NAMED_REGISTER_MAPPING.keys() or NAMED_REGISTER_MAPPING['r%d' % i].lower() not in special_reg] + for reg in output_regs: + sched = sched.replace('%%[%s]' % reg, '%%[r%s]' % output_vars[reg]) + renaming = dict((from_reg, to_reg) for from_reg, to_reg in zip(transient_regs, available_registers[-len(transient_regs):])) + for from_reg, to_reg in renaming.items(): + sched = sched.replace('%%[%s]' % from_reg, '%%%s' % to_reg) + transient_regs = [renaming[reg] for reg in transient_regs] + ret = '' + ret += 'asm (\n' + ret += sched + ret += ': ' + ', '.join(['[r%s] "=&r" (%s)' % (output_vars[reg], output_vars[reg]) for reg in output_regs]) + '\n' + ret += ': ' + ', '.join(['[%s] "m" (%s)' % (reg, input_vars[reg]) for reg in input_vars]) + '\n' + ret += ': ' + ', '.join(['"cc"'] + + ['"%s"' % reg for reg in special_reg] + + ['"%s"' % reg for reg in transient_regs]) + '\n' + ret += ');\n' + return ret + data_list = parse_lines(get_lines('femulDisplay.log')) for i, data in enumerate(data_list): graph = to_graph(data) @@ -807,7 +871,9 @@ for i, data in enumerate(data_list): #mul_node = possible_nodes[0] #print([n['out'] for n in mul_node['deps']]) #cur_map, free_temps, free_list, all_temps = allocate_subgraph(existing, mul_node, cur_map, free_temps, free_list, all_temps) - sched = schedule(data, existing, emit_vars) + sched = inline_schedule(schedule(data, existing, emit_vars), + dict((existing[n['out']], n['out']) for n in graph['in'].values()), + dict((existing[n['out']], n['out']) for n in graph['out'].values())) #fill_deps(buckets[0]) deps = adjust_bits(data, print_graph(graph, existing)) with codecs.open('femulData%d.dot' % i, 'w', encoding='utf8') as f: -- cgit v1.2.3