include "alldifferent.mzn"; include "cumulative.mzn"; %uint128_t x20 = x5 * x13; %uint128_t x21 = x5 * x15; %uint128_t x22 = x7 * x13; %uint128_t x23 = x21 + x22; %uint128_t x24 = x5 * x17; %uint128_t x25 = x9 * x13; %uint128_t x26 = x24 + x25; %uint128_t x27 = x7 * x15; %uint128_t x28 = x26 + x27; %uint128_t x29 = x5 * x19; %uint128_t x30 = x11 * x13; %uint128_t x31 = x29 + x30; %uint128_t x32 = x7 * x17; %uint128_t x33 = x31 + x32; %uint128_t x34 = x9 * x15; %uint128_t x35 = x33 + x34; %uint128_t x36 = x5 * x18; %uint128_t x37 = x10 * x13; %uint128_t x38 = x36 + x37; %uint128_t x39 = x11 * x15; %uint128_t x40 = x38 + x39; %uint128_t x41 = x7 * x19; %uint128_t x42 = x40 + x41; %uint128_t x43 = x9 * x17; %uint128_t x44 = x42 + x43; %uint64_t x45 = x10 * 0x13; %uint64_t x46 = x7 * 0x13; %uint64_t x47 = x9 * 0x13; %uint64_t x48 = x11 * 0x13; %uint128_t x49 = x45 * x15; %uint128_t x50 = x20 + x49; %uint128_t x51 = x46 * x18; %uint128_t x52 = x50 + x51; %uint128_t x53 = x47 * x19; %uint128_t x54 = x52 + x53; %uint128_t x55 = x48 * x17; %uint128_t x56 = x54 + x55; %uint128_t x57 = x45 * x17; %uint128_t x58 = x23 + x57; %uint128_t x59 = x47 * x18; %uint128_t x60 = x58 + x59; %uint128_t x61 = x48 * x19; %uint128_t x62 = x60 + x61; %uint128_t x63 = x45 * x19; %uint128_t x64 = x28 + x63; %uint128_t x65 = x48 * x18; %uint128_t x66 = x64 + x65; %uint128_t x67 = x45 * x18; %uint128_t x68 = x35 + x67; %uint64_t x69 = x56 >> 0x33; %uint64_t x70 = x56 & 0x7ffffffffffff; %uint128_t x71 = x69 + x62; %uint64_t x72 = x71 >> 0x33; set of int: CORE = 1..3; CORE: ADD_MUL = 1; CORE: MUL_CORE = 2; CORE: LEA_BW = 3; array[CORE] of string: CORE_NAMES = ["ADD_MUL", "MUL_CORE", "LEA_BW"]; set of int: INSTRUCTIONS = 1..53; INSTRUCTIONS: x20 = 1; INSTRUCTIONS: x21 = 2; INSTRUCTIONS: x22 = 3; INSTRUCTIONS: x23 = 4; INSTRUCTIONS: x24 = 5; INSTRUCTIONS: x25 = 6; INSTRUCTIONS: x26 = 7; INSTRUCTIONS: x27 = 8; INSTRUCTIONS: x28 = 9; INSTRUCTIONS: x29 = 10; INSTRUCTIONS: x30 = 11; INSTRUCTIONS: x31 = 12; INSTRUCTIONS: x32 = 13; INSTRUCTIONS: x33 = 14; INSTRUCTIONS: x34 = 15; INSTRUCTIONS: x35 = 16; INSTRUCTIONS: x36 = 17; INSTRUCTIONS: x37 = 18; INSTRUCTIONS: x38 = 19; INSTRUCTIONS: x39 = 20; INSTRUCTIONS: x40 = 21; INSTRUCTIONS: x41 = 22; INSTRUCTIONS: x42 = 23; INSTRUCTIONS: x43 = 24; INSTRUCTIONS: x44 = 25; INSTRUCTIONS: x45 = 26; INSTRUCTIONS: x46 = 27; INSTRUCTIONS: x47 = 28; INSTRUCTIONS: x48 = 29; INSTRUCTIONS: x49 = 30; INSTRUCTIONS: x50 = 31; INSTRUCTIONS: x51 = 32; INSTRUCTIONS: x52 = 33; INSTRUCTIONS: x53 = 34; INSTRUCTIONS: x54 = 35; INSTRUCTIONS: x55 = 36; INSTRUCTIONS: x56 = 37; INSTRUCTIONS: x57 = 38; INSTRUCTIONS: x58 = 39; INSTRUCTIONS: x59 = 40; INSTRUCTIONS: x60 = 41; INSTRUCTIONS: x61 = 42; INSTRUCTIONS: x62 = 43; INSTRUCTIONS: x63 = 44; INSTRUCTIONS: x64 = 45; INSTRUCTIONS: x65 = 46; INSTRUCTIONS: x66 = 47; INSTRUCTIONS: x67 = 48; INSTRUCTIONS: x68 = 49; INSTRUCTIONS: x69 = 50; INSTRUCTIONS: x70 = 51; INSTRUCTIONS: x71 = 52; INSTRUCTIONS: x72 = 53; array[INSTRUCTIONS] of string: INSTRUCTIONS_NAMES = ["x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "x29", "x30", "x31", "x32", "x33", "x34", "x35", "x36", "x37", "x38", "x39", "x40", "x41", "x42", "x43", "x44", "x45", "x46", "x47", "x48", "x49", "x50", "x51", "x52", "x53", "x54", "x55", "x56", "x57", "x58", "x59", "x60", "x61", "x62", "x63", "x64", "x65", "x66", "x67", "x68", "x69", "x70", "x71", "x72"]; set of int: OPS = 1..6; OPS: SHL = 1; OPS: AND = 2; OPS: ADD = 3; OPS: MUL = 4; OPS: OR = 5; OPS: SHR = 6; array[OPS] of string: OPS_NAMES = ["SHL", "AND", "ADD", "MUL", "OR", "SHR"]; int: MAX_LOC = 183; set of int: LOCATIONS = 1..MAX_LOC; array[INSTRUCTIONS] of var LOCATIONS: output_locations; array[INSTRUCTIONS] of var int: output_data_latency; array[INSTRUCTIONS] of var int: output_core_latency; array[INSTRUCTIONS] of var CORE: output_cores; array[INSTRUCTIONS] of OPS: input_ops = [MUL, MUL, MUL, ADD, MUL, MUL, ADD, MUL, ADD, MUL, MUL, ADD, MUL, ADD, MUL, ADD, MUL, MUL, ADD, MUL, ADD, MUL, ADD, MUL, ADD, MUL, MUL, MUL, MUL, MUL, ADD, MUL, ADD, MUL, ADD, MUL, ADD, MUL, ADD, MUL, ADD, MUL, ADD, MUL, ADD, MUL, ADD, MUL, ADD, SHL, AND, ADD, SHL]; array[INSTRUCTIONS] of var int: output_ADD_MUL_core_latency; array[INSTRUCTIONS] of var 0..1: output_ADD_MUL_core_use; constraint forall (i in INSTRUCTIONS) (0 <= output_ADD_MUL_core_latency[i]); constraint forall (i in INSTRUCTIONS) (output_ADD_MUL_core_use[i] == 1 -> output_core_latency[i] == output_ADD_MUL_core_latency[i]); array[INSTRUCTIONS] of var int: output_MUL_CORE_core_latency; array[INSTRUCTIONS] of var 0..1: output_MUL_CORE_core_use; constraint forall (i in INSTRUCTIONS) (0 <= output_MUL_CORE_core_latency[i]); constraint forall (i in INSTRUCTIONS) (output_MUL_CORE_core_use[i] == 1 -> output_core_latency[i] == output_MUL_CORE_core_latency[i]); array[INSTRUCTIONS] of var int: output_LEA_BW_core_latency; array[INSTRUCTIONS] of var 0..1: output_LEA_BW_core_use; constraint forall (i in INSTRUCTIONS) (0 <= output_LEA_BW_core_latency[i]); constraint forall (i in INSTRUCTIONS) (output_LEA_BW_core_use[i] == 1 -> output_core_latency[i] == output_LEA_BW_core_latency[i]); var LOCATIONS: RET_loc; constraint alldifferent(output_locations); constraint output_locations[x21] + output_data_latency[x21] <= output_locations[x23]; constraint output_locations[x22] + output_data_latency[x22] <= output_locations[x23]; constraint output_locations[x24] + output_data_latency[x24] <= output_locations[x26]; constraint output_locations[x25] + output_data_latency[x25] <= output_locations[x26]; constraint output_locations[x26] + output_data_latency[x26] <= output_locations[x28]; constraint output_locations[x27] + output_data_latency[x27] <= output_locations[x28]; constraint output_locations[x29] + output_data_latency[x29] <= output_locations[x31]; constraint output_locations[x30] + output_data_latency[x30] <= output_locations[x31]; constraint output_locations[x31] + output_data_latency[x31] <= output_locations[x33]; constraint output_locations[x32] + output_data_latency[x32] <= output_locations[x33]; constraint output_locations[x33] + output_data_latency[x33] <= output_locations[x35]; constraint output_locations[x34] + output_data_latency[x34] <= output_locations[x35]; constraint output_locations[x36] + output_data_latency[x36] <= output_locations[x38]; constraint output_locations[x37] + output_data_latency[x37] <= output_locations[x38]; constraint output_locations[x38] + output_data_latency[x38] <= output_locations[x40]; constraint output_locations[x39] + output_data_latency[x39] <= output_locations[x40]; constraint output_locations[x40] + output_data_latency[x40] <= output_locations[x42]; constraint output_locations[x41] + output_data_latency[x41] <= output_locations[x42]; constraint output_locations[x42] + output_data_latency[x42] <= output_locations[x44]; constraint output_locations[x43] + output_data_latency[x43] <= output_locations[x44]; constraint output_locations[x45] + output_data_latency[x45] <= output_locations[x49]; constraint output_locations[x20] + output_data_latency[x20] <= output_locations[x50]; constraint output_locations[x49] + output_data_latency[x49] <= output_locations[x50]; constraint output_locations[x46] + output_data_latency[x46] <= output_locations[x51]; constraint output_locations[x50] + output_data_latency[x50] <= output_locations[x52]; constraint output_locations[x51] + output_data_latency[x51] <= output_locations[x52]; constraint output_locations[x47] + output_data_latency[x47] <= output_locations[x53]; constraint output_locations[x52] + output_data_latency[x52] <= output_locations[x54]; constraint output_locations[x53] + output_data_latency[x53] <= output_locations[x54]; constraint output_locations[x48] + output_data_latency[x48] <= output_locations[x55]; constraint output_locations[x54] + output_data_latency[x54] <= output_locations[x56]; constraint output_locations[x55] + output_data_latency[x55] <= output_locations[x56]; constraint output_locations[x45] + output_data_latency[x45] <= output_locations[x57]; constraint output_locations[x23] + output_data_latency[x23] <= output_locations[x58]; constraint output_locations[x57] + output_data_latency[x57] <= output_locations[x58]; constraint output_locations[x47] + output_data_latency[x47] <= output_locations[x59]; constraint output_locations[x58] + output_data_latency[x58] <= output_locations[x60]; constraint output_locations[x59] + output_data_latency[x59] <= output_locations[x60]; constraint output_locations[x48] + output_data_latency[x48] <= output_locations[x61]; constraint output_locations[x60] + output_data_latency[x60] <= output_locations[x62]; constraint output_locations[x61] + output_data_latency[x61] <= output_locations[x62]; constraint output_locations[x45] + output_data_latency[x45] <= output_locations[x63]; constraint output_locations[x28] + output_data_latency[x28] <= output_locations[x64]; constraint output_locations[x63] + output_data_latency[x63] <= output_locations[x64]; constraint output_locations[x48] + output_data_latency[x48] <= output_locations[x65]; constraint output_locations[x64] + output_data_latency[x64] <= output_locations[x66]; constraint output_locations[x65] + output_data_latency[x65] <= output_locations[x66]; constraint output_locations[x45] + output_data_latency[x45] <= output_locations[x67]; constraint output_locations[x35] + output_data_latency[x35] <= output_locations[x68]; constraint output_locations[x67] + output_data_latency[x67] <= output_locations[x68]; constraint output_locations[x56] + output_data_latency[x56] <= output_locations[x69]; constraint output_locations[x56] + output_data_latency[x56] <= output_locations[x70]; constraint output_locations[x69] + output_data_latency[x69] <= output_locations[x71]; constraint output_locations[x62] + output_data_latency[x62] <= output_locations[x71]; constraint output_locations[x71] + output_data_latency[x71] <= output_locations[x72]; constraint max([ output_locations[i] + output_data_latency[i] | i in INSTRUCTIONS ]) <= RET_loc; constraint forall (i in INSTRUCTIONS) (input_ops[i] == SHL -> ((output_cores[i] == LEA_BW /\ output_LEA_BW_core_use[i] == 1 /\ output_LEA_BW_core_latency[i] == 4 /\ output_data_latency[i] == 4 /\ output_ADD_MUL_core_use[i] == 0 /\ output_ADD_MUL_core_latency[i] == 0 /\ output_MUL_CORE_core_use[i] == 0 /\ output_MUL_CORE_core_latency[i] == 0))); constraint forall (i in INSTRUCTIONS) (input_ops[i] == AND -> ((output_cores[i] == LEA_BW /\ output_LEA_BW_core_use[i] == 1 /\ output_LEA_BW_core_latency[i] == 4 /\ output_data_latency[i] == 4 /\ output_ADD_MUL_core_use[i] == 0 /\ output_ADD_MUL_core_latency[i] == 0 /\ output_MUL_CORE_core_use[i] == 0 /\ output_MUL_CORE_core_latency[i] == 0))); constraint forall (i in INSTRUCTIONS) (input_ops[i] == ADD -> ((output_cores[i] == ADD_MUL /\ output_ADD_MUL_core_use[i] == 1 /\ output_ADD_MUL_core_latency[i] == 4 /\ output_data_latency[i] == 4 /\ output_MUL_CORE_core_use[i] == 0 /\ output_MUL_CORE_core_latency[i] == 0 /\ output_LEA_BW_core_use[i] == 0 /\ output_LEA_BW_core_latency[i] == 0) \/ (output_cores[i] == LEA_BW /\ output_LEA_BW_core_use[i] == 1 /\ output_LEA_BW_core_latency[i] == 4 /\ output_data_latency[i] == 4 /\ output_ADD_MUL_core_use[i] == 0 /\ output_ADD_MUL_core_latency[i] == 0 /\ output_MUL_CORE_core_use[i] == 0 /\ output_MUL_CORE_core_latency[i] == 0))); constraint forall (i in INSTRUCTIONS) (input_ops[i] == MUL -> ((output_cores[i] == ADD_MUL /\ output_ADD_MUL_core_use[i] == 1 /\ output_ADD_MUL_core_latency[i] == 4 /\ output_data_latency[i] == 12 /\ output_MUL_CORE_core_use[i] == 0 /\ output_MUL_CORE_core_latency[i] == 0 /\ output_LEA_BW_core_use[i] == 0 /\ output_LEA_BW_core_latency[i] == 0) \/ (output_cores[i] == MUL_CORE /\ output_MUL_CORE_core_use[i] == 1 /\ output_MUL_CORE_core_latency[i] == 4 /\ output_data_latency[i] == 12 /\ output_ADD_MUL_core_use[i] == 0 /\ output_ADD_MUL_core_latency[i] == 0 /\ output_LEA_BW_core_use[i] == 0 /\ output_LEA_BW_core_latency[i] == 0))); constraint forall (i in INSTRUCTIONS) (input_ops[i] == OR -> ((output_cores[i] == LEA_BW /\ output_LEA_BW_core_use[i] == 1 /\ output_LEA_BW_core_latency[i] == 4 /\ output_data_latency[i] == 4 /\ output_ADD_MUL_core_use[i] == 0 /\ output_ADD_MUL_core_latency[i] == 0 /\ output_MUL_CORE_core_use[i] == 0 /\ output_MUL_CORE_core_latency[i] == 0))); constraint forall (i in INSTRUCTIONS) (input_ops[i] == SHR -> ((output_cores[i] == LEA_BW /\ output_LEA_BW_core_use[i] == 1 /\ output_LEA_BW_core_latency[i] == 4 /\ output_data_latency[i] == 4 /\ output_ADD_MUL_core_use[i] == 0 /\ output_ADD_MUL_core_latency[i] == 0 /\ output_MUL_CORE_core_use[i] == 0 /\ output_MUL_CORE_core_latency[i] == 0))); constraint cumulative(output_locations, output_ADD_MUL_core_latency, output_ADD_MUL_core_use, 2); constraint cumulative(output_locations, output_MUL_CORE_core_latency, output_MUL_CORE_core_use, 1); constraint cumulative(output_locations, output_LEA_BW_core_latency, output_LEA_BW_core_use, 2); solve minimize RET_loc; output [ "(" ++ show(INSTRUCTIONS_NAMES[i]) ++ ", " ++ show(CORE_NAMES[fix(output_cores[i])]) ++ ", " ++ show(output_locations[i]) ++ ", " ++ show(output_data_latency[i]) ++ ", " ++ show(output_core_latency[i]) ++ ") ,\n" | i in INSTRUCTIONS ]; output [ "RET_loc: " ++ show(RET_loc) ];