From 9930e9583b336edda281a4490cfe69c53082318e Mon Sep 17 00:00:00 2001 From: Benoit Jacob Date: Mon, 2 Mar 2015 18:08:38 -0500 Subject: Improve analyze-blocking-sizes, and in particular give it a evaluate-defaults tool that shows the efficiency of Eigen's default blocking sizes choices, using a previously computed table from benchmark-blocking-sizes. --- bench/analyze-blocking-sizes.cpp | 232 +++++++++++++++++++++++++++++++-------- 1 file changed, 186 insertions(+), 46 deletions(-) (limited to 'bench/analyze-blocking-sizes.cpp') diff --git a/bench/analyze-blocking-sizes.cpp b/bench/analyze-blocking-sizes.cpp index a603c0216..f507eab38 100644 --- a/bench/analyze-blocking-sizes.cpp +++ b/bench/analyze-blocking-sizes.cpp @@ -7,6 +7,10 @@ #include #include #include +#include +#include + +#include using namespace std; @@ -88,6 +92,11 @@ struct preprocessed_inputfile_entry_t float efficiency; }; +bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2) +{ + return e1.efficiency < e2.efficiency; +} + struct preprocessed_inputfile_t { string filename; @@ -394,63 +403,194 @@ void print_partition( cout << endl; } -int main(int argc, char* argv[]) +struct action_t { - if (argc == 1) { - cerr << "usage: " << argv[0] << " [input files]" << endl; - cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl; - exit(1); - } - cout.precision(3); - cerr.precision(3); - vector inputfilenames; - for (int i = 1; i < argc; i++) { - inputfilenames.emplace_back(argv[i]); - } + virtual const char* invokation_name() const { abort(); return nullptr; } + virtual void run(const vector& preprocessed_inputfiles) const { abort(); } + virtual ~action_t() {} +}; - vector preprocessed_inputfiles; - for (auto it = inputfilenames.begin(); it != inputfilenames.end(); ++it) { - preprocessed_inputfiles.emplace_back(inputfile_t(*it)); +struct partition_action_t : action_t +{ + virtual const char* invokation_name() const { return "partition"; } + virtual void run(const vector& preprocessed_inputfiles) const + { + check_all_files_in_same_exact_order(preprocessed_inputfiles); + + float required_efficiency_to_beat = 0.0f; + vector>> partitions; + cerr << "searching for partitions...\r" << flush; + while (true) + { + vector> partition; + find_partition_with_efficiency_higher_than( + preprocessed_inputfiles, + required_efficiency_to_beat, + partition); + float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition); + cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() + << " subsets for " << 100.0f * actual_efficiency + << " % efficiency" + << " \r" << flush; + partitions.push_back(partition); + if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) { + break; + } + required_efficiency_to_beat = actual_efficiency; + } + cerr << " " << endl; + while (true) { + bool repeat = false; + for (size_t i = 0; i < partitions.size() - 1; i++) { + if (partitions[i].size() >= partitions[i+1].size()) { + partitions.erase(partitions.begin() + i); + repeat = true; + break; + } + } + if (!repeat) { + break; + } + } + for (auto it = partitions.begin(); it != partitions.end(); ++it) { + print_partition(preprocessed_inputfiles, *it); + } } +}; - check_all_files_in_same_exact_order(preprocessed_inputfiles); +uint8_t log2_pot(size_t x) { + size_t l = 0; + while (x >>= 1) l++; + return l; +} - float required_efficiency_to_beat = 0.0f; - vector>> partitions; - cerr << "searching for partitions...\r" << flush; - while (true) +uint16_t compact_size_triple(size_t k, size_t m, size_t n) +{ + return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n); +} + +// just a helper to store a triple of K,M,N sizes for matrix product +struct size_triple_t +{ + size_t k, m, n; + size_triple_t() : k(0), m(0), n(0) {} + size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {} + size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {} + size_triple_t(uint16_t compact) { - vector> partition; - find_partition_with_efficiency_higher_than( - preprocessed_inputfiles, - required_efficiency_to_beat, - partition); - float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition); - cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size() - << " subsets for " << 100.0f * actual_efficiency - << " % efficiency" - << " \r" << flush; - partitions.push_back(partition); - if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) { - break; - } - required_efficiency_to_beat = actual_efficiency; + k = 1 << ((compact & 0xf00) >> 8); + m = 1 << ((compact & 0x0f0) >> 4); + n = 1 << ((compact & 0x00f) >> 0); } - cerr << " " << endl; - while (true) { - bool repeat = false; - for (size_t i = 0; i < partitions.size() - 1; i++) { - if (partitions[i].size() >= partitions[i+1].size()) { - partitions.erase(partitions.begin() + i); - repeat = true; - break; + bool is_cubic() const { return k == m && m == n; } +}; + +struct evaluate_defaults_action_t : action_t +{ + virtual const char* invokation_name() const { return "evaluate-defaults"; } + virtual void run(const vector& preprocessed_inputfiles) const + { + if (preprocessed_inputfiles.size() > 1) { + cerr << invokation_name() << " only works with one input file." << endl; + exit(1); + } + + const preprocessed_inputfile_t& preprocessed_inputfile = preprocessed_inputfiles.front(); + + uint16_t product_size = 0; + uint16_t default_block_size = 0; + vector results, cubic_results; + for (auto it = preprocessed_inputfile.entries.begin(); it != preprocessed_inputfile.entries.end(); ++it) { + if (it->product_size != product_size) { + product_size = it->product_size; + size_triple_t product_size_triple(product_size); + Eigen::Index k = product_size_triple.k, + m = product_size_triple.m, + n = product_size_triple.n; + Eigen::internal::computeProductBlockingSizes(k, m, n); + default_block_size = compact_size_triple(k, m, n); + } + if (it->block_size == default_block_size) { + results.push_back(*it); + if (size_triple_t(product_size).is_cubic()) { + cubic_results.push_back(*it); + } } } - if (!repeat) { + + cerr << "Below are all results - first column is product size tripe, " << endl + << "second column is block size triple, in hex kmn form where" << endl + << "k, m, n are the log2 of the actual values, i.e. a89 means" << endl + << "k=1024, m=256, n=512." << endl << endl; + for (auto it = results.begin(); it != results.end(); ++it) { + cerr << hex << it->product_size << " " << it->block_size + << " efficiency " << std::dec << 100.0f * it->efficiency << " %" << endl; + } + cerr << endl; + sort(results.begin(), results.end(), lower_efficiency); + sort(cubic_results.begin(), cubic_results.end(), lower_efficiency); + cerr << "Efficiency summary: min = " + << 100.0f * results.front().efficiency << " %, max = " + << 100.0f * results.back().efficiency << " %, median = " + << 100.0f * results[results.size() / 2].efficiency << " %" << endl; + cerr << "20% of product sizes have efficiency <= " << 100.0f * results[results.size() * 20 / 100].efficiency << " %" << endl; + cerr << "10% of product sizes have efficiency <= " << 100.0f * results[results.size() * 10 / 100].efficiency << " %" << endl; + cerr << "5% of product sizes have efficiency <= " << 100.0f * results[results.size() * 5 / 100].efficiency << " %" << endl; + cerr << "Cubic sizes efficiency summary: min = " + << 100.0f * cubic_results.front().efficiency << " %, max = " + << 100.0f * cubic_results.back().efficiency << " %, median = " + << 100.0f * cubic_results[cubic_results.size() / 2].efficiency << " %" << endl; + + } +}; + + +void show_usage_and_exit(int argc, char* argv[], + const vector>& available_actions) +{ + cerr << "usage: " << argv[0] << " " << endl; + cerr << "available actions:" << endl; + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + cerr << " " << (*it)->invokation_name() << endl; + } + cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl; + exit(1); +} + +int main(int argc, char* argv[]) +{ + cout.precision(3); + cerr.precision(3); + + vector> available_actions; + available_actions.emplace_back(new partition_action_t); + available_actions.emplace_back(new evaluate_defaults_action_t); + + auto action = available_actions.end(); + + if (argc <= 2) { + show_usage_and_exit(argc, argv, available_actions); + } + for (auto it = available_actions.begin(); it != available_actions.end(); ++it) { + if (!strcmp(argv[1], (*it)->invokation_name())) { + action = it; break; } } - for (auto it = partitions.begin(); it != partitions.end(); ++it) { - print_partition(preprocessed_inputfiles, *it); + + if (action == available_actions.end()) { + show_usage_and_exit(argc, argv, available_actions); } + + vector inputfilenames; + for (int i = 2; i < argc; i++) { + inputfilenames.emplace_back(argv[i]); + } + + vector preprocessed_inputfiles; + for (auto it = inputfilenames.begin(); it != inputfilenames.end(); ++it) { + preprocessed_inputfiles.emplace_back(inputfile_t(*it)); + } + + (*action)->run(preprocessed_inputfiles); } -- cgit v1.2.3