aboutsummaryrefslogtreecommitdiffhomepage
path: root/bench/analyze-blocking-sizes.cpp
diff options
context:
space:
mode:
authorGravatar Benoit Jacob <benoitjacob@google.com>2015-03-02 18:08:38 -0500
committerGravatar Benoit Jacob <benoitjacob@google.com>2015-03-02 18:08:38 -0500
commit9930e9583b336edda281a4490cfe69c53082318e (patch)
tree106ee0a69f11d79ec80ec5893814a7c565428d12 /bench/analyze-blocking-sizes.cpp
parent1ec0f4fadf1b8d95fb1506e87112d0c7888afd95 (diff)
Improve analyze-blocking-sizes, and in particular give it a evaluate-defaults tool
that shows the efficiency of Eigen's default blocking sizes choices, using a previously computed table from benchmark-blocking-sizes.
Diffstat (limited to 'bench/analyze-blocking-sizes.cpp')
-rw-r--r--bench/analyze-blocking-sizes.cpp232
1 files changed, 186 insertions, 46 deletions
diff --git a/bench/analyze-blocking-sizes.cpp b/bench/analyze-blocking-sizes.cpp
index a603c0216..f507eab38 100644
--- a/bench/analyze-blocking-sizes.cpp
+++ b/bench/analyze-blocking-sizes.cpp
@@ -7,6 +7,10 @@
#include <string>
#include <cmath>
#include <cassert>
+#include <cstring>
+#include <memory>
+
+#include <Eigen/Core>
using namespace std;
@@ -88,6 +92,11 @@ struct preprocessed_inputfile_entry_t
float efficiency;
};
+bool lower_efficiency(const preprocessed_inputfile_entry_t& e1, const preprocessed_inputfile_entry_t& e2)
+{
+ return e1.efficiency < e2.efficiency;
+}
+
struct preprocessed_inputfile_t
{
string filename;
@@ -394,63 +403,194 @@ void print_partition(
cout << endl;
}
-int main(int argc, char* argv[])
+struct action_t
{
- if (argc == 1) {
- cerr << "usage: " << argv[0] << " [input files]" << endl;
- cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl;
- exit(1);
- }
- cout.precision(3);
- cerr.precision(3);
- vector<string> inputfilenames;
- for (int i = 1; i < argc; i++) {
- inputfilenames.emplace_back(argv[i]);
- }
+ virtual const char* invokation_name() const { abort(); return nullptr; }
+ virtual void run(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles) const { abort(); }
+ virtual ~action_t() {}
+};
- vector<preprocessed_inputfile_t> preprocessed_inputfiles;
- for (auto it = inputfilenames.begin(); it != inputfilenames.end(); ++it) {
- preprocessed_inputfiles.emplace_back(inputfile_t(*it));
+struct partition_action_t : action_t
+{
+ virtual const char* invokation_name() const { return "partition"; }
+ virtual void run(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles) const
+ {
+ check_all_files_in_same_exact_order(preprocessed_inputfiles);
+
+ float required_efficiency_to_beat = 0.0f;
+ vector<vector<vector<size_t>>> partitions;
+ cerr << "searching for partitions...\r" << flush;
+ while (true)
+ {
+ vector<vector<size_t>> partition;
+ find_partition_with_efficiency_higher_than(
+ preprocessed_inputfiles,
+ required_efficiency_to_beat,
+ partition);
+ float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
+ cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size()
+ << " subsets for " << 100.0f * actual_efficiency
+ << " % efficiency"
+ << " \r" << flush;
+ partitions.push_back(partition);
+ if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) {
+ break;
+ }
+ required_efficiency_to_beat = actual_efficiency;
+ }
+ cerr << " " << endl;
+ while (true) {
+ bool repeat = false;
+ for (size_t i = 0; i < partitions.size() - 1; i++) {
+ if (partitions[i].size() >= partitions[i+1].size()) {
+ partitions.erase(partitions.begin() + i);
+ repeat = true;
+ break;
+ }
+ }
+ if (!repeat) {
+ break;
+ }
+ }
+ for (auto it = partitions.begin(); it != partitions.end(); ++it) {
+ print_partition(preprocessed_inputfiles, *it);
+ }
}
+};
- check_all_files_in_same_exact_order(preprocessed_inputfiles);
+uint8_t log2_pot(size_t x) {
+ size_t l = 0;
+ while (x >>= 1) l++;
+ return l;
+}
- float required_efficiency_to_beat = 0.0f;
- vector<vector<vector<size_t>>> partitions;
- cerr << "searching for partitions...\r" << flush;
- while (true)
+uint16_t compact_size_triple(size_t k, size_t m, size_t n)
+{
+ return (log2_pot(k) << 8) | (log2_pot(m) << 4) | log2_pot(n);
+}
+
+// just a helper to store a triple of K,M,N sizes for matrix product
+struct size_triple_t
+{
+ size_t k, m, n;
+ size_triple_t() : k(0), m(0), n(0) {}
+ size_triple_t(size_t _k, size_t _m, size_t _n) : k(_k), m(_m), n(_n) {}
+ size_triple_t(const size_triple_t& o) : k(o.k), m(o.m), n(o.n) {}
+ size_triple_t(uint16_t compact)
{
- vector<vector<size_t>> partition;
- find_partition_with_efficiency_higher_than(
- preprocessed_inputfiles,
- required_efficiency_to_beat,
- partition);
- float actual_efficiency = efficiency_of_partition(preprocessed_inputfiles, partition);
- cerr << "partition " << preprocessed_inputfiles.size() << " files into " << partition.size()
- << " subsets for " << 100.0f * actual_efficiency
- << " % efficiency"
- << " \r" << flush;
- partitions.push_back(partition);
- if (partition.size() == preprocessed_inputfiles.size() || actual_efficiency == 1.0f) {
- break;
- }
- required_efficiency_to_beat = actual_efficiency;
+ k = 1 << ((compact & 0xf00) >> 8);
+ m = 1 << ((compact & 0x0f0) >> 4);
+ n = 1 << ((compact & 0x00f) >> 0);
}
- cerr << " " << endl;
- while (true) {
- bool repeat = false;
- for (size_t i = 0; i < partitions.size() - 1; i++) {
- if (partitions[i].size() >= partitions[i+1].size()) {
- partitions.erase(partitions.begin() + i);
- repeat = true;
- break;
+ bool is_cubic() const { return k == m && m == n; }
+};
+
+struct evaluate_defaults_action_t : action_t
+{
+ virtual const char* invokation_name() const { return "evaluate-defaults"; }
+ virtual void run(const vector<preprocessed_inputfile_t>& preprocessed_inputfiles) const
+ {
+ if (preprocessed_inputfiles.size() > 1) {
+ cerr << invokation_name() << " only works with one input file." << endl;
+ exit(1);
+ }
+
+ const preprocessed_inputfile_t& preprocessed_inputfile = preprocessed_inputfiles.front();
+
+ uint16_t product_size = 0;
+ uint16_t default_block_size = 0;
+ vector<preprocessed_inputfile_entry_t> results, cubic_results;
+ for (auto it = preprocessed_inputfile.entries.begin(); it != preprocessed_inputfile.entries.end(); ++it) {
+ if (it->product_size != product_size) {
+ product_size = it->product_size;
+ size_triple_t product_size_triple(product_size);
+ Eigen::Index k = product_size_triple.k,
+ m = product_size_triple.m,
+ n = product_size_triple.n;
+ Eigen::internal::computeProductBlockingSizes<float, float>(k, m, n);
+ default_block_size = compact_size_triple(k, m, n);
+ }
+ if (it->block_size == default_block_size) {
+ results.push_back(*it);
+ if (size_triple_t(product_size).is_cubic()) {
+ cubic_results.push_back(*it);
+ }
}
}
- if (!repeat) {
+
+ cerr << "Below are all results - first column is product size tripe, " << endl
+ << "second column is block size triple, in hex kmn form where" << endl
+ << "k, m, n are the log2 of the actual values, i.e. a89 means" << endl
+ << "k=1024, m=256, n=512." << endl << endl;
+ for (auto it = results.begin(); it != results.end(); ++it) {
+ cerr << hex << it->product_size << " " << it->block_size
+ << " efficiency " << std::dec << 100.0f * it->efficiency << " %" << endl;
+ }
+ cerr << endl;
+ sort(results.begin(), results.end(), lower_efficiency);
+ sort(cubic_results.begin(), cubic_results.end(), lower_efficiency);
+ cerr << "Efficiency summary: min = "
+ << 100.0f * results.front().efficiency << " %, max = "
+ << 100.0f * results.back().efficiency << " %, median = "
+ << 100.0f * results[results.size() / 2].efficiency << " %" << endl;
+ cerr << "20% of product sizes have efficiency <= " << 100.0f * results[results.size() * 20 / 100].efficiency << " %" << endl;
+ cerr << "10% of product sizes have efficiency <= " << 100.0f * results[results.size() * 10 / 100].efficiency << " %" << endl;
+ cerr << "5% of product sizes have efficiency <= " << 100.0f * results[results.size() * 5 / 100].efficiency << " %" << endl;
+ cerr << "Cubic sizes efficiency summary: min = "
+ << 100.0f * cubic_results.front().efficiency << " %, max = "
+ << 100.0f * cubic_results.back().efficiency << " %, median = "
+ << 100.0f * cubic_results[cubic_results.size() / 2].efficiency << " %" << endl;
+
+ }
+};
+
+
+void show_usage_and_exit(int argc, char* argv[],
+ const vector<unique_ptr<action_t>>& available_actions)
+{
+ cerr << "usage: " << argv[0] << " <action> <input files...>" << endl;
+ cerr << "available actions:" << endl;
+ for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
+ cerr << " " << (*it)->invokation_name() << endl;
+ }
+ cerr << "the input files should each contain an output of benchmark-blocking-sizes" << endl;
+ exit(1);
+}
+
+int main(int argc, char* argv[])
+{
+ cout.precision(3);
+ cerr.precision(3);
+
+ vector<unique_ptr<action_t>> available_actions;
+ available_actions.emplace_back(new partition_action_t);
+ available_actions.emplace_back(new evaluate_defaults_action_t);
+
+ auto action = available_actions.end();
+
+ if (argc <= 2) {
+ show_usage_and_exit(argc, argv, available_actions);
+ }
+ for (auto it = available_actions.begin(); it != available_actions.end(); ++it) {
+ if (!strcmp(argv[1], (*it)->invokation_name())) {
+ action = it;
break;
}
}
- for (auto it = partitions.begin(); it != partitions.end(); ++it) {
- print_partition(preprocessed_inputfiles, *it);
+
+ if (action == available_actions.end()) {
+ show_usage_and_exit(argc, argv, available_actions);
}
+
+ vector<string> inputfilenames;
+ for (int i = 2; i < argc; i++) {
+ inputfilenames.emplace_back(argv[i]);
+ }
+
+ vector<preprocessed_inputfile_t> preprocessed_inputfiles;
+ for (auto it = inputfilenames.begin(); it != inputfilenames.end(); ++it) {
+ preprocessed_inputfiles.emplace_back(inputfile_t(*it));
+ }
+
+ (*action)->run(preprocessed_inputfiles);
}