diff options
author | Justine Tunney <jart@google.com> | 2018-01-11 19:19:21 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-01-11 19:23:49 -0800 |
commit | f7f515893eed68465726d671f2b7b568977a3af4 (patch) | |
tree | dd193f46c9ee0e7dff73c71b7f52afe3d143ae8d /tensorflow/contrib/tensorboard | |
parent | 8aab9b2197efb90d58e86b18b614cd0e63dc56a1 (diff) |
Create loader and vacuum tools for TensorBoard DB
PiperOrigin-RevId: 181695156
Diffstat (limited to 'tensorflow/contrib/tensorboard')
-rw-r--r-- | tensorflow/contrib/tensorboard/db/BUILD | 33 | ||||
-rw-r--r-- | tensorflow/contrib/tensorboard/db/loader.cc | 124 | ||||
-rw-r--r-- | tensorflow/contrib/tensorboard/db/vacuum.cc | 137 |
3 files changed, 293 insertions, 1 deletions
diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD index 9932a30368..6ff5a9e2b1 100644 --- a/tensorflow/contrib/tensorboard/db/BUILD +++ b/tensorflow/contrib/tensorboard/db/BUILD @@ -5,7 +5,12 @@ package(default_visibility = ["//tensorflow:internal"]) licenses(["notice"]) # Apache 2.0 -load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_copts") +load( + "//tensorflow:tensorflow.bzl", + "tf_cc_binary", + "tf_cc_test", + "tf_copts", +) cc_library( name = "schema", @@ -106,6 +111,32 @@ cc_library( ], ) +tf_cc_binary( + name = "loader", + srcs = ["loader.cc"], + linkstatic = 1, + deps = [ + ":schema", + ":summary_db_writer", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core/lib/db:sqlite", + ], +) + +tf_cc_binary( + name = "vacuum", + srcs = ["vacuum.cc"], + linkstatic = 1, + deps = [ + "//tensorflow/core:framework_internal", + "//tensorflow/core:lib", + "//tensorflow/core/lib/db:sqlite", + ], +) + filegroup( name = "all_files", srcs = glob(["*"]), diff --git a/tensorflow/contrib/tensorboard/db/loader.cc b/tensorflow/contrib/tensorboard/db/loader.cc new file mode 100644 index 0000000000..4d7337a53d --- /dev/null +++ b/tensorflow/contrib/tensorboard/db/loader.cc @@ -0,0 +1,124 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/
+#include <iostream>
+#include <memory>
+#include <type_traits>
+#include <vector>
+
+#include "tensorflow/contrib/tensorboard/db/schema.h"
+#include "tensorflow/contrib/tensorboard/db/summary_db_writer.h"
+#include "tensorflow/core/lib/db/sqlite.h"
+#include "tensorflow/core/lib/io/record_reader.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/util/command_line_flags.h"
+#include "tensorflow/core/util/event.pb.h"
+
+namespace tensorflow {
+namespace {
+
+// Returns the decimal text of integer `n` with a ',' between each group of
+// three digits, counted from the right, e.g. 1234567 -> "1,234,567".
+//
+// A leading minus sign is not counted as a digit, so -123 stays "-123" and
+// -1234 becomes "-1,234".
+template <typename T>
+string AddCommas(T n) {
+  static_assert(std::is_integral<T>::value, "is_integral");
+  string s = strings::StrCat(n);
+  // Index of the first digit; skips the sign so it never receives a comma.
+  const size_t first_digit = (!s.empty() && s[0] == '-') ? 1 : 0;
+  // Walk right to left. Each insertion lands to the left of every earlier
+  // one, so positions that were already processed never shift.
+  for (size_t pos = s.size(); pos > first_digit + 3;) {
+    pos -= 3;
+    s.insert(pos, 1, ',');
+  }
+  return s;
+}
+
+// Entry point of the loader tool: reads every record of the file named by
+// --events, parses each one as an Event proto and hands it to a
+// SummaryDbWriter backed by the SQLite file named by --db.
+//
+// Returns 0 on success and -1 when the flags are unusable. Every other
+// failure goes through TF_CHECK_OK / LOG(FATAL).
+int main(int argc, char* argv[]) {
+  string path;
+  string events;
+  string experiment_name;
+  string run_name;
+  string user_name;
+  std::vector<Flag> flag_list = {
+      Flag("db", &path, "Path of SQLite DB file"),
+      Flag("events", &events, "TensorFlow record proto event log file"),
+      Flag("experiment_name", &experiment_name, "The DB experiment_name value"),
+      Flag("run_name", &run_name, "The DB run_name value"),
+      Flag("user_name", &user_name, "The DB user_name value"),
+  };
+  string usage = Flags::Usage(argv[0], flag_list);
+  bool parse_result = Flags::Parse(&argc, argv, flag_list);
+  // Both paths are needed: without --events there is no log to read, so
+  // print the usage text instead of failing later on the file open.
+  if (!parse_result || path.empty() || events.empty()) {
+    std::cerr << "The loader tool imports tf.Event record files, created by\n"
+              << "SummaryFileWriter, into the sorts of SQLite database files\n"
+              << "created by SummaryDbWriter.\n\n"
+              << "In addition to the flags below, the environment variables\n"
+              << "defined by core/lib/db/sqlite.cc can also be set.\n\n"
+              << usage;
+    return -1;
+  }
+  port::InitMain(argv[0], &argc, &argv);
+  Env* env = Env::Default();
+
+  LOG(INFO) << "Opening SQLite file: " << path;
+  Sqlite* db;
+  TF_CHECK_OK(Sqlite::Open(
+      path, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_NOMUTEX,
+      &db));
+  core::ScopedUnref unref_db(db);
+
+  LOG(INFO) << "Initializing TensorBoard schema";
+  TF_CHECK_OK(SetupTensorboardSqliteDb(db));
+
+  LOG(INFO) << "Creating SummaryDbWriter";
+  SummaryWriterInterface* db_writer;
+  TF_CHECK_OK(CreateSummaryDbWriter(db, experiment_name, run_name, user_name,
+                                    env, &db_writer));
+  core::ScopedUnref unref(db_writer);
+
+  LOG(INFO) << "Loading TF event log: " << events;
+  std::unique_ptr<RandomAccessFile> file;
+  TF_CHECK_OK(env->NewRandomAccessFile(events, &file));
+  io::RecordReader reader(file.get());
+
+  uint64 start = env->NowMicros();
+  uint64 records = 0;
+  uint64 offset = 0;
+  string record;
+  while (true) {
+    Status s = reader.ReadRecord(&offset, &record);
+    // OUT_OF_RANGE is treated as the normal end of the log.
+    if (s.code() == error::OUT_OF_RANGE) break;
+    TF_CHECK_OK(s);
+    // Allocate only once a record has actually been read, so the final
+    // end-of-file read does not build an Event that is thrown away.
+    std::unique_ptr<Event> event(new Event);
+    if (!ParseProtoUnlimited(event.get(), record)) {
+      LOG(FATAL) << "Corrupt tf.Event record"
+                 << " offset=" << (offset - record.size())
+                 << " size=" << static_cast<int>(record.size());
+    }
+    TF_CHECK_OK(db_writer->WriteEvent(std::move(event)));
+    ++records;
+  }
+  const uint64 elapsed = env->NowMicros() - start;
+  // Bytes per second. Scale in floating point and guard the zero case:
+  // truncating elapsed to whole seconds first would make the divisor zero
+  // for any load that finishes in under a second.
+  const uint64 bytes_per_second =
+      elapsed == 0 ? 0 : static_cast<uint64>(offset / (elapsed / 1e6));
+  LOG(INFO) << "Loaded " << AddCommas(offset) << " bytes with "
+            << AddCommas(records) << " records at "
+            << AddCommas(bytes_per_second) << " bps";
+
+  return 0;
+}
+
+}  // namespace
+}  // namespace tensorflow
+
+int main(int argc, char* argv[]) { return tensorflow::main(argc, argv); }
diff --git a/tensorflow/contrib/tensorboard/db/vacuum.cc b/tensorflow/contrib/tensorboard/db/vacuum.cc
new file mode 100644
index 0000000000..5febe63f06
--- /dev/null
+++ b/tensorflow/contrib/tensorboard/db/vacuum.cc
@@ -0,0 +1,137 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <iostream>
+
+#include "tensorflow/core/lib/db/sqlite.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/util/command_line_flags.h"
+
+namespace tensorflow {
+namespace {
+
+// Opens the SQLite database at `path` read-write, deletes rows whose parent
+// row no longer exists, then runs VACUUM.
+//
+// Statement order matters: each DELETE's subquery reads a table that an
+// earlier statement has already cleaned (Runs checks Experiments, Tags and
+// Graphs check Runs, Tensors checks Tags, TensorStrings checks Tensors,
+// Nodes and NodeInputs check Graphs), so orphans cascade in a single pass.
+//
+// Every step uses the *OrDie / TF_CHECK_OK forms, so no error is returned
+// to the caller.
+void Vacuum(const char* path) {
+  LOG(INFO) << "Opening SQLite DB: " << path;
+  Sqlite* db;
+  TF_CHECK_OK(Sqlite::Open(path, SQLITE_OPEN_READWRITE, &db));
+  core::ScopedUnref db_unref(db);
+
+  // TODO(jart): Maybe defragment rowids on Tensors.
+  // TODO(jart): Maybe LIMIT deletes and incremental VACUUM.
+
+  // clang-format off
+
+  LOG(INFO) << "Deleting orphaned Experiments";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      Experiments
+    WHERE
+      user_id IS NOT NULL
+      AND user_id NOT IN (SELECT user_id FROM Users)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Deleting orphaned Runs";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      Runs
+    WHERE
+      experiment_id IS NOT NULL
+      AND experiment_id NOT IN (SELECT experiment_id FROM Experiments)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Deleting orphaned Tags";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      Tags
+    WHERE
+      run_id IS NOT NULL
+      AND run_id NOT IN (SELECT run_id FROM Runs)
+  )sql").StepAndResetOrDie();
+
+  // TODO(jart): What should we do if plugins define non-tag tensor series?
+  LOG(INFO) << "Deleting orphaned Tensors";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      Tensors
+    WHERE
+      series IS NOT NULL
+      AND series NOT IN (SELECT tag_id FROM Tags)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Deleting orphaned TensorStrings";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      TensorStrings
+    WHERE
+      tensor_rowid NOT IN (SELECT rowid FROM Tensors)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Deleting orphaned Graphs";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      Graphs
+    WHERE
+      run_id IS NOT NULL
+      AND run_id NOT IN (SELECT run_id FROM Runs)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Deleting orphaned Nodes";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      Nodes
+    WHERE
+      graph_id NOT IN (SELECT graph_id FROM Graphs)
+  )sql").StepAndResetOrDie();
+
+  LOG(INFO) << "Deleting orphaned NodeInputs";
+  db->PrepareOrDie(R"sql(
+    DELETE FROM
+      NodeInputs
+    WHERE
+      graph_id NOT IN (SELECT graph_id FROM Graphs)
+  )sql").StepAndResetOrDie();
+
+  // Runs last so the space freed by the deletes above is reclaimed too.
+  LOG(INFO) << "Running VACUUM";
+  db->PrepareOrDie("VACUUM").StepAndResetOrDie();
+
+  // clang-format on
+}
+
+// Entry point of the vacuum tool: treats every remaining command line
+// argument as a SQLite DB path and runs Vacuum() on each, in order.
+//
+// Returns 0 on success and -1 when the arguments are unusable.
+int main(int argc, char* argv[]) {
+  string usage = Flags::Usage(argv[0], {});
+  bool parse_result = Flags::Parse(&argc, argv, {});
+  if (!parse_result) {
+    std::cerr << "The vacuum tool rebuilds SQLite database files created by\n"
+              << "SummaryDbWriter, which makes them smaller.\n\n"
+              << "This means deleting orphaned rows and rebuilding b-tree\n"
+              << "pages so empty space from deleted rows is cleared. Any\n"
+              << "superfluous padding of Tensor BLOBs is also removed.\n\n"
+              << usage;
+    return -1;
+  }
+  port::InitMain(argv[0], &argc, &argv);
+  if (argc < 2) {
+    std::cerr << "Need at least one SQLite DB path.\n";
+    return -1;
+  }
+  // Validate every argument before touching any database. Checking only the
+  // first one would let a flag-like argument later in the list reach
+  // Vacuum() as a path after earlier databases were already modified.
+  for (int i = 1; i < argc; ++i) {
+    if (argv[i][0] == '-') {
+      std::cerr << "Expected a SQLite DB path but got: " << argv[i] << "\n";
+      return -1;
+    }
+  }
+  for (int i = 1; i < argc; ++i) {
+    Vacuum(argv[i]);
+  }
+  return 0;
+}
+
+}  // namespace
+}  // namespace tensorflow
+
+int main(int argc, char* argv[]) { return tensorflow::main(argc, argv); }
|