aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/contrib/tensorboard
diff options
context:
space:
mode:
authorGravatar Justine Tunney <jart@google.com>2018-01-11 19:19:21 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-01-11 19:23:49 -0800
commitf7f515893eed68465726d671f2b7b568977a3af4 (patch)
treedd193f46c9ee0e7dff73c71b7f52afe3d143ae8d /tensorflow/contrib/tensorboard
parent8aab9b2197efb90d58e86b18b614cd0e63dc56a1 (diff)
Create loader and vacuum tools for TensorBoard DB
PiperOrigin-RevId: 181695156
Diffstat (limited to 'tensorflow/contrib/tensorboard')
-rw-r--r--tensorflow/contrib/tensorboard/db/BUILD33
-rw-r--r--tensorflow/contrib/tensorboard/db/loader.cc124
-rw-r--r--tensorflow/contrib/tensorboard/db/vacuum.cc137
3 files changed, 293 insertions, 1 deletion
diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD
index 9932a30368..6ff5a9e2b1 100644
--- a/tensorflow/contrib/tensorboard/db/BUILD
+++ b/tensorflow/contrib/tensorboard/db/BUILD
@@ -5,7 +5,12 @@ package(default_visibility = ["//tensorflow:internal"])
licenses(["notice"]) # Apache 2.0
-load("//tensorflow:tensorflow.bzl", "tf_cc_test", "tf_copts")
+load(
+ "//tensorflow:tensorflow.bzl",
+ "tf_cc_binary",
+ "tf_cc_test",
+ "tf_copts",
+)
cc_library(
name = "schema",
@@ -106,6 +111,32 @@ cc_library(
],
)
# Command-line tool that imports a tf.Event record log into a SQLite
# database via SummaryDbWriter. Statically linked; see loader.cc for usage.
tf_cc_binary(
    name = "loader",
    srcs = ["loader.cc"],
    linkstatic = 1,
    deps = [
        ":schema",
        ":summary_db_writer",
        "//tensorflow/core:framework",
        "//tensorflow/core:framework_internal",
        "//tensorflow/core:lib",
        "//tensorflow/core:protos_all_cc",
        "//tensorflow/core/lib/db:sqlite",
    ],
)
+
# Command-line tool that deletes orphaned rows from a TensorBoard SQLite
# database and then VACUUMs it. Statically linked; see vacuum.cc for usage.
tf_cc_binary(
    name = "vacuum",
    srcs = ["vacuum.cc"],
    linkstatic = 1,
    deps = [
        "//tensorflow/core:framework_internal",
        "//tensorflow/core:lib",
        "//tensorflow/core/lib/db:sqlite",
    ],
)
+
filegroup(
name = "all_files",
srcs = glob(["*"]),
diff --git a/tensorflow/contrib/tensorboard/db/loader.cc b/tensorflow/contrib/tensorboard/db/loader.cc
new file mode 100644
index 0000000000..4d7337a53d
--- /dev/null
+++ b/tensorflow/contrib/tensorboard/db/loader.cc
@@ -0,0 +1,124 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
#include <iostream>
#include <string>
#include <type_traits>
#include <vector>
+
+#include "tensorflow/contrib/tensorboard/db/schema.h"
+#include "tensorflow/contrib/tensorboard/db/summary_db_writer.h"
+#include "tensorflow/core/lib/db/sqlite.h"
+#include "tensorflow/core/lib/io/record_reader.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/util/command_line_flags.h"
+#include "tensorflow/core/util/event.pb.h"
+
+namespace tensorflow {
+namespace {
+
// Formats an integral value with comma thousands separators, e.g.
// 1234567 -> "1,234,567".
//
// Fixes a defect in the previous in-place digit-shuffling version: it
// treated a leading '-' as a digit, so AddCommas(-123456) produced
// "-,123,456". Grouping now covers digits only, never the sign.
template <typename T>
std::string AddCommas(T n) {
  static_assert(std::is_integral<T>::value, "is_integral");
  std::string s = std::to_string(n);
  // Grouping must stop before a leading minus sign, if any.
  const size_t stop = (!s.empty() && s[0] == '-') ? 1 : 0;
  // Insert a comma before every group of three digits, right to left.
  size_t i = s.size();
  while (i > stop + 3) {
    i -= 3;
    s.insert(i, 1, ',');
  }
  return s;
}
+
+int main(int argc, char* argv[]) {
+ string path;
+ string events;
+ string experiment_name;
+ string run_name;
+ string user_name;
+ std::vector<Flag> flag_list = {
+ Flag("db", &path, "Path of SQLite DB file"),
+ Flag("events", &events, "TensorFlow record proto event log file"),
+ Flag("experiment_name", &experiment_name, "The DB experiment_name value"),
+ Flag("run_name", &run_name, "The DB run_name value"),
+ Flag("user_name", &user_name, "The DB user_name value"),
+ };
+ string usage = Flags::Usage(argv[0], flag_list);
+ bool parse_result = Flags::Parse(&argc, argv, flag_list);
+ if (!parse_result || path.empty()) {
+ std::cerr << "The loader tool imports tf.Event record files, created by\n"
+ << "SummaryFileWriter, into the sorts of SQLite database files\n"
+ << "created by SummaryDbWriter.\n\n"
+ << "In addition to the flags below, the environment variables\n"
+ << "defined by core/lib/db/sqlite.cc can also be set.\n\n"
+ << usage;
+ return -1;
+ }
+ port::InitMain(argv[0], &argc, &argv);
+ Env* env = Env::Default();
+
+ LOG(INFO) << "Opening SQLite file: " << path;
+ Sqlite* db;
+ TF_CHECK_OK(Sqlite::Open(
+ path, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_NOMUTEX,
+ &db));
+ core::ScopedUnref unref_db(db);
+
+ LOG(INFO) << "Initializing TensorBoard schema";
+ TF_CHECK_OK(SetupTensorboardSqliteDb(db));
+
+ LOG(INFO) << "Creating SummaryDbWriter";
+ SummaryWriterInterface* db_writer;
+ TF_CHECK_OK(CreateSummaryDbWriter(db, experiment_name, run_name, user_name,
+ env, &db_writer));
+ core::ScopedUnref unref(db_writer);
+
+ LOG(INFO) << "Loading TF event log: " << events;
+ std::unique_ptr<RandomAccessFile> file;
+ TF_CHECK_OK(env->NewRandomAccessFile(events, &file));
+ io::RecordReader reader(file.get());
+
+ uint64 start = env->NowMicros();
+ uint64 records = 0;
+ uint64 offset = 0;
+ string record;
+ while (true) {
+ std::unique_ptr<Event> event = std::unique_ptr<Event>(new Event);
+ Status s = reader.ReadRecord(&offset, &record);
+ if (s.code() == error::OUT_OF_RANGE) break;
+ TF_CHECK_OK(s);
+ if (!ParseProtoUnlimited(event.get(), record)) {
+ LOG(FATAL) << "Corrupt tf.Event record"
+ << " offset=" << (offset - record.size())
+ << " size=" << static_cast<int>(record.size());
+ }
+ TF_CHECK_OK(db_writer->WriteEvent(std::move(event)));
+ ++records;
+ }
+ uint64 elapsed = env->NowMicros() - start;
+ LOG(INFO) << "Loaded " << AddCommas(offset) << " bytes with "
+ << AddCommas(records) << " records at "
+ << AddCommas(offset / (elapsed / 1000000)) << " bps";
+
+ return 0;
+}
+
+} // namespace
+} // namespace tensorflow
+
// Real entry point; defers to the main() defined in the unnamed namespace
// inside namespace tensorflow above, where the tool's logic lives.
int main(int argc, char* argv[]) { return tensorflow::main(argc, argv); }
diff --git a/tensorflow/contrib/tensorboard/db/vacuum.cc b/tensorflow/contrib/tensorboard/db/vacuum.cc
new file mode 100644
index 0000000000..5febe63f06
--- /dev/null
+++ b/tensorflow/contrib/tensorboard/db/vacuum.cc
@@ -0,0 +1,137 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include <iostream>
+
+#include "tensorflow/core/lib/db/sqlite.h"
+#include "tensorflow/core/platform/init_main.h"
+#include "tensorflow/core/util/command_line_flags.h"
+
+namespace tensorflow {
+namespace {
+
// Shrinks a TensorBoard SQLite DB at `path` by deleting orphaned rows and
// then rebuilding the file with VACUUM so freed pages are returned to the
// filesystem. Any SQLite failure crashes the process (OrDie / TF_CHECK_OK).
//
// NOTE(review): the DELETEs run parent-table-first (Experiments -> Runs ->
// Tags -> Tensors -> ...), so rows orphaned by an earlier statement are
// caught by later ones. Preserve this ordering when editing.
void Vacuum(const char* path) {
  LOG(INFO) << "Opening SQLite DB: " << path;
  Sqlite* db;
  TF_CHECK_OK(Sqlite::Open(path, SQLITE_OPEN_READWRITE, &db));
  core::ScopedUnref db_unref(db);

  // TODO(jart): Maybe defragment rowids on Tensors.
  // TODO(jart): Maybe LIMIT deletes and incremental VACUUM.

  // clang-format off

  // The IS NOT NULL guards keep rows whose foreign key was never set;
  // only rows pointing at a nonexistent parent are removed.
  LOG(INFO) << "Deleting orphaned Experiments";
  db->PrepareOrDie(R"sql(
    DELETE FROM
      Experiments
    WHERE
      user_id IS NOT NULL
      AND user_id NOT IN (SELECT user_id FROM Users)
  )sql").StepAndResetOrDie();

  LOG(INFO) << "Deleting orphaned Runs";
  db->PrepareOrDie(R"sql(
    DELETE FROM
      Runs
    WHERE
      experiment_id IS NOT NULL
      AND experiment_id NOT IN (SELECT experiment_id FROM Experiments)
  )sql").StepAndResetOrDie();

  LOG(INFO) << "Deleting orphaned Tags";
  db->PrepareOrDie(R"sql(
    DELETE FROM
      Tags
    WHERE
      run_id IS NOT NULL
      AND run_id NOT IN (SELECT run_id FROM Runs)
  )sql").StepAndResetOrDie();

  // TODO(jart): What should we do if plugins define non-tag tensor series?
  LOG(INFO) << "Deleting orphaned Tensors";
  db->PrepareOrDie(R"sql(
    DELETE FROM
      Tensors
    WHERE
      series IS NOT NULL
      AND series NOT IN (SELECT tag_id FROM Tags)
  )sql").StepAndResetOrDie();

  LOG(INFO) << "Deleting orphaned TensorStrings";
  db->PrepareOrDie(R"sql(
    DELETE FROM
      TensorStrings
    WHERE
      tensor_rowid NOT IN (SELECT rowid FROM Tensors)
  )sql").StepAndResetOrDie();

  LOG(INFO) << "Deleting orphaned Graphs";
  db->PrepareOrDie(R"sql(
    DELETE FROM
      Graphs
    WHERE
      run_id IS NOT NULL
      AND run_id NOT IN (SELECT run_id FROM Runs)
  )sql").StepAndResetOrDie();

  LOG(INFO) << "Deleting orphaned Nodes";
  db->PrepareOrDie(R"sql(
    DELETE FROM
      Nodes
    WHERE
      graph_id NOT IN (SELECT graph_id FROM Graphs)
  )sql").StepAndResetOrDie();

  LOG(INFO) << "Deleting orphaned NodeInputs";
  db->PrepareOrDie(R"sql(
    DELETE FROM
      NodeInputs
    WHERE
      graph_id NOT IN (SELECT graph_id FROM Graphs)
  )sql").StepAndResetOrDie();

  // VACUUM rewrites the whole DB file, compacting b-tree pages and
  // discarding space left behind by the deletes above.
  LOG(INFO) << "Running VACUUM";
  db->PrepareOrDie("VACUUM").StepAndResetOrDie();

  // clang-format on
}
+
+int main(int argc, char* argv[]) {
+ string usage = Flags::Usage(argv[0], {});
+ bool parse_result = Flags::Parse(&argc, argv, {});
+ if (!parse_result) {
+ std::cerr << "The vacuum tool rebuilds SQLite database files created by\n"
+ << "SummaryDbWriter, which makes them smaller.\n\n"
+ << "This means deleting orphaned rows and rebuilding b-tree\n"
+ << "pages so empty space from deleted rows is cleared. Any\n"
+ << "superfluous padding of Tensor BLOBs is also removed.\n\n"
+ << usage;
+ return -1;
+ }
+ port::InitMain(argv[0], &argc, &argv);
+ if (argc < 2 || argv[1][0] == '-') {
+ std::cerr << "Need at least one SQLite DB path.\n";
+ return -1;
+ }
+ for (int i = 1; i < argc; ++i) {
+ Vacuum(argv[i]);
+ }
+ return 0;
+}
+
+} // namespace
+} // namespace tensorflow
+
// Real entry point; defers to the main() defined in the unnamed namespace
// inside namespace tensorflow above, where the tool's logic lives.
int main(int argc, char* argv[]) { return tensorflow::main(argc, argv); }