diff options
author | Justine Tunney <jart@google.com> | 2017-12-07 02:32:29 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-12-07 02:36:30 -0800 |
commit | 1730f9743c6a57beee8158bc35c689d24c8df729 (patch) | |
tree | f27ae420e66b21ff12b6c8d7bd2e22b1397cb9f0 | |
parent | b9df87ffc4a53566320e2baf17f0daeca25fdde9 (diff) |
Add Snappy support to SQLite
SWIG and CLIF are way too hard. This is more idiomatic. Native
extensions that add functions for protobufs will follow.
PiperOrigin-RevId: 178215852
-rw-r--r-- | tensorflow/contrib/cmake/external/sqlite.cmake | 1 | ||||
-rw-r--r-- | tensorflow/contrib/tensorboard/db/BUILD | 19 | ||||
-rw-r--r-- | tensorflow/contrib/tensorboard/db/tbsnap.cc | 214 | ||||
-rw-r--r-- | tensorflow/contrib/tensorboard/db/tbsnap_test.py | 70 | ||||
-rw-r--r-- | third_party/sqlite.BUILD | 5 |
5 files changed, 308 insertions, 1 deletions
diff --git a/tensorflow/contrib/cmake/external/sqlite.cmake b/tensorflow/contrib/cmake/external/sqlite.cmake index 785039a469..14d8148e6e 100644 --- a/tensorflow/contrib/cmake/external/sqlite.cmake +++ b/tensorflow/contrib/cmake/external/sqlite.cmake @@ -28,6 +28,7 @@ endif() set(sqlite_HEADERS "${sqlite_BUILD}/sqlite3.h" + "${sqlite_BUILD}/sqlite3ext.h" ) if (WIN32) diff --git a/tensorflow/contrib/tensorboard/db/BUILD b/tensorflow/contrib/tensorboard/db/BUILD index 9d3d60c24d..28b5eba2a4 100644 --- a/tensorflow/contrib/tensorboard/db/BUILD +++ b/tensorflow/contrib/tensorboard/db/BUILD @@ -55,6 +55,25 @@ tf_cc_test( ], ) +cc_library( + name = "tbsnap", + srcs = ["tbsnap.cc"], + deps = [ + "@snappy", + "@sqlite_archive//:sqlite", + ], +) + +py_test( + name = "tbsnap_test", + size = "small", + srcs = ["tbsnap_test.py"], + data = ["libtbsnap.so"], + srcs_version = "PY2AND3", + tags = ["nomac"], + deps = ["//tensorflow/python:client_testlib"], +) + filegroup( name = "all_files", srcs = glob(["*"]), diff --git a/tensorflow/contrib/tensorboard/db/tbsnap.cc b/tensorflow/contrib/tensorboard/db/tbsnap.cc new file mode 100644 index 0000000000..588ea9af89 --- /dev/null +++ b/tensorflow/contrib/tensorboard/db/tbsnap.cc @@ -0,0 +1,214 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +/// \brief SQLite extension for Snappy compression +/// +/// Snappy is a compression library that trades ratio for speed, almost going a +/// tenth as fast as memcpy(). +/// +/// This extension adds the following native functions: +/// +/// - snap(value: NULL|BLOB|TEXT) -> NULL|BLOB +/// +/// Applies Snappy compression. If value is NULL, then NULL is returned. If +/// value is TEXT or BLOB, then it is compressed and the result is a BLOB. +/// An uncompressed byte is prepended to indicate the original type. +/// +/// - unsnap(value: NULL|BLOB) -> NULL|TEXT|BLOB +/// +/// Decompresses value created by snap(). If value is NULL, then NULL is +/// returned. If value is empty, then an empty blob is returned. Otherwise +/// the original type is restored from the first byte and the remaining ones +/// are decompressed. +/// +/// These functions are deterministic so they can be used for all purposes, +/// including INDEX. Please note that SQLite does not currently +/// perform common sub-expression optimization for pure functions when +/// compiling queries. +/// +/// If your SQLite environment isn't universally UTF8, please file an issue +/// with the TensorBoard team letting us know. While this implementation should +/// work, its performance could be improved to avoid superfluous TEXT coding. 
+ +#include "sqlite3ext.h" +#include "snappy.h" + +namespace { +SQLITE_EXTENSION_INIT1 + +void snap(sqlite3_context* ctx, int argc, sqlite3_value** argv) { + const char* data; + int type = sqlite3_value_type(argv[0]); + switch (type) { + case SQLITE_NULL: + return; + case SQLITE_BLOB: + data = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0])); + break; + case SQLITE_TEXT: + data = reinterpret_cast<const char*>(sqlite3_value_text(argv[0])); + break; + default: + sqlite3_result_error(ctx, "snap() takes NULL|BLOB|TEXT", -1); + sqlite3_result_error_code(ctx, SQLITE_MISMATCH); + return; + } + int size = sqlite3_value_bytes(argv[0]); + if (size <= 0) { + char result[] = {static_cast<char>(type)}; + sqlite3_result_blob(ctx, result, sizeof(result), SQLITE_TRANSIENT); + return; + } + size_t output_size = + snappy::MaxCompressedLength(static_cast<size_t>(size)) + 1; + if (output_size > + sqlite3_limit(sqlite3_context_db_handle(ctx), SQLITE_LIMIT_LENGTH, -1)) { + sqlite3_result_error_toobig(ctx); + return; + } + char* output = + static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size))); + if (output == nullptr) { + sqlite3_result_error_nomem(ctx); + return; + } + *output++ = static_cast<char>(type), --output_size; + snappy::RawCompress(data, static_cast<size_t>(size), output, &output_size); + sqlite3_result_blob(ctx, output - 1, static_cast<int>(output_size + 1), + sqlite3_free); +} + +void unsnap(sqlite3_context* ctx, int argc, sqlite3_value** argv) { + int type = sqlite3_value_type(argv[0]); + if (type == SQLITE_NULL) return; + if (type != SQLITE_BLOB) { + sqlite3_result_error(ctx, "unsnap() takes NULL|BLOB", -1); + sqlite3_result_error_code(ctx, SQLITE_MISMATCH); + return; + } + int size = sqlite3_value_bytes(argv[0]); + const char* blob = reinterpret_cast<const char*>(sqlite3_value_blob(argv[0])); + if (size <= 0) { + sqlite3_result_zeroblob(ctx, 0); + return; + } + type = static_cast<int>(*blob++), --size; + if (type != SQLITE_BLOB && type != 
SQLITE_TEXT) { + sqlite3_result_error(ctx, "unsnap() first byte is invalid type", -1); + sqlite3_result_error_code(ctx, SQLITE_CORRUPT); + return; + } + if (size == 0) { + if (type == SQLITE_TEXT) { + sqlite3_result_text(ctx, "", 0, SQLITE_STATIC); + } else { + sqlite3_result_zeroblob(ctx, 0); + } + return; + } + size_t output_size; + if (!snappy::GetUncompressedLength(blob, static_cast<size_t>(size), + &output_size)) { + sqlite3_result_error(ctx, "snappy parse error", -1); + sqlite3_result_error_code(ctx, SQLITE_CORRUPT); + return; + } + if (output_size > + sqlite3_limit(sqlite3_context_db_handle(ctx), SQLITE_LIMIT_LENGTH, -1)) { + sqlite3_result_error_toobig(ctx); + return; + } + char* output = + static_cast<char*>(sqlite3_malloc(static_cast<int>(output_size))); + if (output == nullptr) { + sqlite3_result_error_nomem(ctx); + return; + } + if (!snappy::RawUncompress(blob, static_cast<size_t>(size), output)) { + sqlite3_result_error(ctx, "snappy message corruption", -1); + sqlite3_result_error_code(ctx, SQLITE_CORRUPT); + return; + } + if (type == SQLITE_TEXT) { + sqlite3_result_text(ctx, output, static_cast<int>(output_size), + sqlite3_free); + } else { + sqlite3_result_blob(ctx, output, static_cast<int>(output_size), + sqlite3_free); + } +} + +int init(sqlite3* db, const char** pzErrMsg, const sqlite3_api_routines* pApi) { + SQLITE_EXTENSION_INIT2(pApi); + int rc; + + rc = sqlite3_create_function_v2( + db, + "snap", // zFunctionName + 1, // nArg + SQLITE_UTF8 | SQLITE_DETERMINISTIC, // eTextRep + nullptr, // pApp + snap, // xFunc + nullptr, // xStep + nullptr, // xFinal + nullptr // xDestroy + ); + if (rc != SQLITE_OK) { + *pzErrMsg = "oh snap()"; + return rc; + } + + rc = sqlite3_create_function_v2( + db, + "unsnap", // zFunctionName + 1, // nArg + SQLITE_UTF8 | SQLITE_DETERMINISTIC, // eTextRep + nullptr, // pApp + unsnap, // xFunc + nullptr, // xStep + nullptr, // xFinal + nullptr // xDestroy + ); + if (rc != SQLITE_OK) { + *pzErrMsg = "oh unsnap()"; + return 
rc; + } + + return SQLITE_OK; +} + +} // namespace + +extern "C" { + +#if defined(TF_SQLITE3_AUTO_EXTENSION) +extern int sqlite3_tbsnap_status = sqlite3_auto_extension(init); +#else + +#if defined(_MSC_VER) || defined(__MINGW32__) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT __attribute__((visibility("default"))) +#endif + +// SQLite deduces this function name from "libtbsnap.so". +EXPORT extern int sqlite3_tbsnap_init(sqlite3* db, const char** pzErrMsg, + const sqlite3_api_routines* pApi) { + return init(db, pzErrMsg, pApi); +} + +#endif + +} // extern "C" diff --git a/tensorflow/contrib/tensorboard/db/tbsnap_test.py b/tensorflow/contrib/tensorboard/db/tbsnap_test.py new file mode 100644 index 0000000000..2d011959e0 --- /dev/null +++ b/tensorflow/contrib/tensorboard/db/tbsnap_test.py @@ -0,0 +1,70 @@ +# Copyright 2017 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import sqlite3 + +from tensorflow.python.framework import test_util +from tensorflow.python.platform import resource_loader +from tensorflow.python.platform import test + + +class SqliteSnappyTest(test_util.TensorFlowTestCase): + + def setUp(self): + super(SqliteSnappyTest, self).setUp() + self.db = sqlite3.connect(':memory:') + self.db.enable_load_extension(True) + self.db.execute("select load_extension('%s')" % + resource_loader.get_path_to_datafile('libtbsnap.so')) + self.db.enable_load_extension(False) + + def testRoundTrip(self): + self.assertEqual('hello', + self.db.execute('SELECT unsnap(snap(\'hello\'))') + .fetchone()[0]) + self.assertEqual( + 'hello', + self.db.execute( + 'SELECT CAST(unsnap(snap(CAST(\'hello\' AS BLOB))) AS TEXT)') + .fetchone()[0]) + self.assertEqual( + 'text', + self.db.execute('SELECT typeof(unsnap(snap(\'h\')))').fetchone()[0]) + self.assertEqual( + 'blob', + self.db.execute( + 'SELECT typeof(unsnap(snap(CAST(\'h\' AS BLOB))))').fetchone()[0]) + + def testNull_passesThrough(self): + self.assertIsNone( + self.db.execute('SELECT unsnap(snap(NULL))').fetchone()[0]) + + def testEmpty_passesThrough(self): + self.assertEqual('', + self.db.execute('SELECT unsnap(snap(\'\'))').fetchone()[0]) + self.assertEqual( + 'text', + self.db.execute('SELECT typeof(unsnap(snap(\'\')))').fetchone()[0]) + self.assertEqual( + 'blob', + self.db.execute( + 'SELECT typeof(unsnap(snap(CAST(\'\' AS BLOB))))').fetchone()[0]) + + +if __name__ == '__main__': + test.main() diff --git a/third_party/sqlite.BUILD b/third_party/sqlite.BUILD index 9840d7b151..03a6a7a5da 100644 --- a/third_party/sqlite.BUILD +++ b/third_party/sqlite.BUILD @@ -9,7 +9,10 @@ licenses(["unencumbered"]) # Public Domain cc_library( name = "sqlite", srcs = ["sqlite3.c"], - hdrs = ["sqlite3.h"], + hdrs 
= [ + "sqlite3.h", + "sqlite3ext.h", + ], includes = ["."], linkopts = ["-lm"], visibility = ["//visibility:public"], |