diff options
-rw-r--r-- | tensorflow/core/kernels/immutable_constant_op_test.cc | 2 | ||||
-rw-r--r-- | tensorflow/core/lib/io/path.cc | 61 | ||||
-rw-r--r-- | tensorflow/core/lib/io/path.h | 15 | ||||
-rw-r--r-- | tensorflow/core/lib/io/path_test.cc | 44 | ||||
-rw-r--r-- | tensorflow/core/platform/cloud/gcs_file_system.cc | 2 | ||||
-rw-r--r-- | tensorflow/core/platform/env.cc | 2 | ||||
-rw-r--r-- | tensorflow/core/platform/env_test.cc | 33 | ||||
-rw-r--r-- | tensorflow/core/platform/file_system.cc | 44 | ||||
-rw-r--r-- | tensorflow/core/platform/file_system.h | 13 | ||||
-rw-r--r-- | tensorflow/core/platform/file_system_test.cc | 2 | ||||
-rw-r--r-- | tensorflow/core/platform/hadoop/hadoop_file_system.cc | 4 | ||||
-rw-r--r-- | tensorflow/core/platform/posix/posix_file_system.h | 3 |
12 files changed, 124 insertions, 101 deletions
diff --git a/tensorflow/core/kernels/immutable_constant_op_test.cc b/tensorflow/core/kernels/immutable_constant_op_test.cc index 93d726a64d..d822e316ea 100644 --- a/tensorflow/core/kernels/immutable_constant_op_test.cc +++ b/tensorflow/core/kernels/immutable_constant_op_test.cc @@ -64,7 +64,7 @@ class TestFileSystem : public NullFileSystem { std::unique_ptr<ReadOnlyMemoryRegion>* result) override { float val = 0; StringPiece scheme, host, path; - ParseURI(fname, &scheme, &host, &path); + io::ParseURI(fname, &scheme, &host, &path); // For the tests create in-memory regions with float values equal to the // region name. if (path == "/2") { diff --git a/tensorflow/core/lib/io/path.cc b/tensorflow/core/lib/io/path.cc index de49d07d62..31397722fe 100644 --- a/tensorflow/core/lib/io/path.cc +++ b/tensorflow/core/lib/io/path.cc @@ -14,6 +14,7 @@ limitations under the License. ==============================================================================*/ #include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/scanner.h" #include "tensorflow/core/lib/strings/strcat.h" namespace tensorflow { @@ -49,11 +50,14 @@ string JoinPathImpl(std::initializer_list<StringPiece> paths) { return result; } -// Return the parts of the path, split on the final "/". If there is no -// "/" in the path, the first part of the output is empty and the second -// is the input. If the only "/" in the path is the first character, it is -// the first part of the output. -std::pair<StringPiece, StringPiece> SplitPath(StringPiece path) { +// Return the parts of the URI, split on the final "/" in the path. If there is +// no "/" in the path, the first part of the output is the scheme and host, and +// the second is the path. If the only "/" in the path is the first character, +// it is included in the first part of the output. +std::pair<StringPiece, StringPiece> SplitPath(StringPiece uri) { + StringPiece scheme, host, path; + ParseURI(uri, &scheme, &host, &path); + auto pos = path.rfind('/'); #ifdef PLATFORM_WINDOWS if (pos == StringPiece::npos) @@ -61,15 +65,17 @@ std::pair<StringPiece, StringPiece> SplitPath(StringPiece path) { #endif // Handle the case with no '/' in 'path'. if (pos == StringPiece::npos) - return std::make_pair(StringPiece(path.data(), 0), path); + return std::make_pair(StringPiece(uri.begin(), host.end() - uri.begin()), + path); // Handle the case with a single leading '/' in 'path'. if (pos == 0) - return std::make_pair(StringPiece(path.data(), 1), - StringPiece(path.data() + 1, path.size() - 1)); + return std::make_pair( + StringPiece(uri.begin(), path.begin() + 1 - uri.begin()), + StringPiece(path.data() + 1, path.size() - 1)); return std::make_pair( - StringPiece(path.data(), pos), + StringPiece(uri.begin(), path.begin() + pos - uri.begin()), StringPiece(path.data() + pos + 1, path.size() - (pos + 1))); } @@ -185,5 +191,42 @@ string CleanPath(StringPiece unclean_path) { return path; } +void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host, + StringPiece* path) { + // 0. Parse scheme + // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]* + // TODO(keveman): Allow "+" and "-" in the scheme. + if (!strings::Scanner(remaining) + .One(strings::Scanner::LETTER) + .Many(strings::Scanner::LETTER_DIGIT_DOT) + .StopCapture() + .OneLiteral("://") + .GetResult(&remaining, scheme)) { + // If there's no scheme, assume the entire string is a path. + *scheme = StringPiece(remaining.begin(), 0); + *host = StringPiece(remaining.begin(), 0); + *path = remaining; + return; + } + + // 1. Parse host + if (!strings::Scanner(remaining).ScanUntil('/').GetResult(&remaining, host)) { + // No path, so the rest of the URI is the host. + *host = remaining; + *path = StringPiece(remaining.end(), 0); + return; + } + + // 2. The rest is the path + *path = remaining; +} + +string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) { + if (scheme.empty()) { + return path.ToString(); + } + return strings::StrCat(scheme, "://", host, path); +} + } // namespace io } // namespace tensorflow diff --git a/tensorflow/core/lib/io/path.h b/tensorflow/core/lib/io/path.h index 64165f857f..955098f5b5 100644 --- a/tensorflow/core/lib/io/path.h +++ b/tensorflow/core/lib/io/path.h @@ -74,6 +74,21 @@ StringPiece Extension(StringPiece path); // string manipulation, completely independent of process state. string CleanPath(StringPiece path); +// Populates the scheme, host, and path from a URI. scheme, host, and path are +// guaranteed by this function to point into the contents of uri, even if +// empty. +// +// Corner cases: +// - If the URI is invalid, scheme and host are set to empty strings and the +// passed string is assumed to be a path +// - If the URI omits the path (e.g. file://host), then the path is left empty. +void ParseURI(StringPiece uri, StringPiece* scheme, StringPiece* host, + StringPiece* path); + +// Creates a URI from a scheme, host, and path. If the scheme is empty, we just +// return the path. +string CreateURI(StringPiece scheme, StringPiece host, StringPiece path); + } // namespace io } // namespace tensorflow diff --git a/tensorflow/core/lib/io/path_test.cc b/tensorflow/core/lib/io/path_test.cc index f3f3d245d5..e3275b93b6 100644 --- a/tensorflow/core/lib/io/path_test.cc +++ b/tensorflow/core/lib/io/path_test.cc @@ -45,6 +45,8 @@ TEST(PathTest, IsAbsolutePath) { } TEST(PathTest, Dirname) { + EXPECT_EQ("hdfs://127.0.0.1:9000/", + Dirname("hdfs://127.0.0.1:9000/train.csv.tfrecords")); EXPECT_EQ("/hello", Dirname("/hello/")); EXPECT_EQ("/", Dirname("/hello")); EXPECT_EQ("hello", Dirname("hello/world")); @@ -97,5 +99,47 @@ TEST(PathTest, CleanPath) { EXPECT_EQ("../../bar", CleanPath("foo/../../../bar")); } +#define EXPECT_PARSE_URI(uri, scheme, host, path) \ + do { \ + StringPiece u(uri); \ + StringPiece s, h, p; \ + ParseURI(u, &s, &h, &p); \ + EXPECT_EQ(scheme, s.ToString()); \ + EXPECT_EQ(host, h.ToString()); \ + EXPECT_EQ(path, p.ToString()); \ + EXPECT_EQ(uri, CreateURI(scheme, host, path)); \ + EXPECT_LE(u.begin(), s.begin()); \ + EXPECT_GE(u.end(), s.begin()); \ + EXPECT_LE(u.begin(), s.end()); \ + EXPECT_GE(u.end(), s.end()); \ + EXPECT_LE(u.begin(), h.begin()); \ + EXPECT_GE(u.end(), h.begin()); \ + EXPECT_LE(u.begin(), h.end()); \ + EXPECT_GE(u.end(), h.end()); \ + EXPECT_LE(u.begin(), p.begin()); \ + EXPECT_GE(u.end(), p.begin()); \ + EXPECT_LE(u.begin(), p.end()); \ + EXPECT_GE(u.end(), p.end()); \ + } while (0) + +TEST(PathTest, CreateParseURI) { + EXPECT_PARSE_URI("http://foo", "http", "foo", ""); + EXPECT_PARSE_URI("/encrypted/://foo", "", "", "/encrypted/://foo"); + EXPECT_PARSE_URI("/usr/local/foo", "", "", "/usr/local/foo"); + EXPECT_PARSE_URI("file:///usr/local/foo", "file", "", "/usr/local/foo"); + EXPECT_PARSE_URI("local.file:///usr/local/foo", "local.file", "", + "/usr/local/foo"); + EXPECT_PARSE_URI("a-b:///foo", "", "", "a-b:///foo"); + EXPECT_PARSE_URI(":///foo", "", "", ":///foo"); + EXPECT_PARSE_URI("9dfd:///foo", "", "", "9dfd:///foo"); + EXPECT_PARSE_URI("file:", "", "", "file:"); + EXPECT_PARSE_URI("file:/", "", "", "file:/"); + EXPECT_PARSE_URI("hdfs://localhost:8020/path/to/file", "hdfs", + "localhost:8020", "/path/to/file"); + EXPECT_PARSE_URI("hdfs://localhost:8020", "hdfs", "localhost:8020", ""); + EXPECT_PARSE_URI("hdfs://localhost:8020/", "hdfs", "localhost:8020", "/"); +} +#undef EXPECT_PARSE_URI + } // namespace io } // namespace tensorflow diff --git a/tensorflow/core/platform/cloud/gcs_file_system.cc b/tensorflow/core/platform/cloud/gcs_file_system.cc index 867acc7d8a..39228ed869 100644 --- a/tensorflow/core/platform/cloud/gcs_file_system.cc +++ b/tensorflow/core/platform/cloud/gcs_file_system.cc @@ -81,7 +81,7 @@ Status ParseGcsPath(StringPiece fname, bool empty_object_ok, string* bucket, return errors::Internal("bucket and object cannot be null."); } StringPiece scheme, bucketp, objectp; - ParseURI(fname, &scheme, &bucketp, &objectp); + io::ParseURI(fname, &scheme, &bucketp, &objectp); if (scheme != "gs") { return errors::InvalidArgument("GCS path doesn't start with 'gs://': ", fname); diff --git a/tensorflow/core/platform/env.cc b/tensorflow/core/platform/env.cc index a5dd7b45c4..5a09fded9b 100644 --- a/tensorflow/core/platform/env.cc +++ b/tensorflow/core/platform/env.cc @@ -70,7 +70,7 @@ Env::Env() : file_system_registry_(new FileSystemRegistryImpl) {} Status Env::GetFileSystemForFile(const string& fname, FileSystem** result) { StringPiece scheme, host, path; - ParseURI(fname, &scheme, &host, &path); + io::ParseURI(fname, &scheme, &host, &path); FileSystem* file_system = file_system_registry_->Lookup(scheme.ToString()); if (!file_system) { return errors::Unimplemented("File system scheme ", scheme, diff --git a/tensorflow/core/platform/env_test.cc b/tensorflow/core/platform/env_test.cc index dbff7e2531..f6fa27327a 100644 --- a/tensorflow/core/platform/env_test.cc +++ b/tensorflow/core/platform/env_test.cc @@ -229,35 +229,6 @@ TEST_F(DefaultEnvTest, LocalFileSystem) { } } -#define EXPECT_PARSE_URI(uri, scheme, host, path) \ - do { \ - StringPiece s, h, p; \ - ParseURI(uri, &s, &h, &p); \ - EXPECT_EQ(scheme, s.ToString()); \ - EXPECT_EQ(host, h.ToString()); \ - EXPECT_EQ(path, p.ToString()); \ - EXPECT_EQ(uri, CreateURI(scheme, host, path)); \ - } while (0) - -TEST_F(DefaultEnvTest, CreateParseURI) { - EXPECT_PARSE_URI("http://foo", "http", "foo", ""); - EXPECT_PARSE_URI("/encrypted/://foo", "", "", "/encrypted/://foo"); - EXPECT_PARSE_URI("/usr/local/foo", "", "", "/usr/local/foo"); - EXPECT_PARSE_URI("file:///usr/local/foo", "file", "", "/usr/local/foo"); - EXPECT_PARSE_URI("local.file:///usr/local/foo", "local.file", "", - "/usr/local/foo"); - EXPECT_PARSE_URI("a-b:///foo", "", "", "a-b:///foo"); - EXPECT_PARSE_URI(":///foo", "", "", ":///foo"); - EXPECT_PARSE_URI("9dfd:///foo", "", "", "9dfd:///foo"); - EXPECT_PARSE_URI("file:", "", "", "file:"); - EXPECT_PARSE_URI("file:/", "", "", "file:/"); - EXPECT_PARSE_URI("hdfs://localhost:8020/path/to/file", "hdfs", - "localhost:8020", "/path/to/file"); - EXPECT_PARSE_URI("hdfs://localhost:8020", "hdfs", "localhost:8020", ""); - EXPECT_PARSE_URI("hdfs://localhost:8020/", "hdfs", "localhost:8020", "/"); -} -#undef EXPECT_PARSE_URI - TEST_F(DefaultEnvTest, SleepForMicroseconds) { const int64 start = env_->NowMicros(); const int64 sleep_time = 1e6 + 5e5; @@ -274,14 +245,14 @@ class TmpDirFileSystem : public NullFileSystem { public: bool FileExists(const string& dir) override { StringPiece scheme, host, path; - ParseURI(dir, &scheme, &host, &path); + io::ParseURI(dir, &scheme, &host, &path); if (path.empty()) return false; return Env::Default()->FileExists(io::JoinPath(BaseDir(), path)); } Status CreateDir(const string& dir) override { StringPiece scheme, host, path; - ParseURI(dir, &scheme, &host, &path); + io::ParseURI(dir, &scheme, &host, &path); if (scheme != "tmpdirfs") { return errors::FailedPrecondition("scheme must be tmpdirfs"); } diff --git a/tensorflow/core/platform/file_system.cc b/tensorflow/core/platform/file_system.cc index d71ff80143..400835aa07 100644 --- a/tensorflow/core/platform/file_system.cc +++ b/tensorflow/core/platform/file_system.cc @@ -22,7 +22,6 @@ limitations under the License. #include "tensorflow/core/lib/gtl/map_util.h" #include "tensorflow/core/lib/gtl/stl_util.h" #include "tensorflow/core/lib/io/path.h" -#include "tensorflow/core/lib/strings/scanner.h" #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/env.h" @@ -79,43 +78,6 @@ WritableFile::~WritableFile() {} FileSystemRegistry::~FileSystemRegistry() {} -void ParseURI(StringPiece remaining, StringPiece* scheme, StringPiece* host, - StringPiece* path) { - // 0. Parse scheme - // Make sure scheme matches [a-zA-Z][0-9a-zA-Z.]* - // TODO(keveman): Allow "+" and "-" in the scheme. - if (!strings::Scanner(remaining) - .One(strings::Scanner::LETTER) - .Many(strings::Scanner::LETTER_DIGIT_DOT) - .StopCapture() - .OneLiteral("://") - .GetResult(&remaining, scheme)) { - // If there's no scheme, assume the entire string is a path. - scheme->clear(); - host->clear(); - *path = remaining; - return; - } - - // 1. Parse host - if (!strings::Scanner(remaining).ScanUntil('/').GetResult(&remaining, host)) { - // No path, so the rest of the URI is the host. - *host = remaining; - path->clear(); - return; - } - - // 2. The rest is the path - *path = remaining; -} - -string CreateURI(StringPiece scheme, StringPiece host, StringPiece path) { - if (scheme.empty()) { - return path.ToString(); - } - return strings::StrCat(scheme, "://", host, path); -} - Status FileSystem::GetMatchingPaths(const string& pattern, std::vector<string>* results) { results->clear(); @@ -237,9 +199,9 @@ Status FileSystem::DeleteRecursively(const string& dirname, Status FileSystem::RecursivelyCreateDir(const string& dirname) { StringPiece scheme, host, remaining_dir; - ParseURI(dirname, &scheme, &host, &remaining_dir); + io::ParseURI(dirname, &scheme, &host, &remaining_dir); std::vector<StringPiece> sub_dirs; - while (!FileExists(CreateURI(scheme, host, remaining_dir)) && + while (!FileExists(io::CreateURI(scheme, host, remaining_dir)) && !remaining_dir.empty()) { // Basename returns "" for / ending dirs. if (!remaining_dir.ends_with("/")) { @@ -255,7 +217,7 @@ Status FileSystem::RecursivelyCreateDir(const string& dirname) { string built_path = remaining_dir.ToString(); for (const StringPiece sub_dir : sub_dirs) { built_path = io::JoinPath(built_path, sub_dir); - TF_RETURN_IF_ERROR(CreateDir(CreateURI(scheme, host, built_path))); + TF_RETURN_IF_ERROR(CreateDir(io::CreateURI(scheme, host, built_path))); } return Status::OK(); } diff --git a/tensorflow/core/platform/file_system.h b/tensorflow/core/platform/file_system.h index 4456e3f3e9..dfaf75be66 100644 --- a/tensorflow/core/platform/file_system.h +++ b/tensorflow/core/platform/file_system.h @@ -287,19 +287,6 @@ class FileSystemRegistry { std::vector<string>* schemes) = 0; }; -// Populates the scheme, host, and path from a URI. -// -// Corner cases: -// - If the URI is invalid, scheme and host are set to empty strings and the -// passed string is assumed to be a path -// - If the URI omits the path (e.g. file://host), then the path is left empty. -void ParseURI(StringPiece uri, StringPiece* scheme, StringPiece* host, - StringPiece* path); - -// Creates a URI from a scheme, host, and path. If the scheme is empty, we just -// return the path. -string CreateURI(StringPiece scheme, StringPiece host, StringPiece path); - } // namespace tensorflow #endif // TENSORFLOW_CORE_PLATFORM_FILE_SYSTEM_H_ diff --git a/tensorflow/core/platform/file_system_test.cc b/tensorflow/core/platform/file_system_test.cc index 600af91206..8cdabdc8bc 100644 --- a/tensorflow/core/platform/file_system_test.cc +++ b/tensorflow/core/platform/file_system_test.cc @@ -112,7 +112,7 @@ class InterPlanetaryFileSystem : public NullFileSystem { void ParsePath(const string& name, string* parsed_path) { StringPiece scheme, host, path; - ParseURI(name, &scheme, &host, &path); + io::ParseURI(name, &scheme, &host, &path); ASSERT_EQ(scheme, "ipfs"); ASSERT_EQ(host, "solarsystem"); path.Consume("/"); diff --git a/tensorflow/core/platform/hadoop/hadoop_file_system.cc b/tensorflow/core/platform/hadoop/hadoop_file_system.cc index d5792e82cd..749d9e1fcd 100644 --- a/tensorflow/core/platform/hadoop/hadoop_file_system.cc +++ b/tensorflow/core/platform/hadoop/hadoop_file_system.cc @@ -126,7 +126,7 @@ Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) { TF_RETURN_IF_ERROR(hdfs_->status()); StringPiece scheme, namenode, path; - ParseURI(fname, &scheme, &namenode, &path); + io::ParseURI(fname, &scheme, &namenode, &path); const string nn = namenode.ToString(); hdfsBuilder* builder = hdfs_->hdfsNewBuilder(); @@ -144,7 +144,7 @@ Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) { string HadoopFileSystem::TranslateName(const string& name) const { StringPiece scheme, namenode, path; - ParseURI(name, &scheme, &namenode, &path); + io::ParseURI(name, &scheme, &namenode, &path); return path.ToString(); } diff --git a/tensorflow/core/platform/posix/posix_file_system.h b/tensorflow/core/platform/posix/posix_file_system.h index 07bb8c9a6f..ccff70cb56 100644 --- a/tensorflow/core/platform/posix/posix_file_system.h +++ b/tensorflow/core/platform/posix/posix_file_system.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef TENSORFLOW_CORE_PLATFORM_POSIX_POSIX_FILE_SYSTEM_H_ #define TENSORFLOW_CORE_PLATFORM_POSIX_POSIX_FILE_SYSTEM_H_ +#include "tensorflow/core/lib/io/path.h" #include "tensorflow/core/platform/env.h" namespace tensorflow { @@ -63,7 +64,7 @@ class LocalPosixFileSystem : public PosixFileSystem { public: string TranslateName(const string& name) const override { StringPiece scheme, host, path; - ParseURI(name, &scheme, &host, &path); + io::ParseURI(name, &scheme, &host, &path); return path.ToString(); } }; |