Add op `tf.contrib.ffmpeg.decode_video` (#13242)

* Add `tf.contrib.ffmpeg.decode_video` This fix tries to address the request raised in 6265 where it was not possible to decode video like the existing op of `decode_audio`. This fix adds the support of `tf.contrib.ffmpeg.decode_video` by invoking ffmpeg in the same fashion as `tf.contrib.ffmpeg.decode_audio` so that video could be stored in the tensor `[frames, height, width, channel]`. At the moment, the output format is `RGB24`. This fix fixes 6265. Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Add python wrapper for `tf.contrib.ffmpeg.decode_video` This fix adds python wrapper for `tf.contrib.ffmpeg.decode_video` Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Add test cases for `tf.contrib.ffmpeg.decode_video` This fix adds test cases for `tf.contrib.ffmpeg.decode_video`. Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Address review feedback. Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Move GetTempFilename to tensorflow/core/lib/io/path.h Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Disable GetTempFilename on Windows and Android for now. Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Remove `.` from tmp file template if no extension And sanitize with clang-format Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Add missing header files. Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
author: Yong Tang <yong.tang.github@outlook.com> 2017-11-13 22:10:27 -0800
committer: Martin Wicke <martin.wicke@gmail.com> 2017-11-13 22:10:27 -0800
commit: 577a55777251dfd85bb285fd246a45e913ead6ca (patch)
tree: 2d5f076f2581d11af209502cfb1d7f7ffc7656a8 /tensorflow/contrib/ffmpeg/decode_video_op.cc
parent: fab6adb40b7279271c4015dbbd4626c62d8732a7 (diff)
1 files changed, 118 insertions, 0 deletions
diff --git a/tensorflow/contrib/ffmpeg/decode_video_op.cc b/tensorflow/contrib/ffmpeg/decode_video_op.cc
new file mode 100644
index 0000000000..d44032968d
--- /dev/null
+++ b/tensorflow/contrib/ffmpeg/decode_video_op.cc
@@ -0,0 +1,118 @@
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#include <stdlib.h>
+
+#include <cstdio>
+#include <set>
+
+#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+namespace ffmpeg {
+
+// CPU kernel for "DecodeVideo": writes the scalar string input to a temp file,
+// decodes it with ffmpeg::ReadVideoFile, and outputs the frames as a uint8
+// tensor of shape [frames, height, width, 3].
+class DecodeVideoOp : public OpKernel {
+ public:
+  explicit DecodeVideoOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  // Errors: InvalidArgument for a missing or non-scalar input, Unavailable if
+  // ReadVideoFile reports NOT_FOUND, Unknown for empty or mis-sized decoder
+  // output. An UNKNOWN decode status is logged and yields an empty tensor.
+  void Compute(OpKernelContext* context) override {
+    OP_REQUIRES(
+        context, context->num_inputs() == 1,
+        errors::InvalidArgument("DecodeVideo requires exactly 1 input."));
+    const Tensor& contents_tensor = context->input(0);
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents_tensor.shape()),
+                errors::InvalidArgument(
+                    "contents must be a rank-0 tensor but got shape ",
+                    contents_tensor.shape().DebugString()));
+    const tensorflow::StringPiece contents = contents_tensor.scalar<string>()();
+
+    const string temp_filename = io::GetTempFilename(/*extension=*/"");
+    OP_REQUIRES_OK(context, WriteFile(temp_filename, contents));
+    FileDeleter deleter(temp_filename);
+
+    uint32 width = 0;
+    uint32 height = 0;
+    uint32 frames = 0;
+    std::vector<uint8> output_data;
+    const Status result = ffmpeg::ReadVideoFile(temp_filename, &output_data,
+                                                &width, &height, &frames);
+    OP_REQUIRES(
+        context, result.code() != error::Code::NOT_FOUND,
+        errors::Unavailable("FFmpeg must be installed to run this op. FFmpeg "
+                            "can be found at http://www.ffmpeg.org."));
+    Tensor* output = nullptr;
+    if (result.code() == error::UNKNOWN) {
+      LOG(ERROR) << "Ffmpeg failed with error '" << result.error_message()
+                 << "'. Returning empty tensor.";
+      // Keep the empty result rank 4 so it agrees with the op's shape function.
+      OP_REQUIRES_OK(context, context->allocate_output(
+                                  0, TensorShape({0, 0, 0, 3}), &output));
+      return;
+    }
+    OP_REQUIRES_OK(context, result);
+    OP_REQUIRES(context, !output_data.empty(),
+                errors::Unknown("No output created by FFmpeg."));
+    // Multiply in 64 bits; the uint32 product wraps at 4 GiB of decoded video.
+    const uint64 num_bytes = static_cast<uint64>(frames) * height * width * 3;
+    OP_REQUIRES(
+        context, output_data.size() == num_bytes,
+        errors::Unknown("Output created by FFmpeg [", output_data.size(),
+                        "] does not match description [", frames, ", ", height,
+                        ", ", width, ", 3]"));
+    const TensorShape output_shape({frames, height, width, 3});
+    OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
+    auto output_flat = output->flat<uint8>();
+    std::copy_n(output_data.begin(), output_data.size(), &output_flat(0));
+  }
+};
+
+// Binds the CPU implementation to the "DecodeVideo" op declared below.
+REGISTER_KERNEL_BUILDER(Name("DecodeVideo").Device(DEVICE_CPU), DecodeVideoOp);
+
+// Op interface: a scalar string of encoded video in, uint8 frames out.
+REGISTER_OP("DecodeVideo")
+    .Input("contents: string")
+    .Output("output: uint8")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      // Frame count and frame size are only known after decoding.
+      c->set_output(0, c->UnknownShapeOfRank(4));
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Processes the contents of a video file into a tensor using FFmpeg to decode
+the file.
+
+contents: The binary contents of the video file to decode, as a rank-0 string
+    tensor.
+output: A rank-4 `uint8` tensor of shape `[frames, height, width, 3]` holding
+    the decoded frames in RGB24 order.
+)doc");
+
+}  // namespace ffmpeg
+}  // namespace tensorflow
author	Yong Tang <yong.tang.github@outlook.com>	2017-11-13 22:10:27 -0800
committer	Martin Wicke <martin.wicke@gmail.com>	2017-11-13 22:10:27 -0800
commit	577a55777251dfd85bb285fd246a45e913ead6ca (patch)
tree	2d5f076f2581d11af209502cfb1d7f7ffc7656a8 /tensorflow/contrib/ffmpeg/decode_video_op.cc
parent	fab6adb40b7279271c4015dbbd4626c62d8732a7 (diff)