aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/contrib/ffmpeg/decode_video_op.cc
diff options
context:
space:
mode:
authorGravatar Yong Tang <yong.tang.github@outlook.com>2017-11-13 22:10:27 -0800
committerGravatar Martin Wicke <martin.wicke@gmail.com>2017-11-13 22:10:27 -0800
commit577a55777251dfd85bb285fd246a45e913ead6ca (patch)
tree2d5f076f2581d11af209502cfb1d7f7ffc7656a8 /tensorflow/contrib/ffmpeg/decode_video_op.cc
parentfab6adb40b7279271c4015dbbd4626c62d8732a7 (diff)
Add op `tf.contrib.ffmpeg.decode_video` (#13242)
* Add `tf.contrib.ffmpeg.decode_video` This fix tries to address the request raised in 6265 where it was not possible to decode video like the existing op of `decode_audio`. This fix adds the support of `tf.contrib.ffmpeg.decode_video` by invoking ffmpeg in the same fashion as `tf.contrib.ffmpeg.decode_audio` so that video could be stored in the tensor `[frames, height, width, channel]`. At the moment, the output format is `RGB24`. This fix fixes 6265. Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Add python wrapper for `tf.contrib.ffmpeg.decode_video` This fix adds python wrapper for `tf.contrib.ffmpeg.decode_video` Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Add test cases for `tf.contrib.ffmpeg.decode_video` This fix adds test cases for `tf.contrib.ffmpeg.decode_video`. Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Address review feedback. Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Move GetTempFilename to tensorflow/core/lib/io/path.h Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Disable GetTempFilename on Windows and Android for now. Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Remove `.` from tmp file template if no extension And sanitize with clang-format Signed-off-by: Yong Tang <yong.tang.github@outlook.com> * Add missing header files. Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
Diffstat (limited to 'tensorflow/contrib/ffmpeg/decode_video_op.cc')
-rw-r--r--tensorflow/contrib/ffmpeg/decode_video_op.cc118
1 files changed, 118 insertions, 0 deletions
diff --git a/tensorflow/contrib/ffmpeg/decode_video_op.cc b/tensorflow/contrib/ffmpeg/decode_video_op.cc
new file mode 100644
index 0000000000..d44032968d
--- /dev/null
+++ b/tensorflow/contrib/ffmpeg/decode_video_op.cc
@@ -0,0 +1,118 @@
+// Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+// =============================================================================
+
+#include <stdlib.h>
+
+#include <cstdio>
+#include <set>
+
+#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h"
+#include "tensorflow/core/framework/op.h"
+#include "tensorflow/core/framework/op_kernel.h"
+#include "tensorflow/core/framework/shape_inference.h"
+#include "tensorflow/core/lib/io/path.h"
+#include "tensorflow/core/lib/strings/str_util.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/platform/env.h"
+#include "tensorflow/core/platform/logging.h"
+
+namespace tensorflow {
+namespace ffmpeg {
+
+// CPU kernel for the "DecodeVideo" op. Takes the encoded bytes of a video as a
+// scalar string tensor, decodes them by running FFmpeg through
+// ffmpeg::ReadVideoFile, and emits the decoded bytes as a uint8 tensor of
+// shape [frames, height, width, 3].
+class DecodeVideoOp : public OpKernel {
+ public:
+  explicit DecodeVideoOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  // Decodes input 0 into output 0.
+  //
+  // Failure handling, keyed on the status returned by ReadVideoFile:
+  //   * NOT_FOUND: the op fails with Unavailable, telling the user to install
+  //     FFmpeg.
+  //   * UNKNOWN: the error is logged and an empty rank-4 tensor is returned
+  //     instead of failing the op.
+  //   * any other non-OK status: the op fails with that status.
+  // The op also fails with Unknown if FFmpeg produced no bytes, or if the
+  // number of bytes disagrees with the reported frames/height/width.
+  void Compute(OpKernelContext* context) override {
+    OP_REQUIRES(
+        context, context->num_inputs() == 1,
+        errors::InvalidArgument("DecodeVideo requires exactly 1 input."));
+    const Tensor& contents_tensor = context->input(0);
+
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents_tensor.shape()),
+                errors::InvalidArgument(
+                    "contents must be a rank-0 tensor but got shape ",
+                    contents_tensor.shape().DebugString()));
+    const tensorflow::StringPiece contents = contents_tensor.scalar<string>()();
+
+    // Write the input data to a temp file. No file extension is requested.
+    string extension;
+    const string temp_filename = io::GetTempFilename(extension);
+    OP_REQUIRES_OK(context, WriteFile(temp_filename, contents));
+    // NOTE(review): FileDeleter is declared outside this file; presumably it
+    // removes the temp file when it goes out of scope -- confirm.
+    FileDeleter deleter(temp_filename);
+
+    uint32 width = 0;
+    uint32 height = 0;
+    uint32 frames = 0;
+
+    // Run FFmpeg on the data and verify results.
+    std::vector<uint8> output_data;
+    const Status result = ffmpeg::ReadVideoFile(temp_filename, &output_data,
+                                                &width, &height, &frames);
+    if (result.code() == error::Code::NOT_FOUND) {
+      // result.ok() is necessarily false here, so this always fails the op; it
+      // only swaps the NOT_FOUND status for a more actionable message.
+      OP_REQUIRES(
+          context, result.ok(),
+          errors::Unavailable("FFmpeg must be installed to run this op. FFmpeg "
+                              "can be found at http://www.ffmpeg.org."));
+    } else if (result.code() == error::UNKNOWN) {
+      LOG(ERROR) << "Ffmpeg failed with error '" << result.error_message()
+                 << "'. Returning empty tensor.";
+      // The op's shape function declares a rank-4 output, so the empty result
+      // must be rank 4 as well (zero frames of zero-sized 3-channel images).
+      Tensor* output = nullptr;
+      OP_REQUIRES_OK(context, context->allocate_output(
+                                  0, TensorShape({0, 0, 0, 3}), &output));
+      return;
+    } else {
+      OP_REQUIRES_OK(context, result);
+    }
+    OP_REQUIRES(context, !output_data.empty(),
+                errors::Unknown("No output created by FFmpeg."));
+    // Widen to 64 bits before multiplying: with uint32 operands the product
+    // wraps in 32-bit arithmetic once the decoded output reaches 4 GiB, which
+    // would make this consistency check compare against a bogus value.
+    const uint64 expected_size =
+        static_cast<uint64>(frames) * height * width * 3;
+    OP_REQUIRES(
+        context, output_data.size() == expected_size,
+        errors::Unknown("Output created by FFmpeg [", output_data.size(),
+                        "] does not match description [", frames, ", ", height,
+                        ", ", width, ", 3]"));
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(
+                       0, TensorShape({frames, height, width, 3}), &output));
+    // output_data is non-empty and matches the output's element count (both
+    // checked above), so the flat view has room for every byte.
+    auto output_flat = output->flat<uint8>();
+    std::copy_n(output_data.begin(), output_data.size(), output_flat.data());
+  }
+};
+
+// Registers DecodeVideoOp as the CPU kernel implementing the "DecodeVideo" op.
+REGISTER_KERNEL_BUILDER(Name("DecodeVideo").Device(DEVICE_CPU), DecodeVideoOp);
+
+// Op definition for "DecodeVideo": one string input holding the encoded video
+// and one uint8 output. Shape inference only fixes the output rank at 4; the
+// individual dimensions are known only after decoding.
+REGISTER_OP("DecodeVideo")
+    .Input("contents: string")
+    .Output("output: uint8")
+    .SetShapeFn([](shape_inference::InferenceContext* c) {
+      c->set_output(0, c->UnknownShapeOfRank(4));
+      return Status::OK();
+    })
+    .Doc(R"doc(
+Processes the contents of a video file into a tensor using FFmpeg to decode
+the file.
+
+Each frame of the video is decoded to 8-bit RGB (`RGB24`) and the frames are
+stacked, in order, along the first dimension of the output.
+
+contents: The binary video file contents, as a string or rank-0 string
+  tensor.
+output: A rank-4 `uint8` tensor of shape `[frames, height, width, 3]`
+  containing the decoded RGB frames.
+)doc");
+
+} // namespace ffmpeg
+} // namespace tensorflow