diff options
author | Yong Tang <yong.tang.github@outlook.com> | 2017-11-13 22:10:27 -0800 |
---|---|---|
committer | Martin Wicke <martin.wicke@gmail.com> | 2017-11-13 22:10:27 -0800 |
commit | 577a55777251dfd85bb285fd246a45e913ead6ca (patch) | |
tree | 2d5f076f2581d11af209502cfb1d7f7ffc7656a8 /tensorflow/contrib/ffmpeg/decode_video_op.cc | |
parent | fab6adb40b7279271c4015dbbd4626c62d8732a7 (diff) |
Add op `tf.contrib.ffmpeg.decode_video` (#13242)
* Add `tf.contrib.ffmpeg.decode_video`
This fix tries to address the request raised in 6265 where
it was not possible to decode video like the existing op of
`decode_audio`.
This fix adds the support of `tf.contrib.ffmpeg.decode_video`
by invoking ffmpeg in the same fashion as `tf.contrib.ffmpeg.decode_audio`
so that video could be stored in the tensor `[frames, height, width, channel]`.
At the moment, the output format is `RGB24`.
This fix fixes 6265.
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
* Add python wrapper for `tf.contrib.ffmpeg.decode_video`
This fix adds python wrapper for `tf.contrib.ffmpeg.decode_video`
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
* Add test cases for `tf.contrib.ffmpeg.decode_video`
This fix adds test cases for `tf.contrib.ffmpeg.decode_video`.
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
* Address review feedback.
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
* Move GetTempFilename to tensorflow/core/lib/io/path.h
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
* Disable GetTempFilename on Windows and Android for now.
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
* Remove `.` from tmp file template if no extension
And sanitize with clang-format
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
* Add missing header files.
Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
Diffstat (limited to 'tensorflow/contrib/ffmpeg/decode_video_op.cc')
-rw-r--r-- | tensorflow/contrib/ffmpeg/decode_video_op.cc | 118 |
1 files changed, 118 insertions, 0 deletions
diff --git a/tensorflow/contrib/ffmpeg/decode_video_op.cc b/tensorflow/contrib/ffmpeg/decode_video_op.cc new file mode 100644 index 0000000000..d44032968d --- /dev/null +++ b/tensorflow/contrib/ffmpeg/decode_video_op.cc @@ -0,0 +1,118 @@ +// Copyright 2016 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +#include <stdlib.h> + +#include <cstdio> +#include <set> + +#include "tensorflow/contrib/ffmpeg/ffmpeg_lib.h" +#include "tensorflow/core/framework/op.h" +#include "tensorflow/core/framework/op_kernel.h" +#include "tensorflow/core/framework/shape_inference.h" +#include "tensorflow/core/lib/io/path.h" +#include "tensorflow/core/lib/strings/str_util.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/platform/env.h" +#include "tensorflow/core/platform/logging.h" + +namespace tensorflow { +namespace ffmpeg { + +class DecodeVideoOp : public OpKernel { + public: + explicit DecodeVideoOp(OpKernelConstruction* context) : OpKernel(context) {} + + void Compute(OpKernelContext* context) override { + OP_REQUIRES( + context, context->num_inputs() == 1, + errors::InvalidArgument("DecodeVideo requires exactly 1 input.")); + const Tensor& contents_tensor = context->input(0); + + OP_REQUIRES(context, TensorShapeUtils::IsScalar(contents_tensor.shape()), + errors::InvalidArgument( + "contents must be a rank-0 tensor but 
got shape ", + contents_tensor.shape().DebugString())); + const tensorflow::StringPiece contents = contents_tensor.scalar<string>()(); + + // Write the input data to a temp file. + string extension; + const string temp_filename = io::GetTempFilename(extension); + OP_REQUIRES_OK(context, WriteFile(temp_filename, contents)); + FileDeleter deleter(temp_filename); + + uint32 width = 0; + uint32 height = 0; + uint32 frames = 0; + + // Run FFmpeg on the data and verify results. + std::vector<uint8> output_data; + const Status result = ffmpeg::ReadVideoFile(temp_filename, &output_data, + &width, &height, &frames); + if (result.code() == error::Code::NOT_FOUND) { + OP_REQUIRES( + context, result.ok(), + errors::Unavailable("FFmpeg must be installed to run this op. FFmpeg " + "can be found at http://www.ffmpeg.org.")); + } else if (result.code() == error::UNKNOWN) { + LOG(ERROR) << "Ffmpeg failed with error '" << result.error_message() + << "'. Returning empty tensor."; + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output(0, TensorShape({0, 0}), &output)); + return; + } else { + OP_REQUIRES_OK(context, result); + } + OP_REQUIRES(context, !output_data.empty(), + errors::Unknown("No output created by FFmpeg.")); + OP_REQUIRES( + context, output_data.size() == (frames * height * width * 3), + errors::Unknown("Output created by FFmpeg [", output_data.size(), + "] does not match description [", frames, ", ", height, + ", ", width, ", 3]")); + Tensor* output = nullptr; + OP_REQUIRES_OK(context, + context->allocate_output( + 0, TensorShape({frames, height, width, 3}), &output)); + auto output_flat = output->flat<uint8>(); + std::copy_n(output_data.begin(), output_data.size(), &output_flat(0)); + } +}; + +REGISTER_KERNEL_BUILDER(Name("DecodeVideo").Device(DEVICE_CPU), DecodeVideoOp); + +REGISTER_OP("DecodeVideo") + .Input("contents: string") + .Output("output: uint8") + .SetShapeFn([](shape_inference::InferenceContext* c) { + c->set_output(0, 
c->UnknownShapeOfRank(4)); + return Status::OK(); + }) + .Doc(R"doc( +Processes the contents of an audio file into a tensor using FFmpeg to decode +the file. + +One row of the tensor is created for each channel in the audio file. Each +channel contains audio samples starting at the beginning of the audio and +having `1/samples_per_second` time between them. If the `channel_count` is +different from the contents of the file, channels will be merged or created. + +contents: The binary audio file contents, as a string or rank-0 string + tensor. +)doc"); + +} // namespace ffmpeg +} // namespace tensorflow |