about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/core/ops/audio_ops.cc
diff options
context:
space:
mode:
Diffstat (limited to 'tensorflow/core/ops/audio_ops.cc')
-rw-r--r-- tensorflow/core/ops/audio_ops.cc 79
1 files changed, 0 insertions, 79 deletions
diff --git a/tensorflow/core/ops/audio_ops.cc b/tensorflow/core/ops/audio_ops.cc
index 2f55e45e37..d6dedc3820 100644
--- a/tensorflow/core/ops/audio_ops.cc
+++ b/tensorflow/core/ops/audio_ops.cc
@@ -16,7 +16,6 @@ limitations under the License.
#include "tensorflow/core/framework/common_shape_fns.h"
#include "tensorflow/core/framework/op.h"
#include "tensorflow/core/framework/shape_inference.h"
-#include "tensorflow/core/lib/core/bits.h"
namespace tensorflow {
@@ -67,39 +66,6 @@ Status EncodeWavShapeFn(InferenceContext* c) {
return Status::OK();
}
-Status SpectrogramShapeFn(InferenceContext* c) {
- ShapeHandle input;
- TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input));
- int32 window_size;
- TF_RETURN_IF_ERROR(c->GetAttr("window_size", &window_size));
- int32 stride;
- TF_RETURN_IF_ERROR(c->GetAttr("stride", &stride));
-
- DimensionHandle input_channels = c->Dim(input, 0);
- DimensionHandle input_length = c->Dim(input, 1);
-
- DimensionHandle output_length;
- if (!c->ValueKnown(input_length)) {
- output_length = c->UnknownDim();
- } else {
- const int64 input_length_value = c->Value(input_length);
- const int64 length_minus_window = (input_length_value - window_size);
- int64 output_length_value;
- if (length_minus_window < 0) {
- output_length_value = 0;
- } else {
- output_length_value = 1 + (length_minus_window / stride);
- }
- output_length = c->MakeDim(output_length_value);
- }
-
- DimensionHandle output_channels =
- c->MakeDim(1 + NextPowerOfTwo(window_size) / 2);
- c->set_output(0,
- c->MakeShape({input_channels, output_length, output_channels}));
- return Status::OK();
-}
-
} // namespace
REGISTER_OP("DecodeWav")
@@ -155,49 +121,4 @@ sample_rate: Scalar containing the sample frequency.
contents: 0-D. WAV-encoded file contents.
)doc");
-REGISTER_OP("AudioSpectrogram")
- .Input("input: float")
- .Attr("window_size: int")
- .Attr("stride: int")
- .Attr("magnitude_squared: bool = false")
- .Output("spectrogram: float")
- .SetShapeFn(SpectrogramShapeFn)
- .Doc(R"doc(
-Produces a visualization of audio data over time.
-
-Spectrograms are a standard way of representing audio information as a series of
-slices of frequency information, one slice for each window of time. By joining
-these together into a sequence, they form a distinctive fingerprint of the sound
-over time.
-
-This op expects to receive audio data as an input, stored as floats in the range
--1 to 1, together with a window width in samples, and a stride specifying how
-far to move the window between slices. From this it generates a
-three-dimensional output. The innermost (last) dimension has an amplitude value
-for each frequency during that time slice. The middle dimension is time, with
-successive frequency slices. The outermost (first) dimension is for the channels
-in the input, so a stereo audio input would have two here, for example.
-
-This means the layout when converted and saved as an image is rotated 90 degrees
-clockwise from a typical spectrogram. Time is descending down the Y axis, and
-the frequency decreases from left to right.
-
-Each value in the result represents the magnitude of an FFT output for the
-current window of samples: the square root of the sum of the squares of its
-real and imaginary parts (or that sum itself, without the square root, when
-magnitude_squared is true). In this way, the lowest dimension represents the
-strength of each frequency in the current window, and adjacent windows are
-concatenated in the next dimension.
-
-To get a more intuitive and visual look at what this operation does, you can run
-tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the
-resulting spectrogram as a PNG image.
-
-input: Float representation of audio data.
-window_size: How wide the input window is in samples. For the highest efficiency
- this should be a power of two, but other values are accepted.
-stride: How far apart, in samples, the starts of adjacent sample windows are.
-magnitude_squared: Whether to return the squared magnitude or just the
- magnitude. Using squared magnitude can avoid extra calculations.
-spectrogram: 3D representation of the audio frequencies as an image.
-)doc");
-
} // namespace tensorflow