diff options
Diffstat (limited to 'tensorflow/core/ops/audio_ops.cc')
-rw-r--r-- | tensorflow/core/ops/audio_ops.cc | 79 |
1 files changed, 0 insertions, 79 deletions
diff --git a/tensorflow/core/ops/audio_ops.cc b/tensorflow/core/ops/audio_ops.cc index 2f55e45e37..d6dedc3820 100644 --- a/tensorflow/core/ops/audio_ops.cc +++ b/tensorflow/core/ops/audio_ops.cc @@ -16,7 +16,6 @@ limitations under the License. #include "tensorflow/core/framework/common_shape_fns.h" #include "tensorflow/core/framework/op.h" #include "tensorflow/core/framework/shape_inference.h" -#include "tensorflow/core/lib/core/bits.h" namespace tensorflow { @@ -67,39 +66,6 @@ Status EncodeWavShapeFn(InferenceContext* c) { return Status::OK(); } -Status SpectrogramShapeFn(InferenceContext* c) { - ShapeHandle input; - TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &input)); - int32 window_size; - TF_RETURN_IF_ERROR(c->GetAttr("window_size", &window_size)); - int32 stride; - TF_RETURN_IF_ERROR(c->GetAttr("stride", &stride)); - - DimensionHandle input_channels = c->Dim(input, 0); - DimensionHandle input_length = c->Dim(input, 1); - - DimensionHandle output_length; - if (!c->ValueKnown(input_length)) { - output_length = c->UnknownDim(); - } else { - const int64 input_length_value = c->Value(input_length); - const int64 length_minus_window = (input_length_value - window_size); - int64 output_length_value; - if (length_minus_window < 0) { - output_length_value = 0; - } else { - output_length_value = 1 + (length_minus_window / stride); - } - output_length = c->MakeDim(output_length_value); - } - - DimensionHandle output_channels = - c->MakeDim(1 + NextPowerOfTwo(window_size) / 2); - c->set_output(0, - c->MakeShape({input_channels, output_length, output_channels})); - return Status::OK(); -} - } // namespace REGISTER_OP("DecodeWav") @@ -155,49 +121,4 @@ sample_rate: Scalar containing the sample frequency. contents: 0-D. WAV-encoded file contents. )doc"); -REGISTER_OP("AudioSpectrogram") - .Input("input: float") - .Attr("window_size: int") - .Attr("stride: int") - .Attr("magnitude_squared: bool = false") - .Output("spectrogram: float") - .SetShapeFn(SpectrogramShapeFn) - .Doc(R"doc( -Produces a visualization of audio data over time. - -Spectrograms are a standard way of representing audio information as a series of -slices of frequency information, one slice for each window of time. By joining -these together into a sequence, they form a distinctive fingerprint of the sound -over time. - -This op expects to receive audio data as an input, stored as floats in the range --1 to 1, together with a window width in samples, and a stride specifying how -far to move the window between slices. From this it generates a three -dimensional output. The lowest dimension has an amplitude value for each -frequency during that time slice. The next dimension is time, with successive -frequency slices. The final dimension is for the channels in the input, so a -stereo audio input would have two here for example. - -This means the layout when converted and saved as an image is rotated 90 degrees -clockwise from a typical spectrogram. Time is descending down the Y axis, and -the frequency decreases from left to right. - -Each value in the result represents the square root of the sum of the real and -imaginary parts of an FFT on the current window of samples. In this way, the -lowest dimension represents the power of each frequency in the current window, -and adjacent windows are concatenated in the next dimension. - -To get a more intuitive and visual look at what this operation does, you can run -tensorflow/examples/wav_to_spectrogram to read in an audio file and save out the -resulting spectrogram as a PNG image. - -input: Float representation of audio data. -window_size: How wide the input window is in samples. For the highest efficiency - this should be a power of two, but other values are accepted. -stride: How widely apart the center of adjacent sample windows should be. -magnitude_squared: Whether to return the squared magnitude or just the - magnitude. Using squared magnitude can avoid extra calculations. -spectrogram: 3D representation of the audio frequencies as an image. -)doc"); - } // namespace tensorflow |