diff options
author | Pete Warden <petewarden@google.com> | 2018-03-14 12:53:30 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-03-14 12:57:29 -0700 |
commit | 4f82b6afe918a40a9df980bc6e3a94586a7347a3 (patch) | |
tree | c92dcc6e5a6cf6bf45a9b8ba9925ea43c0ceb214 /tensorflow/core/lib/wav/wav_io.cc | |
parent | 19841465d86dac32d6505ae197d47807e57b17e5 (diff) |
Check for very large chunk sizes in WAV decoding
Change how chunk sizes larger than 2GB are handled. Chunk sizes are stored as
unsigned int32s, so there are lots of ways for integer conversions to confuse
the decoding logic. The new behavior is to fail with an error, since such
large WAV files are not common, and are unsupported by many readers.
PiperOrigin-RevId: 189071037
Diffstat (limited to 'tensorflow/core/lib/wav/wav_io.cc')
-rw-r--r-- | tensorflow/core/lib/wav/wav_io.cc | 74 |
1 files changed, 42 insertions, 32 deletions
diff --git a/tensorflow/core/lib/wav/wav_io.cc b/tensorflow/core/lib/wav/wav_io.cc index 77d3c88998..2165415ba5 100644 --- a/tensorflow/core/lib/wav/wav_io.cc +++ b/tensorflow/core/lib/wav/wav_io.cc @@ -81,13 +81,42 @@ inline float Int16SampleToFloat(int16 data) { return data * kMultiplier; } +} // namespace + +// Handles moving the data index forward, validating the arguments, and avoiding +// overflow or underflow. +Status IncrementOffset(int old_offset, size_t increment, size_t max_size, + int* new_offset) { + if (old_offset < 0) { + return errors::InvalidArgument("Negative offsets are not allowed: ", + old_offset); + } + if (old_offset > max_size) { + return errors::InvalidArgument("Initial offset is outside data range: ", + old_offset); + } + if (increment < 0) { + return errors::InvalidArgument("Negative increments are not allowed: ", + old_offset); + } + *new_offset = old_offset + increment; + if (*new_offset > max_size) { + return errors::InvalidArgument("Data too short when trying to read string"); + } + // See above for the check that the input offset is positive. If it's negative + // here then it means that there's been an overflow in the arithmetic. 
+ if (*new_offset < 0) { + return errors::InvalidArgument("Offset too large, overflowed: ", + *new_offset); + } + return Status::OK(); +} + Status ExpectText(const string& data, const string& expected_text, int* offset) { - const int new_offset = *offset + expected_text.size(); - if (new_offset > data.size()) { - return errors::InvalidArgument("Data too short when trying to read ", - expected_text); - } + int new_offset; + TF_RETURN_IF_ERROR( + IncrementOffset(*offset, expected_text.size(), data.size(), &new_offset)); const string found_text(data.begin() + *offset, data.begin() + new_offset); if (found_text != expected_text) { return errors::InvalidArgument("Header mismatch: Expected ", expected_text, @@ -97,40 +126,16 @@ Status ExpectText(const string& data, const string& expected_text, return Status::OK(); } -template <class T> -Status ReadValue(const string& data, T* value, int* offset) { - const int new_offset = *offset + sizeof(T); - if (new_offset > data.size()) { - return errors::InvalidArgument("Data too short when trying to read value"); - } - if (port::kLittleEndian) { - memcpy(value, data.data() + *offset, sizeof(T)); - } else { - *value = 0; - const uint8* data_buf = - reinterpret_cast<const uint8*>(data.data() + *offset); - int shift = 0; - for (int i = 0; i < sizeof(T); ++i, shift += 8) { - *value = *value | (data_buf[i] << shift); - } - } - *offset = new_offset; - return Status::OK(); -} - Status ReadString(const string& data, int expected_length, string* value, int* offset) { - const int new_offset = *offset + expected_length; - if (new_offset > data.size()) { - return errors::InvalidArgument("Data too short when trying to read string"); - } + int new_offset; + TF_RETURN_IF_ERROR( + IncrementOffset(*offset, expected_length, data.size(), &new_offset)); *value = string(data.begin() + *offset, data.begin() + new_offset); *offset = new_offset; return Status::OK(); } -} // namespace - Status EncodeAudioAsS16LEWav(const float* audio, size_t sample_rate, 
size_t num_channels, size_t num_frames, string* wav_string) { @@ -272,6 +277,11 @@ Status DecodeLin16WaveAsFloatVector(const string& wav_string, TF_RETURN_IF_ERROR(ReadString(wav_string, 4, &chunk_id, &offset)); uint32 chunk_size; TF_RETURN_IF_ERROR(ReadValue<uint32>(wav_string, &chunk_size, &offset)); + if (chunk_size > std::numeric_limits<int32>::max()) { + return errors::InvalidArgument( + "WAV data chunk '", chunk_id, "' is too large: ", chunk_size, + " bytes, but the limit is ", std::numeric_limits<int32>::max()); + } if (chunk_id == kDataChunkId) { if (was_data_found) { return errors::InvalidArgument("More than one data chunk found in WAV"); |