Add mpg123_decode fuzzer (#2037)

* Add fuzzer for mpg123_decode. This fuzzes a different and non-file-based API than read_fuzzer.c. mpg123_decode is, however, streaming-based, so we need to chunk the fuzzer's input into multiple pieces and pass them to the library. This is pretty annoying to do by hand, so I've introduced byte_stream.h to do this automatically. Sadly, byte_stream.h is very C++, so the decode_fuzzer is also C++ now. This isn't ideal since mpg123 is a C library, but this is the easiest way to do it. * Do not carry next input within std::string. * Malloc a new buffer for each decode invocation. I used malloc instead of new since ideally this fuzz target would be written in C.
author: Markus Kusano <mukusano@gmail.com> 2018-12-21 14:20:00 -0500
committer: jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com> 2018-12-21 11:20:00 -0800
commit: 2ae303bc3b655701223ef151670a30eb92f1c9bb (patch)
tree: dd2a67cd7e6bea433c06948e43085e1e9e509472 /projects/mpg123
parent: 194c7de657fe5a05a8d37ee8eaf0e15660b675f5 (diff)
5 files changed, 195 insertions, 0 deletions
diff --git a/projects/mpg123/Dockerfile b/projects/mpg123/Dockerfile
index 6653b607..31a40758 100644
--- a/projects/mpg123/Dockerfile
+++ b/projects/mpg123/Dockerfile
@@ -22,4 +22,6 @@ RUN tar -xvf snapshot
 RUN mv mpg123* mpg123
 WORKDIR $SRC
 COPY read_fuzzer.c $SRC/
+COPY decode_fuzzer.cc $SRC/
+COPY byte_stream.h $SRC/
 COPY build.sh $SRC/
diff --git a/projects/mpg123/build.sh b/projects/mpg123/build.sh
index abf18cb0..49317c5f 100755
--- a/projects/mpg123/build.sh
+++ b/projects/mpg123/build.sh
@@ -23,3 +23,6 @@ popd
 
 $CC $CXXFLAGS read_fuzzer.c -I$WORK/include $WORK/lib/libmpg123.a \
   -lFuzzingEngine -lc++ -o $OUT/read_fuzzer
+
+$CXX $CXXFLAGS decode_fuzzer.cc -I$WORK/include $WORK/lib/libmpg123.a \
+  -lFuzzingEngine -o $OUT/decode_fuzzer
diff --git a/projects/mpg123/byte_stream.h b/projects/mpg123/byte_stream.h
new file mode 100644
index 00000000..8fb9f3c5
--- /dev/null
+++ b/projects/mpg123/byte_stream.h
@@ -0,0 +1,129 @@
+// Copyright 2018 Google Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef BYTE_STREAM_H_
+#define BYTE_STREAM_H_
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+#include <string>
+
+// Wrapper for fuzzer input that helps consume and interpret the raw bytes as
+// a sequence of values, such as strings and PODs. Reads advance an internal
+// position; a read that does not fit yields a default value instead of
+// failing.
+class ByteStream {
+ public:
+  // Does not take ownership of data; the buffer is read lazily, so it must
+  // stay valid for as long as this object is used.
+  ByteStream(const uint8_t* data, size_t size)
+      : data_(data), size_(size), position_(0) {}
+
+  ByteStream(const ByteStream&) = delete;
+  ByteStream& operator=(const ByteStream&) = delete;
+
+  // Returns a string. Strings are obtained from the byte stream by reading a
+  // size_t N followed by N char elements. If there are fewer than N bytes left
+  // in the stream, this returns as many bytes as are available.
+  std::string GetNextString();
+
+  // The following GetNext{integer type} functions all return the next
+  // sizeof(integer type) bytes in the stream, copied in native byte order. If
+  // there is insufficient capacity they return 0 and consume whatever bytes
+  // remain.
+  size_t GetNextSizeT() { return ConsumeCopyOrDefault<size_t>(0); }
+  int GetNextInt() { return ConsumeCopyOrDefault<int>(0); }
+  uint8_t GetNextUint8() { return ConsumeCopyOrDefault<uint8_t>(0); }
+  int64_t GetNextInt64() { return ConsumeCopyOrDefault<int64_t>(0); }
+
+  // Returns an integer in the range [0,n) for n > 0 and consumes up to
+  // sizeof(int) bytes. For n<=0, returns 0 and consumes 0 bytes.
+  int GetNextInt(int n);
+
+  // The remaining capacity of the ByteStream. Reports 0, rather than a
+  // wrapped-around value, if UncheckedConsume() has advanced position_ past
+  // size_.
+  size_t capacity() const {
+    return position_ < size_ ? size_ - position_ : 0;
+  }
+
+  // Returns data_ + position_ and then advances position_ by requested bytes.
+  //
+  // This is the canonical way for the class to request regions of memory
+  // or to advance the position by requested bytes. This operation is unchecked
+  // for maintaining that position_ <= size_; callers are expected to clamp
+  // `requested` against capacity() first. Requesting 0 bytes always succeeds.
+  const uint8_t* UncheckedConsume(size_t requested) {
+    const uint8_t* region = data_ + position_;
+    position_ += requested;
+    return region;
+  }
+
+ private:
+  // Directly initialize T by copying sizeof(T) bytes into result if there is
+  // sufficient capacity in the stream. If there is not sufficient capacity,
+  // result is left unmodified and the rest of the stream is consumed, so the
+  // stream is empty afterwards.
+  template <class T>
+  void ConsumeBytesByCopy(T* result) {
+    constexpr size_t type_size = sizeof(T);
+    if (type_size <= capacity()) {
+      const uint8_t* region = UncheckedConsume(type_size);
+      // Byte-wise copy: the stream offers no alignment guarantee for T.
+      std::memcpy(static_cast<void*>(result), region, type_size);
+    } else {
+      // Consume the remainder of data_.
+      UncheckedConsume(capacity());
+    }
+  }
+
+  // A helper function for using ConsumeBytesByCopy and returning a default
+  // value `t` if there is insufficient capacity to read a full `T`. T should
+  // probably be a primitive type.
+  template <class T>
+  T ConsumeCopyOrDefault(T t) {
+    ConsumeBytesByCopy(&t);
+    return t;
+  }
+
+  const uint8_t* data_;  // Not owned.
+  const size_t size_;    // Total number of bytes at data_.
+  size_t position_;      // Offset of the next unread byte.
+};
+
+// Reads a size_t length prefix, clamps it to the bytes still available, and
+// returns that many bytes as a std::string.
+inline std::string ByteStream::GetNextString() {
+  // The prefix is consumed first so that the clamp below sees the capacity
+  // remaining after it.
+  const size_t wanted = GetNextSizeT();
+  const size_t available = capacity();
+  const size_t length = wanted < available ? wanted : available;
+  const char* first = reinterpret_cast<const char*>(UncheckedConsume(length));
+  return std::string(first, length);
+}
+
+// Maps the next bytes of the stream to an int in [0, n). A non-positive n
+// yields 0 without consuming anything.
+inline int ByteStream::GetNextInt(int n) {
+  if (n <= 0) return 0;
+
+  // Read only as many bytes as are needed to cover n, since n will often be
+  // fixed.
+  int raw = 0;
+  if (n <= std::numeric_limits<uint8_t>::max()) {
+    raw = static_cast<int>(GetNextUint8());
+  } else if (n <= std::numeric_limits<uint16_t>::max()) {
+    raw = ConsumeCopyOrDefault<uint16_t>(0);
+  } else {
+    raw = GetNextInt();
+  }
+
+  // Fold negative values onto their magnitude. INT_MIN has no positive
+  // counterpart, so negating it would overflow; it is mapped to 0 instead.
+  const int lowest = std::numeric_limits<int>::min();
+  const int magnitude = (raw == lowest) ? 0 : (raw < 0 ? -raw : raw);
+  return magnitude % n;
+}
+
+#endif  // BYTE_STREAM_H_
diff --git a/projects/mpg123/decode_fuzzer.cc b/projects/mpg123/decode_fuzzer.cc
new file mode 100644
index 00000000..56006be9
--- /dev/null
+++ b/projects/mpg123/decode_fuzzer.cc
@@ -0,0 +1,59 @@
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+#include <vector>
+
+#include "mpg123.h"
+#include "byte_stream.h"
+
+// libFuzzer entry point: pushes `data` through mpg123's feed API
+// (mpg123_open_feed + mpg123_decode) in chunks.
+//
+// The input is read as a sequence of records, each a native size_t length
+// followed by that many payload bytes (clamped to what is left). A new record
+// is fed only while the decoder reports MPG123_NEED_MORE; for any other
+// non-error status the decoder is called again without new input. Decoding
+// stops on MPG123_ERR or once the input is exhausted. Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  // Initialize the library only on the first invocation.
+  static bool initialized = false;
+  if (!initialized) {
+    mpg123_init();
+    initialized = true;
+  }
+
+  int ret;
+  mpg123_handle* handle = mpg123_new(nullptr, &ret);
+  if (handle == nullptr) {
+    return 0;
+  }
+
+  ret = mpg123_open_feed(handle);
+  if (ret != MPG123_OK) {
+    mpg123_delete(handle);
+    return 0;
+  }
+
+  std::vector<uint8_t> output_buffer(mpg123_outblock(handle));
+
+  size_t output_written = 0;
+  // Initially, start by feeding the decoder more data.
+  int decode_ret = MPG123_NEED_MORE;
+  ByteStream stream(data, size);
+  while (decode_ret != MPG123_ERR) {
+    if (decode_ret != MPG123_NEED_MORE) {
+      // Any other non-error status: call the decoder again with no new input.
+      decode_ret = mpg123_decode(handle, nullptr, 0, output_buffer.data(),
+                                 output_buffer.size(), &output_written);
+      continue;
+    }
+    if (stream.capacity() == 0) {
+      break;
+    }
+
+    // Read the length prefix in its own statement. Function arguments are
+    // evaluated in an unspecified order, so passing GetNextSizeT() and
+    // capacity() to the same std::min call could clamp against the capacity
+    // from *before* the prefix was consumed and over-read the input.
+    const size_t requested_size = stream.GetNextSizeT();
+    const size_t next_size = std::min(requested_size, stream.capacity());
+    const uint8_t* chunk = stream.UncheckedConsume(next_size);
+
+    // Each chunk gets its own exactly-sized heap buffer (presumably so that
+    // sanitizers can flag reads past the end of the chunk).
+    uint8_t* next_input = static_cast<uint8_t*>(std::malloc(next_size));
+    if (next_input == nullptr && next_size > 0) {
+      break;  // Allocation failed; stop rather than copy through null.
+    }
+    if (next_size > 0) {
+      // malloc(0) may return null, and memcpy must not be given null even
+      // for a zero-length copy.
+      std::memcpy(next_input, chunk, next_size);
+    }
+    decode_ret = mpg123_decode(
+        handle, reinterpret_cast<const unsigned char*>(next_input),
+        next_size, output_buffer.data(), output_buffer.size(),
+        &output_written);
+    std::free(next_input);
+  }
+
+  mpg123_delete(handle);
+
+  return 0;
+}
diff --git a/projects/mpg123/project.yaml b/projects/mpg123/project.yaml
index a45f1d64..3f0a8fee 100644
--- a/projects/mpg123/project.yaml
+++ b/projects/mpg123/project.yaml
@@ -11,3 +11,5 @@ sanitizers:
 labels:
   read_fuzzer:
     - sundew
+  decode_fuzzer:
+    - sundew
author	Markus Kusano <mukusano@gmail.com>	2018-12-21 14:20:00 -0500
committer	jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com>	2018-12-21 11:20:00 -0800
commit	2ae303bc3b655701223ef151670a30eb92f1c9bb (patch)
tree	dd2a67cd7e6bea433c06948e43085e1e9e509472 /projects/mpg123
parent	194c7de657fe5a05a8d37ee8eaf0e15660b675f5 (diff)