aboutsummaryrefslogtreecommitdiffhomepage
path: root/projects/icu
diff options
context:
space:
mode:
Diffstat (limited to 'projects/icu')
-rw-r--r-- projects/icu/Dockerfile 22
-rw-r--r-- projects/icu/break_iterator_fuzzer.cc 46
-rw-r--r-- projects/icu/break_iterator_utf32_fuzzer.cc 47
-rwxr-xr-x projects/icu/build.sh 48
-rw-r--r-- projects/icu/converter_fuzzer.cc 45
-rw-r--r-- projects/icu/fuzzer_utils.h 53
-rw-r--r-- projects/icu/number_format_fuzzer.cc 30
-rw-r--r-- projects/icu/regex.dict 103
-rw-r--r-- projects/icu/target.yaml 1
-rw-r--r-- projects/icu/ucasemap_fuzzer.cc 53
-rw-r--r-- projects/icu/unicode_string_codepage_create_fuzzer.cc 73
-rw-r--r-- projects/icu/uregex_open_fuzzer.cc 23
-rw-r--r-- projects/icu/uregex_open_fuzzer.options 2
13 files changed, 546 insertions, 0 deletions
diff --git a/projects/icu/Dockerfile b/projects/icu/Dockerfile
new file mode 100644
index 00000000..e420bc5b
--- /dev/null
+++ b/projects/icu/Dockerfile
@@ -0,0 +1,22 @@
+# Copyright 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+# Build on top of the ossfuzz/base-libfuzzer image.
+FROM ossfuzz/base-libfuzzer
+MAINTAINER mike.aizatsky@gmail.com
+# Refresh the package index in the same layer as the install so that the
+# install does not depend on whatever (possibly stale or missing) package
+# lists the base image happens to carry.
+RUN apt-get update && apt-get install -y make
+
+# Check out the ICU4C trunk sources into ./icu.
+RUN svn co http://source.icu-project.org/repos/icu/trunk/icu4c/ icu
+# Copy the build script, fuzz targets, dictionaries and option files to $SRC.
+COPY build.sh *.cc *.h *.dict *.options $SRC/
diff --git a/projects/icu/break_iterator_fuzzer.cc b/projects/icu/break_iterator_fuzzer.cc
new file mode 100644
index 00000000..143a74da
--- /dev/null
+++ b/projects/icu/break_iterator_fuzzer.cc
@@ -0,0 +1,46 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <memory>
+#include "fuzzer_utils.h"
+#include "unicode/brkiter.h"
+
+IcuEnvironment* env = new IcuEnvironment();
+
+// libFuzzer entry point. Decodes the input as UTF-8 and walks every boundary
+// that a break iterator reports for that text. The locale and the iterator
+// kind (word, line, character, sentence or title) are chosen by an RNG seeded
+// from the input. Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  UErrorCode status = U_ZERO_ERROR;
+  // Declared before |bi| so the text outlives the iterator attached to it.
+  icu::UnicodeString str(UnicodeStringFromUtf8(data, size));
+
+  auto rng = CreateRng(data, size);
+  const icu::Locale& locale = GetRandomLocale(&rng);
+
+  std::unique_ptr<icu::BreakIterator> bi;
+
+  switch (rng() % 5) {
+    case 0:
+      bi.reset(icu::BreakIterator::createWordInstance(locale, status));
+      break;
+    case 1:
+      bi.reset(icu::BreakIterator::createLineInstance(locale, status));
+      break;
+    case 2:
+      bi.reset(icu::BreakIterator::createCharacterInstance(locale, status));
+      break;
+    case 3:
+      bi.reset(icu::BreakIterator::createSentenceInstance(locale, status));
+      break;
+    case 4:
+      bi.reset(icu::BreakIterator::createTitleInstance(locale, status));
+      break;
+  }
+  if (U_FAILURE(status) || !bi) return 0;
+
+  // Attach the fuzzed text. Without this call the iterator has no input and
+  // the loop below never runs the break rules over |data|.
+  bi->setText(str);
+
+  // Visit every boundary. first()/next() take no UErrorCode, so there is no
+  // status to re-check inside the loop.
+  for (int32_t p = bi->first(); p != icu::BreakIterator::DONE;
+       p = bi->next()) {
+  }
+
+  return 0;
+}
+
diff --git a/projects/icu/break_iterator_utf32_fuzzer.cc b/projects/icu/break_iterator_utf32_fuzzer.cc
new file mode 100644
index 00000000..544e5f6d
--- /dev/null
+++ b/projects/icu/break_iterator_utf32_fuzzer.cc
@@ -0,0 +1,47 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <memory>
+#include "fuzzer_utils.h"
+#include "unicode/brkiter.h"
+
+IcuEnvironment* env = new IcuEnvironment();
+
+// libFuzzer entry point. Interprets the input as UTF-32 code units and walks
+// every boundary that a break iterator reports for that text. The locale and
+// the iterator kind (word, line, character, sentence or title) are chosen by
+// an RNG seeded from the input. Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  UErrorCode status = U_ZERO_ERROR;
+  // Declared before |bi| so the text outlives the iterator attached to it.
+  icu::UnicodeString str(UnicodeStringFromUtf32(data, size));
+
+  auto rng = CreateRng(data, size);
+  const icu::Locale& locale = GetRandomLocale(&rng);
+
+  std::unique_ptr<icu::BreakIterator> bi;
+
+  switch (rng() % 5) {
+    case 0:
+      bi.reset(icu::BreakIterator::createWordInstance(locale, status));
+      break;
+    case 1:
+      bi.reset(icu::BreakIterator::createLineInstance(locale, status));
+      break;
+    case 2:
+      bi.reset(icu::BreakIterator::createCharacterInstance(locale, status));
+      break;
+    case 3:
+      bi.reset(icu::BreakIterator::createSentenceInstance(locale, status));
+      break;
+    case 4:
+      bi.reset(icu::BreakIterator::createTitleInstance(locale, status));
+      break;
+  }
+  if (U_FAILURE(status) || !bi)
+    return 0;
+
+  // Attach the fuzzed text. Without this call the iterator has no input and
+  // the loop below never runs the break rules over |data|.
+  bi->setText(str);
+
+  // Visit every boundary. first()/next() take no UErrorCode, so there is no
+  // status to re-check inside the loop.
+  for (int32_t p = bi->first(); p != icu::BreakIterator::DONE;
+       p = bi->next()) {
+  }
+
+  return 0;
+}
diff --git a/projects/icu/build.sh b/projects/icu/build.sh
new file mode 100755
index 00000000..9cca5484
--- /dev/null
+++ b/projects/icu/build.sh
@@ -0,0 +1,48 @@
+#!/bin/bash -eux
+#
+# Copyright 2016 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+################################################################################
+
+mkdir $WORK/icu
+cd $WORK/icu
+
+# TODO: icu build failes without -DU_USE_STRTOD_L=0
+DEFINES="-DU_CHARSET_IS_UTF8=1 -DU_USING_ICU_NAMESPACE=0 -DU_ENABLE_DYLOAD=0 -DU_USE_STRTOD_L=0"
+CFLAGS="$CFLAGS $DEFINES"
+CXXFLAGS="$CXXFLAGS $DEFINES"
+
+CFLAGS=$CFLAGS CXXFLAGS=$CXXFLAGS CC=$CC CXX=$CXX \
+ /bin/bash $SRC/icu/source/runConfigureICU Linux \
+ --with-library-bits=64 --with-data-packaging=static --enable-static --disable-shared
+
+make -j$(nproc)
+
+FUZZERS="break_iterator_fuzzer \
+ break_iterator_utf32_fuzzer \
+ converter_fuzzer \
+ number_format_fuzzer \
+ ucasemap_fuzzer \
+ unicode_string_codepage_create_fuzzer \
+ uregex_open_fuzzer
+ "
+for fuzzer in $FUZZERS; do
+ $CXX $CXXFLAGS -std=c++11 \
+ $SRC/$fuzzer.cc -o $OUT/$fuzzer \
+ -I$SRC/icu/source/common -I$SRC/icu/source/i18n -L$WORK/icu/lib \
+ -lfuzzer -licui18n -licuuc -licutu -licudata
+done
+
+cp $SRC/*.dict $SRC/*.options $OUT/
diff --git a/projects/icu/converter_fuzzer.cc b/projects/icu/converter_fuzzer.cc
new file mode 100644
index 00000000..cfbdebf6
--- /dev/null
+++ b/projects/icu/converter_fuzzer.cc
@@ -0,0 +1,45 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <memory>
+#include <vector>
+
+#include "fuzzer_utils.h"
+#include "unicode/unistr.h"
+#include "unicode/ucnv.h"
+
+IcuEnvironment* env = new IcuEnvironment();
+
+// unique_ptr whose deleter is supplied at construction time; used to release
+// handles from ICU's C API.
+template <typename T>
+using deleted_unique_ptr = std::unique_ptr<T, std::function<void(T*)>>;
+
+// libFuzzer entry point. Decodes the input as UTF-8, opens one of the
+// converters ICU reports as available (chosen by an RNG seeded from the
+// input) and extracts the string into a byte buffer through that converter.
+// Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  UErrorCode status = U_ZERO_ERROR;
+  auto rng = CreateRng(data, size);
+  icu::UnicodeString str(UnicodeStringFromUtf8(data, size));
+
+  // Guard the modulo below: with no converters available there is nothing
+  // to exercise, and `% 0` would be undefined.
+  const int32_t converter_count = ucnv_countAvailable();
+  if (converter_count <= 0)
+    return 0;
+
+  const char* converter_name =
+      ucnv_getAvailableName(rng() % converter_count);
+
+  deleted_unique_ptr<UConverter> converter(ucnv_open(converter_name, &status),
+                                           &ucnv_close);
+
+  if (U_FAILURE(status))
+    return 0;
+
+  // 1 MiB output buffer, allocated once and reused by every invocation.
+  static const size_t dest_buffer_size = 1024 * 1024;
+  static const std::unique_ptr<char[]> dest_buffer(new char[dest_buffer_size]);
+
+  // Conversion errors are reported through |status| and deliberately ignored;
+  // the fuzzer only looks for crashes.
+  str.extract(dest_buffer.get(), dest_buffer_size, converter.get(), status);
+
+  return 0;
+}
diff --git a/projects/icu/fuzzer_utils.h b/projects/icu/fuzzer_utils.h
new file mode 100644
index 00000000..d879bc39
--- /dev/null
+++ b/projects/icu/fuzzer_utils.h
@@ -0,0 +1,53 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef FUZZER_UTILS_H_
+#define FUZZER_UTILS_H_
+
+#include <assert.h>
+#include <algorithm>
+#include <random>
+
+#include "unicode/locid.h"
+#include "unicode/uchar.h"
+
+// Placeholder for one-time setup shared by the fuzz targets. Each target
+// creates a single instance through a global initializer.
+struct IcuEnvironment {
+  // Nothing needs to be initialized yet.
+  IcuEnvironment() = default;
+};
+
+// Creates a 64-bit Mersenne Twister seeded with a hash of the |size| bytes at
+// |data|, so identical inputs always yield the same random sequence.
+// Declared inline because it is defined in a header.
+inline std::mt19937_64 CreateRng(const uint8_t* data, size_t size) {
+  // Never build a string from a null pointer; an empty input hashes as "".
+  const std::string bytes =
+      (data != nullptr && size > 0)
+          ? std::string(reinterpret_cast<const char*>(data), size)
+          : std::string();
+  std::mt19937_64 rng;
+  rng.seed(std::hash<std::string>()(bytes));
+  return rng;
+}
+
+// Returns one of the locales ICU reports as available, picked with |rng|.
+// If ICU reports no locales, returns the root locale without drawing from
+// |rng|. Declared inline because it is defined in a header.
+inline const icu::Locale& GetRandomLocale(std::mt19937_64* rng) {
+  int32_t num_locales = 0;
+  const icu::Locale* locales = icu::Locale::getAvailableLocales(num_locales);
+  assert(num_locales > 0);
+  // assert() compiles away under NDEBUG, so guard explicitly against `% 0`
+  // and against indexing a null array.
+  if (locales == nullptr || num_locales <= 0)
+    return icu::Locale::getRoot();
+  return locales[(*rng)() % num_locales];
+}
+
+// Decodes the |size| bytes at |data| as UTF-8 into a UnicodeString.
+// Declared inline because it is defined in a header.
+inline icu::UnicodeString UnicodeStringFromUtf8(const uint8_t* data,
+                                                size_t size) {
+  // StringPiece takes an int32_t length; make the narrowing explicit.
+  return icu::UnicodeString::fromUTF8(icu::StringPiece(
+      reinterpret_cast<const char*>(data), static_cast<int32_t>(size)));
+}
+
+// Builds a UnicodeString from |data| reinterpreted as an array of UChar32
+// values in the machine's native representation. Trailing bytes that do not
+// fill a whole UChar32 are ignored, and values above UCHAR_MAX_VALUE are
+// capped to it. Declared inline because it is defined in a header.
+inline icu::UnicodeString UnicodeStringFromUtf32(const uint8_t* data,
+                                                 size_t size) {
+  std::vector<UChar32> uchars(size / sizeof(UChar32));
+  // With fewer than sizeof(UChar32) input bytes the vector is empty and its
+  // data() may be null; passing a null pointer to memcpy is undefined even
+  // for a zero length, so skip the copy in that case.
+  if (!uchars.empty()) {
+    memcpy(uchars.data(), data, uchars.size() * sizeof(UChar32));
+  }
+  for (UChar32& code_point : uchars) {
+    code_point = std::min(code_point, UCHAR_MAX_VALUE);
+  }
+
+  return icu::UnicodeString::fromUTF32(uchars.data(),
+                                       static_cast<int32_t>(uchars.size()));
+}
+
+#endif // FUZZER_UTILS_H_
diff --git a/projects/icu/number_format_fuzzer.cc b/projects/icu/number_format_fuzzer.cc
new file mode 100644
index 00000000..88df77b7
--- /dev/null
+++ b/projects/icu/number_format_fuzzer.cc
@@ -0,0 +1,30 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Fuzzer for NumberFormat::parse.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <memory>
+#include "fuzzer_utils.h"
+#include "unicode/numfmt.h"
+
+IcuEnvironment* env = new IcuEnvironment();
+
+// libFuzzer entry point. Creates a NumberFormat for a locale picked by an RNG
+// seeded from the input, then parses the input (decoded as UTF-8) with it.
+// Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  UErrorCode error_code = U_ZERO_ERROR;
+
+  auto generator = CreateRng(data, size);
+  const icu::Locale& chosen_locale = GetRandomLocale(&generator);
+
+  std::unique_ptr<icu::NumberFormat> number_format(
+      icu::NumberFormat::createInstance(chosen_locale, error_code));
+  if (U_SUCCESS(error_code)) {
+    icu::UnicodeString text(UnicodeStringFromUtf8(data, size));
+    icu::Formattable parsed;
+    // Parse errors land in |error_code| and are intentionally not inspected.
+    number_format->parse(text, parsed, error_code);
+  }
+
+  return 0;
+}
diff --git a/projects/icu/regex.dict b/projects/icu/regex.dict
new file mode 100644
index 00000000..b0456e6d
--- /dev/null
+++ b/projects/icu/regex.dict
@@ -0,0 +1,103 @@
+# Copyright 2016 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+"\\a"
+"\\A"
+"\\b"
+"\\B"
+"\\cX"
+"\\cC"
+"\\cZ"
+"\\d"
+"\\D"
+"\\e"
+"\\u001B"
+"\\E"
+"\\f"
+"\\u000C"
+"\\G"
+"\\h"
+"\\u0009"
+"\\H"
+"\\k"
+"\\n"
+"\\N"
+"\\p"
+"\\P"
+"{"
+"}"
+"\\Q"
+"\\r"
+"\\u000D"
+"\\R"
+"\\u000a"
+"\\u000b"
+"\\u000c"
+"\\u000d"
+"\\u0085"
+"\\u2028"
+"\\u2029"
+"\\s"
+"[\\t\\n\\f\\r\\p{Z}]"
+"\\S"
+"\\t"
+"\\u0009"
+"\\u"
+"\\uf0ff"
+"\\U"
+"\\U0010ffff."
+"\\v"
+"\\V"
+"\\w"
+"\\W"
+"\\x"
+"\\xhh"
+"\\X"
+"\\Z"
+"\\z"
+"\\n"
+"\\0"
+"\\0ooo"
+"."
+"^"
+"$"
+"\\"
+"|"
+"*"
+"+"
+"?"
+","
+"*?"
+"+?"
+"??"
+"*+"
+"++"
+"?+"
+"("
+"(?:"
+"(?>"
+"(?#"
+"(?="
+"(?!"
+"(?<="
+"(?<!"
+"(?"
+"-"
+")"
+":"
+"(?ismwx-ismwx:"
+"(?ismwx-ismwx)"
+"(?i)"
+"["
+"]"
+"[\\u0000-\\U0010ffff]"
+"[:script=Greek:]"
+"{script=Greek}"
+"gC"
+"sc"
+"scx"
+"WB"
+"Nd"
+"d"
+"MN"
diff --git a/projects/icu/target.yaml b/projects/icu/target.yaml
new file mode 100644
index 00000000..288124f8
--- /dev/null
+++ b/projects/icu/target.yaml
@@ -0,0 +1 @@
+homepage: "http://site.icu-project.org/"
diff --git a/projects/icu/ucasemap_fuzzer.cc b/projects/icu/ucasemap_fuzzer.cc
new file mode 100644
index 00000000..32ff8c4c
--- /dev/null
+++ b/projects/icu/ucasemap_fuzzer.cc
@@ -0,0 +1,53 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Fuzzer for ucasemap.
+
+#include <stddef.h>
+#include <stdint.h>
+#include <memory>
+#include "fuzzer_utils.h"
+#include "unicode/ucasemap.h"
+
+IcuEnvironment* env = new IcuEnvironment();
+
+// unique_ptr whose deleter is supplied at construction time.
+template<typename T>
+using deleted_unique_ptr = std::unique_ptr<T,std::function<void(T*)>>;
+
+// libFuzzer entry point. Opens a UCaseMap for a locale and option bits drawn
+// from an RNG seeded from the input, then runs one of the four UTF-8 case
+// mapping functions (lower, upper, title, fold) over the raw input bytes.
+// Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  UErrorCode error_code = U_ZERO_ERROR;
+
+  auto generator = CreateRng(data, size);
+  const icu::Locale& chosen_locale = GetRandomLocale(&generator);
+  const uint32_t options = static_cast<uint32_t>(generator());
+
+  deleted_unique_ptr<UCaseMap> case_map(
+      ucasemap_open(chosen_locale.getName(), options, &error_code),
+      [](UCaseMap* map) { ucasemap_close(map); });
+
+  if (U_FAILURE(error_code)) {
+    return 0;
+  }
+
+  // The output buffer is twice the input size.
+  const int32_t out_capacity = size * 2;
+  std::unique_ptr<char[]> out(new char[out_capacity]);
+  const char* in = reinterpret_cast<const char*>(data);
+
+  // Results and errors of the mapping call are intentionally ignored.
+  const auto operation = generator() % 4;
+  if (operation == 0) {
+    ucasemap_utf8ToLower(case_map.get(), out.get(), out_capacity, in, size,
+                         &error_code);
+  } else if (operation == 1) {
+    ucasemap_utf8ToUpper(case_map.get(), out.get(), out_capacity, in, size,
+                         &error_code);
+  } else if (operation == 2) {
+    ucasemap_utf8ToTitle(case_map.get(), out.get(), out_capacity, in, size,
+                         &error_code);
+  } else if (operation == 3) {
+    ucasemap_utf8FoldCase(case_map.get(), out.get(), out_capacity, in, size,
+                          &error_code);
+  }
+
+  return 0;
+}
+
diff --git a/projects/icu/unicode_string_codepage_create_fuzzer.cc b/projects/icu/unicode_string_codepage_create_fuzzer.cc
new file mode 100644
index 00000000..bb0489ca
--- /dev/null
+++ b/projects/icu/unicode_string_codepage_create_fuzzer.cc
@@ -0,0 +1,73 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include <algorithm>
+#include <array>
+#include <vector>
+
+#include "fuzzer_utils.h"
+#include "unicode/unistr.h"
+
+// Fixed table of 45 converter (codepage) names. The fuzz target in this file
+// picks one entry per input and uses it as the codepage for decoding.
+// Taken from third_party/icu/source/data/mappings/convrtrs.txt file.
+static const std::array<const char*, 45> kConverters = {
+ {
+ "UTF-8",
+ "utf-16be",
+ "utf-16le",
+ "UTF-32",
+ "UTF-32BE",
+ "UTF-32LE",
+ "ibm866-html",
+ "iso-8859-2-html",
+ "iso-8859-3-html",
+ "iso-8859-4-html",
+ "iso-8859-5-html",
+ "iso-8859-6-html",
+ "iso-8859-7-html",
+ "iso-8859-8-html",
+ "ISO-8859-8-I",
+ "iso-8859-10-html",
+ "iso-8859-13-html",
+ "iso-8859-14-html",
+ "iso-8859-15-html",
+ "iso-8859-16-html",
+ "koi8-r-html",
+ "koi8-u-html",
+ "macintosh-html",
+ "windows-874-html",
+ "windows-1250-html",
+ "windows-1251-html",
+ "windows-1252-html",
+ "windows-1253-html",
+ "windows-1254-html",
+ "windows-1255-html",
+ "windows-1256-html",
+ "windows-1257-html",
+ "windows-1258-html",
+ "x-mac-cyrillic-html",
+ "windows-936-2000",
+ "gb18030",
+ "big5-html",
+ "euc-jp-html",
+ "ISO_2022,locale=ja,version=0",
+ "shift_jis-html",
+ "euc-kr-html",
+ "ISO-2022-KR",
+ "ISO-2022-CN",
+ "ISO-2022-CN-EXT",
+ "HZ-GB-2312"
+ }
+};
+
+IcuEnvironment* env = new IcuEnvironment();
+
+// libFuzzer entry point. Constructs a UnicodeString from the raw input bytes
+// using a codepage picked from kConverters by an RNG seeded from the input.
+// Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  auto generator = CreateRng(data, size);
+  const char* const codepage = kConverters[generator() % kConverters.size()];
+  const char* const bytes = reinterpret_cast<const char*>(data);
+  // Construction is the operation under test; the result is discarded.
+  icu::UnicodeString decoded(bytes, size, codepage);
+  return 0;
+}
diff --git a/projects/icu/uregex_open_fuzzer.cc b/projects/icu/uregex_open_fuzzer.cc
new file mode 100644
index 00000000..7e2744c6
--- /dev/null
+++ b/projects/icu/uregex_open_fuzzer.cc
@@ -0,0 +1,23 @@
+// Copyright 2016 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include <stddef.h>
+#include <stdint.h>
+
+#include "fuzzer_utils.h"
+#include "unicode/regex.h"
+
+IcuEnvironment* env = new IcuEnvironment();
+
+// libFuzzer entry point. Reinterprets the raw input as UChar units, tries to
+// open it as a regular expression with no flags, and closes the handle if
+// one was returned. Always returns 0.
+extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) {
+  UParseError parse_error = { 0 };
+  UErrorCode error_code = U_ZERO_ERROR;
+  const UChar* pattern = reinterpret_cast<const UChar*>(data);
+  // Pattern length is counted in UChar units, not bytes.
+  URegularExpression* compiled =
+      uregex_open(pattern, static_cast<int>(size) / sizeof(UChar), 0,
+                  &parse_error, &error_code);
+  if (compiled != nullptr) {
+    uregex_close(compiled);
+  }
+
+  return 0;
+}
diff --git a/projects/icu/uregex_open_fuzzer.options b/projects/icu/uregex_open_fuzzer.options
new file mode 100644
index 00000000..0e5d596d
--- /dev/null
+++ b/projects/icu/uregex_open_fuzzer.options
@@ -0,0 +1,2 @@
+[libfuzzer]
+dict = regex.dict